parser.y (14281B)
%token CHAR CCL NCCL STR DELIM SCON ITER NEWE NULLS
%left	SCON '/' NEWE
%left	'|'
%left	'$' '^'
%left	CHAR CCL NCCL '(' '.' STR NULLS
%left	ITER
%left	CAT
%left	'*' '+' '?'

%{
/*
 * Grammar and scanner for lex specifications.
 *
 * The grammar reads the definitions section (name/translation pairs),
 * a delimiter, and then the rules; every regular expression is turned
 * into a tree through the mn0/mn1/mn2/mnp helpers declared elsewhere
 * (presumably "make node" with 0, 1 or 2 children or a pointer operand;
 * confirm against their definitions).  yylex(), at the end of this
 * file, is the hand-written scanner that feeds the grammar.
 */
# include "ldefs.h"
#define YYSTYPE union _yystype_
/* semantic value: a character/count/tree index, or a string pointer */
union _yystype_
{
	int	i;
	uchar	*cp;
};
%}
%%
%{
/* scratch variables shared by the actions below */
int i;
int j,k;
int g;
uchar *p;
%}
acc	:	lexinput
	={
# ifdef DEBUG
		if(debug) sect2dump();
# endif
	}
	;
lexinput:	defns delim prods end
	|	defns delim end
	={
		/* no rules at all: still make sure phead2 has been called once */
		if(!funcflag)phead2();
		funcflag = TRUE;
	}
	| error
	={
# ifdef DEBUG
		if(debug) {
			sect1dump();
			sect2dump();
		}
# endif
	}
	;
end:		delim | ;
defns:	defns STR STR
	={
		/*
		 * Record one definition: copy the name and then its
		 * translation into the dp string pool, remembering the
		 * start of each in def[] and subs[].
		 */
		strcpy((char*)dp,(char*)$2.cp);
		def[dptr] = dp;
		dp += strlen((char*)$2.cp) + 1;
		strcpy((char*)dp,(char*)$3.cp);
		subs[dptr++] = dp;
		if(dptr >= DEFSIZE)
			error("Too many definitions");
		dp += strlen((char*)$3.cp) + 1;
		if(dp >= dchar+DEFCHAR)
			error("Definitions too long");
		subs[dptr]=def[dptr]=0;	/* for lookup - require ending null */
	}
	|
	;
delim:	DELIM
	={
# ifdef DEBUG
		if(sect == DEFSECTION && debug) sect1dump();
# endif
		/* each delimiter advances to the next section of the input */
		sect++;
	}
	;
prods:	prods pr
	={	$$.i = mn2(RNEWE,$1.i,$2.i);
	}
	|	pr
	={	$$.i = $1.i;}
	;
pr:	r NEWE
	={
		/*
		 * End of one rule: attach a final node numbered with
		 * casecount.  S1FINAL is used when divflg is set, which
		 * the trailing-context rules below do.
		 */
		if(divflg == TRUE)
			i = mn1(S1FINAL,casecount);
		else i = mn1(FINAL,casecount);
		$$.i = mn2(RCAT,$1.i,i);
		divflg = FALSE;
		casecount++;
	}
	| error NEWE
	={
# ifdef DEBUG
		if(debug) sect2dump();
# endif
	}
r:	CHAR
	={	$$.i = mn0($1.i); }
	| STR
	={
		/* a string becomes a left-leaning chain of RSTR nodes */
		p = $1.cp;
		i = mn0(*p++);
		while(*p)
			i = mn2(RSTR,i,*p++);
		$$.i = i;
	}
	| '.'
	={
		/*
		 * Any character except newline.  The class string is
		 * built in the ccl pool the first time only; psave
		 * remembers it for later uses.
		 */
		symbol['\n'] = 0;
		if(psave == FALSE){
			p = ccptr;
			psave = ccptr;
			for(i=1;i<'\n';i++){
				symbol[i] = 1;
				*ccptr++ = i;
			}
			for(i='\n'+1;i<NCH;i++){
				symbol[i] = 1;
				*ccptr++ = i;
			}
			*ccptr++ = 0;
			if(ccptr > ccl+CCLSIZE)
				error("Too many large character classes");
		}
		else
			p = psave;
		$$.i = mnp(RCCL,p);
		cclinter(1);
	}
	| CCL
	={	$$.i = mnp(RCCL,$1.cp); }
	| NCCL
	={	$$.i = mnp(RNCCL,$1.cp); }
	| r '*'
	={	$$.i = mn1(STAR,$1.i); }
	| r '+'
	={	$$.i = mn1(PLUS,$1.i); }
	| r '?'
	={	$$.i = mn1(QUEST,$1.i); }
	| r '|' r
	={	$$.i = mn2(BAR,$1.i,$3.i); }
	| r r %prec CAT
	={	$$.i = mn2(RCAT,$1.i,$2.i); }
	| r '/' r
	={
		/* trailing context; only the first slash in a rule counts */
		if(!divflg){
			j = mn1(S2FINAL,-casecount);
			i = mn2(RCAT,$1.i,j);
			$$.i = mn2(DIV,i,$3.i);
		}
		else {
			$$.i = mn2(RCAT,$1.i,$3.i);
			warning("Extra slash removed");
		}
		divflg = TRUE;
	}
	| r ITER ',' ITER '}'
	={
		/* bounded repetition: put the two counts in ascending order */
		if($2.i > $4.i){
			i = $2.i;
			$2.i = $4.i;
			$4.i = i;
		}
		if($4.i <= 0)
			warning("Iteration range must be positive");
		else {
			/* j = the minimum number of copies, concatenated */
			j = $1.i;
			for(k = 2; k<=$2.i;k++)
				j = mn2(RCAT,j,dupl($1.i));
			/* then one alternative per larger count */
			for(i = $2.i+1; i<=$4.i; i++){
				g = dupl($1.i);
				for(k=2;k<=i;k++)
					g = mn2(RCAT,g,dupl($1.i));
				j = mn2(BAR,j,g);
			}
			$$.i = j;
		}
	}
	| r ITER '}'
	={
		/* exact repetition count */
		if($2.i < 0)warning("Can't have negative iteration");
		else if($2.i == 0) $$.i = mn0(RNULLS);
		else {
			j = $1.i;
			for(k=2;k<=$2.i;k++)
				j = mn2(RCAT,j,dupl($1.i));
			$$.i = j;
		}
	}
	| r ITER ',' '}'
	={
		/* from n to infinity */
		if($2.i < 0)warning("Can't have negative iteration");
		else if($2.i == 0) $$.i = mn1(STAR,$1.i);
		else if($2.i == 1)$$.i = mn1(PLUS,$1.i);
		else {		/* >= 2 iterations minimum */
			j = $1.i;
			for(k=2;k<$2.i;k++)
				j = mn2(RCAT,j,dupl($1.i));
			k = mn1(PLUS,dupl($1.i));
			$$.i = mn2(RCAT,j,k);
		}
	}
	| SCON r
	={	$$.i = mn2(RSCON,$2.i,(uintptr)$1.cp); }
	| '^' r
	={	$$.i = mn1(CARAT,$2.i); }
	| r '$'
	={
		/* end-of-line anchor: handled as trailing context of a newline */
		i = mn0('\n');
		if(!divflg){
			j = mn1(S2FINAL,-casecount);
			k = mn2(RCAT,$1.i,j);
			$$.i = mn2(DIV,k,i);
		}
		else $$.i = mn2(RCAT,$1.i,i);
		divflg = TRUE;
	}
	| '(' r ')'
	={	$$.i = $2.i; }
	|	NULLS
	={	$$.i = mn0(RNULLS); }
	;
%%
/*
 * Scanner for the lex specification, called by the generated parser.
 *
 * What it does depends on the current section (sect):
 *  - DEFSECTION: reads the input a line at a time; handles the
 *    %-requests (table sizes, start conditions, literal code blocks),
 *    copies indented lines to fout, and returns each definition as two
 *    STR tokens (name, then translation).  "%%" allocates the parse
 *    tree arrays and returns DELIM.
 *  - RULESECTION: reads a character at a time and returns the tokens
 *    of the regular expression syntax; actions are copied out through
 *    cpyact().
 *  - otherwise (section three): calls ptail(), copies the remaining
 *    input lines to fout and returns 0.
 *
 * The token code is passed through freturn(); the semantic value, if
 * any, is left in yylval.
 */
int
yylex(void)
{
	uchar *p;
	int c, i;
	uchar  *t, *xp;
	int n, j, k, x;
	static int sectbegin;		/* TRUE until the first token of the rules section is returned */
	static uchar token[TOKENSIZE];	/* text of the token being assembled */
	static int iter;		/* TRUE while inside an iteration count */

# ifdef DEBUG
	yylval.i = 0;
# endif

	if(sect == DEFSECTION) {		/* definitions section */
		while(!eof) {
			if(prev == '\n'){		/* next char is at beginning of line */
				getl(p=buf);
				switch(*p){
				case '%':
					switch(*(p+1)){
					case '%':
						lgate();
						Bprint(&fout,"#define YYNEWLINE %d\n",'\n');
						Bprint(&fout,"int\nyylex(void){\nint nstr; extern int yyprevious;\nif(yyprevious){}\n");
						sectbegin = TRUE;
						/*
						 * Trial allocation of the combined size (plus slack)
						 * before the individual arrays are allocated.
						 */
						i = treesize*(sizeof(*name)+sizeof(*left)+
							sizeof(*right)+sizeof(*nullstr)+sizeof(*parent))+ALITTLEEXTRA;
						p = myalloc(i,1);
						if(p == 0)
							error("Too little core for parse tree");
						free(p);
						name = myalloc(treesize,sizeof(*name));
						left = myalloc(treesize,sizeof(*left));
						right = myalloc(treesize,sizeof(*right));
						nullstr = myalloc(treesize,sizeof(*nullstr));
						parent = myalloc(treesize,sizeof(*parent));
						ptr = myalloc(treesize,sizeof(*ptr));
						if(name == 0 || left == 0 || right == 0 || parent == 0 || nullstr == 0 || ptr == 0)
							error("Too little core for parse tree");
						return(freturn(DELIM));
					case 'p': case 'P':	/* has overridden number of positions */
						while(*p && !isdigit(*p))p++;
						maxpos = atol((char*)p);
# ifdef DEBUG
						if (debug) print("positions (%%p) now %d\n",maxpos);
# endif
						if(report == 2)report = 1;
						continue;
					case 'n': case 'N':	/* has overridden number of states */
						while(*p && !isdigit(*p))p++;
						nstates = atol((char*)p);
# ifdef DEBUG
						if(debug)print( " no. states (%%n) now %d\n",nstates);
# endif
						if(report == 2)report = 1;
						continue;
					case 'e': case 'E':		/* has overridden number of tree nodes */
						while(*p && !isdigit(*p))p++;
						treesize = atol((char*)p);
# ifdef DEBUG
						if (debug) print("treesize (%%e) now %d\n",treesize);
# endif
						if(report == 2)report = 1;
						continue;
					case 'o': case 'O':
						while (*p && !isdigit(*p))p++;
						outsize = atol((char*)p);
						if (report ==2) report=1;
						continue;
					case 'a': case 'A':		/* has overridden number of transitions */
						while(*p && !isdigit(*p))p++;
						if(report == 2)report = 1;
						ntrans = atol((char*)p);
# ifdef DEBUG
						if (debug)print("N. trans (%%a) now %d\n",ntrans);
# endif
						continue;
					case 'k': case 'K': /* overridden packed char classes */
						while (*p && !isdigit(*p))p++;
						if (report==2) report=1;
						free(pchar);
						pchlen = atol((char*)p);
# ifdef DEBUG
						if (debug) print( "Size classes (%%k) now %d\n",pchlen);
# endif
						pchar=pcptr=myalloc(pchlen, sizeof(*pchar));
						continue;
					case '{':
						/* literal code block: copy lines up to the closing marker */
						lgate();
						while(getl(p) && strcmp((char*)p,"%}") != 0)
							Bprint(&fout, "%s\n",(char*)p);
						if(p[0] == '%') continue;
						error("Premature eof");
					case 's': case 'S':		/* start conditions */
						lgate();
						/* skip the request word itself */
						while(*p && strchr(" \t,", *p) == 0) p++;
						n = TRUE;
						while(n){
							/* isolate the next name, separated by blanks or commas */
							while(*p && strchr(" \t,", *p)) p++;
							t = p;
							while(*p && strchr(" \t,", *p) == 0)p++;
							if(!*p) n = FALSE;
							*p++ = 0;
							if (*t == 0) continue;
							i = sptr*2;
							Bprint(&fout,"#define %s %d\n",(char*)t,i);
							strcpy((char*)sp, (char*)t);
							sname[sptr++] = sp;
							sname[sptr] = 0;	/* required by lookup */
							if(sptr >= STARTSIZE)
								error("Too many start conditions");
							sp += strlen((char*)sp) + 1;
							if(sp >= stchar+STARTCHAR)
								error("Start conditions too long");
						}
						continue;
					default:
						warning("Invalid request %s",p);
						continue;
					}	/* end of switch after seeing '%' */
				case ' ': case '\t':		/* must be code */
					lgate();
					Bprint(&fout, "%s\n",(char*)p);
					continue;
				default:		/* definition */
					/* split off the name; the translation is returned by the next call */
					while(*p && !isspace(*p)) p++;
					if(*p == 0)
						continue;
					prev = *p;
					*p = 0;
					bptr = p+1;
					yylval.cp = buf;
					if(isdigit(buf[0]))
						warning("Substitution strings may not begin with digits");
					return(freturn(STR));
				}
			}
			/* still sect 1, but prev != '\n' */
			else {
				p = bptr;
				while(*p && isspace(*p)) p++;
				if(*p == 0)
					warning("No translation given - null string assumed");
				strcpy((char*)token, (char*)p);
				yylval.cp = token;
				prev = '\n';
				return(freturn(STR));
			}
		}
		/* end of section one processing */
	} else if(sect == RULESECTION){		/* rules and actions */
		while(!eof){
			switch(c=gch()){
			case '\0':
				return(freturn(0));
			case '\n':
				if(prev == '\n') continue;
				x = NEWE;
				break;
			case ' ':
			case '\t':
				/* white space ends the pattern; what follows is the action */
				if(sectbegin == TRUE){
					cpyact();
					while((c=gch()) && c != '\n');
					continue;
				}
				if(!funcflag)phead2();
				funcflag = TRUE;
				Bprint(&fout,"case %d:\n",casecount);
				if(cpyact())
					Bprint(&fout,"break;\n");
				while((c=gch()) && c != '\n');
				if(peek == ' ' || peek == '\t' || sectbegin == TRUE){
					warning("Executable statements should occur right after %%");
					continue;
				}
				x = NEWE;
				break;
			case '%':
				if(prev != '\n') goto character;
				if(peek == '{'){	/* included code */
					getl(buf);
					while(!eof && getl(buf) && strcmp("%}",(char*)buf) != 0)
						Bprint(&fout,"%s\n",(char*)buf);
					continue;
				}
				if(peek == '%'){
					gch();
					gch();
					x = DELIM;
					break;
				}
				goto character;
			case '|':
				/* a bar followed by white space is an action, not alternation */
				if(peek == ' ' || peek == '\t' || peek == '\n'){
					Bprint(&fout,"%d\n",30000+casecount++);
					continue;
				}
				x = '|';
				break;
			case '$':
				if(peek == '\n' || peek == ' ' || peek == '\t' || peek == '|' || peek == '/'){
					x = c;
					break;
				}
				goto character;
			case '^':
				if(prev != '\n' && scon != TRUE) goto character;	/* valid only at line begin */
				x = c;
				break;
			case '?':
			case '+':
			case '.':
			case '*':
			case '(':
			case ')':
			case ',':
			case '/':
				x = c;
				break;
			case '}':
				iter = FALSE;
				x = c;
				break;
			case '{':	/* either iteration or definition */
				if(isdigit(c=gch())){	/* iteration */
					iter = TRUE;
				ieval:
					i = 0;
					while(isdigit(c)){
						/* keep room for the terminator; excess digits are dropped */
						if(i < TOKENSIZE-1)
							token[i++] = c;
						c = gch();
					}
					token[i] = 0;
					yylval.i = atol((char*)token);
					munputc(c);
					x = ITER;
					break;
				} else {		/* definition */
					i = 0;
					while(c && c!='}'){
						/* never store past the end of token */
						if(i < TOKENSIZE-1)
							token[i++] = c;
						c = gch();
					}
					token[i] = 0;
					i = lookup(token,def);
					if(i < 0)
						warning("Definition %s not found",token);
					else
						/* push the translation back so it is scanned in place */
						munputs(subs[i]);
					continue;
				}
			case '<':		/* start condition ? */
				if(prev != '\n')		/* not at line begin, not start */
					goto character;
				t = slptr;
				do {
					i = 0;
					c = gch();
					while(c != ',' && c && c != '>'){
						/* never store past the end of token */
						if(i < TOKENSIZE-1)
							token[i++] = c;
						c = gch();
					}
					token[i] = 0;
					if(i == 0)
						goto character;
					i = lookup(token,sname);
					if(i < 0) {
						warning("Undefined start condition %s",token);
						continue;
					}
					/* stored as index+1 so that 0 can terminate the list */
					*slptr++ = i+1;
				} while(c && c != '>');
				*slptr++ = 0;
				/* check if previous value re-usable */
				for (xp=slist; xp<t; ){
					if (strcmp((char*)xp, (char*)t)==0)
						break;
					while (*xp++);
				}
				if (xp<t){
					/* re-use previous pointer to string */
					slptr=t;
					t=xp;
				}
				if(slptr > slist+STARTSIZE)		/* note not packed ! */
					error("Too many start conditions used");
				yylval.cp = t;
				x = SCON;
				break;
			case '"':
				i = 0;
				while((c=gch()) && c != '"' && c != '\n'){
					if(c == '\\') c = usescape(gch());
					token[i++] = c;
					/*
					 * token holds TOKENSIZE bytes: stop as soon as it
					 * is full so that the terminator below still fits.
					 */
					if(i >= TOKENSIZE){
						warning("String too long");
						i = TOKENSIZE-1;
						break;
					}
				}
				if(c == '\n') {
					/* report against the line the string started on */
					yyline--;
					warning("Non-terminated string");
					yyline++;
				}
				token[i] = 0;
				if(i == 0)x = NULLS;
				else if(i == 1){
					yylval.i = token[0];
					x = CHAR;
				} else {
					yylval.cp = token;
					x = STR;
				}
				break;
			case '[':
				/* character class: mark the members in symbol[] */
				for(i=1;i<NCH;i++) symbol[i] = 0;
				x = CCL;
				if((c = gch()) == '^'){
					x = NCCL;
					c = gch();
				}
				while(c != ']' && c){
					if(c == '\\') c = usescape(gch());
					symbol[c] = 1;
					j = c;
					if((c=gch()) == '-' && peek != ']'){		/* range specified */
						c = gch();
						if(c == '\\') c = usescape(gch());
						k = c;
						if(j > k) {
							n = j;
							j = k;
							k = n;
						}
						if(!(('A' <= j && k <= 'Z') ||
						     ('a' <= j && k <= 'z') ||
						     ('0' <= j && k <= '9')))
							warning("Non-portable Character Class");
						for(n=j+1;n<=k;n++)
							symbol[n] = 1;		/* implementation dependent */
						c = gch();
					}
				}
				/* try to pack ccl's */
				i = 0;
				for(j=0;j<NCH;j++)
					if(symbol[j])token[i++] = j;
				token[i] = 0;
				p = ccl;
				while(p <ccptr && strcmp((char*)token,(char*)p) != 0)p++;
				if(p < ccptr)	/* found it */
					yylval.cp = p;
				else {
					yylval.cp = ccptr;
					strcpy((char*)ccptr,(char*)token);
					ccptr += strlen((char*)token) + 1;
					if(ccptr >= ccl+CCLSIZE)
						error("Too many large character classes");
				}
				cclinter(x==CCL);
				break;
			case '\\':
				c = usescape(gch());
				/* fall through: the escaped character is an ordinary one */
			default:
			character:
				if(iter){	/* second part of an iteration */
					iter = FALSE;
					if('0' <= c && c <= '9')
						goto ieval;
				}
				if(isalpha(peek)){
					/*
					 * Gather a run of letters into one STR.  The run is
					 * cut when token is full; the remaining letters are
					 * then picked up by the following calls.
					 */
					i = 0;
					yylval.cp = token;
					token[i++] = c;
					while(isalpha(peek) && i < TOKENSIZE-1)
						token[i++] = gch();
					/* a postfix operator applies to the last letter only */
					if(peek == '?' || peek == '*' || peek == '+')
						munputc(token[--i]);
					token[i] = 0;
					if(i == 1){
						yylval.i = token[0];
						x = CHAR;
					}
					else x = STR;
				} else {
					yylval.i = c;
					x = CHAR;
				}
			}
			scon = FALSE;
			if(x == SCON)scon = TRUE;
			sectbegin = FALSE;
			return(freturn(x));
		}
	}
	/* section three */
	ptail();
# ifdef DEBUG
	if(debug)
		Bprint(&fout,"\n/*this comes from section three - debug */\n");
# endif
	while(getl(buf) && !eof)
		Bprint(&fout,"%s\n",(char*)buf);
	return(freturn(0));
}
/* end of yylex */
# ifdef DEBUG
/*
 * Debugging wrapper around every token yylex returns: when yydebug is
 * set, print the token code and the current yylval, then hand the
 * code back unchanged.
 */
int
freturn(int i)
{
	if(yydebug) {
		print("now return ");
		if(i < NCH) allprint(i);
		else print("%d",i);
		print(" yylval = ");
		switch(i){
			case STR: case CCL: case NCCL:
				strpt(yylval.cp);
				break;
			case CHAR:
				allprint(yylval.i);
				break;
			default:
				print("%d",yylval.i);
				break;
		}
		print("\n");
	}
	return(i);
}
# endif