plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

parser.y (14281B)


/*
 * Tokens returned by yylex() below, followed by the operator precedence
 * declarations for the regular-expression grammar (in yacc, later
 * %left lines bind more tightly than earlier ones).
 */
%token CHAR CCL NCCL STR DELIM SCON ITER NEWE NULLS
%left SCON '/' NEWE
%left '|'
%left '$' '^'
%left CHAR CCL NCCL '(' '.' STR NULLS
%left ITER
%left CAT
%left '*' '+' '?'

%{
# include "ldefs.h"
/*
 * Semantic value of every grammar symbol: either an integer (character
 * code, iteration count, or the value returned by mn0/mn1/mn2/mnp) or a
 * pointer to a byte string.
 */
#define YYSTYPE union _yystype_
union _yystype_
{
	int	i;
	uchar	*cp;
};
%}
%%
%{
/* scratch variables shared by the rule actions below */
int i;
int j,k;
int g;
uchar *p;
%}
     26 acc	:	lexinput
     27 	={	
     28 # ifdef DEBUG
     29 		if(debug) sect2dump();
     30 # endif
     31 	}
     32 	;
     33 lexinput:	defns delim prods end
     34 	|	defns delim end
     35 	={
     36 		if(!funcflag)phead2();
     37 		funcflag = TRUE;
     38 	}
     39 	| error
     40 	={
     41 # ifdef DEBUG
     42 		if(debug) {
     43 			sect1dump();
     44 			sect2dump();
     45 			}
     46 # endif
     47 		}
     48 	;
     49 end:		delim | ;
     50 defns:	defns STR STR
     51 	={	strcpy((char*)dp,(char*)$2.cp);
     52 		def[dptr] = dp;
     53 		dp += strlen((char*)$2.cp) + 1;
     54 		strcpy((char*)dp,(char*)$3.cp);
     55 		subs[dptr++] = dp;
     56 		if(dptr >= DEFSIZE)
     57 			error("Too many definitions");
     58 		dp += strlen((char*)$3.cp) + 1;
     59 		if(dp >= dchar+DEFCHAR)
     60 			error("Definitions too long");
     61 		subs[dptr]=def[dptr]=0;	/* for lookup - require ending null */
     62 	}
     63 	|
     64 	;
     65 delim:	DELIM
     66 	={
     67 # ifdef DEBUG
     68 		if(sect == DEFSECTION && debug) sect1dump();
     69 # endif
     70 		sect++;
     71 		}
     72 	;
     73 prods:	prods pr
     74 	={	$$.i = mn2(RNEWE,$1.i,$2.i);
     75 		}
     76 	|	pr
     77 	={	$$.i = $1.i;}
     78 	;
     79 pr:	r NEWE
     80 	={
     81 		if(divflg == TRUE)
     82 			i = mn1(S1FINAL,casecount);
     83 		else i = mn1(FINAL,casecount);
     84 		$$.i = mn2(RCAT,$1.i,i);
     85 		divflg = FALSE;
     86 		casecount++;
     87 		}
     88 	| error NEWE
     89 	={
     90 # ifdef DEBUG
     91 		if(debug) sect2dump();
     92 # endif
     93 		}
     94 r:	CHAR
     95 	={	$$.i = mn0($1.i); }
     96 	| STR
     97 	={
     98 		p = $1.cp;
     99 		i = mn0(*p++);
    100 		while(*p)
    101 			i = mn2(RSTR,i,*p++);
    102 		$$.i = i;
    103 		}
    104 	| '.'
    105 	={	symbol['\n'] = 0;
    106 		if(psave == FALSE){
    107 			p = ccptr;
    108 			psave = ccptr;
    109 			for(i=1;i<'\n';i++){
    110 				symbol[i] = 1;
    111 				*ccptr++ = i;
    112 				}
    113 			for(i='\n'+1;i<NCH;i++){
    114 				symbol[i] = 1;
    115 				*ccptr++ = i;
    116 				}
    117 			*ccptr++ = 0;
    118 			if(ccptr > ccl+CCLSIZE)
    119 				error("Too many large character classes");
    120 			}
    121 		else
    122 			p = psave;
    123 		$$.i = mnp(RCCL,p);
    124 		cclinter(1);
    125 		}
    126 	| CCL
    127 	={	$$.i = mnp(RCCL,$1.cp); }
    128 	| NCCL
    129 	={	$$.i = mnp(RNCCL,$1.cp); }
    130 	| r '*'
    131 	={	$$.i = mn1(STAR,$1.i); }
    132 	| r '+'
    133 	={	$$.i = mn1(PLUS,$1.i); }
    134 	| r '?'
    135 	={	$$.i = mn1(QUEST,$1.i); }
    136 	| r '|' r
    137 	={	$$.i = mn2(BAR,$1.i,$3.i); }
    138 	| r r %prec CAT
    139 	={	$$.i = mn2(RCAT,$1.i,$2.i); }
    140 	| r '/' r
    141 	={	if(!divflg){
    142 			j = mn1(S2FINAL,-casecount);
    143 			i = mn2(RCAT,$1.i,j);
    144 			$$.i = mn2(DIV,i,$3.i);
    145 			}
    146 		else {
    147 			$$.i = mn2(RCAT,$1.i,$3.i);
    148 			warning("Extra slash removed");
    149 			}
    150 		divflg = TRUE;
    151 		}
    152 	| r ITER ',' ITER '}'
    153 	={	if($2.i > $4.i){
    154 			i = $2.i;
    155 			$2.i = $4.i;
    156 			$4.i = i;
    157 			}
    158 		if($4.i <= 0)
    159 			warning("Iteration range must be positive");
    160 		else {
    161 			j = $1.i;
    162 			for(k = 2; k<=$2.i;k++)
    163 				j = mn2(RCAT,j,dupl($1.i));
    164 			for(i = $2.i+1; i<=$4.i; i++){
    165 				g = dupl($1.i);
    166 				for(k=2;k<=i;k++)
    167 					g = mn2(RCAT,g,dupl($1.i));
    168 				j = mn2(BAR,j,g);
    169 				}
    170 			$$.i = j;
    171 			}
    172 	}
    173 	| r ITER '}'
    174 	={
    175 		if($2.i < 0)warning("Can't have negative iteration");
    176 		else if($2.i == 0) $$.i = mn0(RNULLS);
    177 		else {
    178 			j = $1.i;
    179 			for(k=2;k<=$2.i;k++)
    180 				j = mn2(RCAT,j,dupl($1.i));
    181 			$$.i = j;
    182 			}
    183 		}
    184 	| r ITER ',' '}'
    185 	={
    186 				/* from n to infinity */
    187 		if($2.i < 0)warning("Can't have negative iteration");
    188 		else if($2.i == 0) $$.i = mn1(STAR,$1.i);
    189 		else if($2.i == 1)$$.i = mn1(PLUS,$1.i);
    190 		else {		/* >= 2 iterations minimum */
    191 			j = $1.i;
    192 			for(k=2;k<$2.i;k++)
    193 				j = mn2(RCAT,j,dupl($1.i));
    194 			k = mn1(PLUS,dupl($1.i));
    195 			$$.i = mn2(RCAT,j,k);
    196 			}
    197 		}
    198 	| SCON r
    199 	={	$$.i = mn2(RSCON,$2.i,(uintptr)$1.cp); }
    200 	| '^' r
    201 	={	$$.i = mn1(CARAT,$2.i); }
    202 	| r '$'
    203 	={	i = mn0('\n');
    204 		if(!divflg){
    205 			j = mn1(S2FINAL,-casecount);
    206 			k = mn2(RCAT,$1.i,j);
    207 			$$.i = mn2(DIV,k,i);
    208 			}
    209 		else $$.i = mn2(RCAT,$1.i,i);
    210 		divflg = TRUE;
    211 		}
    212 	| '(' r ')'
    213 	={	$$.i = $2.i; }
    214 	|	NULLS
    215 	={	$$.i = mn0(RNULLS); }
    216 	;
    217 %%
    218 int
    219 yylex(void)
    220 {
    221 	uchar *p;
    222 	int c, i;
    223 	uchar  *t, *xp;
    224 	int n, j, k, x;
    225 	static int sectbegin;
    226 	static uchar token[TOKENSIZE];
    227 	static int iter;
    228 
    229 # ifdef DEBUG
    230 	yylval.i = 0;
    231 # endif
    232 
    233 	if(sect == DEFSECTION) {		/* definitions section */
    234 		while(!eof) {
    235 			if(prev == '\n'){		/* next char is at beginning of line */
    236 				getl(p=buf);
    237 				switch(*p){
    238 				case '%':
    239 					switch(*(p+1)){
    240 					case '%':
    241 						lgate();
    242 						Bprint(&fout,"#define YYNEWLINE %d\n",'\n');
    243 						Bprint(&fout,"int\nyylex(void){\nint nstr; extern int yyprevious;\nif(yyprevious){}\n");
    244 						sectbegin = TRUE;
    245 						i = treesize*(sizeof(*name)+sizeof(*left)+
    246 							sizeof(*right)+sizeof(*nullstr)+sizeof(*parent))+ALITTLEEXTRA;
    247 						p = myalloc(i,1);
    248 						if(p == 0)
    249 							error("Too little core for parse tree");
    250 						free(p);
    251 						name = myalloc(treesize,sizeof(*name));
    252 						left = myalloc(treesize,sizeof(*left));
    253 						right = myalloc(treesize,sizeof(*right));
    254 						nullstr = myalloc(treesize,sizeof(*nullstr));
    255 						parent = myalloc(treesize,sizeof(*parent));
    256 						ptr = myalloc(treesize,sizeof(*ptr));
    257 						if(name == 0 || left == 0 || right == 0 || parent == 0 || nullstr == 0 || ptr == 0)
    258 							error("Too little core for parse tree");
    259 						return(freturn(DELIM));
    260 					case 'p': case 'P':	/* has overridden number of positions */
    261 						while(*p && !isdigit(*p))p++;
    262 						maxpos = atol((char*)p);
    263 # ifdef DEBUG
    264 						if (debug) print("positions (%%p) now %d\n",maxpos);
    265 # endif
    266 						if(report == 2)report = 1;
    267 						continue;
    268 					case 'n': case 'N':	/* has overridden number of states */
    269 						while(*p && !isdigit(*p))p++;
    270 						nstates = atol((char*)p);
    271 # ifdef DEBUG
    272 						if(debug)print( " no. states (%%n) now %d\n",nstates);
    273 # endif
    274 						if(report == 2)report = 1;
    275 						continue;
    276 					case 'e': case 'E':		/* has overridden number of tree nodes */
    277 						while(*p && !isdigit(*p))p++;
    278 						treesize = atol((char*)p);
    279 # ifdef DEBUG
    280 						if (debug) print("treesize (%%e) now %d\n",treesize);
    281 # endif
    282 						if(report == 2)report = 1;
    283 						continue;
    284 					case 'o': case 'O':
    285 						while (*p && !isdigit(*p))p++;
    286 						outsize = atol((char*)p);
    287 						if (report ==2) report=1;
    288 						continue;
    289 					case 'a': case 'A':		/* has overridden number of transitions */
    290 						while(*p && !isdigit(*p))p++;
    291 						if(report == 2)report = 1;
    292 						ntrans = atol((char*)p);
    293 # ifdef DEBUG
    294 						if (debug)print("N. trans (%%a) now %d\n",ntrans);
    295 # endif
    296 						continue;
    297 					case 'k': case 'K': /* overriden packed char classes */
    298 						while (*p && !isdigit(*p))p++;
    299 						if (report==2) report=1;
    300 						free(pchar);
    301 						pchlen = atol((char*)p);
    302 # ifdef DEBUG
    303 						if (debug) print( "Size classes (%%k) now %d\n",pchlen);
    304 # endif
    305 						pchar=pcptr=myalloc(pchlen, sizeof(*pchar));
    306 						continue;
    307 					case '{':
    308 						lgate();
    309 						while(getl(p) && strcmp((char*)p,"%}") != 0)
    310 							Bprint(&fout, "%s\n",(char*)p);
    311 						if(p[0] == '%') continue;
    312 						error("Premature eof");
    313 					case 's': case 'S':		/* start conditions */
    314 						lgate();
    315 						while(*p && strchr(" \t,", *p) == 0) p++;
    316 						n = TRUE;
    317 						while(n){
    318 							while(*p && strchr(" \t,", *p)) p++;
    319 							t = p;
    320 							while(*p && strchr(" \t,", *p) == 0)p++;
    321 							if(!*p) n = FALSE;
    322 							*p++ = 0;
    323 							if (*t == 0) continue;
    324 							i = sptr*2;
    325 							Bprint(&fout,"#define %s %d\n",(char*)t,i);
    326 							strcpy((char*)sp, (char*)t);
    327 							sname[sptr++] = sp;
    328 							sname[sptr] = 0;	/* required by lookup */
    329 							if(sptr >= STARTSIZE)
    330 								error("Too many start conditions");
    331 							sp += strlen((char*)sp) + 1;
    332 							if(sp >= stchar+STARTCHAR)
    333 								error("Start conditions too long");
    334 						}
    335 						continue;
    336 					default:
    337 						warning("Invalid request %s",p);
    338 						continue;
    339 					}	/* end of switch after seeing '%' */
    340 				case ' ': case '\t':		/* must be code */
    341 					lgate();
    342 					Bprint(&fout, "%s\n",(char*)p);
    343 					continue;
    344 				default:		/* definition */
    345 					while(*p && !isspace(*p)) p++;
    346 					if(*p == 0)
    347 						continue;
    348 					prev = *p;
    349 					*p = 0;
    350 					bptr = p+1;
    351 					yylval.cp = buf;
    352 					if(isdigit(buf[0]))
    353 						warning("Substitution strings may not begin with digits");
    354 					return(freturn(STR));
    355 				}
    356 			}
    357 			/* still sect 1, but prev != '\n' */
    358 			else {
    359 				p = bptr;
    360 				while(*p && isspace(*p)) p++;
    361 				if(*p == 0)
    362 					warning("No translation given - null string assumed");
    363 				strcpy((char*)token, (char*)p);
    364 				yylval.cp = token;
    365 				prev = '\n';
    366 				return(freturn(STR));
    367 			}
    368 		}
    369 		/* end of section one processing */
    370 	} else if(sect == RULESECTION){		/* rules and actions */
    371 		while(!eof){
    372 			switch(c=gch()){
    373 			case '\0':
    374 				return(freturn(0));
    375 			case '\n':
    376 				if(prev == '\n') continue;
    377 				x = NEWE;
    378 				break;
    379 			case ' ':
    380 			case '\t':
    381 				if(sectbegin == TRUE){
    382 					cpyact();
    383 					while((c=gch()) && c != '\n');
    384 					continue;
    385 				}
    386 				if(!funcflag)phead2();
    387 				funcflag = TRUE;
    388 				Bprint(&fout,"case %d:\n",casecount);
    389 				if(cpyact())
    390 					Bprint(&fout,"break;\n");
    391 				while((c=gch()) && c != '\n');
    392 				if(peek == ' ' || peek == '\t' || sectbegin == TRUE){
    393 					warning("Executable statements should occur right after %%");
    394 					continue;
    395 				}
    396 				x = NEWE;
    397 				break;
    398 			case '%':
    399 				if(prev != '\n') goto character;
    400 				if(peek == '{'){	/* included code */
    401 					getl(buf);
    402 					while(!eof && getl(buf) && strcmp("%}",(char*)buf) != 0)
    403 						Bprint(&fout,"%s\n",(char*)buf);
    404 					continue;
    405 				}
    406 				if(peek == '%'){
    407 					gch();
    408 					gch();
    409 					x = DELIM;
    410 					break;
    411 				}
    412 				goto character;
    413 			case '|':
    414 				if(peek == ' ' || peek == '\t' || peek == '\n'){
    415 					Bprint(&fout,"%d\n",30000+casecount++);
    416 					continue;
    417 				}
    418 				x = '|';
    419 				break;
    420 			case '$':
    421 				if(peek == '\n' || peek == ' ' || peek == '\t' || peek == '|' || peek == '/'){
    422 					x = c;
    423 					break;
    424 				}
    425 				goto character;
    426 			case '^':
    427 				if(prev != '\n' && scon != TRUE) goto character;	/* valid only at line begin */
    428 				x = c;
    429 				break;
    430 			case '?':
    431 			case '+':
    432 			case '.':
    433 			case '*':
    434 			case '(':
    435 			case ')':
    436 			case ',':
    437 			case '/':
    438 				x = c;
    439 				break;
    440 			case '}':
    441 				iter = FALSE;
    442 				x = c;
    443 				break;
    444 			case '{':	/* either iteration or definition */
    445 				if(isdigit(c=gch())){	/* iteration */
    446 					iter = TRUE;
    447 				ieval:
    448 					i = 0;
    449 					while(isdigit(c)){
    450 						token[i++] = c;
    451 						c = gch();
    452 					}
    453 					token[i] = 0;
    454 					yylval.i = atol((char*)token);
    455 					munputc(c);
    456 					x = ITER;
    457 					break;
    458 				} else {		/* definition */
    459 					i = 0;
    460 					while(c && c!='}'){
    461 						token[i++] = c;
    462 						c = gch();
    463 					}
    464 					token[i] = 0;
    465 					i = lookup(token,def);
    466 					if(i < 0)
    467 						warning("Definition %s not found",token);
    468 					else
    469 						munputs(subs[i]);
    470 					continue;
    471 				}
    472 			case '<':		/* start condition ? */
    473 				if(prev != '\n')		/* not at line begin, not start */
    474 					goto character;
    475 				t = slptr;
    476 				do {
    477 					i = 0;
    478 					c = gch();
    479 					while(c != ',' && c && c != '>'){
    480 						token[i++] = c;
    481 						c = gch();
    482 					}
    483 					token[i] = 0;
    484 					if(i == 0)
    485 						goto character;
    486 					i = lookup(token,sname);
    487 					if(i < 0) {
    488 						warning("Undefined start condition %s",token);
    489 						continue;
    490 					}
    491 					*slptr++ = i+1;
    492 				} while(c && c != '>');
    493 				*slptr++ = 0;
    494 				/* check if previous value re-usable */
    495 				for (xp=slist; xp<t; ){
    496 					if (strcmp((char*)xp, (char*)t)==0)
    497 						break;
    498 					while (*xp++);
    499 				}
    500 				if (xp<t){
    501 					/* re-use previous pointer to string */
    502 					slptr=t;
    503 					t=xp;
    504 				}
    505 				if(slptr > slist+STARTSIZE)		/* note not packed ! */
    506 					error("Too many start conditions used");
    507 				yylval.cp = t;
    508 				x = SCON;
    509 				break;
    510 			case '"':
    511 				i = 0;
    512 				while((c=gch()) && c != '"' && c != '\n'){
    513 					if(c == '\\') c = usescape(gch());
    514 					token[i++] = c;
    515 					if(i > TOKENSIZE){
    516 						warning("String too long");
    517 						i = TOKENSIZE-1;
    518 						break;
    519 					}
    520 				}
    521 				if(c == '\n') {
    522 					yyline--;
    523 					warning("Non-terminated string");
    524 					yyline++;
    525 				}
    526 				token[i] = 0;
    527 				if(i == 0)x = NULLS;
    528 				else if(i == 1){
    529 					yylval.i = token[0];
    530 					x = CHAR;
    531 				} else {
    532 					yylval.cp = token;
    533 					x = STR;
    534 				}
    535 				break;
    536 			case '[':
    537 				for(i=1;i<NCH;i++) symbol[i] = 0;
    538 				x = CCL;
    539 				if((c = gch()) == '^'){
    540 					x = NCCL;
    541 					c = gch();
    542 				}
    543 				while(c != ']' && c){
    544 					if(c == '\\') c = usescape(gch());
    545 					symbol[c] = 1;
    546 					j = c;
    547 					if((c=gch()) == '-' && peek != ']'){		/* range specified */
    548 						c = gch();
    549 						if(c == '\\') c = usescape(gch());
    550 						k = c;
    551 						if(j > k) {
    552 							n = j;
    553 							j = k;
    554 							k = n;
    555 						}
    556 						if(!(('A' <= j && k <= 'Z') ||
    557 						     ('a' <= j && k <= 'z') ||
    558 						     ('0' <= j && k <= '9')))
    559 							warning("Non-portable Character Class");
    560 						for(n=j+1;n<=k;n++)
    561 							symbol[n] = 1;		/* implementation dependent */
    562 						c = gch();
    563 					}
    564 				}
    565 				/* try to pack ccl's */
    566 				i = 0;
    567 				for(j=0;j<NCH;j++)
    568 					if(symbol[j])token[i++] = j;
    569 				token[i] = 0;
    570 				p = ccl;
    571 				while(p <ccptr && strcmp((char*)token,(char*)p) != 0)p++;
    572 				if(p < ccptr)	/* found it */
    573 					yylval.cp = p;
    574 				else {
    575 					yylval.cp = ccptr;
    576 					strcpy((char*)ccptr,(char*)token);
    577 					ccptr += strlen((char*)token) + 1;
    578 					if(ccptr >= ccl+CCLSIZE)
    579 						error("Too many large character classes");
    580 				}
    581 				cclinter(x==CCL);
    582 				break;
    583 			case '\\':
    584 				c = usescape(gch());
    585 			default:
    586 			character:
    587 				if(iter){	/* second part of an iteration */
    588 					iter = FALSE;
    589 					if('0' <= c && c <= '9')
    590 						goto ieval;
    591 				}
    592 				if(isalpha(peek)){
    593 					i = 0;
    594 					yylval.cp = token;
    595 					token[i++] = c;
    596 					while(isalpha(peek))
    597 						token[i++] = gch();
    598 					if(peek == '?' || peek == '*' || peek == '+')
    599 						munputc(token[--i]);
    600 					token[i] = 0;
    601 					if(i == 1){
    602 						yylval.i = token[0];
    603 						x = CHAR;
    604 					}
    605 					else x = STR;
    606 				} else {
    607 					yylval.i = c;
    608 					x = CHAR;
    609 				}
    610 			}
    611 			scon = FALSE;
    612 			if(x == SCON)scon = TRUE;
    613 			sectbegin = FALSE;
    614 			return(freturn(x));
    615 		}
    616 	}
    617 	/* section three */
    618 	ptail();
    619 # ifdef DEBUG
    620 	if(debug)
    621 		Bprint(&fout,"\n/*this comes from section three - debug */\n");
    622 # endif
    623 	while(getl(buf) && !eof)
    624 		Bprint(&fout,"%s\n",(char*)buf);
    625 	return(freturn(0));
    626 }
    627 /* end of yylex */
    628 # ifdef DEBUG
    629 int
    630 freturn(int i)
    631 {
    632 	if(yydebug) {
    633 		print("now return ");
    634 		if(i < NCH) allprint(i);
    635 		else print("%d",i);
    636 		printf("   yylval = ");
    637 		switch(i){
    638 			case STR: case CCL: case NCCL:
    639 				strpt(yylval.cp);
    640 				break;
    641 			case CHAR:
    642 				allprint(yylval.i);
    643 				break;
    644 			default:
    645 				print("%d",yylval.i);
    646 				break;
    647 		}
    648 		print("\n");
    649 	}
    650 	return(i);
    651 }
    652 # endif