plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

lex.c (6568B)


      1 #include "rc.h"
      2 #include "exec.h"
      3 #include "io.h"
      4 #include "getflags.h"
      5 #include "fns.h"
      6 int getnext(void);
      7 
      8 int
      9 wordchr(int c)
     10 {
     11 	return !strchr("\n \t#;&|^$=`'{}()<>", c) && c!=EOF;
     12 }
     13 
     14 int
     15 idchr(int c)
     16 {
     17 	/*
     18 	 * Formerly:
     19 	 * return 'a'<=c && c<='z' || 'A'<=c && c<='Z' || '0'<=c && c<='9'
     20 	 *	|| c=='_' || c=='*';
     21 	 */
     22 	return c>' ' && !strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c);
     23 }
     24 int future = EOF;
     25 int doprompt = 1;
     26 int inquote;
     27 int incomm;
     28 /*
     29  * Look ahead in the input stream
     30  */
     31 
     32 int
     33 nextc(void)
     34 {
     35 	if(future==EOF)
     36 		future = getnext();
     37 	return future;
     38 }
     39 /*
     40  * Consume the lookahead character.
     41  */
     42 
     43 int
     44 advance(void)
     45 {
     46 	int c = nextc();
     47 	lastc = future;
     48 	future = EOF;
     49 	return c;
     50 }
     51 /*
     52  * read a character from the input stream
     53  */
     54 
     55 int
     56 getnext(void)
     57 {
     58 	int c;
     59 	static int peekc = EOF;
     60 	if(peekc!=EOF){
     61 		c = peekc;
     62 		peekc = EOF;
     63 		return c;
     64 	}
     65 	if(runq->eof)
     66 		return EOF;
     67 	if(doprompt)
     68 		pprompt();
     69 	c = rchr(runq->cmdfd);
     70 	if(!inquote && c=='\\'){
     71 		c = rchr(runq->cmdfd);
     72 		if(c=='\n' && !incomm){		/* don't continue a comment */
     73 			doprompt = 1;
     74 			c=' ';
     75 		}
     76 		else{
     77 			peekc = c;
     78 			c='\\';
     79 		}
     80 	}
     81 	doprompt = doprompt || c=='\n' || c==EOF;
     82 	if(c==EOF)
     83 		runq->eof++;
     84 	else if(flag['V'] || ndot>=2 && flag['v']) pchr(err, c);
     85 	return c;
     86 }
     87 
     88 void
     89 pprompt(void)
     90 {
     91 	var *prompt;
     92 	if(runq->iflag){
     93 		pstr(err, promptstr);
     94 		flush(err);
     95 		prompt = vlook("prompt");
     96 		if(prompt->val && prompt->val->next)
     97 			promptstr = prompt->val->next->word;
     98 		else
     99 			promptstr="\t";
    100 	}
    101 	runq->lineno++;
    102 	doprompt = 0;
    103 }
    104 
    105 int
    106 skipwhite(void)
    107 {
    108 	int c, skipped;
    109 	skipped = 0;
    110 	for(;;){
    111 		c = nextc();
    112 		/* Why did this used to be  if(!inquote && c=='#') ?? */
    113 		if(c=='#'){
    114 			incomm = 1;
    115 			skipped = 1;
    116 			for(;;){
    117 				c = nextc();
    118 				if(c=='\n' || c==EOF) {
    119 					incomm = 0;
    120 					break;
    121 				}
    122 				advance();
    123 			}
    124 		}
    125 		if(c==' ' || c=='\t') {
    126 			skipped = 1;
    127 			advance();
    128 		}
    129 		else
    130 			return skipped;
    131 	}
    132 }
    133 
    134 void
    135 skipnl(void)
    136 {
    137 	int c;
    138 	for(;;){
    139 		skipwhite();
    140 		c = nextc();
    141 		if(c!='\n')
    142 			return;
    143 		advance();
    144 	}
    145 }
    146 
    147 int
    148 nextis(int c)
    149 {
    150 	if(nextc()==c){
    151 		advance();
    152 		return 1;
    153 	}
    154 	return 0;
    155 }
    156 
    157 char*
    158 addtok(char *p, int val)
    159 {
    160 	if(p==0)
    161 		return 0;
    162 	if(p==&tok[NTOK-1]){
    163 		*p = 0;
    164 		yyerror("token buffer too short");
    165 		return 0;
    166 	}
    167 	*p++=val;
    168 	return p;
    169 }
    170 
    171 char*
    172 addutf(char *p, int c)
    173 {
    174 	p = addtok(p, c);
    175 	if(twobyte(c))	 /* 2-byte escape */
    176 		return addtok(p, advance());
    177 	if(threebyte(c)){	/* 3-byte escape */
    178 		p = addtok(p, advance());
    179 		return addtok(p, advance());
    180 	}
    181 	if(fourbyte(c)){	/* 4-byte escape */
    182 		p = addtok(p, advance());
    183 		p = addtok(p, advance());
    184 		return addtok(p, advance());
    185 	}
    186 	return p;
    187 }
    188 int lastdol;	/* was the last token read '$' or '$#' or '"'? */
    189 int lastword;	/* was the last token read a word or compound word terminator? */
    190 
    191 int
    192 yylex(void)
    193 {
    194 	int c, d = nextc();
    195 	char *w = tok;
    196 	tree *t;
    197 	yylval.tree = 0;
    198 	/*
    199 	 * Embarassing sneakiness:  if the last token read was a quoted or unquoted
    200 	 * WORD then we alter the meaning of what follows.  If the next character
    201 	 * is `(', we return SUB (a subscript paren) and consume the `('.  Otherwise,
    202 	 * if the next character is the first character of a simple or compound word,
    203 	 * we insert a `^' before it.
    204 	 */
    205 	if(lastword && flag['Y']){
    206 		lastword = 0;
    207 		if(d=='('){
    208 			advance();
    209 			strcpy(tok, "(");
    210 			return SUB;
    211 		}
    212 		if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){
    213 			strcpy(tok, "^");
    214 			return '^';
    215 		}
    216 	}
    217 	inquote = 0;
    218 	if(skipwhite() && !flag['Y'])
    219 		return ' ';
    220 	switch(c = advance()){
    221 	case EOF:
    222 		lastdol = 0;
    223 		strcpy(tok, "EOF");
    224 		return EOF;
    225 	case '$':
    226 		lastdol = 1;
    227 		if(nextis('#')){
    228 			strcpy(tok, "$#");
    229 			return COUNT;
    230 		}
    231 		if(nextis('"')){
    232 			strcpy(tok, "$\"");
    233 			return '"';
    234 		}
    235 		strcpy(tok, "$");
    236 		return '$';
    237 	case '&':
    238 		lastdol = 0;
    239 		if(nextis('&')){
    240 			if(flag['Y'])
    241 				skipnl();
    242 			strcpy(tok, "&&");
    243 			return ANDAND;
    244 		}
    245 		strcpy(tok, "&");
    246 		return '&';
    247 	case '|':
    248 		lastdol = 0;
    249 		if(nextis(c)){
    250 			if(flag['Y'])
    251 				skipnl();
    252 			strcpy(tok, "||");
    253 			return OROR;
    254 		}
    255 	case '<':
    256 	case '>':
    257 		lastdol = 0;
    258 		/*
    259 		 * funny redirection tokens:
    260 		 *	redir:	arrow | arrow '[' fd ']'
    261 		 *	arrow:	'<' | '<<' | '>' | '>>' | '|'
    262 		 *	fd:	digit | digit '=' | digit '=' digit
    263 		 *	digit:	'0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
    264 		 * some possibilities are nonsensical and get a message.
    265 		 */
    266 		*w++=c;
    267 		t = newtree();
    268 		switch(c){
    269 		case '|':
    270 			t->type = PIPE;
    271 			t->fd0 = 1;
    272 			t->fd1 = 0;
    273 			break;
    274 		case '>':
    275 			t->type = REDIR;
    276 			if(nextis(c)){
    277 				t->rtype = APPEND;
    278 				*w++=c;
    279 			}
    280 			else t->rtype = WRITE;
    281 			t->fd0 = 1;
    282 			break;
    283 		case '<':
    284 			t->type = REDIR;
    285 			if(nextis(c)){
    286 				t->rtype = HERE;
    287 				*w++=c;
    288 			} else if (nextis('>')){
    289 				t->rtype = RDWR;
    290 				*w++=c;
    291 			} else t->rtype = READ;
    292 			t->fd0 = 0;
    293 			break;
    294 		}
    295 		if(nextis('[')){
    296 			*w++='[';
    297 			c = advance();
    298 			*w++=c;
    299 			if(c<'0' || '9'<c){
    300 			RedirErr:
    301 				*w = 0;
    302 				yyerror(t->type==PIPE?"pipe syntax"
    303 						:"redirection syntax");
    304 				return EOF;
    305 			}
    306 			t->fd0 = 0;
    307 			do{
    308 				t->fd0 = t->fd0*10+c-'0';
    309 				*w++=c;
    310 				c = advance();
    311 			}while('0'<=c && c<='9');
    312 			if(c=='='){
    313 				*w++='=';
    314 				if(t->type==REDIR)
    315 					t->type = DUP;
    316 				c = advance();
    317 				if('0'<=c && c<='9'){
    318 					t->rtype = DUPFD;
    319 					t->fd1 = t->fd0;
    320 					t->fd0 = 0;
    321 					do{
    322 						t->fd0 = t->fd0*10+c-'0';
    323 						*w++=c;
    324 						c = advance();
    325 					}while('0'<=c && c<='9');
    326 				}
    327 				else{
    328 					if(t->type==PIPE)
    329 						goto RedirErr;
    330 					t->rtype = CLOSE;
    331 				}
    332 			}
    333 			if(c!=']'
    334 			|| t->type==DUP && (t->rtype==HERE || t->rtype==APPEND))
    335 				goto RedirErr;
    336 			*w++=']';
    337 		}
    338 		*w='\0';
    339 		yylval.tree = t;
    340 		if(t->type==PIPE && flag['Y'])
    341 			skipnl();
    342 		if(t->type==REDIR) {
    343 			skipwhite();
    344 			if(nextc() == '{')
    345 				t->type = REDIRW;
    346 		}
    347 		return t->type;
    348 	case '\'':
    349 		lastdol = 0;
    350 		lastword = 1;
    351 		inquote = 1;
    352 		for(;;){
    353 			c = advance();
    354 			if(c==EOF)
    355 				break;
    356 			if(c=='\''){
    357 				if(nextc()!='\'')
    358 					break;
    359 				advance();
    360 			}
    361 			w = addutf(w, c);
    362 		}
    363 		if(w!=0)
    364 			*w='\0';
    365 		t = token(tok, WORD);
    366 		t->quoted = 1;
    367 		yylval.tree = t;
    368 		return t->type;
    369 	}
    370 	if(!wordchr(c)){
    371 		lastdol = 0;
    372 		tok[0] = c;
    373 		tok[1]='\0';
    374 		return c;
    375 	}
    376 	for(;;){
    377 		/* next line should have (char)c==GLOB, but ken's compiler is broken */
    378 		if(c=='*' || c=='[' || c=='?' || c==(unsigned char)GLOB)
    379 			w = addtok(w, GLOB);
    380 		w = addutf(w, c);
    381 		c = nextc();
    382 		if(lastdol?!idchr(c):!wordchr(c)) break;
    383 		advance();
    384 	}
    385 
    386 	lastword = 1;
    387 	lastdol = 0;
    388 	if(w!=0)
    389 		*w='\0';
    390 	t = klook(tok);
    391 	if(t->type!=WORD)
    392 		lastword = 0;
    393 	t->quoted = 0;
    394 	yylval.tree = t;
    395 	return t->type;
    396 }