/* lex.c (6568B) */
1 #include "rc.h" 2 #include "exec.h" 3 #include "io.h" 4 #include "getflags.h" 5 #include "fns.h" 6 int getnext(void); 7 8 int 9 wordchr(int c) 10 { 11 return !strchr("\n \t#;&|^$=`'{}()<>", c) && c!=EOF; 12 } 13 14 int 15 idchr(int c) 16 { 17 /* 18 * Formerly: 19 * return 'a'<=c && c<='z' || 'A'<=c && c<='Z' || '0'<=c && c<='9' 20 * || c=='_' || c=='*'; 21 */ 22 return c>' ' && !strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c); 23 } 24 int future = EOF; 25 int doprompt = 1; 26 int inquote; 27 int incomm; 28 /* 29 * Look ahead in the input stream 30 */ 31 32 int 33 nextc(void) 34 { 35 if(future==EOF) 36 future = getnext(); 37 return future; 38 } 39 /* 40 * Consume the lookahead character. 41 */ 42 43 int 44 advance(void) 45 { 46 int c = nextc(); 47 lastc = future; 48 future = EOF; 49 return c; 50 } 51 /* 52 * read a character from the input stream 53 */ 54 55 int 56 getnext(void) 57 { 58 int c; 59 static int peekc = EOF; 60 if(peekc!=EOF){ 61 c = peekc; 62 peekc = EOF; 63 return c; 64 } 65 if(runq->eof) 66 return EOF; 67 if(doprompt) 68 pprompt(); 69 c = rchr(runq->cmdfd); 70 if(!inquote && c=='\\'){ 71 c = rchr(runq->cmdfd); 72 if(c=='\n' && !incomm){ /* don't continue a comment */ 73 doprompt = 1; 74 c=' '; 75 } 76 else{ 77 peekc = c; 78 c='\\'; 79 } 80 } 81 doprompt = doprompt || c=='\n' || c==EOF; 82 if(c==EOF) 83 runq->eof++; 84 else if(flag['V'] || ndot>=2 && flag['v']) pchr(err, c); 85 return c; 86 } 87 88 void 89 pprompt(void) 90 { 91 var *prompt; 92 if(runq->iflag){ 93 pstr(err, promptstr); 94 flush(err); 95 prompt = vlook("prompt"); 96 if(prompt->val && prompt->val->next) 97 promptstr = prompt->val->next->word; 98 else 99 promptstr="\t"; 100 } 101 runq->lineno++; 102 doprompt = 0; 103 } 104 105 int 106 skipwhite(void) 107 { 108 int c, skipped; 109 skipped = 0; 110 for(;;){ 111 c = nextc(); 112 /* Why did this used to be if(!inquote && c=='#') ?? 
*/ 113 if(c=='#'){ 114 incomm = 1; 115 skipped = 1; 116 for(;;){ 117 c = nextc(); 118 if(c=='\n' || c==EOF) { 119 incomm = 0; 120 break; 121 } 122 advance(); 123 } 124 } 125 if(c==' ' || c=='\t') { 126 skipped = 1; 127 advance(); 128 } 129 else 130 return skipped; 131 } 132 } 133 134 void 135 skipnl(void) 136 { 137 int c; 138 for(;;){ 139 skipwhite(); 140 c = nextc(); 141 if(c!='\n') 142 return; 143 advance(); 144 } 145 } 146 147 int 148 nextis(int c) 149 { 150 if(nextc()==c){ 151 advance(); 152 return 1; 153 } 154 return 0; 155 } 156 157 char* 158 addtok(char *p, int val) 159 { 160 if(p==0) 161 return 0; 162 if(p==&tok[NTOK-1]){ 163 *p = 0; 164 yyerror("token buffer too short"); 165 return 0; 166 } 167 *p++=val; 168 return p; 169 } 170 171 char* 172 addutf(char *p, int c) 173 { 174 p = addtok(p, c); 175 if(twobyte(c)) /* 2-byte escape */ 176 return addtok(p, advance()); 177 if(threebyte(c)){ /* 3-byte escape */ 178 p = addtok(p, advance()); 179 return addtok(p, advance()); 180 } 181 if(fourbyte(c)){ /* 4-byte escape */ 182 p = addtok(p, advance()); 183 p = addtok(p, advance()); 184 return addtok(p, advance()); 185 } 186 return p; 187 } 188 int lastdol; /* was the last token read '$' or '$#' or '"'? */ 189 int lastword; /* was the last token read a word or compound word terminator? */ 190 191 int 192 yylex(void) 193 { 194 int c, d = nextc(); 195 char *w = tok; 196 tree *t; 197 yylval.tree = 0; 198 /* 199 * Embarassing sneakiness: if the last token read was a quoted or unquoted 200 * WORD then we alter the meaning of what follows. If the next character 201 * is `(', we return SUB (a subscript paren) and consume the `('. Otherwise, 202 * if the next character is the first character of a simple or compound word, 203 * we insert a `^' before it. 
204 */ 205 if(lastword && flag['Y']){ 206 lastword = 0; 207 if(d=='('){ 208 advance(); 209 strcpy(tok, "("); 210 return SUB; 211 } 212 if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){ 213 strcpy(tok, "^"); 214 return '^'; 215 } 216 } 217 inquote = 0; 218 if(skipwhite() && !flag['Y']) 219 return ' '; 220 switch(c = advance()){ 221 case EOF: 222 lastdol = 0; 223 strcpy(tok, "EOF"); 224 return EOF; 225 case '$': 226 lastdol = 1; 227 if(nextis('#')){ 228 strcpy(tok, "$#"); 229 return COUNT; 230 } 231 if(nextis('"')){ 232 strcpy(tok, "$\""); 233 return '"'; 234 } 235 strcpy(tok, "$"); 236 return '$'; 237 case '&': 238 lastdol = 0; 239 if(nextis('&')){ 240 if(flag['Y']) 241 skipnl(); 242 strcpy(tok, "&&"); 243 return ANDAND; 244 } 245 strcpy(tok, "&"); 246 return '&'; 247 case '|': 248 lastdol = 0; 249 if(nextis(c)){ 250 if(flag['Y']) 251 skipnl(); 252 strcpy(tok, "||"); 253 return OROR; 254 } 255 case '<': 256 case '>': 257 lastdol = 0; 258 /* 259 * funny redirection tokens: 260 * redir: arrow | arrow '[' fd ']' 261 * arrow: '<' | '<<' | '>' | '>>' | '|' 262 * fd: digit | digit '=' | digit '=' digit 263 * digit: '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9' 264 * some possibilities are nonsensical and get a message. 
265 */ 266 *w++=c; 267 t = newtree(); 268 switch(c){ 269 case '|': 270 t->type = PIPE; 271 t->fd0 = 1; 272 t->fd1 = 0; 273 break; 274 case '>': 275 t->type = REDIR; 276 if(nextis(c)){ 277 t->rtype = APPEND; 278 *w++=c; 279 } 280 else t->rtype = WRITE; 281 t->fd0 = 1; 282 break; 283 case '<': 284 t->type = REDIR; 285 if(nextis(c)){ 286 t->rtype = HERE; 287 *w++=c; 288 } else if (nextis('>')){ 289 t->rtype = RDWR; 290 *w++=c; 291 } else t->rtype = READ; 292 t->fd0 = 0; 293 break; 294 } 295 if(nextis('[')){ 296 *w++='['; 297 c = advance(); 298 *w++=c; 299 if(c<'0' || '9'<c){ 300 RedirErr: 301 *w = 0; 302 yyerror(t->type==PIPE?"pipe syntax" 303 :"redirection syntax"); 304 return EOF; 305 } 306 t->fd0 = 0; 307 do{ 308 t->fd0 = t->fd0*10+c-'0'; 309 *w++=c; 310 c = advance(); 311 }while('0'<=c && c<='9'); 312 if(c=='='){ 313 *w++='='; 314 if(t->type==REDIR) 315 t->type = DUP; 316 c = advance(); 317 if('0'<=c && c<='9'){ 318 t->rtype = DUPFD; 319 t->fd1 = t->fd0; 320 t->fd0 = 0; 321 do{ 322 t->fd0 = t->fd0*10+c-'0'; 323 *w++=c; 324 c = advance(); 325 }while('0'<=c && c<='9'); 326 } 327 else{ 328 if(t->type==PIPE) 329 goto RedirErr; 330 t->rtype = CLOSE; 331 } 332 } 333 if(c!=']' 334 || t->type==DUP && (t->rtype==HERE || t->rtype==APPEND)) 335 goto RedirErr; 336 *w++=']'; 337 } 338 *w='\0'; 339 yylval.tree = t; 340 if(t->type==PIPE && flag['Y']) 341 skipnl(); 342 if(t->type==REDIR) { 343 skipwhite(); 344 if(nextc() == '{') 345 t->type = REDIRW; 346 } 347 return t->type; 348 case '\'': 349 lastdol = 0; 350 lastword = 1; 351 inquote = 1; 352 for(;;){ 353 c = advance(); 354 if(c==EOF) 355 break; 356 if(c=='\''){ 357 if(nextc()!='\'') 358 break; 359 advance(); 360 } 361 w = addutf(w, c); 362 } 363 if(w!=0) 364 *w='\0'; 365 t = token(tok, WORD); 366 t->quoted = 1; 367 yylval.tree = t; 368 return t->type; 369 } 370 if(!wordchr(c)){ 371 lastdol = 0; 372 tok[0] = c; 373 tok[1]='\0'; 374 return c; 375 } 376 for(;;){ 377 /* next line should have (char)c==GLOB, but ken's compiler is 
broken */ 378 if(c=='*' || c=='[' || c=='?' || c==(unsigned char)GLOB) 379 w = addtok(w, GLOB); 380 w = addutf(w, c); 381 c = nextc(); 382 if(lastdol?!idchr(c):!wordchr(c)) break; 383 advance(); 384 } 385 386 lastword = 1; 387 lastdol = 0; 388 if(w!=0) 389 *w='\0'; 390 t = klook(tok); 391 if(t->type!=WORD) 392 lastword = 0; 393 t->quoted = 0; 394 yylval.tree = t; 395 return t->type; 396 }