lex.c (9354B)
1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 #include <ctype.h> 5 #include <mach.h> 6 #define Extern extern 7 #include "acid.h" 8 #include "y.tab.h" 9 10 struct keywd 11 { 12 char *name; 13 int terminal; 14 } 15 keywds[] = 16 { 17 "do", Tdo, 18 "if", Tif, 19 "then", Tthen, 20 "else", Telse, 21 "while", Twhile, 22 "loop", Tloop, 23 "head", Thead, 24 "tail", Ttail, 25 "append", Tappend, 26 "defn", Tfn, 27 "return", Tret, 28 "local", Tlocal, 29 "aggr", Tcomplex, 30 "union", Tcomplex, 31 "adt", Tcomplex, 32 "complex", Tcomplex, 33 "delete", Tdelete, 34 "whatis", Twhat, 35 "eval", Teval, 36 "builtin", Tbuiltin, 37 0, 0 38 }; 39 40 char cmap[256]; 41 42 void 43 initcmap(void) 44 { 45 cmap['0']= '\0'+1; 46 cmap['n']= '\n'+1; 47 cmap['r']= '\r'+1; 48 cmap['t']= '\t'+1; 49 cmap['b']= '\b'+1; 50 cmap['f']= '\f'+1; 51 cmap['a']= '\a'+1; 52 cmap['v']= '\v'+1; 53 cmap['\\']= '\\'+1; 54 cmap['"']= '"'+1; 55 } 56 57 void 58 kinit(void) 59 { 60 int i; 61 62 initcmap(); 63 64 for(i = 0; keywds[i].name; i++) 65 enter(keywds[i].name, keywds[i].terminal); 66 } 67 68 typedef struct IOstack IOstack; 69 struct IOstack 70 { 71 char *name; 72 int line; 73 char *text; 74 char *ip; 75 Biobuf *fin; 76 IOstack *prev; 77 }; 78 IOstack *lexio; 79 uint nlexio; 80 81 void 82 setacidfile(void) 83 { 84 char *name; 85 Lsym *l; 86 87 if(lexio) 88 name = lexio->name; 89 else 90 name = ""; 91 l = mkvar("acidfile"); 92 l->v->set = 1; 93 l->v->store.fmt = 's'; 94 l->v->type = TSTRING; 95 l->v->store.u.string = strnode(name); 96 } 97 98 void 99 pushfile(char *file) 100 { 101 Biobuf *b; 102 IOstack *io; 103 104 if(nlexio > 64) 105 error("too many includes"); 106 107 if(file) 108 b = Bopen(file, OREAD); 109 else{ 110 b = Bopen(unsharp("#d/0"), OREAD); 111 file = "<stdin>"; 112 } 113 114 if(b == 0) 115 error("pushfile: %s: %r", file); 116 117 io = malloc(sizeof(IOstack)); 118 if(io == 0) 119 fatal("no memory"); 120 io->name = strdup(file); 121 if(io->name == 0) 122 fatal("no memory"); 123 io->line = line; 124 line = 1; 125 io->text = 0; 126 io->fin = b; 127 io->prev = lexio; 128 lexio = io; 129 nlexio++; 130 setacidfile(); 131 } 132 133 void 134 pushfd(int fd) 135 { 136 pushfile("/dev/null"); 137 close(lexio->fin->fid); 138 free(lexio->name); 139 lexio->name = smprint("<fd#d>", fd); 140 lexio->fin->fid = fd; 141 } 142 143 void 144 pushstr(Node *s) 145 { 146 IOstack *io; 147 148 io = malloc(sizeof(IOstack)); 149 if(io == 0) 150 fatal("no memory"); 151 io->line = line; 152 line = 1; 153 io->name = strdup("<string>"); 154 if(io->name == 0) 155 fatal("no memory"); 156 io->line = line; 157 line = 1; 158 io->text = strdup(s->store.u.string->string); 159 if(io->text == 0) 160 fatal("no memory"); 161 io->ip = io->text; 162 io->fin = 0; 163 io->prev = lexio; 164 nlexio++; 165 lexio = io; 166 setacidfile(); 167 } 168 169 void 170 restartio(void) 171 { 172 Bflush(lexio->fin); 173 Binit(lexio->fin, 0, OREAD); 174 } 175 176 int 177 popio(void) 178 { 179 IOstack *s; 180 181 if(lexio == 0) 182 return 0; 183 184 if(lexio->prev == 0){ 185 if(lexio->fin) 186 restartio(); 187 return 0; 188 } 189 190 if(lexio->fin) 191 Bterm(lexio->fin); 192 else 193 free(lexio->text); 194 free(lexio->name); 195 line = lexio->line; 196 s = lexio; 197 lexio = s->prev; 198 free(s); 199 nlexio--; 200 setacidfile(); 201 return 1; 202 } 203 204 int 205 Zfmt(Fmt *f) 206 { 207 char buf[1024], *p; 208 IOstack *e; 209 210 e = lexio; 211 if(e) { 212 p = seprint(buf, buf+sizeof buf, "%s:%d", e->name, line); 213 while(e->prev) { 214 e = e->prev; 215 if(initialising && e->prev == 0) 216 break; 217 p = seprint(p, buf+sizeof buf, " [%s:%d]", e->name, e->line); 218 } 219 } else 220 sprint(buf, "no file:0"); 221 fmtstrcpy(f, buf); 222 return 0; 223 } 224 225 void 226 unlexc(int s) 227 { 228 if(s == '\n') 229 line--; 230 231 if(lexio->fin) 232 Bungetc(lexio->fin); 233 else 234 lexio->ip--; 235 } 236 237 int 238 lexc(void) 239 { 240 int c; 241 242 if(lexio->fin) { 243 c = Bgetc(lexio->fin); 244 if(gotint) 245 error("interrupt"); 246 return c; 247 } 248 249 c = *lexio->ip++; 250 if(c == 0) 251 return -1; 252 return c; 253 } 254 255 int 256 escchar(int c) 257 { 258 int n; 259 char buf[Strsize]; 260 261 if(c >= '0' && c <= '9') { 262 n = 1; 263 buf[0] = c; 264 for(;;) { 265 c = lexc(); 266 if(c == Eof) 267 error("%d: <eof> in escape sequence", line); 268 if(strchr("0123456789xX", c) == 0) { 269 unlexc(c); 270 break; 271 } 272 buf[n++] = c; 273 } 274 buf[n] = '\0'; 275 return strtol(buf, 0, 0); 276 } 277 278 n = cmap[(unsigned char)c]; 279 if(n == 0) 280 return c; 281 return n-1; 282 } 283 284 void 285 eatstring(void) 286 { 287 int esc, c, cnt; 288 char buf[Strsize]; 289 290 esc = 0; 291 for(cnt = 0;;) { 292 c = lexc(); 293 switch(c) { 294 case Eof: 295 error("%d: <eof> in string constant", line); 296 297 case '\n': 298 error("newline in string constant"); 299 goto done; 300 301 case '\\': 302 if(esc) 303 goto Default; 304 esc = 1; 305 break; 306 307 case '"': 308 if(esc == 0) 309 goto done; 310 311 /* Fall through */ 312 default: 313 Default: 314 if(esc) { 315 c = escchar(c); 316 esc = 0; 317 } 318 buf[cnt++] = c; 319 break; 320 } 321 if(cnt >= Strsize) 322 error("string token too long"); 323 } 324 done: 325 buf[cnt] = '\0'; 326 yylval.string = strnode(buf); 327 } 328 329 void 330 eatnl(void) 331 { 332 int c; 333 334 line++; 335 for(;;) { 336 c = lexc(); 337 if(c == Eof) 338 error("eof in comment"); 339 if(c == '\n') 340 return; 341 } 342 } 343 344 int 345 bqsymbol(void) 346 { 347 int c; 348 char *p; 349 Lsym *s; 350 351 p = symbol; 352 while((c = lexc()) != '`'){ 353 if(c == Eof) 354 error("eof in backquote"); 355 if(c == '\n') 356 error("newline in backquote"); 357 *p++ = c; 358 } 359 if(p >= symbol+sizeof symbol) 360 sysfatal("overflow in bqsymbol"); 361 *p = 0; 362 363 s = look(symbol); 364 if(s == 0) 365 s = enter(symbol, Tid); 366 yylval.sym = s; 367 return s->lexval; 368 } 369 370 int 371 yylex(void) 372 { 373 int c; 374 extern char vfmt[]; 375 376 loop: 377 Bflush(bout); 378 c = lexc(); 379 switch(c) { 380 case Eof: 381 if(gotint) { 382 gotint = 0; 383 stacked = 0; 384 Bprint(bout, "\nacid; "); 385 goto loop; 386 } 387 return Eof; 388 389 case '`': 390 return bqsymbol(); 391 392 case '"': 393 eatstring(); 394 return Tstring; 395 396 case ' ': 397 case '\t': 398 goto loop; 399 400 case '\n': 401 line++; 402 if(interactive == 0) 403 goto loop; 404 if(stacked) { 405 print("\t"); 406 goto loop; 407 } 408 nlcount++; 409 return ';'; 410 411 case '.': 412 c = lexc(); 413 unlexc(c); 414 if(isdigit(c)) 415 return numsym('.'); 416 417 return '.'; 418 419 case '(': 420 case ')': 421 case '[': 422 case ']': 423 case ';': 424 case ':': 425 case ',': 426 case '~': 427 case '?': 428 case '*': 429 case '@': 430 case '^': 431 case '%': 432 return c; 433 case '{': 434 stacked++; 435 return c; 436 case '}': 437 stacked--; 438 return c; 439 440 case '\\': 441 c = lexc(); 442 if(strchr(vfmt, c) == 0) { 443 unlexc(c); 444 return '\\'; 445 } 446 yylval.ival = c; 447 return Tfmt; 448 449 case '!': 450 c = lexc(); 451 if(c == '=') 452 return Tneq; 453 unlexc(c); 454 return '!'; 455 456 case '+': 457 c = lexc(); 458 if(c == '+') 459 return Tinc; 460 unlexc(c); 461 return '+'; 462 463 case '/': 464 c = lexc(); 465 if(c == '/') { 466 eatnl(); 467 goto loop; 468 } 469 unlexc(c); 470 return '/'; 471 472 case '\'': 473 c = lexc(); 474 if(c == '\\') 475 yylval.ival = escchar(lexc()); 476 else 477 yylval.ival = c; 478 c = lexc(); 479 if(c != '\'') { 480 error("missing '"); 481 unlexc(c); 482 } 483 return Tconst; 484 485 case '&': 486 c = lexc(); 487 if(c == '&') 488 return Tandand; 489 unlexc(c); 490 return '&'; 491 492 case '=': 493 c = lexc(); 494 if(c == '=') 495 return Teq; 496 unlexc(c); 497 return '='; 498 499 case '|': 500 c = lexc(); 501 if(c == '|') 502 return Toror; 503 unlexc(c); 504 return '|'; 505 506 case '<': 507 c = lexc(); 508 if(c == '=') 509 return Tleq; 510 if(c == '<') 511 return Tlsh; 512 unlexc(c); 513 return '<'; 514 515 case '>': 516 c = lexc(); 517 if(c == '=') 518 return Tgeq; 519 if(c == '>') 520 return Trsh; 521 unlexc(c); 522 return '>'; 523 524 case '-': 525 c = lexc(); 526 527 if(c == '>') 528 return Tindir; 529 530 if(c == '-') 531 return Tdec; 532 unlexc(c); 533 return '-'; 534 535 default: 536 return numsym(c); 537 } 538 } 539 540 int 541 numsym(char first) 542 { 543 int c, isbin, isfloat, ishex; 544 char *sel, *p; 545 Lsym *s; 546 547 symbol[0] = first; 548 p = symbol; 549 550 ishex = 0; 551 isbin = 0; 552 isfloat = 0; 553 if(first == '.') 554 isfloat = 1; 555 556 if(isdigit((uchar)*p++) || isfloat) { 557 for(;;) { 558 c = lexc(); 559 if(c < 0) 560 error("%d: <eof> eating symbols", line); 561 562 if(c == '\n') 563 line++; 564 sel = "01234567890.xb"; 565 if(ishex) 566 sel = "01234567890abcdefABCDEF"; 567 else if(isbin) 568 sel = "01"; 569 else if(isfloat) 570 sel = "01234567890eE-+"; 571 572 if(strchr(sel, c) == 0) { 573 unlexc(c); 574 break; 575 } 576 if(c == '.') 577 isfloat = 1; 578 if(!isbin && c == 'x') 579 ishex = 1; 580 if(!ishex && c == 'b') 581 isbin = 1; 582 *p++ = c; 583 } 584 *p = '\0'; 585 if(isfloat) { 586 yylval.fval = atof(symbol); 587 return Tfconst; 588 } 589 590 if(isbin) 591 yylval.ival = strtoull(symbol+2, 0, 2); 592 else 593 yylval.ival = strtoll(symbol, 0, 0); 594 return Tconst; 595 } 596 597 for(;;) { 598 c = lexc(); 599 if(c < 0) 600 error("%d <eof> eating symbols", line); 601 if(c == '\n') 602 line++; 603 /* allow :: in name */ 604 if(c == ':'){ 605 c = lexc(); 606 if(c == ':'){ 607 *p++ = ':'; 608 *p++ = ':'; 609 continue; 610 } 611 unlexc(c); 612 unlexc(':'); 613 break; 614 } 615 if(c != '_' && c != '$' && c < Runeself && !isalnum(c)) { 616 unlexc(c); 617 break; 618 } 619 *p++ = c; 620 } 621 622 *p = '\0'; 623 624 s = look(symbol); 625 if(s == 0) 626 s = enter(symbol, Tid); 627 628 yylval.sym = s; 629 return s->lexval; 630 } 631 632 Lsym* 633 enter(char *name, int t) 634 { 635 Lsym *s; 636 ulong h; 637 char *p; 638 Value *v; 639 640 h = 0; 641 for(p = name; *p; p++) 642 h = h*3 + *p; 643 h %= Hashsize; 644 645 s = gmalloc(sizeof(Lsym)); 646 memset(s, 0, sizeof(Lsym)); 647 s->name = strdup(name); 648 649 s->hash = hash[h]; 650 hash[h] = s; 651 s->lexval = t; 652 653 v = gmalloc(sizeof(Value)); 654 s->v = v; 655 656 v->store.fmt = 'X'; 657 v->type = TINT; 658 memset(v, 0, sizeof(Value)); 659 660 return s; 661 } 662 663 void 664 delsym(Lsym *s) 665 { 666 char *q; 667 ulong h; 668 Lsym *p; 669 670 h = 0; 671 for(q = s->name; *q; q++) 672 h = h*3 + *q; 673 h %= Hashsize; 674 675 if(hash[h] == s) 676 hash[h] = s->hash; 677 else{ 678 for(p=hash[h]; p && p->hash != s; p=p->hash) 679 ; 680 if(p) 681 p->hash = s->hash; 682 } 683 s->hash = nil; 684 } 685 686 Lsym* 687 look(char *name) 688 { 689 Lsym *s; 690 ulong h; 691 char *p; 692 693 h = 0; 694 for(p = name; *p; p++) 695 h = h*3 + *p; 696 h %= Hashsize; 697 698 for(s = hash[h]; s; s = s->hash) 699 if(strcmp(name, s->name) == 0) 700 return s; 701 return 0; 702 } 703 704 Lsym* 705 mkvar(char *s) 706 { 707 Lsym *l; 708 709 l = look(s); 710 if(l == 0) 711 l = enter(s, Tid); 712 return l; 713 }