plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

lex.c (9354B)


      1 #include <u.h>
      2 #include <libc.h>
      3 #include <bio.h>
      4 #include <ctype.h>
      5 #include <mach.h>
      6 #define Extern extern
      7 #include "acid.h"
      8 #include "y.tab.h"
      9 
     10 struct keywd
     11 {
     12 	char	*name;
     13 	int	terminal;
     14 }
     15 keywds[] =
     16 {
     17 	"do",		Tdo,
     18 	"if",		Tif,
     19 	"then",		Tthen,
     20 	"else",		Telse,
     21 	"while",	Twhile,
     22 	"loop",		Tloop,
     23 	"head",		Thead,
     24 	"tail",		Ttail,
     25 	"append",	Tappend,
     26 	"defn",		Tfn,
     27 	"return",	Tret,
     28 	"local",	Tlocal,
     29 	"aggr",		Tcomplex,
     30 	"union",	Tcomplex,
     31 	"adt",		Tcomplex,
     32 	"complex",	Tcomplex,
     33 	"delete",	Tdelete,
     34 	"whatis",	Twhat,
     35 	"eval",		Teval,
     36 	"builtin",	Tbuiltin,
     37 	0,		0
     38 };
     39 
     40 char cmap[256];
     41 
     42 void
     43 initcmap(void)
     44 {
     45 	cmap['0']=	'\0'+1;
     46 	cmap['n']=	'\n'+1;
     47 	cmap['r']=	'\r'+1;
     48 	cmap['t']=	'\t'+1;
     49 	cmap['b']=	'\b'+1;
     50 	cmap['f']=	'\f'+1;
     51 	cmap['a']=	'\a'+1;
     52 	cmap['v']=	'\v'+1;
     53 	cmap['\\']=	'\\'+1;
     54 	cmap['"']=	'"'+1;
     55 }
     56 
     57 void
     58 kinit(void)
     59 {
     60 	int i;
     61 
     62 	initcmap();
     63 
     64 	for(i = 0; keywds[i].name; i++)
     65 		enter(keywds[i].name, keywds[i].terminal);
     66 }
     67 
     68 typedef struct IOstack IOstack;
     69 struct IOstack
     70 {
     71 	char	*name;
     72 	int	line;
     73 	char	*text;
     74 	char	*ip;
     75 	Biobuf	*fin;
     76 	IOstack	*prev;
     77 };
     78 IOstack *lexio;
     79 uint nlexio;
     80 
     81 void
     82 setacidfile(void)
     83 {
     84 	char *name;
     85 	Lsym *l;
     86 
     87 	if(lexio)
     88 		name = lexio->name;
     89 	else
     90 		name = "";
     91 	l = mkvar("acidfile");
     92 	l->v->set = 1;
     93 	l->v->store.fmt = 's';
     94 	l->v->type = TSTRING;
     95 	l->v->store.u.string = strnode(name);
     96 }
     97 
     98 void
     99 pushfile(char *file)
    100 {
    101 	Biobuf *b;
    102 	IOstack *io;
    103 
    104 	if(nlexio > 64)
    105 		error("too many includes");
    106 
    107 	if(file)
    108 		b = Bopen(file, OREAD);
    109 	else{
    110 		b = Bopen(unsharp("#d/0"), OREAD);
    111 		file = "<stdin>";
    112 	}
    113 
    114 	if(b == 0)
    115 		error("pushfile: %s: %r", file);
    116 
    117 	io = malloc(sizeof(IOstack));
    118 	if(io == 0)
    119 		fatal("no memory");
    120 	io->name = strdup(file);
    121 	if(io->name == 0)
    122 		fatal("no memory");
    123 	io->line = line;
    124 	line = 1;
    125 	io->text = 0;
    126 	io->fin = b;
    127 	io->prev = lexio;
    128 	lexio = io;
    129 	nlexio++;
    130 	setacidfile();
    131 }
    132 
    133 void
    134 pushfd(int fd)
    135 {
    136 	pushfile("/dev/null");
    137 	close(lexio->fin->fid);
    138 	free(lexio->name);
    139 	lexio->name = smprint("<fd#d>", fd);
    140 	lexio->fin->fid = fd;
    141 }
    142 
    143 void
    144 pushstr(Node *s)
    145 {
    146 	IOstack *io;
    147 
    148 	io = malloc(sizeof(IOstack));
    149 	if(io == 0)
    150 		fatal("no memory");
    151 	io->line = line;
    152 	line = 1;
    153 	io->name = strdup("<string>");
    154 	if(io->name == 0)
    155 		fatal("no memory");
    156 	io->line = line;
    157 	line = 1;
    158 	io->text = strdup(s->store.u.string->string);
    159 	if(io->text == 0)
    160 		fatal("no memory");
    161 	io->ip = io->text;
    162 	io->fin = 0;
    163 	io->prev = lexio;
    164 	nlexio++;
    165 	lexio = io;
    166 	setacidfile();
    167 }
    168 
    169 void
    170 restartio(void)
    171 {
    172 	Bflush(lexio->fin);
    173 	Binit(lexio->fin, 0, OREAD);
    174 }
    175 
    176 int
    177 popio(void)
    178 {
    179 	IOstack *s;
    180 
    181 	if(lexio == 0)
    182 		return 0;
    183 
    184 	if(lexio->prev == 0){
    185 		if(lexio->fin)
    186 			restartio();
    187 		return 0;
    188 	}
    189 
    190 	if(lexio->fin)
    191 		Bterm(lexio->fin);
    192 	else
    193 		free(lexio->text);
    194 	free(lexio->name);
    195 	line = lexio->line;
    196 	s = lexio;
    197 	lexio = s->prev;
    198 	free(s);
    199 	nlexio--;
    200 	setacidfile();
    201 	return 1;
    202 }
    203 
    204 int
    205 Zfmt(Fmt *f)
    206 {
    207 	char buf[1024], *p;
    208 	IOstack *e;
    209 
    210 	e = lexio;
    211 	if(e) {
    212 		p = seprint(buf, buf+sizeof buf, "%s:%d", e->name, line);
    213 		while(e->prev) {
    214 			e = e->prev;
    215 			if(initialising && e->prev == 0)
    216 				break;
    217 			p = seprint(p, buf+sizeof buf, " [%s:%d]", e->name, e->line);
    218 		}
    219 	} else
    220 		sprint(buf, "no file:0");
    221 	fmtstrcpy(f, buf);
    222 	return 0;
    223 }
    224 
    225 void
    226 unlexc(int s)
    227 {
    228 	if(s == '\n')
    229 		line--;
    230 
    231 	if(lexio->fin)
    232 		Bungetc(lexio->fin);
    233 	else
    234 		lexio->ip--;
    235 }
    236 
    237 int
    238 lexc(void)
    239 {
    240 	int c;
    241 
    242 	if(lexio->fin) {
    243 		c = Bgetc(lexio->fin);
    244 		if(gotint)
    245 			error("interrupt");
    246 		return c;
    247 	}
    248 
    249 	c = *lexio->ip++;
    250 	if(c == 0)
    251 		return -1;
    252 	return c;
    253 }
    254 
    255 int
    256 escchar(int c)
    257 {
    258 	int n;
    259 	char buf[Strsize];
    260 
    261 	if(c >= '0' && c <= '9') {
    262 		n = 1;
    263 		buf[0] = c;
    264 		for(;;) {
    265 			c = lexc();
    266 			if(c == Eof)
    267 				error("%d: <eof> in escape sequence", line);
    268 			if(strchr("0123456789xX", c) == 0) {
    269 				unlexc(c);
    270 				break;
    271 			}
    272 			buf[n++] = c;
    273 		}
    274 		buf[n] = '\0';
    275 		return strtol(buf, 0, 0);
    276 	}
    277 
    278 	n = cmap[(unsigned char)c];
    279 	if(n == 0)
    280 		return c;
    281 	return n-1;
    282 }
    283 
    284 void
    285 eatstring(void)
    286 {
    287 	int esc, c, cnt;
    288 	char buf[Strsize];
    289 
    290 	esc = 0;
    291 	for(cnt = 0;;) {
    292 		c = lexc();
    293 		switch(c) {
    294 		case Eof:
    295 			error("%d: <eof> in string constant", line);
    296 
    297 		case '\n':
    298 			error("newline in string constant");
    299 			goto done;
    300 
    301 		case '\\':
    302 			if(esc)
    303 				goto Default;
    304 			esc = 1;
    305 			break;
    306 
    307 		case '"':
    308 			if(esc == 0)
    309 				goto done;
    310 
    311 			/* Fall through */
    312 		default:
    313 		Default:
    314 			if(esc) {
    315 				c = escchar(c);
    316 				esc = 0;
    317 			}
    318 			buf[cnt++] = c;
    319 			break;
    320 		}
    321 		if(cnt >= Strsize)
    322 			error("string token too long");
    323 	}
    324 done:
    325 	buf[cnt] = '\0';
    326 	yylval.string = strnode(buf);
    327 }
    328 
    329 void
    330 eatnl(void)
    331 {
    332 	int c;
    333 
    334 	line++;
    335 	for(;;) {
    336 		c = lexc();
    337 		if(c == Eof)
    338 			error("eof in comment");
    339 		if(c == '\n')
    340 			return;
    341 	}
    342 }
    343 
    344 int
    345 bqsymbol(void)
    346 {
    347 	int c;
    348 	char *p;
    349 	Lsym *s;
    350 
    351 	p = symbol;
    352 	while((c = lexc()) != '`'){
    353 		if(c == Eof)
    354 			error("eof in backquote");
    355 		if(c == '\n')
    356 			error("newline in backquote");
    357 		*p++ = c;
    358 	}
    359 	if(p >= symbol+sizeof symbol)
    360 		sysfatal("overflow in bqsymbol");
    361 	*p = 0;
    362 
    363 	s = look(symbol);
    364 	if(s == 0)
    365 		s = enter(symbol, Tid);
    366 	yylval.sym = s;
    367 	return s->lexval;
    368 }
    369 
    370 int
    371 yylex(void)
    372 {
    373 	int c;
    374 	extern char vfmt[];
    375 
    376 loop:
    377 	Bflush(bout);
    378 	c = lexc();
    379 	switch(c) {
    380 	case Eof:
    381 		if(gotint) {
    382 			gotint = 0;
    383 			stacked = 0;
    384 			Bprint(bout, "\nacid; ");
    385 			goto loop;
    386 		}
    387 		return Eof;
    388 
    389 	case '`':
    390 		return bqsymbol();
    391 
    392 	case '"':
    393 		eatstring();
    394 		return Tstring;
    395 
    396 	case ' ':
    397 	case '\t':
    398 		goto loop;
    399 
    400 	case '\n':
    401 		line++;
    402 		if(interactive == 0)
    403 			goto loop;
    404 		if(stacked) {
    405 			print("\t");
    406 			goto loop;
    407 		}
    408 		nlcount++;
    409 		return ';';
    410 
    411 	case '.':
    412 		c = lexc();
    413 		unlexc(c);
    414 		if(isdigit(c))
    415 			return numsym('.');
    416 
    417 		return '.';
    418 
    419 	case '(':
    420 	case ')':
    421 	case '[':
    422 	case ']':
    423 	case ';':
    424 	case ':':
    425 	case ',':
    426 	case '~':
    427 	case '?':
    428 	case '*':
    429 	case '@':
    430 	case '^':
    431 	case '%':
    432 		return c;
    433 	case '{':
    434 		stacked++;
    435 		return c;
    436 	case '}':
    437 		stacked--;
    438 		return c;
    439 
    440 	case '\\':
    441 		c = lexc();
    442 		if(strchr(vfmt, c) == 0) {
    443 			unlexc(c);
    444 			return '\\';
    445 		}
    446 		yylval.ival = c;
    447 		return Tfmt;
    448 
    449 	case '!':
    450 		c = lexc();
    451 		if(c == '=')
    452 			return Tneq;
    453 		unlexc(c);
    454 		return '!';
    455 
    456 	case '+':
    457 		c = lexc();
    458 		if(c == '+')
    459 			return Tinc;
    460 		unlexc(c);
    461 		return '+';
    462 
    463 	case '/':
    464 		c = lexc();
    465 		if(c == '/') {
    466 			eatnl();
    467 			goto loop;
    468 		}
    469 		unlexc(c);
    470 		return '/';
    471 
    472 	case '\'':
    473 		c = lexc();
    474 		if(c == '\\')
    475 			yylval.ival = escchar(lexc());
    476 		else
    477 			yylval.ival = c;
    478 		c = lexc();
    479 		if(c != '\'') {
    480 			error("missing '");
    481 			unlexc(c);
    482 		}
    483 		return Tconst;
    484 
    485 	case '&':
    486 		c = lexc();
    487 		if(c == '&')
    488 			return Tandand;
    489 		unlexc(c);
    490 		return '&';
    491 
    492 	case '=':
    493 		c = lexc();
    494 		if(c == '=')
    495 			return Teq;
    496 		unlexc(c);
    497 		return '=';
    498 
    499 	case '|':
    500 		c = lexc();
    501 		if(c == '|')
    502 			return Toror;
    503 		unlexc(c);
    504 		return '|';
    505 
    506 	case '<':
    507 		c = lexc();
    508 		if(c == '=')
    509 			return Tleq;
    510 		if(c == '<')
    511 			return Tlsh;
    512 		unlexc(c);
    513 		return '<';
    514 
    515 	case '>':
    516 		c = lexc();
    517 		if(c == '=')
    518 			return Tgeq;
    519 		if(c == '>')
    520 			return Trsh;
    521 		unlexc(c);
    522 		return '>';
    523 
    524 	case '-':
    525 		c = lexc();
    526 
    527 		if(c == '>')
    528 			return Tindir;
    529 
    530 		if(c == '-')
    531 			return Tdec;
    532 		unlexc(c);
    533 		return '-';
    534 
    535 	default:
    536 		return numsym(c);
    537 	}
    538 }
    539 
    540 int
    541 numsym(char first)
    542 {
    543 	int c, isbin, isfloat, ishex;
    544 	char *sel, *p;
    545 	Lsym *s;
    546 
    547 	symbol[0] = first;
    548 	p = symbol;
    549 
    550 	ishex = 0;
    551 	isbin = 0;
    552 	isfloat = 0;
    553 	if(first == '.')
    554 		isfloat = 1;
    555 
    556 	if(isdigit((uchar)*p++) || isfloat) {
    557 		for(;;) {
    558 			c = lexc();
    559 			if(c < 0)
    560 				error("%d: <eof> eating symbols", line);
    561 
    562 			if(c == '\n')
    563 				line++;
    564 			sel = "01234567890.xb";
    565 			if(ishex)
    566 				sel = "01234567890abcdefABCDEF";
    567 			else if(isbin)
    568 				sel = "01";
    569 			else if(isfloat)
    570 				sel = "01234567890eE-+";
    571 
    572 			if(strchr(sel, c) == 0) {
    573 				unlexc(c);
    574 				break;
    575 			}
    576 			if(c == '.')
    577 				isfloat = 1;
    578 			if(!isbin && c == 'x')
    579 				ishex = 1;
    580 			if(!ishex && c == 'b')
    581 				isbin = 1;
    582 			*p++ = c;
    583 		}
    584 		*p = '\0';
    585 		if(isfloat) {
    586 			yylval.fval = atof(symbol);
    587 			return Tfconst;
    588 		}
    589 
    590 		if(isbin)
    591 			yylval.ival = strtoull(symbol+2, 0, 2);
    592 		else
    593 			yylval.ival = strtoll(symbol, 0, 0);
    594 		return Tconst;
    595 	}
    596 
    597 	for(;;) {
    598 		c = lexc();
    599 		if(c < 0)
    600 			error("%d <eof> eating symbols", line);
    601 		if(c == '\n')
    602 			line++;
    603 		/* allow :: in name */
    604 		if(c == ':'){
    605 			c = lexc();
    606 			if(c == ':'){
    607 				*p++ = ':';
    608 				*p++ = ':';
    609 				continue;
    610 			}
    611 			unlexc(c);
    612 			unlexc(':');
    613 			break;
    614 		}
    615 		if(c != '_' && c != '$' && c < Runeself && !isalnum(c)) {
    616 			unlexc(c);
    617 			break;
    618 		}
    619 		*p++ = c;
    620 	}
    621 
    622 	*p = '\0';
    623 
    624 	s = look(symbol);
    625 	if(s == 0)
    626 		s = enter(symbol, Tid);
    627 
    628 	yylval.sym = s;
    629 	return s->lexval;
    630 }
    631 
    632 Lsym*
    633 enter(char *name, int t)
    634 {
    635 	Lsym *s;
    636 	ulong h;
    637 	char *p;
    638 	Value *v;
    639 
    640 	h = 0;
    641 	for(p = name; *p; p++)
    642 		h = h*3 + *p;
    643 	h %= Hashsize;
    644 
    645 	s = gmalloc(sizeof(Lsym));
    646 	memset(s, 0, sizeof(Lsym));
    647 	s->name = strdup(name);
    648 
    649 	s->hash = hash[h];
    650 	hash[h] = s;
    651 	s->lexval = t;
    652 
    653 	v = gmalloc(sizeof(Value));
    654 	s->v = v;
    655 
    656 	v->store.fmt = 'X';
    657 	v->type = TINT;
    658 	memset(v, 0, sizeof(Value));
    659 
    660 	return s;
    661 }
    662 
    663 void
    664 delsym(Lsym *s)
    665 {
    666 	char *q;
    667 	ulong h;
    668 	Lsym *p;
    669 
    670 	h = 0;
    671 	for(q = s->name; *q; q++)
    672 		h = h*3 + *q;
    673 	h %= Hashsize;
    674 
    675 	if(hash[h] == s)
    676 		hash[h] = s->hash;
    677 	else{
    678 		for(p=hash[h]; p && p->hash != s; p=p->hash)
    679 			;
    680 		if(p)
    681 			p->hash = s->hash;
    682 	}
    683 	s->hash = nil;
    684 }
    685 
    686 Lsym*
    687 look(char *name)
    688 {
    689 	Lsym *s;
    690 	ulong h;
    691 	char *p;
    692 
    693 	h = 0;
    694 	for(p = name; *p; p++)
    695 		h = h*3 + *p;
    696 	h %= Hashsize;
    697 
    698 	for(s = hash[h]; s; s = s->hash)
    699 		if(strcmp(name, s->name) == 0)
    700 			return s;
    701 	return 0;
    702 }
    703 
    704 Lsym*
    705 mkvar(char *s)
    706 {
    707 	Lsym *l;
    708 
    709 	l = look(s);
    710 	if(l == 0)
    711 		l = enter(s, Tid);
    712 	return l;
    713 }