plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

rfc822.y (13421B)


      1 %{
      2 #include "common.h"
      3 #include "smtp.h"
      4 #include <ctype.h>
      5 
      6 char	*yylp;		/* next character to be lex'd */
      7 int	yydone;		/* tell yylex to give up */
      8 char	*yybuffer;	/* first parsed character */
      9 char	*yyend;		/* end of buffer to be parsed */
     10 Node	*root;
     11 Field	*firstfield;
     12 Field	*lastfield;
     13 Node	*usender;
     14 Node	*usys;
     15 Node	*udate;
     16 char	*startfield, *endfield;
     17 int	originator;
     18 int	destination;
     19 int	date;
     20 int	received;
     21 int	messageid;
     22 %}
     23 
     24 %term WORD
     25 %term DATE
     26 %term RESENT_DATE
     27 %term RETURN_PATH
     28 %term FROM
     29 %term SENDER
     30 %term REPLY_TO
     31 %term RESENT_FROM
     32 %term RESENT_SENDER
     33 %term RESENT_REPLY_TO
     34 %term SUBJECT
     35 %term TO
     36 %term CC
     37 %term BCC
     38 %term RESENT_TO
     39 %term RESENT_CC
     40 %term RESENT_BCC
     41 %term REMOTE
     42 %term PRECEDENCE
     43 %term MIMEVERSION
     44 %term CONTENTTYPE
     45 %term MESSAGEID
     46 %term RECEIVED
     47 %term MAILER
     48 %term BADTOKEN
     49 %start msg
     50 %%
     51 
     52 msg		: fields
     53 		| unixfrom '\n' fields
     54 		;
     55 fields		: '\n'
     56 			{ yydone = 1; }
     57 		| field '\n'
     58 		| field '\n' fields
     59 		;
     60 field		: dates
     61 			{ date = 1; }
     62 		| originator
     63 			{ originator = 1; }
     64 		| destination
     65 			{ destination = 1; }
     66 		| subject
     67 		| optional
     68 		| ignored
     69 		| received
     70 		| precedence
     71 		| error '\n' field
     72 		;
     73 unixfrom	: FROM route_addr unix_date_time REMOTE FROM word
     74 			{ freenode($1); freenode($4); freenode($5);
     75 			  usender = $2; udate = $3; usys = $6;
     76 			}
     77 		;
     78 originator	: REPLY_TO ':' address_list
     79 			{ newfield(link3($1, $2, $3), 1); }
     80 		| RETURN_PATH ':' route_addr
     81 			{ newfield(link3($1, $2, $3), 1); }
     82 		| FROM ':' mailbox_list
     83 			{ newfield(link3($1, $2, $3), 1); }
     84 		| SENDER ':' mailbox
     85 			{ newfield(link3($1, $2, $3), 1); }
     86 		| RESENT_REPLY_TO ':' address_list
     87 			{ newfield(link3($1, $2, $3), 1); }
     88 		| RESENT_SENDER ':' mailbox
     89 			{ newfield(link3($1, $2, $3), 1); }
     90 		| RESENT_FROM ':' mailbox
     91 			{ newfield(link3($1, $2, $3), 1); }
     92 		;
     93 dates 		: DATE ':' date_time
     94 			{ newfield(link3($1, $2, $3), 0); }
     95 		| RESENT_DATE ':' date_time
     96 			{ newfield(link3($1, $2, $3), 0); }
     97 		;
     98 destination	: TO ':'
     99 			{ newfield(link2($1, $2), 0); }
    100 		| TO ':' address_list
    101 			{ newfield(link3($1, $2, $3), 0); }
    102 		| RESENT_TO ':'
    103 			{ newfield(link2($1, $2), 0); }
    104 		| RESENT_TO ':' address_list
    105 			{ newfield(link3($1, $2, $3), 0); }
    106 		| CC ':'
    107 			{ newfield(link2($1, $2), 0); }
    108 		| CC ':' address_list
    109 			{ newfield(link3($1, $2, $3), 0); }
    110 		| RESENT_CC ':'
    111 			{ newfield(link2($1, $2), 0); }
    112 		| RESENT_CC ':' address_list
    113 			{ newfield(link3($1, $2, $3), 0); }
    114 		| BCC ':'
    115 			{ newfield(link2($1, $2), 0); }
    116 		| BCC ':' address_list
    117 			{ newfield(link3($1, $2, $3), 0); }
    118 		| RESENT_BCC ':' 
    119 			{ newfield(link2($1, $2), 0); }
    120 		| RESENT_BCC ':' address_list
    121 			{ newfield(link3($1, $2, $3), 0); }
    122 		;
    123 subject		: SUBJECT ':' things
    124 			{ newfield(link3($1, $2, $3), 0); }
    125 		| SUBJECT ':'
    126 			{ newfield(link2($1, $2), 0); }
    127 		;
    128 received	: RECEIVED ':' things
    129 			{ newfield(link3($1, $2, $3), 0); received++; }
    130 		| RECEIVED ':'
    131 			{ newfield(link2($1, $2), 0); received++; }
    132 		;
    133 precedence	: PRECEDENCE ':' things
    134 			{ newfield(link3($1, $2, $3), 0); }
    135 		| PRECEDENCE ':'
    136 			{ newfield(link2($1, $2), 0); }
    137 		;
    138 ignored		: ignoredhdr ':' things
    139 			{ newfield(link3($1, $2, $3), 0); }
    140 		| ignoredhdr ':'
    141 			{ newfield(link2($1, $2), 0); }
    142 		;
    143 ignoredhdr	: MIMEVERSION | CONTENTTYPE | MESSAGEID { messageid = 1; } | MAILER
    144 		;
    145 optional	: fieldwords ':' things
    146 			{ /* hack to allow same lex for field names and the rest */
    147 			 if(badfieldname($1)){
    148 				freenode($1);
    149 				freenode($2);
    150 				freenode($3);
    151 				return 1;
    152 			 }
    153 			 newfield(link3($1, $2, $3), 0);
    154 			}
    155 		| fieldwords ':'
    156 			{ /* hack to allow same lex for field names and the rest */
    157 			 if(badfieldname($1)){
    158 				freenode($1);
    159 				freenode($2);
    160 				return 1;
    161 			 }
    162 			 newfield(link2($1, $2), 0);
    163 			}
    164 		;
    165 address_list	: address
    166 		| address_list ',' address
    167 			{ $$ = link3($1, $2, $3); }
    168 		;
    169 address		: mailbox
    170 		| group
    171 		;
    172 group		: phrase ':' address_list ';'
    173 			{ $$ = link2($1, link3($2, $3, $4)); }
    174 		| phrase ':' ';'
    175 			{ $$ = link3($1, $2, $3); }
    176 		;
    177 mailbox_list	: mailbox
    178 		| mailbox_list ',' mailbox
    179 			{ $$ = link3($1, $2, $3); }
    180 		;
    181 mailbox		: route_addr
    182 		| phrase brak_addr
    183 			{ $$ = link2($1, $2); }
    184 		| brak_addr
    185 		;
    186 brak_addr	: '<' route_addr '>'
    187 			{ $$ = link3($1, $2, $3); }
    188 		| '<' '>'
    189 			{ $$ = nobody($2); freenode($1); }
    190 		;
    191 route_addr	: route ':' at_addr
    192 			{ $$ = address(concat($1, concat($2, $3))); }
    193 		| addr_spec
    194 		;
    195 route		: '@' domain
    196 			{ $$ = concat($1, $2); }
    197 		| route ',' '@' domain
    198 			{ $$ = concat($1, concat($2, concat($3, $4))); }
    199 		;
    200 addr_spec	: local_part
    201 			{ $$ = address($1); }
    202 		| at_addr
    203 		;
    204 at_addr		: local_part '@' domain
    205 			{ $$ = address(concat($1, concat($2, $3)));}
    206 		| at_addr '@' domain
    207 			{ $$ = address(concat($1, concat($2, $3)));}
    208 		;
    209 local_part	: word
    210 		;
    211 domain		: word
    212 		;
    213 phrase		: word
    214 		| phrase word
    215 			{ $$ = link2($1, $2); }
    216 		;
    217 things		: thing
    218 		| things thing
    219 			{ $$ = link2($1, $2); }
    220 		;
    221 thing		: word | '<' | '>' | '@' | ':' | ';' | ','
    222 		;
    223 date_time	: things
    224 		;
    225 unix_date_time	: word word word unix_time word word
    226 			{ $$ = link3($1, $3, link3($2, $6, link2($4, $5))); }
    227 		;
    228 unix_time	: word
    229 		| unix_time ':' word
    230 			{ $$ = link3($1, $2, $3); }
    231 		;
    232 word		: WORD | DATE | RESENT_DATE | RETURN_PATH | FROM | SENDER
    233 		| REPLY_TO | RESENT_FROM | RESENT_SENDER | RESENT_REPLY_TO
    234 		| TO | CC | BCC | RESENT_TO | RESENT_CC | RESENT_BCC | REMOTE | SUBJECT
    235 		| PRECEDENCE | MIMEVERSION | CONTENTTYPE | MESSAGEID | RECEIVED | MAILER
    236 		;
    237 fieldwords	: fieldword
    238 		| WORD
    239 		| fieldwords fieldword
    240 			{ $$ = link2($1, $2); }
    241 		| fieldwords word
    242 			{ $$ = link2($1, $2); }
    243 		;
    244 fieldword	: '<' | '>' | '@' | ';' | ','
    245 		;
    246 %%
    247 
    248 /*
    249  *  Initialize the parsing.  Done once for each header field.
    250  */
    251 void
    252 yyinit(char *p, int len)
    253 {
    254 	yybuffer = p;
    255 	yylp = p;
    256 	yyend = p + len;
    257 	firstfield = lastfield = 0;
    258 	received = 0;
    259 }
    260 
    261 /*
    262  *  keywords identifying header fields we care about
    263  */
    264 typedef struct Keyword	Keyword;
    265 struct Keyword {
    266 	char	*rep;
    267 	int	val;
    268 };
    269 
    270 /* field names that we need to recognize */
    271 Keyword key[] = {
    272 	{ "date", DATE },
    273 	{ "resent-date", RESENT_DATE },
    274 	{ "return_path", RETURN_PATH },
    275 	{ "from", FROM },
    276 	{ "sender", SENDER },
    277 	{ "reply-to", REPLY_TO },
    278 	{ "resent-from", RESENT_FROM },
    279 	{ "resent-sender", RESENT_SENDER },
    280 	{ "resent-reply-to", RESENT_REPLY_TO },
    281 	{ "to", TO },
    282 	{ "cc", CC },
    283 	{ "bcc", BCC },
    284 	{ "resent-to", RESENT_TO },
    285 	{ "resent-cc", RESENT_CC },
    286 	{ "resent-bcc", RESENT_BCC },
    287 	{ "remote", REMOTE },
    288 	{ "subject", SUBJECT },
    289 	{ "precedence", PRECEDENCE },
    290 	{ "mime-version", MIMEVERSION },
    291 	{ "content-type", CONTENTTYPE },
    292 	{ "message-id", MESSAGEID },
    293 	{ "received", RECEIVED },
    294 	{ "mailer", MAILER },
    295 	{ "who-the-hell-cares", WORD }
    296 };
    297 
    298 /*
    299  *  Lexical analysis for an rfc822 header field.  Continuation lines
    300  *  are handled in yywhite() when skipping over white space.
    301  *
    302  */
    303 int
    304 yylex(void)
    305 {
    306 	String *t;
    307 	int quoting;
    308 	int escaping;
    309 	char *start;
    310 	Keyword *kp;
    311 	int c, d;
    312 
    313 /*	print("lexing\n"); /**/
    314 	if(yylp >= yyend)
    315 		return 0;
    316 	if(yydone)
    317 		return 0;
    318 
    319 	quoting = escaping = 0;
    320 	start = yylp;
    321 	yylval = malloc(sizeof(Node));
    322 	yylval->white = yylval->s = 0;
    323 	yylval->next = 0;
    324 	yylval->addr = 0;
    325 	yylval->start = yylp;
    326 	for(t = 0; yylp < yyend; yylp++){
    327 		c = *yylp & 0xff;
    328 
    329 		/* dump nulls, they can't be in header */
    330 		if(c == 0)
    331 			continue;
    332 
    333 		if(escaping) {
    334 			escaping = 0;
    335 		} else if(quoting) {
    336 			switch(c){
    337 			case '\\':
    338 				escaping = 1;
    339 				break;
    340 			case '\n':
    341 				d = (*(yylp+1))&0xff;
    342 				if(d != ' ' && d != '\t'){
    343 					quoting = 0;
    344 					yylp--;
    345 					continue;
    346 				}
    347 				break;
    348 			case '"':
    349 				quoting = 0;
    350 				break;
    351 			}
    352 		} else {
    353 			switch(c){
    354 			case '\\':
    355 				escaping = 1;
    356 				break;
    357 			case '(':
    358 			case ' ':
    359 			case '\t':
    360 			case '\r':
    361 				goto out;
    362 			case '\n':
    363 				if(yylp == start){
    364 					yylp++;
    365 /*					print("lex(c %c)\n", c); /**/
    366 					yylval->end = yylp;
    367 					return yylval->c = c;
    368 				}
    369 				goto out;
    370 			case '@':
    371 			case '>':
    372 			case '<':
    373 			case ':':
    374 			case ',':
    375 			case ';':
    376 				if(yylp == start){
    377 					yylp++;
    378 					yylval->white = yywhite();
    379 /*					print("lex(c %c)\n", c); /**/
    380 					yylval->end = yylp;
    381 					return yylval->c = c;
    382 				}
    383 				goto out;
    384 			case '"':
    385 				quoting = 1;
    386 				break;
    387 			default:
    388 				break;
    389 			}
    390 		}
    391 		if(t == 0)
    392 			t = s_new();
    393 		s_putc(t, c);
    394 	}
    395 out:
    396 	yylval->white = yywhite();
    397 	if(t) {
    398 		s_terminate(t);
    399 	} else				/* message begins with white-space! */
    400 		return yylval->c = '\n';
    401 	yylval->s = t;
    402 	for(kp = key; kp->val != WORD; kp++)
    403 		if(cistrcmp(s_to_c(t), kp->rep)==0)
    404 			break;
    405 /*	print("lex(%d) %s\n", kp->val-WORD, s_to_c(t)); /**/
    406 	yylval->end = yylp;
    407 	return yylval->c = kp->val;
    408 }
    409 
    410 void
    411 yyerror(char *x)
    412 {
    413 	USED(x);
    414 
    415 	/*fprint(2, "parse err: %s\n", x);/**/
    416 }
    417 
    418 /*
    419  *  parse white space and comments
    420  */
    421 String *
    422 yywhite(void)
    423 {
    424 	String *w;
    425 	int clevel;
    426 	int c;
    427 	int escaping;
    428 
    429 	escaping = clevel = 0;
    430 	for(w = 0; yylp < yyend; yylp++){
    431 		c = *yylp & 0xff;
    432 
    433 		/* dump nulls, they can't be in header */
    434 		if(c == 0)
    435 			continue;
    436 
    437 		if(escaping){
    438 			escaping = 0;
    439 		} else if(clevel) {
    440 			switch(c){
    441 			case '\n':
    442 				/*
    443 				 *  look for multiline fields
    444 				 */
    445 				if(*(yylp+1)==' ' || *(yylp+1)=='\t')
    446 					break;
    447 				else
    448 					goto out;
    449 			case '\\':
    450 				escaping = 1;
    451 				break;
    452 			case '(':
    453 				clevel++;
    454 				break;
    455 			case ')':
    456 				clevel--;
    457 				break;
    458 			}
    459 		} else {
    460 			switch(c){
    461 			case '\\':
    462 				escaping = 1;
    463 				break;
    464 			case '(':
    465 				clevel++;
    466 				break;
    467 			case ' ':
    468 			case '\t':
    469 			case '\r':
    470 				break;
    471 			case '\n':
    472 				/*
    473 				 *  look for multiline fields
    474 				 */
    475 				if(*(yylp+1)==' ' || *(yylp+1)=='\t')
    476 					break;
    477 				else
    478 					goto out;
    479 			default:
    480 				goto out;
    481 			}
    482 		}
    483 		if(w == 0)
    484 			w = s_new();
    485 		s_putc(w, c);
    486 	}
    487 out:
    488 	if(w)
    489 		s_terminate(w);
    490 	return w;
    491 }
    492 
    493 /*
    494  *  link two parsed entries together
    495  */
    496 Node*
    497 link2(Node *p1, Node *p2)
    498 {
    499 	Node *p;
    500 
    501 	for(p = p1; p->next; p = p->next)
    502 		;
    503 	p->next = p2;
    504 	return p1;
    505 }
    506 
    507 /*
    508  *  link three parsed entries together
    509  */
    510 Node*
    511 link3(Node *p1, Node *p2, Node *p3)
    512 {
    513 	Node *p;
    514 
    515 	for(p = p2; p->next; p = p->next)
    516 		;
    517 	p->next = p3;
    518 
    519 	for(p = p1; p->next; p = p->next)
    520 		;
    521 	p->next = p2;
    522 
    523 	return p1;
    524 }
    525 
    526 /*
    527  *  make a:b, move all white space after both
    528  */
    529 Node*
    530 colon(Node *p1, Node *p2)
    531 {
    532 	if(p1->white){
    533 		if(p2->white)
    534 			s_append(p1->white, s_to_c(p2->white));
    535 	} else {
    536 		p1->white = p2->white;
    537 		p2->white = 0;
    538 	}
    539 
    540 	s_append(p1->s, ":");
    541 	if(p2->s)
    542 		s_append(p1->s, s_to_c(p2->s));
    543 
    544 	if(p1->end < p2->end)
    545 		p1->end = p2->end;
    546 	freenode(p2);
    547 	return p1;
    548 }
    549 
    550 /*
    551  *  concatenate two fields, move all white space after both
    552  */
    553 Node*
    554 concat(Node *p1, Node *p2)
    555 {
    556 	char buf[2];
    557 
    558 	if(p1->white){
    559 		if(p2->white)
    560 			s_append(p1->white, s_to_c(p2->white));
    561 	} else {
    562 		p1->white = p2->white;
    563 		p2->white = 0;
    564 	}
    565 
    566 	if(p1->s == nil){
    567 		buf[0] = p1->c;
    568 		buf[1] = 0;
    569 		p1->s = s_new();
    570 		s_append(p1->s, buf);
    571 	}
    572 
    573 	if(p2->s)
    574 		s_append(p1->s, s_to_c(p2->s));
    575 	else {
    576 		buf[0] = p2->c;
    577 		buf[1] = 0;
    578 		s_append(p1->s, buf);
    579 	}
    580 
    581 	if(p1->end < p2->end)
    582 		p1->end = p2->end;
    583 	freenode(p2);
    584 	return p1;
    585 }
    586 
    587 /*
    588  *  look for disallowed chars in the field name
    589  */
    590 int
    591 badfieldname(Node *p)
    592 {
    593 	for(; p; p = p->next){
    594 		/* field name can't contain white space */
    595 		if(p->white && p->next)
    596 			return 1;
    597 	}
    598 	return 0;
    599 }
    600 
    601 /*
    602  *  mark as an address
    603  */
    604 Node *
    605 address(Node *p)
    606 {
    607 	p->addr = 1;
    608 	return p;
    609 }
    610 
    611 /*
    612  *  case independent string compare
    613  */
    614 int
    615 cistrcmp(char *s1, char *s2)
    616 {
    617 	int c1, c2;
    618 
    619 	for(; *s1; s1++, s2++){
    620 		c1 = isupper(*s1) ? tolower(*s1) : *s1;
    621 		c2 = isupper(*s2) ? tolower(*s2) : *s2;
    622 		if (c1 != c2)
    623 			return -1;
    624 	}
    625 	return *s2;
    626 }
    627 
    628 /*
    629  *  free a node
    630  */
    631 void
    632 freenode(Node *p)
    633 {
    634 	Node *tp;
    635 
    636 	while(p){
    637 		tp = p->next;
    638 		if(p->s)
    639 			s_free(p->s);
    640 		if(p->white)
    641 			s_free(p->white);
    642 		free(p);
    643 		p = tp;
    644 	}
    645 }
    646 
    647 
    648 /*
    649  *  an anonymous user
    650  */
    651 Node*
    652 nobody(Node *p)
    653 {
    654 	if(p->s)
    655 		s_free(p->s);
    656 	p->s = s_copy("pOsTmAsTeR");
    657 	p->addr = 1;
    658 	return p;
    659 }
    660 
    661 /*
    662  *  add anything that was dropped because of a parse error
    663  */
    664 void
    665 missing(Node *p)
    666 {
    667 	Node *np;
    668 	char *start, *end;
    669 	Field *f;
    670 	String *s;
    671 
    672 	start = yybuffer;
    673 	if(lastfield != nil){
    674 		for(np = lastfield->node; np; np = np->next)
    675 			start = np->end+1;
    676 	}
    677 
    678 	end = p->start-1;
    679 
    680 	if(end <= start)
    681 		return;
    682 
    683 	if(strncmp(start, "From ", 5) == 0)
    684 		return;
    685 
    686 	np = malloc(sizeof(Node));
    687 	np->start = start;
    688 	np->end = end;
    689 	np->white = nil;
    690 	s = s_copy("BadHeader: ");
    691 	np->s = s_nappend(s, start, end-start);
    692 	np->next = nil;
    693 
    694 	f = malloc(sizeof(Field));
    695 	f->next = 0;
    696 	f->node = np;
    697 	f->source = 0;
    698 	if(firstfield)
    699 		lastfield->next = f;
    700 	else
    701 		firstfield = f;
    702 	lastfield = f;
    703 }
    704 
    705 /*
    706  *  create a new field
    707  */
    708 void
    709 newfield(Node *p, int source)
    710 {
    711 	Field *f;
    712 
    713 	missing(p);
    714 
    715 	f = malloc(sizeof(Field));
    716 	f->next = 0;
    717 	f->node = p;
    718 	f->source = source;
    719 	if(firstfield)
    720 		lastfield->next = f;
    721 	else
    722 		firstfield = f;
    723 	lastfield = f;
    724 	endfield = startfield;
    725 	startfield = yylp;
    726 }
    727 
    728 /*
    729  *  fee a list of fields
    730  */
    731 void
    732 freefield(Field *f)
    733 {
    734 	Field *tf;
    735 
    736 	while(f){
    737 		tf = f->next;
    738 		freenode(f->node);
    739 		free(f);
    740 		f = tf;
    741 	}
    742 }
    743 
    744 /*
    745  *  add some white space to a node
    746  */
    747 Node*
    748 whiten(Node *p)
    749 {
    750 	Node *tp;
    751 
    752 	for(tp = p; tp->next; tp = tp->next)
    753 		;
    754 	if(tp->white == 0)
    755 		tp->white = s_copy(" ");
    756 	return p;
    757 }
    758 
    759 void
    760 yycleanup(void)
    761 {
    762 	Field *f, *fnext;
    763 	Node *np, *next;
    764 
    765 	for(f = firstfield; f; f = fnext){
    766 		for(np = f->node; np; np = next){
    767 			if(np->s)
    768 				s_free(np->s);
    769 			if(np->white)
    770 				s_free(np->white);
    771 			next = np->next;
    772 			free(np);
    773 		}
    774 		fnext = f->next;
    775 		free(f);
    776 	}
    777 	firstfield = lastfield = 0;
    778 }