/* rfc822.y: yacc grammar and lexer for RFC 822 message header fields */
%{
#include "common.h"
#include "smtp.h"
#include <ctype.h>

/*
 * Parser/lexer state.  yyinit() sets the buffer pointers; the flag
 * variables are set by the grammar actions as fields are recognized.
 */
char	*yylp;		/* next character to be lex'd */
int	yydone;		/* tell yylex to give up */
char	*yybuffer;	/* first parsed character */
char	*yyend;		/* end of buffer to be parsed */
Node	*root;
Field	*firstfield;	/* head of the list of parsed fields */
Field	*lastfield;	/* tail of the list of parsed fields */
Node	*usender;	/* address from a leading unix "From " line */
Node	*usys;		/* system name from a leading unix "From " line */
Node	*udate;		/* date from a leading unix "From " line */
char	*startfield, *endfield;
int	originator;	/* set when an originator field was parsed */
int	destination;	/* set when a destination field was parsed */
int	date;		/* set when a date field was parsed */
int	received;	/* number of Received: fields parsed */
int	messageid;	/* set when a Message-ID token was reduced */
%}

%term WORD
%term DATE
%term RESENT_DATE
%term RETURN_PATH
%term FROM
%term SENDER
%term REPLY_TO
%term RESENT_FROM
%term RESENT_SENDER
%term RESENT_REPLY_TO
%term SUBJECT
%term TO
%term CC
%term BCC
%term RESENT_TO
%term RESENT_CC
%term RESENT_BCC
%term REMOTE
%term PRECEDENCE
%term MIMEVERSION
%term CONTENTTYPE
%term MESSAGEID
%term RECEIVED
%term MAILER
%term BADTOKEN
%start msg
%%

msg		: fields
		| unixfrom '\n' fields
		;
fields		: '\n'
			/* the blank line ends the header; stop lexing */
			{ yydone = 1; }
		| field '\n'
		| field '\n' fields
		;
field		: dates
			{ date = 1; }
		| originator
			{ originator = 1; }
		| destination
			{ destination = 1; }
		| subject
		| optional
		| ignored
		| received
		| precedence
		| error '\n' field
		;
unixfrom	: FROM route_addr unix_date_time REMOTE FROM word
			{ freenode($1); freenode($4); freenode($5);
			  usender = $2; udate = $3; usys = $6;
			}
		;
originator	: REPLY_TO ':' address_list
			{ newfield(link3($1, $2, $3), 1); }
		| RETURN_PATH ':' route_addr
			{ newfield(link3($1, $2, $3), 1); }
		| FROM ':' mailbox_list
			{ newfield(link3($1, $2, $3), 1); }
		| SENDER ':' mailbox
			{ newfield(link3($1, $2, $3), 1); }
		| RESENT_REPLY_TO ':' address_list
			{ newfield(link3($1, $2, $3), 1); }
		| RESENT_SENDER ':' mailbox
			{ newfield(link3($1, $2, $3), 1); }
		| RESENT_FROM ':' mailbox
			{ newfield(link3($1, $2, $3), 1); }
		;
dates 		: DATE ':' date_time
			{ newfield(link3($1, $2, $3), 0); }
		| RESENT_DATE ':' date_time
			{ newfield(link3($1, $2, $3), 0); }
		;
destination	: TO ':'
			{ newfield(link2($1, $2), 0); }
		| TO ':' address_list
			{ newfield(link3($1, $2, $3), 0); }
		| RESENT_TO ':'
			{ newfield(link2($1, $2), 0); }
		| RESENT_TO ':' address_list
			{ newfield(link3($1, $2, $3), 0); }
		| CC ':'
			{ newfield(link2($1, $2), 0); }
		| CC ':' address_list
			{ newfield(link3($1, $2, $3), 0); }
		| RESENT_CC ':'
			{ newfield(link2($1, $2), 0); }
		| RESENT_CC ':' address_list
			{ newfield(link3($1, $2, $3), 0); }
		| BCC ':'
			{ newfield(link2($1, $2), 0); }
		| BCC ':' address_list
			{ newfield(link3($1, $2, $3), 0); }
		| RESENT_BCC ':'
			{ newfield(link2($1, $2), 0); }
		| RESENT_BCC ':' address_list
			{ newfield(link3($1, $2, $3), 0); }
		;
subject		: SUBJECT ':' things
			{ newfield(link3($1, $2, $3), 0); }
		| SUBJECT ':'
			{ newfield(link2($1, $2), 0); }
		;
received	: RECEIVED ':' things
			{ newfield(link3($1, $2, $3), 0); received++; }
		| RECEIVED ':'
			{ newfield(link2($1, $2), 0); received++; }
		;
precedence	: PRECEDENCE ':' things
			{ newfield(link3($1, $2, $3), 0); }
		| PRECEDENCE ':'
			{ newfield(link2($1, $2), 0); }
		;
ignored		: ignoredhdr ':' things
			{ newfield(link3($1, $2, $3), 0); }
		| ignoredhdr ':'
			{ newfield(link2($1, $2), 0); }
		;
ignoredhdr	: MIMEVERSION | CONTENTTYPE | MESSAGEID { messageid = 1; } | MAILER
		;
optional	: fieldwords ':' things
			{ /* hack to allow same lex for field names and the rest */
			 if(badfieldname($1)){
				freenode($1);
				freenode($2);
				freenode($3);
				return 1;
			 }
			 newfield(link3($1, $2, $3), 0);
			}
		| fieldwords ':'
			{ /* hack to allow same lex for field names and the rest */
			 if(badfieldname($1)){
				freenode($1);
				freenode($2);
				return 1;
			 }
			 newfield(link2($1, $2), 0);
			}
		;
address_list	: address
		| address_list ',' address
			{ $$ = link3($1, $2, $3); }
		;
address		: mailbox
		| group
		;
group		: phrase ':' address_list ';'
			{ $$ = link2($1, link3($2, $3, $4)); }
		| phrase ':' ';'
			{ $$ = link3($1, $2, $3); }
		;
mailbox_list	: mailbox
		| mailbox_list ',' mailbox
			{ $$ = link3($1, $2, $3); }
		;
mailbox		: route_addr
		| phrase brak_addr
			{ $$ = link2($1, $2); }
		| brak_addr
		;
brak_addr	: '<' route_addr '>'
			{ $$ = link3($1, $2, $3); }
		| '<' '>'
			/* empty <>: reuse the '>' node as the stand-in address */
			{ $$ = nobody($2); freenode($1); }
		;
route_addr	: route ':' at_addr
			{ $$ = address(concat($1, concat($2, $3))); }
		| addr_spec
		;
route		: '@' domain
			{ $$ = concat($1, $2); }
		| route ',' '@' domain
			{ $$ = concat($1, concat($2, concat($3, $4))); }
		;
addr_spec	: local_part
			{ $$ = address($1); }
		| at_addr
		;
at_addr		: local_part '@' domain
			{ $$ = address(concat($1, concat($2, $3)));}
		| at_addr '@' domain
			{ $$ = address(concat($1, concat($2, $3)));}
		;
local_part	: word
		;
domain		: word
		;
phrase		: word
		| phrase word
			{ $$ = link2($1, $2); }
		;
things		: thing
		| things thing
			{ $$ = link2($1, $2); }
		;
thing		: word | '<' | '>' | '@' | ':' | ';' | ','
		;
date_time	: things
		;
unix_date_time	: word word word unix_time word word
			/* relinks the six parts in the order $1 $3 $2 $6 $4 $5 */
			{ $$ = link3($1, $3, link3($2, $6, link2($4, $5))); }
		;
unix_time	: word
		| unix_time ':' word
			{ $$ = link3($1, $2, $3); }
		;
word		: WORD | DATE | RESENT_DATE | RETURN_PATH | FROM | SENDER
		| REPLY_TO | RESENT_FROM | RESENT_SENDER | RESENT_REPLY_TO
		| TO | CC | BCC | RESENT_TO | RESENT_CC | RESENT_BCC | REMOTE | SUBJECT
		| PRECEDENCE | MIMEVERSION | CONTENTTYPE | MESSAGEID | RECEIVED | MAILER
		;
fieldwords	: fieldword
		| WORD
		| fieldwords fieldword
			{ $$ = link2($1, $2); }
		| fieldwords word
			{ $$ = link2($1, $2); }
		;
fieldword	: '<' | '>' | '@' | ';' | ','
		;
%%

/*
 *  Initialize the parsing of the len bytes starting at p.
 *  Forgets (but does not free) any previously parsed field list;
 *  use yycleanup() or freefield() to release one.
 */
void
yyinit(char *p, int len)
{
	yybuffer = p;
	yylp = p;
	yyend = p + len;
	firstfield = lastfield = 0;
	received = 0;
	/*
	 * the grammar sets yydone at the blank line ending a header;
	 * without this reset every later parse would see immediate EOF.
	 */
	yydone = 0;
}

/*
 *  keywords identifying header fields we care about
 */
typedef struct Keyword	Keyword;
struct Keyword {
	char	*rep;	/* field name, compared case independently */
	int	val;	/* token returned by yylex */
};

/*
 *  field names that we need to recognize.  The last entry is the
 *  sentinel that ends the search in yylex; its WORD value is what
 *  unrecognized names are returned as.
 *
 *  NOTE(review): "return_path" is spelled with an underscore, so a
 *  "Return-Path" header never lexes as RETURN_PATH.  Looks like a
 *  typo, but changing it alters how that header is parsed; confirm
 *  against the callers before fixing.
 */
Keyword key[] = {
	{ "date", DATE },
	{ "resent-date", RESENT_DATE },
	{ "return_path", RETURN_PATH },
	{ "from", FROM },
	{ "sender", SENDER },
	{ "reply-to", REPLY_TO },
	{ "resent-from", RESENT_FROM },
	{ "resent-sender", RESENT_SENDER },
	{ "resent-reply-to", RESENT_REPLY_TO },
	{ "to", TO },
	{ "cc", CC },
	{ "bcc", BCC },
	{ "resent-to", RESENT_TO },
	{ "resent-cc", RESENT_CC },
	{ "resent-bcc", RESENT_BCC },
	{ "remote", REMOTE },
	{ "subject", SUBJECT },
	{ "precedence", PRECEDENCE },
	{ "mime-version", MIMEVERSION },
	{ "content-type", CONTENTTYPE },
	{ "message-id", MESSAGEID },
	{ "received", RECEIVED },
	{ "mailer", MAILER },
	{ "who-the-hell-cares", WORD }
};

/*
 *  True if the newline at nl is followed, inside the buffer being
 *  parsed, by a space or tab, i.e. the field continues on the next
 *  line.  Never looks at or beyond yyend.
 */
static int
foldednl(char *nl)
{
	char *p;

	p = nl + 1;
	if(p >= yyend)
		return 0;
	return *p == ' ' || *p == '\t';
}

/*
 *  Lexical analysis for an rfc822 header field.  Continuation lines
 *  are handled in yywhite() when skipping over white space.
 *
 *  Each token is returned in a freshly allocated Node (yylval) that
 *  records the token's text (s, for words), the white space and
 *  comments following it (white) and its extent in the buffer
 *  (start, end).  Returns 0 at the end of the buffer or once yydone
 *  is set, the character itself for '\n' and the specials
 *  @ > < : , ; and otherwise the keyword token or WORD.
 *
 *  NOTE(review): the malloc result is not checked.
 */
int
yylex(void)
{
	String *t;
	int quoting;
	int escaping;
	char *start;
	Keyword *kp;
	int c;

	if(yylp >= yyend)
		return 0;
	if(yydone)
		return 0;

	quoting = escaping = 0;
	start = yylp;
	yylval = malloc(sizeof(Node));
	yylval->white = yylval->s = 0;
	yylval->next = 0;
	yylval->addr = 0;
	yylval->start = yylp;
	for(t = 0; yylp < yyend; yylp++){
		c = *yylp & 0xff;

		/* dump nulls, they can't be in header */
		if(c == 0)
			continue;

		if(escaping) {
			/* the character after a backslash is taken literally */
			escaping = 0;
		} else if(quoting) {
			switch(c){
			case '\\':
				escaping = 1;
				break;
			case '\n':
				if(!foldednl(yylp)){
					/*
					 * unterminated quote: drop out of
					 * quoting and look at this newline
					 * again as an ordinary character.
					 */
					quoting = 0;
					yylp--;
					continue;
				}
				break;
			case '"':
				quoting = 0;
				break;
			}
		} else {
			switch(c){
			case '\\':
				escaping = 1;
				break;
			case '(':
			case ' ':
			case '\t':
			case '\r':
				goto out;
			case '\n':
				if(yylp == start){
					/* a newline is a token of its own */
					yylp++;
					yylval->end = yylp;
					return yylval->c = c;
				}
				goto out;
			case '@':
			case '>':
			case '<':
			case ':':
			case ',':
			case ';':
				if(yylp == start){
					/* a special is a token of its own */
					yylp++;
					yylval->white = yywhite();
					yylval->end = yylp;
					return yylval->c = c;
				}
				goto out;
			case '"':
				quoting = 1;
				break;
			default:
				break;
			}
		}
		if(t == 0)
			t = s_new();
		s_putc(t, c);
	}
out:
	yylval->white = yywhite();
	if(t == 0){
		/* message begins with white-space! */
		yylval->end = yylp;
		return yylval->c = '\n';
	}
	s_terminate(t);
	yylval->s = t;

	/* falls off onto the WORD sentinel when no keyword matches */
	for(kp = key; kp->val != WORD; kp++)
		if(cistrcmp(s_to_c(t), kp->rep) == 0)
			break;
	yylval->end = yylp;
	return yylval->c = kp->val;
}

/*
 *  Called by the parser on a syntax error.  Nothing is reported;
 *  the message is ignored.
 */
void
yyerror(char *x)
{
	USED(x);
}

/*
 *  parse white space and comments
 *
 *  Advances yylp over blanks, tabs, carriage returns, folded
 *  newlines and parenthesized (possibly nested) comments.  Returns
 *  the skipped text, or 0 if nothing was skipped.
 */
String *
yywhite(void)
{
	String *w;
	int clevel;	/* comment nesting depth */
	int c;
	int escaping;

	escaping = clevel = 0;
	for(w = 0; yylp < yyend; yylp++){
		c = *yylp & 0xff;

		/* dump nulls, they can't be in header */
		if(c == 0)
			continue;

		if(escaping){
			escaping = 0;
		} else if(clevel) {
			switch(c){
			case '\n':
				/*
				 *  look for multiline fields
				 */
				if(!foldednl(yylp))
					goto out;
				break;
			case '\\':
				escaping = 1;
				break;
			case '(':
				clevel++;
				break;
			case ')':
				clevel--;
				break;
			}
		} else {
			switch(c){
			case '\\':
				escaping = 1;
				break;
			case '(':
				clevel++;
				break;
			case ' ':
			case '\t':
			case '\r':
				break;
			case '\n':
				/*
				 *  look for multiline fields
				 */
				if(!foldednl(yylp))
					goto out;
				break;
			default:
				goto out;
			}
		}
		if(w == 0)
			w = s_new();
		s_putc(w, c);
	}
out:
	if(w)
		s_terminate(w);
	return w;
}

/*
 *  link two parsed entries together: p2 is hung on the end of the
 *  list starting at p1.  Returns p1.
 */
Node*
link2(Node *p1, Node *p2)
{
	Node *tail;

	tail = p1;
	while(tail->next)
		tail = tail->next;
	tail->next = p2;
	return p1;
}

/*
 *  link three parsed entries together, in order.  Returns p1.
 */
Node*
link3(Node *p1, Node *p2, Node *p3)
{
	link2(p2, p3);
	return link2(p1, p2);
}

/*
 *  make a:b, move all white space after both
 *
 *  p2 is merged into p1 and freed.
 *  NOTE(review): unlike concat(), this appends to p1->s without
 *  checking it for nil.
 */
Node*
colon(Node *p1, Node *p2)
{
	if(p1->white){
		if(p2->white)
			s_append(p1->white, s_to_c(p2->white));
	} else {
		/* take over p2's white space so freenode doesn't free it */
		p1->white = p2->white;
		p2->white = 0;
	}

	s_append(p1->s, ":");
	if(p2->s)
		s_append(p1->s, s_to_c(p2->s));

	if(p1->end < p2->end)
		p1->end = p2->end;
	freenode(p2);
	return p1;
}

/*
 *  concatenate two fields, move all white space after both
 *
 *  p2 is merged into p1 and freed.  A node without text (a
 *  single-character token) contributes its character c.
 */
Node*
concat(Node *p1, Node *p2)
{
	char buf[2];

	if(p1->white){
		if(p2->white)
			s_append(p1->white, s_to_c(p2->white));
	} else {
		/* take over p2's white space so freenode doesn't free it */
		p1->white = p2->white;
		p2->white = 0;
	}

	if(p1->s == nil){
		buf[0] = p1->c;
		buf[1] = 0;
		p1->s = s_new();
		s_append(p1->s, buf);
	}

	if(p2->s)
		s_append(p1->s, s_to_c(p2->s));
	else {
		buf[0] = p2->c;
		buf[1] = 0;
		s_append(p1->s, buf);
	}

	if(p1->end < p2->end)
		p1->end = p2->end;
	freenode(p2);
	return p1;
}

/*
 *  look for disallowed chars in the field name
 *
 *  Returns 1 if any node but the last is followed by white space.
 */
int
badfieldname(Node *p)
{
	for(; p; p = p->next){
		/* field name can't contain white space */
		if(p->white && p->next)
			return 1;
	}
	return 0;
}

/*
 *  mark as an address
 */
Node *
address(Node *p)
{
	p->addr = 1;
	return p;
}

/*
 *  case independent string compare
 *
 *  Returns 0 only if the strings are equal ignoring case; any
 *  other value means they differ (the result carries no ordering).
 */
int
cistrcmp(char *s1, char *s2)
{
	int c1, c2;

	for(; *s1; s1++, s2++){
		/* ctype functions need an unsigned char value */
		c1 = tolower((unsigned char)*s1);
		c2 = tolower((unsigned char)*s2);
		if(c1 != c2)
			return -1;
	}
	/* s1 is exhausted: equal only if s2 is too */
	return *s2;
}

/*
 *  free a node and every node linked after it
 */
void
freenode(Node *p)
{
	Node *tp;

	while(p){
		tp = p->next;
		if(p->s)
			s_free(p->s);
		if(p->white)
			s_free(p->white);
		free(p);
		p = tp;
	}
}


/*
 *  an anonymous user
 *
 *  Replaces the text of p with the stand-in name and marks it as
 *  an address.
 */
Node*
nobody(Node *p)
{
	if(p->s)
		s_free(p->s);
	p->s = s_copy("pOsTmAsTeR");
	p->addr = 1;
	return p;
}

/*
 *  Allocate a Field for the node list p and put it on the end of
 *  the firstfield/lastfield list.
 *
 *  NOTE(review): the malloc result is not checked.
 */
static void
appendfield(Node *p, int source)
{
	Field *f;

	f = malloc(sizeof(Field));
	f->next = 0;
	f->node = p;
	f->source = source;
	if(firstfield)
		lastfield->next = f;
	else
		firstfield = f;
	lastfield = f;
}

/*
 *  add anything that was dropped because of a parse error
 *
 *  The text between the end of the last recorded field (or the
 *  start of the buffer) and the start of p is added as a field of
 *  its own, prefixed with "BadHeader: ".  A gap starting with
 *  "From " is not recorded.
 *
 *  NOTE(review): the malloc result is not checked.
 */
void
missing(Node *p)
{
	Node *np;
	char *start, *end;
	String *s;

	start = yybuffer;
	if(lastfield != nil){
		for(np = lastfield->node; np; np = np->next)
			start = np->end+1;
	}

	end = p->start-1;

	if(end <= start)
		return;

	if(strncmp(start, "From ", 5) == 0)
		return;

	np = malloc(sizeof(Node));
	np->start = start;
	np->end = end;
	np->white = nil;
	/* malloc does not clear; don't leave these two indeterminate */
	np->addr = 0;
	np->c = WORD;
	s = s_copy("BadHeader: ");
	np->s = s_nappend(s, start, end-start);
	np->next = nil;

	appendfield(np, 0);
}

/*
 *  create a new field
 *
 *  p is the list of nodes making up the field; source is stored in
 *  the Field (the grammar passes 1 for originator fields and 0 for
 *  all others).  Any text skipped before p is recorded first.
 */
void
newfield(Node *p, int source)
{
	missing(p);
	appendfield(p, source);
	endfield = startfield;
	startfield = yylp;
}

/*
 *  free a list of fields
 */
void
freefield(Field *f)
{
	Field *tf;

	while(f){
		tf = f->next;
		freenode(f->node);
		free(f);
		f = tf;
	}
}

/*
 *  add some white space to a node
 *
 *  Gives the last node of the list a single blank if it has no
 *  trailing white space.  Returns p.
 */
Node*
whiten(Node *p)
{
	Node *tp;

	for(tp = p; tp->next; tp = tp->next)
		;
	if(tp->white == 0)
		tp->white = s_copy(" ");
	return p;
}

/*
 *  free every parsed field and leave the field list empty
 */
void
yycleanup(void)
{
	freefield(firstfield);
	firstfield = lastfield = 0;
}