vf.c (19995B)
1 /* 2 * this is a filter that changes mime types and names of 3 * suspect executable attachments. 4 */ 5 #include "common.h" 6 #include <ctype.h> 7 8 enum { 9 Accept = 0xA, 10 Discard = 0xD, 11 }; 12 13 Biobuf in; 14 Biobuf out; 15 16 typedef struct Mtype Mtype; 17 typedef struct Hdef Hdef; 18 typedef struct Hline Hline; 19 typedef struct Part Part; 20 21 static int badfile(char *name); 22 static int badtype(char *type); 23 static void ctype(Part*, Hdef*, char*); 24 static void cencoding(Part*, Hdef*, char*); 25 static void cdisposition(Part*, Hdef*, char*); 26 static int decquoted(char *out, char *in, char *e); 27 static char* getstring(char *p, String *s, int dolower); 28 static void init_hdefs(void); 29 static int isattribute(char **pp, char *attr); 30 static int latin1toutf(char *out, char *in, char *e); 31 static String* mkboundary(void); 32 static Part* part(Part *pp); 33 static Part* passbody(Part *p, int dobound); 34 static void passnotheader(void); 35 static void passunixheader(void); 36 static Part* problemchild(Part *p); 37 static void readheader(Part *p); 38 static Hline* readhl(void); 39 static void readmtypes(void); 40 static int save(Part *p, char *file); 41 static void setfilename(Part *p, char *name); 42 static char* skiptosemi(char *p); 43 static char* skipwhite(char *p); 44 static String* tokenconvert(String *t); 45 static void writeheader(Part *p, int); 46 47 enum 48 { 49 /* encodings */ 50 Enone= 0, 51 Ebase64, 52 Equoted, 53 54 /* disposition possibilities */ 55 Dnone= 0, 56 Dinline, 57 Dfile, 58 Dignore, 59 60 PAD64= '=' 61 }; 62 63 /* 64 * a message part; either the whole message or a subpart 65 */ 66 struct Part 67 { 68 Part *pp; /* parent part */ 69 Hline *hl; /* linked list of header lines */ 70 int disposition; 71 int encoding; 72 int badfile; 73 int badtype; 74 String *boundary; /* boundary for multiparts */ 75 int blen; 76 String *charset; /* character set */ 77 String *type; /* content type */ 78 String *filename; /* file name */ 79 Biobuf *tmpbuf; /* diversion input buffer */ 80 }; 81 82 /* 83 * a (multi)line header 84 */ 85 struct Hline 86 { 87 Hline *next; 88 String *s; 89 }; 90 91 /* 92 * header definitions for parsing 93 */ 94 struct Hdef 95 { 96 char *type; 97 void (*f)(Part*, Hdef*, char*); 98 int len; 99 }; 100 101 Hdef hdefs[] = 102 { 103 { "content-type:", ctype, }, 104 { "content-transfer-encoding:", cencoding, }, 105 { "content-disposition:", cdisposition, }, 106 { 0, } 107 }; 108 109 /* 110 * acceptable content types and their extensions 111 */ 112 struct Mtype { 113 Mtype *next; 114 char *ext; /* extension */ 115 char *gtype; /* generic content type */ 116 char *stype; /* specific content type */ 117 char class; 118 }; 119 Mtype *mtypes; 120 121 int justreject; 122 char *savefile; 123 124 void 125 usage(void) 126 { 127 fprint(2, "usage: upas/vf [-r] [-s savefile]\n"); 128 exits("usage"); 129 } 130 131 void 132 main(int argc, char **argv) 133 { 134 ARGBEGIN{ 135 case 'r': 136 justreject = 1; 137 break; 138 case 's': 139 savefile = EARGF(usage()); 140 break; 141 default: 142 usage(); 143 }ARGEND; 144 145 if(argc) 146 usage(); 147 148 Binit(&in, 0, OREAD); 149 Binit(&out, 1, OWRITE); 150 151 init_hdefs(); 152 readmtypes(); 153 154 /* pass through our standard 'From ' line */ 155 passunixheader(); 156 157 /* parse with the top level part */ 158 part(nil); 159 160 exits(0); 161 } 162 163 void 164 refuse(void) 165 { 166 postnote(PNGROUP, getpid(), "mail refused: we don't accept executable attachments"); 167 exits("mail refused: we don't accept executable attachments"); 168 } 169 170 171 /* 172 * parse a part; returns the ancestor whose boundary terminated 173 * this part or nil on EOF. 174 */ 175 static Part* 176 part(Part *pp) 177 { 178 Part *p, *np; 179 180 p = mallocz(sizeof *p, 1); 181 p->pp = pp; 182 readheader(p); 183 184 if(p->boundary != nil){ 185 /* the format of a multipart part is always: 186 * header 187 * null or ignored body 188 * boundary 189 * header 190 * body 191 * boundary 192 * ... 193 */ 194 writeheader(p, 1); 195 np = passbody(p, 1); 196 if(np != p) 197 return np; 198 for(;;){ 199 np = part(p); 200 if(np != p) 201 return np; 202 } 203 } else { 204 /* no boundary */ 205 /* may still be multipart if this is a forwarded message */ 206 if(p->type && cistrcmp(s_to_c(p->type), "message/rfc822") == 0){ 207 /* the format of forwarded message is: 208 * header 209 * header 210 * body 211 */ 212 writeheader(p, 1); 213 passnotheader(); 214 return part(p); 215 } else { 216 /* 217 * This is the meat. This may be an executable. 218 * if so, wrap it and change its type 219 */ 220 if(p->badtype || p->badfile){ 221 if(p->badfile == 2){ 222 if(savefile != nil) 223 save(p, savefile); 224 syslog(0, "vf", "vf rejected %s %s", p->type?s_to_c(p->type):"?", 225 p->filename?s_to_c(p->filename):"?"); 226 fprint(2, "The mail contained an executable attachment.\n"); 227 fprint(2, "We refuse all mail containing such.\n"); 228 refuse(); 229 } 230 np = problemchild(p); 231 if(np != p) 232 return np; 233 /* if problemchild returns p, it turns out p is okay: fall thru */ 234 } 235 writeheader(p, 1); 236 return passbody(p, 1); 237 } 238 } 239 } 240 241 /* 242 * read and parse a complete header 243 */ 244 static void 245 readheader(Part *p) 246 { 247 Hline *hl, **l; 248 Hdef *hd; 249 250 l = &p->hl; 251 for(;;){ 252 hl = readhl(); 253 if(hl == nil) 254 break; 255 *l = hl; 256 l = &hl->next; 257 258 for(hd = hdefs; hd->type != nil; hd++){ 259 if(cistrncmp(s_to_c(hl->s), hd->type, hd->len) == 0){ 260 (*hd->f)(p, hd, s_to_c(hl->s)); 261 break; 262 } 263 } 264 } 265 } 266 267 /* 268 * read a possibly multiline header line 269 */ 270 static Hline* 271 readhl(void) 272 { 273 Hline *hl; 274 String *s; 275 char *p; 276 int n; 277 278 p = Brdline(&in, '\n'); 279 if(p == nil) 280 return nil; 281 n = Blinelen(&in); 282 if(memchr(p, ':', n) == nil){ 283 Bseek(&in, -n, 1); 284 return nil; 285 } 286 s = s_nappend(s_new(), p, n); 287 for(;;){ 288 p = Brdline(&in, '\n'); 289 if(p == nil) 290 break; 291 n = Blinelen(&in); 292 if(*p != ' ' && *p != '\t'){ 293 Bseek(&in, -n, 1); 294 break; 295 } 296 s = s_nappend(s, p, n); 297 } 298 hl = malloc(sizeof *hl); 299 hl->s = s; 300 hl->next = nil; 301 return hl; 302 } 303 304 /* 305 * write out a complete header 306 */ 307 static void 308 writeheader(Part *p, int xfree) 309 { 310 Hline *hl, *next; 311 312 for(hl = p->hl; hl != nil; hl = next){ 313 Bprint(&out, "%s", s_to_c(hl->s)); 314 if(xfree) 315 s_free(hl->s); 316 next = hl->next; 317 if(xfree) 318 free(hl); 319 } 320 if(xfree) 321 p->hl = nil; 322 } 323 324 /* 325 * pass a body through. return if we hit one of our ancestors' 326 * boundaries or EOF. if we hit a boundary, return a pointer to 327 * that ancestor. if we hit EOF, return nil. 328 */ 329 static Part* 330 passbody(Part *p, int dobound) 331 { 332 Part *pp; 333 Biobuf *b; 334 char *cp; 335 336 for(;;){ 337 if(p->tmpbuf){ 338 b = p->tmpbuf; 339 cp = Brdline(b, '\n'); 340 if(cp == nil){ 341 Bterm(b); 342 p->tmpbuf = nil; 343 goto Stdin; 344 } 345 }else{ 346 Stdin: 347 b = ∈ 348 cp = Brdline(b, '\n'); 349 } 350 if(cp == nil) 351 return nil; 352 for(pp = p; pp != nil; pp = pp->pp) 353 if(pp->boundary != nil 354 && strncmp(cp, s_to_c(pp->boundary), pp->blen) == 0){ 355 if(dobound) 356 Bwrite(&out, cp, Blinelen(b)); 357 else 358 Bseek(b, -Blinelen(b), 1); 359 return pp; 360 } 361 Bwrite(&out, cp, Blinelen(b)); 362 } 363 return nil; 364 } 365 366 /* 367 * save the message somewhere 368 */ 369 static vlong bodyoff; /* clumsy hack */ 370 static int 371 save(Part *p, char *file) 372 { 373 int fd; 374 char *cp; 375 376 Bterm(&out); 377 memset(&out, 0, sizeof(out)); 378 379 fd = open(file, OWRITE); 380 if(fd < 0) 381 return -1; 382 seek(fd, 0, 2); 383 Binit(&out, fd, OWRITE); 384 cp = ctime(time(0)); 385 cp[28] = 0; 386 Bprint(&out, "From virusfilter %s\n", cp); 387 writeheader(p, 0); 388 bodyoff = Boffset(&out); 389 passbody(p, 1); 390 Bprint(&out, "\n"); 391 Bterm(&out); 392 close(fd); 393 394 memset(&out, 0, sizeof out); 395 Binit(&out, 1, OWRITE); 396 return 0; 397 } 398 399 /* 400 * write to a file but save the fd for passbody. 401 */ 402 static char* 403 savetmp(Part *p) 404 { 405 char buf[40], *name; 406 int fd; 407 408 strcpy(buf, "/var/tmp/vf.XXXXXXXXXXX"); 409 if((fd = mkstemp(buf)) < 0){ 410 fprint(2, "error creating temporary file: %r\n"); 411 refuse(); 412 } 413 name = buf; 414 close(fd); 415 if(save(p, name) < 0){ 416 fprint(2, "error saving temporary file: %r\n"); 417 refuse(); 418 } 419 if(p->tmpbuf){ 420 fprint(2, "error in savetmp: already have tmp file!\n"); 421 refuse(); 422 } 423 p->tmpbuf = Bopen(name, OREAD|ORCLOSE); 424 if(p->tmpbuf == nil){ 425 fprint(2, "error reading tempoary file: %r\n"); 426 refuse(); 427 } 428 Bseek(p->tmpbuf, bodyoff, 0); 429 return strdup(name); 430 } 431 432 /* 433 * Run the external checker to do content-based checks. 434 */ 435 static int 436 runchecker(Part *p) 437 { 438 int pid; 439 char *name; 440 Waitmsg *w; 441 static char *val; 442 443 if(val == nil) 444 val = unsharp("#9/mail/lib/validateattachment"); 445 if(val == nil || access(val, AEXEC) < 0) 446 return 0; 447 448 name = savetmp(p); 449 fprint(2, "run checker %s\n", name); 450 switch(pid = fork()){ 451 case -1: 452 sysfatal("fork: %r"); 453 case 0: 454 dup(2, 1); 455 execl(val, "validateattachment", name, nil); 456 _exits("exec failed"); 457 } 458 459 /* 460 * Okay to return on error - will let mail through but wrapped. 461 */ 462 w = wait(); 463 remove(name); 464 if(w == nil){ 465 syslog(0, "mail", "vf wait failed: %r"); 466 return 0; 467 } 468 if(w->pid != pid){ 469 syslog(0, "mail", "vf wrong pid %d != %d", w->pid, pid); 470 return 0; 471 } 472 if(p->filename) 473 name = s_to_c(p->filename); 474 if(atoi(w->msg) == Discard){ 475 syslog(0, "mail", "vf validateattachment rejected %s", name); 476 refuse(); 477 } 478 if(atoi(w->msg) == Accept){ 479 syslog(0, "mail", "vf validateattachment accepted %s", name); 480 return 1; 481 } 482 free(w); 483 return 0; 484 } 485 486 /* 487 * emit a multipart Part that explains the problem 488 */ 489 static Part* 490 problemchild(Part *p) 491 { 492 Part *np; 493 Hline *hl; 494 String *boundary; 495 char *cp; 496 497 /* 498 * We don't know whether the attachment is okay. 499 * If there's an external checker, let it have a crack at it. 500 */ 501 if(runchecker(p) > 0) 502 return p; 503 504 if(justreject) 505 return p; 506 507 syslog(0, "mail", "vf wrapped %s %s", p->type?s_to_c(p->type):"?", 508 p->filename?s_to_c(p->filename):"?"); 509 510 boundary = mkboundary(); 511 /* print out non-mime headers */ 512 for(hl = p->hl; hl != nil; hl = hl->next) 513 if(cistrncmp(s_to_c(hl->s), "content-", 8) != 0) 514 Bprint(&out, "%s", s_to_c(hl->s)); 515 516 /* add in our own multipart headers and message */ 517 Bprint(&out, "Content-Type: multipart/mixed;\n"); 518 Bprint(&out, "\tboundary=\"%s\"\n", s_to_c(boundary)); 519 Bprint(&out, "Content-Disposition: inline\n"); 520 Bprint(&out, "\n"); 521 Bprint(&out, "This is a multi-part message in MIME format.\n"); 522 Bprint(&out, "--%s\n", s_to_c(boundary)); 523 Bprint(&out, "Content-Disposition: inline\n"); 524 Bprint(&out, "Content-Type: text/plain; charset=\"US-ASCII\"\n"); 525 Bprint(&out, "Content-Transfer-Encoding: 7bit\n"); 526 Bprint(&out, "\n"); 527 Bprint(&out, "from postmaster@%s:\n", sysname()); 528 Bprint(&out, "The following attachment had content that we can't\n"); 529 Bprint(&out, "prove to be harmless. To avoid possible automatic\n"); 530 Bprint(&out, "execution, we changed the content headers.\n"); 531 Bprint(&out, "The original header was:\n\n"); 532 533 /* print out original header lines */ 534 for(hl = p->hl; hl != nil; hl = hl->next) 535 if(cistrncmp(s_to_c(hl->s), "content-", 8) == 0) 536 Bprint(&out, "\t%s", s_to_c(hl->s)); 537 Bprint(&out, "--%s\n", s_to_c(boundary)); 538 539 /* change file name */ 540 if(p->filename) 541 s_append(p->filename, ".suspect"); 542 else 543 p->filename = s_copy("file.suspect"); 544 545 /* print out new header */ 546 Bprint(&out, "Content-Type: application/octet-stream\n"); 547 Bprint(&out, "Content-Disposition: attachment; filename=\"%s\"\n", s_to_c(p->filename)); 548 switch(p->encoding){ 549 case Enone: 550 break; 551 case Ebase64: 552 Bprint(&out, "Content-Transfer-Encoding: base64\n"); 553 break; 554 case Equoted: 555 Bprint(&out, "Content-Transfer-Encoding: quoted-printable\n"); 556 break; 557 } 558 559 /* pass the body */ 560 np = passbody(p, 0); 561 562 /* add the new boundary and the original terminator */ 563 Bprint(&out, "--%s--\n", s_to_c(boundary)); 564 if(np && np->boundary){ 565 cp = Brdline(&in, '\n'); 566 Bwrite(&out, cp, Blinelen(&in)); 567 } 568 569 return np; 570 } 571 572 static int 573 isattribute(char **pp, char *attr) 574 { 575 char *p; 576 int n; 577 578 n = strlen(attr); 579 p = *pp; 580 if(cistrncmp(p, attr, n) != 0) 581 return 0; 582 p += n; 583 while(*p == ' ') 584 p++; 585 if(*p++ != '=') 586 return 0; 587 while(*p == ' ') 588 p++; 589 *pp = p; 590 return 1; 591 } 592 593 /* 594 * parse content type header 595 */ 596 static void 597 ctype(Part *p, Hdef *h, char *cp) 598 { 599 String *s; 600 601 cp += h->len; 602 cp = skipwhite(cp); 603 604 p->type = s_new(); 605 cp = getstring(cp, p->type, 1); 606 if(badtype(s_to_c(p->type))) 607 p->badtype = 1; 608 609 while(*cp){ 610 if(isattribute(&cp, "boundary")){ 611 s = s_new(); 612 cp = getstring(cp, s, 0); 613 p->boundary = s_reset(p->boundary); 614 s_append(p->boundary, "--"); 615 s_append(p->boundary, s_to_c(s)); 616 p->blen = s_len(p->boundary); 617 s_free(s); 618 } else if(cistrncmp(cp, "multipart", 9) == 0){ 619 /* 620 * the first unbounded part of a multipart message, 621 * the preamble, is not displayed or saved 622 */ 623 } else if(isattribute(&cp, "name")){ 624 setfilename(p, cp); 625 } else if(isattribute(&cp, "charset")){ 626 if(p->charset == nil) 627 p->charset = s_new(); 628 cp = getstring(cp, s_reset(p->charset), 0); 629 } 630 631 cp = skiptosemi(cp); 632 } 633 } 634 635 /* 636 * parse content encoding header 637 */ 638 static void 639 cencoding(Part *m, Hdef *h, char *p) 640 { 641 p += h->len; 642 p = skipwhite(p); 643 if(cistrncmp(p, "base64", 6) == 0) 644 m->encoding = Ebase64; 645 else if(cistrncmp(p, "quoted-printable", 16) == 0) 646 m->encoding = Equoted; 647 } 648 649 /* 650 * parse content disposition header 651 */ 652 static void 653 cdisposition(Part *p, Hdef *h, char *cp) 654 { 655 cp += h->len; 656 cp = skipwhite(cp); 657 while(*cp){ 658 if(cistrncmp(cp, "inline", 6) == 0){ 659 p->disposition = Dinline; 660 } else if(cistrncmp(cp, "attachment", 10) == 0){ 661 p->disposition = Dfile; 662 } else if(cistrncmp(cp, "filename=", 9) == 0){ 663 cp += 9; 664 setfilename(p, cp); 665 } 666 cp = skiptosemi(cp); 667 } 668 669 } 670 671 static void 672 setfilename(Part *p, char *name) 673 { 674 if(p->filename == nil) 675 p->filename = s_new(); 676 getstring(name, s_reset(p->filename), 0); 677 p->filename = tokenconvert(p->filename); 678 p->badfile = badfile(s_to_c(p->filename)); 679 } 680 681 static char* 682 skipwhite(char *p) 683 { 684 while(isspace(*p)) 685 p++; 686 return p; 687 } 688 689 static char* 690 skiptosemi(char *p) 691 { 692 while(*p && *p != ';') 693 p++; 694 while(*p == ';' || isspace(*p)) 695 p++; 696 return p; 697 } 698 699 /* 700 * parse a possibly "'d string from a header. A 701 * ';' terminates the string. 702 */ 703 static char* 704 getstring(char *p, String *s, int dolower) 705 { 706 s = s_reset(s); 707 p = skipwhite(p); 708 if(*p == '"'){ 709 p++; 710 for(;*p && *p != '"'; p++) 711 if(dolower) 712 s_putc(s, tolower(*p)); 713 else 714 s_putc(s, *p); 715 if(*p == '"') 716 p++; 717 s_terminate(s); 718 719 return p; 720 } 721 722 for(; *p && !isspace(*p) && *p != ';'; p++) 723 if(dolower) 724 s_putc(s, tolower(*p)); 725 else 726 s_putc(s, *p); 727 s_terminate(s); 728 729 return p; 730 } 731 732 static void 733 init_hdefs(void) 734 { 735 Hdef *hd; 736 static int already; 737 738 if(already) 739 return; 740 already = 1; 741 742 for(hd = hdefs; hd->type != nil; hd++) 743 hd->len = strlen(hd->type); 744 } 745 746 /* 747 * create a new boundary 748 */ 749 static String* 750 mkboundary(void) 751 { 752 char buf[32]; 753 int i; 754 static int already; 755 756 if(already == 0){ 757 srand((time(0)<<16)|getpid()); 758 already = 1; 759 } 760 strcpy(buf, "upas-"); 761 for(i = 5; i < sizeof(buf)-1; i++) 762 buf[i] = 'a' + nrand(26); 763 buf[i] = 0; 764 return s_copy(buf); 765 } 766 767 /* 768 * skip blank lines till header 769 */ 770 static void 771 passnotheader(void) 772 { 773 char *cp; 774 int i, n; 775 776 while((cp = Brdline(&in, '\n')) != nil){ 777 n = Blinelen(&in); 778 for(i = 0; i < n-1; i++) 779 if(cp[i] != ' ' && cp[i] != '\t' && cp[i] != '\r'){ 780 Bseek(&in, -n, 1); 781 return; 782 } 783 Bwrite(&out, cp, n); 784 } 785 } 786 787 /* 788 * pass unix header lines 789 */ 790 static void 791 passunixheader(void) 792 { 793 char *p; 794 int n; 795 796 while((p = Brdline(&in, '\n')) != nil){ 797 n = Blinelen(&in); 798 if(strncmp(p, "From ", 5) != 0){ 799 Bseek(&in, -n, 1); 800 break; 801 } 802 Bwrite(&out, p, n); 803 } 804 } 805 806 /* 807 * Read mime types 808 */ 809 static void 810 readmtypes(void) 811 { 812 Biobuf *b; 813 char *p; 814 char *f[6]; 815 Mtype *m; 816 Mtype **l; 817 818 b = Bopen(unsharp("#9/lib/mimetype"), OREAD); 819 if(b == nil) 820 return; 821 822 l = &mtypes; 823 while((p = Brdline(b, '\n')) != nil){ 824 if(*p == '#') 825 continue; 826 p[Blinelen(b)-1] = 0; 827 if(tokenize(p, f, nelem(f)) < 5) 828 continue; 829 m = mallocz(sizeof *m, 1); 830 if(m == nil) 831 goto err; 832 m->ext = strdup(f[0]); 833 if(m->ext == 0) 834 goto err; 835 m->gtype = strdup(f[1]); 836 if(m->gtype == 0) 837 goto err; 838 m->stype = strdup(f[2]); 839 if(m->stype == 0) 840 goto err; 841 m->class = *f[4]; 842 *l = m; 843 l = &(m->next); 844 } 845 Bterm(b); 846 return; 847 err: 848 if(m == nil) 849 return; 850 free(m->ext); 851 free(m->gtype); 852 free(m->stype); 853 free(m); 854 Bterm(b); 855 } 856 857 /* 858 * if the class is 'm' or 'y', accept it 859 * if the class is 'p' check a previous extension 860 * otherwise, filename is bad 861 */ 862 static int 863 badfile(char *name) 864 { 865 char *p; 866 Mtype *m; 867 int rv; 868 869 p = strrchr(name, '.'); 870 if(p == nil) 871 return 0; 872 873 for(m = mtypes; m != nil; m = m->next) 874 if(cistrcmp(p, m->ext) == 0){ 875 switch(m->class){ 876 case 'm': 877 case 'y': 878 return 0; 879 case 'p': 880 *p = 0; 881 rv = badfile(name); 882 *p = '.'; 883 return rv; 884 case 'r': 885 return 2; 886 } 887 } 888 return 1; 889 } 890 891 /* 892 * if the class is 'm' or 'y' or 'p', accept it 893 * otherwise, filename is bad 894 */ 895 static int 896 badtype(char *type) 897 { 898 Mtype *m; 899 char *s, *fix; 900 int rv = 1; 901 902 fix = s = strchr(type, '/'); 903 if(s != nil) 904 *s++ = 0; 905 else 906 s = "-"; 907 908 for(m = mtypes; m != nil; m = m->next){ 909 if(cistrcmp(type, m->gtype) != 0) 910 continue; 911 if(cistrcmp(s, m->stype) != 0) 912 continue; 913 switch(m->class){ 914 case 'y': 915 case 'p': 916 case 'm': 917 rv = 0; 918 break; 919 } 920 break; 921 } 922 923 if(fix != nil) 924 *fix = '/'; 925 return rv; 926 } 927 928 /* rfc2047 non-ascii */ 929 typedef struct Charset Charset; 930 struct Charset { 931 char *name; 932 int len; 933 int convert; 934 } charsets[] = 935 { 936 { "us-ascii", 8, 1, }, 937 { "utf-8", 5, 0, }, 938 { "iso-8859-1", 10, 1, } 939 }; 940 941 /* 942 * convert to UTF if need be 943 */ 944 static String* 945 tokenconvert(String *t) 946 { 947 String *s; 948 char decoded[1024]; 949 char utfbuf[2*1024]; 950 int i, len; 951 char *e; 952 char *token; 953 954 token = s_to_c(t); 955 len = s_len(t); 956 957 if(token[0] != '=' || token[1] != '?' || 958 token[len-2] != '?' || token[len-1] != '=') 959 goto err; 960 e = token+len-2; 961 token += 2; 962 963 /* bail if we don't understand the character set */ 964 for(i = 0; i < nelem(charsets); i++) 965 if(cistrncmp(charsets[i].name, token, charsets[i].len) == 0) 966 if(token[charsets[i].len] == '?'){ 967 token += charsets[i].len + 1; 968 break; 969 } 970 if(i >= nelem(charsets)) 971 goto err; 972 973 /* bail if it doesn't fit */ 974 if(strlen(token) > sizeof(decoded)-1) 975 goto err; 976 977 /* bail if we don't understand the encoding */ 978 if(cistrncmp(token, "b?", 2) == 0){ 979 token += 2; 980 len = dec64((uchar*)decoded, sizeof(decoded), token, e-token); 981 decoded[len] = 0; 982 } else if(cistrncmp(token, "q?", 2) == 0){ 983 token += 2; 984 len = decquoted(decoded, token, e); 985 if(len > 0 && decoded[len-1] == '\n') 986 len--; 987 decoded[len] = 0; 988 } else 989 goto err; 990 991 s = nil; 992 switch(charsets[i].convert){ 993 case 0: 994 s = s_copy(decoded); 995 break; 996 case 1: 997 s = s_new(); 998 latin1toutf(utfbuf, decoded, decoded+len); 999 s_append(s, utfbuf); 1000 break; 1001 } 1002 1003 return s; 1004 err: 1005 return s_clone(t); 1006 } 1007 1008 /* 1009 * decode quoted 1010 */ 1011 enum 1012 { 1013 Self= 1, 1014 Hex= 2 1015 }; 1016 uchar tableqp[256]; 1017 1018 static void 1019 initquoted(void) 1020 { 1021 int c; 1022 1023 memset(tableqp, 0, 256); 1024 for(c = ' '; c <= '<'; c++) 1025 tableqp[c] = Self; 1026 for(c = '>'; c <= '~'; c++) 1027 tableqp[c] = Self; 1028 tableqp['\t'] = Self; 1029 tableqp['='] = Hex; 1030 } 1031 1032 static int 1033 hex2int(int x) 1034 { 1035 if(x >= '0' && x <= '9') 1036 return x - '0'; 1037 if(x >= 'A' && x <= 'F') 1038 return (x - 'A') + 10; 1039 if(x >= 'a' && x <= 'f') 1040 return (x - 'a') + 10; 1041 return 0; 1042 } 1043 1044 static char* 1045 decquotedline(char *out, char *in, char *e) 1046 { 1047 int c, soft; 1048 1049 /* dump trailing white space */ 1050 while(e >= in && (*e == ' ' || *e == '\t' || *e == '\r' || *e == '\n')) 1051 e--; 1052 1053 /* trailing '=' means no newline */ 1054 if(*e == '='){ 1055 soft = 1; 1056 e--; 1057 } else 1058 soft = 0; 1059 1060 while(in <= e){ 1061 c = (*in++) & 0xff; 1062 switch(tableqp[c]){ 1063 case Self: 1064 *out++ = c; 1065 break; 1066 case Hex: 1067 c = hex2int(*in++)<<4; 1068 c |= hex2int(*in++); 1069 *out++ = c; 1070 break; 1071 } 1072 } 1073 if(!soft) 1074 *out++ = '\n'; 1075 *out = 0; 1076 1077 return out; 1078 } 1079 1080 static int 1081 decquoted(char *out, char *in, char *e) 1082 { 1083 char *p, *nl; 1084 1085 if(tableqp[' '] == 0) 1086 initquoted(); 1087 1088 p = out; 1089 while((nl = strchr(in, '\n')) != nil && nl < e){ 1090 p = decquotedline(p, in, nl); 1091 in = nl + 1; 1092 } 1093 if(in < e) 1094 p = decquotedline(p, in, e-1); 1095 1096 /* make sure we end with a new line */ 1097 if(*(p-1) != '\n'){ 1098 *p++ = '\n'; 1099 *p = 0; 1100 } 1101 1102 return p - out; 1103 } 1104 1105 /* translate latin1 directly since it fits neatly in utf */ 1106 static int 1107 latin1toutf(char *out, char *in, char *e) 1108 { 1109 Rune r; 1110 char *p; 1111 1112 p = out; 1113 for(; in < e; in++){ 1114 r = (*in) & 0xff; 1115 p += runetochar(p, &r); 1116 } 1117 *p = 0; 1118 return p - out; 1119 }