plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

vf.c (19995B)


      1 /*
      2  *  this is a filter that changes mime types and names of
      3  *  suspect executable attachments.
      4  */
      5 #include "common.h"
      6 #include <ctype.h>
      7 
/*
 *  verdicts of the external validateattachment program; runchecker
 *  compares them against atoi() of the child's wait message.
 */
enum {
	Accept = 0xA,	/* runchecker reports the attachment as okay */
	Discard = 0xD,	/* runchecker refuses the whole message */
};

/* message input and filtered output; main binds them to fds 0 and 1 */
Biobuf in;
Biobuf out;	/* save() temporarily rebinds this to a file */

typedef struct Mtype Mtype;
typedef struct Hdef Hdef;
typedef struct Hline Hline;
typedef struct Part Part;
     20 
/* forward declarations for the file-local helpers defined below */
static int	badfile(char *name);
static int	badtype(char *type);
static void	ctype(Part*, Hdef*, char*);
static void	cencoding(Part*, Hdef*, char*);
static void	cdisposition(Part*, Hdef*, char*);
static int	decquoted(char *out, char *in, char *e);
static char*	getstring(char *p, String *s, int dolower);
static void	init_hdefs(void);
static int	isattribute(char **pp, char *attr);
static int	latin1toutf(char *out, char *in, char *e);
static String*	mkboundary(void);
static Part*	part(Part *pp);
static Part*	passbody(Part *p, int dobound);
static void	passnotheader(void);
static void	passunixheader(void);
static Part*	problemchild(Part *p);
static void	readheader(Part *p);
static Hline*	readhl(void);
static void	readmtypes(void);
static int	save(Part *p, char *file);
static void	setfilename(Part *p, char *name);
static char*	skiptosemi(char *p);
static char*	skipwhite(char *p);
static String*	tokenconvert(String *t);
static void	writeheader(Part *p, int);
     46 
enum
{
	/* encodings (Part.encoding, set by cencoding) */
	Enone=	0,
	Ebase64,
	Equoted,

	/* disposition possibilities (Part.disposition, set by cdisposition) */
	Dnone=	0,
	Dinline,
	Dfile,
	Dignore,

	/* base64 padding character */
	PAD64=	'='
};
     62 
/*
 *  a message part; either the whole message or a subpart
 */
struct Part
{
	Part	*pp;		/* parent part */
	Hline	*hl;		/* linked list of header lines */
	int	disposition;	/* Dinline or Dfile once cdisposition has seen one */
	int	encoding;	/* Ebase64 or Equoted once cencoding has seen one */
	int	badfile;	/* result of badfile() on the file name: 0, 1 or 2 */
	int	badtype;	/* set to 1 when badtype() rejects the content type */
	String	*boundary;	/* boundary for multiparts */
	int	blen;		/* s_len(boundary), used as the strncmp length */
	String	*charset;	/* character set */
	String	*type;		/* content type */
	String	*filename;	/* file name */
	Biobuf	*tmpbuf;		/* diversion input buffer */
};
     81 
/*
 *  a (multi)line header
 */
struct Hline
{
	Hline	*next;		/* next header line, nil at the end of the list */
	String		*s;	/* raw text, continuation lines and newlines included */
};
     90 
/*
 *  header definitions for parsing
 */
struct Hdef
{
	char *type;	/* header name including the ':', matched case-insensitively */
	void (*f)(Part*, Hdef*, char*);	/* parser called with the whole header line */
	int len;	/* strlen(type), filled in by init_hdefs */
};
    100 
/*
 *  the headers readheader dispatches on; the list ends at the
 *  entry whose type is nil.
 */
Hdef hdefs[] =
{
	{ "content-type:", ctype, },
	{ "content-transfer-encoding:", cencoding, },
	{ "content-disposition:", cdisposition, },
	{ 0, }
};
    108 
/*
 *  acceptable content types and their extensions
 */
struct Mtype {
	Mtype	*next;
	char 	*ext;		/* extension */
	char	*gtype;		/* generic content type */
	char	*stype;		/* specific content type */
	char	class;		/* policy letter; badfile/badtype test for 'm', 'y', 'p', 'r' */
};
Mtype *mtypes;	/* list built by readmtypes */
    120 
int justreject;	/* -r: problemchild leaves suspect parts unwrapped */
char *savefile;	/* -s: file that refused messages are appended to */
    123 
/*
 *  print the command synopsis on fd 2 and exit with status "usage"
 */
void
usage(void)
{
	fprint(2, "usage: upas/vf [-r] [-s savefile]\n");
	exits("usage");
}
    130 
/*
 *  filter a message from fd 0 to fd 1.  -r sets justreject,
 *  -s names the save file; any other flag or argument is a
 *  usage error.
 */
void
main(int argc, char **argv)
{
	ARGBEGIN{
	case 'r':
		justreject = 1;
		break;
	case 's':
		savefile = EARGF(usage());
		break;
	default:
		usage();
	}ARGEND;

	/* no positional arguments are accepted */
	if(argc)
		usage();

	Binit(&in, 0, OREAD);
	Binit(&out, 1, OWRITE);

	init_hdefs();
	readmtypes();

	/* pass through our standard 'From ' line */
	passunixheader();

	/* parse with the top level part */
	part(nil);

	exits(0);
}
    162 
    163 void
    164 refuse(void)
    165 {
    166 	postnote(PNGROUP, getpid(), "mail refused: we don't accept executable attachments");
    167 	exits("mail refused: we don't accept executable attachments");
    168 }
    169 
    170 
    171 /*
    172  *  parse a part; returns the ancestor whose boundary terminated
    173  *  this part or nil on EOF.
    174  */
    175 static Part*
    176 part(Part *pp)
    177 {
    178 	Part *p, *np;
    179 
    180 	p = mallocz(sizeof *p, 1);
    181 	p->pp = pp;
    182 	readheader(p);
    183 
    184 	if(p->boundary != nil){
    185 		/* the format of a multipart part is always:
    186 		 *   header
    187 		 *   null or ignored body
    188 		 *   boundary
    189 		 *   header
    190 		 *   body
    191 		 *   boundary
    192 		 *   ...
    193 		 */
    194 		writeheader(p, 1);
    195 		np = passbody(p, 1);
    196 		if(np != p)
    197 			return np;
    198 		for(;;){
    199 			np = part(p);
    200 			if(np != p)
    201 				return np;
    202 		}
    203 	} else {
    204 		/* no boundary */
    205 		/* may still be multipart if this is a forwarded message */
    206 		if(p->type && cistrcmp(s_to_c(p->type), "message/rfc822") == 0){
    207 			/* the format of forwarded message is:
    208 			 *   header
    209 			 *   header
    210 			 *   body
    211 			 */
    212 			writeheader(p, 1);
    213 			passnotheader();
    214 			return part(p);
    215 		} else {
    216 			/*
    217 			 * This is the meat.  This may be an executable.
    218 			 * if so, wrap it and change its type
    219 			 */
    220 			if(p->badtype || p->badfile){
    221 				if(p->badfile == 2){
    222 					if(savefile != nil)
    223 						save(p, savefile);
    224 					syslog(0, "vf", "vf rejected %s %s", p->type?s_to_c(p->type):"?",
    225 						p->filename?s_to_c(p->filename):"?");
    226 					fprint(2, "The mail contained an executable attachment.\n");
    227 					fprint(2, "We refuse all mail containing such.\n");
    228 					refuse();
    229 				}
    230 				np = problemchild(p);
    231 				if(np != p)
    232 					return np;
    233 				/* if problemchild returns p, it turns out p is okay: fall thru */
    234 			}
    235 			writeheader(p, 1);
    236 			return passbody(p, 1);
    237 		}
    238 	}
    239 }
    240 
    241 /*
    242  *  read and parse a complete header
    243  */
    244 static void
    245 readheader(Part *p)
    246 {
    247 	Hline *hl, **l;
    248 	Hdef *hd;
    249 
    250 	l = &p->hl;
    251 	for(;;){
    252 		hl = readhl();
    253 		if(hl == nil)
    254 			break;
    255 		*l = hl;
    256 		l = &hl->next;
    257 
    258 		for(hd = hdefs; hd->type != nil; hd++){
    259 			if(cistrncmp(s_to_c(hl->s), hd->type, hd->len) == 0){
    260 				(*hd->f)(p, hd, s_to_c(hl->s));
    261 				break;
    262 			}
    263 		}
    264 	}
    265 }
    266 
    267 /*
    268  *  read a possibly multiline header line
    269  */
    270 static Hline*
    271 readhl(void)
    272 {
    273 	Hline *hl;
    274 	String *s;
    275 	char *p;
    276 	int n;
    277 
    278 	p = Brdline(&in, '\n');
    279 	if(p == nil)
    280 		return nil;
    281 	n = Blinelen(&in);
    282 	if(memchr(p, ':', n) == nil){
    283 		Bseek(&in, -n, 1);
    284 		return nil;
    285 	}
    286 	s = s_nappend(s_new(), p, n);
    287 	for(;;){
    288 		p = Brdline(&in, '\n');
    289 		if(p == nil)
    290 			break;
    291 		n = Blinelen(&in);
    292 		if(*p != ' ' && *p != '\t'){
    293 			Bseek(&in, -n, 1);
    294 			break;
    295 		}
    296 		s = s_nappend(s, p, n);
    297 	}
    298 	hl = malloc(sizeof *hl);
    299 	hl->s = s;
    300 	hl->next = nil;
    301 	return hl;
    302 }
    303 
    304 /*
    305  *  write out a complete header
    306  */
    307 static void
    308 writeheader(Part *p, int xfree)
    309 {
    310 	Hline *hl, *next;
    311 
    312 	for(hl = p->hl; hl != nil; hl = next){
    313 		Bprint(&out, "%s", s_to_c(hl->s));
    314 		if(xfree)
    315 			s_free(hl->s);
    316 		next = hl->next;
    317 		if(xfree)
    318 			free(hl);
    319 	}
    320 	if(xfree)
    321 		p->hl = nil;
    322 }
    323 
    324 /*
    325  *  pass a body through.  return if we hit one of our ancestors'
    326  *  boundaries or EOF.  if we hit a boundary, return a pointer to
    327  *  that ancestor.  if we hit EOF, return nil.
    328  */
    329 static Part*
    330 passbody(Part *p, int dobound)
    331 {
    332 	Part *pp;
    333 	Biobuf *b;
    334 	char *cp;
    335 
    336 	for(;;){
    337 		if(p->tmpbuf){
    338 			b = p->tmpbuf;
    339 			cp = Brdline(b, '\n');
    340 			if(cp == nil){
    341 				Bterm(b);
    342 				p->tmpbuf = nil;
    343 				goto Stdin;
    344 			}
    345 		}else{
    346 		Stdin:
    347 			b = &in;
    348 			cp = Brdline(b, '\n');
    349 		}
    350 		if(cp == nil)
    351 			return nil;
    352 		for(pp = p; pp != nil; pp = pp->pp)
    353 			if(pp->boundary != nil
    354 			&& strncmp(cp, s_to_c(pp->boundary), pp->blen) == 0){
    355 				if(dobound)
    356 					Bwrite(&out, cp, Blinelen(b));
    357 				else
    358 					Bseek(b, -Blinelen(b), 1);
    359 				return pp;
    360 			}
    361 		Bwrite(&out, cp, Blinelen(b));
    362 	}
    363 	return nil;
    364 }
    365 
    366 /*
    367  *  save the message somewhere
    368  */
    369 static vlong bodyoff;	/* clumsy hack */
    370 static int
    371 save(Part *p, char *file)
    372 {
    373 	int fd;
    374 	char *cp;
    375 
    376 	Bterm(&out);
    377 	memset(&out, 0, sizeof(out));
    378 
    379 	fd = open(file, OWRITE);
    380 	if(fd < 0)
    381 		return -1;
    382 	seek(fd, 0, 2);
    383 	Binit(&out, fd, OWRITE);
    384 	cp = ctime(time(0));
    385 	cp[28] = 0;
    386 	Bprint(&out, "From virusfilter %s\n", cp);
    387 	writeheader(p, 0);
    388 	bodyoff = Boffset(&out);
    389 	passbody(p, 1);
    390 	Bprint(&out, "\n");
    391 	Bterm(&out);
    392 	close(fd);
    393 
    394 	memset(&out, 0, sizeof out);
    395 	Binit(&out, 1, OWRITE);
    396 	return 0;
    397 }
    398 
    399 /*
    400  * write to a file but save the fd for passbody.
    401  */
    402 static char*
    403 savetmp(Part *p)
    404 {
    405 	char buf[40], *name;
    406 	int fd;
    407 
    408 	strcpy(buf, "/var/tmp/vf.XXXXXXXXXXX");
    409 	if((fd = mkstemp(buf)) < 0){
    410 		fprint(2, "error creating temporary file: %r\n");
    411 		refuse();
    412 	}
    413 	name = buf;
    414 	close(fd);
    415 	if(save(p, name) < 0){
    416 		fprint(2, "error saving temporary file: %r\n");
    417 		refuse();
    418 	}
    419 	if(p->tmpbuf){
    420 		fprint(2, "error in savetmp: already have tmp file!\n");
    421 		refuse();
    422 	}
    423 	p->tmpbuf = Bopen(name, OREAD|ORCLOSE);
    424 	if(p->tmpbuf == nil){
    425 		fprint(2, "error reading tempoary file: %r\n");
    426 		refuse();
    427 	}
    428 	Bseek(p->tmpbuf, bodyoff, 0);
    429 	return strdup(name);
    430 }
    431 
    432 /*
    433  * Run the external checker to do content-based checks.
    434  */
    435 static int
    436 runchecker(Part *p)
    437 {
    438 	int pid;
    439 	char *name;
    440 	Waitmsg *w;
    441 	static char *val;
    442 
    443 	if(val == nil)
    444 		val = unsharp("#9/mail/lib/validateattachment");
    445 	if(val == nil || access(val, AEXEC) < 0)
    446 		return 0;
    447 
    448 	name = savetmp(p);
    449 	fprint(2, "run checker %s\n", name);
    450 	switch(pid = fork()){
    451 	case -1:
    452 		sysfatal("fork: %r");
    453 	case 0:
    454 		dup(2, 1);
    455 		execl(val, "validateattachment", name, nil);
    456 		_exits("exec failed");
    457 	}
    458 
    459 	/*
    460 	 * Okay to return on error - will let mail through but wrapped.
    461 	 */
    462 	w = wait();
    463 	remove(name);
    464 	if(w == nil){
    465 		syslog(0, "mail", "vf wait failed: %r");
    466 		return 0;
    467 	}
    468 	if(w->pid != pid){
    469 		syslog(0, "mail", "vf wrong pid %d != %d", w->pid, pid);
    470 		return 0;
    471 	}
    472 	if(p->filename)
    473 		name = s_to_c(p->filename);
    474 	if(atoi(w->msg) == Discard){
    475 		syslog(0, "mail", "vf validateattachment rejected %s", name);
    476 		refuse();
    477 	}
    478 	if(atoi(w->msg) == Accept){
    479 		syslog(0, "mail", "vf validateattachment accepted %s", name);
    480 		return 1;
    481 	}
    482 	free(w);
    483 	return 0;
    484 }
    485 
    486 /*
    487  *  emit a multipart Part that explains the problem
    488  */
    489 static Part*
    490 problemchild(Part *p)
    491 {
    492 	Part *np;
    493 	Hline *hl;
    494 	String *boundary;
    495 	char *cp;
    496 
    497 	/*
    498 	 * We don't know whether the attachment is okay.
    499 	 * If there's an external checker, let it have a crack at it.
    500 	 */
    501 	if(runchecker(p) > 0)
    502 		return p;
    503 
    504 	if(justreject)
    505 		return p;
    506 
    507 	syslog(0, "mail", "vf wrapped %s %s", p->type?s_to_c(p->type):"?",
    508 		p->filename?s_to_c(p->filename):"?");
    509 
    510 	boundary = mkboundary();
    511 	/* print out non-mime headers */
    512 	for(hl = p->hl; hl != nil; hl = hl->next)
    513 		if(cistrncmp(s_to_c(hl->s), "content-", 8) != 0)
    514 			Bprint(&out, "%s", s_to_c(hl->s));
    515 
    516 	/* add in our own multipart headers and message */
    517 	Bprint(&out, "Content-Type: multipart/mixed;\n");
    518 	Bprint(&out, "\tboundary=\"%s\"\n", s_to_c(boundary));
    519 	Bprint(&out, "Content-Disposition: inline\n");
    520 	Bprint(&out, "\n");
    521 	Bprint(&out, "This is a multi-part message in MIME format.\n");
    522 	Bprint(&out, "--%s\n", s_to_c(boundary));
    523 	Bprint(&out, "Content-Disposition: inline\n");
    524 	Bprint(&out, "Content-Type: text/plain; charset=\"US-ASCII\"\n");
    525 	Bprint(&out, "Content-Transfer-Encoding: 7bit\n");
    526 	Bprint(&out, "\n");
    527 	Bprint(&out, "from postmaster@%s:\n", sysname());
    528 	Bprint(&out, "The following attachment had content that we can't\n");
    529 	Bprint(&out, "prove to be harmless.  To avoid possible automatic\n");
    530 	Bprint(&out, "execution, we changed the content headers.\n");
    531 	Bprint(&out, "The original header was:\n\n");
    532 
    533 	/* print out original header lines */
    534 	for(hl = p->hl; hl != nil; hl = hl->next)
    535 		if(cistrncmp(s_to_c(hl->s), "content-", 8) == 0)
    536 			Bprint(&out, "\t%s", s_to_c(hl->s));
    537 	Bprint(&out, "--%s\n", s_to_c(boundary));
    538 
    539 	/* change file name */
    540 	if(p->filename)
    541 		s_append(p->filename, ".suspect");
    542 	else
    543 		p->filename = s_copy("file.suspect");
    544 
    545 	/* print out new header */
    546 	Bprint(&out, "Content-Type: application/octet-stream\n");
    547 	Bprint(&out, "Content-Disposition: attachment; filename=\"%s\"\n", s_to_c(p->filename));
    548 	switch(p->encoding){
    549 	case Enone:
    550 		break;
    551 	case Ebase64:
    552 		Bprint(&out, "Content-Transfer-Encoding: base64\n");
    553 		break;
    554 	case Equoted:
    555 		Bprint(&out, "Content-Transfer-Encoding: quoted-printable\n");
    556 		break;
    557 	}
    558 
    559 	/* pass the body */
    560 	np = passbody(p, 0);
    561 
    562 	/* add the new boundary and the original terminator */
    563 	Bprint(&out, "--%s--\n", s_to_c(boundary));
    564 	if(np && np->boundary){
    565 		cp = Brdline(&in, '\n');
    566 		Bwrite(&out, cp, Blinelen(&in));
    567 	}
    568 
    569 	return np;
    570 }
    571 
    572 static int
    573 isattribute(char **pp, char *attr)
    574 {
    575 	char *p;
    576 	int n;
    577 
    578 	n = strlen(attr);
    579 	p = *pp;
    580 	if(cistrncmp(p, attr, n) != 0)
    581 		return 0;
    582 	p += n;
    583 	while(*p == ' ')
    584 		p++;
    585 	if(*p++ != '=')
    586 		return 0;
    587 	while(*p == ' ')
    588 		p++;
    589 	*pp = p;
    590 	return 1;
    591 }
    592 
    593 /*
    594  *  parse content type header
    595  */
    596 static void
    597 ctype(Part *p, Hdef *h, char *cp)
    598 {
    599 	String *s;
    600 
    601 	cp += h->len;
    602 	cp = skipwhite(cp);
    603 
    604 	p->type = s_new();
    605 	cp = getstring(cp, p->type, 1);
    606 	if(badtype(s_to_c(p->type)))
    607 		p->badtype = 1;
    608 
    609 	while(*cp){
    610 		if(isattribute(&cp, "boundary")){
    611 			s = s_new();
    612 			cp = getstring(cp, s, 0);
    613 			p->boundary = s_reset(p->boundary);
    614 			s_append(p->boundary, "--");
    615 			s_append(p->boundary, s_to_c(s));
    616 			p->blen = s_len(p->boundary);
    617 			s_free(s);
    618 		} else if(cistrncmp(cp, "multipart", 9) == 0){
    619 			/*
    620 			 *  the first unbounded part of a multipart message,
    621 			 *  the preamble, is not displayed or saved
    622 			 */
    623 		} else if(isattribute(&cp, "name")){
    624 			setfilename(p, cp);
    625 		} else if(isattribute(&cp, "charset")){
    626 			if(p->charset == nil)
    627 				p->charset = s_new();
    628 			cp = getstring(cp, s_reset(p->charset), 0);
    629 		}
    630 
    631 		cp = skiptosemi(cp);
    632 	}
    633 }
    634 
    635 /*
    636  *  parse content encoding header
    637  */
    638 static void
    639 cencoding(Part *m, Hdef *h, char *p)
    640 {
    641 	p += h->len;
    642 	p = skipwhite(p);
    643 	if(cistrncmp(p, "base64", 6) == 0)
    644 		m->encoding = Ebase64;
    645 	else if(cistrncmp(p, "quoted-printable", 16) == 0)
    646 		m->encoding = Equoted;
    647 }
    648 
    649 /*
    650  *  parse content disposition header
    651  */
    652 static void
    653 cdisposition(Part *p, Hdef *h, char *cp)
    654 {
    655 	cp += h->len;
    656 	cp = skipwhite(cp);
    657 	while(*cp){
    658 		if(cistrncmp(cp, "inline", 6) == 0){
    659 			p->disposition = Dinline;
    660 		} else if(cistrncmp(cp, "attachment", 10) == 0){
    661 			p->disposition = Dfile;
    662 		} else if(cistrncmp(cp, "filename=", 9) == 0){
    663 			cp += 9;
    664 			setfilename(p, cp);
    665 		}
    666 		cp = skiptosemi(cp);
    667 	}
    668 
    669 }
    670 
    671 static void
    672 setfilename(Part *p, char *name)
    673 {
    674 	if(p->filename == nil)
    675 		p->filename = s_new();
    676 	getstring(name, s_reset(p->filename), 0);
    677 	p->filename = tokenconvert(p->filename);
    678 	p->badfile = badfile(s_to_c(p->filename));
    679 }
    680 
    681 static char*
    682 skipwhite(char *p)
    683 {
    684 	while(isspace(*p))
    685 		p++;
    686 	return p;
    687 }
    688 
    689 static char*
    690 skiptosemi(char *p)
    691 {
    692 	while(*p && *p != ';')
    693 		p++;
    694 	while(*p == ';' || isspace(*p))
    695 		p++;
    696 	return p;
    697 }
    698 
    699 /*
    700  *  parse a possibly "'d string from a header.  A
    701  *  ';' terminates the string.
    702  */
    703 static char*
    704 getstring(char *p, String *s, int dolower)
    705 {
    706 	s = s_reset(s);
    707 	p = skipwhite(p);
    708 	if(*p == '"'){
    709 		p++;
    710 		for(;*p && *p != '"'; p++)
    711 			if(dolower)
    712 				s_putc(s, tolower(*p));
    713 			else
    714 				s_putc(s, *p);
    715 		if(*p == '"')
    716 			p++;
    717 		s_terminate(s);
    718 
    719 		return p;
    720 	}
    721 
    722 	for(; *p && !isspace(*p) && *p != ';'; p++)
    723 		if(dolower)
    724 			s_putc(s, tolower(*p));
    725 		else
    726 			s_putc(s, *p);
    727 	s_terminate(s);
    728 
    729 	return p;
    730 }
    731 
    732 static void
    733 init_hdefs(void)
    734 {
    735 	Hdef *hd;
    736 	static int already;
    737 
    738 	if(already)
    739 		return;
    740 	already = 1;
    741 
    742 	for(hd = hdefs; hd->type != nil; hd++)
    743 		hd->len = strlen(hd->type);
    744 }
    745 
    746 /*
    747  *  create a new boundary
    748  */
    749 static String*
    750 mkboundary(void)
    751 {
    752 	char buf[32];
    753 	int i;
    754 	static int already;
    755 
    756 	if(already == 0){
    757 		srand((time(0)<<16)|getpid());
    758 		already = 1;
    759 	}
    760 	strcpy(buf, "upas-");
    761 	for(i = 5; i < sizeof(buf)-1; i++)
    762 		buf[i] = 'a' + nrand(26);
    763 	buf[i] = 0;
    764 	return s_copy(buf);
    765 }
    766 
    767 /*
    768  *  skip blank lines till header
    769  */
    770 static void
    771 passnotheader(void)
    772 {
    773 	char *cp;
    774 	int i, n;
    775 
    776 	while((cp = Brdline(&in, '\n')) != nil){
    777 		n = Blinelen(&in);
    778 		for(i = 0; i < n-1; i++)
    779 			if(cp[i] != ' ' && cp[i] != '\t' && cp[i] != '\r'){
    780 				Bseek(&in, -n, 1);
    781 				return;
    782 			}
    783 		Bwrite(&out, cp, n);
    784 	}
    785 }
    786 
    787 /*
    788  *  pass unix header lines
    789  */
    790 static void
    791 passunixheader(void)
    792 {
    793 	char *p;
    794 	int n;
    795 
    796 	while((p = Brdline(&in, '\n')) != nil){
    797 		n = Blinelen(&in);
    798 		if(strncmp(p, "From ", 5) != 0){
    799 			Bseek(&in, -n, 1);
    800 			break;
    801 		}
    802 		Bwrite(&out, p, n);
    803 	}
    804 }
    805 
    806 /*
    807  *  Read mime types
    808  */
    809 static void
    810 readmtypes(void)
    811 {
    812 	Biobuf *b;
    813 	char *p;
    814 	char *f[6];
    815 	Mtype *m;
    816 	Mtype **l;
    817 
    818 	b = Bopen(unsharp("#9/lib/mimetype"), OREAD);
    819 	if(b == nil)
    820 		return;
    821 
    822 	l = &mtypes;
    823 	while((p = Brdline(b, '\n')) != nil){
    824 		if(*p == '#')
    825 			continue;
    826 		p[Blinelen(b)-1] = 0;
    827 		if(tokenize(p, f, nelem(f)) < 5)
    828 			continue;
    829 		m = mallocz(sizeof *m, 1);
    830 		if(m == nil)
    831 			goto err;
    832 		m->ext = strdup(f[0]);
    833 		if(m->ext == 0)
    834 			goto err;
    835 		m->gtype = strdup(f[1]);
    836 		if(m->gtype == 0)
    837 			goto err;
    838 		m->stype = strdup(f[2]);
    839 		if(m->stype == 0)
    840 			goto err;
    841 		m->class = *f[4];
    842 		*l = m;
    843 		l = &(m->next);
    844 	}
    845 	Bterm(b);
    846 	return;
    847 err:
    848 	if(m == nil)
    849 		return;
    850 	free(m->ext);
    851 	free(m->gtype);
    852 	free(m->stype);
    853 	free(m);
    854 	Bterm(b);
    855 }
    856 
    857 /*
    858  *  if the class is 'm' or 'y', accept it
    859  *  if the class is 'p' check a previous extension
    860  *  otherwise, filename is bad
    861  */
    862 static int
    863 badfile(char *name)
    864 {
    865 	char *p;
    866 	Mtype *m;
    867 	int rv;
    868 
    869 	p = strrchr(name, '.');
    870 	if(p == nil)
    871 		return 0;
    872 
    873 	for(m = mtypes; m != nil; m = m->next)
    874 		if(cistrcmp(p, m->ext) == 0){
    875 			switch(m->class){
    876 			case 'm':
    877 			case 'y':
    878 				return 0;
    879 			case 'p':
    880 				*p = 0;
    881 				rv = badfile(name);
    882 				*p = '.';
    883 				return rv;
    884 			case 'r':
    885 				return 2;
    886 			}
    887 		}
    888 	return 1;
    889 }
    890 
    891 /*
    892  *  if the class is 'm' or 'y' or 'p', accept it
    893  *  otherwise, filename is bad
    894  */
    895 static int
    896 badtype(char *type)
    897 {
    898 	Mtype *m;
    899 	char *s, *fix;
    900 	int rv = 1;
    901 
    902 	fix = s = strchr(type, '/');
    903 	if(s != nil)
    904 		*s++ = 0;
    905 	else
    906 		s = "-";
    907 
    908 	for(m = mtypes; m != nil; m = m->next){
    909 		if(cistrcmp(type, m->gtype) != 0)
    910 			continue;
    911 		if(cistrcmp(s, m->stype) != 0)
    912 			continue;
    913 		switch(m->class){
    914 		case 'y':
    915 		case 'p':
    916 		case 'm':
    917 			rv = 0;
    918 			break;
    919 		}
    920 		break;
    921 	}
    922 
    923 	if(fix != nil)
    924 		*fix = '/';
    925 	return rv;
    926 }
    927 
/* rfc2047 non-ascii */
/* the character sets tokenconvert understands */
typedef struct Charset Charset;
struct Charset {
	char *name;	/* charset label, compared case-insensitively */
	int len;	/* number of characters of name to compare */
	int convert;	/* 1: run the bytes through latin1toutf; 0: copy as is */
} charsets[] =
{
	{ "us-ascii",		8,	1, },
	{ "utf-8",		5,	0, },
	{ "iso-8859-1",		10,	1, }
};
    940 
    941 /*
    942  *  convert to UTF if need be
    943  */
    944 static String*
    945 tokenconvert(String *t)
    946 {
    947 	String *s;
    948 	char decoded[1024];
    949 	char utfbuf[2*1024];
    950 	int i, len;
    951 	char *e;
    952 	char *token;
    953 
    954 	token = s_to_c(t);
    955 	len = s_len(t);
    956 
    957 	if(token[0] != '=' || token[1] != '?' ||
    958 	   token[len-2] != '?' || token[len-1] != '=')
    959 		goto err;
    960 	e = token+len-2;
    961 	token += 2;
    962 
    963 	/* bail if we don't understand the character set */
    964 	for(i = 0; i < nelem(charsets); i++)
    965 		if(cistrncmp(charsets[i].name, token, charsets[i].len) == 0)
    966 		if(token[charsets[i].len] == '?'){
    967 			token += charsets[i].len + 1;
    968 			break;
    969 		}
    970 	if(i >= nelem(charsets))
    971 		goto err;
    972 
    973 	/* bail if it doesn't fit  */
    974 	if(strlen(token) > sizeof(decoded)-1)
    975 		goto err;
    976 
    977 	/* bail if we don't understand the encoding */
    978 	if(cistrncmp(token, "b?", 2) == 0){
    979 		token += 2;
    980 		len = dec64((uchar*)decoded, sizeof(decoded), token, e-token);
    981 		decoded[len] = 0;
    982 	} else if(cistrncmp(token, "q?", 2) == 0){
    983 		token += 2;
    984 		len = decquoted(decoded, token, e);
    985 		if(len > 0 && decoded[len-1] == '\n')
    986 			len--;
    987 		decoded[len] = 0;
    988 	} else
    989 		goto err;
    990 
    991 	s = nil;
    992 	switch(charsets[i].convert){
    993 	case 0:
    994 		s = s_copy(decoded);
    995 		break;
    996 	case 1:
    997 		s = s_new();
    998 		latin1toutf(utfbuf, decoded, decoded+len);
    999 		s_append(s, utfbuf);
   1000 		break;
   1001 	}
   1002 
   1003 	return s;
   1004 err:
   1005 	return s_clone(t);
   1006 }
   1007 
/*
 *  decode quoted
 */
enum
{
	Self=	1,	/* character is copied through unchanged */
	Hex=	2	/* character introduces a two-digit hex escape */
};
uchar	tableqp[256];	/* class of each byte; 0 means the byte is dropped */
   1017 
   1018 static void
   1019 initquoted(void)
   1020 {
   1021 	int c;
   1022 
   1023 	memset(tableqp, 0, 256);
   1024 	for(c = ' '; c <= '<'; c++)
   1025 		tableqp[c] = Self;
   1026 	for(c = '>'; c <= '~'; c++)
   1027 		tableqp[c] = Self;
   1028 	tableqp['\t'] = Self;
   1029 	tableqp['='] = Hex;
   1030 }
   1031 
   1032 static int
   1033 hex2int(int x)
   1034 {
   1035 	if(x >= '0' && x <= '9')
   1036 		return x - '0';
   1037 	if(x >= 'A' && x <= 'F')
   1038 		return (x - 'A') + 10;
   1039 	if(x >= 'a' && x <= 'f')
   1040 		return (x - 'a') + 10;
   1041 	return 0;
   1042 }
   1043 
   1044 static char*
   1045 decquotedline(char *out, char *in, char *e)
   1046 {
   1047 	int c, soft;
   1048 
   1049 	/* dump trailing white space */
   1050 	while(e >= in && (*e == ' ' || *e == '\t' || *e == '\r' || *e == '\n'))
   1051 		e--;
   1052 
   1053 	/* trailing '=' means no newline */
   1054 	if(*e == '='){
   1055 		soft = 1;
   1056 		e--;
   1057 	} else
   1058 		soft = 0;
   1059 
   1060 	while(in <= e){
   1061 		c = (*in++) & 0xff;
   1062 		switch(tableqp[c]){
   1063 		case Self:
   1064 			*out++ = c;
   1065 			break;
   1066 		case Hex:
   1067 			c = hex2int(*in++)<<4;
   1068 			c |= hex2int(*in++);
   1069 			*out++ = c;
   1070 			break;
   1071 		}
   1072 	}
   1073 	if(!soft)
   1074 		*out++ = '\n';
   1075 	*out = 0;
   1076 
   1077 	return out;
   1078 }
   1079 
   1080 static int
   1081 decquoted(char *out, char *in, char *e)
   1082 {
   1083 	char *p, *nl;
   1084 
   1085 	if(tableqp[' '] == 0)
   1086 		initquoted();
   1087 
   1088 	p = out;
   1089 	while((nl = strchr(in, '\n')) != nil && nl < e){
   1090 		p = decquotedline(p, in, nl);
   1091 		in = nl + 1;
   1092 	}
   1093 	if(in < e)
   1094 		p = decquotedline(p, in, e-1);
   1095 
   1096 	/* make sure we end with a new line */
   1097 	if(*(p-1) != '\n'){
   1098 		*p++ = '\n';
   1099 		*p = 0;
   1100 	}
   1101 
   1102 	return p - out;
   1103 }
   1104 
   1105 /* translate latin1 directly since it fits neatly in utf */
   1106 static int
   1107 latin1toutf(char *out, char *in, char *e)
   1108 {
   1109 	Rune r;
   1110 	char *p;
   1111 
   1112 	p = out;
   1113 	for(; in < e; in++){
   1114 		r = (*in) & 0xff;
   1115 		p += runetochar(p, &r);
   1116 	}
   1117 	*p = 0;
   1118 	return p - out;
   1119 }