plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

decode.c (5018B)


      1 /* Quick and dirty RFC 2047 */
      2 
      3 #include "a.h"
      4 
      5 static int
      6 unhex1(char c)
      7 {
      8 	if('0' <= c && c <= '9')
      9 		return c-'0';
     10 	if('a' <= c && c <= 'f')
     11 		return c-'a'+10;
     12 	if('A' <= c && c <= 'F')
     13 		return c-'A'+10;
     14 	return 15;
     15 }
     16 
     17 static int
     18 unhex(char *s)
     19 {
     20 	return unhex1(s[0])*16+unhex1(s[1]);
     21 }
     22 
     23 int
     24 _decqp(uchar *out, int lim, char *in, int n, int underscores)
     25 {
     26 	char *p, *ep;
     27 	uchar *eout, *out0;
     28 
     29 	out0 = out;
     30 	eout = out+lim;
     31 	for(p=in, ep=in+n; p<ep && out<eout; ){
     32 		if(underscores && *p == '_'){
     33 			*out++ = ' ';
     34 			p++;
     35 		}
     36 		else if(*p == '='){
     37 			if(p+1 >= ep)
     38 				break;
     39 			if(*(p+1) == '\n'){
     40 				p += 2;
     41 				continue;
     42 			}
     43 			if(p+3 > ep)
     44 				break;
     45 			*out++ = unhex(p+1);
     46 			p += 3;
     47 		}else
     48 			*out++ = *p++;
     49 	}
     50 	return out-out0;
     51 }
     52 
     53 int
     54 decqp(uchar *out, int lim, char *in, int n)
     55 {
     56 	return _decqp(out, lim, in, n, 0);
     57 }
     58 
     59 char*
     60 decode(int kind, char *s, int *len)
     61 {
     62 	char *t;
     63 	int l;
     64 
     65 	if(s == nil)
     66 		return s;
     67 	switch(kind){
     68 	case QuotedPrintable:
     69 	case QuotedPrintableU:
     70 		l = strlen(s)+1;
     71 		t = emalloc(l);
     72 		l = _decqp((uchar*)t, l, s, l-1, kind==QuotedPrintableU);
     73 		*len = l;
     74 		t[l] = 0;
     75 		return t;
     76 
     77 	case Base64:
     78 		l = strlen(s)+1;
     79 		t = emalloc(l);
     80 		l = dec64((uchar*)t, l, s, l-1);
     81 		*len = l;
     82 		t[l] = 0;
     83 		return t;
     84 
     85 	default:
     86 		*len = strlen(s);
     87 		return estrdup(s);
     88 	}
     89 }
     90 
     91 struct {
     92 	char *mime;
     93 	char *tcs;
     94 } tcstab[] = {
     95 	"iso-8859-2",		"8859-2",
     96 	"iso-8859-3",		"8859-3",
     97 	"iso-8859-4",		"8859-4",
     98 	"iso-8859-5",		"8859-5",
     99 	"iso-8859-6",		"8859-6",
    100 	"iso-8859-7",		"8859-7",
    101 	"iso-8859-8",		"8859-8",
    102 	"iso-8859-9",		"8859-9",
    103 	"iso-8859-10",	"8859-10",
    104 	"iso-8859-15",	"8859-15",
    105 	"big5",			"big5",
    106 	"iso-2022-jp",	"jis-kanji",
    107 	"windows-1250",	"windows-1250",
    108 	"windows-1251",	"windows-1251",
    109 	"windows-1252",	"windows-1252",
    110 	"windows-1253",	"windows-1253",
    111 	"windows-1254",	"windows-1254",
    112 	"windows-1255",	"windows-1255",
    113 	"windows-1256",	"windows-1256",
    114 	"windows-1257",	"windows-1257",
    115 	"windows-1258",	"windows-1258",
    116 	"koi8-r",			"koi8"
    117 };
    118 
    119 typedef struct Writeargs Writeargs;
    120 struct Writeargs
    121 {
    122 	int fd;
    123 	char *s;
    124 };
    125 
    126 static void
    127 twriter(void *v)
    128 {
    129 	Writeargs *w;
    130 
    131 	w = v;
    132 	write(w->fd, w->s, strlen(w->s));
    133 	close(w->fd);
    134 	free(w->s);
    135 	free(w);
    136 }
    137 
    138 char*
    139 tcs(char *charset, char *s)
    140 {
    141 	char *buf;
    142 	int i, n, nbuf;
    143 	int fd[3], p[2], pp[2];
    144 	uchar *us;
    145 	char *t, *u;
    146 	Rune r;
    147 	Writeargs *w;
    148 
    149 	if(s == nil || charset == nil || *s == 0)
    150 		return s;
    151 
    152 	if(cistrcmp(charset, "utf-8") == 0)
    153 		return s;
    154 	if(cistrcmp(charset, "iso-8859-1") == 0 || cistrcmp(charset, "us-ascii") == 0){
    155 latin1:
    156 		n = 0;
    157 		for(us=(uchar*)s; *us; us++)
    158 			n += runelen(*us);
    159 		n++;
    160 		t = emalloc(n);
    161 		for(us=(uchar*)s, u=t; *us; us++){
    162 			r = *us;
    163 			u += runetochar(u, &r);
    164 		}
    165 		*u = 0;
    166 		free(s);
    167 		return t;
    168 	}
    169 	for(i=0; i<nelem(tcstab); i++)
    170 		if(cistrcmp(charset, tcstab[i].mime) == 0)
    171 			goto tcs;
    172 	goto latin1;
    173 
    174 tcs:
    175 	if(pipe(p) < 0 || pipe(pp) < 0)
    176 		sysfatal("pipe: %r");
    177 	fd[0] = p[0];
    178 	fd[1] = pp[0];
    179 	fd[2] = dup(2, -1);
    180 	if(threadspawnl(fd, "tcs", "tcs", "-f", tcstab[i].tcs, nil) < 0){
    181 		close(p[0]);
    182 		close(p[1]);
    183 		close(pp[0]);
    184 		close(pp[1]);
    185 		close(fd[2]);
    186 		goto latin1;
    187 	}
    188 	close(p[0]);
    189 	close(pp[0]);
    190 
    191 	nbuf = UTFmax*strlen(s)+100;	/* just a guess at worst case */
    192 	buf = emalloc(nbuf);
    193 
    194 	w = emalloc(sizeof *w);
    195 	w->fd = p[1];
    196 	w->s = estrdup(s);
    197 	proccreate(twriter, w, STACK);
    198 
    199 	n = readn(pp[1], buf, nbuf-1);
    200 	close(pp[1]);
    201 	if(n <= 0){
    202 		free(buf);
    203 		goto latin1;
    204 	}
    205 	buf[n] = 0;
    206 	free(s);
    207 	s = estrdup(buf);
    208 	free(buf);
    209 	return s;
    210 }
    211 
    212 char*
    213 unrfc2047(char *s)
    214 {
    215 	char *p, *q, *t, *u, *v;
    216 	int len;
    217 	Rune r;
    218 	Fmt fmt;
    219 
    220 	if(s == nil)
    221 		return nil;
    222 
    223 	if(strstr(s, "=?") == nil)
    224 		return s;
    225 
    226 	fmtstrinit(&fmt);
    227 	for(p=s; *p; ){
    228 		/* =?charset?e?text?= */
    229 		if(*p=='=' && *(p+1)=='?'){
    230 			p += 2;
    231 			q = strchr(p, '?');
    232 			if(q == nil)
    233 				goto emit;
    234 			q++;
    235 			if(*q == '?' || *(q+1) != '?')
    236 				goto emit;
    237 			t = q+2;
    238 			u = strchr(t, '?');
    239 			if(u == nil || *(u+1) != '=')
    240 				goto emit;
    241 			switch(*q){
    242 			case 'q':
    243 			case 'Q':
    244 				*u = 0;
    245 				v = decode(QuotedPrintableU, t, &len);
    246 				break;
    247 			case 'b':
    248 			case 'B':
    249 				*u = 0;
    250 				v = decode(Base64, t, &len);
    251 				break;
    252 			default:
    253 				goto emit;
    254 			}
    255 			*(q-1) = 0;
    256 			v = tcs(p, v);
    257 			fmtstrcpy(&fmt, v);
    258 			free(v);
    259 			p = u+2;
    260 		}
    261 	emit:
    262 		p += chartorune(&r, p);
    263 		fmtrune(&fmt, r);
    264 	}
    265 	p = fmtstrflush(&fmt);
    266 	if(p == nil)
    267 		sysfatal("out of memory");
    268 	free(s);
    269 	return p;
    270 }
    271 
    272 #ifdef TEST
    273 char *test[] =
    274 {
    275 	"hello world",
    276 	"hello =?iso-8859-1?q?this is some text?=",
    277 	"=?US-ASCII?Q?Keith_Moore?=",
    278 	"=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=",
    279 	"=?ISO-8859-1?Q?Andr=E9?= Pirard",
    280 	"=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=",
    281 	"=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=",
    282 	"=?ISO-8859-1?Q?Olle_J=E4rnefors?=",
    283 	"=?iso-2022-jp?B?GyRCTTVKISRKP006SiRyS34kPyQ3JEZKcz03JCIkahsoQg==?=",
    284 	"=?UTF-8?B?Ik5pbHMgTy4gU2Vsw6VzZGFsIg==?="
    285 };
    286 
    287 void
    288 threadmain(int argc, char **argv)
    289 {
    290 	int i;
    291 
    292 	for(i=0; i<nelem(test); i++)
    293 		print("%s\n\t%s\n", test[i], unrfc2047(estrdup(test[i])));
    294 	threadexitsall(0);
    295 }
    296 
    297 #endif