decode.c (5018B)
1 /* Quick and dirty RFC 2047 */ 2 3 #include "a.h" 4 5 static int 6 unhex1(char c) 7 { 8 if('0' <= c && c <= '9') 9 return c-'0'; 10 if('a' <= c && c <= 'f') 11 return c-'a'+10; 12 if('A' <= c && c <= 'F') 13 return c-'A'+10; 14 return 15; 15 } 16 17 static int 18 unhex(char *s) 19 { 20 return unhex1(s[0])*16+unhex1(s[1]); 21 } 22 23 int 24 _decqp(uchar *out, int lim, char *in, int n, int underscores) 25 { 26 char *p, *ep; 27 uchar *eout, *out0; 28 29 out0 = out; 30 eout = out+lim; 31 for(p=in, ep=in+n; p<ep && out<eout; ){ 32 if(underscores && *p == '_'){ 33 *out++ = ' '; 34 p++; 35 } 36 else if(*p == '='){ 37 if(p+1 >= ep) 38 break; 39 if(*(p+1) == '\n'){ 40 p += 2; 41 continue; 42 } 43 if(p+3 > ep) 44 break; 45 *out++ = unhex(p+1); 46 p += 3; 47 }else 48 *out++ = *p++; 49 } 50 return out-out0; 51 } 52 53 int 54 decqp(uchar *out, int lim, char *in, int n) 55 { 56 return _decqp(out, lim, in, n, 0); 57 } 58 59 char* 60 decode(int kind, char *s, int *len) 61 { 62 char *t; 63 int l; 64 65 if(s == nil) 66 return s; 67 switch(kind){ 68 case QuotedPrintable: 69 case QuotedPrintableU: 70 l = strlen(s)+1; 71 t = emalloc(l); 72 l = _decqp((uchar*)t, l, s, l-1, kind==QuotedPrintableU); 73 *len = l; 74 t[l] = 0; 75 return t; 76 77 case Base64: 78 l = strlen(s)+1; 79 t = emalloc(l); 80 l = dec64((uchar*)t, l, s, l-1); 81 *len = l; 82 t[l] = 0; 83 return t; 84 85 default: 86 *len = strlen(s); 87 return estrdup(s); 88 } 89 } 90 91 struct { 92 char *mime; 93 char *tcs; 94 } tcstab[] = { 95 "iso-8859-2", "8859-2", 96 "iso-8859-3", "8859-3", 97 "iso-8859-4", "8859-4", 98 "iso-8859-5", "8859-5", 99 "iso-8859-6", "8859-6", 100 "iso-8859-7", "8859-7", 101 "iso-8859-8", "8859-8", 102 "iso-8859-9", "8859-9", 103 "iso-8859-10", "8859-10", 104 "iso-8859-15", "8859-15", 105 "big5", "big5", 106 "iso-2022-jp", "jis-kanji", 107 "windows-1250", "windows-1250", 108 "windows-1251", "windows-1251", 109 "windows-1252", "windows-1252", 110 "windows-1253", "windows-1253", 111 "windows-1254", "windows-1254", 112 "windows-1255", "windows-1255", 113 "windows-1256", "windows-1256", 114 "windows-1257", "windows-1257", 115 "windows-1258", "windows-1258", 116 "koi8-r", "koi8" 117 }; 118 119 typedef struct Writeargs Writeargs; 120 struct Writeargs 121 { 122 int fd; 123 char *s; 124 }; 125 126 static void 127 twriter(void *v) 128 { 129 Writeargs *w; 130 131 w = v; 132 write(w->fd, w->s, strlen(w->s)); 133 close(w->fd); 134 free(w->s); 135 free(w); 136 } 137 138 char* 139 tcs(char *charset, char *s) 140 { 141 char *buf; 142 int i, n, nbuf; 143 int fd[3], p[2], pp[2]; 144 uchar *us; 145 char *t, *u; 146 Rune r; 147 Writeargs *w; 148 149 if(s == nil || charset == nil || *s == 0) 150 return s; 151 152 if(cistrcmp(charset, "utf-8") == 0) 153 return s; 154 if(cistrcmp(charset, "iso-8859-1") == 0 || cistrcmp(charset, "us-ascii") == 0){ 155 latin1: 156 n = 0; 157 for(us=(uchar*)s; *us; us++) 158 n += runelen(*us); 159 n++; 160 t = emalloc(n); 161 for(us=(uchar*)s, u=t; *us; us++){ 162 r = *us; 163 u += runetochar(u, &r); 164 } 165 *u = 0; 166 free(s); 167 return t; 168 } 169 for(i=0; i<nelem(tcstab); i++) 170 if(cistrcmp(charset, tcstab[i].mime) == 0) 171 goto tcs; 172 goto latin1; 173 174 tcs: 175 if(pipe(p) < 0 || pipe(pp) < 0) 176 sysfatal("pipe: %r"); 177 fd[0] = p[0]; 178 fd[1] = pp[0]; 179 fd[2] = dup(2, -1); 180 if(threadspawnl(fd, "tcs", "tcs", "-f", tcstab[i].tcs, nil) < 0){ 181 close(p[0]); 182 close(p[1]); 183 close(pp[0]); 184 close(pp[1]); 185 close(fd[2]); 186 goto latin1; 187 } 188 close(p[0]); 189 close(pp[0]); 190 191 nbuf = UTFmax*strlen(s)+100; /* just a guess at worst case */ 192 buf = emalloc(nbuf); 193 194 w = emalloc(sizeof *w); 195 w->fd = p[1]; 196 w->s = estrdup(s); 197 proccreate(twriter, w, STACK); 198 199 n = readn(pp[1], buf, nbuf-1); 200 close(pp[1]); 201 if(n <= 0){ 202 free(buf); 203 goto latin1; 204 } 205 buf[n] = 0; 206 free(s); 207 s = estrdup(buf); 208 free(buf); 209 return s; 210 } 211 212 char* 213 unrfc2047(char *s) 214 { 215 char *p, *q, *t, *u, *v; 216 int len; 217 Rune r; 218 Fmt fmt; 219 220 if(s == nil) 221 return nil; 222 223 if(strstr(s, "=?") == nil) 224 return s; 225 226 fmtstrinit(&fmt); 227 for(p=s; *p; ){ 228 /* =?charset?e?text?= */ 229 if(*p=='=' && *(p+1)=='?'){ 230 p += 2; 231 q = strchr(p, '?'); 232 if(q == nil) 233 goto emit; 234 q++; 235 if(*q == '?' || *(q+1) != '?') 236 goto emit; 237 t = q+2; 238 u = strchr(t, '?'); 239 if(u == nil || *(u+1) != '=') 240 goto emit; 241 switch(*q){ 242 case 'q': 243 case 'Q': 244 *u = 0; 245 v = decode(QuotedPrintableU, t, &len); 246 break; 247 case 'b': 248 case 'B': 249 *u = 0; 250 v = decode(Base64, t, &len); 251 break; 252 default: 253 goto emit; 254 } 255 *(q-1) = 0; 256 v = tcs(p, v); 257 fmtstrcpy(&fmt, v); 258 free(v); 259 p = u+2; 260 } 261 emit: 262 p += chartorune(&r, p); 263 fmtrune(&fmt, r); 264 } 265 p = fmtstrflush(&fmt); 266 if(p == nil) 267 sysfatal("out of memory"); 268 free(s); 269 return p; 270 } 271 272 #ifdef TEST 273 char *test[] = 274 { 275 "hello world", 276 "hello =?iso-8859-1?q?this is some text?=", 277 "=?US-ASCII?Q?Keith_Moore?=", 278 "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=", 279 "=?ISO-8859-1?Q?Andr=E9?= Pirard", 280 "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=", 281 "=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=", 282 "=?ISO-8859-1?Q?Olle_J=E4rnefors?=", 283 "=?iso-2022-jp?B?GyRCTTVKISRKP006SiRyS34kPyQ3JEZKcz03JCIkahsoQg==?=", 284 "=?UTF-8?B?Ik5pbHMgTy4gU2Vsw6VzZGFsIg==?=" 285 }; 286 287 void 288 threadmain(int argc, char **argv) 289 { 290 int i; 291 292 for(i=0; i<nelem(test); i++) 293 print("%s\n\t%s\n", test[i], unrfc2047(estrdup(test[i]))); 294 threadexitsall(0); 295 } 296 297 #endif