plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

hget.c (26037B)


      1 #include <u.h>
      2 #include <libc.h>
      3 #include <ctype.h>
      4 #include <bio.h>
      5 #include <ip.h>
      6 #include <libsec.h>
      7 #include <auth.h>
      8 #include <thread.h>
      9 
/*
 * A cracked URL plus the per-transfer state that the response
 * header handlers fill in.
 */
typedef struct URL URL;
struct URL
{
	int	method;		/* index into method[]: Http, Https, Ftp or Other */
	char	*host;		/* host name; strdup'd by crackurl */
	char	*port;		/* port/service; points into host's allocation or at method[].name */
	char	*page;		/* path part of the URL; strdup'd by crackurl */
	char	*etag;		/* value of the etag: header, once seen */
	char	*redirect;	/* target taken from a location: or uri: header */
	char	*postbody;	/* body to POST; nil selects GET */
	char	*cred;		/* base64 user:password for Basic authorization */
	long	mtime;		/* last-modified: time converted by tm2sec */
};
     23 
/*
 * Byte range still to be fetched.  threadmain starts with
 * start = 0 (or the existing output file's length) and end = -1;
 * the content-length:/content-range: handlers and ftprestart set end.
 */
typedef struct Range Range;
struct Range
{
	long	start;	/* only 2 gig supported, tdb */
	long	end;
};
     30 
/*
 * Output stream state, shared with output() and setoffset()
 * (both defined outside this part of the file).
 */
typedef struct Out Out;
struct Out
{
	int fd;					/* descriptor being written (1 unless -o is given) */
	int offset;				/* notional current offset in output */
	int written;			/* number of bytes successfully transferred to output */
	DigestState *curr;		/* digest state up to offset (if known) */
	DigestState *hiwat;		/* digest state of all bytes written */
};
     40 
/*
 * URL schemes.  The values double as indices into method[],
 * so the order here must match that table.
 */
enum
{
	Http,
	Https,
	Ftp,
	Other	/* unsupported scheme; crackurl rejects it */
};
     48 
/*
 * Non-positive results of the transfer functions (dohttp, doftp).
 * A positive result is a count of bytes transferred.
 */
enum
{
	Eof = 0,		/* nothing more to fetch; threadmain exits successfully */
	Error = -1,		/* failed attempt; threadmain retries a limited number of times */
	Server = -2,	/* server refused or misbehaved; threadmain gives up */
	Changed = -3	/* NOTE(review): not returned by any code visible here */
};
     56 
int debug;		/* -d: trace protocol traffic on fd 2 */
char *ofile;	/* -o: output file name; nil means write to fd 1 */


/* forward declarations for routines defined later in the file */
int	doftp(URL*, URL*, Range*, Out*, long);
int	dohttp(URL*, URL*,  Range*, Out*, long);
int	crackurl(URL*, char*);
Range*	crackrange(char*);
int	getheader(int, char*, int);
int	httpheaders(int, int, URL*, Range*);
int	httprcode(int);
int	cistrncmp(char*, char*, int);
int	cistrcmp(char*, char*);
void	initibuf(void);
int	readline(int, char*, int);
int	readibuf(int, char*, int);
int	dfprint(int, char*, ...);
void	unreadline(char*);
int	output(Out*, char*, int);
void	setoffset(Out*, int);

int	verbose;			/* -v: print progress (offset and end) on fd 2 */
char	*net;			/* -x: alternate network mount point */
char	tcpdir[NETPATHLEN];	/* "tcp" or "<net>/tcp"; built in threadmain */
int	headerprint;		/* -h: getheader prints each header on standard output */
     82 
/*
 * Transfer methods, indexed by the Http/Https/Ftp/Other enum.
 * crackurl matches the URL scheme against name (ignoring case) and
 * also uses name as the default port/service when the URL has none.
 * The last entry is the placeholder for Other and has no handler.
 */
struct {
	char	*name;
	int	(*f)(URL*, URL*, Range*, Out*, long);
} method[] = {
	{ "http",	dohttp },
	{ "https",	dohttp },
	{ "ftp",	doftp },
	{ "_______",	nil },
};
     92 
     93 void
     94 usage(void)
     95 {
     96 	fprint(2, "usage: %s [-hv] [-o outfile] [-p body] [-x netmtpt] url\n", argv0);
     97 	threadexitsall("usage");
     98 }
     99 
    100 void
    101 threadmain(int argc, char **argv)
    102 {
    103 	URL u;
    104 	Range r;
    105 	int errs, n;
    106 	ulong mtime;
    107 	Dir *d;
    108 	char postbody[4096], *p, *e, *t, *hpx;
    109 	URL px; /* Proxy */
    110 	Out out;
    111 
    112 	ofile = nil;
    113 	p = postbody;
    114 	e = p + sizeof(postbody);
    115 	r.start = 0;
    116 	r.end = -1;
    117 	mtime = 0;
    118 	memset(&u, 0, sizeof(u));
    119 	memset(&px, 0, sizeof(px));
    120 	hpx = getenv("httpproxy");
    121 
    122 	ARGBEGIN {
    123 	case 'o':
    124 		ofile = ARGF();
    125 		break;
    126 	case 'd':
    127 		debug = 1;
    128 		break;
    129 	case 'h':
    130 		headerprint = 1;
    131 		break;
    132 	case 'v':
    133 		verbose = 1;
    134 		break;
    135 	case 'x':
    136 		net = ARGF();
    137 		if(net == nil)
    138 			usage();
    139 		break;
    140 	case 'p':
    141 		t = ARGF();
    142 		if(t == nil)
    143 			usage();
    144 		if(p != postbody)
    145 			p = seprint(p, e, "&%s", t);
    146 		else
    147 			p = seprint(p, e, "%s", t);
    148 		u.postbody = postbody;
    149 
    150 		break;
    151 	default:
    152 		usage();
    153 	} ARGEND;
    154 
    155 	if(net != nil){
    156 		if(strlen(net) > sizeof(tcpdir)-5)
    157 			sysfatal("network mount point too long");
    158 		snprint(tcpdir, sizeof(tcpdir), "%s/tcp", net);
    159 	} else
    160 		snprint(tcpdir, sizeof(tcpdir), "tcp");
    161 
    162 	if(argc != 1)
    163 		usage();
    164 
    165 
    166 	out.fd = 1;
    167 	out.written = 0;
    168 	out.offset = 0;
    169 	out.curr = nil;
    170 	out.hiwat = nil;
    171 	if(ofile != nil){
    172 		d = dirstat(ofile);
    173 		if(d == nil){
    174 			out.fd = create(ofile, OWRITE, 0664);
    175 			if(out.fd < 0)
    176 				sysfatal("creating %s: %r", ofile);
    177 		} else {
    178 			out.fd = open(ofile, OWRITE);
    179 			if(out.fd < 0)
    180 				sysfatal("can't open %s: %r", ofile);
    181 			r.start = d->length;
    182 			mtime = d->mtime;
    183 			free(d);
    184 		}
    185 	}
    186 
    187 	errs = 0;
    188 
    189 	if(crackurl(&u, argv[0]) < 0)
    190 		sysfatal("%r");
    191 	if(hpx && crackurl(&px, hpx) < 0)
    192 		sysfatal("%r");
    193 
    194 	for(;;){
    195 		setoffset(&out, 0);
    196 		/* transfer data */
    197 		werrstr("");
    198 		n = (*method[u.method].f)(&u, &px, &r, &out, mtime);
    199 
    200 		switch(n){
    201 		case Eof:
    202 			threadexitsall(0);
    203 			break;
    204 		case Error:
    205 			if(errs++ < 10)
    206 				continue;
    207 			sysfatal("too many errors with no progress %r");
    208 			break;
    209 		case Server:
    210 			sysfatal("server returned: %r");
    211 			break;
    212 		}
    213 
    214 		/* forward progress */
    215 		errs = 0;
    216 		r.start += n;
    217 		if(r.start >= r.end)
    218 			break;
    219 	}
    220 
    221 	threadexitsall(0);
    222 }
    223 
/*
 * Split the URL string s into method, host, port and page and
 * store them in u.  Returns 0 on success; on failure sets the
 * error string with werrstr and returns -1.
 *
 * Any host and page already held by u are freed first.
 * A string with no "://" before its first '/' is treated as http.
 *
 * Side effects worth knowing about:
 *  - when a scheme is present, the ':' in s is overwritten with a
 *    NUL and not restored, so the caller's string is left holding
 *    only the scheme name;
 *  - u->port is never separately allocated: it points either into
 *    u->host's allocation or at the static method name.
 */
int
crackurl(URL *u, char *s)
{
	char *p;
	int i;

	if(u->host != nil){
		free(u->host);
		u->host = nil;
	}
	if(u->page != nil){
		free(u->page);
		u->page = nil;
	}

	/* get type */
	u->method = Other;
	for(p = s; *p; p++){
		if(*p == '/'){
			/* a '/' before any "://": no scheme given, assume http */
			u->method = Http;
			p = s;
			break;
		}
		if(*p == ':' && *(p+1)=='/' && *(p+2)=='/'){
			/* terminate the scheme name and step past "://" */
			*p = 0;
			p += 3;
			for(i = 0; i < nelem(method); i++){
				if(cistrcmp(s, method[i].name) == 0){
					u->method = i;
					break;
				}
			}
			break;
		}
	}

	/* also reached when s contains neither '/' nor "://" */
	if(u->method == Other){
		werrstr("unsupported URL type %s", s);
		return -1;
	}

	/* get system */
	s = p;
	p = strchr(s, '/');
	if(p == nil){
		u->host = strdup(s);
		u->page = strdup("/");
	} else {
		u->page = strdup(p);
		/* cut the string at the '/' just long enough to copy the host */
		*p = 0;
		u->host = strdup(s);
		*p = '/';
	}

	/* split an explicit ":port" off the host copy, else default to the scheme name */
	if(p = strchr(u->host, ':')) {
		*p++ = 0;
		u->port = p;
	} else
		u->port = method[u->method].name;

	/* e.g. a string that begins with '/' leaves an empty host */
	if(*(u->host) == 0){
		werrstr("bad url, null host");
		return -1;
	}

	return 0;
}
    291 
/* weekday abbreviations indexed by Tm.wday; used to build the If-range date */
char *day[] = {
	"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
};

/* month abbreviations indexed by Tm.mon; used to build the If-range date */
char *month[] = {
	"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
    299 
/*
 * State handed to the note handler catch(): the output descriptor
 * and the modification time to stamp on it.  dohttp fills this in
 * before installing the handler.
 */
struct
{
	int	fd;
	long	mtime;
} note;
    305 
    306 void
    307 catch(void *v, char *s)
    308 {
    309 	Dir d;
    310 
    311 	USED(v);
    312 	USED(s);
    313 
    314 	nulldir(&d);
    315 	d.mtime = note.mtime;
    316 	if(dirfwstat(note.fd, &d) < 0)
    317 		sysfatal("catch: can't dirfwstat: %r");
    318 	noted(NDFLT);
    319 }
    320 
    321 int
    322 dohttp(URL *u, URL *px, Range *r, Out *out, long mtime)
    323 {
    324 	int fd, cfd;
    325 	int redirect, auth, loop;
    326 	int n, rv, code;
    327 	long tot, vtime;
    328 	Tm *tm;
    329 	char buf[1024];
    330 	char err[ERRMAX];
    331 
    332 
    333 	/*  always move back to a previous 512 byte bound because some
    334 	 *  servers can't seem to deal with requests that start at the
    335 	 *  end of the file
    336 	 */
    337 	if(r->start)
    338 		r->start = ((r->start-1)/512)*512;
    339 
    340 	/* loop for redirects, requires reading both response code and headers */
    341 	fd = -1;
    342 	for(loop = 0; loop < 32; loop++){
    343 		if(px->host == nil){
    344 			fd = dial(netmkaddr(u->host, tcpdir, u->port), 0, 0, 0);
    345 		} else {
    346 			fd = dial(netmkaddr(px->host, tcpdir, px->port), 0, 0, 0);
    347 		}
    348 		if(fd < 0)
    349 			return Error;
    350 
    351 		if(u->method == Https){
    352 			int tfd;
    353 			TLSconn conn;
    354 
    355 			memset(&conn, 0, sizeof conn);
    356 			tfd = tlsClient(fd, &conn);
    357 			if(tfd < 0){
    358 				fprint(2, "tlsClient: %r\n");
    359 				close(fd);
    360 				return Error;
    361 			}
    362 			/* BUG: check cert here? */
    363 			if(conn.cert)
    364 				free(conn.cert);
    365 			close(fd);
    366 			fd = tfd;
    367 		}
    368 
    369 		/* write request, use range if not start of file */
    370 		if(u->postbody == nil){
    371 			if(px->host == nil){
    372 				dfprint(fd,	"GET %s HTTP/1.0\r\n"
    373 						"Host: %s\r\n"
    374 						"User-agent: Plan9/hget\r\n"
    375 						"Cache-Control: no-cache\r\n"
    376 						"Pragma: no-cache\r\n",
    377 						u->page, u->host);
    378 			} else {
    379 				dfprint(fd,	"GET http://%s%s HTTP/1.0\r\n"
    380 						"Host: %s\r\n"
    381 						"User-agent: Plan9/hget\r\n"
    382 						"Cache-Control: no-cache\r\n"
    383 						"Pragma: no-cache\r\n",
    384 						u->host, u->page, u->host);
    385 			}
    386 			if(u->cred)
    387 				dfprint(fd,	"Authorization: Basic %s\r\n",
    388 						u->cred);
    389 		} else {
    390 			if(px->host == nil){
    391 				dfprint(fd,	"POST %s HTTP/1.0\r\n"
    392 						"Host: %s\r\n"
    393 						"Content-type: application/x-www-form-urlencoded\r\n"
    394 						"Content-length: %d\r\n"
    395 						"User-agent: Plan9/hget\r\n"
    396 						"\r\n",
    397 						u->page, u->host, strlen(u->postbody));
    398 			} else {
    399 				dfprint(fd, "POST http://%s%s HTTP/1.0\r\n"
    400 						"Host: %s\r\n"
    401 						"Content-type: application/x-www-form-urlencoded\r\n"
    402 						"Content-length: %d\r\n"
    403 						"User-agent: Plan9/hget\r\n"
    404 						"\r\n",
    405 						u->host, u->page, u->host, strlen(u->postbody));
    406 			}
    407 			dfprint(fd,	"%s", u->postbody);
    408 		}
    409 		if(r->start != 0){
    410 			dfprint(fd, "Range: bytes=%d-\n", r->start);
    411 			if(u->etag != nil){
    412 				dfprint(fd, "If-range: %s\n", u->etag);
    413 			} else {
    414 				tm = gmtime(mtime);
    415 				dfprint(fd, "If-range: %s, %d %s %d %2d:%2.2d:%2.2d GMT\n",
    416 					day[tm->wday], tm->mday, month[tm->mon],
    417 					tm->year+1900, tm->hour, tm->min, tm->sec);
    418 			}
    419 		}
    420 		if((cfd = open("/mnt/webcookies/http", ORDWR)) >= 0){
    421 			if(fprint(cfd, "http://%s%s", u->host, u->page) > 0){
    422 				while((n = read(cfd, buf, sizeof buf)) > 0){
    423 					if(debug)
    424 						write(2, buf, n);
    425 					write(fd, buf, n);
    426 				}
    427 			}else{
    428 				close(cfd);
    429 				cfd = -1;
    430 			}
    431 		}
    432 
    433 		dfprint(fd, "\r\n", u->host);
    434 
    435 		auth = 0;
    436 		redirect = 0;
    437 		initibuf();
    438 		code = httprcode(fd);
    439 		switch(code){
    440 		case Error:	/* connection timed out */
    441 		case Eof:
    442 			close(fd);
    443 			close(cfd);
    444 			return code;
    445 
    446 		case 200:	/* OK */
    447 		case 201:	/* Created */
    448 		case 202:	/* Accepted */
    449 			if(ofile == nil && r->start != 0)
    450 				sysfatal("page changed underfoot");
    451 			break;
    452 
    453 		case 204:	/* No Content */
    454 			sysfatal("No Content");
    455 
    456 		case 206:	/* Partial Content */
    457 			setoffset(out, r->start);
    458 			break;
    459 
    460 		case 301:	/* Moved Permanently */
    461 		case 302:	/* Moved Temporarily */
    462 			redirect = 1;
    463 			u->postbody = nil;
    464 			break;
    465 
    466 		case 304:	/* Not Modified */
    467 			break;
    468 
    469 		case 400:	/* Bad Request */
    470 			sysfatal("Bad Request");
    471 
    472 		case 401:	/* Unauthorized */
    473 			if (auth)
    474 				sysfatal("Authentication failed");
    475 			auth = 1;
    476 			break;
    477 
    478 		case 402:	/* ??? */
    479 			sysfatal("Unauthorized");
    480 
    481 		case 403:	/* Forbidden */
    482 			sysfatal("Forbidden by server");
    483 
    484 		case 404:	/* Not Found */
    485 			sysfatal("Not found on server");
    486 
    487 		case 407:	/* Proxy Authentication */
    488 			sysfatal("Proxy authentication required");
    489 
    490 		case 500:	/* Internal server error */
    491 			sysfatal("Server choked");
    492 
    493 		case 501:	/* Not implemented */
    494 			sysfatal("Server can't do it!");
    495 
    496 		case 502:	/* Bad gateway */
    497 			sysfatal("Bad gateway");
    498 
    499 		case 503:	/* Service unavailable */
    500 			sysfatal("Service unavailable");
    501 
    502 		default:
    503 			sysfatal("Unknown response code %d", code);
    504 		}
    505 
    506 		if(u->redirect != nil){
    507 			free(u->redirect);
    508 			u->redirect = nil;
    509 		}
    510 
    511 		rv = httpheaders(fd, cfd, u, r);
    512 		close(cfd);
    513 		if(rv != 0){
    514 			close(fd);
    515 			return rv;
    516 		}
    517 
    518 		if(!redirect && !auth)
    519 			break;
    520 
    521 		if (redirect){
    522 			if(u->redirect == nil)
    523 				sysfatal("redirect: no URL");
    524 			if(crackurl(u, u->redirect) < 0)
    525 				sysfatal("redirect: %r");
    526 		}
    527 	}
    528 
    529 	/* transfer whatever you get */
    530 	if(ofile != nil && u->mtime != 0){
    531 		note.fd = out->fd;
    532 		note.mtime = u->mtime;
    533 		notify(catch);
    534 	}
    535 
    536 	tot = 0;
    537 	vtime = 0;
    538 	for(;;){
    539 		n = readibuf(fd, buf, sizeof(buf));
    540 		if(n <= 0)
    541 			break;
    542 		if(output(out, buf, n) != n)
    543 			break;
    544 		tot += n;
    545 		if(verbose && (vtime != time(0) || r->start == r->end)) {
    546 			vtime = time(0);
    547 			fprint(2, "%ld %ld\n", r->start+tot, r->end);
    548 		}
    549 	}
    550 	notify(nil);
    551 	close(fd);
    552 
    553 	if(ofile != nil && u->mtime != 0){
    554 		Dir d;
    555 
    556 		rerrstr(err, sizeof err);
    557 		nulldir(&d);
    558 		d.mtime = u->mtime;
    559 		if(dirfwstat(out->fd, &d) < 0)
    560 			fprint(2, "couldn't set mtime: %r\n");
    561 		errstr(err, sizeof err);
    562 	}
    563 
    564 	return tot;
    565 }
    566 
    567 /* get the http response code */
    568 int
    569 httprcode(int fd)
    570 {
    571 	int n;
    572 	char *p;
    573 	char buf[256];
    574 
    575 	n = readline(fd, buf, sizeof(buf)-1);
    576 	if(n <= 0)
    577 		return n;
    578 	if(debug)
    579 		fprint(2, "%d <- %s\n", fd, buf);
    580 	p = strchr(buf, ' ');
    581 	if(strncmp(buf, "HTTP/", 5) != 0 || p == nil){
    582 		werrstr("bad response from server");
    583 		return -1;
    584 	}
    585 	buf[n] = 0;
    586 	return atoi(p+1);
    587 }
    588 
/* read in and crack the http headers, update u and r */

/*
 * Handlers for individual header fields.  Each receives the field
 * value (leading white space already skipped) plus the URL and
 * Range to update.
 */
void	hhetag(char*, URL*, Range*);
void	hhmtime(char*, URL*, Range*);
void	hhclen(char*, URL*, Range*);
void	hhcrange(char*, URL*, Range*);
void	hhuri(char*, URL*, Range*);
void	hhlocation(char*, URL*, Range*);
void	hhauth(char*, URL*, Range*);

/*
 * Dispatch table used by httpheaders.  name includes the trailing
 * ':' and is compared with cistrncmp, so case does not matter.
 */
struct {
	char *name;
	void (*f)(char*, URL*, Range*);
} headers[] = {
	{ "etag:", hhetag },
	{ "last-modified:", hhmtime },
	{ "content-length:", hhclen },
	{ "content-range:", hhcrange },
	{ "uri:", hhuri },
	{ "location:", hhlocation },
	{ "WWW-Authenticate:", hhauth },
};
    610 int
    611 httpheaders(int fd, int cfd, URL *u, Range *r)
    612 {
    613 	char buf[2048];
    614 	char *p;
    615 	int i, n;
    616 
    617 	for(;;){
    618 		n = getheader(fd, buf, sizeof(buf));
    619 		if(n <= 0)
    620 			break;
    621 		if(cfd >= 0)
    622 			fprint(cfd, "%s\n", buf);
    623 		for(i = 0; i < nelem(headers); i++){
    624 			n = strlen(headers[i].name);
    625 			if(cistrncmp(buf, headers[i].name, n) == 0){
    626 				/* skip field name and leading white */
    627 				p = buf + n;
    628 				while(*p == ' ' || *p == '\t')
    629 					p++;
    630 
    631 				(*headers[i].f)(p, u, r);
    632 				break;
    633 			}
    634 		}
    635 	}
    636 	return n;
    637 }
    638 
/*
 *  read a single mime header, collect continuations.
 *
 *  this routine assumes that there is a blank line twixt
 *  the header and the message body, otherwise bytes will
 *  be lost.
 *
 *  the header is assembled in buf (capacity n); continuation
 *  lines are appended directly after the previous text.
 *  returns the length collected, 0 when the first line read
 *  has no ':' (taken as the end of the headers), or readline's
 *  negative result on error.
 */
int
getheader(int fd, char *buf, int n)
{
	char *p, *e;
	int i;

	n--;	/* keep one byte of buf in reserve */
	p = buf;
	for(e = p + n; ; p += i){
		i = readline(fd, p, e-p);
		if(i < 0)
			return i;

		if(p == buf){
			/* first line */
			if(strchr(buf, ':') == nil)
				break;		/* end of headers */
		} else {
			/* continuation line */
			if(*p != ' ' && *p != '\t'){
				/* belongs to the next header: push it back and cut it off here */
				unreadline(p);
				*p = 0;
				break;		/* end of this header */
			}
		}
	}
	if(headerprint)
		print("%s\n", buf);

	if(debug)
		fprint(2, "%d <- %s\n", fd, buf);
	return p-buf;
}
    679 
    680 void
    681 hhetag(char *p, URL *u, Range *r)
    682 {
    683 	USED(r);
    684 
    685 	if(u->etag != nil){
    686 		if(strcmp(u->etag, p) != 0)
    687 			sysfatal("file changed underfoot");
    688 	} else
    689 		u->etag = strdup(p);
    690 }
    691 
/* lower-case month abbreviations, three characters each, January first */
char*	monthchars = "janfebmaraprmayjunjulaugsepoctnovdec";

/*
 * last-modified: handler.  Parse a date of the form
 * "<weekday> <day> <month> <year> <h:m:s> ..." and store it in
 * u->mtime as seconds (via tm2sec, zone "GMT").
 *
 * The value is split in place on blanks and tabs, and a month name
 * is lower-cased in place, so the caller's buffer is modified.
 * Fields the header does not supply keep the current time's values.
 * Headers with fewer than five fields are ignored.
 */
void
hhmtime(char *p, URL *u, Range *r)
{
	char *month, *day, *yr, *hms;
	char *fields[6];
	Tm tm, now;
	int i;

	USED(r);

	i = getfields(p, fields, 6, 1, " \t");
	if(i < 5)
		return;

	/* fields[0] (the weekday) is not used */
	day = fields[1];
	month = fields[2];
	yr = fields[3];
	hms = fields[4];

	/* default time */
	now = *gmtime(time(0));
	tm = now;
	tm.yday = 0;

	/* convert ascii month to a number twixt 1 and 12 */
	if(*month >= '0' && *month <= '9'){
		tm.mon = atoi(month) - 1;
		/* an out-of-range numeric month is replaced by 5 */
		if(tm.mon < 0 || tm.mon > 11)
			tm.mon = 5;
	} else {
		for(p = month; *p; p++)
			*p = tolower((uchar)*p);
		/* an unrecognized name leaves tm.mon at the current month */
		for(i = 0; i < 12; i++)
			if(strncmp(&monthchars[i*3], month, 3) == 0){
				tm.mon = i;
				break;
			}
	}

	tm.mday = atoi(day);

	if(hms) {
		tm.hour = strtoul(hms, &p, 10);
		if(*p == ':') {
			p++;
			tm.min = strtoul(p, &p, 10);
			if(*p == ':') {
				p++;
				tm.sec = strtoul(p, &p, 10);
			}
		}
		/* a trailing 'p' (as in "pm") means afternoon */
		if(tolower((uchar)*p) == 'p')
			tm.hour += 12;
	}

	if(yr) {
		tm.year = atoi(yr);
		if(tm.year >= 1900)
			tm.year -= 1900;
	} else {
		/* NOTE(review): yr is always set once five fields were found, so this branch looks unreachable */
		if(tm.mon > now.mon || (tm.mon == now.mon && tm.mday > now.mday+1))
			tm.year--;
	}

	strcpy(tm.zone, "GMT");
	/* convert to epoch seconds */
	u->mtime = tm2sec(&tm);
}
    762 
    763 void
    764 hhclen(char *p, URL *u, Range *r)
    765 {
    766 	USED(u);
    767 
    768 	r->end = atoi(p);
    769 }
    770 
    771 void
    772 hhcrange(char *p, URL *u, Range *r)
    773 {
    774 	char *x;
    775 	vlong l;
    776 
    777 	USED(u);
    778 	l = 0;
    779 	x = strchr(p, '/');
    780 	if(x)
    781 		l = atoll(x+1);
    782 	if(l == 0)
    783 	x = strchr(p, '-');
    784 	if(x)
    785 		l = atoll(x+1);
    786 	if(l)
    787 		r->end = l;
    788 }
    789 
    790 void
    791 hhuri(char *p, URL *u, Range *r)
    792 {
    793 	USED(r);
    794 
    795 	if(*p != '<')
    796 		return;
    797 	u->redirect = strdup(p+1);
    798 	p = strchr(u->redirect, '>');
    799 	if(p != nil)
    800 		*p = 0;
    801 }
    802 
    803 void
    804 hhlocation(char *p, URL *u, Range *r)
    805 {
    806 	USED(r);
    807 
    808 	u->redirect = strdup(p);
    809 }
    810 
    811 void
    812 hhauth(char *p, URL *u, Range *r)
    813 {
    814 	char *f[4];
    815 	UserPasswd *up;
    816 	char *s, cred[64];
    817 
    818 	USED(r);
    819 
    820 	if (cistrncmp(p, "basic ", 6) != 0)
    821 		sysfatal("only Basic authentication supported");
    822 
    823 	if (gettokens(p, f, nelem(f), "\"") < 2)
    824 		sysfatal("garbled auth data");
    825 
    826 	if ((up = auth_getuserpasswd(auth_getkey, "proto=pass service=http dom=%q relm=%q",
    827 	    	u->host, f[1])) == nil)
    828 			sysfatal("cannot authenticate");
    829 
    830 	s = smprint("%s:%s", up->user, up->passwd);
    831 	if(enc64(cred, sizeof(cred), (uchar *)s, strlen(s)) == -1)
    832 		sysfatal("enc64");
    833   		free(s);
    834 
    835 	assert(u->cred = strdup(cred));
    836 }
    837 
enum
{
	/* ftp return codes: the hundreds digit of a reply, as returned by ftprcode */
	Extra=		1,
	Success=	2,
	Incomplete=	3,
	TempFail=	4,
	PermFail=	5,

	Nnetdir=	64,	/* max length of network directory paths */
	Ndialstr=	64		/* max length of dial strings */
};
    850 
/* forward declarations for the ftp support routines */
int ftpcmd(int, char*, ...);
int ftprcode(int, char*, int);
int hello(int);
int logon(int);
int xfertype(int, char*);
int passive(int, URL*);
int active(int, URL*);
int ftpxfer(int, Out*, Range*);
int terminateftp(int, int);
int getaddrport(char*, uchar*, uchar*);
int ftprestart(int, Out*, URL*, Range*, long);
    862 
    863 int
    864 doftp(URL *u, URL *px, Range *r, Out *out, long mtime)
    865 {
    866 	int pid, ctl, data, rv;
    867 	Waitmsg *w;
    868 	char msg[64];
    869 
    870 	/* untested, proxy dosn't work with ftp (I think) */
    871 	if(px->host == nil){
    872 		ctl = dial(netmkaddr(u->host, tcpdir, u->port), 0, 0, 0);
    873 	} else {
    874 		ctl = dial(netmkaddr(px->host, tcpdir, px->port), 0, 0, 0);
    875 	}
    876 
    877 	if(ctl < 0)
    878 		return Error;
    879 	if(net == nil)
    880 		strcpy(tcpdir, "tcp");
    881 
    882 	initibuf();
    883 
    884 	rv = hello(ctl);
    885 	if(rv < 0)
    886 		return terminateftp(ctl, rv);
    887 
    888 	rv = logon(ctl);
    889 	if(rv < 0)
    890 		return terminateftp(ctl, rv);
    891 
    892 	rv = xfertype(ctl, "I");
    893 	if(rv < 0)
    894 		return terminateftp(ctl, rv);
    895 
    896 	/* if file is up to date and the right size, stop */
    897 	if(ftprestart(ctl, out, u, r, mtime) > 0){
    898 		close(ctl);
    899 		return Eof;
    900 	}
    901 
    902 	/* first try passive mode, then active */
    903 	data = passive(ctl, u);
    904 	if(data < 0){
    905 		data = active(ctl, u);
    906 		if(data < 0)
    907 			return Error;
    908 	}
    909 
    910 	/* fork */
    911 	switch(pid = fork()){
    912 	case -1:
    913 		close(data);
    914 		return terminateftp(ctl, Error);
    915 	case 0:
    916 		ftpxfer(data, out, r);
    917 		close(data);
    918 		#undef _exits
    919 		_exits(0);
    920 	default:
    921 		close(data);
    922 		break;
    923 	}
    924 
    925 	/* wait for reply message */
    926 	rv = ftprcode(ctl, msg, sizeof(msg));
    927 	close(ctl);
    928 
    929 	/* wait for process to terminate */
    930 	w = nil;
    931 	for(;;){
    932 		free(w);
    933 		w = wait();
    934 		if(w == nil)
    935 			return Error;
    936 		if(w->pid == pid){
    937 			if(w->msg[0] == 0){
    938 				free(w);
    939 				break;
    940 			}
    941 			werrstr("xfer: %s", w->msg);
    942 			free(w);
    943 			return Error;
    944 		}
    945 	}
    946 
    947 	switch(rv){
    948 	case Success:
    949 		return Eof;
    950 	case TempFail:
    951 		return Server;
    952 	default:
    953 		return Error;
    954 	}
    955 }
    956 
    957 int
    958 ftpcmd(int ctl, char *fmt, ...)
    959 {
    960 	va_list arg;
    961 	char buf[2*1024], *s;
    962 
    963 	va_start(arg, fmt);
    964 	s = vseprint(buf, buf + (sizeof(buf)-4) / sizeof(*buf), fmt, arg);
    965 	va_end(arg);
    966 	if(debug)
    967 		fprint(2, "%d -> %s\n", ctl, buf);
    968 	*s++ = '\r';
    969 	*s++ = '\n';
    970 	if(write(ctl, buf, s - buf) != s - buf)
    971 		return -1;
    972 	return 0;
    973 }
    974 
    975 int
    976 ftprcode(int ctl, char *msg, int len)
    977 {
    978 	int rv;
    979 	int i;
    980 	char *p;
    981 
    982 	len--;	/* room for terminating null */
    983 	for(;;){
    984 		*msg = 0;
    985 		i = readline(ctl, msg, len);
    986 		if(i < 0)
    987 			break;
    988 		if(debug)
    989 			fprint(2, "%d <- %s\n", ctl, msg);
    990 
    991 		/* stop if not a continuation */
    992 		rv = strtol(msg, &p, 10);
    993 		if(rv >= 100 && rv < 600 && p==msg+3 && *p == ' ')
    994 			return rv/100;
    995 	}
    996 	*msg = 0;
    997 
    998 	return -1;
    999 }
   1000 
   1001 int
   1002 hello(int ctl)
   1003 {
   1004 	char msg[1024];
   1005 
   1006 	/* wait for hello from other side */
   1007 	if(ftprcode(ctl, msg, sizeof(msg)) != Success){
   1008 		werrstr("HELLO: %s", msg);
   1009 		return Server;
   1010 	}
   1011 	return 0;
   1012 }
   1013 
   1014 int
   1015 getdec(char *p, int n)
   1016 {
   1017 	int x = 0;
   1018 	int i;
   1019 
   1020 	for(i = 0; i < n; i++)
   1021 		x = x*10 + (*p++ - '0');
   1022 	return x;
   1023 }
   1024 
   1025 int
   1026 ftprestart(int ctl, Out *out, URL *u, Range *r, long mtime)
   1027 {
   1028 	Tm tm;
   1029 	char msg[1024];
   1030 	long x, rmtime;
   1031 
   1032 	ftpcmd(ctl, "MDTM %s", u->page);
   1033 	if(ftprcode(ctl, msg, sizeof(msg)) != Success){
   1034 		r->start = 0;
   1035 		return 0;		/* need to do something */
   1036 	}
   1037 
   1038 	/* decode modification time */
   1039 	if(strlen(msg) < 4 + 4 + 2 + 2 + 2 + 2 + 2){
   1040 		r->start = 0;
   1041 		return 0;		/* need to do something */
   1042 	}
   1043 	memset(&tm, 0, sizeof(tm));
   1044 	tm.year = getdec(msg+4, 4) - 1900;
   1045 	tm.mon = getdec(msg+4+4, 2) - 1;
   1046 	tm.mday = getdec(msg+4+4+2, 2);
   1047 	tm.hour = getdec(msg+4+4+2+2, 2);
   1048 	tm.min = getdec(msg+4+4+2+2+2, 2);
   1049 	tm.sec = getdec(msg+4+4+2+2+2+2, 2);
   1050 	strcpy(tm.zone, "GMT");
   1051 	rmtime = tm2sec(&tm);
   1052 	if(rmtime > mtime)
   1053 		r->start = 0;
   1054 
   1055 	/* get size */
   1056 	ftpcmd(ctl, "SIZE %s", u->page);
   1057 	if(ftprcode(ctl, msg, sizeof(msg)) == Success){
   1058 		x = atol(msg+4);
   1059 		if(r->start == x)
   1060 			return 1;	/* we're up to date */
   1061 		r->end = x;
   1062 	}
   1063 
   1064 	/* seek to restart point */
   1065 	if(r->start > 0){
   1066 		ftpcmd(ctl, "REST %lud", r->start);
   1067 		if(ftprcode(ctl, msg, sizeof(msg)) == Incomplete){
   1068 			setoffset(out, r->start);
   1069 		}else
   1070 			r->start = 0;
   1071 	}
   1072 
   1073 	return 0;	/* need to do something */
   1074 }
   1075 
   1076 int
   1077 logon(int ctl)
   1078 {
   1079 	char msg[1024];
   1080 
   1081 	/* login anonymous */
   1082 	ftpcmd(ctl, "USER anonymous");
   1083 	switch(ftprcode(ctl, msg, sizeof(msg))){
   1084 	case Success:
   1085 		return 0;
   1086 	case Incomplete:
   1087 		break;	/* need password */
   1088 	default:
   1089 		werrstr("USER: %s", msg);
   1090 		return Server;
   1091 	}
   1092 
   1093 	/* send user id as password */
   1094 	sprint(msg, "%s@closedmind.org", getuser());
   1095 	ftpcmd(ctl, "PASS %s", msg);
   1096 	if(ftprcode(ctl, msg, sizeof(msg)) != Success){
   1097 		werrstr("PASS: %s", msg);
   1098 		return Server;
   1099 	}
   1100 
   1101 	return 0;
   1102 }
   1103 
   1104 int
   1105 xfertype(int ctl, char *t)
   1106 {
   1107 	char msg[1024];
   1108 
   1109 	ftpcmd(ctl, "TYPE %s", t);
   1110 	if(ftprcode(ctl, msg, sizeof(msg)) != Success){
   1111 		werrstr("TYPE %s: %s", t, msg);
   1112 		return Server;
   1113 	}
   1114 
   1115 	return 0;
   1116 }
   1117 
   1118 int
   1119 passive(int ctl, URL *u)
   1120 {
   1121 	char msg[1024];
   1122 	char ipaddr[32];
   1123 	char *f[6];
   1124 	char *p;
   1125 	int fd;
   1126 	int port;
   1127 	char aport[12];
   1128 
   1129 	ftpcmd(ctl, "PASV");
   1130 	if(ftprcode(ctl, msg, sizeof(msg)) != Success)
   1131 		return Error;
   1132 
   1133 	/* get address and port number from reply, this is AI */
   1134 	p = strchr(msg, '(');
   1135 	if(p == nil){
   1136 		for(p = msg+3; *p; p++)
   1137 			if(isdigit((uchar)*p))
   1138 				break;
   1139 	} else
   1140 		p++;
   1141 	if(getfields(p, f, 6, 0, ",)") < 6){
   1142 		werrstr("ftp protocol botch");
   1143 		return Server;
   1144 	}
   1145 	snprint(ipaddr, sizeof(ipaddr), "%s.%s.%s.%s",
   1146 		f[0], f[1], f[2], f[3]);
   1147 	port = ((atoi(f[4])&0xff)<<8) + (atoi(f[5])&0xff);
   1148 	sprint(aport, "%d", port);
   1149 
   1150 	/* open data connection */
   1151 	fd = dial(netmkaddr(ipaddr, tcpdir, aport), 0, 0, 0);
   1152 	if(fd < 0){
   1153 		werrstr("passive mode failed: %r");
   1154 		return Error;
   1155 	}
   1156 
   1157 	/* tell remote to send a file */
   1158 	ftpcmd(ctl, "RETR %s", u->page);
   1159 	if(ftprcode(ctl, msg, sizeof(msg)) != Extra){
   1160 		werrstr("RETR %s: %s", u->page, msg);
   1161 		return Error;
   1162 	}
   1163 	return fd;
   1164 }
   1165 
   1166 int
   1167 active(int ctl, URL *u)
   1168 {
   1169 	char msg[1024];
   1170 	char dir[40], ldir[40];
   1171 	uchar ipaddr[4];
   1172 	uchar port[2];
   1173 	int lcfd, dfd, afd;
   1174 
   1175 	/* announce a port for the call back */
   1176 	snprint(msg, sizeof(msg), "%s!*!0", tcpdir);
   1177 	afd = announce(msg, dir);
   1178 	if(afd < 0)
   1179 		return Error;
   1180 
   1181 	/* get a local address/port of the annoucement */
   1182 	if(getaddrport(dir, ipaddr, port) < 0){
   1183 		close(afd);
   1184 		return Error;
   1185 	}
   1186 
   1187 	/* tell remote side address and port*/
   1188 	ftpcmd(ctl, "PORT %d,%d,%d,%d,%d,%d", ipaddr[0], ipaddr[1], ipaddr[2],
   1189 		ipaddr[3], port[0], port[1]);
   1190 	if(ftprcode(ctl, msg, sizeof(msg)) != Success){
   1191 		close(afd);
   1192 		werrstr("active: %s", msg);
   1193 		return Error;
   1194 	}
   1195 
   1196 	/* tell remote to send a file */
   1197 	ftpcmd(ctl, "RETR %s", u->page);
   1198 	if(ftprcode(ctl, msg, sizeof(msg)) != Extra){
   1199 		close(afd);
   1200 		werrstr("RETR: %s", msg);
   1201 		return Server;
   1202 	}
   1203 
   1204 	/* wait for a connection */
   1205 	lcfd = listen(dir, ldir);
   1206 	if(lcfd < 0){
   1207 		close(afd);
   1208 		return Error;
   1209 	}
   1210 	dfd = accept(lcfd, ldir);
   1211 	if(dfd < 0){
   1212 		close(afd);
   1213 		close(lcfd);
   1214 		return Error;
   1215 	}
   1216 	close(afd);
   1217 	close(lcfd);
   1218 
   1219 	return dfd;
   1220 }
   1221 
   1222 int
   1223 ftpxfer(int in, Out *out, Range *r)
   1224 {
   1225 	char buf[1024];
   1226 	long vtime;
   1227 	int i, n;
   1228 
   1229 	vtime = 0;
   1230 	for(n = 0;;n += i){
   1231 		i = read(in, buf, sizeof(buf));
   1232 		if(i == 0)
   1233 			break;
   1234 		if(i < 0)
   1235 			return Error;
   1236 		if(output(out, buf, i) != i)
   1237 			return Error;
   1238 		r->start += i;
   1239 		if(verbose && (vtime != time(0) || r->start == r->end)) {
   1240 			vtime = time(0);
   1241 			fprint(2, "%ld %ld\n", r->start, r->end);
   1242 		}
   1243 	}
   1244 	return n;
   1245 }
   1246 
   1247 int
   1248 terminateftp(int ctl, int rv)
   1249 {
   1250 	close(ctl);
   1251 	return rv;
   1252 }
   1253 
   1254 /*
   1255  * case insensitive strcmp (why aren't these in libc?)
   1256  */
   1257 int
   1258 cistrncmp(char *a, char *b, int n)
   1259 {
   1260 	while(n-- > 0){
   1261 		if(tolower((uchar)*a++) != tolower((uchar)*b++))
   1262 			return -1;
   1263 	}
   1264 	return 0;
   1265 }
   1266 
   1267 int
   1268 cistrcmp(char *a, char *b)
   1269 {
   1270 	while(*a || *b)
   1271 		if(tolower((uchar)*a++) != tolower((uchar)*b++))
   1272 			return -1;
   1273 
   1274 	return 0;
   1275 }
   1276 
/*
 *  buffered io
 *
 *  a single input buffer shared by initibuf, readline,
 *  unreadline and readibuf.  the bytes not yet handed out
 *  are the ones from rp up to (not including) wp.
 */
struct
{
	char *rp;		/* next buffered byte to hand out */
	char *wp;		/* one past the last byte placed in buf */
	char buf[4*1024];
} b;
   1286 
   1287 void
   1288 initibuf(void)
   1289 {
   1290 	b.rp = b.wp = b.buf;
   1291 }
   1292 
   1293 /*
   1294  *  read a possibly buffered line, strip off trailing while
   1295  */
   1296 int
   1297 readline(int fd, char *buf, int len)
   1298 {
   1299 	int n;
   1300 	char *p;
   1301 	int eof = 0;
   1302 
   1303 	len--;
   1304 
   1305 	for(p = buf;;){
   1306 		if(b.rp >= b.wp){
   1307 			n = read(fd, b.wp, sizeof(b.buf)/2);
   1308 			if(n < 0)
   1309 				return -1;
   1310 			if(n == 0){
   1311 				eof = 1;
   1312 				break;
   1313 			}
   1314 			b.wp += n;
   1315 		}
   1316 		n = *b.rp++;
   1317 		if(len > 0){
   1318 			*p++ = n;
   1319 			len--;
   1320 		}
   1321 		if(n == '\n')
   1322 			break;
   1323 	}
   1324 
   1325 	/* drop trailing white */
   1326 	for(;;){
   1327 		if(p <= buf)
   1328 			break;
   1329 		n = *(p-1);
   1330 		if(n != ' ' && n != '\t' && n != '\r' && n != '\n')
   1331 			break;
   1332 		p--;
   1333 	}
   1334 	*p = 0;
   1335 
   1336 	if(eof && p == buf)
   1337 		return -1;
   1338 
   1339 	return p-buf;
   1340 }
   1341 
   1342 void
   1343 unreadline(char *line)
   1344 {
   1345 	int i, n;
   1346 
   1347 	i = strlen(line);
   1348 	n = b.wp-b.rp;
   1349 	memmove(&b.buf[i+1], b.rp, n);
   1350 	memmove(b.buf, line, i);
   1351 	b.buf[i] = '\n';
   1352 	b.rp = b.buf;
   1353 	b.wp = b.rp + i + 1 + n;
   1354 }
   1355 
   1356 int
   1357 readibuf(int fd, char *buf, int len)
   1358 {
   1359 	int n;
   1360 
   1361 	n = b.wp-b.rp;
   1362 	if(n > 0){
   1363 		if(n > len)
   1364 			n = len;
   1365 		memmove(buf, b.rp, n);
   1366 		b.rp += n;
   1367 		return n;
   1368 	}
   1369 	return read(fd, buf, len);
   1370 }
   1371 
   1372 int
   1373 dfprint(int fd, char *fmt, ...)
   1374 {
   1375 	char buf[4*1024];
   1376 	va_list arg;
   1377 
   1378 	va_start(arg, fmt);
   1379 	vseprint(buf, buf+sizeof(buf), fmt, arg);
   1380 	va_end(arg);
   1381 	if(debug)
   1382 		fprint(2, "%d -> %s", fd, buf);
   1383 	return fprint(fd, "%s", buf);
   1384 }
   1385 
   1386 int
   1387 getaddrport(char *dir, uchar *ipaddr, uchar *port)
   1388 {
   1389 	char buf[256];
   1390 	int fd, i;
   1391 	char *p;
   1392 
   1393 	snprint(buf, sizeof(buf), "%s/local", dir);
   1394 	fd = open(buf, OREAD);
   1395 	if(fd < 0)
   1396 		return -1;
   1397 	i = read(fd, buf, sizeof(buf)-1);
   1398 	close(fd);
   1399 	if(i <= 0)
   1400 		return -1;
   1401 	buf[i] = 0;
   1402 	p = strchr(buf, '!');
   1403 	if(p != nil)
   1404 		*p++ = 0;
   1405 	v4parseip(ipaddr, buf);
   1406 	i = atoi(p);
   1407 	port[0] = i>>8;
   1408 	port[1] = i;
   1409 	return 0;
   1410 }
   1411 
   1412 void
   1413 md5free(DigestState *state)
   1414 {
   1415 	uchar x[MD5dlen];
   1416 	md5(nil, 0, x, state);
   1417 }
   1418 
   1419 DigestState*
   1420 md5dup(DigestState *state)
   1421 {
   1422 	DigestState *s2;
   1423 
   1424 	s2 = malloc(sizeof(DigestState));
   1425 	if(s2 == nil)
   1426 		sysfatal("malloc: %r");
   1427 	*s2 = *state;
   1428 	s2->malloced = 1;
   1429 	return s2;
   1430 }
   1431 
   1432 void
   1433 setoffset(Out *out, int offset)
   1434 {
   1435 	md5free(out->curr);
   1436 	if(offset == 0)
   1437 		out->curr = md5(nil, 0, nil, nil);
   1438 	else
   1439 		out->curr = nil;
   1440 	out->offset = offset;
   1441 }
   1442 
   1443 /*
   1444  * write some output, discarding it (but keeping track)
   1445  * if we've already written it. if we've gone backwards,
   1446  * verify that everything previously written matches
   1447  * that which would have been written from the current
   1448  * output.
   1449  */
   1450 int
   1451 output(Out *out, char *buf, int nb)
   1452 {
   1453 	int n, d;
   1454 	uchar m0[MD5dlen], m1[MD5dlen];
   1455 
   1456 	n = nb;
   1457 	d = out->written - out->offset;
   1458 	assert(d >= 0);
   1459 	if(d > 0){
   1460 		if(n < d){
   1461 			if(out->curr != nil)
   1462 				md5((uchar*)buf, n, nil, out->curr);
   1463 			out->offset += n;
   1464 			return n;
   1465 		}
   1466 		if(out->curr != nil){
   1467 			md5((uchar*)buf, d, m0, out->curr);
   1468 			out->curr = nil;
   1469 			md5(nil, 0, m1, md5dup(out->hiwat));
   1470 			if(memcmp(m0, m1, MD5dlen) != 0){
   1471 				fprint(2, "integrity check failure at offset %d\n", out->written);
   1472 				return -1;
   1473 			}
   1474 		}
   1475 		buf += d;
   1476 		n -= d;
   1477 		out->offset += d;
   1478 	}
   1479 	if(n > 0){
   1480 		out->hiwat = md5((uchar*)buf, n, nil, out->hiwat);
   1481 		n = write(out->fd, buf, n);
   1482 		if(n > 0){
   1483 			out->offset += n;
   1484 			out->written += n;
   1485 		}
   1486 	}
   1487 	return n + d;
   1488 }