freebsd-nq/usr.bin/fetch/http.c

/*-
 * Copyright 1997 Massachusetts Institute of Technology
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation for any purpose and without fee is hereby
 * granted, provided that both the above copyright notice and this
 * permission notice appear in all copies, that both the above
 * copyright notice and this permission notice appear in all
 * supporting documentation, and that the name of M.I.T. not be used
 * in advertising or publicity pertaining to distribution of the
 * software without specific, written prior permission.  M.I.T. makes
 * no representations about the suitability of this software for any
 * purpose.  It is provided "as is" without express or implied
 * warranty.
 *
 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	$Id: http.c,v 1.5 1997/03/05 18:57:16 fenner Exp $
 */

#include <sys/types.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <md5.h>
#include <netdb.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <time.h>
#include <unistd.h>

#include <sys/param.h>		/* for MAXHOSTNAMELEN */
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/uio.h>

#include <netinet/in.h>
#include <arpa/inet.h>

#include "fetch.h"

struct http_state {
	char *http_hostname;
	char *http_remote_request;
	char *http_decoded_file;
	char *http_host_header;
	char *http_authentication;
	char *http_proxy_authentication;
	unsigned http_port;
	int http_redirected;
};

struct http_auth {
	TAILQ_ENTRY(http_auth) ha_link;
	char *ha_scheme;
	char *ha_realm;
	char *ha_params;
	const struct http_auth_method *ha_ham;
};
TAILQ_HEAD(http_auth_head, http_auth);

static int http_parse(struct fetch_state *fs, const char *uri);
static int http_proxy_parse(struct fetch_state *fs, const char *uri);
static int http_close(struct fetch_state *fs);
static int http_retrieve(struct fetch_state *fs);
static int basic_doauth(struct fetch_state *fs, struct http_auth *ha, int prx);

struct uri_scheme http_scheme =
	{ "http", http_parse, http_proxy_parse, "HTTP_PROXY", "http" };

struct http_auth_head http_auth, http_proxy_auth;

struct http_auth_method {
	const char *ham_scheme;
	int (*ham_doauth)(struct fetch_state *, struct http_auth *, int);
} http_auth_methods[] = {
	{ "basic", basic_doauth },
	{ 0, 0 }
};

/* We are only concerned with headers we might receive. */
enum http_header {
	ht_accept_ranges, ht_age, ht_allow, ht_cache_control, ht_connection,
	ht_content_base, ht_content_encoding, ht_content_language,
	ht_content_length, ht_content_location, ht_content_md5,
	ht_content_range, ht_content_type, ht_date, ht_etag, ht_expires,
	ht_last_modified, ht_location, ht_pragma, ht_proxy_authenticate,
	ht_public, ht_retry_after, ht_server, ht_transfer_encoding,
	ht_upgrade, ht_vary, ht_via, ht_www_authenticate, ht_warning,
	/* unusual cases */
	ht_syntax_error, ht_unknown, ht_end_of_header
};

static char *format_http_date(time_t when);
static char *format_http_user_agent(void);
static enum http_header http_parse_header(char *line, char **valuep);
static int check_md5(FILE *fp, char *base64ofmd5);
static int http_first_line(const char *line);
static int parse_http_content_range(char *orig, off_t *first, off_t *total);
static int process_http_auth(struct fetch_state *fs, char *hdr, int autherr);
static struct http_auth *find_http_auth(struct http_auth_head *list,
					const char *scheme, const char *realm);
static time_t parse_http_date(char *datestring);
static void setup_http_auth(void);

static int
http_parse(struct fetch_state *fs, const char *uri)
{
	const char *p, *colon, *slash, *ques, *q;
	char *hostname, *hosthdr, *trimmed_name;
	unsigned port;
	struct http_state *https;

	p = uri + 5;
	port = 0;

	if (p[0] != '/' || p[1] != '/') {
		warnx("`%s': malformed `http' URL", uri);
		return EX_USAGE;
	}

	p += 2;
	colon = strchr(p, ':');
	slash = strchr(p, '/');
	if (colon && slash && colon < slash)
		q = colon;
	else
		q = slash;
	if (q == 0) {
		warnx("`%s': malformed `http' URL", uri);
		return EX_USAGE;
	}
	hostname = alloca(q - p + 1);
	hostname[0] = '\0';
	strncat(hostname, p, q - p);
	p = slash;

	if (colon && colon + 1 != slash) {
		unsigned long ul;
		char *ep;

		errno = 0;
		ul = strtoul(colon + 1, &ep, 10);
		if (ep != slash || ep == colon + 1 || errno != 0
		    || ul < 1 || ul > 65534) {
			warn("`%s': invalid port in URL", uri);
			return EX_USAGE;
		}

		port = ul;
	} else {
		port = 80;
	}

	p = slash;

	https = safe_malloc(sizeof *https);

	/*
	 * Now, we have a copy of the hostname in hostname, the specified port
	 * (or the default value) in port, and p points to the filename part
	 * of the URI.
	 */
	https->http_hostname = safe_strdup(hostname);
	https->http_port = port;
	hosthdr = alloca(sizeof("Host: :\r\n") + 5 + strlen(hostname));
	sprintf(hosthdr, "Host: %s:%d\r\n", hostname, port);
	https->http_host_header = safe_strdup(hosthdr);

	/*
	 * NB: HTTP/1.1 servers MUST also accept a full URI.
	 * However, HTTP/1.0 servers will ONLY accept a trimmed URI.
	 */
	https->http_remote_request = safe_strdup(p);
	p++;
	ques = strpbrk(p, "?#");
	if (ques) {
		trimmed_name = safe_strndup(p, ques - p);
	} else {
		trimmed_name = safe_strdup(p);
	}
	https->http_decoded_file = percent_decode(trimmed_name);
	free(trimmed_name);
	p = https->http_decoded_file;
	/* now p is the decoded version, so we can extract the basename */

	if (fs->fs_outputfile == 0) {
		slash = strrchr(p, '/');
		if (slash)
			fs->fs_outputfile = slash + 1;
		else
			fs->fs_outputfile = p;
	}
	https->http_redirected = 0;
	https->http_authentication = https->http_proxy_authentication = 0;

	fs->fs_proto = https;
	fs->fs_close = http_close;
	fs->fs_retrieve = http_retrieve;
	return 0;
}

/*
 * An HTTP proxy works by accepting a complete URI in a GET request,
 * retrieving that object, and then forwarding it back to us.  Because
 * it can conceivably handle any URI, we have to do a bit more work
 * in the parsing of it.
 */
static int
http_proxy_parse(struct fetch_state *fs, const char *uri)
{
	struct http_state *https;
	const char *env, *slash, *ques;
	char *file;
	int rv;

	https = safe_malloc(sizeof *https);
	https->http_remote_request = safe_strdup(uri);

	env = getenv("HTTP_PROXY");
	rv = parse_host_port(env, &https->http_hostname, &https->http_port);
	if (rv) {
out:
		free(https->http_remote_request);
		free(https);
		return rv;
	}

	if (strncmp(uri, "http://", 7) == 0) {
		char *hosthdr;
		slash = strchr(uri + 7, '/');
		if (slash == 0) {
			warnx("`%s': malformed `http' URL", uri);
			rv = EX_USAGE;
			free(https->http_hostname);
			goto out;
		}
		ques = strpbrk(slash, "?#");
		if (ques == 0)
			file = safe_strdup(slash);
		else
			file = safe_strndup(slash, ques - slash);
		hosthdr = alloca(sizeof("Host: \r\n") + slash - uri - 7);
		strcpy(hosthdr, "Host: ");
		strncat(hosthdr, uri + 7, slash - uri - 7);
		strcat(hosthdr, "\r\n");
		https->http_host_header = safe_strdup(hosthdr);
	} else {
		slash = uri;
		while (*slash && *slash != ':')
			slash++;
		if (*slash)
			slash++;
		if (slash[0] == '/' && slash[1] == '/') {
			slash += 2;
			while (*slash && *slash != '/')
				slash++;
		}
		file = safe_strdup(slash);
		https->http_host_header = safe_strdup("");
	}
	https->http_decoded_file = percent_decode(file);
	https->http_redirected = 0;
	https->http_authentication = https->http_proxy_authentication = 0;
	free(file);
	if (fs->fs_outputfile == 0) {
		slash = strrchr(https->http_decoded_file, '/');
		/* NB: we are not guaranteed to find one... */
		fs->fs_outputfile = slash ? slash + 1
			: https->http_decoded_file;
	}

	fs->fs_proto = https;
	fs->fs_close = http_close;
	fs->fs_retrieve = http_retrieve;
	return 0;
}

static int
http_close(struct fetch_state *fs)
{
	struct http_state *https = fs->fs_proto;

	free(https->http_hostname);
	free(https->http_remote_request);
	free(https->http_decoded_file);
	free(https->http_host_header);
	if (https->http_authentication)
		free(https->http_authentication);
	if (https->http_proxy_authentication)
		free(https->http_proxy_authentication);
	free(https);
	fs->fs_outputfile = 0;
	return 0;
}

static int
nullclose(struct fetch_state *fs)
{
	return 0;
}

/*
 * Process a redirection.  This has a small memory leak.
 */
static int
http_redirect(struct fetch_state *fs, char *new, int permanent)
{
	struct http_state *https = fs->fs_proto;
	int num_redirects = https->http_redirected + 1;
	char *out = safe_strdup(fs->fs_outputfile);
	int rv;

	if (num_redirects > 5) {
		warnx("%s: HTTP redirection limit exceeded");
		return EX_PROTOCOL;
	}

	free(https->http_hostname);
	free(https->http_remote_request);
	free(https->http_decoded_file);
	free(https);
	warnx("%s: resource has moved %s to `%s'", out,
	      permanent ? "permanently" : "temporarily", new);
	rv = http_parse(fs, new);
	if (rv != 0) {
		fs->fs_close = nullclose; /* XXX rethink interface? */
		return rv;
	}
	https = fs->fs_proto;
	https->http_redirected = num_redirects;
	/*
	 * This ensures that the output file name doesn't suddenly change
	 * under the user's feet.  Unfortunately, this results in a small
	 * memory leak.  I wish C had garbage collection...
	 */
	fs->fs_outputfile = out;
	rv = http_retrieve(fs);
	return rv;
}

/*
 * Read HTML-formatted data from remote and display it on stderr.
 * This is extremely incomplete, as all it does is delete anything
 * between angle brackets.  However, this is usually good enough for
 * error messages.
 */
static void
html_display(FILE *remote)
{
	char *line;
	int linelen;
	int inbracket = 0;


	while ((line = fgetln(remote, &linelen)) != 0) {
		char *end = line + linelen;
		char *p;
		int content = 0;

		for (p = line; p < end; p++) {
			if (*p == '<' && !inbracket) {
				fwrite(line, 1, (p - line),
					stderr);
				inbracket = 1;
			}
			if (!inbracket && !content &&
				*p != '\n' && *p != '\r')
				content = 1;
			if (*p == '>' && inbracket) {
				line = p + 1;
				inbracket = 0;
			}
		}
		if (content && line < end)
			fwrite(line, 1, (end - line), stderr);
	}
}

/*
 * Get a file using HTTP.  We will try to implement HTTP/1.1 eventually.
 * This subroutine makes heavy use of the 4.4-Lite standard I/O library,
 * in particular the `fgetln' which allows us to slurp an entire `line'
 * (an arbitrary string of non-NUL characters ending in a newline) directly
 * out of the stdio buffer.  This makes interpreting the HTTP headers much
 * easier, since they are all guaranteed to end in `\r\n' and we can just
 * ignore the `\r'.
 */
static int
http_retrieve(struct fetch_state *fs)
{
	struct http_state *https;
	FILE *remote, *local;
	int s;
	struct sockaddr_in sin;
	struct msghdr msg;
#define NIOV	16	/* max is currently 14 */
	struct iovec iov[NIOV];
	int n, status;
	const char *env;
	int timo;
	char *line, *new_location;
	char *errstr = 0;
	size_t linelen, readresult, writeresult;
	off_t total_length, restart_from;
	time_t last_modified, when_to_retry;
	char *base64ofmd5;
	static char buf[BUFFER_SIZE];
	int to_stdout, restarting, redirection, retrying, autherror;
	char rangebuf[sizeof("Range: bytes=18446744073709551616-\r\n")];

	setup_http_auth();

	https = fs->fs_proto;
	to_stdout = (strcmp(fs->fs_outputfile, "-") == 0);
	restarting = fs->fs_restart;
	redirection = 0;
	retrying = 0;
	autherror = 0;

	/*
	 * Figure out the timeout.  Prefer the -T command-line value,
	 * otherwise the HTTP_TIMEOUT envar, or else don't time out at all.
	 */
	if (fs->fs_timeout) {
		timo = fs->fs_timeout;
	} else if ((env = getenv("HTTP_TIMEOUT")) != 0) {
		char *ep;
		unsigned long ul;

		errno = 0;
		ul = strtoul(env, &ep, 0);
		if (*ep != '\0' || *env == '\0' || errno != 0
		    || ul > INT_MAX) {
			warnx("`%s': invalid timeout", env);
			return EX_USAGE;
		}
		timo = ul;
	} else {
		timo = 0;
	}

	memset(&sin, 0, sizeof sin);
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof sin;
	sin.sin_port = htons(https->http_port);

	fs->fs_status = "looking up hostname";
	if (inet_aton(https->http_hostname, &sin.sin_addr) == 0) {
		struct hostent *hp;

		/* XXX - do timeouts for name resolution? */
		hp = gethostbyname2(https->http_hostname, AF_INET);
		if (hp == 0) {
			warnx("`%s': cannot resolve: %s", https->http_hostname,
			      hstrerror(h_errno));
			return EX_NOHOST;
		}
		memcpy(&sin.sin_addr, hp->h_addr_list[0], sizeof sin.sin_addr);
	}

	fs->fs_status = "creating request message";
	msg.msg_name = (caddr_t)&sin;
	msg.msg_namelen = sizeof sin;
	msg.msg_iov = iov;
	n = 0;
	msg.msg_control = 0;
	msg.msg_controllen = 0;
	msg.msg_flags = MSG_EOF;

#define addstr(Iov, N, Str) \
	do { \
		    Iov[N].iov_base = (void *)Str; \
		     Iov[N].iov_len = strlen(Iov[n].iov_base); \
		     N++; \
        } while(0)

retry:
	addstr(iov, n, "GET ");
	addstr(iov, n, https->http_remote_request);
	addstr(iov, n, " HTTP/1.1\r\n");
	/*
	 * The choice of HTTP/1.1 may be a bit controversial.  The
	 * specification says that implementations which are not at
	 * least conditionally compliant MUST NOT call themselves
	 * HTTP/1.1.  We choose not to comply with that requirement.
	 * (Eventually we will support the full HTTP/1.1, at which
	 * time this comment will not apply.  But it's amusing how
	 * specifications attempt to define behavior for implementations
	 * which aren't obeying the spec in the first place...)
	 */
	addstr(iov, n, format_http_user_agent());
	/* do content negotiation here */
	addstr(iov, n, "Accept: */*\r\n");
	addstr(iov, n, https->http_host_header);
	addstr(iov, n, "Connection: close\r\n");
	if (https->http_proxy_authentication)
		addstr(iov, n, https->http_proxy_authentication);
	if (https->http_authentication)
		addstr(iov, n, https->http_authentication);
	if (fs->fs_mirror) {
		struct stat stab;

		errno = 0;
		if (((!to_stdout && stat(fs->fs_outputfile, &stab) == 0)
		     || (to_stdout && fstat(STDOUT_FILENO, &stab) == 0))
		    && S_ISREG(stab.st_mode)) {
			addstr(iov, n, "If-Modified-Since: ");
			addstr(iov, n, format_http_date(stab.st_mtime));
			addstr(iov, n, "\r\n");
		} else if (errno != 0) {
			warn("%s: cannot mirror; will retrieve anew",
			     fs->fs_outputfile);
		}
	}
	if (restarting) {
		struct stat stab;

		errno = 0;
		if (((!to_stdout && stat(fs->fs_outputfile, &stab) == 0)
		     || (to_stdout && fstat(STDOUT_FILENO, &stab) == 0))
		    && S_ISREG(stab.st_mode)) {
			addstr(iov, n, "If-Range: ");
			addstr(iov, n, format_http_date(stab.st_mtime));
			addstr(iov, n, "\r\n");
			sprintf(rangebuf, "Range: bytes=%qd-\r\n",
				(quad_t)stab.st_size);
			addstr(iov, n, rangebuf);
		} else if (errno != 0) {
			warn("%s: cannot restart; will retrieve anew",
			     fs->fs_outputfile);
			restarting = 0;
		} else {
			warnx("%s: cannot restart; will retrieve anew",
			      fs->fs_outputfile);
			restarting = 0;
		}
	}
	addstr(iov, n, "\r\n");
	msg.msg_iovlen = n;

	if (n >= NIOV)
		err(EX_SOFTWARE, "request vector length exceeded: %d", n);

	s = socket(PF_INET, SOCK_STREAM, 0);
	if (s < 0) {
		warn("socket");
		return EX_OSERR;
	}

	remote = fdopen(s, "r");
	if (remote == 0) {
		warn("fdopen");
		close(s);
		return EX_OSERR;
	}

	fs->fs_status = "sending request message";
	setup_sigalrm();
	alarm(timo);
	if (sendmsg(s, &msg, MSG_EOF) < 0) {
		warn("sendmsg: %s", https->http_hostname);
		fclose(remote);
		return EX_OSERR;
	}

got100reply:
	fs->fs_status = "reading reply status";
	alarm(timo);
	line = fgetln(remote, &linelen);
	alarm(0);
	if (line == 0) {
		if (ferror(remote)) {
			warn("reading reply from %s", https->http_hostname);
			fclose(remote);
			unsetup_sigalrm();
			return EX_OSERR;
		} else {
			warnx("empty reply from %s", https->http_hostname);
			fclose(remote);
			unsetup_sigalrm();
			return EX_PROTOCOL;
		}
	}
	/*
	 * If the other end is HTTP 0.9, then we just suck their
	 * response over; can't do anything fancy.  We assume that
	 * the file is a text file, so it is safe to use fgetln()
	 * to suck the entire file.  (It had better be, since
	 * we used it to grab the first line.)
	 */
	if (linelen < 5 || strncasecmp(line, "http", 4) != 0) {
		if (to_stdout)
			local = fopen("/dev/stdout", "w");
		else
			local = fopen(fs->fs_outputfile, "w");
		if (local == 0) {
			warn("%s: fopen", fs->fs_outputfile);
			fclose(remote);
			unsetup_sigalrm();
			return EX_OSERR;
		}
		fs->fs_status = "retrieving from HTTP/0.9 server";
		display(fs, -1, 0);

		do {
			writeresult = fwrite(line, 1, linelen, local);
			display(fs, -1, writeresult);
			if (writeresult != linelen)
				break;
			alarm(timo);
			line = fgetln(remote, &linelen);
			alarm(0);
		} while(line != 0);
		unsetup_sigalrm();

		if (ferror(local)) {
			warn("%s", fs->fs_outputfile);
			fclose(local);
			fclose(remote);
			rm(fs);
			return EX_OSERR;
		} else if(ferror(remote)) {
			warn("%s", https->http_hostname);
			fclose(local);
			fclose(remote);
			rm(fs);
			return EX_OSERR;
		}
		fclose(local);
		fclose(remote);
		display(fs, -1, -1);
		return 0;
	}
	/*
	 * OK.  The other end is doing HTTP 1.0 at the very least.
	 * This means that some of the fancy stuff is at least possible.
	 */
	line[linelen - 1] = '\0'; /* turn line into a string */
	status = http_first_line(line);

	/* In the future, we might handle redirection and other responses. */
	switch(status) {
	case 100:		/* Continue */
		goto got100reply;
	case 200:		/* Here come results */
	case 203:		/* Non-Authoritative Information */
		restarting = 0;
		break;
	case 206:		/* Here come partial results */
		/* can only happen when restarting */
		break;
	case 301:		/* Resource has moved permanently */
		if (!fs->fs_auto_retry)
			errstr = safe_strdup(line);
		else
			redirection = 301;
		break;
	case 302:		/* Resource has moved temporarily */
		/*
		 * We don't test fs->fs_auto_retry here so that this
		 * sort of redirection is transparent to the user.
		 */
		redirection = 302;
		break;
	case 304:		/* Object is unmodified */
		if (fs->fs_mirror) {
			fclose(remote);
			unsetup_sigalrm();
			return 0;
		}
		errstr = safe_strdup(line);
		break;
	case 401:		/* Unauthorized */
		if (https->http_authentication)
			errstr = safe_strdup(line);
		else
			autherror = 401;
		break;
	case 407:		/* Proxy Authentication Required */
		if (https->http_proxy_authentication)
			errstr = safe_strdup(line);
		else
			autherror = 407;
		break;
	case 503:		/* Service Unavailable */
		if (!fs->fs_auto_retry)
			errstr = safe_strdup(line);
		else
			retrying = 503;
		break;

	default:
		errstr = safe_strdup(line);
		break;
	}

	total_length = -1;	/* -1 means ``don't know'' */
	last_modified = when_to_retry = -1;
	base64ofmd5 = 0;
	new_location = 0;
	restart_from = 0;
	fs->fs_status = "parsing reply headers";

	while((line = fgetln(remote, &linelen)) != 0) {
		char *value, *ep;
		enum http_header header;
		unsigned long ul;

		line[linelen - 1] = '\0';
		header = http_parse_header(line, &value);

		if (header == ht_end_of_header)
			break;

		switch(header) {
		case ht_content_length:
			errno = 0;
			ul = strtoul(value, &ep, 10);
			if (errno != 0 || *ep)
				warnx("invalid Content-Length: `%s'", value);
			if (!restarting)
				total_length = ul;
			break;

		case ht_last_modified:
			last_modified = parse_http_date(value);
			if (last_modified == -1 && fs->fs_verbose > 0)
				warnx("invalid Last-Modified: `%s'", value);
			break;

		case ht_content_md5:
			base64ofmd5 = safe_strdup(value);
			break;

		case ht_content_range:
			if (!restarting) /* XXX protocol error */
				break;

			/* NB: we might have to restart from farther back
			   than we asked. */
			status = parse_http_content_range(value, &restart_from,
							  &total_length);
			/* If we couldn't understand the reply, get the whole
			   thing. */
			if (status) {
				restarting = 0;
doretry:
				fclose(remote);
				if (base64ofmd5)
					free(base64ofmd5);
				if (new_location)
					free(new_location);
				restart_from = 0;
				n = 0;
				goto retry;
			}
			break;

		case ht_location:
			if (redirection) {
				char *s = value;
				while (*s && !isspace(*s))
					s++;
				new_location = safe_strndup(value, s - value);
			}
			break;

		case ht_transfer_encoding:
			warnx("%s: %s specified a Transfer-Encoding: %s",
			      fs->fs_outputfile, https->http_hostname,
			      value);
			warnx("%s: output file may be uninterpretable",
			      fs->fs_outputfile);
			break;

		case ht_retry_after:
			if (!retrying)
				break;

			errno = 0;
			ul = strtoul(value, &ep, 10);
			if (errno != 0 || (*ep && !isspace(*ep))) {
				time_t when;
				when = parse_http_date(value);
				if (when == -1)
					break;
				when_to_retry = when;
			} else {
				when_to_retry = time(0) + ul;
			}
			break;

		case ht_www_authenticate:
			if (autherror != 401)
				break;

			status = process_http_auth(fs, value, autherror);
			if (status != 0)
				goto cantauth;
			break;

		case ht_proxy_authenticate:
			if (autherror != 407)
				break;
			status = process_http_auth(fs, value, autherror);
			if (status != 0)
				goto cantauth;
			break;

		default:
			break;
		}
	}
	if (autherror == 401 && https->http_authentication)
		goto doretry;
	if (autherror == 407 && https->http_proxy_authentication)
		goto doretry;
	if (autherror) {
		goto spewerror;
	}

	if (retrying) {
		int howlong;

		if (when_to_retry == -1) {
			errstr = safe_strdup("HTTP/1.1 503 Service Unavailable");
			goto spewerror;
		}
		howlong = when_to_retry - time(0);
		if (howlong < 30)
			howlong = 30;

		warnx("%s: service unavailable; retrying in %d seconds",
		      https->http_hostname, howlong);
		fs->fs_status = "waiting to retry";
		sleep(howlong);
		goto doretry;
	}

	if (errstr != 0) {
spewerror:
		warnx("%s: %s: HTTP server returned error code %d",
		      fs->fs_outputfile, https->http_hostname, status);
		if (fs->fs_verbose > 1) {
			fputs(errstr, stderr);
			fputc('\n', stderr);
			html_display(remote);
		}
		free(errstr);
		fclose(remote);
		unsetup_sigalrm();
		return EX_UNAVAILABLE;
	}

	if (redirection && new_location) {
		fclose(remote);
		if (base64ofmd5)
			free(base64ofmd5);
		fs->fs_status = "processing redirection";
		status = http_redirect(fs, new_location, redirection == 301);
		free(new_location);
		return status;
	} else if (redirection) {
		warnx("%s: redirection but no new location",
		      fs->fs_outputfile);
		fclose(remote);
		if (base64ofmd5)
			free(base64ofmd5);
		return EX_PROTOCOL;
	}

	fs->fs_status = "retrieving file from HTTP/1.x server";

	/*
	 * OK, if we got here, then we have finished parsing the header
	 * and have read the `\r\n' line which denotes the end of same.
	 * We may or may not have a good idea of the length of the file
	 * or its modtime.  At this point we will have to deal with
	 * any special byte-range, content-negotiation, redirection,
	 * or authentication, and probably jump back up to the top,
	 * once we implement those features.  So, all we have left to
	 * do is open up the output file and copy data from input to
	 * output until EOF.
	 */
	if (to_stdout)
		local = fopen("/dev/stdout", restarting ? "a" : "w");
	else
		local = fopen(fs->fs_outputfile, restarting ? "a" : "w");
	if (local == 0) {
		warn("%s: fopen", fs->fs_outputfile);
		fclose(remote);
		unsetup_sigalrm();
		return EX_OSERR;
	}

	fs->fs_modtime = last_modified;
	fseek(local, restart_from, SEEK_SET); /* XXX truncation off_t->long */
	display(fs, total_length, restart_from); /* XXX truncation */

	/*
	 * Eventually this loop will be separated out as http_suck(), and
	 * there will be a separate http_suck_chunked() to deal with that
	 * Transfer-Encoding.
	 */
	do {
		alarm(timo);
		readresult = fread(buf, 1, sizeof buf, remote);
		alarm(0);

		if (readresult == 0)
			break;
		display(fs, total_length, readresult);

		writeresult = fwrite(buf, 1, readresult, local);
	} while (writeresult == readresult);

	status = errno;		/* save errno for warn(), below, if needed */
	display(fs, total_length, -1); /* do here in case we have to warn */
	errno = status;

	if (ferror(remote)) {
		warn("reading remote file from %s", https->http_hostname);
		status = EX_OSERR;
	} else if(ferror(local)) {
		warn("`%s': fwrite", fs->fs_outputfile);
		status = EX_OSERR;
	} else {
		status = 0;
	}
	if (base64ofmd5) {
		/*
		 * Ack.  When restarting, the MD5 only covers the parts
		 * we are getting, not the whole thing.
		 */
		fseek(local, restart_from, SEEK_SET);
		fs->fs_status = "computing MD5 message digest";
		status = check_md5(local, base64ofmd5);
		free(base64ofmd5);
	}

	fclose(local);
out:
	unsetup_sigalrm();
	fclose(remote);

	if (status != 0)
		rm(fs);
	else
		adjmodtime(fs);

	return status;
#undef addstr

cantauth:
	warnx("%s: cannot authenticate with %s %s",
	      fs->fs_outputfile,
	      (autherror == 401) ? "server" : "proxy",
	      https->http_hostname);
	status = EX_NOPERM;
	goto out;
}

/*
 * The format of the response line for an HTTP request is:
 *	HTTP/V.vv{WS}999{WS}Explanatory text for humans to read\r\n
 * Old pre-HTTP/1.0 servers can return
 *	HTTP{WS}999{WS}Explanatory text for humans to read\r\n
 * Where {WS} represents whitespace (spaces and/or tabs) and 999
 * is a machine-interprable result code.  We return the integer value
 * of that result code, or the impossible value `0' if we are unable to
 * parse the result.
 */
static int
http_first_line(const char *line)
{
	char *ep;
	unsigned long ul;

	if (strncasecmp(line, "http", 4) != 0)
		return 0;

	line += 4;
	while (*line && !isspace(*line)) /* skip non-whitespace */
		line++;
	while (*line && isspace(*line))	/* skip first whitespace */
		line++;

	errno = 0;
	ul = strtoul(line, &ep, 10);
	if (errno != 0 || ul > 999 || ul < 100 || !isspace(*ep))
		return 0;
	return ul;
}

/*
 * The format of a header line for an HTTP request is:
 *	Header-Name: header-value (with comments in parens)\r\n
 * This would be a nice application for gperf(1), except that the
 * names are case-insensitive and gperf can't handle that.
 */
static enum http_header
http_parse_header(char *line, char **valuep)
{
	char *colon, *value;

	if (*line == '\0' /* protocol error! */
	    || (line[0] == '\r' && line[1] == '\0'))
		return ht_end_of_header;

	colon = strchr(line, ':');
	if (colon == 0)
		return ht_syntax_error;
	*colon = '\0';

	for (value = colon + 1; *value && isspace(*value); value++)
		;		/* do nothing */

	/* Trim trailing whitespace (including \r). */
	*valuep = value;
	value += strlen(value) - 1;
	while (value > *valuep && isspace(*value))
		value--;
	*++value = '\0';

#define cmp(name, num) do { if (!strcasecmp(line, name)) return num; } while(0)
	cmp("Accept-Ranges", ht_accept_ranges);
	cmp("Age", ht_age);
	cmp("Allow", ht_allow);
	cmp("Cache-Control", ht_cache_control);
	cmp("Connection", ht_connection);
	cmp("Content-Base", ht_content_base);
	cmp("Content-Encoding", ht_content_encoding);
	cmp("Content-Language", ht_content_language);
	cmp("Content-Length", ht_content_length);
	cmp("Content-Location", ht_content_location);
	cmp("Content-MD5", ht_content_md5);
	cmp("Content-Range", ht_content_range);
	cmp("Content-Type", ht_content_type);
	cmp("Date", ht_date);
	cmp("ETag", ht_etag);
	cmp("Expires", ht_expires);
	cmp("Last-Modified", ht_last_modified);
	cmp("Location", ht_location);
	cmp("Pragma", ht_pragma);
	cmp("Proxy-Authenticate", ht_proxy_authenticate);
	cmp("Public", ht_public);
	cmp("Retry-After", ht_retry_after);
	cmp("Server", ht_server);
	cmp("Transfer-Encoding", ht_transfer_encoding);
	cmp("Upgrade", ht_upgrade);
	cmp("Vary", ht_vary);
	cmp("Via", ht_via);
	cmp("WWW-Authenticate", ht_www_authenticate);
	cmp("Warning", ht_warning);
#undef cmp
	return ht_unknown;
}

/*
 * Compute the RSA Data Security, Inc., MD5 Message Digest of the file
 * given in `fp', see if it matches the one given in base64 encoding by
 * `base64ofmd5'.  Warn and return an error if it doesn't.
 */
static int
check_md5(FILE *fp, char *base64ofmd5) {
	MD5_CTX ctx;
	unsigned char digest[16];
	char buf[512];
	size_t len;
	char *ourval;

	MD5Init(&ctx);
	while ((len = fread(buf, 1, sizeof buf, fp)) != 0) {
		MD5Update(&ctx, buf, len);
	}
	MD5Final(digest, &ctx);
	ourval = to_base64(digest, 16);
	if (strcmp(ourval, base64ofmd5) != 0) {
		warnx("MD5 digest mismatch: %s, should be %s", ourval,
		      base64ofmd5);
		free(ourval);
		return EX_DATAERR;
	}
	free(ourval);
	return 0;
}

static const char *wkdays[] = {
	"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
};
static const char *months[] = {
	"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct",
	"Nov", "Dec"
};

/*
 * Interpret one of the three possible formats for an HTTP date.
 * All of them are really bogus; HTTP should use either ISO 8601
 * or NTP timestamps.  We make some attempt to accept a subset of 8601
 * format.  The three standard formats are all fixed-length subsets of their
 * respective standards (except 8601, which puts all of the stuff we
 * care about up front).
 */
static time_t
parse_http_date(char *string)
{
	static struct tm tm;	/* get good initialization */
	time_t rv;
	const char *tz;
	int i;

	/* 8601 has the shortest minimum length */
	if (strlen(string) < 15)
		return -1;

	if (isdigit(*string)) {
		/* ISO 8601: 19970127T134551stuffwedon'tcareabout */
		for (i = 0; i < 15; i++) {
			if (i != 8 && !isdigit(string[i]))
				break;
		}
		if (i < 15)
			return -1;
#define digit(x) (string[x] - '0')
		tm.tm_year = (digit(0) * 1000
			      + digit(1) * 100
			      + digit(2) * 10
			      + digit(3)) - 1900;
		tm.tm_mon = digit(4) * 10 + digit(5) - 1;
		tm.tm_mday = digit(6) * 10 + digit(7);
		if (string[8] != 'T' && string[8] != 't' && string[8] != ' ')
			return -1;
		tm.tm_hour = digit(9) * 10 + digit(10);
		tm.tm_min = digit(11) * 10 + digit(12);
		tm.tm_sec = digit(13) * 10 + digit(14);
		/* We don't care about the rest of the stuff after the secs. */
	} else if (string[3] == ',') {
		/* Mon, 27 Jan 1997 14:24:35 stuffwedon'tcareabout */
		if (strlen(string) < 25)
			return -1;
		string += 5;	/* skip over day-of-week */
		if (!(isdigit(string[0]) && isdigit(string[1])))
			return -1;
		tm.tm_mday = digit(0) * 10 + digit(1);
		for (i = 0; i < 12; i++) {
			if (strncasecmp(months[i], &string[3], 3) == 0)
				break;
		}
		if (i >= 12)
			return -1;
		tm.tm_mon = i;

		if (sscanf(&string[7], "%d %d:%d:%d", &i, &tm.tm_hour,
			   &tm.tm_min, &tm.tm_sec) != 4)
			return -1;
		tm.tm_year = i - 1900;

	} else if (string[3] == ' ') {
		/* Mon Jan 27 14:25:20 1997 */
		if (strlen(string) < 25)
			return -1;
		string += 4;
		for (i = 0; i < 12; i++) {
			if (strncasecmp(string, months[i], 3) == 0)
				break;
		}
		if (i >= 12)
			return -1;
		tm.tm_mon = i;
		if (sscanf(&string[4], "%d %d:%d:%d %u", &tm.tm_mday,
			   &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &i)
		    != 5)
			return -1;
		tm.tm_year = i - 1900;
	} else {
		/* Monday, 27-Jan-97 14:31:09 stuffwedon'tcareabout */
		char *comma = strchr(string, ',');
		char mname[4];

		if (comma == 0)
			return -1;
		string = comma + 1;
		if (strlen(string) < 19)
			return -1;
		string++;
		mname[4] = '\0';
		if (sscanf(string, "%d-%c%c%c-%d %d:%d:%d", &tm.tm_mday,
			   mname, mname + 1, mname + 2, &tm.tm_year,
			   &tm.tm_hour, &tm.tm_min, &tm.tm_sec) != 8)
			return -1;
		for (i = 0; i < 12; i++) {
			if (strcasecmp(months[i], mname))
				break;
		}
		if (i >= 12)
			return -1;
		tm.tm_mon = i;
	}
#undef digit

	if (tm.tm_sec > 60 || tm.tm_min > 59 || tm.tm_hour > 23
	    || tm.tm_mday > 31 || tm.tm_mon > 11)
		return -1;
	if (tm.tm_sec < 0 || tm.tm_min < 0 || tm.tm_hour < 0
	    || tm.tm_mday < 0 || tm.tm_mon < 0 || tm.tm_year < 0)
		return -1;

	tz = getenv("TZ");
	setenv("TZ", "UTC0", 1);
	tzset();
	rv = mktime(&tm);
	if (tz)
		setenv("TZ", tz, 1);
	else
		unsetenv("TZ");
	return rv;
}

static char *
format_http_date(time_t when)
{
	struct tm *tm;
	static char buf[30];

	tm = gmtime(&when);
	if (tm == 0)
		return 0;
#ifndef HTTP_DATE_ISO_8601
	sprintf(buf, "%s, %02d %s %04d %02d:%02d:%02d GMT",
		wkdays[tm->tm_wday], tm->tm_mday, months[tm->tm_mon],
		tm->tm_year + 1900, tm->tm_hour, tm->tm_min, tm->tm_sec);
#else /* ISO 8601 */
	sprintf(buf, "%04d%02d%02dT%02d%02d%02d+0000",
		tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
		tm->tm_hour, tm->tm_min, tm->tm_sec);
#endif
	return buf;
}

static char *
format_http_user_agent(void)
{
	static char buf[128];
	static int inited;

	if (!inited) {
		int mib[2];
		char ostype[128], osrelease[128], machine[128];
		size_t len;

		mib[0] = CTL_KERN;
		mib[1] = KERN_OSTYPE;
		len = sizeof ostype;
		if (sysctl(mib, 2, ostype, &len, 0, 0) < 0) {
			warn("sysctl");
			ostype[0] = '\0';
		}
		mib[1] = KERN_OSRELEASE;
		len = sizeof osrelease;
		if (sysctl(mib, 2, osrelease, &len, 0, 0) < 0) {
			warn("sysctl");
			osrelease[0] = '\0';
		}
		mib[0] = CTL_HW;
		mib[1] = HW_MACHINE;
		len = sizeof machine;
		if (sysctl(mib, 2, machine, &len, 0, 0) < 0) {
			warn("sysctl");
			machine[0] = '\0';
		}

		snprintf(buf, sizeof buf,
			 "User-Agent: " FETCH_VERSION " %s/%s (%s)\r\n",
			 ostype, osrelease, machine);
	}
	return buf;
}

/*
 * Parse a Content-Range return header from the server.  RFC 2066 defines
 * this header to have the format:
 *	Content-Range: bytes 12345-67890/123456
 * Since we always ask for the whole rest of the file, we consider it an
 * error if the reply doesn't claim to give it to us.
 */
static int
parse_http_content_range(char *orig, off_t *restart_from, off_t *total_length)
{
	u_quad_t first, last, total;
	char *ep;

	if (strncasecmp(orig, "bytes", 5) != 0) {
		warnx("unknown Content-Range unit: `%s'", orig);
		return EX_PROTOCOL;
	}

	orig += 5;
	while (*orig && isspace(*orig))
		orig++;

	errno = 0;
	first = strtouq(orig, &ep, 10);
	if (errno != 0 || *ep != '-') {
		warnx("invalid Content-Range: `%s'", orig);
		return EX_PROTOCOL;
	}
	last = strtouq(ep + 1, &ep, 10);
	if (errno != 0 || *ep != '/' || last < first) {
		warnx("invalid Content-Range: `%s'", orig);
		return EX_PROTOCOL;
	}
	total = strtouq(ep + 1, &ep, 10);
	if (errno != 0 || !(*ep == '\0' || isspace(*ep))) {
		warnx("invalid Content-Range: `%s'", orig);
		return EX_PROTOCOL;
	}

	if (last + 1 != total) {
		warnx("HTTP server did not return requested Content-Range");
		return EX_PROTOCOL;
	}

	*restart_from = first;
	*total_length = last;
	return 0;
}

/*
 * Do HTTP authentication.  We only do ``basic'' right now, but
 * MD5 ought to be fairly easy.  The hard part is actually teasing
 * apart the header, which is fairly badly designed (so what else is
 * new?).
 */
static char *
getauthparam(char *params, const char *name)
{
	char *rv;
	enum state { normal, quoted } state;
	while (*params) {
		if (strncasecmp(params, name, strlen(name)) == 0
		    && params[strlen(name)] == '=')
			break;
		state = normal;
		while (*params) {
			if (state == normal && *params == ',')
				break;
			if (*params == '\"')
				state = (state == quoted) ? normal : quoted;
			if (*params == '\\' && params[1] != '\0')
				params++;
			params++;
		}
	}

	if (*params == '\0')
		return 0;
	params += strlen(name) + 1;
	rv = params;
	state = normal;
	while (*params) {
		if (state == normal && *params == ',')
			break;
		if (*params == '\"')
			state = (state == quoted) ? normal : quoted;
		if (*params == '\\' && params[1] != '\0')
			params++;
		params++;
	}
	if (params[-1] == '\"')
		params[-1] = '\0';
	else
		params[0] = '\0';

	if (*rv == '\"')
		rv++;
	return rv;
}

static int
process_http_auth(struct fetch_state *fs, char *hdr, int autherr)
{
	enum state { normal, quoted } state;
	char *scheme, *params, *nscheme, *realm;
	struct http_auth *ha;

	do {
		scheme = params = hdr;
		/* Look for end of scheme name. */
		while (*params && !isspace(*params))
			params++;

		if (*params == '\0')
			return EX_PROTOCOL;

		/* Null-terminate scheme and skip whitespace. */
		while (*params && isspace(*params))
			*params++ = '\0';

		/* Semi-parse parameters to find their end. */
		nscheme = params;
		state = normal;
		while (*nscheme) {
			if (state == normal && isspace(*nscheme))
				break;
			if (*nscheme == '\"')
				state = (state == quoted) ? normal : quoted;
			if (*nscheme == '\\' && nscheme[1] != '\0')
				nscheme++;
			nscheme++;
		}

		/* Null-terminate parameters and skip whitespace. */
		while (*nscheme && isspace(*nscheme))
			*nscheme++ = '\0';

		realm = getauthparam(params, "realm");
		if (realm == 0) {
			scheme = nscheme;
			continue;
		}

		if (autherr == 401)
			ha = find_http_auth(&http_auth, scheme, realm);
		else
			ha = find_http_auth(&http_proxy_auth, scheme, realm);

		if (ha)
			return ha->ha_ham->ham_doauth(fs, ha, autherr == 407);
	} while (*scheme);
	return EX_NOPERM;
}

static void
parse_http_auth_env(const char *env, struct http_auth_head *ha_tqh)
{
	char *nenv, *p, *scheme, *realm, *params;
	struct http_auth *ha;
	struct http_auth_method *ham;

	nenv = alloca(strlen(env) + 1);
	strcpy(nenv, env);

	while ((p = strsep(&nenv, " \t")) != 0) {
		scheme = strsep(&p, ":");
		if (scheme == 0 || *scheme == '\0')
			continue;
		realm = strsep(&p, ":");
		if (realm == 0 || *realm == '\0')
			continue;
		params = (p && *p) ? p : 0;
		for (ham = http_auth_methods; ham->ham_scheme; ham++) {
			if (strcasecmp(scheme, ham->ham_scheme) == 0)
				break;
		}
		if (ham == 0)
			continue;
		ha = safe_malloc(sizeof *ha);
		ha->ha_scheme = safe_strdup(scheme);
		ha->ha_realm = safe_strdup(realm);
		ha->ha_params = params ? safe_strdup(params) : 0;
		ha->ha_ham = ham;
		TAILQ_INSERT_TAIL(ha_tqh, ha, ha_link);
	}
}

/*
 * Look up an authentication method.  Automatically clone wildcards
 * into fully-specified entries.
 */
static struct http_auth *
find_http_auth(struct http_auth_head *tqh, const char *scm, const char *realm)
{
	struct http_auth *ha;

	for (ha = tqh->tqh_first; ha; ha = ha->ha_link.tqe_next) {
		if (strcasecmp(ha->ha_scheme, scm) == 0
		    && strcasecmp(ha->ha_realm, realm) == 0)
			return ha;
	}

	for (ha = tqh->tqh_first; ha; ha = ha->ha_link.tqe_next) {
		if (strcasecmp(ha->ha_scheme, scm) == 0
		    && strcmp(ha->ha_realm, "*") == 0)
			break;
	}
	if (ha != 0) {
		struct http_auth *ha2;

		ha2 = safe_malloc(sizeof *ha2);
		ha2->ha_scheme = safe_strdup(scm);
		ha2->ha_realm = safe_strdup(realm);
		ha2->ha_params = ha->ha_params ? safe_strdup(ha->ha_params) :0;
		ha2->ha_ham = ha->ha_ham;
		TAILQ_INSERT_TAIL(tqh, ha2, ha_link);
		ha = ha2;
	}

	return ha;
}

static void
setup_http_auth(void)
{
	const char *envar;
	static int once;

	if (once)
		return;
	once = 1;

	TAILQ_INIT(&http_auth);
	TAILQ_INIT(&http_proxy_auth);
	envar = getenv("HTTP_AUTH");
	if (envar)
		parse_http_auth_env(envar, &http_auth);

	envar = getenv("HTTP_PROXY_AUTH");
	if (envar)
		parse_http_auth_env(envar, &http_proxy_auth);
}

static int
basic_doauth(struct fetch_state *fs, struct http_auth *ha, int isproxy)
{
	struct http_state *https = fs->fs_proto;
	char *user;
	char *pass;
	char *enc;
	char **hdr;
	size_t userlen;
	FILE *fp;

	if (!isatty(0) &&
	    (ha->ha_params == 0 || strchr(ha->ha_params, ':') == 0))
		return EX_NOPERM;

	fp = fopen("/dev/tty", "r+");
	if (fp == 0) {
		warn("opening /dev/tty");
		return EX_OSERR;
	}
	if (ha->ha_params == 0) {
		fprintf(fp, "Enter `basic' user name for realm `%s': ",
			ha->ha_realm);
		fflush(fp);
		user = fgetln(stdin, &userlen);
		if (user == 0 || userlen < 1) { /* longer name? */
			fclose(fp);
			return EX_NOPERM;
		}
		if (user[userlen - 1] == '\n')
			user[userlen - 1] = '\0';
		else
			user[userlen] = '\0';
		user = safe_strdup(user);
		pass = 0;
	} else if ((pass = strchr(ha->ha_params, ':')) == 0) {
		user = safe_strdup(ha->ha_params);
		free(ha->ha_params);
	}

	if (pass == 0) {
		pass = getpass("Password: ");
		ha->ha_params = safe_malloc(strlen(user) + 2 + strlen(pass));
		strcpy(ha->ha_params, user);
		strcat(ha->ha_params, ":");
		strcat(ha->ha_params, pass);
	}

	enc = to_base64(ha->ha_params, strlen(ha->ha_params));

	hdr = isproxy ? &https->http_proxy_authentication
		: &https->http_authentication;
	if (*hdr)
		free(*hdr);
	*hdr = safe_malloc(sizeof("Proxy-Authorization: basic \r\n")
			   + strlen(enc));
	if (isproxy)
		strcpy(*hdr, "Proxy-Authorization");
	else
		strcpy(*hdr, "Authorization");
	strcat(*hdr, ": Basic ");
	strcat(*hdr, enc);
	strcat(*hdr, "\r\n");
	free(enc);
	return 0;
}