freebsd-nq/lib/libfetch/http.c
2000-05-26 15:34:42 +00:00

703 lines
17 KiB
C

/*-
* Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer
* in this position and unchanged.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
/*
* The base64 code in this file is based on code from MIT fetch, which
* has the following copyright and license:
*
*-
* Copyright 1997 Massachusetts Institute of Technology
*
* Permission to use, copy, modify, and distribute this software and
* its documentation for any purpose and without fee is hereby
* granted, provided that both the above copyright notice and this
* permission notice appear in all copies, that both the above
* copyright notice and this permission notice appear in all
* supporting documentation, and that the name of M.I.T. not be used
* in advertising or publicity pertaining to distribution of the
* software without specific, written prior permission. M.I.T. makes
* no representations about the suitability of this software for any
* purpose. It is provided "as is" without express or implied
* warranty.
*
* THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
* ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
* SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE. */
#include <sys/param.h>
#include <sys/socket.h>
#include <err.h>
#include <ctype.h>
#include <locale.h>
#include <netdb.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include "fetch.h"
#include "common.h"
#include "httperr.h"
extern char *__progname;
#define ENDL "\r\n"
#define HTTP_OK 200
#define HTTP_PARTIAL 206
#define HTTP_MOVED 302
/*
 * Per-stream state for an HTTP connection.  It wraps the real stream to
 * the server and tracks the current input buffer and the progress of
 * chunked-transfer decoding.
 */
struct cookie
{
    FILE	*real_f;		/* underlying stream to the server */
#define ENC_NONE 0
#define ENC_CHUNKED 1
    int		 encoding;		/* 1 = chunked, 0 = none */
#define HTTPCTYPELEN 59
    char	 content_type[HTTPCTYPELEN+1];	/* NUL-terminated Content-Type */
    char	*buf;			/* current input buffer */
    int		 b_cur;			/* read position within buf */
    int		 eof;			/* set once the last chunk was seen */
    /*
     * b_len must be a size_t: its address is handed to fgetln(), which
     * stores a size_t through it.  An unsigned int is too small on
     * platforms where size_t is wider than int.
     */
    size_t	 b_len;			/* number of valid bytes in buf */
    unsigned	 chunksize;		/* bytes left in the current chunk */
};
/*
 * Send a formatted line; optionally echo to terminal
 *
 * Formats fmt and its arguments onto f.  Unless NDEBUG is defined, the
 * same text is also echoed to stderr between bold escape sequences.
 * Returns 0 on success, or -1 if formatting onto f failed.
 */
static int
_http_cmd(FILE *f, char *fmt, ...)
{
    va_list ap;
    int r;

    va_start(ap, fmt);
    r = vfprintf(f, fmt, ap);
    va_end(ap);

#ifndef NDEBUG
    /*
     * A va_list is indeterminate once vfprintf() has consumed it, so the
     * argument traversal has to be restarted before formatting again.
     */
    fprintf(stderr, "\033[1m>>> ");
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
    fprintf(stderr, "\033[m");
#endif

    return (r < 0) ? -1 : 0;
}
/*
 * Fill the input buffer, do chunk decoding on the fly
 *
 * Reads the next line from the underlying stream into the cookie's buffer.
 * With chunked encoding, a chunk-size line is read and parsed first
 * whenever the previous chunk has been used up, and the buffer length is
 * capped so it never extends past the end of the current chunk.
 *
 * Returns the new buffer, or NULL on end of data, read failure or unknown
 * encoding.
 */
static char *
_http_fillbuf(struct cookie *c)
{
    char *ln;
    size_t len, i;
    unsigned int size;

    if (c->eof)
	return NULL;

    if (c->encoding != ENC_NONE && c->encoding != ENC_CHUNKED)
	return NULL;			/* unknown encoding */

    if (c->encoding == ENC_CHUNKED && c->chunksize == 0) {
	/*
	 * Fetch the chunk-size line.  When the previous chunk's data ended
	 * in a newline, the CRLF that terminates the chunk shows up here as
	 * a separate blank line, so skip blank lines instead of treating
	 * them as a short read.
	 */
	do {
	    if ((ln = fgetln(c->real_f, &len)) == NULL)
		return NULL;
	    while (len > 0 && (ln[len - 1] == '\r' || ln[len - 1] == '\n'))
		len--;
	} while (len == 0);

	DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: "
		      "%*.*s\033[m\n", (int)len, (int)len, ln));

	/*
	 * The line is not NUL-terminated, so parse the hex size by hand
	 * within its bounds.  An unparsable or overflowing size yields 0,
	 * which ends the stream just as a terminating zero-size chunk does.
	 */
	size = 0;
	for (i = 0; i < len && isxdigit((unsigned char)ln[i]); i++) {
	    unsigned char ch = (unsigned char)ln[i];
	    unsigned int digit;

	    if (isdigit(ch))
		digit = ch - '0';
	    else
		digit = tolower(ch) - 'a' + 10;
	    if (size > ((unsigned int)~0U >> 4)) {
		size = 0;
		break;
	    }
	    size = (size << 4) | digit;
	}
	c->chunksize = size;

	if (!c->chunksize) {
	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
			  "end of last chunk\033[m\n"));
	    c->eof = 1;
	    return NULL;
	}
	DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
		      "new chunk: %X\033[m\n", c->chunksize));
    }

    /* read the next line of body data */
    if ((ln = fgetln(c->real_f, &len)) == NULL) {
	c->buf = NULL;
	c->b_len = 0;
	c->b_cur = 0;
	return NULL;
    }

    if (c->encoding == ENC_CHUNKED) {
	/* never hand out bytes that lie beyond the current chunk */
	if (len > c->chunksize)
	    len = c->chunksize;
	c->chunksize -= len;
    }

    c->buf = ln;
    c->b_len = len;
    c->b_cur = 0;
    return c->buf;
}
/*
 * Read function
 *
 * Copies up to len bytes of body data into buf, refilling the cookie's
 * input buffer through _http_fillbuf() whenever it is missing or used up.
 * Returns the number of bytes copied, or -1 if the underlying stream is in
 * an error state.
 */
static int
_http_readfn(struct cookie *c, char *buf, int len)
{
    int copied = 0;

    while (len != 0) {
	int avail;

	/* refill when there is no buffer or it has been consumed */
	if ((c->buf == NULL || c->b_cur == c->b_len) &&
	    _http_fillbuf(c) == NULL)
	    break;

	avail = c->b_len - c->b_cur;
	if (avail > len)
	    avail = len;

	memcpy(buf + copied, c->buf + c->b_cur, avail);
	c->b_cur += avail;
	copied += avail;
	len -= avail;
    }

    return ferror(c->real_f) ? -1 : copied;
}
/*
 * Write function
 *
 * Passes len bytes from buf straight to the underlying stream.  Returns
 * the number of bytes written, 0 for an empty request, or -1 on failure
 * or a negative length.
 */
static int
_http_writefn(struct cookie *c, const char *buf, int len)
{
    size_t n;

    if (len < 0)
	return -1;		/* would become a huge size_t below */
    if (len == 0)
	return 0;		/* writing nothing is not an error */

    n = fwrite(buf, 1, (size_t)len, c->real_f);
    return (n > 0) ? (int)n : -1;
}
/*
 * Close function
 *
 * Closes the underlying stream and releases the cookie.  Returns 0 on
 * success, or -1 if closing the stream failed.
 */
static int
_http_closefn(struct cookie *c)
{
    FILE *stream = c->real_f;
    int status;

    status = fclose(stream);
    free(c);

    if (status == EOF)
	return -1;
    return 0;
}
/*
* Extract content type from cookie
*/
char *
fetchContentType(FILE *f)
{
/*
* We have no way of making sure this really *is* one of our cookies,
* so just check for a null pointer and hope for the best.
*/
return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL;
}
/*
 * Base64 encoding
 *
 * Encodes the l bytes at src into dst as NUL-terminated base64 text, with
 * '=' padding.  dst must have room for 4 * ((l + 2) / 3) + 1 bytes.
 * Returns the number of characters written, not counting the NUL.
 */
int
_http_base64(char *dst, char *src, int l)
{
    static const char base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";
    /*
     * Work on unsigned bytes: with a signed char type, bytes >= 0x80 would
     * sign-extend when promoted and smear ones over the other sextets.
     */
    const unsigned char *in = (const unsigned char *)src;
    unsigned int t;
    int r = 0;

    /* full groups: three input bytes become four output characters */
    while (l >= 3) {
	t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8) | in[2];
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = base64[(t >> 0) & 0x3f];
	in += 3; l -= 3;
	dst += 4; r += 4;
    }

    /* trailing partial group, padded with '=' */
    switch (l) {
    case 2:
	t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    case 1:
	t = (unsigned int)in[0] << 16;
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    default:
	break;
    }

    *dst = 0;
    return r;
}
/*
 * Encode username and password
 *
 * Builds the credentials for HTTP Basic authentication: the base64
 * encoding of the single string "usr:pwd".  Returns a malloc()ed,
 * NUL-terminated string that the caller must free, or NULL if memory
 * could not be allocated.
 */
char *
_http_auth(char *usr, char *pwd)
{
    size_t lu, lp, rawlen;
    char *raw, *str;

    lu = strlen(usr);
    lp = strlen(pwd);
    rawlen = lu + 1 + lp;		/* user name, colon, password */

    /* the pair is encoded as one unit, so join it first */
    if ((raw = malloc(rawlen + 1)) == NULL)
	return NULL;
    memcpy(raw, usr, lu);
    raw[lu] = ':';
    memcpy(raw + lu + 1, pwd, lp);
    raw[rawlen] = 0;

    /*
     * Padded base64 emits four characters for every started group of
     * three input bytes, plus the terminating NUL.
     */
    str = malloc(((rawlen + 2) / 3) * 4 + 1);
    if (str != NULL) {
	/* NOTE(review): assumes rawlen fits in an int -- confirm callers */
	_http_base64(str, raw, (int)rawlen);
    }

    free(raw);
    return str;
}
/*
 * Connect to server or proxy
 *
 * Opens a connection for the given URL, going through the proxy named by
 * the HTTP_PROXY environment variable unless the 'd' flag is present.
 * Fills in URL->port when it is zero.  Returns a stream opened "r+" on the
 * connected socket, or NULL after reporting an error via _http_seterr().
 */
FILE *
_http_connect(struct url *URL, char *flags)
{
int direct, sd = -1, verbose;
#ifdef INET6
int af = AF_UNSPEC;
#else
int af = AF_INET;
#endif
size_t len;
char *px;
FILE *f;
/*
 * flag letters: 'd' skips the proxy, 'v' is handed to _fetch_connect()
 * as its verbose argument, '4' / '6' force the address family
 */
direct = (flags && strchr(flags, 'd'));
verbose = (flags && strchr(flags, 'v'));
if ((flags && strchr(flags, '4')))
af = AF_INET;
else if ((flags && strchr(flags, '6')))
af = AF_INET6;
/* check port */
/*
 * no port in the URL: look the scheme up in the services database,
 * falling back to 21 for ftp and 80 for everything else; note that the
 * caller's URL is modified in place
 */
if (!URL->port) {
struct servent *se;
if (strcasecmp(URL->scheme, "ftp") == 0)
if ((se = getservbyname("ftp", "tcp")) != NULL)
URL->port = ntohs(se->s_port);
else
URL->port = 21;
else
if ((se = getservbyname("http", "tcp")) != NULL)
URL->port = ntohs(se->s_port);
else
URL->port = 80;
}
/* attempt to connect to proxy server */
if (!direct && (px = getenv("HTTP_PROXY")) != NULL) {
char host[MAXHOSTNAMELEN];
int port = 0;
/* measure length */
/*
 * With INET6, a value of the form "[addr]" or "[addr]:port" leaves len
 * just past the closing bracket; anything else (and every value without
 * INET6) is split at the first colon.
 */
#ifdef INET6
if (px[0] != '[' ||
(len = strcspn(px, "]")) >= strlen(px) ||
(px[++len] != '\0' && px[len] != ':'))
#endif
len = strcspn(px, ":");
/* get port (XXX atoi is a little too tolerant perhaps?) */
/*
 * both validity checks below have empty bodies, so a malformed or
 * out-of-range port is still used as parsed by atoi()
 */
if (px[len] == ':') {
if (strspn(px+len+1, "0123456789") != strlen(px+len+1)
|| strlen(px+len+1) > 5) {
/* XXX we should emit some kind of warning */
}
port = atoi(px+len+1);
if (port < 1 || port > 65535) {
/* XXX we should emit some kind of warning */
}
}
/* no port given, or it parsed as zero: use the default of 3128 */
if (!port) {
#if 0
/*
* commented out, since there is currently no service name
* for HTTP proxies
*/
struct servent *se;
if ((se = getservbyname("xxxx", "tcp")) != NULL)
port = ntohs(se->s_port);
else
#endif
port = 3128;
}
/* get host name */
/* strip the surrounding brackets from a bracketed address */
#ifdef INET6
if (len > 1 && px[0] == '[' && px[len - 1] == ']') {
px++;
len -= 2;
}
#endif
/* truncate over-long names so the copy and terminator fit in host[] */
if (len >= MAXHOSTNAMELEN)
len = MAXHOSTNAMELEN - 1;
strncpy(host, px, len);
host[len] = 0;
/* connect */
sd = _fetch_connect(host, port, af, verbose);
}
/* if no proxy is configured or could be contacted, try direct */
if (sd == -1) {
/* ftp URLs are only attempted through a proxy, never directly */
if (strcasecmp(URL->scheme, "ftp") == 0)
goto ouch;
if ((sd = _fetch_connect(URL->host, URL->port, af, verbose)) == -1)
goto ouch;
}
/* reopen as stream */
if ((f = fdopen(sd, "r+")) == NULL)
goto ouch;
return f;
/* common failure exit: release the socket if one was opened */
ouch:
if (sd >= 0)
close(sd);
_http_seterr(999); /* XXX do this properly RSN */
return NULL;
}
/*
 * Check a header line
 *
 * Tests whether the header line hdr carries the field name str, compared
 * without regard to case and followed immediately by a colon.  Returns a
 * pointer to the field value inside hdr (leading white space skipped), or
 * NULL if the name does not match.
 */
char *
_http_match(char *str, char *hdr)
{
    /*
     * Advance only past characters that actually matched; stepping over a
     * mismatching pair would let a name that differs in its last character
     * slip through the check below.
     */
    while (*str && *hdr) {
	if (tolower((unsigned char)*str) != tolower((unsigned char)*hdr))
	    return NULL;
	str++;
	hdr++;
    }

    /* the whole name must be consumed and followed by a colon */
    if (*str || *hdr != ':')
	return NULL;

    /* skip the colon and any white space in front of the value */
    hdr++;
    while (*hdr && isspace((unsigned char)*hdr))
	hdr++;
    return hdr;
}
/*
 * Send a HEAD or GET request
 *
 * Writes the request line and headers for method op and the given URL to
 * f, then reads the status line of the response.  Returns the numeric
 * status code from that line, or 999 if the request could not be sent or
 * no status code could be read.
 */
int
_http_request(FILE *f, char *op, struct url *URL, char *flags)
{
    int e, verbose;
    char *ln, *p, *end;
    size_t len;
    char *host;
#ifdef INET6
    char hbuf[MAXHOSTNAMELEN + 1];
#endif

    verbose = (flags && strchr(flags, 'v'));

    host = URL->host;
#ifdef INET6
    /* a host containing ':' is written in brackets */
    if (strchr(URL->host, ':')) {
	snprintf(hbuf, sizeof(hbuf), "[%s]", URL->host);
	host = hbuf;
    }
#endif

    /* send request (proxies require absolute form, so use that) */
    if (verbose)
	_fetch_info("requesting %s://%s:%d%s",
		    URL->scheme, host, URL->port, URL->doc);
    _http_cmd(f, "%s %s://%s:%d%s HTTP/1.1" ENDL,
	      op, URL->scheme, host, URL->port, URL->doc);

    /* start sending headers away */
    if (URL->user[0] || URL->pwd[0]) {
	char *auth_str = _http_auth(URL->user, URL->pwd);
	if (!auth_str)
	    return 999; /* XXX wrong */
	_http_cmd(f, "Authorization: Basic %s" ENDL, auth_str);
	free(auth_str);
    }
    _http_cmd(f, "Host: %s:%d" ENDL, host, URL->port);
    _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname);
    if (URL->offset) {
	/* cast so the argument matches %lld whatever the offset's type */
	_http_cmd(f, "Range: bytes=%lld-" ENDL, (long long)URL->offset);
    }
    _http_cmd(f, "Connection: close" ENDL ENDL);

    /* output must be flushed before reading from an update stream */
    if (fflush(f) == EOF)
	return 999;

    /* get response */
    if ((ln = fgetln(f, &len)) == NULL)
	return 999;
    DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n",
		  (int)(len > 2 ? len - 2 : 0),
		  (int)(len > 2 ? len - 2 : 0), ln));

    /*
     * We can't use strchr() and friends since ln isn't NUL-terminated, so
     * every access below is bounded by the end of the line.
     */
    end = ln + len;
    p = ln;
    while (p < end && !isspace((unsigned char)*p))	/* protocol version */
	p++;
    while (p < end && !isdigit((unsigned char)*p))	/* separator */
	p++;
    if (p == end)
	return 999;

    /* parse the status code by hand, refusing absurdly long numbers */
    e = 0;
    while (p < end && isdigit((unsigned char)*p)) {
	if (e > 99999)
	    return 999;
	e = e * 10 + (*p++ - '0');
    }

    DEBUG(fprintf(stderr, "code: [\033[1m%d\033[m]\n", e));
    return e;
}
/*
 * Retrieve a file by HTTP
 *
 * Sends a GET request for URL and parses the response header.  A 302
 * response is followed through its Location header unless the 'A' flag
 * is present.  Returns a stream from which the document body can be read
 * (chunked transfer encoding is decoded on the fly), positioned at
 * URL->offset, or NULL on failure.
 */
FILE *
fetchGetHTTP(struct url *URL, char *flags)
{
    int e, enc = ENC_NONE, i, noredirect;
    struct cookie *c;
    char *ln, *p, *q;
    FILE *f, *cf;
    size_t len, rawlen;
    off_t pos = 0;

    noredirect = (flags && strchr(flags, 'A'));

    /* allocate cookie */
    if ((c = calloc(1, sizeof *c)) == NULL)
	return NULL;

    /* connect */
    if ((f = _http_connect(URL, flags)) == NULL) {
	free(c);
	return NULL;
    }
    c->real_f = f;

    e = _http_request(f, "GET", URL, flags);
    if (e != (URL->offset ? HTTP_PARTIAL : HTTP_OK)
	&& (e != HTTP_MOVED || noredirect)) {
	/* report the real status; do not let the generic code replace it */
	_http_seterr(e);
	goto cleanup;
    }

    /* browse through header */
    while (1) {
	if ((ln = fgetln(f, &len)) == NULL)
	    goto fouch;
	if ((ln[0] == '\r') || (ln[0] == '\n'))
	    break;

	/*
	 * Trim trailing white space, then terminate the line in place.
	 * That is only safe when at least one byte (the line terminator)
	 * was trimmed; a line without one means the header was cut short.
	 */
	rawlen = len;
	while (len > 0 && isspace((unsigned char)ln[len - 1]))
	    --len;
	if (len == rawlen)
	    goto fouch;
	ln[len] = '\0';
	DEBUG(fprintf(stderr, "header: [\033[1m%s\033[m]\n", ln));

	if (e == HTTP_MOVED && (p = _http_match("Location", ln)) != NULL) {
	    struct url *url;

	    /* the status check above guarantees redirects are allowed */
	    for (q = p; *q && !isspace((unsigned char)*q); q++)
		/* VOID */ ;
	    *q = 0;
	    if ((url = fetchParseURL(p)) == NULL)
		goto fouch;
	    url->offset = URL->offset;
	    url->length = URL->length;
	    DEBUG(fprintf(stderr, "location: [\033[1m%s\033[m]\n", p));
	    cf = fetchGetHTTP(url, flags);
	    fetchFreeURL(url);
	    /* this connection is finished with: drop stream and cookie */
	    fclose(f);
	    free(c);
	    return cf;
	} else if ((p = _http_match("Transfer-Encoding", ln)) != NULL) {
	    for (q = p; *q && !isspace((unsigned char)*q); q++)
		/* VOID */ ;
	    *q = 0;
	    if (strcasecmp(p, "chunked") == 0)
		enc = ENC_CHUNKED;
	    DEBUG(fprintf(stderr, "transfer encoding: [\033[1m%s\033[m]\n", p));
	} else if ((p = _http_match("Content-Type", ln)) != NULL) {
	    for (i = 0; *p && i < HTTPCTYPELEN; p++, i++)
		c->content_type[i] = *p;
	    /* terminate, then strip trailing blanks without leaving the array */
	    c->content_type[i] = 0;
	    while (i > 0 && isspace((unsigned char)c->content_type[i - 1]))
		c->content_type[--i] = 0;
	    DEBUG(fprintf(stderr, "content type: [\033[1m%s\033[m]\n",
			  c->content_type));
	} else if ((p = _http_match("Content-Range", ln)) != NULL) {
	    if (strncasecmp(p, "bytes ", 6) != 0)
		goto fouch;
	    p += 6;
	    pos = 0;		/* a repeated header must not accumulate */
	    while (*p && isdigit((unsigned char)*p))
		pos = pos * 10 + (*p++ - '0');
	    /* XXX wouldn't hurt to be slightly more paranoid here */
	    DEBUG(fprintf(stderr, "content range: [\033[1m%lld-\033[m]\n",
			  (long long)pos));
	    if (pos > URL->offset)
		goto fouch;
	}
    }

    /* only body remains */
    c->encoding = enc;
    cf = funopen(c,
		 (int (*)(void *, char *, int))_http_readfn,
		 (int (*)(void *, const char *, int))_http_writefn,
		 (fpos_t (*)(void *, fpos_t, int))NULL,
		 (int (*)(void *))_http_closefn);
    if (cf == NULL)
	goto fouch;

    /*
     * The server may have started earlier than requested; discard bytes
     * until the requested offset is reached.
     */
    while (pos < URL->offset) {
	if (fgetc(cf) == EOF)
	    goto cfouch;
	pos++;
    }

    return cf;

fouch:
    _http_seterr(999); /* XXX do this properly RSN */
cleanup:
    fclose(f);
    free(c);
    return NULL;
cfouch:
    /* closing cf also closes f and frees the cookie via _http_closefn() */
    fclose(cf);
    _http_seterr(999); /* XXX do this properly RSN */
    return NULL;
}
/*
 * Store a file by HTTP
 *
 * Not implemented: emits a warning and returns NULL.  Both arguments are
 * ignored.
 */
FILE *
fetchPutHTTP(struct url *URL, char *flags)
{
    FILE *result = NULL;

    (void)URL;
    (void)flags;

    warnx("fetchPutHTTP(): not implemented");
    return result;
}
/*
 * Get an HTTP document's metadata
 *
 * Sends a HEAD request for URL and fills in us from the response header:
 * size from Content-Length (-1 if absent), atime and mtime from
 * Last-Modified (0 if absent or unparsable).  A 302 response is followed
 * through its Location header unless the 'A' flag is present.  Returns 0
 * on success and -1 on failure.
 */
int
fetchStatHTTP(struct url *URL, struct url_stat *us, char *flags)
{
    int e, noredirect;
    size_t len, rawlen;
    char *ln, *p, *q;
    FILE *f;

    noredirect = (flags && strchr(flags, 'A'));

    us->size = -1;
    us->atime = us->mtime = 0;

    /* connect */
    if ((f = _http_connect(URL, flags)) == NULL)
	return -1;

    e = _http_request(f, "HEAD", URL, flags);
    if (e != HTTP_OK && (e != HTTP_MOVED || noredirect)) {
	/* report the real status; do not let the generic code replace it */
	_http_seterr(e);
	goto fouch;
    }

    while (1) {
	if ((ln = fgetln(f, &len)) == NULL)
	    goto ouch;
	if ((ln[0] == '\r') || (ln[0] == '\n'))
	    break;

	/*
	 * Trim trailing white space, then terminate the line in place.
	 * That is only safe when at least one byte (the line terminator)
	 * was trimmed; a line without one means the header was cut short.
	 */
	rawlen = len;
	while (len > 0 && isspace((unsigned char)ln[len - 1]))
	    --len;
	if (len == rawlen)
	    goto ouch;
	ln[len] = '\0';
	DEBUG(fprintf(stderr, "header: [\033[1m%s\033[m]\n", ln));

	if (e == HTTP_MOVED && (p = _http_match("Location", ln)) != NULL) {
	    struct url *url;

	    /* the status check above guarantees redirects are allowed */
	    for (q = p; *q && !isspace((unsigned char)*q); q++)
		/* VOID */ ;
	    *q = 0;
	    if ((url = fetchParseURL(p)) == NULL)
		goto ouch;
	    url->offset = URL->offset;
	    url->length = URL->length;
	    DEBUG(fprintf(stderr, "location: [\033[1m%s\033[m]\n", p));
	    e = fetchStatHTTP(url, us, flags);
	    fetchFreeURL(url);
	    fclose(f);
	    return e;
	} else if ((p = _http_match("Last-Modified", ln)) != NULL) {
	    struct tm tm;
	    char locale[64];
	    const char *cur;
	    char *parsed;

	    /* parse the date in the "C" locale, then restore the old one */
	    cur = setlocale(LC_TIME, NULL);
	    snprintf(locale, sizeof locale, "%s", cur != NULL ? cur : "C");
	    setlocale(LC_TIME, "C");
	    memset(&tm, 0, sizeof tm);
	    parsed = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm);
	    /* XXX should add support for date-2 and date-3 */
	    setlocale(LC_TIME, locale);

	    /* only trust tm when the whole format matched */
	    if (parsed != NULL) {
		us->atime = us->mtime = timegm(&tm);
		DEBUG(fprintf(stderr, "last modified: [\033[1m%04d-%02d-%02d "
			      "%02d:%02d:%02d\033[m]\n",
			      tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
			      tm.tm_hour, tm.tm_min, tm.tm_sec));
	    }
	} else if ((p = _http_match("Content-Length", ln)) != NULL) {
	    us->size = 0;
	    while (*p && isdigit((unsigned char)*p))
		us->size = us->size * 10 + (*p++ - '0');
	    DEBUG(fprintf(stderr, "content length: [\033[1m%lld\033[m]\n",
			  (long long)us->size));
	}
    }

    fclose(f);
    return 0;

ouch:
    _http_seterr(999); /* XXX do this properly RSN */
fouch:
    fclose(f);
    return -1;
}
/*
 * List a directory
 *
 * Not implemented: emits a warning and returns NULL.  Both arguments are
 * ignored.
 */
struct url_ent *
fetchListHTTP(struct url *url, char *flags)
{
    struct url_ent *entries = NULL;

    (void)url;
    (void)flags;

    warnx("fetchListHTTP(): not implemented");
    return entries;
}