Here is my long-threatened revamping of fetch. Jean-Marc probably won't
recognize it any more. This makes the following significant changes: - The main body of the program doesn't know a thing about URIs, HTTP, or FTP. This makes it possible to easily plug in other protocols. (The next revision will probably be able to dynamically add new recognizers.) - There are no longer arbitrary timeouts for the protocols. If you want to set one for yourself, use the environment variables. - FTP proxies are now supported (if I implemented it right). - The HTTP implementation is much more complete, and can now do restarts, preserve modtimes, and run in mirror mode. It's not yet up to 1.1, but it's getting there. - Transaction TCP is now used for sending HTTP requests. The HTTP/1.1 syntax for requesting that the connection be closed after one request is implemented. In all of this, I have doubtless broken somebody. Please test it and tell me about the bugs.
This commit is contained in:
parent
d04f83f2e2
commit
78be319939
@ -1,9 +1,9 @@
|
||||
PROG = fetch
|
||||
SRCS = main.c
|
||||
SRCS = file.c ftp.c http.c main.c util.c uri.c
|
||||
|
||||
CFLAGS+= -Wall
|
||||
CFLAGS+= -Wall -Wwrite-strings -Wmissing-prototypes
|
||||
|
||||
DPADD= ${LIBFTPIO}
|
||||
LDADD= -lftpio
|
||||
DPADD= ${LIBFTPIO} ${LIBMD}
|
||||
LDADD= -lftpio -lmd
|
||||
|
||||
.include <bsd.prog.mk>
|
||||
|
@ -11,7 +11,7 @@
|
||||
.Op Fl o Ar file
|
||||
.Ar URL
|
||||
.Nm fetch
|
||||
.Op Fl MPmnpqr
|
||||
.Op Fl MPRmnpqr
|
||||
.Op Fl o Ar file
|
||||
.Op Fl c Ar dir
|
||||
.Fl f Ar file
|
||||
@ -26,22 +26,17 @@ or the
|
||||
protocol. In the first form of the command, the
|
||||
.Ar URL
|
||||
may be of the form
|
||||
.Em http://site.domain/path/to/the/file
|
||||
.Li http://site.domain/path/to/the/file
|
||||
or
|
||||
.Em ftp://site.domain/path/to/the/file.
|
||||
For compatibility with
|
||||
.Xr tftp 1
|
||||
the form
|
||||
.Em site.domain:/path/to/the/file
|
||||
is also accepted.
|
||||
To denote a local filename to be copied or linked to (see
|
||||
.Li ftp://site.domain/path/to/the/file .
|
||||
To denote a local filename to be copied or linked to (see the
|
||||
.Fl l
|
||||
flag), the
|
||||
flag below), the
|
||||
.Em file:/path/to/the/file
|
||||
URL form is used.
|
||||
|
||||
.Pp
|
||||
The second form of the command can be used to get a file using the
|
||||
.Em ftp
|
||||
.Tn FTP
|
||||
protocol, specifying the file name and the remote host with the
|
||||
.Fl h
|
||||
and the
|
||||
@ -50,6 +45,22 @@ flags.
|
||||
.Pp
|
||||
The following options are available:
|
||||
.Bl -tag -width Fl -compact
|
||||
.It Fl c Ar dir
|
||||
The file to retrieve is in directory
|
||||
.Ar dir
|
||||
on the remote host.
|
||||
.It Fl f Ar file
|
||||
The file to retrieve is named
|
||||
.Ar file
|
||||
on the remote host.
|
||||
.It Fl h Ar host
|
||||
The file to retrieve is located on the host
|
||||
.Ar host .
|
||||
.It Fl l
|
||||
If target is a
|
||||
.Ar file:/
|
||||
style of URL, make a link to the target rather than trying
|
||||
to copy it.
|
||||
.It Fl M
|
||||
.It Fl m
|
||||
Mirror mode: Set the modification time of the file so that it is
|
||||
@ -58,26 +69,27 @@ If the file already exists on the local host and is identical (as
|
||||
gauged by size and modification time), no transfer is done.
|
||||
.It Fl n
|
||||
Don't preserve the modtime of the transferred file, use the current time.
|
||||
.It Fl o Ar file
|
||||
Set the output file name to
|
||||
.Ar file .
|
||||
By default, a ``pathname'' is extracted from the specified URI, and
|
||||
its basename is used as the name of the output file. A
|
||||
.Ar file
|
||||
argument of
|
||||
.Sq Li \&-
|
||||
indicates that results are to be directed to the standard output.
|
||||
.It Fl P
|
||||
.It Fl p
|
||||
Use passive mode if you are behind a firewall.
|
||||
.It Fl c Ar dir
|
||||
Change to directory
|
||||
.Ar dir
|
||||
at remote host before starting the transfer.
|
||||
.It Fl f Ar file
|
||||
Retrieve
|
||||
.Ar file
|
||||
on the remote host.
|
||||
.It Fl h Ar host
|
||||
Set the
|
||||
.Ar host
|
||||
for transfer.
|
||||
.It Fl l
|
||||
If target is a
|
||||
.Ar file:/
|
||||
style of URL, make a link to the target rather than trying
|
||||
to copy it.
|
||||
Use the passive mode of the
|
||||
.Tn FTP
|
||||
protocol. This is useful for crossing certain sorts of firewalls.
|
||||
.It Fl q
|
||||
Quiet mode. Do not report transfer progress on the terminal.
|
||||
.It Fl R
|
||||
The filenames specified are ``precious'', and should not be deleted
|
||||
under any circumstances, even if the transfer failed or was incomplete.
|
||||
.It Fl r
|
||||
Restart a previously interrupted transfer.
|
||||
.It Fl T Ar seconds
|
||||
Set timeout value to
|
||||
.Ar seconds .
|
||||
@ -86,47 +98,90 @@ Overrides the environment variables
|
||||
for ftp transfers or
|
||||
.Ev HTTP_TIMEOUT
|
||||
for http transfers if set.
|
||||
.It Fl q
|
||||
Quiet mode. Do not report transfer progress on the terminal.
|
||||
.It Fl v
|
||||
Verbose mode - display FTP connection information in painful detail.
|
||||
.It Fl r
|
||||
Reget. Use this flag to restart an interrupted transfer.
|
||||
.It Fl o Ar file
|
||||
Set the output file name to
|
||||
.Ar file
|
||||
Increase verbosity. More
|
||||
.Fl v Ns \&'s
|
||||
result in more information.
|
||||
.El
|
||||
.Pp
|
||||
Many options are also controlled solely by the environment (this is a
|
||||
bug).
|
||||
.Sh PROXY SERVERS
|
||||
Many sites use application gateways (``proxy servers'') in their
|
||||
firewalls in order to allow communication across the firewall using a
|
||||
trusted protocol. The
|
||||
.Nm fetch
|
||||
program can use both the
|
||||
.Tn FTP
|
||||
and the
|
||||
.Tn HTTP
|
||||
protocol with a proxy server.
|
||||
.Tn FTP
|
||||
proxy servers can only relay
|
||||
.Tn FTP
|
||||
requests;
|
||||
.Tn HTTP
|
||||
proxy servers can relay both
|
||||
.Tn FTP
|
||||
and
|
||||
.Tn HTTP
|
||||
requests.
|
||||
A proxy server can be configured by defining an environment variable
|
||||
named
|
||||
.Dq Va PROTO Ns Ev _PROXY ,
|
||||
where
|
||||
.Va PROTO
|
||||
is the name of the protocol in upper case. The value of the
|
||||
environment variable specifies a hostname, optionally followed by a
|
||||
colon and a port number.
|
||||
.Pp
|
||||
The
|
||||
.Tn FTP
|
||||
proxy client specifies
|
||||
.Dq anonymous
|
||||
as its user name, and passes the remote user name and host as the
|
||||
.Tn FTP
|
||||
session's password, in the form
|
||||
.Dq Va remoteuser Ns Li \&@ Va remotehost .
|
||||
The
|
||||
.Tn HTTP
|
||||
proxy client simply passes the originally-requested URI to the remote
|
||||
server in an
|
||||
.Tn HTTP
|
||||
.Dq Li GET
|
||||
request. HTTP proxy authentication is not yet implemented.
|
||||
When multiple proxy protocols are configured,
|
||||
.Nm
|
||||
will prefer
|
||||
.Tn HTTP .
|
||||
.Sh ENVIRONMENT
|
||||
A transfer using the
|
||||
.Em ftp
|
||||
protocol will be aborted after the delay specified by the
|
||||
.Ev FTP_TIMEOUT
|
||||
variable. The default is 300 (seconds)
|
||||
|
||||
A transfer using the
|
||||
.Em http
|
||||
protocol will be aborted after the delay specified by the
|
||||
.Ev HTTP_TIMEOUT
|
||||
variable. The default is 300 (seconds)
|
||||
|
||||
.Ev FTP_LOGIN
|
||||
is the login name for the remote host. Default is
|
||||
.Em anonymous
|
||||
|
||||
.Ev FTP_PASSWORD
|
||||
is the password for the remote host. Default is
|
||||
.Em <yourname>@
|
||||
|
||||
.Ev FTP_PASSIVE_MODE
|
||||
will force the use of passive mode FTP for firewalls.
|
||||
|
||||
If
|
||||
.Ev HTTP_PROXY
|
||||
is set to a value of the form
|
||||
.Em host:port
|
||||
it specifies the address of a http proxy. The proxy will be used
|
||||
for all ftp and http requests. This is useful if you are behind
|
||||
an application firewall.
|
||||
.Bl -tag -width FTP_PASSIVE_MODE -offset indent
|
||||
.It Ev FTP_TIMEOUT
|
||||
maximum time, in seconds, to wait before aborting an
|
||||
.Tn FTP
|
||||
connection.
|
||||
.It Ev HTTP_TIMEOUT
|
||||
maximum time, in seconds, to wait before aborting an
|
||||
.Tn HTTP
|
||||
connection.
|
||||
.It Ev FTP_LOGIN
|
||||
the login name used for
|
||||
.Tn FTP
|
||||
transfers (default
|
||||
.Dq Li anonymous )
|
||||
.It Ev FTP_PASSWORD
|
||||
the password used for
|
||||
.Tn FTP
|
||||
transfers (default
|
||||
.Dq Va yourname Ns Li \&@ Ns Va yourhost )
|
||||
.It Ev FTP_PASSIVE_MODE
|
||||
force the use of passive mode FTP
|
||||
.It Ev HTTP_PROXY
|
||||
the address of a proxy server which understands
|
||||
.Tn HTTP
|
||||
.It Ev FTP_PROXY
|
||||
the address of a proxy server which understands
|
||||
.Tn FTP
|
||||
.Sh SEE ALSO
|
||||
.Xr ftp 1 ,
|
||||
.Xr tftp 1
|
||||
@ -135,3 +190,9 @@ The
|
||||
.Nm fetch
|
||||
command appeared in
|
||||
.Fx 2.1.5 .
|
||||
.Sh AUTHORS
|
||||
The original implementation of
|
||||
.Nm
|
||||
was done by Jean-Marc Zucconi. It was extensively re-worked for
|
||||
.Fx 3.0
|
||||
by Garrett Wollman.
|
||||
|
87
usr.bin/fetch/fetch.h
Normal file
87
usr.bin/fetch/fetch.h
Normal file
@ -0,0 +1,87 @@
|
||||
/*
|
||||
* Copyright 1997 Massachusetts Institute of Technology
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software and
|
||||
* its documentation for any purpose and without fee is hereby
|
||||
* granted, provided that both the above copyright notice and this
|
||||
* permission notice appear in all copies, that both the above
|
||||
* copyright notice and this permission notice appear in all
|
||||
* supporting documentation, and that the name of M.I.T. not be used
|
||||
* in advertising or publicity pertaining to distribution of the
|
||||
* software without specific, written prior permission. M.I.T. makes
|
||||
* no representations about the suitability of this software for any
|
||||
* purpose. It is provided "as is" without express or implied
|
||||
* warranty.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
|
||||
* ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
|
||||
* SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
#ifndef fetch_h
|
||||
#define fetch_h 1
|
||||
|
||||
|
||||
#define BUFFER_SIZE 1024
|
||||
#define FETCH_VERSION "fetch/1.0"
|
||||
#define PATH_CP "/bin/cp"
|
||||
|
||||
/*
 * State of one transfer.  The option fields mirror the command-line
 * flags; the scheme's parse routine fills in fs_proto, fs_retrieve and
 * fs_close (and fs_outputfile when the user did not supply one).
 */
struct fetch_state {
|
||||
const char *fs_status; /* short phase label, e.g. "copying", "done" */
|
||||
const char *fs_outputfile; /* local output name; "-" selects stdout */
|
||||
int fs_verbose; /* -q, -v option */
|
||||
int fs_newtime; /* -n option */
|
||||
int fs_mirror; /* -m option */
|
||||
int fs_restart; /* -r option */
|
||||
int fs_timeout; /* -T option */
|
||||
int fs_passive_mode; /* -p option */
|
||||
int fs_linkfile; /* -l option */
|
||||
int fs_precious; /* -R option */
|
||||
time_t fs_modtime; /* remote modification time; -1 if unknown */
|
||||
void *fs_proto; /* scheme-private data, released by fs_close */
|
||||
int (*fs_retrieve)(struct fetch_state *); /* performs the transfer */
|
||||
int (*fs_close)(struct fetch_state *); /* frees scheme-private data */
|
||||
};
|
||||
|
||||
/*
 * Description of one URI scheme.  The leading five members are given by
 * the static initializer of each scheme (file_scheme, ftp_scheme,
 * http_scheme); their order must therefore not be changed.
 */
struct uri_scheme {
|
||||
const char *sc_name; /* name of the scheme, <32 characters */
|
||||
int (*sc_parse)(struct fetch_state *, const char *);
|
||||
/* routine to parse a URI and build state */
|
||||
int (*sc_proxy_parse)(struct fetch_state *, const char *);
|
||||
/* same, but for proxy case */
|
||||
const char *sc_proxy_envar; /* envar used to determine proxy */
|
||||
const char *sc_proxy_by; /* list of protos which can proxy us */
|
||||
|
||||
/* The rest is filled in dynamically... */
|
||||
int sc_can_proxy; /* NOTE(review): presumably set by init_schemes() -- confirm */
|
||||
struct uri_scheme *sc_proxyproto; /* NOTE(review): presumably the scheme chosen as proxy -- confirm */
|
||||
};
|
||||
|
||||
extern struct uri_scheme file_scheme, ftp_scheme, http_scheme;
|
||||
|
||||
void adjmodtime(struct fetch_state *fs);
|
||||
void catchsig(int signo);
|
||||
void display(struct fetch_state *fs, off_t total, ssize_t thisincr);
|
||||
void init_schemes(void);
|
||||
void rm(struct fetch_state *fs);
|
||||
void setup_sigalrm(void);
|
||||
void unsetup_sigalrm(void);
|
||||
char *percent_decode(const char *orig);
|
||||
char *safe_strdup(const char *orig);
|
||||
char *safe_strndup(const char *orig, size_t len);
|
||||
char *to_base64(const unsigned char *buf, size_t len);
|
||||
int from_base64(const char *orig, unsigned char *buf, size_t *lenp);
|
||||
int parse_host_port(const char *str, char **hostname, int *port);
|
||||
int parse_uri(struct fetch_state *fs, const char *uri);
|
||||
#endif /* ! fetch_h */
|
144
usr.bin/fetch/file.c
Normal file
144
usr.bin/fetch/file.c
Normal file
@ -0,0 +1,144 @@
|
||||
/*-
|
||||
* Copyright 1997 Massachusetts Institute of Technology
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software and
|
||||
* its documentation for any purpose and without fee is hereby
|
||||
* granted, provided that both the above copyright notice and this
|
||||
* permission notice appear in all copies, that both the above
|
||||
* copyright notice and this permission notice appear in all
|
||||
* supporting documentation, and that the name of M.I.T. not be used
|
||||
* in advertising or publicity pertaining to distribution of the
|
||||
* software without specific, written prior permission. M.I.T. makes
|
||||
* no representations about the suitability of this software for any
|
||||
* purpose. It is provided "as is" without express or implied
|
||||
* warranty.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
|
||||
* ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
|
||||
* SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <err.h>
|
||||
#include <errno.h>
|
||||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sysexits.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <sys/wait.h>
|
||||
|
||||
#include "fetch.h"
|
||||
|
||||
static int file_retrieve(struct fetch_state *fs);
|
||||
static int file_close(struct fetch_state *fs);
|
||||
static int file_parse(struct fetch_state *fs, const char *uri);
|
||||
|
||||
struct uri_scheme file_scheme =
|
||||
{ "file", file_parse, 0, 0, 0 };
|
||||
|
||||
/*
|
||||
* Again, we slightly misinterpret the slash after the hostname as
|
||||
* being the start of the pathname rather than merely a separator.
|
||||
*/
|
||||
static int
|
||||
file_parse(struct fetch_state *fs, const char *uri)
|
||||
{
|
||||
const char *p;
|
||||
|
||||
p = uri + 5; /* skip past `file:' */
|
||||
if (p[0] == '/' && p[1] == '/') {
|
||||
/* skip past `//localhost', if any */
|
||||
p += 2;
|
||||
while (*p && *p != '/')
|
||||
p++;
|
||||
}
|
||||
|
||||
if (p[0] != '/') {
|
||||
warnx("`%s': expected absolute pathname in `file' URL", uri);
|
||||
return EX_USAGE;
|
||||
}
|
||||
|
||||
fs->fs_proto = percent_decode(p);
|
||||
/* guaranteed to succeed because of above test */
|
||||
p = strrchr(fs->fs_proto, '/');
|
||||
if (fs->fs_outputfile == 0) /* only set if not overridden by user */
|
||||
fs->fs_outputfile = p + 1;
|
||||
fs->fs_retrieve = file_retrieve;
|
||||
fs->fs_close = file_close;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
file_close(struct fetch_state *fs)
|
||||
{
|
||||
free(fs->fs_proto);
|
||||
fs->fs_proto = 0;
|
||||
fs->fs_outputfile = 0;
|
||||
fs->fs_status = "free";
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
file_retrieve(struct fetch_state *fs)
|
||||
{
|
||||
/* XXX - this seems bogus to me! */
|
||||
if (access(fs->fs_outputfile, F_OK) == 0) {
|
||||
errno = EEXIST;
|
||||
warn("%s", fs->fs_outputfile);
|
||||
return EX_USAGE;
|
||||
}
|
||||
|
||||
if (fs->fs_linkfile) {
|
||||
fs->fs_status = "symlink";
|
||||
if (symlink(fs->fs_proto, fs->fs_outputfile) == -1) {
|
||||
warn("symlink");
|
||||
return EX_OSERR;
|
||||
}
|
||||
fs->fs_status = "done";
|
||||
} else {
|
||||
pid_t pid;
|
||||
int status;
|
||||
|
||||
fflush(stderr);
|
||||
pid = fork();
|
||||
if (pid < 0) {
|
||||
warn("fork");
|
||||
return EX_TEMPFAIL;
|
||||
} else if (pid == 0) {
|
||||
execl(PATH_CP, "cp", "-p", fs->fs_proto,
|
||||
fs->fs_outputfile, (char *)0);
|
||||
warn("execl: " PATH_CP);
|
||||
fflush(stderr);
|
||||
_exit(EX_OSERR);
|
||||
} else {
|
||||
fs->fs_status = "copying";
|
||||
if (waitpid(pid, &status, 0) < 0) {
|
||||
warn("waitpid(%ld)", (long)pid);
|
||||
return EX_OSERR;
|
||||
}
|
||||
if (WIFEXITED(status))
|
||||
return WEXITSTATUS(status);
|
||||
if (WIFSIGNALED(status))
|
||||
warn(PATH_CP " exited on signal: %s",
|
||||
sys_signame[WTERMSIG(status)]);
|
||||
return EX_OSERR;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
420
usr.bin/fetch/ftp.c
Normal file
420
usr.bin/fetch/ftp.c
Normal file
@ -0,0 +1,420 @@
|
||||
/*-
|
||||
* Copyright 1997 Massachusetts Institute of Technology
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software and
|
||||
* its documentation for any purpose and without fee is hereby
|
||||
* granted, provided that both the above copyright notice and this
|
||||
* permission notice appear in all copies, that both the above
|
||||
* copyright notice and this permission notice appear in all
|
||||
* supporting documentation, and that the name of M.I.T. not be used
|
||||
* in advertising or publicity pertaining to distribution of the
|
||||
* software without specific, written prior permission. M.I.T. makes
|
||||
* no representations about the suitability of this software for any
|
||||
* purpose. It is provided "as is" without express or implied
|
||||
* warranty.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
|
||||
* ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
|
||||
* SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <err.h>
|
||||
#include <errno.h>
|
||||
#include <ftpio.h>
|
||||
#include <limits.h>
|
||||
#include <netdb.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sysexits.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "fetch.h"
|
||||
|
||||
/*
 * Scheme-private state for one FTP transfer; built by ftp_parse(),
 * adjusted by ftp_proxy_parse() and released by ftp_close().
 * All strings are heap-allocated and owned by this structure.
 */
struct ftp_state {
|
||||
char *ftp_hostname; /* server to contact (the proxy, when proxying) */
|
||||
char *ftp_user; /* login name; null means "anonymous" */
|
||||
char *ftp_password; /* password sent at login */
|
||||
char *ftp_remote_file; /* percent-decoded path on the server */
|
||||
unsigned ftp_port; /* TCP port; 21 unless the URL or proxy says otherwise */
|
||||
};
|
||||
|
||||
static int ftp_close(struct fetch_state *fs);
|
||||
static int ftp_retrieve(struct fetch_state *fs);
|
||||
static int ftp_parse(struct fetch_state *fs, const char *uri);
|
||||
static int ftp_proxy_parse(struct fetch_state *fs, const char *uri);
|
||||
|
||||
struct uri_scheme ftp_scheme =
|
||||
{ "ftp", ftp_parse, ftp_proxy_parse, "FTP_PROXY", "ftp,http" };
|
||||
|
||||
static int
|
||||
ftp_parse(struct fetch_state *fs, const char *uri)
|
||||
{
|
||||
const char *p, *colon, *slash, *q;
|
||||
char *hostname, *atsign;
|
||||
unsigned port;
|
||||
struct ftp_state *ftps;
|
||||
|
||||
p = uri + 4;
|
||||
port = 0;
|
||||
|
||||
if (p[0] != '/' || p[1] != '/') {
|
||||
warnx("`%s': invalid `ftp' URL", uri);
|
||||
return EX_USAGE;
|
||||
}
|
||||
|
||||
p += 2;
|
||||
colon = strchr(p, ':');
|
||||
slash = strchr(p, '/');
|
||||
if (colon && slash && colon < slash)
|
||||
q = colon;
|
||||
else
|
||||
q = slash;
|
||||
if (q == 0) {
|
||||
warnx("`%s': malformed `ftp' URL", uri);
|
||||
return EX_USAGE;
|
||||
}
|
||||
hostname = alloca(q - p + 1);
|
||||
hostname[0] = '\0';
|
||||
strncat(hostname, p, q - p);
|
||||
p = slash;
|
||||
|
||||
if (colon && colon + 1 != slash) {
|
||||
unsigned long ul;
|
||||
char *ep;
|
||||
|
||||
errno = 0;
|
||||
ul = strtoul(colon + 1, &ep, 10);
|
||||
if (ep != slash || ep == colon + 1 || errno != 0
|
||||
|| ul < 1 || ul > 65534) {
|
||||
warn("`%s': invalid port in URL", uri);
|
||||
return EX_USAGE;
|
||||
}
|
||||
|
||||
port = ul;
|
||||
} else {
|
||||
port = 21;
|
||||
}
|
||||
|
||||
p = slash + 1;
|
||||
|
||||
ftps = malloc(sizeof *ftps);
|
||||
if (ftps == 0)
|
||||
err(EX_OSERR, "malloc");
|
||||
|
||||
/*
|
||||
* Now, we have a copy of the hostname in hostname, the specified port
|
||||
* (or the default value) in port, and p points to the filename part
|
||||
* of the URI. We just need to check for a user in the hostname,
|
||||
* and then save all the bits in our state.
|
||||
*/
|
||||
atsign = strrchr(hostname, '@');
|
||||
if (atsign) {
|
||||
if (atsign[1] == '\0') {
|
||||
warnx("`%s': malformed `ftp' hostname", hostname);
|
||||
free(ftps);
|
||||
return EX_USAGE;
|
||||
}
|
||||
|
||||
*atsign = '\0';
|
||||
ftps->ftp_user = percent_decode(hostname);
|
||||
ftps->ftp_hostname = safe_strdup(atsign + 1);
|
||||
} else {
|
||||
ftps->ftp_user = 0;
|
||||
ftps->ftp_hostname = safe_strdup(hostname);
|
||||
ftps->ftp_port = port;
|
||||
}
|
||||
|
||||
p = ftps->ftp_remote_file = percent_decode(p);
|
||||
/* now p is the decoded version */
|
||||
|
||||
if (fs->fs_outputfile == 0) {
|
||||
slash = strrchr(p, '/');
|
||||
fs->fs_outputfile = slash + 1;
|
||||
}
|
||||
|
||||
ftps->ftp_password = getenv("FTP_PASSWORD");
|
||||
if (ftps->ftp_password != 0) {
|
||||
ftps->ftp_password = safe_strdup(ftps->ftp_password);
|
||||
} else {
|
||||
char *pw;
|
||||
const char *logname;
|
||||
char localhost[MAXHOSTNAMELEN];
|
||||
|
||||
logname = getlogin();
|
||||
if (logname == 0)
|
||||
logname = "root";
|
||||
gethostname(localhost, sizeof localhost);
|
||||
pw = malloc(strlen(logname) + 1 + strlen(localhost) + 1);
|
||||
if (pw == 0)
|
||||
err(EX_OSERR, "malloc");
|
||||
strcpy(pw, logname);
|
||||
strcat(pw, "@");
|
||||
strcat(pw, localhost);
|
||||
ftps->ftp_password = pw;
|
||||
setenv("FTP_PASSWORD", pw, 0); /* cache the result */
|
||||
}
|
||||
|
||||
if (ftps->ftp_user == 0) {
|
||||
const char *user = getenv("FTP_LOGIN");
|
||||
if (user != 0)
|
||||
ftps->ftp_user = safe_strdup(user);
|
||||
}
|
||||
|
||||
fs->fs_proto = ftps;
|
||||
fs->fs_close = ftp_close;
|
||||
fs->fs_retrieve = ftp_retrieve;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* The only URIs we can handle in the FTP proxy are FTP URLs.
|
||||
* This makes it possible to take a few short cuts.
|
||||
*/
|
||||
static int
|
||||
ftp_proxy_parse(struct fetch_state *fs, const char *uri)
|
||||
{
|
||||
int rv;
|
||||
char *hostname;
|
||||
char *port;
|
||||
const char *user;
|
||||
char *newpass;
|
||||
unsigned portno;
|
||||
struct ftp_state *ftps;
|
||||
|
||||
hostname = getenv("FTP_PROXY");
|
||||
port = strchr(hostname, ':');
|
||||
if (port == 0) {
|
||||
portno = 21;
|
||||
} else {
|
||||
unsigned long ul;
|
||||
char *ep;
|
||||
|
||||
/* All this to avoid modifying the environment. */
|
||||
ep = alloca(strlen(hostname) + 1);
|
||||
strcpy(ep, hostname);
|
||||
port = ep + (port - hostname);
|
||||
hostname = ep;
|
||||
|
||||
*port++ = '\0';
|
||||
errno = 0;
|
||||
ul = strtoul(port, &ep, 0);
|
||||
if (*ep || !*port || errno != 0 || ul < 1 || ul > 65534) {
|
||||
warnx("`%s': invalid port specification for FTP proxy",
|
||||
port);
|
||||
return EX_USAGE;
|
||||
}
|
||||
portno = ul;
|
||||
}
|
||||
|
||||
/* ftp_parse() does most of the work; we can just fix things up */
|
||||
rv = ftp_parse(fs, uri);
|
||||
if (rv)
|
||||
return rv;
|
||||
/* Oops.. it got turned into a file: */
|
||||
if (fs->fs_retrieve != ftp_retrieve) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
ftps = fs->fs_proto;
|
||||
if (ftps->ftp_port != 21) {
|
||||
ftp_close(fs);
|
||||
warnx("`%s': FTP proxy requires the use of the standard port",
|
||||
uri);
|
||||
return EX_USAGE;
|
||||
}
|
||||
|
||||
ftps->ftp_port = portno;
|
||||
user = ftps->ftp_user ? ftps->ftp_user : "anonymous";
|
||||
newpass = malloc(strlen(ftps->ftp_user ? ftps->ftp_user : "anonymous")
|
||||
+ 1 + strlen(ftps->ftp_hostname) + 1);
|
||||
if (newpass == 0)
|
||||
err(EX_OSERR, "malloc");
|
||||
|
||||
strcpy(newpass, user);
|
||||
strcat(newpass, "@");
|
||||
strcpy(newpass, ftps->ftp_hostname);
|
||||
free(ftps->ftp_hostname);
|
||||
ftps->ftp_hostname = safe_strdup(hostname);
|
||||
free(ftps->ftp_password);
|
||||
ftps->ftp_password = newpass;
|
||||
free(ftps->ftp_user);
|
||||
ftps->ftp_user = getenv("FTP_PROXY_USER");
|
||||
if (ftps->ftp_user)
|
||||
ftps->ftp_user = safe_strdup(ftps->ftp_user);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
ftp_close(struct fetch_state *fs)
|
||||
{
|
||||
struct ftp_state *ftps = fs->fs_proto;
|
||||
|
||||
if (ftps->ftp_user)
|
||||
free(ftps->ftp_user);
|
||||
free(ftps->ftp_hostname);
|
||||
free(ftps->ftp_password);
|
||||
free(ftps->ftp_remote_file);
|
||||
free(ftps);
|
||||
fs->fs_proto = 0;
|
||||
fs->fs_outputfile = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
ftp_retrieve(struct fetch_state *fs)
|
||||
{
|
||||
struct ftp_state *ftps = fs->fs_proto;
|
||||
FILE *ftp, *remote, *local;
|
||||
int status;
|
||||
off_t size;
|
||||
off_t seekloc, wehave;
|
||||
time_t modtime;
|
||||
size_t readresult, writeresult;
|
||||
|
||||
ftp = ftpLogin(ftps->ftp_hostname,
|
||||
(char *)(ftps->ftp_user ? ftps->ftp_user : "anonymous"),
|
||||
/* XXX ^^^^ bad API */
|
||||
ftps->ftp_password, 0, fs->fs_verbose > 1,
|
||||
&status);
|
||||
if (ftp == 0) {
|
||||
warnx("%s: %s", ftps->ftp_hostname,
|
||||
status ? ftpErrString(status) : hstrerror(h_errno));
|
||||
return EX_IOERR;
|
||||
}
|
||||
ftpBinary(ftp);
|
||||
ftpPassive(ftp, fs->fs_passive_mode);
|
||||
size = ftpGetSize(ftp, ftps->ftp_remote_file);
|
||||
modtime = ftpGetModtime(ftp, ftps->ftp_remote_file);
|
||||
if (modtime <= 0) { /* xxx */
|
||||
warnx("%s: cannot get remote modification time",
|
||||
ftps->ftp_remote_file);
|
||||
modtime = -1;
|
||||
}
|
||||
fs->fs_modtime = modtime;
|
||||
seekloc = wehave = 0;
|
||||
if (fs->fs_restart || fs->fs_mirror) {
|
||||
struct stat stab;
|
||||
|
||||
if (fs->fs_outputfile[0] == '-'
|
||||
&& fs->fs_outputfile[1] == '\0')
|
||||
status = fstat(STDOUT_FILENO, &stab);
|
||||
else
|
||||
status = stat(fs->fs_outputfile, &stab);
|
||||
if (status < 0) {
|
||||
stab.st_mtime = -1;
|
||||
stab.st_size = 0;
|
||||
}
|
||||
if (status == 0 && !S_ISREG(stab.st_mode)) {
|
||||
fs->fs_restart = 0;
|
||||
fs->fs_mirror = 0;
|
||||
}
|
||||
if (fs->fs_mirror && stab.st_size == size
|
||||
&& modtime <= stab.st_mtime) {
|
||||
fclose(ftp);
|
||||
return 0;
|
||||
}
|
||||
if (fs->fs_restart) {
|
||||
if (stab.st_size != 0 && stab.st_size < size)
|
||||
seekloc = wehave = size;
|
||||
}
|
||||
}
|
||||
|
||||
remote = ftpGet(ftp, ftps->ftp_remote_file, &seekloc);
|
||||
if (remote == 0) {
|
||||
if (ftpErrno(ftp)) {
|
||||
warnx("%s: %s", ftps->ftp_hostname,
|
||||
ftpErrString(ftpErrno(ftp)));
|
||||
fclose(ftp);
|
||||
return EX_IOERR;
|
||||
} else {
|
||||
warn("ftpGet");
|
||||
return EX_OSERR;
|
||||
}
|
||||
}
|
||||
|
||||
if (fs->fs_outputfile[0] == '-' && fs->fs_outputfile[1] == '\0')
|
||||
local = fopen("/dev/stdout", wehave ? "a" : "w");
|
||||
else
|
||||
local = fopen(fs->fs_outputfile, wehave ? "a" : "w");
|
||||
if (local == 0) {
|
||||
warn("%s", fs->fs_outputfile);
|
||||
fclose(remote);
|
||||
fclose(ftp);
|
||||
return EX_OSERR;
|
||||
}
|
||||
|
||||
if (fs->fs_timeout) {
|
||||
char buf[sizeof("18446744073709551616")]; /* 2**64 */
|
||||
snprintf(buf, sizeof buf, "%d", fs->fs_timeout);
|
||||
setenv("FTP_TIMEOUT", buf, 1);
|
||||
} else {
|
||||
char *env = getenv("FTP_TIMEOUT");
|
||||
char *ep;
|
||||
unsigned long ul;
|
||||
|
||||
if (env) {
|
||||
errno = 0;
|
||||
ul = strtoul(env, &ep, 0);
|
||||
if (*env && *ep && errno == 0 && ul <= INT_MAX)
|
||||
fs->fs_timeout = ul;
|
||||
else
|
||||
warnx("`%s': invalid FTP timeout", env);
|
||||
}
|
||||
}
|
||||
|
||||
display(fs, size, wehave);
|
||||
setup_sigalrm();
|
||||
|
||||
do {
|
||||
char buf[BUFFER_SIZE];
|
||||
|
||||
alarm(fs->fs_timeout);
|
||||
readresult = fread(buf, 1, sizeof buf, remote);
|
||||
alarm(0);
|
||||
if (readresult == 0)
|
||||
break;
|
||||
display(fs, size, readresult);
|
||||
writeresult = fwrite(buf, 1, readresult, local);
|
||||
} while (writeresult == readresult);
|
||||
unsetup_sigalrm();
|
||||
|
||||
if (ferror(remote)) {
|
||||
warn("reading remote file from %s", ftps->ftp_hostname);
|
||||
fclose(local);
|
||||
fclose(remote);
|
||||
fclose(ftp);
|
||||
rm(fs);
|
||||
return EX_IOERR;
|
||||
} else if(ferror(local)) {
|
||||
warn("%s", fs->fs_outputfile);
|
||||
fclose(local);
|
||||
fclose(remote);
|
||||
fclose(ftp);
|
||||
rm(fs);
|
||||
return EX_IOERR;
|
||||
}
|
||||
|
||||
fclose(local);
|
||||
fclose(remote);
|
||||
fclose(ftp);
|
||||
display(fs, size, -1);
|
||||
adjmodtime(fs);
|
||||
return 0;
|
||||
}
|
976
usr.bin/fetch/http.c
Normal file
976
usr.bin/fetch/http.c
Normal file
@ -0,0 +1,976 @@
|
||||
/*-
|
||||
* Copyright 1997 Massachusetts Institute of Technology
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software and
|
||||
* its documentation for any purpose and without fee is hereby
|
||||
* granted, provided that both the above copyright notice and this
|
||||
* permission notice appear in all copies, that both the above
|
||||
* copyright notice and this permission notice appear in all
|
||||
* supporting documentation, and that the name of M.I.T. not be used
|
||||
* in advertising or publicity pertaining to distribution of the
|
||||
* software without specific, written prior permission. M.I.T. makes
|
||||
* no representations about the suitability of this software for any
|
||||
* purpose. It is provided "as is" without express or implied
|
||||
* warranty.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
|
||||
* ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
|
||||
* SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <ctype.h>
|
||||
#include <err.h>
|
||||
#include <errno.h>
|
||||
#include <limits.h>
|
||||
#include <md5.h>
|
||||
#include <netdb.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sysexits.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <sys/param.h> /* for MAXHOSTNAMELEN */
|
||||
#include <sys/socket.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/uio.h>
|
||||
|
||||
#include <netinet/in.h>
|
||||
#include <arpa/inet.h>
|
||||
|
||||
#include "fetch.h"
|
||||
|
||||
static int http_parse(struct fetch_state *fs, const char *uri);
|
||||
static int http_proxy_parse(struct fetch_state *fs, const char *uri);
|
||||
static int http_close(struct fetch_state *fs);
|
||||
static int http_retrieve(struct fetch_state *fs);
|
||||
|
||||
struct uri_scheme http_scheme =
|
||||
{ "http", http_parse, http_proxy_parse, "HTTP_PROXY", "http" };
|
||||
|
||||
/*
 * Per-transfer HTTP state.  The parsers allocate one of these and hang it
 * off fetch_state.fs_proto; http_close() releases it.
 */
struct http_state {
	char		*http_hostname;		/* host we connect to */
	char		*http_remote_request;	/* sent right after "GET /" */
	char		*http_decoded_file;	/* percent-decoded file part */
	unsigned	 http_port;		/* host byte order */
};
|
||||
|
||||
/*
 * Classification of one response header line, as reported by
 * http_parse_header().  We are only concerned with headers we might
 * receive.
 */
enum http_header {
	ht_content_length,
	ht_last_modified,
	ht_content_md5,
	ht_content_type,
	ht_transfer_encoding,
	ht_content_range,
	ht_warning,
	/* unusual cases */
	ht_syntax_error,	/* line contains no `:' */
	ht_unknown,		/* well-formed, but not a header listed above */
	ht_end_of_header	/* empty line */
};
|
||||
|
||||
static char *format_http_date(time_t when);
|
||||
static char *format_http_user_agent(void);
|
||||
static enum http_header http_parse_header(char *line, char **valuep);
|
||||
static int check_md5(FILE *fp, char *base64ofmd5);
|
||||
static int http_first_line(const char *line);
|
||||
static int parse_http_content_range(char *orig, off_t *first, off_t *total);
|
||||
static time_t parse_http_date(char *datestring);
|
||||
|
||||
static int
|
||||
http_parse(struct fetch_state *fs, const char *uri)
|
||||
{
|
||||
const char *p, *colon, *slash, *ques, *q;
|
||||
char *hostname;
|
||||
unsigned port;
|
||||
struct http_state *https;
|
||||
|
||||
p = uri + 5;
|
||||
port = 0;
|
||||
|
||||
if (p[0] != '/' || p[1] != '/') {
|
||||
warnx("`%s': malformed `http' URL", uri);
|
||||
return EX_USAGE;
|
||||
}
|
||||
|
||||
p += 2;
|
||||
colon = strchr(p, ':');
|
||||
slash = strchr(p, '/');
|
||||
if (colon && slash && colon < slash)
|
||||
q = colon;
|
||||
else
|
||||
q = slash;
|
||||
if (q == 0) {
|
||||
warnx("`%s': malformed `http' URL", uri);
|
||||
return EX_USAGE;
|
||||
}
|
||||
hostname = alloca(q - p + 1);
|
||||
hostname[0] = '\0';
|
||||
strncat(hostname, p, q - p);
|
||||
p = slash;
|
||||
|
||||
if (colon && colon + 1 != slash) {
|
||||
unsigned long ul;
|
||||
char *ep;
|
||||
|
||||
errno = 0;
|
||||
ul = strtoul(colon + 1, &ep, 10);
|
||||
if (ep != slash || ep == colon + 1 || errno != 0
|
||||
|| ul < 1 || ul > 65534) {
|
||||
warn("`%s': invalid port in URL", uri);
|
||||
return EX_USAGE;
|
||||
}
|
||||
|
||||
port = ul;
|
||||
} else {
|
||||
port = 80;
|
||||
}
|
||||
|
||||
p = slash + 1;
|
||||
|
||||
https = malloc(sizeof *https);
|
||||
if (https == 0)
|
||||
err(EX_OSERR, "malloc");
|
||||
|
||||
/*
|
||||
* Now, we have a copy of the hostname in hostname, the specified port
|
||||
* (or the default value) in port, and p points to the filename part
|
||||
* of the URI.
|
||||
*/
|
||||
https->http_hostname = safe_strdup(hostname);
|
||||
https->http_port = port;
|
||||
|
||||
ques = strpbrk(p, "?#");
|
||||
if (ques) {
|
||||
https->http_remote_request = safe_strndup(p, ques - p);
|
||||
} else {
|
||||
https->http_remote_request = safe_strdup(p);
|
||||
}
|
||||
p = https->http_decoded_file = percent_decode(p);
|
||||
/* now p is the decoded version, so we can extract the basename */
|
||||
|
||||
if (fs->fs_outputfile == 0) {
|
||||
slash = strrchr(p, '/');
|
||||
if (slash)
|
||||
fs->fs_outputfile = slash + 1;
|
||||
else
|
||||
fs->fs_outputfile = p;
|
||||
}
|
||||
|
||||
fs->fs_proto = https;
|
||||
fs->fs_close = http_close;
|
||||
fs->fs_retrieve = http_retrieve;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* An HTTP proxy works by accepting a complete URI in a GET request,
|
||||
* retrieving that object, and then forwarding it back to us. Because
|
||||
* it can conceivably handle any URI, we have to do a bit more work
|
||||
* in the parsing of it.
|
||||
*/
|
||||
static int
|
||||
http_proxy_parse(struct fetch_state *fs, const char *uri)
|
||||
{
|
||||
struct http_state *https;
|
||||
const char *env, *slash, *ques;
|
||||
char *file;
|
||||
int rv;
|
||||
|
||||
https = malloc(sizeof *https);
|
||||
https->http_remote_request = safe_strdup(uri);
|
||||
|
||||
env = getenv("HTTP_PROXY");
|
||||
rv = parse_host_port(env, &https->http_hostname, &https->http_port);
|
||||
if (rv) {
|
||||
out:
|
||||
free(https->http_remote_request);
|
||||
free(https);
|
||||
return rv;
|
||||
}
|
||||
|
||||
if (strncmp(uri, "http://", 7) == 0) {
|
||||
slash = strchr(uri + 7, '/');
|
||||
if (slash == 0) {
|
||||
warnx("`%s': malformed `http' URL", uri);
|
||||
rv = EX_USAGE;
|
||||
free(https->http_hostname);
|
||||
goto out;
|
||||
}
|
||||
ques = strpbrk(slash, "?#");
|
||||
if (ques == 0)
|
||||
file = safe_strdup(slash);
|
||||
else
|
||||
file = safe_strndup(slash, ques - slash);
|
||||
} else {
|
||||
slash = uri;
|
||||
while (*slash && *slash != ':')
|
||||
slash++;
|
||||
if (*slash)
|
||||
slash++;
|
||||
if (slash[0] == '/' && slash[1] == '/') {
|
||||
slash += 2;
|
||||
while (*slash && *slash != '/')
|
||||
slash++;
|
||||
}
|
||||
file = safe_strdup(slash);
|
||||
}
|
||||
https->http_decoded_file = percent_decode(file);
|
||||
free(file);
|
||||
if (fs->fs_outputfile == 0) {
|
||||
slash = strrchr(https->http_decoded_file, '/');
|
||||
/* NB: we are not guaranteed to find one... */
|
||||
fs->fs_outputfile = slash ? slash + 1
|
||||
: https->http_decoded_file;
|
||||
}
|
||||
|
||||
fs->fs_proto = https;
|
||||
fs->fs_close = http_close;
|
||||
fs->fs_retrieve = http_retrieve;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
http_close(struct fetch_state *fs)
|
||||
{
|
||||
struct http_state *https = fs->fs_proto;
|
||||
|
||||
free(https->http_hostname);
|
||||
free(https->http_remote_request);
|
||||
free(https->http_decoded_file);
|
||||
free(https);
|
||||
fs->fs_outputfile = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get a file using HTTP. We will try to implement HTTP/1.1 eventually.
|
||||
* This subroutine makes heavy use of the 4.4-Lite standard I/O library,
|
||||
* in particular the `fgetln' which allows us to slurp an entire `line'
|
||||
* (an arbitrary string of non-NUL characters ending in a newline) directly
|
||||
* out of the stdio buffer. This makes interpreting the HTTP headers much
|
||||
* easier, since they are all guaranteed to end in `\r\n' and we can just
|
||||
* ignore the `\r'.
|
||||
*/
|
||||
static int
|
||||
http_retrieve(struct fetch_state *fs)
|
||||
{
|
||||
struct http_state *https;
|
||||
FILE *remote, *local;
|
||||
int s;
|
||||
struct sockaddr_in sin;
|
||||
struct msghdr msg;
|
||||
struct iovec iov[16]; /* XXX count precisely */
|
||||
int n, status;
|
||||
const char *env;
|
||||
int timo;
|
||||
char *line;
|
||||
size_t linelen, readresult, writeresult;
|
||||
off_t total_length, restart_from;
|
||||
time_t last_modified;
|
||||
char *base64ofmd5;
|
||||
static char buf[BUFFER_SIZE];
|
||||
int to_stdout;
|
||||
char rangebuf[sizeof("Range: bytes=18446744073709551616-\r\n")];
|
||||
|
||||
https = fs->fs_proto;
|
||||
to_stdout = (strcmp(fs->fs_outputfile, "-") == 0);
|
||||
|
||||
if (fs->fs_timeout) {
|
||||
timo = fs->fs_timeout;
|
||||
} else if ((env = getenv("HTTP_TIMEOUT")) != 0) {
|
||||
char *ep;
|
||||
unsigned long ul;
|
||||
|
||||
errno = 0;
|
||||
ul = strtoul(env, &ep, 0);
|
||||
if (*ep != '\0' || *env == '\0' || errno != 0
|
||||
|| ul > INT_MAX) {
|
||||
warnx("`%s': invalid timeout", env);
|
||||
return EX_USAGE;
|
||||
}
|
||||
timo = ul;
|
||||
} else {
|
||||
timo = 0;
|
||||
}
|
||||
|
||||
memset(&sin, 0, sizeof sin);
|
||||
sin.sin_family = AF_INET;
|
||||
sin.sin_len = sizeof sin;
|
||||
sin.sin_port = htons(https->http_port);
|
||||
|
||||
if (inet_aton(https->http_hostname, &sin.sin_addr) == 0) {
|
||||
struct hostent *hp;
|
||||
|
||||
/* XXX - do timeouts for name resolution? */
|
||||
hp = gethostbyname2(https->http_hostname, AF_INET);
|
||||
if (hp == 0) {
|
||||
warnx("`%s': cannot resolve: %s", https->http_hostname,
|
||||
hstrerror(h_errno));
|
||||
return EX_NOHOST;
|
||||
}
|
||||
memcpy(&sin.sin_addr, hp->h_addr_list[0], sizeof sin.sin_addr);
|
||||
}
|
||||
|
||||
msg.msg_name = (caddr_t)&sin;
|
||||
msg.msg_namelen = sizeof sin;
|
||||
msg.msg_iov = iov;
|
||||
n = 0;
|
||||
msg.msg_control = 0;
|
||||
msg.msg_controllen = 0;
|
||||
msg.msg_flags = MSG_EOF;
|
||||
|
||||
#define addstr(Iov, N, Str) \
|
||||
do { \
|
||||
Iov[N].iov_base = (void *)Str; \
|
||||
Iov[N].iov_len = strlen(Iov[n].iov_base); \
|
||||
N++; \
|
||||
} while(0)
|
||||
|
||||
retry:
|
||||
addstr(iov, n, "GET /");
|
||||
addstr(iov, n, https->http_remote_request);
|
||||
addstr(iov, n, " HTTP/1.0\r\n");
|
||||
addstr(iov, n, format_http_user_agent());
|
||||
/* do content negotiation here */
|
||||
addstr(iov, n, "Accept: */*\r\n");
|
||||
if (fs->fs_mirror) {
|
||||
struct stat stab;
|
||||
|
||||
errno = 0;
|
||||
if (((!to_stdout && stat(fs->fs_outputfile, &stab) == 0)
|
||||
|| (to_stdout && fstat(STDOUT_FILENO, &stab) == 0))
|
||||
&& S_ISREG(stab.st_mode)) {
|
||||
addstr(iov, n, "If-Modified-Since: ");
|
||||
addstr(iov, n, format_http_date(stab.st_mtime));
|
||||
addstr(iov, n, "\r\n");
|
||||
} else if (errno != 0) {
|
||||
warn("%s: cannot mirror; will retrieve anew",
|
||||
fs->fs_outputfile);
|
||||
}
|
||||
}
|
||||
if (fs->fs_restart) {
|
||||
struct stat stab;
|
||||
|
||||
errno = 0;
|
||||
if (((!to_stdout && stat(fs->fs_outputfile, &stab) == 0)
|
||||
|| (to_stdout && fstat(STDOUT_FILENO, &stab) == 0))
|
||||
&& S_ISREG(stab.st_mode)) {
|
||||
addstr(iov, n, "If-Range: ");
|
||||
addstr(iov, n, format_http_date(stab.st_mtime));
|
||||
addstr(iov, n, "\r\n");
|
||||
sprintf(rangebuf, "Range: bytes=%qd-\r\n",
|
||||
(quad_t)stab.st_size);
|
||||
addstr(iov, n, rangebuf);
|
||||
} else if (errno != 0) {
|
||||
warn("%s: cannot restart; will retrieve anew",
|
||||
fs->fs_outputfile);
|
||||
}
|
||||
}
|
||||
addstr(iov, n, "Connection: close\r\n");
|
||||
addstr(iov, n, "\r\n");
|
||||
msg.msg_iovlen = n;
|
||||
|
||||
s = socket(PF_INET, SOCK_STREAM, 0);
|
||||
if (s < 0) {
|
||||
warn("socket");
|
||||
return EX_OSERR;
|
||||
}
|
||||
|
||||
remote = fdopen(s, "r");
|
||||
if (remote == 0) {
|
||||
warn("fdopen");
|
||||
close(s);
|
||||
return EX_OSERR;
|
||||
}
|
||||
|
||||
setup_sigalrm();
|
||||
alarm(timo);
|
||||
if (sendmsg(s, &msg, MSG_EOF) < 0) {
|
||||
warn("%s", https->http_hostname);
|
||||
fclose(remote);
|
||||
return EX_OSERR;
|
||||
}
|
||||
|
||||
alarm(timo);
|
||||
line = fgetln(remote, &linelen);
|
||||
alarm(0);
|
||||
if (line == 0) {
|
||||
if (ferror(remote)) {
|
||||
warn("reading reply from %s", https->http_hostname);
|
||||
fclose(remote);
|
||||
unsetup_sigalrm();
|
||||
return EX_OSERR;
|
||||
} else {
|
||||
warnx("empty reply from %s", https->http_hostname);
|
||||
fclose(remote);
|
||||
unsetup_sigalrm();
|
||||
return EX_PROTOCOL;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* If the other end is HTTP 0.9, then we just suck their
|
||||
* response over; can't do anything fancy. We assume that
|
||||
* the file is a text file, so it is safe to use fgetln()
|
||||
* to suck the entire file. (It had better be, since
|
||||
* we used it to grab the first line.)
|
||||
*/
|
||||
if (linelen < 5 || strncasecmp(line, "http/", 5) != 0) {
|
||||
if (to_stdout)
|
||||
local = fopen("/dev/stdout", "w");
|
||||
else
|
||||
local = fopen(fs->fs_outputfile, "w");
|
||||
if (local == 0) {
|
||||
warn("%s: fopen", fs->fs_outputfile);
|
||||
fclose(remote);
|
||||
unsetup_sigalrm();
|
||||
return EX_OSERR;
|
||||
}
|
||||
display(fs, -1, 0);
|
||||
|
||||
do {
|
||||
writeresult = fwrite(line, 1, linelen, local);
|
||||
display(fs, -1, writeresult);
|
||||
if (writeresult != linelen)
|
||||
break;
|
||||
alarm(timo);
|
||||
line = fgetln(remote, &linelen);
|
||||
alarm(0);
|
||||
} while(line != 0);
|
||||
unsetup_sigalrm();
|
||||
|
||||
if (ferror(local)) {
|
||||
warn("%s", fs->fs_outputfile);
|
||||
fclose(local);
|
||||
fclose(remote);
|
||||
rm(fs);
|
||||
return EX_OSERR;
|
||||
} else if(ferror(remote)) {
|
||||
warn("%s", https->http_hostname);
|
||||
fclose(local);
|
||||
fclose(remote);
|
||||
rm(fs);
|
||||
return EX_OSERR;
|
||||
}
|
||||
fclose(local);
|
||||
fclose(remote);
|
||||
display(fs, -1, -1);
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* OK. The other end is doing HTTP 1.0 at the very least.
|
||||
* This means that some of the fancy stuff is at least possible.
|
||||
*/
|
||||
line[linelen - 1] = '\0'; /* turn line into a string */
|
||||
status = http_first_line(line);
|
||||
|
||||
/* In the future, we might handle redirection and other responses. */
|
||||
switch(status) {
|
||||
case 200: /* Here come results */
|
||||
case 206: /* Here come partial results */
|
||||
break;
|
||||
|
||||
case 304: /* Object is unmodified */
|
||||
if (fs->fs_mirror) {
|
||||
fclose(remote);
|
||||
unsetup_sigalrm();
|
||||
return 0;
|
||||
}
|
||||
/* otherwise, fall through */
|
||||
default:
|
||||
warnx("%s: %s: HTTP server returned error code %d",
|
||||
fs->fs_outputfile, https->http_hostname, status);
|
||||
if (fs->fs_verbose > 1) {
|
||||
fputs(line, stderr);
|
||||
fputc('\n', stderr);
|
||||
while ((line = fgetln(remote, &linelen)) != 0)
|
||||
fwrite(line, 1, linelen, stderr);
|
||||
}
|
||||
fclose(remote);
|
||||
unsetup_sigalrm();
|
||||
return EX_UNAVAILABLE;
|
||||
}
|
||||
|
||||
total_length = -1; /* -1 means ``don't know'' */
|
||||
last_modified = -1;
|
||||
base64ofmd5 = 0;
|
||||
restart_from = 0;
|
||||
|
||||
while((line = fgetln(remote, &linelen)) != 0) {
|
||||
char *value, *ep;
|
||||
enum http_header header;
|
||||
unsigned long ul;
|
||||
|
||||
line[linelen - 1] = '\0';
|
||||
header = http_parse_header(line, &value);
|
||||
|
||||
if (header == ht_end_of_header)
|
||||
break;
|
||||
|
||||
switch(header) {
|
||||
case ht_content_length:
|
||||
errno = 0;
|
||||
ul = strtoul(value, &ep, 10);
|
||||
if (errno != 0 || *ep != '\r')
|
||||
warnx("invalid Content-Length: `%s'", value);
|
||||
if (!fs->fs_restart)
|
||||
total_length = ul;
|
||||
break;
|
||||
|
||||
case ht_last_modified:
|
||||
last_modified = parse_http_date(value);
|
||||
if (last_modified == -1 && fs->fs_verbose > 0)
|
||||
warnx("invalid Last-Modified: `%s'", value);
|
||||
break;
|
||||
|
||||
case ht_content_md5:
|
||||
base64ofmd5 = safe_strdup(value);
|
||||
break;
|
||||
|
||||
case ht_content_range:
|
||||
/* NB: we might have to restart from farther back
|
||||
than we asked. */
|
||||
status = parse_http_content_range(value, &restart_from,
|
||||
&total_length);
|
||||
/* If we couldn't understand the reply, get the whole
|
||||
thing. */
|
||||
if (status) {
|
||||
fs->fs_restart = 0;
|
||||
/*doretry:*/
|
||||
fclose(remote);
|
||||
if (base64ofmd5)
|
||||
free(base64ofmd5);
|
||||
restart_from = 0;
|
||||
n = 0;
|
||||
goto retry;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* OK, if we got here, then we have finished parsing the header
|
||||
* and have read the `\r\n' line which denotes the end of same.
|
||||
* We may or may not have a good idea of the length of the file
|
||||
* or its modtime. At this point we will have to deal with
|
||||
* any special byte-range, content-negotiation, redirection,
|
||||
* or authentication, and probably jump back up to the top,
|
||||
* once we implement those features. So, all we have left to
|
||||
* do is open up the output file and copy data from input to
|
||||
* output until EOF.
|
||||
*/
|
||||
if (to_stdout)
|
||||
local = fopen("/dev/stdout", "w");
|
||||
else
|
||||
local = fopen(fs->fs_outputfile, "w");
|
||||
if (local == 0) {
|
||||
warn("%s: fopen", fs->fs_outputfile);
|
||||
fclose(remote);
|
||||
unsetup_sigalrm();
|
||||
return EX_OSERR;
|
||||
}
|
||||
|
||||
fs->fs_modtime = last_modified;
|
||||
fseek(local, restart_from, SEEK_SET); /* XXX truncation off_t->long */
|
||||
display(fs, total_length, restart_from); /* XXX truncation */
|
||||
|
||||
do {
|
||||
alarm(timo);
|
||||
readresult = fread(buf, 1, sizeof buf, remote);
|
||||
alarm(0);
|
||||
|
||||
if (readresult == 0)
|
||||
break;
|
||||
display(fs, total_length, readresult);
|
||||
|
||||
writeresult = fwrite(buf, 1, sizeof buf, local);
|
||||
} while (writeresult == readresult);
|
||||
|
||||
status = errno; /* save errno for warn(), below, if needed */
|
||||
display(fs, total_length, -1); /* do here in case we have to warn */
|
||||
errno = status;
|
||||
|
||||
if (ferror(remote)) {
|
||||
warn("reading remote file from %s", https->http_hostname);
|
||||
status = EX_OSERR;
|
||||
} else if(ferror(local)) {
|
||||
warn("`%s': fwrite", fs->fs_outputfile);
|
||||
status = EX_OSERR;
|
||||
} else {
|
||||
status = 0;
|
||||
}
|
||||
if (base64ofmd5) {
|
||||
/*
|
||||
* Ack. When restarting, the MD5 only covers the parts
|
||||
* we are getting, not the whole thing.
|
||||
*/
|
||||
fseek(local, restart_from, SEEK_SET);
|
||||
status = check_md5(local, base64ofmd5);
|
||||
free(base64ofmd5);
|
||||
}
|
||||
|
||||
unsetup_sigalrm();
|
||||
fclose(local);
|
||||
fclose(remote);
|
||||
|
||||
if (status != 0)
|
||||
rm(fs);
|
||||
else
|
||||
adjmodtime(fs);
|
||||
|
||||
return status;
|
||||
#undef addstr
|
||||
}
|
||||
|
||||
/*
 * The format of the response line for an HTTP request is:
 *	HTTP/V.vv{WS}999{WS}Explanatory text for humans to read\r\n
 * Where {WS} represents whitespace (spaces and/or tabs) and 999
 * is a machine-interpretable result code.  We return the integer value
 * of that result code, or the impossible value `0' if we are unable to
 * parse the result.
 *
 * `line' must be NUL-terminated.  The code may be followed either by
 * whitespace (which includes a left-over `\r') or by the end of the
 * string, so a reply without any explanatory text is accepted too.
 */
static int
http_first_line(const char *line)
{
	char *ep;
	unsigned long ul;

	if (strncasecmp(line, "http/", 5) != 0)
		return 0;

	/*
	 * The <ctype.h> functions are only defined for EOF and values
	 * representable as unsigned char, hence the casts.  They are all
	 * false for NUL, so the loops stop at the end of the string.
	 */
	line += 5;
	while (isdigit((unsigned char)*line))	/* skip major version number */
		line++;
	if (*line++ != '.')			/* skip period */
		return 0;
	while (isdigit((unsigned char)*line))	/* skip minor version number */
		line++;
	while (isspace((unsigned char)*line))	/* skip first whitespace */
		line++;

	errno = 0;
	ul = strtoul(line, &ep, 10);
	if (errno != 0 || ul > 999 || ul < 100)
		return 0;
	if (*ep != '\0' && !isspace((unsigned char)*ep))
		return 0;
	return ul;
}
|
||||
|
||||
/*
|
||||
* The format of a header line for an HTTP request is:
|
||||
* Header-Name: header-value (with comments in parens)\r\n
|
||||
* This would be a nice application for gperf(1), except that the
|
||||
* names are case-insensitive and gperf can't handle that.
|
||||
*/
|
||||
static enum http_header
|
||||
http_parse_header(char *line, char **valuep)
|
||||
{
|
||||
char *colon, *value;
|
||||
|
||||
if (*line == '\0' /* protocol error! */
|
||||
|| (line[0] == '\r' && line[1] == '\0'))
|
||||
return ht_end_of_header;
|
||||
|
||||
colon = strchr(line, ':');
|
||||
if (colon == 0)
|
||||
return ht_syntax_error;
|
||||
*colon = '\0';
|
||||
|
||||
for (value = colon + 1; *value && isspace(*value); value++)
|
||||
; /* do nothing */
|
||||
|
||||
/* XXX - strip comments? */
|
||||
*valuep = value;
|
||||
|
||||
#define cmp(name, num) do { if (!strcasecmp(line, name)) return num; } while(0)
|
||||
cmp("Content-Length", ht_content_length);
|
||||
cmp("Last-Modified", ht_last_modified);
|
||||
cmp("Content-MD5", ht_content_md5);
|
||||
cmp("Content-Range", ht_content_range);
|
||||
cmp("Content-Type", ht_content_type);
|
||||
cmp("Transfer-Encoding", ht_transfer_encoding);
|
||||
cmp("Warning", ht_warning);
|
||||
#undef cmp
|
||||
return ht_unknown;
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute the RSA Data Security, Inc., MD5 Message Digest of the file
|
||||
* given in `fp', see if it matches the one given in base64 encoding by
|
||||
* `base64ofmd5'. Warn and return an error if it doesn't.
|
||||
*/
|
||||
static int
|
||||
check_md5(FILE *fp, char *base64ofmd5) {
|
||||
MD5_CTX ctx;
|
||||
unsigned char digest[16];
|
||||
char buf[512];
|
||||
size_t len;
|
||||
char *ourval;
|
||||
|
||||
MD5Init(&ctx);
|
||||
while ((len = fread(buf, 1, sizeof buf, fp)) != 0) {
|
||||
MD5Update(&ctx, buf, len);
|
||||
}
|
||||
MD5Final(digest, &ctx);
|
||||
ourval = to_base64(digest, 16);
|
||||
if (strcmp(ourval, base64ofmd5) != 0) {
|
||||
warnx("MD5 digest mismatch: %s, should be %s", ourval,
|
||||
base64ofmd5);
|
||||
free(ourval);
|
||||
return EX_DATAERR;
|
||||
}
|
||||
free(ourval);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const char *wkdays[] = {
	"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
};
static const char *months[] = {
	"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct",
	"Nov", "Dec"
};

/*
 * Interpret one of the three possible formats for an HTTP date.
 * All of them are really bogus; HTTP should use either ISO 8601
 * or NTP timestamps.  We make some attempt to accept a subset of 8601
 * format.  The three standard formats are all fixed-length subsets of their
 * respective standards (except 8601, which puts all of the stuff we
 * care about up front).
 *
 * The date is taken to be in UTC whatever zone name follows it.  The
 * result is computed arithmetically instead of with mktime(), so the
 * TZ environment variable is never touched.  Returns the time in
 * seconds since the Epoch, or -1 if the string cannot be understood.
 */
static time_t
parse_http_date(char *string)
{
	struct tm tm;
	time_t rv;
	long long days, secs;
	int i, year, mon, era, yoe, doy;

	memset(&tm, 0, sizeof tm);

	/* 8601 has the shortest minimum length */
	if (strlen(string) < 15)
		return -1;

#define digit(x) (string[x] - '0')
	if (isdigit((unsigned char)*string)) {
		/* ISO 8601: 19970127T134551stuffwedon'tcareabout */
		for (i = 0; i < 15; i++) {
			if (i != 8 && !isdigit((unsigned char)string[i]))
				break;
		}
		if (i < 15)
			return -1;
		tm.tm_year = (digit(0) * 1000
			      + digit(1) * 100
			      + digit(2) * 10
			      + digit(3)) - 1900;
		tm.tm_mon = digit(4) * 10 + digit(5) - 1;
		tm.tm_mday = digit(6) * 10 + digit(7);
		if (string[8] != 'T' && string[8] != 't' && string[8] != ' ')
			return -1;
		tm.tm_hour = digit(9) * 10 + digit(10);
		tm.tm_min = digit(11) * 10 + digit(12);
		tm.tm_sec = digit(13) * 10 + digit(14);
		/* We don't care about the rest of the stuff after the secs. */
	} else if (string[3] == ',') {
		/* Mon, 27 Jan 1997 14:24:35 stuffwedon'tcareabout */
		if (strlen(string) < 25)
			return -1;
		string += 5;	/* skip over day-of-week */
		if (!(isdigit((unsigned char)string[0])
		      && isdigit((unsigned char)string[1])))
			return -1;
		tm.tm_mday = digit(0) * 10 + digit(1);
		for (i = 0; i < 12; i++) {
			if (strncasecmp(months[i], &string[3], 3) == 0)
				break;
		}
		if (i >= 12)
			return -1;
		tm.tm_mon = i;

		if (sscanf(&string[7], "%d %d:%d:%d", &i, &tm.tm_hour,
			   &tm.tm_min, &tm.tm_sec) != 4)
			return -1;
		tm.tm_year = i - 1900;

	} else if (string[3] == ' ') {
		/* Mon Jan 27 14:25:20 1997 (exactly 24 characters) */
		if (strlen(string) < 24)
			return -1;
		string += 4;
		for (i = 0; i < 12; i++) {
			if (strncasecmp(string, months[i], 3) == 0)
				break;
		}
		if (i >= 12)
			return -1;
		tm.tm_mon = i;
		if (sscanf(&string[4], "%d %d:%d:%d %d", &tm.tm_mday,
			   &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &i)
		    != 5)
			return -1;
		tm.tm_year = i - 1900;
	} else {
		/* Monday, 27-Jan-97 14:31:09 stuffwedon'tcareabout */
		char *comma = strchr(string, ',');
		char mname[4];

		if (comma == 0)
			return -1;
		string = comma + 1;
		if (strlen(string) < 19)
			return -1;
		string++;
		mname[3] = '\0';
		if (sscanf(string, "%d-%c%c%c-%d %d:%d:%d", &tm.tm_mday,
			   mname, mname + 1, mname + 2, &tm.tm_year,
			   &tm.tm_hour, &tm.tm_min, &tm.tm_sec) != 8)
			return -1;
		for (i = 0; i < 12; i++) {
			if (strcasecmp(months[i], mname) == 0)
				break;
		}
		if (i >= 12)
			return -1;
		tm.tm_mon = i;
		/*
		 * The year is nominally two digits.  Take 00-69 to mean
		 * 20xx, and cope with senders that supply all four digits.
		 */
		if (tm.tm_year >= 1900)
			tm.tm_year -= 1900;
		else if (tm.tm_year >= 0 && tm.tm_year < 70)
			tm.tm_year += 100;
	}
#undef digit

	if (tm.tm_sec > 60 || tm.tm_min > 59 || tm.tm_hour > 23
	    || tm.tm_mday > 31 || tm.tm_mon > 11)
		return -1;
	if (tm.tm_sec < 0 || tm.tm_min < 0 || tm.tm_hour < 0
	    || tm.tm_mday < 0 || tm.tm_mon < 0 || tm.tm_year < 0)
		return -1;
	if (tm.tm_year > 9999 - 1900)	/* keeps the arithmetic in range */
		return -1;

	/*
	 * Days since 1970-01-01 in the proleptic Gregorian calendar.  The
	 * year is made to start in March so that the leap day comes last;
	 * a day-of-month that overshoots the month simply carries over.
	 */
	year = tm.tm_year + 1900;
	mon = tm.tm_mon + 1;
	if (mon <= 2)
		year--;
	era = year / 400;
	yoe = year - era * 400;
	doy = (153 * (mon > 2 ? mon - 3 : mon + 9) + 2) / 5 + tm.tm_mday - 1;
	days = (long long)era * 146097
		+ yoe * 365 + yoe / 4 - yoe / 100 + doy - 719468;
	secs = days * 86400
		+ tm.tm_hour * 3600 + tm.tm_min * 60 + tm.tm_sec;

	rv = (time_t)secs;
	if ((long long)rv != secs)	/* does not fit in a time_t */
		return -1;
	return rv;
}
|
||||
|
||||
static char *
|
||||
format_http_date(time_t when)
|
||||
{
|
||||
struct tm *tm;
|
||||
static char buf[30];
|
||||
|
||||
tm = gmtime(&when);
|
||||
if (tm == 0)
|
||||
return 0;
|
||||
#ifndef HTTP_DATE_ISO_8601
|
||||
sprintf(buf, "%s, %02d %s %04d %02d:%02d:%02d GMT",
|
||||
wkdays[tm->tm_wday], tm->tm_mday, months[tm->tm_mon],
|
||||
tm->tm_year + 1900, tm->tm_hour, tm->tm_min, tm->tm_sec);
|
||||
#else /* ISO 8601 */
|
||||
sprintf(buf, "%04d%02d%02dT%02d%02d%02d+0000",
|
||||
tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
|
||||
tm->tm_hour, tm->tm_min, tm->tm_sec);
|
||||
#endif
|
||||
return buf;
|
||||
}
|
||||
|
||||
static char *
|
||||
format_http_user_agent(void)
|
||||
{
|
||||
static char buf[128];
|
||||
static int inited;
|
||||
|
||||
if (!inited) {
|
||||
int mib[2];
|
||||
char ostype[128], osrelease[128], machine[128];
|
||||
size_t len;
|
||||
|
||||
mib[0] = CTL_KERN;
|
||||
mib[1] = KERN_OSTYPE;
|
||||
len = sizeof ostype;
|
||||
if (sysctl(mib, 2, ostype, &len, 0, 0) < 0) {
|
||||
warn("sysctl");
|
||||
ostype[0] = '\0';
|
||||
}
|
||||
mib[1] = KERN_OSRELEASE;
|
||||
len = sizeof osrelease;
|
||||
if (sysctl(mib, 2, osrelease, &len, 0, 0) < 0) {
|
||||
warn("sysctl");
|
||||
osrelease[0] = '\0';
|
||||
}
|
||||
mib[0] = CTL_HW;
|
||||
mib[1] = HW_MACHINE;
|
||||
len = sizeof machine;
|
||||
if (sysctl(mib, 2, machine, &len, 0, 0) < 0) {
|
||||
warn("sysctl");
|
||||
machine[0] = '\0';
|
||||
}
|
||||
|
||||
snprintf(buf, sizeof buf,
|
||||
"User-Agent: " FETCH_VERSION " %s/%s (%s)\r\n",
|
||||
ostype, osrelease, machine);
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
|
||||
/*
 * Parse a Content-Range return header from the server.  RFC 2068 defines
 * this header to have the format:
 *	Content-Range: bytes 12345-67890/123456
 * Since we always ask for the whole rest of the file, we consider it an
 * error if the reply doesn't claim to give it to us.
 *
 * `orig' is the header value (everything after "Content-Range:" and
 * the blanks that follow); trailing whitespace such as a left-over `\r'
 * is tolerated.  On success, stores the offset of the first byte in
 * *restart_from and the length of the complete object in *total_length
 * and returns 0.  Otherwise warns, leaves both untouched, and returns
 * EX_PROTOCOL.
 */
static int
parse_http_content_range(char *orig, off_t *restart_from, off_t *total_length)
{
	unsigned long long first, last, total;
	char *ep;

	/* Only the unit is compared; the numbers follow it. */
	if (strncasecmp(orig, "bytes", 5) != 0) {
		warnx("unknown Content-Range unit: `%s'", orig);
		return EX_PROTOCOL;
	}

	orig += 5;
	while (isspace((unsigned char)*orig))
		orig++;

	/* errno is cleared once; any of the conversions may set it */
	errno = 0;
	first = strtoull(orig, &ep, 10);
	if (errno != 0 || *ep != '-') {
		warnx("invalid Content-Range: `%s'", orig);
		return EX_PROTOCOL;
	}
	last = strtoull(ep + 1, &ep, 10);
	if (errno != 0 || *ep != '/' || last < first) {
		warnx("invalid Content-Range: `%s'", orig);
		return EX_PROTOCOL;
	}
	total = strtoull(ep + 1, &ep, 10);
	if (errno != 0 || !(*ep == '\0' || isspace((unsigned char)*ep))) {
		warnx("invalid Content-Range: `%s'", orig);
		return EX_PROTOCOL;
	}

	/* The range has to run through the last byte of the object. */
	if (last + 1 != total) {
		warnx("HTTP server did not return requested Content-Range");
		return EX_PROTOCOL;
	}

	*restart_from = first;
	*total_length = total;
	return 0;
}
|
File diff suppressed because it is too large
Load Diff
122
usr.bin/fetch/uri.c
Normal file
122
usr.bin/fetch/uri.c
Normal file
@ -0,0 +1,122 @@
|
||||
/*-
|
||||
* Copyright 1997 Massachusetts Institute of Technology
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software and
|
||||
* its documentation for any purpose and without fee is hereby
|
||||
* granted, provided that both the above copyright notice and this
|
||||
* permission notice appear in all copies, that both the above
|
||||
* copyright notice and this permission notice appear in all
|
||||
* supporting documentation, and that the name of M.I.T. not be used
|
||||
* in advertising or publicity pertaining to distribution of the
|
||||
* software without specific, written prior permission. M.I.T. makes
|
||||
* no representations about the suitability of this software for any
|
||||
* purpose. It is provided "as is" without express or implied
|
||||
* warranty.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
|
||||
* ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
|
||||
* SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <err.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sysexits.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "fetch.h"
|
||||
|
||||
struct uri_scheme *schemes[] = {
|
||||
&http_scheme, &ftp_scheme, &file_scheme, 0
|
||||
};
|
||||
|
||||
static struct uri_scheme *
|
||||
find_scheme(const char *name)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; schemes[i]; i++) {
|
||||
if (strcasecmp(schemes[i]->sc_name, name) == 0)
|
||||
return schemes[i];
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
init_schemes(void)
|
||||
{
|
||||
int i;
|
||||
char schemebuf[32];
|
||||
const char *s, *t;
|
||||
struct uri_scheme *scp;
|
||||
|
||||
for (i = 0; schemes[i]; i++) {
|
||||
if (getenv(schemes[i]->sc_proxy_envar) != 0)
|
||||
schemes[i]->sc_can_proxy = 1;
|
||||
}
|
||||
|
||||
for (i = 0; schemes[i]; i++) {
|
||||
s = schemes[i]->sc_proxy_by;
|
||||
while (s && *s) {
|
||||
t = strchr(s, ',');
|
||||
if (t) {
|
||||
schemebuf[0] = '\0';
|
||||
strncat(schemebuf, s, t - s);
|
||||
s = t + 1;
|
||||
} else {
|
||||
strcpy(schemebuf, s);
|
||||
s = 0;
|
||||
}
|
||||
scp = find_scheme(schemebuf);
|
||||
if (scp && scp->sc_can_proxy) {
|
||||
schemes[i]->sc_proxyproto = scp;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
parse_uri(struct fetch_state *fs, const char *uri)
|
||||
{
|
||||
const char *colon, *slash;
|
||||
char *scheme;
|
||||
struct uri_scheme *scp;
|
||||
|
||||
fs->fs_status = "parsing URI";
|
||||
colon = strchr(uri, ':');
|
||||
slash = strchr(uri, '/');
|
||||
if (!colon || !slash || slash < colon) {
|
||||
warnx("%s: an absolute URI is required", uri);
|
||||
return EX_USAGE;
|
||||
}
|
||||
|
||||
scheme = alloca(colon - uri + 1);
|
||||
scheme[0] = '\0';
|
||||
strncat(scheme, uri, colon - uri);
|
||||
scp = find_scheme(scheme);
|
||||
|
||||
if (scp == 0) {
|
||||
warnx("%s: unknown URI scheme", scheme);
|
||||
return EX_USAGE;
|
||||
}
|
||||
if (scp->sc_proxyproto)
|
||||
return scp->sc_proxyproto->sc_proxy_parse(fs, uri);
|
||||
else
|
||||
return scp->sc_parse(fs, uri);
|
||||
}
|
||||
|
322
usr.bin/fetch/util.c
Normal file
322
usr.bin/fetch/util.c
Normal file
@ -0,0 +1,322 @@
|
||||
/*-
|
||||
* Copyright 1997 Massachusetts Institute of Technology
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software and
|
||||
* its documentation for any purpose and without fee is hereby
|
||||
* granted, provided that both the above copyright notice and this
|
||||
* permission notice appear in all copies, that both the above
|
||||
* copyright notice and this permission notice appear in all
|
||||
* supporting documentation, and that the name of M.I.T. not be used
|
||||
* in advertising or publicity pertaining to distribution of the
|
||||
* software without specific, written prior permission. M.I.T. makes
|
||||
* no representations about the suitability of this software for any
|
||||
* purpose. It is provided "as is" without express or implied
|
||||
* warranty.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
|
||||
* ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
|
||||
* SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <ctype.h>
|
||||
#include <err.h>
|
||||
#include <errno.h>
|
||||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sysexits.h>
|
||||
#include <time.h> /* for time() */
|
||||
#include <unistd.h>
|
||||
|
||||
#include <sys/time.h> /* for struct timeval */
|
||||
|
||||
#include "fetch.h"
|
||||
|
||||
|
||||
/* Signal handling functions */
|
||||
|
||||
/*
|
||||
* If this were Scheme we could make this variable private to just these two
|
||||
* functions...
|
||||
*/
|
||||
static struct sigaction oldalrm;
|
||||
|
||||
void
|
||||
setup_sigalrm(void)
|
||||
{
|
||||
struct sigaction catch;
|
||||
|
||||
sigemptyset(&catch.sa_mask);
|
||||
sigaddset(&catch.sa_mask, SIGHUP);
|
||||
sigaddset(&catch.sa_mask, SIGINT);
|
||||
sigaddset(&catch.sa_mask, SIGQUIT);
|
||||
sigaddset(&catch.sa_mask, SIGTERM);
|
||||
sigaddset(&catch.sa_mask, SIGALRM);
|
||||
catch.sa_handler = catchsig;
|
||||
catch.sa_flags = 0;
|
||||
|
||||
sigaction(SIGALRM, &catch, &oldalrm);
|
||||
}
|
||||
|
||||
void
|
||||
unsetup_sigalrm(void)
|
||||
{
|
||||
sigaction(SIGALRM, &oldalrm, 0);
|
||||
}
|
||||
|
||||
|
||||
/* File-handling functions */
|
||||
|
||||
/*
|
||||
* Set the last-modified time of the output file to be that returned by
|
||||
* the server.
|
||||
*/
|
||||
void
|
||||
adjmodtime(struct fetch_state *fs)
|
||||
{
|
||||
struct timeval tv[2];
|
||||
|
||||
/* XXX - not strictly correct, since (time_t)-1 does not have to be
|
||||
> 0. This also catches some of the other routines which erroneously
|
||||
return 0 for invalid times rather than -1. */
|
||||
if (!fs->fs_newtime && fs->fs_modtime > 0) {
|
||||
tv[0].tv_usec = tv[1].tv_usec = 0;
|
||||
time(&tv[0].tv_sec);
|
||||
tv[1].tv_sec = fs->fs_modtime;
|
||||
utimes(fs->fs_outputfile, tv);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Delete the file when exiting on error, if it is not `precious'.
|
||||
*/
|
||||
void
|
||||
rm(struct fetch_state *fs)
|
||||
{
|
||||
if (!(fs->fs_outputfile[0] == '-' && fs->fs_outputfile[1] == '\0')) {
|
||||
if (!fs->fs_restart && !fs->fs_mirror && !fs->fs_precious)
|
||||
unlink(fs->fs_outputfile);
|
||||
else
|
||||
adjmodtime(fs);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* String-handling and -parsing functions */
|
||||
|
||||
/*
 * Undo the standard %-sign encoding in URIs (e.g., `%2f' -> `/').  This
 * must be done after the URI is parsed, since the principal purpose of
 * the encoding is to hide characters which would otherwise be significant
 * to the parser (like `/').
 *
 * A `%' that is not followed by two hexadecimal digits is copied through
 * unchanged.  Returns a newly malloc()ed, NUL-terminated string which the
 * caller must free(); exits via err() if memory cannot be allocated.
 */
char *
percent_decode(const char *uri)
{
	char *rv, *s;
	char hex[3];

	/* The decoded form is never longer than the encoded one. */
	rv = s = malloc(strlen(uri) + 1);
	if (rv == 0)
		err(EX_OSERR, "malloc");

	while (*uri) {
		/*
		 * isxdigit() is false for the terminating NUL, so the
		 * short-circuit keeps us from looking past the end.
		 */
		if (uri[0] == '%' && isxdigit((unsigned char)uri[1])
		    && isxdigit((unsigned char)uri[2])) {
			hex[0] = uri[1];
			hex[1] = uri[2];
			hex[2] = '\0';
			*s++ = (char)strtoul(hex, 0, 16);
			uri += 3;
		} else {
			*s++ = *uri++;
		}
	}
	*s = '\0';
	return rv;
}
|
||||
|
||||
/*
 * Decode a standard host:port string into its constituents, allocating
 * memory for a new copy of the host part.
 *
 * When `s' contains a colon, everything before the first colon is
 * copied to *hostname and the decimal number after it (1..65534) is
 * stored in *port.  When there is no colon, all of `s' is copied to
 * *hostname and *port is left untouched.
 *
 * Returns 0 on success, or EX_USAGE (after printing a warning, and
 * without touching *hostname or *port) if the port is malformed or out
 * of range.
 */
int
parse_host_port(const char *s, char **hostname, int *port)
{
	const char *colon;
	char *ep;
	unsigned long ul;

	colon = strchr(s, ':');
	if (colon == 0) {
		*hostname = safe_strdup(s);
		return 0;
	}

	/*
	 * `colon' stays on the ':' itself: the port starts at colon + 1
	 * and the host is the colon - s characters in front of it.  The
	 * first port character must be a digit because strtoul() would
	 * otherwise accept leading white space or a sign.
	 */
	errno = 0;
	ul = strtoul(colon + 1, &ep, 10);
	if (!isdigit((unsigned char)colon[1]) || *ep != '\0' || errno != 0
	    || ul < 1 || ul > 65534) {
		warnx("`%s': invalid port number", s);
		return EX_USAGE;
	}

	*hostname = safe_strndup(s, colon - s);
	*port = (int)ul;
	return 0;
}
|
||||
|
||||
/*
 * Duplicate `orig' into freshly malloc()ed storage, as strdup() would,
 * but exit via err() instead of returning a null pointer when memory
 * runs out.
 */
char *
safe_strdup(const char *orig)
{
	size_t nbytes;
	char *copy;

	nbytes = strlen(orig) + 1;	/* include the terminating NUL */
	copy = malloc(nbytes);
	if (copy == 0)
		err(EX_OSERR, "malloc");
	memcpy(copy, orig, nbytes);
	return copy;
}
|
||||
|
||||
/*
 * Like safe_strdup(), but the copy holds at most `len' characters of
 * `orig' (fewer if `orig' ends sooner) and is always NUL-terminated.
 * The allocation is len + 1 bytes regardless of how many are used.
 */
char *
safe_strndup(const char *orig, size_t len)
{
	char *copy;
	size_t n;

	copy = malloc(len + 1);
	if (copy == 0)
		err(EX_OSERR, "malloc");

	for (n = 0; n < len && orig[n] != '\0'; n++)
		copy[n] = orig[n];
	copy[n] = '\0';
	return copy;
}
|
||||
|
||||
/*
 * Implement the `base64' encoding as described in RFC 1521.
 */
static const char base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/*
 * Encode the `len' bytes at `buf' in base64.
 *
 * Returns a newly malloc()ed, NUL-terminated string which the caller
 * must free().  The result is always a multiple of four characters
 * long, padded with `=' as necessary.  Exits via err() if memory cannot
 * be allocated.
 */
char *
to_base64(const unsigned char *buf, size_t len)
{
	char *s, *rv;
	unsigned tmp;

	/*
	 * Every group of up to three input bytes becomes exactly four
	 * output characters; one more byte is needed for the NUL.
	 */
	s = malloc(4 * ((len + 2) / 3) + 1);
	if (s == 0)
		err(EX_OSERR, "malloc");

	rv = s;
	while (len >= 3) {
		tmp = (unsigned)buf[0] << 16 | (unsigned)buf[1] << 8 | buf[2];
		s[0] = base64[tmp >> 18];
		s[1] = base64[(tmp >> 12) & 077];
		s[2] = base64[(tmp >> 6) & 077];
		s[3] = base64[tmp & 077];
		len -= 3;
		buf += 3;
		s += 4;
	}

	/* RFC 1521 enumerates these three possibilities... */
	switch (len) {
	case 2:
		tmp = (unsigned)buf[0] << 16 | (unsigned)buf[1] << 8;
		s[0] = base64[(tmp >> 18) & 077];
		s[1] = base64[(tmp >> 12) & 077];
		s[2] = base64[(tmp >> 6) & 077];
		s[3] = '=';
		s += 4;
		break;
	case 1:
		tmp = (unsigned)buf[0] << 16;
		s[0] = base64[(tmp >> 18) & 077];
		s[1] = base64[(tmp >> 12) & 077];
		s[2] = s[3] = '=';
		s += 4;
		break;
	default:
		break;
	}
	*s = '\0';

	return rv;
}
|
||||
|
||||
/*
 * Map one character of the RFC 1521 base64 alphabet to its 6-bit value.
 * Returns -1 for anything outside the alphabet, including `='.
 * (Assumes the letters are contiguous in the execution character set,
 * as they are in ASCII.)
 */
static int
base64_digit_value(int c)
{
	if (c >= 'A' && c <= 'Z')
		return c - 'A';
	if (c >= 'a' && c <= 'z')
		return c - 'a' + 26;
	if (c >= '0' && c <= '9')
		return c - '0' + 52;
	if (c == '+')
		return 62;
	if (c == '/')
		return 63;
	return -1;
}

/*
 * Decode the base64 string `orig' into `buf'.
 *
 * On entry *lenp is the capacity of `buf' in bytes; once the decoded
 * length is known to fit, *lenp is set to that length.  Trailing white
 * space in `orig' is ignored.  `=' padding is only accepted as the last
 * one or two characters.
 *
 * Returns 0 on success, or -1 if the input length is not a multiple of
 * four, the decoded data would not fit in `buf', or a character outside
 * the base64 alphabet is found.
 */
int
from_base64(const char *orig, unsigned char *buf, size_t *lenp)
{
	size_t len, pad, outlen, pos;

	len = strlen(orig);
	while (len > 0 && isspace((unsigned char)orig[len - 1]))
		len--;

	if (len % 4)
		return -1;

	/* Count the padding; there can be at most two `=' at the end. */
	pad = 0;
	while (pad < 2 && pad < len && orig[len - 1 - pad] == '=')
		pad++;

	/* Each padding character stands for one missing output byte. */
	outlen = 3 * (len / 4) - pad;
	if (outlen > *lenp)
		return -1;
	*lenp = outlen;

	for (pos = 0; pos < len; pos += 4) {
		unsigned tmp = 0;
		size_t i, forget = 0;

		for (i = 0; i < 4; i++) {
			int v = 0;

			if (pos + i >= len - pad) {
				/* Padding contributes zero bits. */
				forget++;
			} else {
				v = base64_digit_value(
				    (unsigned char)orig[pos + i]);
				if (v < 0)
					return -1;
			}
			tmp = (tmp << 6) | (unsigned)v;
		}

		buf[0] = (tmp >> 16) & 0xff;
		if (forget < 2)
			buf[1] = (tmp >> 8) & 0xff;
		if (forget < 1)
			buf[2] = tmp & 0xff;
		buf += 3 - forget;
	}
	return 0;
}
|
Loading…
x
Reference in New Issue
Block a user