Improve URL parsing. In particular, convert scheme and host to lowercase.

MFC after:	1 week
This commit is contained in:
Dag-Erling Smørgrav 2018-11-27 10:45:14 +00:00
parent e36f62bda6
commit 8d9de5b10a
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=341013
3 changed files with 56 additions and 43 deletions

View File

@@ -189,9 +189,9 @@ fetch_default_port(const char *scheme)
if ((se = getservbyname(scheme, "tcp")) != NULL) if ((se = getservbyname(scheme, "tcp")) != NULL)
return (ntohs(se->s_port)); return (ntohs(se->s_port));
if (strcasecmp(scheme, SCHEME_FTP) == 0) if (strcmp(scheme, SCHEME_FTP) == 0)
return (FTP_DEFAULT_PORT); return (FTP_DEFAULT_PORT);
if (strcasecmp(scheme, SCHEME_HTTP) == 0) if (strcmp(scheme, SCHEME_HTTP) == 0)
return (HTTP_DEFAULT_PORT); return (HTTP_DEFAULT_PORT);
return (0); return (0);
} }
@@ -202,9 +202,9 @@ fetch_default_port(const char *scheme)
int int
fetch_default_proxy_port(const char *scheme) fetch_default_proxy_port(const char *scheme)
{ {
if (strcasecmp(scheme, SCHEME_FTP) == 0) if (strcmp(scheme, SCHEME_FTP) == 0)
return (FTP_DEFAULT_PROXY_PORT); return (FTP_DEFAULT_PROXY_PORT);
if (strcasecmp(scheme, SCHEME_HTTP) == 0) if (strcmp(scheme, SCHEME_HTTP) == 0)
return (HTTP_DEFAULT_PROXY_PORT); return (HTTP_DEFAULT_PROXY_PORT);
return (0); return (0);
} }

View File

@@ -32,8 +32,10 @@
__FBSDID("$FreeBSD$"); __FBSDID("$FreeBSD$");
#include <sys/param.h> #include <sys/param.h>
#include <sys/errno.h>
#include <netinet/in.h>
#include <errno.h>
#include <ctype.h> #include <ctype.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@@ -81,13 +83,13 @@ fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
us->size = -1; us->size = -1;
us->atime = us->mtime = 0; us->atime = us->mtime = 0;
} }
if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) if (strcmp(URL->scheme, SCHEME_FILE) == 0)
return (fetchXGetFile(URL, us, flags)); return (fetchXGetFile(URL, us, flags));
else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
return (fetchXGetFTP(URL, us, flags)); return (fetchXGetFTP(URL, us, flags));
else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
return (fetchXGetHTTP(URL, us, flags)); return (fetchXGetHTTP(URL, us, flags));
else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
return (fetchXGetHTTP(URL, us, flags)); return (fetchXGetHTTP(URL, us, flags));
url_seterr(URL_BAD_SCHEME); url_seterr(URL_BAD_SCHEME);
return (NULL); return (NULL);
@@ -111,13 +113,13 @@ FILE *
fetchPut(struct url *URL, const char *flags) fetchPut(struct url *URL, const char *flags)
{ {
if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) if (strcmp(URL->scheme, SCHEME_FILE) == 0)
return (fetchPutFile(URL, flags)); return (fetchPutFile(URL, flags));
else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
return (fetchPutFTP(URL, flags)); return (fetchPutFTP(URL, flags));
else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
return (fetchPutHTTP(URL, flags)); return (fetchPutHTTP(URL, flags));
else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
return (fetchPutHTTP(URL, flags)); return (fetchPutHTTP(URL, flags));
url_seterr(URL_BAD_SCHEME); url_seterr(URL_BAD_SCHEME);
return (NULL); return (NULL);
@@ -135,13 +137,13 @@ fetchStat(struct url *URL, struct url_stat *us, const char *flags)
us->size = -1; us->size = -1;
us->atime = us->mtime = 0; us->atime = us->mtime = 0;
} }
if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) if (strcmp(URL->scheme, SCHEME_FILE) == 0)
return (fetchStatFile(URL, us, flags)); return (fetchStatFile(URL, us, flags));
else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
return (fetchStatFTP(URL, us, flags)); return (fetchStatFTP(URL, us, flags));
else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
return (fetchStatHTTP(URL, us, flags)); return (fetchStatHTTP(URL, us, flags));
else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
return (fetchStatHTTP(URL, us, flags)); return (fetchStatHTTP(URL, us, flags));
url_seterr(URL_BAD_SCHEME); url_seterr(URL_BAD_SCHEME);
return (-1); return (-1);
@@ -155,13 +157,13 @@ struct url_ent *
fetchList(struct url *URL, const char *flags) fetchList(struct url *URL, const char *flags)
{ {
if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) if (strcmp(URL->scheme, SCHEME_FILE) == 0)
return (fetchListFile(URL, flags)); return (fetchListFile(URL, flags));
else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
return (fetchListFTP(URL, flags)); return (fetchListFTP(URL, flags));
else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
return (fetchListHTTP(URL, flags)); return (fetchListHTTP(URL, flags));
else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
return (fetchListHTTP(URL, flags)); return (fetchListHTTP(URL, flags));
url_seterr(URL_BAD_SCHEME); url_seterr(URL_BAD_SCHEME);
return (NULL); return (NULL);
@@ -345,7 +347,7 @@ fetchParseURL(const char *URL)
char *doc; char *doc;
const char *p, *q; const char *p, *q;
struct url *u; struct url *u;
int i; int i, n;
/* allocate struct url */ /* allocate struct url */
if ((u = calloc(1, sizeof(*u))) == NULL) { if ((u = calloc(1, sizeof(*u))) == NULL) {
@@ -356,8 +358,10 @@ fetchParseURL(const char *URL)
/* scheme name */ /* scheme name */
if ((p = strstr(URL, ":/"))) { if ((p = strstr(URL, ":/"))) {
snprintf(u->scheme, URL_SCHEMELEN+1, if (p - URL > URL_SCHEMELEN)
"%.*s", (int)(p - URL), URL); goto ouch;
for (i = 0; URL + i < p; i++)
u->scheme[i] = tolower((unsigned char)URL[i]);
URL = ++p; URL = ++p;
/* /*
* Only one slash: no host, leave slash as part of document * Only one slash: no host, leave slash as part of document
@@ -388,28 +392,37 @@ fetchParseURL(const char *URL)
} }
/* hostname */ /* hostname */
if (*p == '[' && (q = strchr(p + 1, ']')) != NULL && if (*p == '[') {
(*++q == '\0' || *q == '/' || *q == ':')) { q = p + 1 + strspn(p + 1, ":0123456789ABCDEFabcdef");
if ((i = q - p) > MAXHOSTNAMELEN) if (*q++ != ']')
i = MAXHOSTNAMELEN; goto ouch;
strncpy(u->host, p, i);
p = q;
} else { } else {
for (i = 0; *p && (*p != '/') && (*p != ':'); p++) /* valid characters in a DNS name */
if (i < MAXHOSTNAMELEN) q = p + strspn(p, "-." "0123456789"
u->host[i++] = *p; "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "_"
"abcdefghijklmnopqrstuvwxyz");
} }
if ((*q != '\0' && *q != '/' && *q != ':') || q - p > MAXHOSTNAMELEN)
goto ouch;
for (i = 0; p + i < q; i++)
u->host[i] = tolower((unsigned char)p[i]);
u->host[i] = '\0';
p = q;
/* port */ /* port */
if (*p == ':') { if (*p == ':') {
for (q = ++p; *q && (*q != '/'); q++) for (n = 0, q = ++p; *q && (*q != '/'); q++) {
if (isdigit((unsigned char)*q)) if (*q >= '0' && *q <= '9' && n < INT_MAX / 10) {
u->port = u->port * 10 + (*q - '0'); n = n * 10 + (*q - '0');
else { } else {
/* invalid port */ /* invalid port */
url_seterr(URL_BAD_PORT); url_seterr(URL_BAD_PORT);
goto ouch; goto ouch;
} }
}
if (n < 1 || n > IPPORT_MAX)
goto ouch;
u->port = n;
p = q; p = q;
} }
@@ -418,8 +431,8 @@ fetchParseURL(const char *URL)
if (!*p) if (!*p)
p = "/"; p = "/";
if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 || if (strcmp(u->scheme, SCHEME_HTTP) == 0 ||
strcasecmp(u->scheme, SCHEME_HTTPS) == 0) { strcmp(u->scheme, SCHEME_HTTPS) == 0) {
const char hexnums[] = "0123456789abcdef"; const char hexnums[] = "0123456789abcdef";
/* percent-escape whitespace. */ /* percent-escape whitespace. */

View File

@@ -1085,8 +1085,8 @@ ftp_get_proxy(struct url * url, const char *flags)
} }
if (!purl->port) if (!purl->port)
purl->port = fetch_default_proxy_port(purl->scheme); purl->port = fetch_default_proxy_port(purl->scheme);
if (strcasecmp(purl->scheme, SCHEME_FTP) == 0 || if (strcmp(purl->scheme, SCHEME_FTP) == 0 ||
strcasecmp(purl->scheme, SCHEME_HTTP) == 0) strcmp(purl->scheme, SCHEME_HTTP) == 0)
return (purl); return (purl);
fetchFreeURL(purl); fetchFreeURL(purl);
} }
@@ -1104,8 +1104,8 @@ ftp_request(struct url *url, const char *op, struct url_stat *us,
int oflag; int oflag;
/* check if we should use HTTP instead */ /* check if we should use HTTP instead */
if (purl && (strcasecmp(purl->scheme, SCHEME_HTTP) == 0 || if (purl && (strcmp(purl->scheme, SCHEME_HTTP) == 0 ||
strcasecmp(purl->scheme, SCHEME_HTTPS) == 0)) { strcmp(purl->scheme, SCHEME_HTTPS) == 0)) {
if (strcmp(op, "STAT") == 0) if (strcmp(op, "STAT") == 0)
return (http_request(url, "HEAD", us, purl, flags)); return (http_request(url, "HEAD", us, purl, flags));
else if (strcmp(op, "RETR") == 0) else if (strcmp(op, "RETR") == 0)