freebsd-dev/contrib/apr-util/misc/apr_date.c
Peter Wemm 937a200089 Introduce svnlite so that we can check out our source code again.
This is actually a fully functional build except:
* All internal shared libraries are static linked to make sure there
  is no interference with ports (and to reduce build time).
* It does not have the python/perl/etc plugin or API support.
* By default, it installs as "svnlite" rather than "svn".
* If WITH_SVN added in make.conf, you get "svn".
* If WITHOUT_SVNLITE is in make.conf, this is completely disabled.

To be absolutely clear, this is not intended for any use other than
checking out freebsd source and committing, like we once did with cvs.

It should be usable for small scale local repositories that don't
need the python/perl plugin architecture.
2013-06-18 02:53:45 +00:00

638 lines
20 KiB
C

/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* apr_date.c: date parsing utility routines
* These routines are (hopefully) platform independent.
*
* 27 Oct 1996 Roy Fielding
* Extracted (with many modifications) from mod_proxy.c and
* tested with over 50,000 randomly chosen valid date strings
* and several hundred variations of invalid date strings.
*
*/
#include "apr.h"
#include "apr_lib.h"
#define APR_WANT_STRFUNC
#include "apr_want.h"
#if APR_HAVE_STDLIB_H
#include <stdlib.h>
#endif
#if APR_HAVE_CTYPE_H
#include <ctype.h>
#endif
#include "apr_date.h"
/*
* Compare a string to a mask
* Mask characters (arbitrary maximum is 256 characters, just in case):
* @ - uppercase letter
* $ - lowercase letter
* & - hex digit
* # - digit
* ~ - digit or space
* * - swallow remaining characters
* <x> - exact match for any other character
*/
APU_DECLARE(int) apr_date_checkmask(const char *data, const char *mask)
{
int i;
char d;
for (i = 0; i < 256; i++) {
d = data[i];
switch (mask[i]) {
case '\0':
return (d == '\0');
case '*':
return 1;
case '@':
if (!apr_isupper(d))
return 0;
break;
case '$':
if (!apr_islower(d))
return 0;
break;
case '#':
if (!apr_isdigit(d))
return 0;
break;
case '&':
if (!apr_isxdigit(d))
return 0;
break;
case '~':
if ((d != ' ') && !apr_isdigit(d))
return 0;
break;
default:
if (mask[i] != d)
return 0;
break;
}
}
return 0; /* We only get here if mask is corrupted (exceeds 256) */
}
/*
* Parses an HTTP date in one of three standard forms:
*
* Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
* Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
* Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
*
* and returns the apr_time_t number of microseconds since 1 Jan 1970 GMT,
* or APR_DATE_BAD if this would be out of range or if the date is invalid.
*
* The restricted HTTP syntax is
*
* HTTP-date = rfc1123-date | rfc850-date | asctime-date
*
* rfc1123-date = wkday "," SP date1 SP time SP "GMT"
* rfc850-date = weekday "," SP date2 SP time SP "GMT"
* asctime-date = wkday SP date3 SP time SP 4DIGIT
*
* date1 = 2DIGIT SP month SP 4DIGIT
* ; day month year (e.g., 02 Jun 1982)
* date2 = 2DIGIT "-" month "-" 2DIGIT
* ; day-month-year (e.g., 02-Jun-82)
* date3 = month SP ( 2DIGIT | ( SP 1DIGIT ))
* ; month day (e.g., Jun 2)
*
* time = 2DIGIT ":" 2DIGIT ":" 2DIGIT
* ; 00:00:00 - 23:59:59
*
* wkday = "Mon" | "Tue" | "Wed"
* | "Thu" | "Fri" | "Sat" | "Sun"
*
* weekday = "Monday" | "Tuesday" | "Wednesday"
* | "Thursday" | "Friday" | "Saturday" | "Sunday"
*
* month = "Jan" | "Feb" | "Mar" | "Apr"
* | "May" | "Jun" | "Jul" | "Aug"
* | "Sep" | "Oct" | "Nov" | "Dec"
*
* However, for the sake of robustness (and Netscapeness), we ignore the
* weekday and anything after the time field (including the timezone).
*
* This routine is intended to be very fast; 10x faster than using sscanf.
*
* Originally from Andrew Daviel <andrew@vancouver-webpages.com>, 29 Jul 96
* but many changes since then.
*
*/
APU_DECLARE(apr_time_t) apr_date_parse_http(const char *date)
{
apr_time_exp_t ds;
apr_time_t result;
int mint, mon;
const char *monstr, *timstr;
static const int months[12] =
{
('J' << 16) | ('a' << 8) | 'n', ('F' << 16) | ('e' << 8) | 'b',
('M' << 16) | ('a' << 8) | 'r', ('A' << 16) | ('p' << 8) | 'r',
('M' << 16) | ('a' << 8) | 'y', ('J' << 16) | ('u' << 8) | 'n',
('J' << 16) | ('u' << 8) | 'l', ('A' << 16) | ('u' << 8) | 'g',
('S' << 16) | ('e' << 8) | 'p', ('O' << 16) | ('c' << 8) | 't',
('N' << 16) | ('o' << 8) | 'v', ('D' << 16) | ('e' << 8) | 'c'};
if (!date)
return APR_DATE_BAD;
while (*date && apr_isspace(*date)) /* Find first non-whitespace char */
++date;
if (*date == '\0')
return APR_DATE_BAD;
if ((date = strchr(date, ' ')) == NULL) /* Find space after weekday */
return APR_DATE_BAD;
++date; /* Now pointing to first char after space, which should be */
/* start of the actual date information for all 4 formats. */
if (apr_date_checkmask(date, "## @$$ #### ##:##:## *")) {
/* RFC 1123 format with two days */
ds.tm_year = ((date[7] - '0') * 10 + (date[8] - '0') - 19) * 100;
if (ds.tm_year < 0)
return APR_DATE_BAD;
ds.tm_year += ((date[9] - '0') * 10) + (date[10] - '0');
ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
monstr = date + 3;
timstr = date + 12;
}
else if (apr_date_checkmask(date, "##-@$$-## ##:##:## *")) {
/* RFC 850 format */
ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0');
if (ds.tm_year < 70)
ds.tm_year += 100;
ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
monstr = date + 3;
timstr = date + 10;
}
else if (apr_date_checkmask(date, "@$$ ~# ##:##:## ####*")) {
/* asctime format */
ds.tm_year = ((date[16] - '0') * 10 + (date[17] - '0') - 19) * 100;
if (ds.tm_year < 0)
return APR_DATE_BAD;
ds.tm_year += ((date[18] - '0') * 10) + (date[19] - '0');
if (date[4] == ' ')
ds.tm_mday = 0;
else
ds.tm_mday = (date[4] - '0') * 10;
ds.tm_mday += (date[5] - '0');
monstr = date;
timstr = date + 7;
}
else if (apr_date_checkmask(date, "# @$$ #### ##:##:## *")) {
/* RFC 1123 format with one day */
ds.tm_year = ((date[6] - '0') * 10 + (date[7] - '0') - 19) * 100;
if (ds.tm_year < 0)
return APR_DATE_BAD;
ds.tm_year += ((date[8] - '0') * 10) + (date[9] - '0');
ds.tm_mday = (date[0] - '0');
monstr = date + 2;
timstr = date + 11;
}
else
return APR_DATE_BAD;
if (ds.tm_mday <= 0 || ds.tm_mday > 31)
return APR_DATE_BAD;
ds.tm_hour = ((timstr[0] - '0') * 10) + (timstr[1] - '0');
ds.tm_min = ((timstr[3] - '0') * 10) + (timstr[4] - '0');
ds.tm_sec = ((timstr[6] - '0') * 10) + (timstr[7] - '0');
if ((ds.tm_hour > 23) || (ds.tm_min > 59) || (ds.tm_sec > 61))
return APR_DATE_BAD;
mint = (monstr[0] << 16) | (monstr[1] << 8) | monstr[2];
for (mon = 0; mon < 12; mon++)
if (mint == months[mon])
break;
if (mon == 12)
return APR_DATE_BAD;
if ((ds.tm_mday == 31) && (mon == 3 || mon == 5 || mon == 8 || mon == 10))
return APR_DATE_BAD;
/* February gets special check for leapyear */
if ((mon == 1) &&
((ds.tm_mday > 29) ||
((ds.tm_mday == 29)
&& ((ds.tm_year & 3)
|| (((ds.tm_year % 100) == 0)
&& (((ds.tm_year % 400) != 100)))))))
return APR_DATE_BAD;
ds.tm_mon = mon;
/* ap_mplode_time uses tm_usec and tm_gmtoff fields, but they haven't
* been set yet.
* It should be safe to just zero out these values.
* tm_usec is the number of microseconds into the second. HTTP only
* cares about second granularity.
* tm_gmtoff is the number of seconds off of GMT the time is. By
* definition all times going through this function are in GMT, so this
* is zero.
*/
ds.tm_usec = 0;
ds.tm_gmtoff = 0;
if (apr_time_exp_get(&result, &ds) != APR_SUCCESS)
return APR_DATE_BAD;
return result;
}
/*
* Parses a string resembling an RFC 822 date. This is meant to be
* leinent in its parsing of dates. Hence, this will parse a wider
* range of dates than apr_date_parse_http.
*
* The prominent mailer (or poster, if mailer is unknown) that has
* been seen in the wild is included for the unknown formats.
*
* Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
* Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
* Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
* Sun, 6 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
* Sun, 06 Nov 94 08:49:37 GMT ; RFC 822
* Sun, 6 Nov 94 08:49:37 GMT ; RFC 822
* Sun, 06 Nov 94 08:49 GMT ; Unknown [drtr@ast.cam.ac.uk]
* Sun, 6 Nov 94 08:49 GMT ; Unknown [drtr@ast.cam.ac.uk]
* Sun, 06 Nov 94 8:49:37 GMT ; Unknown [Elm 70.85]
* Sun, 6 Nov 94 8:49:37 GMT ; Unknown [Elm 70.85]
* Mon, 7 Jan 2002 07:21:22 GMT ; Unknown [Postfix]
* Sun, 06-Nov-1994 08:49:37 GMT ; RFC 850 with four digit years
*
*/
#define TIMEPARSE(ds,hr10,hr1,min10,min1,sec10,sec1) \
{ \
ds.tm_hour = ((hr10 - '0') * 10) + (hr1 - '0'); \
ds.tm_min = ((min10 - '0') * 10) + (min1 - '0'); \
ds.tm_sec = ((sec10 - '0') * 10) + (sec1 - '0'); \
}
#define TIMEPARSE_STD(ds,timstr) \
{ \
TIMEPARSE(ds, timstr[0],timstr[1], \
timstr[3],timstr[4], \
timstr[6],timstr[7]); \
}
APU_DECLARE(apr_time_t) apr_date_parse_rfc(const char *date)
{
apr_time_exp_t ds;
apr_time_t result;
int mint, mon;
const char *monstr, *timstr, *gmtstr;
static const int months[12] =
{
('J' << 16) | ('a' << 8) | 'n', ('F' << 16) | ('e' << 8) | 'b',
('M' << 16) | ('a' << 8) | 'r', ('A' << 16) | ('p' << 8) | 'r',
('M' << 16) | ('a' << 8) | 'y', ('J' << 16) | ('u' << 8) | 'n',
('J' << 16) | ('u' << 8) | 'l', ('A' << 16) | ('u' << 8) | 'g',
('S' << 16) | ('e' << 8) | 'p', ('O' << 16) | ('c' << 8) | 't',
('N' << 16) | ('o' << 8) | 'v', ('D' << 16) | ('e' << 8) | 'c' };
if (!date)
return APR_DATE_BAD;
/* Not all dates have text days at the beginning. */
if (!apr_isdigit(date[0]))
{
while (*date && apr_isspace(*date)) /* Find first non-whitespace char */
++date;
if (*date == '\0')
return APR_DATE_BAD;
if ((date = strchr(date, ' ')) == NULL) /* Find space after weekday */
return APR_DATE_BAD;
++date; /* Now pointing to first char after space, which should be */ }
/* start of the actual date information for all 11 formats. */
if (apr_date_checkmask(date, "## @$$ #### ##:##:## *")) { /* RFC 1123 format */
ds.tm_year = ((date[7] - '0') * 10 + (date[8] - '0') - 19) * 100;
if (ds.tm_year < 0)
return APR_DATE_BAD;
ds.tm_year += ((date[9] - '0') * 10) + (date[10] - '0');
ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
monstr = date + 3;
timstr = date + 12;
gmtstr = date + 21;
TIMEPARSE_STD(ds, timstr);
}
else if (apr_date_checkmask(date, "##-@$$-## ##:##:## *")) {/* RFC 850 format */
ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0');
if (ds.tm_year < 70)
ds.tm_year += 100;
ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
monstr = date + 3;
timstr = date + 10;
gmtstr = date + 19;
TIMEPARSE_STD(ds, timstr);
}
else if (apr_date_checkmask(date, "@$$ ~# ##:##:## ####*")) {
/* asctime format */
ds.tm_year = ((date[16] - '0') * 10 + (date[17] - '0') - 19) * 100;
if (ds.tm_year < 0)
return APR_DATE_BAD;
ds.tm_year += ((date[18] - '0') * 10) + (date[19] - '0');
if (date[4] == ' ')
ds.tm_mday = 0;
else
ds.tm_mday = (date[4] - '0') * 10;
ds.tm_mday += (date[5] - '0');
monstr = date;
timstr = date + 7;
gmtstr = NULL;
TIMEPARSE_STD(ds, timstr);
}
else if (apr_date_checkmask(date, "# @$$ #### ##:##:## *")) {
/* RFC 1123 format*/
ds.tm_year = ((date[6] - '0') * 10 + (date[7] - '0') - 19) * 100;
if (ds.tm_year < 0)
return APR_DATE_BAD;
ds.tm_year += ((date[8] - '0') * 10) + (date[9] - '0');
ds.tm_mday = (date[0] - '0');
monstr = date + 2;
timstr = date + 11;
gmtstr = date + 20;
TIMEPARSE_STD(ds, timstr);
}
else if (apr_date_checkmask(date, "## @$$ ## ##:##:## *")) {
/* This is the old RFC 1123 date format - many many years ago, people
* used two-digit years. Oh, how foolish.
*
* Two-digit day, two-digit year version. */
ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0');
if (ds.tm_year < 70)
ds.tm_year += 100;
ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
monstr = date + 3;
timstr = date + 10;
gmtstr = date + 19;
TIMEPARSE_STD(ds, timstr);
}
else if (apr_date_checkmask(date, " # @$$ ## ##:##:## *")) {
/* This is the old RFC 1123 date format - many many years ago, people
* used two-digit years. Oh, how foolish.
*
* Space + one-digit day, two-digit year version.*/
ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0');
if (ds.tm_year < 70)
ds.tm_year += 100;
ds.tm_mday = (date[1] - '0');
monstr = date + 3;
timstr = date + 10;
gmtstr = date + 19;
TIMEPARSE_STD(ds, timstr);
}
else if (apr_date_checkmask(date, "# @$$ ## ##:##:## *")) {
/* This is the old RFC 1123 date format - many many years ago, people
* used two-digit years. Oh, how foolish.
*
* One-digit day, two-digit year version. */
ds.tm_year = ((date[6] - '0') * 10) + (date[7] - '0');
if (ds.tm_year < 70)
ds.tm_year += 100;
ds.tm_mday = (date[0] - '0');
monstr = date + 2;
timstr = date + 9;
gmtstr = date + 18;
TIMEPARSE_STD(ds, timstr);
}
else if (apr_date_checkmask(date, "## @$$ ## ##:## *")) {
/* Loser format. This is quite bogus. */
ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0');
if (ds.tm_year < 70)
ds.tm_year += 100;
ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
monstr = date + 3;
timstr = date + 10;
gmtstr = NULL;
TIMEPARSE(ds, timstr[0],timstr[1], timstr[3],timstr[4], '0','0');
}
else if (apr_date_checkmask(date, "# @$$ ## ##:## *")) {
/* Loser format. This is quite bogus. */
ds.tm_year = ((date[6] - '0') * 10) + (date[7] - '0');
if (ds.tm_year < 70)
ds.tm_year += 100;
ds.tm_mday = (date[0] - '0');
monstr = date + 2;
timstr = date + 9;
gmtstr = NULL;
TIMEPARSE(ds, timstr[0],timstr[1], timstr[3],timstr[4], '0','0');
}
else if (apr_date_checkmask(date, "## @$$ ## #:##:## *")) {
/* Loser format. This is quite bogus. */
ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0');
if (ds.tm_year < 70)
ds.tm_year += 100;
ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
monstr = date + 3;
timstr = date + 9;
gmtstr = date + 18;
TIMEPARSE(ds, '0',timstr[1], timstr[3],timstr[4], timstr[6],timstr[7]);
}
else if (apr_date_checkmask(date, "# @$$ ## #:##:## *")) {
/* Loser format. This is quite bogus. */
ds.tm_year = ((date[6] - '0') * 10) + (date[7] - '0');
if (ds.tm_year < 70)
ds.tm_year += 100;
ds.tm_mday = (date[0] - '0');
monstr = date + 2;
timstr = date + 8;
gmtstr = date + 17;
TIMEPARSE(ds, '0',timstr[1], timstr[3],timstr[4], timstr[6],timstr[7]);
}
else if (apr_date_checkmask(date, " # @$$ #### ##:##:## *")) {
/* RFC 1123 format with a space instead of a leading zero. */
ds.tm_year = ((date[7] - '0') * 10 + (date[8] - '0') - 19) * 100;
if (ds.tm_year < 0)
return APR_DATE_BAD;
ds.tm_year += ((date[9] - '0') * 10) + (date[10] - '0');
ds.tm_mday = (date[1] - '0');
monstr = date + 3;
timstr = date + 12;
gmtstr = date + 21;
TIMEPARSE_STD(ds, timstr);
}
else if (apr_date_checkmask(date, "##-@$$-#### ##:##:## *")) {
/* RFC 1123 with dashes instead of spaces between date/month/year
* This also looks like RFC 850 with four digit years.
*/
ds.tm_year = ((date[7] - '0') * 10 + (date[8] - '0') - 19) * 100;
if (ds.tm_year < 0)
return APR_DATE_BAD;
ds.tm_year += ((date[9] - '0') * 10) + (date[10] - '0');
ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
monstr = date + 3;
timstr = date + 12;
gmtstr = date + 21;
TIMEPARSE_STD(ds, timstr);
}
else
return APR_DATE_BAD;
if (ds.tm_mday <= 0 || ds.tm_mday > 31)
return APR_DATE_BAD;
if ((ds.tm_hour > 23) || (ds.tm_min > 59) || (ds.tm_sec > 61))
return APR_DATE_BAD;
mint = (monstr[0] << 16) | (monstr[1] << 8) | monstr[2];
for (mon = 0; mon < 12; mon++)
if (mint == months[mon])
break;
if (mon == 12)
return APR_DATE_BAD;
if ((ds.tm_mday == 31) && (mon == 3 || mon == 5 || mon == 8 || mon == 10))
return APR_DATE_BAD;
/* February gets special check for leapyear */
if ((mon == 1) &&
((ds.tm_mday > 29)
|| ((ds.tm_mday == 29)
&& ((ds.tm_year & 3)
|| (((ds.tm_year % 100) == 0)
&& (((ds.tm_year % 400) != 100)))))))
return APR_DATE_BAD;
ds.tm_mon = mon;
/* tm_gmtoff is the number of seconds off of GMT the time is.
*
* We only currently support: [+-]ZZZZ where Z is the offset in
* hours from GMT.
*
* If there is any confusion, tm_gmtoff will remain 0.
*/
ds.tm_gmtoff = 0;
/* Do we have a timezone ? */
if (gmtstr) {
int offset;
switch (*gmtstr) {
case '-':
offset = atoi(gmtstr+1);
ds.tm_gmtoff -= (offset / 100) * 60 * 60;
ds.tm_gmtoff -= (offset % 100) * 60;
break;
case '+':
offset = atoi(gmtstr+1);
ds.tm_gmtoff += (offset / 100) * 60 * 60;
ds.tm_gmtoff += (offset % 100) * 60;
break;
}
}
/* apr_time_exp_get uses tm_usec field, but it hasn't been set yet.
* It should be safe to just zero out this value.
* tm_usec is the number of microseconds into the second. HTTP only
* cares about second granularity.
*/
ds.tm_usec = 0;
if (apr_time_exp_gmt_get(&result, &ds) != APR_SUCCESS)
return APR_DATE_BAD;
return result;
}