libc glob: Avoid pathological exponential behavior

Adapt glob's match() routine to use a greedy algorithm that avoids
exponential runtime in byzantine inputs.

While here, add a testcase for the byzantine input.

Prompted by:	https://research.swtch.com/glob
Authored by:	Yves Orton <demerphq at gmail.com>
Obtained from:	Perl (33252c318625f3c6c89b816ee88481940e3e6f95)
Sponsored by:	Dell EMC Isilon
This commit is contained in:
Conrad Meyer 2017-05-03 15:55:29 +00:00
parent 702eb303ff
commit 241eb37e29
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=317749
3 changed files with 171 additions and 44 deletions

View File

@ -903,61 +903,73 @@ globextend(const Char *path, glob_t *pglob, struct glob_limit *limit,
}
/*
* pattern matching function for filenames. Each occurrence of the *
* pattern causes a recursion level.
* pattern matching function for filenames.
*/
static int
match(Char *name, Char *pat, Char *patend)
{
int ok, negate_range;
Char c, k;
Char c, k, *nextp, *nextn;
struct xlocale_collate *table =
(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
while (pat < patend) {
c = *pat++;
switch (c & M_MASK) {
case M_ALL:
if (pat == patend)
return (1);
do
if (match(name, pat, patend))
return (1);
while (*name++ != EOS);
return (0);
case M_ONE:
if (*name++ == EOS)
return (0);
break;
case M_SET:
ok = 0;
if ((k = *name++) == EOS)
return (0);
if ((negate_range = ((*pat & M_MASK) == M_NOT)) != 0)
++pat;
while (((c = *pat++) & M_MASK) != M_END)
if ((*pat & M_MASK) == M_RNG) {
if (table->__collate_load_error ?
CHAR(c) <= CHAR(k) &&
CHAR(k) <= CHAR(pat[1]) :
__wcollate_range_cmp(CHAR(c),
CHAR(k)) <= 0 &&
__wcollate_range_cmp(CHAR(k),
CHAR(pat[1])) <= 0)
nextn = NULL;
nextp = NULL;
while (1) {
while (pat < patend) {
c = *pat++;
switch (c & M_MASK) {
case M_ALL:
if (pat == patend)
return (1);
if (*name == EOS)
return (0);
nextn = name + 1;
nextp = pat - 1;
break;
case M_ONE:
if (*name++ == EOS)
goto fail;
break;
case M_SET:
ok = 0;
if ((k = *name++) == EOS)
goto fail;
if ((negate_range = ((*pat & M_MASK) == M_NOT)) != 0)
++pat;
while (((c = *pat++) & M_MASK) != M_END)
if ((*pat & M_MASK) == M_RNG) {
if (table->__collate_load_error ?
CHAR(c) <= CHAR(k) &&
CHAR(k) <= CHAR(pat[1]) :
__wcollate_range_cmp(CHAR(c),
CHAR(k)) <= 0 &&
__wcollate_range_cmp(CHAR(k),
CHAR(pat[1])) <= 0)
ok = 1;
pat += 2;
} else if (c == k)
ok = 1;
pat += 2;
} else if (c == k)
ok = 1;
if (ok == negate_range)
return (0);
break;
default:
if (*name++ != c)
return (0);
break;
if (ok == negate_range)
goto fail;
break;
default:
if (*name++ != c)
goto fail;
break;
}
}
if (*name == EOS)
return (1);
fail:
if (nextn == NULL)
break;
pat = nextp;
name = nextn;
}
return (*name == EOS);
return (0);
}
/* Free allocated data belonging to a glob_t structure. */

View File

@ -8,6 +8,7 @@ ATF_TESTS_C+= fmtmsg_test
ATF_TESTS_C+= fnmatch2_test
ATF_TESTS_C+= fpclassify2_test
ATF_TESTS_C+= ftw_test
ATF_TESTS_C+= glob2_test
ATF_TESTS_C+= popen_test
ATF_TESTS_C+= posix_spawn_test
ATF_TESTS_C+= wordexp_test

View File

@ -0,0 +1,114 @@
/*
* Copyright (c) 2017 Dell EMC Isilon
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <errno.h>
#include <fcntl.h>
#include <glob.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <atf-c.h>
/*
* Derived from Russ Cox' pathological case test program used for the
* https://research.swtch.com/glob article.
*/
ATF_TC_WITHOUT_HEAD(glob_pathological_test);
ATF_TC_BODY(glob_pathological_test, tc)
{
struct timespec t, t2;
glob_t g;
const char *longname = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
char pattern[1000], *p;
double dt;
unsigned i, j, k, mul;
int fd, rc;
fd = open(longname, O_CREAT | O_RDWR, 0666);
ATF_REQUIRE(fd >= 0);
/*
* Test up to 100 a* groups. Exponential implementations typically go
* bang at i=7 or 8.
*/
for (i = 0; i < 100; i++) {
/*
* Create a*...b pattern with i 'a*' groups.
*/
p = pattern;
for (k = 0; k < i; k++) {
*p++ = 'a';
*p++ = '*';
}
*p++ = 'b';
*p = '\0';
clock_gettime(CLOCK_REALTIME, &t);
for (j = 0; j < mul; j++) {
memset(&g, 0, sizeof g);
rc = glob(pattern, 0, 0, &g);
if (rc == GLOB_NOSPACE || rc == GLOB_ABORTED) {
ATF_REQUIRE_MSG(rc == GLOB_NOMATCH,
"an unexpected error occurred: "
"rc=%d errno=%d", rc, errno);
/* NORETURN */
}
ATF_CHECK_MSG(rc == GLOB_NOMATCH,
"A bogus match occurred: '%s' ~ '%s'", pattern,
g.gl_pathv[0]);
globfree(&g);
}
clock_gettime(CLOCK_REALTIME, &t2);
t2.tv_sec -= t.tv_sec;
t2.tv_nsec -= t.tv_nsec;
dt = t2.tv_sec + (double)t2.tv_nsec/1e9;
dt /= mul;
ATF_CHECK_MSG(dt < 1, "glob(3) took far too long: %d %.9f", i,
dt);
if (dt >= 0.0001)
mul = 1;
}
}
ATF_TP_ADD_TCS(tp)
{
ATF_TP_ADD_TC(tp, glob_pathological_test);
return (atf_no_error());
}