Re-apply fixed r354847

unifdef(1): Improve worst-case bound on symbol resolution

Use RB_TREE to make some algorithms O(lg N) and O(N lg N) instead of O(N)
and O(N^2).

While here, remove the arbitrary limit on the number of macros understood.

Reverts r354877 and r354878, which disabled the (correct) test.

PR:		242095
Reported by:	lwhsu
This commit is contained in:
Conrad Meyer 2019-11-20 19:43:34 +00:00
parent 0877992e6b
commit 61287be181
2 changed files with 85 additions and 58 deletions

View File

@ -35,9 +35,6 @@ basic_head() {
} }
basic_body() { basic_body() {
if [ "$(atf_config_get ci false)" = "true" ]; then
atf_skip "https://bugs.freebsd.org/242095"
fi
atf_check -s ignore -o file:$(atf_get_srcdir)/d_basic.out \ atf_check -s ignore -o file:$(atf_get_srcdir)/d_basic.out \
-x "unifdef -U__FreeBSD__ $(atf_get_srcdir)/d_basic.in" -x "unifdef -U__FreeBSD__ $(atf_get_srcdir)/d_basic.in"

View File

@ -45,8 +45,11 @@
* it possible to handle all "dodgy" directives correctly. * it possible to handle all "dodgy" directives correctly.
*/ */
#include <sys/param.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/tree.h>
#include <assert.h>
#include <ctype.h> #include <ctype.h>
#include <err.h> #include <err.h>
#include <stdarg.h> #include <stdarg.h>
@ -149,7 +152,6 @@ static char const * const linestate_name[] = {
*/ */
#define MAXDEPTH 64 /* maximum #if nesting */ #define MAXDEPTH 64 /* maximum #if nesting */
#define MAXLINE 4096 /* maximum length of line */ #define MAXLINE 4096 /* maximum length of line */
#define MAXSYMS 16384 /* maximum number of symbols */
/* /*
* Sometimes when editing a keyword the replacement text is longer, so * Sometimes when editing a keyword the replacement text is longer, so
@ -157,6 +159,26 @@ static char const * const linestate_name[] = {
*/ */
#define EDITSLOP 10 #define EDITSLOP 10
/*
* C17/18 allow 63 characters per macro name, but up to 127 arbitrarily large
* parameters.
*/
struct macro {
RB_ENTRY(macro) entry;
const char *name;
const char *value;
bool ignore; /* -iDsym or -iUsym */
};
static int
macro_cmp(struct macro *a, struct macro *b)
{
return (strcmp(a->name, b->name));
}
static RB_HEAD(MACROMAP, macro) macro_tree = RB_INITIALIZER(&macro_tree);
RB_GENERATE_STATIC(MACROMAP, macro, entry, macro_cmp);
/* /*
* Globals. * Globals.
*/ */
@ -174,11 +196,6 @@ static bool symlist; /* -s: output symbol list */
static bool symdepth; /* -S: output symbol depth */ static bool symdepth; /* -S: output symbol depth */
static bool text; /* -t: this is a text file */ static bool text; /* -t: this is a text file */
static const char *symname[MAXSYMS]; /* symbol name */
static const char *value[MAXSYMS]; /* -Dsym=value */
static bool ignore[MAXSYMS]; /* -iDsym or -iUsym */
static int nsyms; /* number of symbols */
static FILE *input; /* input file pointer */ static FILE *input; /* input file pointer */
static const char *filename; /* input file name */ static const char *filename; /* input file name */
static int linenum; /* current line number */ static int linenum; /* current line number */
@ -227,12 +244,12 @@ static char *astrcat(const char *, const char *);
static void cleantemp(void); static void cleantemp(void);
static void closeio(void); static void closeio(void);
static void debug(const char *, ...); static void debug(const char *, ...);
static void debugsym(const char *, int); static void debugsym(const char *, const struct macro *);
static bool defundef(void); static bool defundef(void);
static void defundefile(const char *); static void defundefile(const char *);
static void done(void); static void done(void);
static void error(const char *); static void error(const char *);
static int findsym(const char **); static struct macro *findsym(const char **);
static void flushline(bool); static void flushline(bool);
static void hashline(void); static void hashline(void);
static void help(void); static void help(void);
@ -807,7 +824,7 @@ static Linetype
parseline(void) parseline(void)
{ {
const char *cp; const char *cp;
int cursym; struct macro *cursym;
Linetype retval; Linetype retval;
Comment_state wascomment; Comment_state wascomment;
@ -829,15 +846,15 @@ parseline(void)
if ((cp = matchsym("ifdef", keyword)) != NULL || if ((cp = matchsym("ifdef", keyword)) != NULL ||
(cp = matchsym("ifndef", keyword)) != NULL) { (cp = matchsym("ifndef", keyword)) != NULL) {
cp = skipcomment(cp); cp = skipcomment(cp);
if ((cursym = findsym(&cp)) < 0) if ((cursym = findsym(&cp)) == NULL)
retval = LT_IF; retval = LT_IF;
else { else {
retval = (keyword[2] == 'n') retval = (keyword[2] == 'n')
? LT_FALSE : LT_TRUE; ? LT_FALSE : LT_TRUE;
if (value[cursym] == NULL) if (cursym->value == NULL)
retval = (retval == LT_TRUE) retval = (retval == LT_TRUE)
? LT_FALSE : LT_TRUE; ? LT_FALSE : LT_TRUE;
if (ignore[cursym]) if (cursym->ignore)
retval = (retval == LT_TRUE) retval = (retval == LT_TRUE)
? LT_TRUEI : LT_FALSEI; ? LT_TRUEI : LT_FALSEI;
} }
@ -1037,7 +1054,7 @@ eval_unary(const struct ops *ops, long *valp, const char **cpp)
{ {
const char *cp; const char *cp;
char *ep; char *ep;
int sym; struct macro *sym;
bool defparen; bool defparen;
Linetype lt; Linetype lt;
@ -1102,27 +1119,27 @@ eval_unary(const struct ops *ops, long *valp, const char **cpp)
debug("eval%d defined missing ')'", prec(ops)); debug("eval%d defined missing ')'", prec(ops));
return (LT_ERROR); return (LT_ERROR);
} }
if (sym < 0) { if (sym == NULL) {
debug("eval%d defined unknown", prec(ops)); debug("eval%d defined unknown", prec(ops));
lt = LT_IF; lt = LT_IF;
} else { } else {
debug("eval%d defined %s", prec(ops), symname[sym]); debug("eval%d defined %s", prec(ops), sym->name);
*valp = (value[sym] != NULL); *valp = (sym->value != NULL);
lt = *valp ? LT_TRUE : LT_FALSE; lt = *valp ? LT_TRUE : LT_FALSE;
} }
constexpr = false; constexpr = false;
} else if (!endsym(*cp)) { } else if (!endsym(*cp)) {
debug("eval%d symbol", prec(ops)); debug("eval%d symbol", prec(ops));
sym = findsym(&cp); sym = findsym(&cp);
if (sym < 0) { if (sym == NULL) {
lt = LT_IF; lt = LT_IF;
cp = skipargs(cp); cp = skipargs(cp);
} else if (value[sym] == NULL) { } else if (sym->value == NULL) {
*valp = 0; *valp = 0;
lt = LT_FALSE; lt = LT_FALSE;
} else { } else {
*valp = strtol(value[sym], &ep, 0); *valp = strtol(sym->value, &ep, 0);
if (*ep != '\0' || ep == value[sym]) if (*ep != '\0' || ep == sym->value)
return (LT_ERROR); return (LT_ERROR);
lt = *valp ? LT_TRUE : LT_FALSE; lt = *valp ? LT_TRUE : LT_FALSE;
cp = skipargs(cp); cp = skipargs(cp);
@ -1439,17 +1456,18 @@ matchsym(const char *s, const char *t)
* Look for the symbol in the symbol table. If it is found, we return * Look for the symbol in the symbol table. If it is found, we return
* the symbol table index, else we return -1. * the symbol table index, else we return -1.
*/ */
static int static struct macro *
findsym(const char **strp) findsym(const char **strp)
{ {
const char *str; const char *str;
int symind; char *strkey;
struct macro key, *res;
str = *strp; str = *strp;
*strp = skipsym(str); *strp = skipsym(str);
if (symlist) { if (symlist) {
if (*strp == str) if (*strp == str)
return (-1); return (NULL);
if (symdepth && firstsym) if (symdepth && firstsym)
printf("%s%3d", zerosyms ? "" : "\n", depth); printf("%s%3d", zerosyms ? "" : "\n", depth);
firstsym = zerosyms = false; firstsym = zerosyms = false;
@ -1458,15 +1476,26 @@ findsym(const char **strp)
(int)(*strp-str), str, (int)(*strp-str), str,
symdepth ? "" : "\n"); symdepth ? "" : "\n");
/* we don't care about the value of the symbol */ /* we don't care about the value of the symbol */
return (0); return (NULL);
} }
for (symind = 0; symind < nsyms; ++symind) {
if (matchsym(symname[symind], str) != NULL) { /*
debugsym("findsym", symind); * 'str' just points into the current mid-parse input and is not
return (symind); * nul-terminated. We know the length of the symbol, *strp - str, but
} * need to provide a nul-terminated lookup key for RB_FIND's comparison
} * function. Create one here.
return (-1); */
strkey = malloc(*strp - str + 1);
memcpy(strkey, str, *strp - str);
strkey[*strp - str] = 0;
key.name = strkey;
res = RB_FIND(MACROMAP, &macro_tree, &key);
if (res != NULL)
debugsym("findsym", res);
free(strkey);
return (res);
} }
/* /*
@ -1476,22 +1505,23 @@ static void
indirectsym(void) indirectsym(void)
{ {
const char *cp; const char *cp;
int changed, sym, ind; int changed;
struct macro *sym, *ind;
do { do {
changed = 0; changed = 0;
for (sym = 0; sym < nsyms; ++sym) { RB_FOREACH(sym, MACROMAP, &macro_tree) {
if (value[sym] == NULL) if (sym->value == NULL)
continue; continue;
cp = value[sym]; cp = sym->value;
ind = findsym(&cp); ind = findsym(&cp);
if (ind == -1 || ind == sym || if (ind == NULL || ind == sym ||
*cp != '\0' || *cp != '\0' ||
value[ind] == NULL || ind->value == NULL ||
value[ind] == value[sym]) ind->value == sym->value)
continue; continue;
debugsym("indir...", sym); debugsym("indir...", sym);
value[sym] = value[ind]; sym->value = ind->value;
debugsym("...ectsym", sym); debugsym("...ectsym", sym);
changed++; changed++;
} }
@ -1523,29 +1553,29 @@ addsym1(bool ignorethis, bool definethis, char *symval)
* Add a symbol to the symbol table. * Add a symbol to the symbol table.
*/ */
static void static void
addsym2(bool ignorethis, const char *sym, const char *val) addsym2(bool ignorethis, const char *symname, const char *val)
{ {
const char *cp = sym; const char *cp = symname;
int symind; struct macro *sym, *r;
symind = findsym(&cp); sym = findsym(&cp);
if (symind < 0) { if (sym == NULL) {
if (nsyms >= MAXSYMS) sym = calloc(1, sizeof(*sym));
errx(2, "too many symbols"); sym->ignore = ignorethis;
symind = nsyms++; sym->name = symname;
sym->value = val;
r = RB_INSERT(MACROMAP, &macro_tree, sym);
assert(r == NULL);
} }
ignore[symind] = ignorethis; debugsym("addsym", sym);
symname[symind] = sym;
value[symind] = val;
debugsym("addsym", symind);
} }
static void static void
debugsym(const char *why, int symind) debugsym(const char *why, const struct macro *sym)
{ {
debug("%s %s%c%s", why, symname[symind], debug("%s %s%c%s", why, sym->name,
value[symind] ? '=' : ' ', sym->value ? '=' : ' ',
value[symind] ? value[symind] : "undef"); sym->value ? sym->value : "undef");
} }
/* /*