Re-apply fixed r354847

unifdef(1): Improve worst-case bound on symbol resolution

Use RB_TREE to make some algorithms O(lg N) and O(N lg N) instead of O(N)
and O(N^2).

While here, remove arbitrarily limit on number of macros understood.

Reverts r354877 and r354878, which disabled the (correct) test.

PR:		242095
Reported by:	lwhsu
This commit is contained in:
Conrad Meyer 2019-11-20 19:43:34 +00:00
parent 0877992e6b
commit 61287be181
2 changed files with 85 additions and 58 deletions

View File

@ -35,9 +35,6 @@ basic_head() {
}
basic_body() {
if [ "$(atf_config_get ci false)" = "true" ]; then
atf_skip "https://bugs.freebsd.org/242095"
fi
atf_check -s ignore -o file:$(atf_get_srcdir)/d_basic.out \
-x "unifdef -U__FreeBSD__ $(atf_get_srcdir)/d_basic.in"

View File

@ -45,8 +45,11 @@
* it possible to handle all "dodgy" directives correctly.
*/
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/tree.h>
#include <assert.h>
#include <ctype.h>
#include <err.h>
#include <stdarg.h>
@ -149,7 +152,6 @@ static char const * const linestate_name[] = {
*/
#define MAXDEPTH 64 /* maximum #if nesting */
#define MAXLINE 4096 /* maximum length of line */
#define MAXSYMS 16384 /* maximum number of symbols */
/*
* Sometimes when editing a keyword the replacement text is longer, so
@ -157,6 +159,26 @@ static char const * const linestate_name[] = {
*/
#define EDITSLOP 10
/*
* C17/18 allow 63 characters per macro name, but up to 127 arbitrarily large
* parameters.
*/
struct macro {
RB_ENTRY(macro) entry;
const char *name;
const char *value;
bool ignore; /* -iDsym or -iUsym */
};
static int
macro_cmp(struct macro *a, struct macro *b)
{
return (strcmp(a->name, b->name));
}
static RB_HEAD(MACROMAP, macro) macro_tree = RB_INITIALIZER(&macro_tree);
RB_GENERATE_STATIC(MACROMAP, macro, entry, macro_cmp);
/*
* Globals.
*/
@ -174,11 +196,6 @@ static bool symlist; /* -s: output symbol list */
static bool symdepth; /* -S: output symbol depth */
static bool text; /* -t: this is a text file */
static const char *symname[MAXSYMS]; /* symbol name */
static const char *value[MAXSYMS]; /* -Dsym=value */
static bool ignore[MAXSYMS]; /* -iDsym or -iUsym */
static int nsyms; /* number of symbols */
static FILE *input; /* input file pointer */
static const char *filename; /* input file name */
static int linenum; /* current line number */
@ -227,12 +244,12 @@ static char *astrcat(const char *, const char *);
static void cleantemp(void);
static void closeio(void);
static void debug(const char *, ...);
static void debugsym(const char *, int);
static void debugsym(const char *, const struct macro *);
static bool defundef(void);
static void defundefile(const char *);
static void done(void);
static void error(const char *);
static int findsym(const char **);
static struct macro *findsym(const char **);
static void flushline(bool);
static void hashline(void);
static void help(void);
@ -807,7 +824,7 @@ static Linetype
parseline(void)
{
const char *cp;
int cursym;
struct macro *cursym;
Linetype retval;
Comment_state wascomment;
@ -829,15 +846,15 @@ parseline(void)
if ((cp = matchsym("ifdef", keyword)) != NULL ||
(cp = matchsym("ifndef", keyword)) != NULL) {
cp = skipcomment(cp);
if ((cursym = findsym(&cp)) < 0)
if ((cursym = findsym(&cp)) == NULL)
retval = LT_IF;
else {
retval = (keyword[2] == 'n')
? LT_FALSE : LT_TRUE;
if (value[cursym] == NULL)
if (cursym->value == NULL)
retval = (retval == LT_TRUE)
? LT_FALSE : LT_TRUE;
if (ignore[cursym])
if (cursym->ignore)
retval = (retval == LT_TRUE)
? LT_TRUEI : LT_FALSEI;
}
@ -1037,7 +1054,7 @@ eval_unary(const struct ops *ops, long *valp, const char **cpp)
{
const char *cp;
char *ep;
int sym;
struct macro *sym;
bool defparen;
Linetype lt;
@ -1102,27 +1119,27 @@ eval_unary(const struct ops *ops, long *valp, const char **cpp)
debug("eval%d defined missing ')'", prec(ops));
return (LT_ERROR);
}
if (sym < 0) {
if (sym == NULL) {
debug("eval%d defined unknown", prec(ops));
lt = LT_IF;
} else {
debug("eval%d defined %s", prec(ops), symname[sym]);
*valp = (value[sym] != NULL);
debug("eval%d defined %s", prec(ops), sym->name);
*valp = (sym->value != NULL);
lt = *valp ? LT_TRUE : LT_FALSE;
}
constexpr = false;
} else if (!endsym(*cp)) {
debug("eval%d symbol", prec(ops));
sym = findsym(&cp);
if (sym < 0) {
if (sym == NULL) {
lt = LT_IF;
cp = skipargs(cp);
} else if (value[sym] == NULL) {
} else if (sym->value == NULL) {
*valp = 0;
lt = LT_FALSE;
} else {
*valp = strtol(value[sym], &ep, 0);
if (*ep != '\0' || ep == value[sym])
*valp = strtol(sym->value, &ep, 0);
if (*ep != '\0' || ep == sym->value)
return (LT_ERROR);
lt = *valp ? LT_TRUE : LT_FALSE;
cp = skipargs(cp);
@ -1439,17 +1456,18 @@ matchsym(const char *s, const char *t)
* Look for the symbol in the symbol table. If it is found, we return
* the symbol table index, else we return -1.
*/
static int
static struct macro *
findsym(const char **strp)
{
const char *str;
int symind;
char *strkey;
struct macro key, *res;
str = *strp;
*strp = skipsym(str);
if (symlist) {
if (*strp == str)
return (-1);
return (NULL);
if (symdepth && firstsym)
printf("%s%3d", zerosyms ? "" : "\n", depth);
firstsym = zerosyms = false;
@ -1458,15 +1476,26 @@ findsym(const char **strp)
(int)(*strp-str), str,
symdepth ? "" : "\n");
/* we don't care about the value of the symbol */
return (0);
return (NULL);
}
for (symind = 0; symind < nsyms; ++symind) {
if (matchsym(symname[symind], str) != NULL) {
debugsym("findsym", symind);
return (symind);
}
}
return (-1);
/*
* 'str' just points into the current mid-parse input and is not
* nul-terminated. We know the length of the symbol, *strp - str, but
* need to provide a nul-terminated lookup key for RB_FIND's comparison
* function. Create one here.
*/
strkey = malloc(*strp - str + 1);
memcpy(strkey, str, *strp - str);
strkey[*strp - str] = 0;
key.name = strkey;
res = RB_FIND(MACROMAP, &macro_tree, &key);
if (res != NULL)
debugsym("findsym", res);
free(strkey);
return (res);
}
/*
@ -1476,22 +1505,23 @@ static void
indirectsym(void)
{
const char *cp;
int changed, sym, ind;
int changed;
struct macro *sym, *ind;
do {
changed = 0;
for (sym = 0; sym < nsyms; ++sym) {
if (value[sym] == NULL)
RB_FOREACH(sym, MACROMAP, &macro_tree) {
if (sym->value == NULL)
continue;
cp = value[sym];
cp = sym->value;
ind = findsym(&cp);
if (ind == -1 || ind == sym ||
if (ind == NULL || ind == sym ||
*cp != '\0' ||
value[ind] == NULL ||
value[ind] == value[sym])
ind->value == NULL ||
ind->value == sym->value)
continue;
debugsym("indir...", sym);
value[sym] = value[ind];
sym->value = ind->value;
debugsym("...ectsym", sym);
changed++;
}
@ -1523,29 +1553,29 @@ addsym1(bool ignorethis, bool definethis, char *symval)
* Add a symbol to the symbol table.
*/
static void
addsym2(bool ignorethis, const char *sym, const char *val)
addsym2(bool ignorethis, const char *symname, const char *val)
{
const char *cp = sym;
int symind;
const char *cp = symname;
struct macro *sym, *r;
symind = findsym(&cp);
if (symind < 0) {
if (nsyms >= MAXSYMS)
errx(2, "too many symbols");
symind = nsyms++;
sym = findsym(&cp);
if (sym == NULL) {
sym = calloc(1, sizeof(*sym));
sym->ignore = ignorethis;
sym->name = symname;
sym->value = val;
r = RB_INSERT(MACROMAP, &macro_tree, sym);
assert(r == NULL);
}
ignore[symind] = ignorethis;
symname[symind] = sym;
value[symind] = val;
debugsym("addsym", symind);
debugsym("addsym", sym);
}
static void
debugsym(const char *why, int symind)
debugsym(const char *why, const struct macro *sym)
{
debug("%s %s%c%s", why, symname[symind],
value[symind] ? '=' : ' ',
value[symind] ? value[symind] : "undef");
debug("%s %s%c%s", why, sym->name,
sym->value ? '=' : ' ',
sym->value ? sym->value : "undef");
}
/*