bsdgrep: move context (-A/-B/-C) handling into procfile() and special-case
empty patterns

- procline() now only decides whether a line matches (it returns 0 on a
  match and 1 otherwise) and records match offsets in a struct parsec;
  procfile() does the printing, the -B queueing and the "--" separators.
- enqueue() reports whether adding a line rotated an older line out of the
  -B queue.
- Pattern compilation is skipped when matchall is set, and -w combined with
  an empty pattern is handled explicitly.
- New tests: context output with -E/-v (d_context_e/f/g) and wflag_emptypat.

diff --git a/contrib/netbsd-tests/usr.bin/grep/d_context_e.in b/contrib/netbsd-tests/usr.bin/grep/d_context_e.in
new file mode 100644
index 000000000000..453256317202
--- /dev/null
+++ b/contrib/netbsd-tests/usr.bin/grep/d_context_e.in
@@ -0,0 +1,10 @@
+monkey
+banana
+apple
+fruit
+monkey
+banna
+apple
+fruit
+apple
+monkey
diff --git a/contrib/netbsd-tests/usr.bin/grep/d_context_e.out b/contrib/netbsd-tests/usr.bin/grep/d_context_e.out
new file mode 100644
index 000000000000..1167e1072860
--- /dev/null
+++ b/contrib/netbsd-tests/usr.bin/grep/d_context_e.out
@@ -0,0 +1,9 @@
+monkey
+banana
+apple
+fruit
+monkey
+banna
+--
+apple
+monkey
diff --git a/contrib/netbsd-tests/usr.bin/grep/d_context_f.out b/contrib/netbsd-tests/usr.bin/grep/d_context_f.out
new file mode 100644
index 000000000000..2df9efd9ffba
--- /dev/null
+++ b/contrib/netbsd-tests/usr.bin/grep/d_context_f.out
@@ -0,0 +1,9 @@
+monkey
+banana
+apple
+fruit
+monkey
+banna
+apple
+fruit
+apple
diff --git a/contrib/netbsd-tests/usr.bin/grep/d_context_g.out b/contrib/netbsd-tests/usr.bin/grep/d_context_g.out
new file mode 100644
index 000000000000..c1b5048aa9c4
--- /dev/null
+++ b/contrib/netbsd-tests/usr.bin/grep/d_context_g.out
@@ -0,0 +1,8 @@
+apple
+fruit
+--
+banna
+apple
+fruit
+apple
+monkey
diff --git a/contrib/netbsd-tests/usr.bin/grep/t_grep.sh b/contrib/netbsd-tests/usr.bin/grep/t_grep.sh
index d265e5d09828..267ee87446ee 100755
--- a/contrib/netbsd-tests/usr.bin/grep/t_grep.sh
+++ b/contrib/netbsd-tests/usr.bin/grep/t_grep.sh
@@ -171,6 +171,12 @@ context_body()
 	atf_check -o file:d_context_b.out grep -A3 tilt d_context_a.in
 	atf_check -o file:d_context_c.out grep -B4 Whig d_context_a.in
 	atf_check -o file:d_context_d.out grep -C1 pig d_context_a.in d_context_b.in
+	atf_check -o file:d_context_e.out \
+	    grep -E -C1 '(banana|monkey)' d_context_e.in
+	atf_check -o file:d_context_f.out \
+	    grep -Ev -B2 '(banana|monkey|fruit)' d_context_e.in
+	atf_check -o file:d_context_g.out \
+	    grep -Ev -A1 '(banana|monkey|fruit)' d_context_e.in
 }
 
 atf_test_case file_exp
@@ -386,6 +392,32 @@ zerolen_body()
 	atf_check -o inline:"Eggs\nCheese\n" grep -v -e "^$" test1
 }
 
+atf_test_case wflag_emptypat
+wflag_emptypat_head()
+{
+	atf_set "descr" "Check for proper handling of -w with an empty pattern (PR 105221)"
+}
+wflag_emptypat_body()
+{
+	grep_type
+	if [ $? -eq $GREP_TYPE_GNU_FREEBSD ]; then
+		atf_expect_fail "this test does not pass with GNU grep in base"
+	fi
+
+	printf "" > test1
+	printf "\n" > test2
+	printf "qaz" > test3
+	printf " qaz\n" > test4
+
+	atf_check -s exit:1 -o empty grep -w -e "" test1
+
+	atf_check -o file:test2 grep -w -e "" test2
+
+	atf_check -s exit:1 -o empty grep -w -e "" test3
+
+	atf_check -o file:test4 grep -w -e "" test4
+}
+
 atf_test_case fgrep_sanity
 fgrep_sanity_head()
 {
@@ -490,6 +522,7 @@ atf_init_test_cases()
 	atf_add_test_case escmap
 	atf_add_test_case egrep_empty_invalid
 	atf_add_test_case zerolen
+	atf_add_test_case wflag_emptypat
 	atf_add_test_case wv_combo_break
 	atf_add_test_case fgrep_sanity
 	atf_add_test_case egrep_sanity
diff --git a/usr.bin/grep/grep.c b/usr.bin/grep/grep.c
index a4506120478d..033910514fbb 100644
--- a/usr.bin/grep/grep.c
+++ b/usr.bin/grep/grep.c
@@ -81,7 +81,13 @@ const char *errstr[] = {
 int		 cflags = REG_NOSUB;
 int		 eflags = REG_STARTEND;
 
-/* Shortcut for matching all cases like empty regex */
+/* XXX TODO: Get rid of this flag.
+ * matchall is a gross hack that means that an empty pattern was passed to us.
+ * It is a necessary evil at the moment because our regex(3) implementation
+ * does not allow for empty patterns, as supported by POSIX's definition of
+ * grammar for BREs/EREs. When libregex becomes available, it would be wise
+ * to remove this and let regex(3) handle the dirty details of empty patterns.
+ */
 bool		 matchall;
 
 /* Searching patterns */
@@ -153,9 +159,6 @@ enum {
 static inline const char	*init_color(const char *);
 
 /* Housekeeping */
-bool	 first = true;	/* flag whether we are processing the first match */
-bool	 prev;		/* flag whether or not the previous line matched */
-int	 tail;		/* lines left to print */
 bool	 file_err;	/* file reading error */
 
 /*
@@ -729,20 +732,25 @@ main(int argc, char *argv[])
 #endif
 	r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
 
-	/* Check if cheating is allowed (always is for fgrep). */
-	for (i = 0; i < patterns; ++i) {
+	/* Don't process any patterns if we have a blank one */
+	if (!matchall) {
+		/* Check if cheating is allowed (always is for fgrep). */
+		for (i = 0; i < patterns; ++i) {
 #ifndef WITHOUT_FASTMATCH
-		/* Attempt compilation with fastmatch regex and fallback to
-		   regex(3) if it fails. */
-		if (fastncomp(&fg_pattern[i], pattern[i].pat,
-		    pattern[i].len, cflags) == 0)
-			continue;
+			/*
+			 * Attempt compilation with fastmatch regex and
+			 * fallback to regex(3) if it fails.
+			 */
+			if (fastncomp(&fg_pattern[i], pattern[i].pat,
+			    pattern[i].len, cflags) == 0)
+				continue;
 #endif
-		c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
-		if (c != 0) {
-			regerror(c, &r_pattern[i], re_error,
-			    RE_ERROR_BUF);
-			errx(2, "%s", re_error);
+			c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
+			if (c != 0) {
+				regerror(c, &r_pattern[i], re_error,
+				    RE_ERROR_BUF);
+				errx(2, "%s", re_error);
+			}
 		}
 	}
 
diff --git a/usr.bin/grep/grep.h b/usr.bin/grep/grep.h
index ea17af4099dd..56ce456abb64 100644
--- a/usr.bin/grep/grep.h
+++ b/usr.bin/grep/grep.h
@@ -123,8 +123,7 @@ extern char *label;
 extern const char *color;
 extern int	 binbehave, devbehave, dirbehave, filebehave, grepbehave, linkbehave;
 
-extern bool	 file_err, first, matchall, prev;
-extern int	 tail;
+extern bool	 file_err, matchall;
 extern unsigned int dpatterns, fpatterns, patterns;
 extern struct pat *pattern;
 extern struct epat *dpattern, *fpattern;
@@ -145,10 +144,10 @@ void *grep_malloc(size_t size);
 void	*grep_calloc(size_t nmemb, size_t size);
 void	*grep_realloc(void *ptr, size_t size);
 char	*grep_strdup(const char *str);
-void	 printline(struct str *line, int sep, regmatch_t *matches, int m);
+void	 grep_printline(struct str *line, int sep);
 
 /* queue.c */
-void	 enqueue(struct str *x);
+bool	 enqueue(struct str *x);
 void	 printqueue(void);
 void	 clearqueue(void);
 
diff --git a/usr.bin/grep/queue.c b/usr.bin/grep/queue.c
index 18878880a36c..453183367b8c 100644
--- a/usr.bin/grep/queue.c
+++ b/usr.bin/grep/queue.c
@@ -53,7 +53,10 @@ static unsigned long long count;
 
 static struct qentry	*dequeue(void);
 
-void
+/*
+ * Enqueue another line; return true if we've dequeued a line as a result
+ */
+bool
 enqueue(struct str *x)
 {
 	struct qentry *item;
@@ -72,7 +75,9 @@ enqueue(struct str *x)
 		item = dequeue();
 		free(item->data.dat);
 		free(item);
+		return (true);
 	}
+	return (false);
 }
 
 static struct qentry *
@@ -95,7 +100,7 @@ printqueue(void)
 	struct qentry *item;
 
 	while ((item = dequeue()) != NULL) {
-		printline(&item->data, '-', NULL, 0);
+		grep_printline(&item->data, '-');
 		free(item->data.dat);
 		free(item);
 	}
diff --git a/usr.bin/grep/tests/Makefile b/usr.bin/grep/tests/Makefile
index f383aa220ad1..f0c0c86c6b47 100644
--- a/usr.bin/grep/tests/Makefile
+++ b/usr.bin/grep/tests/Makefile
@@ -20,9 +20,13 @@ ${PACKAGE}FILES+= d_context2_c.out
 ${PACKAGE}FILES+=		d_context_a.in
 ${PACKAGE}FILES+=		d_context_a.out
 ${PACKAGE}FILES+=		d_context_b.in
 ${PACKAGE}FILES+=		d_context_b.out
 ${PACKAGE}FILES+=		d_context_c.out
 ${PACKAGE}FILES+=		d_context_d.out
+${PACKAGE}FILES+=		d_context_e.in
+${PACKAGE}FILES+=		d_context_e.out
+${PACKAGE}FILES+=		d_context_f.out
+${PACKAGE}FILES+=		d_context_g.out
 ${PACKAGE}FILES+=		d_egrep.out
 ${PACKAGE}FILES+=		d_escmap.in
 ${PACKAGE}FILES+=		d_f_file_empty.in
diff --git a/usr.bin/grep/util.c b/usr.bin/grep/util.c
index dd9a753aac4e..a9c6a46f4a1e 100644
--- a/usr.bin/grep/util.c
+++ b/usr.bin/grep/util.c
@@ -54,11 +54,23 @@ __FBSDID("$FreeBSD$");
 #endif
 #include "grep.h"
 
-static int	 linesqueued;
-static int	 procline(struct str *l, int);
+static bool	 first_match = true;
 
-static int	 lasta;
-static bool	 ctxover;
+/*
+ * Parsing context; used to hold things like matches made and
+ * other useful bits
+ */
+struct parsec {
+	regmatch_t	matches[MAX_LINE_MATCHES];	/* Matches made */
+	struct str	ln;				/* Current line */
+	size_t		matchidx;			/* Latest used match index */
+	bool		binary;				/* Binary file? */
+};
+
+
+static int procline(struct parsec *pc);
+static void printline(struct parsec *pc, int sep);
+static void printline_metadata(struct str *line, int sep);
 
 bool
 file_matching(const char *fname)
@@ -183,12 +195,14 @@ grep_tree(char **argv)
 int
 procfile(const char *fn)
 {
+	struct parsec pc;
 	struct file *f;
 	struct stat sb;
-	struct str ln;
+	struct str *ln;
 	mode_t s;
-	int c, t;
+	int c, last_outed, t, tail;
+	bool doctx, printmatch, same_file;
 
 	mcount = mlimit;
 
 	if (strcmp(fn, "-") == 0) {
@@ -213,57 +227,95 @@ procfile(const char *fn)
 		return (0);
 	}
 
-	ln.file = grep_malloc(strlen(fn) + 1);
-	strcpy(ln.file, fn);
-	ln.line_no = 0;
-	ln.len = 0;
-	ctxover = false;
-	linesqueued = 0;
+	/* Convenience */
+	ln = &pc.ln;
+	pc.ln.file = grep_malloc(strlen(fn) + 1);
+	strcpy(pc.ln.file, fn);
+	pc.ln.line_no = 0;
+	pc.ln.len = 0;
+	pc.ln.off = -1;
+	pc.binary = f->binary;
+	/* Matching lines, and so their context, print only in these modes */
+	printmatch = (!pc.binary || binbehave != BINFILE_BIN) && !cflag &&
+	    !qflag && !lflag && !Lflag;
+	doctx = printmatch && (Aflag != 0 || Bflag != 0);
 	tail = 0;
-	lasta = 0;
-	ln.off = -1;
+	last_outed = 0;
+	same_file = false;
 
 	for (c = 0;  c == 0 || !(lflag || qflag); ) {
-		ln.off += ln.len + 1;
-		if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL || ln.len == 0) {
-			if (ln.line_no == 0 && matchall)
-				exit(0);
+		/* Reset match count for every line processed */
+		pc.matchidx = 0;
+		pc.ln.off += pc.ln.len + 1;
+		if ((pc.ln.dat = grep_fgetln(f, &pc.ln.len)) == NULL ||
+		    pc.ln.len == 0) {
+			if (pc.ln.line_no == 0 && matchall)
+				/*
+				 * An empty file with an empty pattern and the
+				 * -w flag does not match
+				 */
+				exit(wflag ? 1 : 0);
 			else
 				break;
 		}
-		if (ln.len > 0 && ln.dat[ln.len - 1] == fileeol)
-			--ln.len;
-		ln.line_no++;
+
+		if (pc.ln.len > 0 && pc.ln.dat[pc.ln.len - 1] == fileeol)
+			--pc.ln.len;
+		pc.ln.line_no++;
 
 		/* Return if we need to skip a binary file */
-		if (f->binary && binbehave == BINFILE_SKIP) {
+		if (pc.binary && binbehave == BINFILE_SKIP) {
 			grep_close(f);
-			free(ln.file);
+			free(pc.ln.file);
 			free(f);
 			return (0);
 		}
 
-		/* Process the file line-by-line, enqueue non-matching lines */
-		if ((t = procline(&ln, f->binary)) == 0 && Bflag > 0) {
-			/* Except don't enqueue lines that appear in -A ctx */
-			if (ln.line_no == 0 || lasta != ln.line_no) {
-				/* queue is maxed to Bflag number of lines */
-				enqueue(&ln);
-				linesqueued++;
-				ctxover = false;
+		if ((t = procline(&pc)) == 0)
+			++c;
+
+		/* Deal with any -B context or context separators */
+		if (t == 0 && doctx) {
+			if (!first_match && (!same_file || last_outed > 0))
+				printf("--\n");
+			if (Bflag > 0)
+				printqueue();
+			tail = Aflag;
+		}
+		/* Print the matching line, but only if not quiet/binary */
+		if (t == 0 && printmatch) {
+			printline(&pc, ':');
+			first_match = false;
+			same_file = true;
+			last_outed = 0;
+		}
+		if (t != 0 && doctx) {
+			/* Deal with any -A context */
+			if (tail > 0) {
+				printline(&pc, '-');
+				tail--;
+				if (Bflag > 0)
+					clearqueue();
 			} else {
 				/*
-				 * Indicate to procline() that we have ctx
-				 * overlap and make sure queue is empty.
+				 * Enqueue non-matching lines for -B context.
+				 * If we're not actually doing -B context or if
+				 * the enqueue resulted in a line being rotated
+				 * out, then go ahead and increment last_outed
+				 * to signify a gap between context/match.
 				 */
-				if (!ctxover)
-					clearqueue();
-				ctxover = true;
+				if (Bflag == 0 || (Bflag > 0 && enqueue(ln)))
+					++last_outed;
 			}
 		}
-		c += t;
-		if (mflag && mcount <= 0)
-			break;
+
+		/* Count the matches if we have a match limit */
+		if (t == 0 && mflag) {
+			--mcount;
+			if (mcount <= 0)
+				break;
+		}
+
 	}
 	if (Bflag > 0)
 		clearqueue();
@@ -271,7 +323,7 @@ procfile(const char *fn)
 
 	if (cflag) {
 		if (!hflag)
-			printf("%s:", ln.file);
+			printf("%s:", pc.ln.file);
 		printf("%u\n", c);
 	}
 	if (lflag && !qflag && c != 0)
@@ -282,7 +334,7 @@ procfile(const char *fn)
 	    binbehave == BINFILE_BIN && f->binary && !qflag)
 		printf(getstr(8), fn);
 
-	free(ln.file);
+	free(pc.ln.file);
 	free(f);
 	return (c);
 }
@@ -297,62 +349,72 @@ procfile(const char *fn)
  * appropriate output.
  */
 static int
-procline(struct str *l, int nottext)
+procline(struct parsec *pc)
 {
-	regmatch_t matches[MAX_LINE_MATCHES];
-	regmatch_t pmatch, lastmatch;
+	regmatch_t pmatch, lastmatch, chkmatch;
+	wchar_t wbegin, wend;
 	size_t st = 0, nst = 0;
 	unsigned int i;
-	int c = 0, m = 0, r = 0, lastmatches = 0, leflags = eflags;
-	int startm = 0;
+	int c = 0, r = 0, lastmatches = 0, leflags = eflags;
+	size_t startm = 0, matchidx;
 	int retry;
 
+	matchidx = pc->matchidx;
+
+	/* Special case: empty pattern with -w flag, check first character */
+	if (matchall && wflag) {
+		if (pc->ln.len == 0)
+			return (0);
+		wend = L' ';
+		if (sscanf(&pc->ln.dat[0], "%lc", &wend) != 1 || iswword(wend))
+			return (1);
+		else
+			return (0);
+	} else if (matchall)
+		return (0);
+
 	/* Initialize to avoid a false positive warning from GCC. */
 	lastmatch.rm_so = lastmatch.rm_eo = 0;
 
 	/* Loop to process the whole line */
-	while (st <= l->len) {
+	while (st <= pc->ln.len) {
 		lastmatches = 0;
-		startm = m;
+		startm = matchidx;
 		retry = 0;
 		if (st > 0)
 			leflags |= REG_NOTBOL;
 		/* Loop to compare with all the patterns */
 		for (i = 0; i < patterns; i++) {
 			pmatch.rm_so = st;
-			pmatch.rm_eo = l->len;
+			pmatch.rm_eo = pc->ln.len;
 #ifndef WITHOUT_FASTMATCH
 			if (fg_pattern[i].pattern)
 				r = fastexec(&fg_pattern[i],
-				    l->dat, 1, &pmatch, leflags);
+				    pc->ln.dat, 1, &pmatch, leflags);
 			else
 #endif
-				r = regexec(&r_pattern[i], l->dat, 1,
+				r = regexec(&r_pattern[i], pc->ln.dat, 1,
 				    &pmatch, leflags);
-			r = (r == 0) ? 0 : REG_NOMATCH;
-			if (r == REG_NOMATCH)
+			if (r != 0)
 				continue;
 			/* Check for full match */
-			if (r == 0 && xflag)
-				if (pmatch.rm_so != 0 ||
-				    (size_t)pmatch.rm_eo != l->len)
-					r = REG_NOMATCH;
+			if (xflag && (pmatch.rm_so != 0 ||
+			    (size_t)pmatch.rm_eo != pc->ln.len))
+				continue;
 			/* Check for whole word match */
 #ifndef WITHOUT_FASTMATCH
-			if (r == 0 && (wflag || fg_pattern[i].word)) {
+			if (wflag || fg_pattern[i].word) {
 #else
-			if (r == 0 && wflag) {
+			if (wflag) {
 #endif
-				wchar_t wbegin, wend;
-
 				wbegin = wend = L' ';
 				if (pmatch.rm_so != 0 &&
-				    sscanf(&l->dat[pmatch.rm_so - 1],
+				    sscanf(&pc->ln.dat[pmatch.rm_so - 1],
 				    "%lc", &wbegin) != 1)
 					r = REG_NOMATCH;
 				else if ((size_t)pmatch.rm_eo !=
-				    l->len &&
-				    sscanf(&l->dat[pmatch.rm_eo],
+				    pc->ln.len &&
+				    sscanf(&pc->ln.dat[pmatch.rm_eo],
 				    "%lc", &wend) != 1)
 					r = REG_NOMATCH;
 				else if (iswword(wbegin) ||
@@ -361,7 +423,7 @@ procline(struct str *l, int nottext)
 				/*
 				 * If we're doing whole word matching and we
 				 * matched once, then we should try the pattern
-				 * again after advancing just past the start of  
+				 * again after advancing just past the start of
 				 * the earliest match. This allows the pattern
 				 * to match later on in the line and possibly
 				 * still match a whole word.
@@ -369,33 +431,40 @@ procline(struct str *l, int nottext)
 				if (r == REG_NOMATCH &&
 				    (retry == 0 || pmatch.rm_so + 1 < retry))
 					retry = pmatch.rm_so + 1;
+				if (r == REG_NOMATCH)
+					continue;
 			}
 
-			if (r == 0) {
-				lastmatches++;
-				lastmatch = pmatch;
-				if (m == 0)
-					c++;
-				if (m < MAX_LINE_MATCHES) {
-					/* Replace previous match if the new one is earlier and/or longer */
-					if (m > startm) {
-						if (pmatch.rm_so < matches[m-1].rm_so ||
-						    (pmatch.rm_so == matches[m-1].rm_so && (pmatch.rm_eo - pmatch.rm_so) > (matches[m-1].rm_eo - matches[m-1].rm_so))) {
-							matches[m-1] = pmatch;
-							nst = pmatch.rm_eo;
-						}
-					} else {
-						/* Advance as normal if not */
-						matches[m++] = pmatch;
-						nst = pmatch.rm_eo;
-					}
+			lastmatches++;
+			lastmatch = pmatch;
+
+			if (matchidx == 0)
+				c++;
+
+			/*
+			 * Replace previous match if the new one is earlier
+			 * and/or longer. This will lead to some amount of
+			 * extra work if -o/--color are specified, but it's
+			 * worth it from a correctness point of view.
+			 */
+			if (matchidx > startm) {
+				chkmatch = pc->matches[matchidx - 1];
+				if (pmatch.rm_so < chkmatch.rm_so ||
+				    (pmatch.rm_so == chkmatch.rm_so &&
+				    (pmatch.rm_eo - pmatch.rm_so) >
+				    (chkmatch.rm_eo - chkmatch.rm_so))) {
+					pc->matches[matchidx - 1] = pmatch;
+					nst = pmatch.rm_eo;
 				}
-
-				/* matches - skip further patterns */
-				if ((color == NULL && !oflag) ||
-				    qflag || lflag)
-					break;
+			} else {
+				/* Advance as normal if not */
+				pc->matches[matchidx++] = pmatch;
+				nst = pmatch.rm_eo;
 			}
+			/* avoid excessive matching - skip further patterns */
+			if ((color == NULL && !oflag) || qflag || lflag ||
+			    matchidx >= MAX_LINE_MATCHES)
+				break;
 		}
 
 		/*
@@ -414,7 +483,7 @@ procline(struct str *l, int nottext)
 
 		/* If we didn't have any matches or REG_NOSUB set */
 		if (lastmatches == 0 || (cflags & REG_NOSUB))
-			nst = l->len;
+			nst = pc->ln.len;
 
 		if (lastmatches == 0)
 			/* No matches */
@@ -427,45 +496,11 @@ procline(struct str *l, int nottext)
 		st = nst;
 	}
 
-
+	/* Reflect the new matchidx in the context */
+	pc->matchidx = matchidx;
 	if (vflag)
 		c = !c;
-
-	/* Count the matches if we have a match limit */
-	if (mflag)
-		mcount -= c;
-
-	if (c && binbehave == BINFILE_BIN && nottext)
-		return (c); /* Binary file */
-
-	/* Dealing with the context */
-	if ((tail || c) && !cflag && !qflag && !lflag && !Lflag) {
-		if (c) {
-			if (!first && !prev && !tail && (Bflag || Aflag) &&
-			    !ctxover)
-				printf("--\n");
-			tail = Aflag;
-			if (Bflag > 0) {
-				printqueue();
-				ctxover = false;
-			}
-			linesqueued = 0;
-			printline(l, ':', matches, m);
-		} else {
-			/* Print -A lines following matches */
-			lasta = l->line_no;
-			printline(l, '-', matches, m);
-			tail--;
-		}
-	}
-
-	if (c) {
-		prev = true;
-		first = false;
-	} else
-		prev = false;
-
-	return (c);
+	return (c ? 0 : 1);
 }
 
 /*
@@ -520,69 +555,91 @@ grep_strdup(const char *str)
 }
 
 /*
- * Prints a matching line according to the command line options.
+ * Print an entire line as-is, there are no inline matches to consider. This is
+ * used for printing context.
  */
 void
-printline(struct str *line, int sep, regmatch_t *matches, int m)
+grep_printline(struct str *line, int sep)
+{
+	printline_metadata(line, sep);
+	fwrite(line->dat, line->len, 1, stdout);
+	putchar(fileeol);
+}
+
+static void
+printline_metadata(struct str *line, int sep)
 {
-	size_t a = 0;
-	int i, n = 0;
-
-	/* If matchall, everything matches but don't actually print for -o */
-	if (oflag && matchall)
-		return;
+	bool printsep;
 
+	printsep = false;
 	if (!hflag) {
 		if (!nullflag) {
 			fputs(line->file, stdout);
-			++n;
+			printsep = true;
 		} else {
 			printf("%s", line->file);
 			putchar(0);
 		}
 	}
 	if (nflag) {
-		if (n > 0)
+		if (printsep)
 			putchar(sep);
 		printf("%d", line->line_no);
-		++n;
+		printsep = true;
 	}
 	if (bflag) {
-		if (n > 0)
+		if (printsep)
 			putchar(sep);
 		printf("%lld", (long long)line->off);
-		++n;
+		printsep = true;
 	}
-	if (n)
+	if (printsep)
 		putchar(sep);
+}
+
+/*
+ * Prints a matching line according to the command line options.
+ */
+static void
+printline(struct parsec *pc, int sep)
+{
+	size_t a = 0;
+	size_t i, matchidx;
+	regmatch_t match;
+
+	/* If matchall, everything matches but don't actually print for -o */
+	if (oflag && matchall)
+		return;
+
+	matchidx = pc->matchidx;
+
 	/* --color and -o */
-	if ((oflag || color) && m > 0) {
-		for (i = 0; i < m; i++) {
+	if ((oflag || color) && matchidx > 0) {
+		printline_metadata(&pc->ln, sep);
+		for (i = 0; i < matchidx; i++) {
+			match = pc->matches[i];
 			/* Don't output zero length matches */
-			if (matches[i].rm_so == matches[i].rm_eo)
+			if (match.rm_so == match.rm_eo)
 				continue;
 			if (!oflag)
-				fwrite(line->dat + a, matches[i].rm_so - a, 1,
+				fwrite(pc->ln.dat + a, match.rm_so - a, 1,
 				    stdout);
-			if (color) 
+			if (color)
 				fprintf(stdout, "\33[%sm\33[K", color);
-
-			fwrite(line->dat + matches[i].rm_so,
-			    matches[i].rm_eo - matches[i].rm_so, 1,
-			    stdout);
-			if (color) 
+			fwrite(pc->ln.dat + match.rm_so,
+			    match.rm_eo - match.rm_so, 1, stdout);
+			if (color)
 				fprintf(stdout, "\33[m\33[K");
-			a = matches[i].rm_eo;
+			a = match.rm_eo;
 			if (oflag)
 				putchar('\n');
 		}
 		if (!oflag) {
-			if (line->len - a > 0)
-				fwrite(line->dat + a, line->len - a, 1, stdout);
+			if (pc->ln.len - a > 0)
+				fwrite(pc->ln.dat + a, pc->ln.len - a, 1,
+				    stdout);
 			putchar('\n');
 		}
-	} else {
-		fwrite(line->dat, line->len, 1, stdout);
-		putchar(fileeol);
-	}
+	} else
+		grep_printline(&pc->ln, sep);
 }