From 41e04e8c778f258989e62a9042d00ed91d6ffd16 Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Tue, 2 May 2017 20:44:06 +0000 Subject: [PATCH] bsdgrep: fix escape map building for multibyte strings In BSD grep, fix escape map building in the regex parser. It was previously using memory not explicitly initialized, and the MBS escape map was being built based on a version of the pattern with escapes already parsed out. This is Kyle's change, but I restored the broken style that already exists in this file. Submitted by: Kyle Evans Reviewed by: cem, Kyle Evans (my style changes) Differential Revision: https://reviews.freebsd.org/D10098 --- usr.bin/grep/regex/tre-fastmatch.c | 39 +++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/usr.bin/grep/regex/tre-fastmatch.c b/usr.bin/grep/regex/tre-fastmatch.c index 5c659b09f48e..bf59b9f8d551 100644 --- a/usr.bin/grep/regex/tre-fastmatch.c +++ b/usr.bin/grep/regex/tre-fastmatch.c @@ -98,6 +98,18 @@ static int fastcmp(const fastmatch_t *fg, const void *data, fg->pattern[siz] = '\0'; \ } \ +#define CONV_MBS_PAT(src, dest, destsz) \ + { \ + destsz = wcstombs(NULL, src, 0); \ + if (destsz == (size_t)-1) \ + return REG_BADPAT; \ + dest = malloc(destsz + 1); \ + if (dest == NULL) \ + return REG_ESPACE; \ + wcstombs(dest, src, destsz); \ + dest[destsz] = '\0'; \ + } \ + #define IS_OUT_OF_BOUNDS \ ((!fg->reversed \ ? ((type == STR_WIDE) ? ((j + fg->wlen) > len) \ @@ -723,15 +735,29 @@ tre_compile_fast(fastmatch_t *fg, const tre_char_t *pat, size_t n, } escaped = false; - for (unsigned int i = 0; i < fg->len; i++) - if (fg->pattern[i] == '\\') - escaped = !escaped; - else if (fg->pattern[i] == '.' && fg->escmap && escaped) + char *_checkpat = NULL; + size_t _checklen = 0; + unsigned int escofs = 0; + /* + * Make a copy here of the original pattern, because fg->pattern has + * already been stripped of all escape sequences in the above processing. + * This is necessary if we wish to later treat fg->escmap as an actual, + * functional replacement of fg->wescmap. + */ + CONV_MBS_PAT(pat, _checkpat, _checklen); + for (unsigned int i = 0; i < n; i++) + if (_checkpat[i] == '\\') { - fg->escmap[i] = true; + escaped = !escaped; + if (escaped) + ++escofs; + } + else if (_checkpat[i] == '.' && fg->escmap != NULL && escaped) + { + fg->escmap[i - escofs] = true; escaped = false; } - else if (fg->pattern[i] == '.' && !escaped) + else if (_checkpat[i] == '.' && !escaped) { hasdot = i; if (firstdot == -1) @@ -739,6 +765,7 @@ tre_compile_fast(fastmatch_t *fg, const tre_char_t *pat, size_t n, } else escaped = false; + free(_checkpat); } #else SAVE_PATTERN(tmp, pos, fg->pattern, fg->len);