bsdgrep: annihilate our in-tree TRE, previously disabled by default

It was an old TRE that had plenty of bugs and no performance gain over
regex(3). I disabled it by default in r323615, and there was some confusion
about what the knob does -- likely due to poor naming on my part -- to the
tune of "well, it sounds like it should speed things up" (mentioned by
multiple people).

To compound this, I have no intention of maintaining a second regex
implementation. If someone would like to step up and volunteer to maintain a
lean-and-mean implementation for grep, this is OK, but we have very few
volunteers to maintain even our primary regex implementation.
This commit is contained in:
Kyle Evans 2018-05-04 03:13:25 +00:00
parent 51688c129f
commit a2584d1b34
16 changed files with 3 additions and 1887 deletions

View File

@ -271,6 +271,7 @@ SCRIPTSGRP_${script:T}?= ${SCRIPTSGRP}
SCRIPTSMODE_${script:T}?= ${SCRIPTSMODE}
STAGE_AS_${script:T}= ${SCRIPTSDIR_${script:T}}/${SCRIPTSNAME_${script:T}}
# _scriptsinstall only depends on the per-script install target; it has no
# recipe of its own (a stray debugging echo was removed here).
_scriptsinstall: _SCRIPTSINS_${script:T}
_SCRIPTSINS_${script:T}: ${script}
${INSTALL} ${TAG_ARGS} -o ${SCRIPTSOWN_${.ALLSRC:T}} \
-g ${SCRIPTSGRP_${.ALLSRC:T}} -m ${SCRIPTSMODE_${.ALLSRC:T}} \

View File

@ -187,7 +187,6 @@ __DEFAULT_YES_OPTIONS = \
__DEFAULT_NO_OPTIONS = \
BSD_GREP \
BSD_GREP_FASTMATCH \
CLANG_EXTRAS \
DTRACE_TESTS \
GNU_GREP_COMPAT \

View File

@ -1,5 +0,0 @@
.\" $FreeBSD$
Set this option to exclude the fastmatch implementation from
.Xr bsdgrep 1 ,
instead using only
.Xr regex 3 .

View File

@ -1,3 +0,0 @@
.\" $FreeBSD$
Set this option to use the fastmatch implementation in
.Xr bsdgrep 1 .

View File

@ -17,15 +17,6 @@ bsdgrep.1: grep.1
.endif
SRCS= file.c grep.c queue.c util.c
.if ${MK_BSD_GREP_FASTMATCH} == "yes"
# Extra files backported for some regex improvements
.PATH: ${.CURDIR}/regex
SRCS+= fastmatch.c hashtable.c tre-compile.c tre-fastmatch.c
CFLAGS+=-I${.CURDIR}/regex
.else
CFLAGS+= -DWITHOUT_FASTMATCH
.endif
SCRIPTS= zgrep.sh
LINKS= ${BINDIR}/zgrep ${BINDIR}/zfgrep \
${BINDIR}/zgrep ${BINDIR}/zegrep \

View File

@ -51,9 +51,6 @@ __FBSDID("$FreeBSD$");
#include <string.h>
#include <unistd.h>
#ifndef WITHOUT_FASTMATCH
#include "fastmatch.h"
#endif
#include "grep.h"
#ifndef WITHOUT_NLS
@ -96,9 +93,6 @@ unsigned int patterns;
static unsigned int pattern_sz;
struct pat *pattern;
regex_t *r_pattern;
#ifndef WITHOUT_FASTMATCH
fastmatch_t *fg_pattern;
#endif
/* Filename exclusion/inclusion patterns */
unsigned int fpatterns, dpatterns;
@ -712,9 +706,6 @@ main(int argc, char *argv[])
usage();
}
#ifndef WITHOUT_FASTMATCH
fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
#endif
r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
/* Don't process any patterns if we have a blank one */
@ -725,15 +716,6 @@ main(int argc, char *argv[])
#endif
/* Check if cheating is allowed (always is for fgrep). */
for (i = 0; i < patterns; ++i) {
#ifndef WITHOUT_FASTMATCH
/*
* Attempt compilation with fastmatch regex and
* fallback to regex(3) if it fails.
*/
if (fastncomp(&fg_pattern[i], pattern[i].pat,
pattern[i].len, cflags) == 0)
continue;
#endif
c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
if (c != 0) {
regerror(c, &r_pattern[i], re_error,

View File

@ -38,10 +38,6 @@
#include <stdio.h>
#include <zlib.h>
#ifndef WITHOUT_FASTMATCH
#include "fastmatch.h"
#endif
#ifdef WITHOUT_NLS
#define getstr(n) errstr[n]
#else
@ -131,9 +127,6 @@ extern unsigned int dpatterns, fpatterns, patterns;
extern struct pat *pattern;
extern struct epat *dpattern, *fpattern;
extern regex_t *er_pattern, *r_pattern;
#ifndef WITHOUT_FASTMATCH
extern fastmatch_t *fg_pattern;
#endif
/* For regex errors */
#define RE_ERROR_BUF 512

View File

@ -1,170 +0,0 @@
/* $FreeBSD$ */
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (C) 2011 Gabor Kovesdan <gabor@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "glue.h"
#include <errno.h>
#include <fastmatch.h>
#include <regex.h>
#include <string.h>
#include "tre-fastmatch.h"
/*
 * Compile the first n bytes of regex as a literal pattern.
 *
 * A non-empty pattern is converted with tre_convert_pattern(), compiled with
 * tre_compile_literal(), and the converted copy is released again with
 * tre_free_pattern().  An empty pattern (n == 0) is compiled directly as a
 * NULL pattern of length 0.
 *
 * Returns the conversion status if conversion fails, otherwise the value
 * returned by tre_compile_literal().
 */
int
tre_fixncomp(fastmatch_t *preg, const char *regex, size_t n, int cflags)
{
	tre_char_t *converted;
	size_t converted_len;
	int status;

	if (n == 0)
		return tre_compile_literal(preg, NULL, 0, cflags);

	status = tre_convert_pattern(regex, n, &converted, &converted_len);
	if (status != REG_OK)
		return status;

	status = tre_compile_literal(preg, converted, converted_len, cflags);
	tre_free_pattern(converted);
	return status;
}
/*
 * Compile the first n bytes of regex for the fast matcher.
 *
 * A non-empty pattern is converted with tre_convert_pattern() and then
 * compiled with tre_compile_literal() when REG_LITERAL is set in cflags, or
 * with tre_compile_fast() otherwise; the converted copy is released with
 * tre_free_pattern() afterwards.  An empty pattern (n == 0) is always
 * compiled as a NULL literal of length 0.
 *
 * Returns the conversion status if conversion fails, otherwise the value
 * returned by the compile step.
 */
int
tre_fastncomp(fastmatch_t *preg, const char *regex, size_t n, int cflags)
{
	tre_char_t *converted;
	size_t converted_len;
	int status;

	if (n == 0)
		return tre_compile_literal(preg, NULL, 0, cflags);

	status = tre_convert_pattern(regex, n, &converted, &converted_len);
	if (status != REG_OK)
		return status;

	if (cflags & REG_LITERAL)
		status = tre_compile_literal(preg, converted, converted_len,
		    cflags);
	else
		status = tre_compile_fast(preg, converted, converted_len,
		    cflags);

	tre_free_pattern(converted);
	return status;
}
/*
 * NUL-terminated variant of tre_fixncomp(): the pattern length is taken
 * with strlen(), or 0 when regex is NULL.
 */
int
tre_fixcomp(fastmatch_t *preg, const char *regex, int cflags)
{
	size_t n = 0;

	if (regex)
		n = strlen(regex);
	return tre_fixncomp(preg, regex, n, cflags);
}
/*
 * NUL-terminated variant of tre_fastncomp(): the pattern length is taken
 * with strlen(), or 0 when regex is NULL.
 */
int
tre_fastcomp(fastmatch_t *preg, const char *regex, int cflags)
{
	size_t n = 0;

	if (regex)
		n = strlen(regex);
	return tre_fastncomp(preg, regex, n, cflags);
}
/*
 * Wide-character literal compile: hands the first n wide characters of
 * regex straight to tre_compile_literal() and returns its result.
 */
int
tre_fixwncomp(fastmatch_t *preg, const wchar_t *regex, size_t n, int cflags)
{
	int status;

	status = tre_compile_literal(preg, regex, n, cflags);
	return status;
}
/*
 * Wide-character compile for the fast matcher: uses tre_compile_literal()
 * when REG_LITERAL is set in cflags and tre_compile_fast() otherwise, and
 * returns that function's result.
 */
int
tre_fastwncomp(fastmatch_t *preg, const wchar_t *regex, size_t n, int cflags)
{
	if (cflags & REG_LITERAL)
		return tre_compile_literal(preg, regex, n, cflags);

	return tre_compile_fast(preg, regex, n, cflags);
}
/*
 * NUL-terminated variant of tre_fixwncomp(): the pattern length is taken
 * with tre_strlen(), or 0 when regex is NULL.
 */
int
tre_fixwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags)
{
	size_t n = 0;

	if (regex)
		n = tre_strlen(regex);
	return tre_fixwncomp(preg, regex, n, cflags);
}
/*
 * NUL-terminated variant of tre_fastwncomp(): the pattern length is taken
 * with tre_strlen(), or 0 when regex is NULL.
 */
int
tre_fastwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags)
{
	size_t n = 0;

	if (regex)
		n = tre_strlen(regex);
	return tre_fastwncomp(preg, regex, n, cflags);
}
/*
 * Public release entry point for a compiled pattern; simply forwards preg
 * to tre_free_fast().
 */
void
tre_fastfree(fastmatch_t *preg)
{

	tre_free_fast(preg);
}
/*
 * Match a byte string of length len against a compiled pattern.
 *
 * The string is treated as STR_BYTE when TRE_MB_CUR_MAX is 1 and as STR_MBS
 * otherwise.  Without REG_STARTEND the call goes straight to
 * tre_match_fast().  With REG_STARTEND, CALL_WITH_OFFSET() performs the
 * match on the sub-range given by pmatch[0] and returns from this function
 * itself.
 */
int
tre_fastnexec(const fastmatch_t *preg, const char *string, size_t len,
    size_t nmatch, regmatch_t pmatch[], int eflags)
{
	tre_str_type_t kind;

	if (TRE_MB_CUR_MAX == 1)
		kind = STR_BYTE;
	else
		kind = STR_MBS;

	if (!(eflags & REG_STARTEND))
		return tre_match_fast(preg, string, len, kind, nmatch,
		    pmatch, eflags);

	/* The macro always returns; `offset' and `slen' are declared by it. */
	CALL_WITH_OFFSET(tre_match_fast(preg, &string[offset], slen,
	    kind, nmatch, pmatch, eflags));
}
/*
 * Length-less variant of tre_fastnexec(): forwards all arguments and passes
 * (size_t)-1 as the string length.
 * NOTE(review): (size_t)-1 presumably tells the matcher to rely on the NUL
 * terminator -- confirm in tre_match_fast().
 */
int
tre_fastexec(const fastmatch_t *preg, const char *string, size_t nmatch,
    regmatch_t pmatch[], int eflags)
{
	const size_t no_length = (size_t)-1;

	return tre_fastnexec(preg, string, no_length, nmatch, pmatch, eflags);
}
/*
 * Match a wide-character string of length len against a compiled pattern.
 *
 * The string type is always STR_WIDE.  Without REG_STARTEND the call goes
 * straight to tre_match_fast().  With REG_STARTEND, CALL_WITH_OFFSET()
 * performs the match on the sub-range given by pmatch[0] and returns from
 * this function itself.
 */
int
tre_fastwnexec(const fastmatch_t *preg, const wchar_t *string, size_t len,
    size_t nmatch, regmatch_t pmatch[], int eflags)
{
	tre_str_type_t kind = STR_WIDE;

	if (!(eflags & REG_STARTEND))
		return tre_match_fast(preg, string, len, kind, nmatch,
		    pmatch, eflags);

	/* The macro always returns; `offset' and `slen' are declared by it. */
	CALL_WITH_OFFSET(tre_match_fast(preg, &string[offset], slen,
	    kind, nmatch, pmatch, eflags));
}
/*
 * Length-less variant of tre_fastwnexec(): forwards all arguments and
 * passes (size_t)-1 as the string length.
 * NOTE(review): (size_t)-1 presumably tells the matcher to rely on the
 * terminating L'\0' -- confirm in tre_match_fast().
 */
int
tre_fastwexec(const fastmatch_t *preg, const wchar_t *string,
    size_t nmatch, regmatch_t pmatch[], int eflags)
{
	const size_t no_length = (size_t)-1;

	return tre_fastwnexec(preg, string, no_length, nmatch, pmatch, eflags);
}

View File

@ -1,95 +0,0 @@
/* $FreeBSD$ */
#ifndef FASTMATCH_H
#define FASTMATCH_H 1
#include <limits.h>
#include <regex.h>
#include <stdbool.h>
#include <wchar.h>
/*
 * Compiled state of one fastmatch pattern.
 *
 * NOTE(review): the code that fills these fields is not part of this header;
 * comments marked "presumably" are inferred from the field names only and
 * should be confirmed against tre-fastmatch.c.
 */
typedef struct {
size_t wlen; /* presumably length of wpattern -- TODO confirm */
size_t len; /* presumably length of pattern -- TODO confirm */
wchar_t *wpattern; /* presumably wide-character form of the pattern */
bool *wescmap; /* presumably per-character escape map for wpattern */
unsigned int qsBc[UCHAR_MAX + 1]; /* one slot per possible byte value */
unsigned int *bmGs;
char *pattern; /* grep's procline() uses fastexec() only when this is non-NULL */
bool *escmap; /* presumably per-character escape map for pattern */
unsigned int defBc;
void *qsBc_table;
unsigned int *sbmGs;
const char *re_endp;
/* flags */
bool hasdot;
bool bol;
bool eol;
bool word; /* grep's procline() performs its whole-word check when set */
bool icase;
bool newline;
bool nosub; /* when set, CALL_WITH_OFFSET() does not shift pmatch[] */
bool matchall;
bool reversed;
} fastmatch_t;
extern int
tre_fixcomp(fastmatch_t *preg, const char *regex, int cflags);
extern int
tre_fastcomp(fastmatch_t *preg, const char *regex, int cflags);
extern int
tre_fastexec(const fastmatch_t *preg, const char *string, size_t nmatch,
regmatch_t pmatch[], int eflags);
extern void
tre_fastfree(fastmatch_t *preg);
extern int
tre_fixwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags);
extern int
tre_fastwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags);
extern int
tre_fastwexec(const fastmatch_t *preg, const wchar_t *string,
size_t nmatch, regmatch_t pmatch[], int eflags);
/* Versions with a maximum length argument and therefore the capability to
handle null characters in the middle of the strings. */
extern int
tre_fixncomp(fastmatch_t *preg, const char *regex, size_t len, int cflags);
extern int
tre_fastncomp(fastmatch_t *preg, const char *regex, size_t len, int cflags);
extern int
tre_fastnexec(const fastmatch_t *preg, const char *string, size_t len,
size_t nmatch, regmatch_t pmatch[], int eflags);
extern int
tre_fixwncomp(fastmatch_t *preg, const wchar_t *regex, size_t len, int cflags);
extern int
tre_fastwncomp(fastmatch_t *preg, const wchar_t *regex, size_t len, int cflags);
extern int
tre_fastwnexec(const fastmatch_t *preg, const wchar_t *string, size_t len,
size_t nmatch, regmatch_t pmatch[], int eflags);
#define fixncomp tre_fixncomp
#define fastncomp tre_fastncomp
#define fixcomp tre_fixcomp
#define fastcomp tre_fastcomp
#define fixwncomp tre_fixwncomp
#define fastwncomp tre_fastwncomp
#define fixwcomp tre_fixwcomp
#define fastwcomp tre_fastwcomp
#define fastfree tre_fastfree
#define fastnexec tre_fastnexec
#define fastexec tre_fastexec
#define fastwnexec tre_fastwnexec
#define fastwexec tre_fastwexec
#endif /* FASTMATCH_H */

View File

@ -1,67 +0,0 @@
/* $FreeBSD$ */
#ifndef GLUE_H
#define GLUE_H
#include <limits.h>
#undef RE_DUP_MAX
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#define TRE_WCHAR 1
#define TRE_MULTIBYTE 1
#define HAVE_MBSTATE_T 1
#define TRE_CHAR(n) L##n
#define CHF "%lc"
#define tre_char_t wchar_t
#define tre_mbrtowc(pwc, s, n, ps) (mbrtowc((pwc), (s), (n), (ps)))
#define tre_strlen wcslen
#define tre_isspace iswspace
#define tre_isalnum iswalnum
#define REG_OK 0
#define REG_LITERAL 0020
#define REG_WORD 0100
#define REG_GNU 0400
#define TRE_MB_CUR_MAX MB_CUR_MAX
#ifndef _GREP_DEBUG
#define DPRINT(msg)
#else
#define DPRINT(msg) do {printf msg; fflush(stdout);} while(/*CONSTCOND*/0)
#endif
/*
 * Minimum / maximum of two values.
 *
 * Both arguments are fully parenthesized so that expressions containing
 * operators of lower precedence than `>' (e.g. MIN(x & 7, 3)) are compared
 * as written.  Each argument may be evaluated twice, so do not pass
 * expressions with side effects.
 */
#define MIN(a,b) (((a) > (b)) ? (b) : (a))
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
typedef enum { STR_WIDE, STR_BYTE, STR_MBS, STR_USER } tre_str_type_t;
/*
 * Helper for the REG_STARTEND paths of the exec wrappers.
 *
 * Must be expanded inside a function that has `preg', `pmatch' and `nmatch'
 * in scope, because the expansion reads them directly.  It declares `slen'
 * (rm_eo - rm_so of pmatch[0]), `offset' (rm_so of pmatch[0]) and `ret',
 * so the `fn' expression may refer to `offset' and `slen'.
 *
 * The expansion always returns from the enclosing function:
 *  - REG_NOMATCH when pmatch[0].rm_eo is smaller than pmatch[0].rm_so;
 *  - otherwise the value of `fn', after adding `offset' to rm_so/rm_eo of
 *    the first `nmatch' entries of pmatch[] (skipped when preg->nosub).
 *
 * NOTE(review): the offsets are added regardless of the value of `ret';
 * confirm that this is intended when `fn' reports no match.
 */
#define CALL_WITH_OFFSET(fn) \
do \
{ \
size_t slen = (size_t)(pmatch[0].rm_eo - pmatch[0].rm_so); \
size_t offset = pmatch[0].rm_so; \
int ret; \
\
if ((long long)pmatch[0].rm_eo - pmatch[0].rm_so < 0) \
return REG_NOMATCH; \
ret = fn; \
for (unsigned i = 0; (!preg->nosub && (i < nmatch)); i++) \
{ \
pmatch[i].rm_so += offset; \
pmatch[i].rm_eo += offset; \
} \
return ret; \
} while (0 /*CONSTCOND*/)
int
tre_convert_pattern(const char *regex, size_t n, tre_char_t **w,
size_t *wn);
void
tre_free_pattern(tre_char_t *wregex);
#endif

View File

@ -1,270 +0,0 @@
/* $FreeBSD$ */
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (C) 2011 Gabor Kovesdan <gabor@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "glue.h"
#include <errno.h>
#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
#include "hashtable.h"
/*
 * Fold len bytes starting at buf into a 32-bit hash, one HASHSTEP() per
 * byte.  Pass 0 as hash to start a new hash, or a previous result to
 * continue hashing where an earlier buffer left off.
 */
static uint32_t
hash32_buf(const void *buf, size_t len, uint32_t hash)
{
	const unsigned char *bytes = buf;
	size_t i;

	for (i = 0; i < len; i++)
		hash = HASHSTEP(hash, bytes[i]);

	return hash;
}
/*
 * Allocate a hash table with room for table_size entries whose keys are
 * key_size bytes and whose values are value_size bytes long.
 *
 * Returns the new table, with all slots empty and usage 0.  If either
 * allocation fails, nothing is leaked, errno is set to ENOMEM and NULL is
 * returned.
 */
hashtable *
hashtable_init(size_t table_size, size_t key_size, size_t value_size)
{
	hashtable *tbl;

	DPRINT(("hashtable_init: table_size %zu, key_size %zu, value_size %zu\n",
	    table_size, key_size, value_size));

	tbl = malloc(sizeof(*tbl));
	if (tbl != NULL) {
		/* calloc() leaves every slot NULL, i.e. empty. */
		tbl->entries = calloc(table_size, sizeof(*tbl->entries));
		if (tbl->entries != NULL) {
			tbl->key_size = key_size;
			tbl->value_size = value_size;
			tbl->table_size = table_size;
			tbl->usage = 0;
			return (tbl);
		}
		free(tbl);
	}

	DPRINT(("hashtable_init: allocation failed\n"));
	errno = ENOMEM;
	return (NULL);
}
/*
 * Places the key-value pair to the hashtable tbl.
 * Returns:
 *   HASH_OK:      if the key was not present in the hash table yet
 *                 but the key-value pair has been successfully added.
 *   HASH_UPDATED: if the value for the key has been updated with the
 *                 new value.
 *   HASH_FULL:    if the hash table is full and the entry could not
 *                 be added.
 *   HASH_FAIL:    if an allocation failed; errno is set to ENOMEM and
 *                 the table is left unchanged.
 *
 * The key (key_size bytes) and value (value_size bytes) are copied, so the
 * caller keeps ownership of its buffers.
 */
int
hashtable_put(hashtable *tbl, const void *key, const void *value)
{
	hashtable_entry *entry;
	uint32_t hash = 0;

	if (tbl->table_size == tbl->usage) {
		DPRINT(("hashtable_put: hashtable is full\n"));
		return (HASH_FULL);
	}

	hash = hash32_buf(key, tbl->key_size, hash) % tbl->table_size;
	DPRINT(("hashtable_put: calculated hash %" PRIu32 "\n", hash));

	/*
	 * On hash collision entries are inserted at the next free space,
	 * so we have to increase the index until we either find an entry
	 * with the same key (and update it) or we find a free space.
	 * The table is not full here, so a free slot always exists.
	 */
	while (tbl->entries[hash] != NULL) {
		if (memcmp(tbl->entries[hash]->key, key, tbl->key_size) == 0) {
			memcpy(tbl->entries[hash]->value, value,
			    tbl->value_size);
			DPRINT(("hashtable_put: effective location is %" PRIu32
			    ", entry updated\n", hash));
			return (HASH_UPDATED);
		}
		if (++hash == tbl->table_size)
			hash = 0;
	}

	DPRINT(("hashtable_put: effective location is %" PRIu32 "\n", hash));

	/*
	 * Build the entry completely before publishing it in the table, so
	 * that a failed allocation can never leave a dangling (already
	 * freed) pointer in tbl->entries[hash].
	 */
	entry = malloc(sizeof(*entry));
	if (entry == NULL)
		goto fail;

	entry->key = malloc(tbl->key_size);
	if (entry->key == NULL) {
		free(entry);
		goto fail;
	}

	entry->value = malloc(tbl->value_size);
	if (entry->value == NULL) {
		free(entry->key);
		free(entry);
		goto fail;
	}

	memcpy(entry->key, key, tbl->key_size);
	memcpy(entry->value, value, tbl->value_size);
	tbl->entries[hash] = entry;
	tbl->usage++;

	DPRINT(("hashtable_put: entry successfully inserted\n"));

	return (HASH_OK);

fail:
	DPRINT(("hashtable_put: insertion failed\n"));
	errno = ENOMEM;
	return (HASH_FAIL);
}
/*
 * Find the slot holding key in tbl.
 *
 * Starts at the key's hash position and walks forward (wrapping at
 * table_size) until the key is found or an empty slot is hit.  Returns a
 * pointer to the slot inside tbl->entries, or NULL if the key is absent.
 *
 * The walk is limited to table_size probes so that a completely full table
 * that does not contain the key cannot loop forever, and a table with zero
 * slots is rejected up front to avoid a modulo by zero.
 */
static hashtable_entry **
hashtable_lookup(const hashtable *tbl, const void *key)
{
	uint32_t hash = 0;
	size_t probes;

	if (tbl->table_size == 0)
		return (NULL);

	hash = hash32_buf(key, tbl->key_size, hash) % tbl->table_size;

	for (probes = 0; probes < tbl->table_size; probes++) {
		if (tbl->entries[hash] == NULL)
			return (NULL);
		if (memcmp(key, tbl->entries[hash]->key, tbl->key_size) == 0) {
			DPRINT(("hashtable_lookup: entry found at location %" PRIu32 "\n", hash));
			return (&tbl->entries[hash]);
		}
		if (++hash == tbl->table_size)
			hash = 0;
	}

	/* Every slot was occupied by some other key. */
	return (NULL);
}
/*
 * Look up key in tbl and, when present, copy its stored value
 * (tbl->value_size bytes) into the caller-supplied buffer value.
 *
 * Returns HASH_OK when the value was copied, HASH_NOTFOUND when the key is
 * not in the table (value is left untouched in that case).
 */
int
hashtable_get(hashtable *tbl, const void *key, void *value)
{
	hashtable_entry **slot = hashtable_lookup(tbl, key);

	if (slot != NULL) {
		memcpy(value, (*slot)->value, tbl->value_size);
		DPRINT(("hashtable_get: entry successfully copied into output buffer\n"));
		return (HASH_OK);
	}

	DPRINT(("hashtable_get: entry is not available in the hashtable\n"));
	return (HASH_NOTFOUND);
}
/*
 * Removes the entry with the specified key from the hash table
 * tbl. Returns HASH_OK if the entry has been found and removed
 * or HASH_NOTFOUND otherwise.
 *
 * Collisions are resolved by placing entries in the next free slot, and
 * lookups stop at the first empty slot.  Simply emptying the removed slot
 * would therefore hide every colliding entry stored behind it, so the
 * entries that follow the hole are moved back where necessary.
 */
int
hashtable_remove(hashtable *tbl, const void *key)
{
	hashtable_entry **entry;
	size_t hole, probe, home;

	entry = hashtable_lookup(tbl, key);
	if (entry == NULL) {
		DPRINT(("hashtable_remove: entry is not available in the hashtable\n"));
		return (HASH_NOTFOUND);
	}

	hole = (size_t)(entry - tbl->entries);

	free((*entry)->key);
	free((*entry)->value);
	free(*entry);
	*entry = NULL;
	tbl->usage--;

	/*
	 * Close the gap: walk the run of occupied slots after the hole.  An
	 * entry may stay where it is only if its home slot lies cyclically
	 * in (hole, probe]; otherwise a lookup starting at its home slot
	 * would hit the hole first, so it is moved into the hole and its old
	 * slot becomes the new hole.  The walk ends at the first empty slot,
	 * which exists because the hole itself is empty.
	 */
	probe = hole;
	for (;;) {
		if (++probe == tbl->table_size)
			probe = 0;
		if (tbl->entries[probe] == NULL)
			break;

		home = hash32_buf(tbl->entries[probe]->key, tbl->key_size, 0) %
		    tbl->table_size;
		if (hole <= probe ? (hole < home && home <= probe) :
		    (hole < home || home <= probe))
			continue;

		tbl->entries[hole] = tbl->entries[probe];
		tbl->entries[probe] = NULL;
		hole = probe;
	}

	DPRINT(("hashtable_remove: entry successfully removed\n"));
	return (HASH_OK);
}
/*
 * Frees the resources associated with the hash table tbl: the key and
 * value copies of every stored entry, the entry records themselves, the
 * slot array and finally the table object allocated by hashtable_init().
 *
 * Passing NULL is a no-op.  tbl must not be used after this call.
 */
void
hashtable_free(hashtable *tbl)
{
	if (tbl == NULL)
		return;

	for (size_t i = 0; i < tbl->table_size; i++) {
		hashtable_entry *entry = tbl->entries[i];

		if (entry == NULL)
			continue;
		free(entry->key);
		free(entry->value);
		/* The record itself was malloc()ed by hashtable_put(). */
		free(entry);
	}

	free(tbl->entries);
	/* The table object was malloc()ed by hashtable_init(). */
	free(tbl);
	DPRINT(("hashtable_free: resources are successfully freed\n"));
}

View File

@ -1,35 +0,0 @@
/* $FreeBSD$ */
#ifndef HASHTABLE_H
#define HASHTABLE_H 1
#include <sys/types.h>
#define HASH_OK 0
#define HASH_UPDATED 1
#define HASH_FAIL 2
#define HASH_FULL 3
#define HASH_NOTFOUND 4
/*
 * One hashing step: x * 33 + c, written as (x << 5) + x + c.
 * Both arguments are parenthesized so that compound expressions expand
 * correctly; note that x is evaluated twice.
 */
#define HASHSTEP(x,c) ((((x) << 5) + (x)) + (c))
/*
 * One stored key/value pair.  hashtable_put() allocates the record and
 * private copies of the key and the value; hashtable_remove() frees all
 * three.
 */
typedef struct {
void *key; /* copy of the key, key_size bytes */
void *value; /* copy of the value, value_size bytes */
} hashtable_entry;
/*
 * Fixed-capacity hash table created by hashtable_init().  All keys share
 * one size and all values share one size.
 */
typedef struct {
size_t key_size; /* size of every key in bytes */
size_t table_size; /* number of slots in entries[] */
size_t usage; /* number of slots currently occupied */
size_t value_size; /* size of every value in bytes */
hashtable_entry **entries; /* slot array; NULL marks an empty slot */
} hashtable;
void hashtable_free(hashtable *);
int hashtable_get(hashtable *, const void *, void *);
hashtable *hashtable_init(size_t, size_t, size_t);
int hashtable_put(hashtable *, const void *, const void *);
int hashtable_remove(hashtable *, const void *);
#endif /* HASHTABLE_H */

View File

@ -1,101 +0,0 @@
/* $FreeBSD$ */
#include "glue.h"
#include <stdio.h>
#include <assert.h>
#include <errno.h>
#include <regex.h>
#include <string.h>
#include <wchar.h>
/*
 * Convert the first n bytes of regex into a newly allocated tre_char_t
 * string.
 *
 * On success REG_OK is returned, *w points to the converted pattern (always
 * terminated) and *wn holds its length in characters; the caller releases
 * it with tre_free_pattern().  Returns REG_ESPACE if the buffer cannot be
 * allocated and REG_BADPAT if the input is not a valid multibyte sequence.
 * On failure *w and *wn are not modified.
 *
 * An incomplete multibyte character at the very end of the input is not an
 * error; it is dropped from the converted pattern.
 */
int
tre_convert_pattern(const char *regex, size_t n, tre_char_t **w,
    size_t *wn)
{
#if TRE_WCHAR
	tre_char_t *wregex;
	size_t wlen;

	/* Reject lengths for which (n + 1) * sizeof(tre_char_t) would wrap. */
	if (n >= (size_t)-1 / sizeof(tre_char_t))
		return REG_ESPACE;

	wregex = malloc(sizeof(tre_char_t) * (n + 1));
	if (wregex == NULL)
		return REG_ESPACE;

	/* If the current locale uses the standard single byte encoding of
	   characters, we don't do a multibyte string conversion.  If we did,
	   many applications which use the default locale would break since
	   the default "C" locale uses the 7-bit ASCII character set, and
	   all characters with the eighth bit set would be considered invalid. */
#if TRE_MULTIBYTE
	if (TRE_MB_CUR_MAX == 1)
#endif /* TRE_MULTIBYTE */
	{
		size_t i;
		const unsigned char *str = (const unsigned char *)regex;
		tre_char_t *wstr = wregex;

		for (i = 0; i < n; i++)
			*(wstr++) = *(str++);
		wlen = n;
	}
#if TRE_MULTIBYTE
	else
	{
		/* mbrtowc() reports errors as (size_t)-1 and (size_t)-2. */
		size_t consumed;
		tre_char_t *wcptr = wregex;
#ifdef HAVE_MBSTATE_T
		mbstate_t state;
		memset(&state, '\0', sizeof(state));
#endif /* HAVE_MBSTATE_T */
		while (n > 0)
		{
			consumed = tre_mbrtowc(wcptr, regex, n, &state);

			if (consumed == (size_t)-1)
			{
				DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno)));
				free(wregex);
				return REG_BADPAT;
			}
			if (consumed == (size_t)-2)
			{
				/* The last character wasn't complete.  Let's not call
				   it a fatal error.  Nothing was stored in *wcptr, so
				   stop here without counting it. */
				break;
			}
			if (consumed == 0)
			{
				/* A zero return is only valid for an embedded NUL. */
				if (*regex != '\0')
				{
					free(wregex);
					return REG_BADPAT;
				}
				consumed = 1;
			}

			regex += consumed;
			n -= consumed;
			wcptr++;
		}
		wlen = wcptr - wregex;
	}
#endif /* TRE_MULTIBYTE */
	wregex[wlen] = L'\0';
	*w = wregex;
	*wn = wlen;
	return REG_OK;
#else /* !TRE_WCHAR */
	{
		/* No conversion needed: hand back the caller's own buffer. */
		*w = (tre_char_t *)regex;
		*wn = n;
		return REG_OK;
	}
#endif /* !TRE_WCHAR */
}
/*
 * Release a pattern obtained from tre_convert_pattern().  The buffer is
 * only freed in TRE_WCHAR builds; in other builds this function does
 * nothing.
 */
void
tre_free_pattern(tre_char_t *wregex)
{
#if !TRE_WCHAR
	(void)wregex;
#else
	free(wregex);
#endif
}

File diff suppressed because it is too large Load Diff

View File

@ -1,21 +0,0 @@
/* $FreeBSD$ */
#ifndef TRE_FASTMATCH_H
#define TRE_FASTMATCH_H 1
#include <fastmatch.h>
#include <hashtable.h>
#include <limits.h>
#include <regex.h>
#include <stdbool.h>
#include "hashtable.h"
int tre_compile_literal(fastmatch_t *preg, const tre_char_t *regex,
size_t, int);
int tre_compile_fast(fastmatch_t *preg, const tre_char_t *regex, size_t, int);
int tre_match_fast(const fastmatch_t *fg, const void *data, size_t len,
tre_str_type_t type, int nmatch, regmatch_t pmatch[], int eflags);
void tre_free_fast(fastmatch_t *preg);
#endif /* TRE_FASTMATCH_H */

View File

@ -52,9 +52,6 @@ __FBSDID("$FreeBSD$");
#include <wchar.h>
#include <wctype.h>
#ifndef WITHOUT_FASTMATCH
#include "fastmatch.h"
#endif
#include "grep.h"
static bool first_match = true;
@ -512,14 +509,8 @@ procline(struct parsec *pc)
r = litexec(&pattern[i], pc->ln.dat, 1, &pmatch);
else
#endif
#ifndef WITHOUT_FASTMATCH
if (fg_pattern[i].pattern)
r = fastexec(&fg_pattern[i],
pc->ln.dat, 1, &pmatch, leflags);
else
#endif
r = regexec(&r_pattern[i], pc->ln.dat, 1,
&pmatch, leflags);
r = regexec(&r_pattern[i], pc->ln.dat, 1, &pmatch,
leflags);
if (r != 0)
continue;
/* Check for full match */
@ -527,11 +518,7 @@ procline(struct parsec *pc)
(size_t)pmatch.rm_eo != pc->ln.len))
continue;
/* Check for whole word match */
#ifndef WITHOUT_FASTMATCH
if (wflag || fg_pattern[i].word) {
#else
if (wflag) {
#endif
wbegin = wend = L' ';
if (pmatch.rm_so != 0 &&
sscanf(&pc->ln.dat[pmatch.rm_so - 1],