From ff4dc67299e62f35d32dfde8915148a86069bd31 Mon Sep 17 00:00:00 2001
From: Jilles Tjoelker <jilles@FreeBSD.org>
Date: Wed, 15 Jun 2011 21:48:10 +0000
Subject: [PATCH] sh: Add support for named character classes in bracket
 expressions.

Example:
  case x in [[:alpha:]]) echo yes ;; esac
---
Review notes (text in this section is ignored when the patch is applied):
- patmatch(): once a named class has been consumed, skip the rest of the
  loop body.  Previously the introducing '[' was still processed as an
  ordinary member, so [[:alpha:]] could also match a literal '['.
- match_charclass(): return exactly 0 or 1, because the caller ORs the
  result into a flag; compare the name length without mixing signedness.
- case8.0: add a regression case for the '[' problem.
- NOTE(review): the author address and the three context #include names
  were missing from the text under review and have been filled in by
  inference; the index blob ids were not regenerated.  Confirm both
  against the repository before applying.

 bin/sh/expand.c                          | 41 +++++++++++++++++++++++-
 bin/sh/sh.1                              | 12 ++++++-
 tools/regression/bin/sh/builtins/case8.0 | 38 ++++++++++++++++++++++
 3 files changed, 89 insertions(+), 2 deletions(-)
 create mode 100644 tools/regression/bin/sh/builtins/case8.0

diff --git a/bin/sh/expand.c b/bin/sh/expand.c
index 95a0d6ad15d0..ea8d78d26b83 100644
--- a/bin/sh/expand.c
+++ b/bin/sh/expand.c
@@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
 #include <string.h>
 #include <unistd.h>
 #include <wchar.h>
+#include <wctype.h>
 
 /*
  * Routines to expand arguments to commands.  We have to deal with
@@ -1400,6 +1401,36 @@ get_wc(const char **p)
 }
 
 
+/*
+ * See if a character matches a character class, starting at the first colon
+ * of "[:class:]".  Returns 1 on a match and 0 otherwise.
+ * If a valid character class is recognized, a pointer to the next character
+ * after the final closing bracket is stored into *end, otherwise a null
+ * pointer is stored into *end.
+ */
+static int
+match_charclass(const char *p, wchar_t chr, const char **end)
+{
+	char name[20];
+	const char *nameend;
+	wctype_t cclass;
+
+	*end = NULL;
+	p++;
+	nameend = strstr(p, ":]");
+	if (nameend == NULL || nameend - p >= (int)sizeof(name) || nameend == p)
+		return 0;
+	memcpy(name, p, nameend - p);
+	name[nameend - p] = '\0';
+	*end = nameend + 2;
+	cclass = wctype(name);
+	/* An unknown class matches nothing but is valid nevertheless. */
+	if (cclass == 0)
+		return 0;
+	return iswctype(chr, cclass) != 0;
+}
+
+
 /*
  * Returns true if the pattern matches the string.
  */
@@ -1407,7 +1438,7 @@ get_wc(const char **p)
 int
 patmatch(const char *pattern, const char *string, int squoted)
 {
-	const char *p, *q;
+	const char *p, *q, *end;
 	char c;
 	wchar_t wc, wc2;
 
@@ -1495,6 +1526,14 @@ patmatch(const char *pattern, const char *string, int squoted)
 			do {
 				if (c == CTLQUOTEMARK)
 					continue;
+				if (c == '[' && *p == ':') {
+					found |= match_charclass(p, chr, &end);
+					/* Do not also match '[' literally. */
+					if (end != NULL) {
+						p = end;
+						continue;
+					}
+				}
 				if (c == CTLESC)
 					c = *p++;
 				if (localeisutf8 && c & 0x80) {
diff --git a/bin/sh/sh.1 b/bin/sh/sh.1
index 3b7f3d33462e..67ffcc6bcc25 100644
--- a/bin/sh/sh.1
+++ b/bin/sh/sh.1
@@ -32,7 +32,7 @@
 .\"	from: @(#)sh.1	8.6 (Berkeley) 5/4/95
 .\" $FreeBSD$
 .\"
-.Dd June 12, 2011
+.Dd June 15, 2011
 .Dt SH 1
 .Os
 .Sh NAME
@@ -1648,6 +1648,15 @@ matches a
 rather than introducing a character class.
 A character class matches any of the characters between the square brackets.
 A range of characters may be specified using a minus sign.
+A named class of characters (see
+.Xr wctype 3 )
+may be specified by surrounding the name with
+.Ql \&[:
+and
+.Ql :\&] .
+For example,
+.Ql \&[\&[:alpha:\&]\&]
+is a shell pattern that matches a single letter.
 The character class may be complemented by making an exclamation point
 .Pq Ql !\&
 the first character of the character class.
@@ -2572,6 +2581,7 @@ will return the argument.
 .Xr execve 2 ,
 .Xr getrlimit 2 ,
 .Xr umask 2 ,
+.Xr wctype 3 ,
 .Xr editrc 5
 .Sh HISTORY
 A
diff --git a/tools/regression/bin/sh/builtins/case8.0 b/tools/regression/bin/sh/builtins/case8.0
new file mode 100644
index 000000000000..8d9f8b604d88
--- /dev/null
+++ b/tools/regression/bin/sh/builtins/case8.0
@@ -0,0 +1,38 @@
+# $FreeBSD$
+
+case aZ_ in
+[[:alpha:]_][[:upper:]_][[:alpha:]_]) ;;
+*) echo Failed at $LINENO ;;
+esac
+
+case ' ' in
+[[:alpha:][:digit:]]) echo Failed at $LINENO ;;
+[![:alpha:][:digit:]]) ;;
+*) echo Failed at $LINENO ;;
+esac
+
+case '.X.' in
+*[[:lower:]]*) echo Failed at $LINENO ;;
+*[[:upper:]]*) ;;
+*) echo Failed at $LINENO ;;
+esac
+
+case ' ' in
+[![:print:]]) echo Failed at $LINENO ;;
+[![:alnum:][:punct:]]) ;;
+*) echo Failed at $LINENO ;;
+esac
+
+case '
+' in
+[[:print:]]) echo Failed at $LINENO ;;
+['
+'[:digit:]]) ;;
+*) echo Failed at $LINENO ;;
+esac
+
+case '[' in
+[[:alpha:]]) echo Failed at $LINENO ;;
+*) ;;
+esac