sh: Add support for named character classes in bracket expressions.

Example:
  case x in [[:alpha:]]) echo yes ;; esac
This commit is contained in:
Jilles Tjoelker 2011-06-15 21:48:10 +00:00
parent d2c60314e4
commit ff4dc67299
3 changed files with 80 additions and 2 deletions

View File

@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
#include <string.h>
#include <unistd.h>
#include <wchar.h>
#include <wctype.h>
/*
* Routines to expand arguments to commands. We have to deal with
@ -1400,6 +1401,36 @@ get_wc(const char **p)
}
/*
 * Test a character against a named character class.
 *
 * p points at the first colon of a "[:class:]" construct; the class name
 * is the text between that colon and the next ":]".
 *
 * On return, *end is the address just past the closing "]" when the
 * construct is well formed (non-empty name that fits the local buffer,
 * terminated by ":]"), and a null pointer otherwise.
 *
 * The result is non-zero only when the class is known to wctype() and
 * chr belongs to it.
 */
static int
match_charclass(const char *p, wchar_t chr, const char **end)
{
	char name[20];
	const char *start, *stop;
	size_t len;
	wctype_t cclass;

	*end = NULL;

	/* Step over the leading ':' so that start addresses the name. */
	start = p + 1;
	stop = strstr(start, ":]");
	if (stop == NULL)
		return 0;

	/* Reject an empty name and one that cannot be NUL-terminated. */
	len = (size_t)(stop - start);
	if (len == 0 || len >= sizeof(name))
		return 0;

	memcpy(name, start, len);
	name[len] = '\0';

	/* Syntactically valid from here on, whatever the name turns out to be. */
	*end = stop + 2;

	cclass = wctype(name);
	/* A class unknown to wctype() is still valid syntax; it matches nothing. */
	return cclass != 0 ? iswctype(chr, cclass) : 0;
}
/*
* Returns true if the pattern matches the string.
*/
@ -1407,7 +1438,7 @@ get_wc(const char **p)
int
patmatch(const char *pattern, const char *string, int squoted)
{
const char *p, *q;
const char *p, *q, *end;
char c;
wchar_t wc, wc2;
@ -1495,6 +1526,11 @@ patmatch(const char *pattern, const char *string, int squoted)
do {
if (c == CTLQUOTEMARK)
continue;
if (c == '[' && *p == ':') {
found |= match_charclass(p, chr, &end);
if (end != NULL)
p = end;
}
if (c == CTLESC)
c = *p++;
if (localeisutf8 && c & 0x80) {

View File

@ -32,7 +32,7 @@
.\" from: @(#)sh.1 8.6 (Berkeley) 5/4/95
.\" $FreeBSD$
.\"
.Dd June 12, 2011
.Dd June 15, 2011
.Dt SH 1
.Os
.Sh NAME
@ -1648,6 +1648,15 @@ matches a
rather than introducing a character class.
A character class matches any of the characters between the square brackets.
A range of characters may be specified using a minus sign.
A named class of characters (see
.Xr wctype 3 )
may be specified by surrounding the name with
.Ql \&[:
and
.Ql :\&] .
For example,
.Ql \&[\&[:alpha:\&]\&]
is a shell pattern that matches a single letter.
The character class may be complemented by making an exclamation point
.Pq Ql !\&
the first character of the character class.
@ -2572,6 +2581,7 @@ will return the argument.
.Xr execve 2 ,
.Xr getrlimit 2 ,
.Xr umask 2 ,
.Xr wctype 3 ,
.Xr editrc 5
.Sh HISTORY
A

View File

@ -0,0 +1,32 @@
# $FreeBSD$
# Regression test for named character classes ([:name:]) inside bracket
# expressions in case patterns.  Each case statement is expected to take
# the silent branch; any other branch prints "Failed at <line>".

# A bracket expression may mix a named class with an ordinary character:
# 'a' and '_' must match [[:alpha:]_], 'Z' must match [[:upper:]_].
case aZ_ in
[[:alpha:]_][[:upper:]_][[:alpha:]_]) ;;
*) echo Failed at $LINENO ;;
esac
# Two classes in one bracket expression, plain and complemented with '!':
# a space must be rejected by the first pattern and accepted by the second.
case ' ' in
[[:alpha:][:digit:]]) echo Failed at $LINENO ;;
[![:alpha:][:digit:]]) ;;
*) echo Failed at $LINENO ;;
esac
# Classes combined with '*': the string holds an upper-case letter but no
# lower-case one, so only the [[:upper:]] pattern may match.
case '.X.' in
*[[:lower:]]*) echo Failed at $LINENO ;;
*[[:upper:]]*) ;;
*) echo Failed at $LINENO ;;
esac
# A space is expected to be in [:print:] but in neither [:alnum:] nor
# [:punct:], so only the second complemented pattern may match.
case ' ' in
[![:print:]]) echo Failed at $LINENO ;;
[![:alnum:][:punct:]]) ;;
*) echo Failed at $LINENO ;;
esac
# The subject is a single newline: it must not match [:print:], and must
# match a bracket expression listing a quoted newline next to a class.
case '
' in
[[:print:]]) echo Failed at $LINENO ;;
['
'[:digit:]]) ;;
*) echo Failed at $LINENO ;;
esac