sh: Add UTF-8 support to ${#var}.

If the current locale uses UTF-8, ${#var} counts codepoints (more precisely,
bytes b with (b & 0xc0) != 0x80, i.e. every byte that is not a UTF-8
continuation byte).
This commit is contained in:
Jilles Tjoelker 2011-05-07 14:32:16 +00:00
parent 4fe5d78a0f
commit 4c244ed255
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=221602
3 changed files with 41 additions and 3 deletions

View File

@ -665,6 +665,7 @@ evalvar(char *p, int flag)
int special;
int startloc;
int varlen;
int varlenb;
int easy;
int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR);
@ -712,8 +713,15 @@ evalvar(char *p, int flag)
if (special) {
varvalue(var, varflags & VSQUOTE, subtype, flag);
if (subtype == VSLENGTH) {
varlen = expdest - stackblock() - startloc;
STADJUST(-varlen, expdest);
varlenb = expdest - stackblock() - startloc;
varlen = varlenb;
if (localeisutf8) {
val = stackblock() + startloc;
for (;val != expdest; val++)
if ((*val & 0xC0) == 0x80)
varlen--;
}
STADJUST(-varlenb, expdest);
}
} else {
char const *syntax = (varflags & VSQUOTE) ? DQSYNTAX
@ -721,7 +729,9 @@ evalvar(char *p, int flag)
if (subtype == VSLENGTH) {
for (;*val; val++)
varlen++;
if (!localeisutf8 ||
(*val & 0xC0) != 0x80)
varlen++;
}
else {
if (quotes)

View File

@ -0,0 +1,14 @@
# $FreeBSD$
# Regression test: in a UTF-8 locale, ${#var} must report the number of
# characters rather than the number of bytes. The script's exit status
# (that of the final test below) is the test result.

# Drop LC_ALL so that the LC_CTYPE setting below is not overridden by it.
unset LC_ALL
LC_CTYPE=en_US.UTF-8
export LC_CTYPE
# a umlaut: U+00E4, encoded as 2 bytes (C3 A4)
s=$(printf '\303\244')
# euro sign: U+20AC, encoded as 3 bytes (E2 82 AC)
s=$s$(printf '\342\202\254')
# some sort of 't' outside BMP: U+1D565, encoded as 4 bytes (F0 9D 95 A5)
s=$s$(printf '\360\235\225\245')
# Make the same string available as $1 so the length of a positional
# parameter is checked as well as that of a named variable.
# NOTE(review): presumably this covers a separate code path for special
# parameters in evalvar() -- confirm against the C source.
set -- "$s"
# 9 bytes in total, but only 3 characters: both forms must yield 3.
[ ${#s} = 3 ] && [ ${#1} = 3 ]

View File

@ -0,0 +1,14 @@
# $FreeBSD$
# Regression test: in a non-UTF-8 (single-byte) locale, ${#var} must keep
# reporting the number of bytes, even if the value happens to contain
# valid UTF-8 sequences. The script's exit status (that of the final test
# below) is the test result.

# Drop LC_ALL so that the LC_CTYPE setting below is not overridden by it.
unset LC_ALL
LC_CTYPE=en_US.ISO8859-1
export LC_CTYPE
# a umlaut: the 2-byte UTF-8 sequence C3 A4
s=$(printf '\303\244')
# euro sign: the 3-byte UTF-8 sequence E2 82 AC
s=$s$(printf '\342\202\254')
# some sort of 't' outside BMP: the 4-byte UTF-8 sequence F0 9D 95 A5
s=$s$(printf '\360\235\225\245')
# Make the same string available as $1 so the length of a positional
# parameter is checked as well as that of a named variable.
set -- "$s"
# 2 + 3 + 4 = 9 bytes; every byte counts in this locale.
[ ${#s} = 9 ] && [ ${#1} = 9 ]