sh: Add UTF-8 support to ${#var}.
If the current locale uses UTF-8, ${#var} counts codepoints (more precisely, it counts every byte b with (b & 0xc0) != 0x80, i.e. every byte that is not a UTF-8 continuation byte).
This commit is contained in:
parent
11604ee85b
commit
8bbce85526
@ -665,6 +665,7 @@ evalvar(char *p, int flag)
|
||||
int special;
|
||||
int startloc;
|
||||
int varlen;
|
||||
int varlenb;
|
||||
int easy;
|
||||
int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR);
|
||||
|
||||
@ -712,8 +713,15 @@ again: /* jump here after setting a variable with ${var=text} */
|
||||
if (special) {
|
||||
varvalue(var, varflags & VSQUOTE, subtype, flag);
|
||||
if (subtype == VSLENGTH) {
|
||||
varlen = expdest - stackblock() - startloc;
|
||||
STADJUST(-varlen, expdest);
|
||||
varlenb = expdest - stackblock() - startloc;
|
||||
varlen = varlenb;
|
||||
if (localeisutf8) {
|
||||
val = stackblock() + startloc;
|
||||
for (;val != expdest; val++)
|
||||
if ((*val & 0xC0) == 0x80)
|
||||
varlen--;
|
||||
}
|
||||
STADJUST(-varlenb, expdest);
|
||||
}
|
||||
} else {
|
||||
char const *syntax = (varflags & VSQUOTE) ? DQSYNTAX
|
||||
@ -721,7 +729,9 @@ again: /* jump here after setting a variable with ${var=text} */
|
||||
|
||||
if (subtype == VSLENGTH) {
|
||||
for (;*val; val++)
|
||||
varlen++;
|
||||
if (!localeisutf8 ||
|
||||
(*val & 0xC0) != 0x80)
|
||||
varlen++;
|
||||
}
|
||||
else {
|
||||
if (quotes)
|
||||
|
14
tools/regression/bin/sh/expansion/length7.0
Normal file
14
tools/regression/bin/sh/expansion/length7.0
Normal file
@ -0,0 +1,14 @@
|
||||
# $FreeBSD$
|
||||
|
||||
unset LC_ALL
|
||||
LC_CTYPE=en_US.UTF-8
|
||||
export LC_CTYPE
|
||||
|
||||
# a with umlaut (U+00E4), 2 bytes in UTF-8
|
||||
s=$(printf '\303\244')
|
||||
# euro sign
|
||||
s=$s$(printf '\342\202\254')
|
||||
# mathematical double-struck small t (U+1D565), outside the BMP, 4 bytes in UTF-8
|
||||
s=$s$(printf '\360\235\225\245')
|
||||
set -- "$s"
|
||||
[ ${#s} = 3 ] && [ ${#1} = 3 ]
|
14
tools/regression/bin/sh/expansion/length8.0
Normal file
14
tools/regression/bin/sh/expansion/length8.0
Normal file
@ -0,0 +1,14 @@
|
||||
# $FreeBSD$
|
||||
|
||||
unset LC_ALL
|
||||
LC_CTYPE=en_US.ISO8859-1
|
||||
export LC_CTYPE
|
||||
|
||||
# a with umlaut (U+00E4), 2 bytes in UTF-8
|
||||
s=$(printf '\303\244')
|
||||
# euro sign
|
||||
s=$s$(printf '\342\202\254')
|
||||
# mathematical double-struck small t (U+1D565), outside the BMP, 4 bytes in UTF-8
|
||||
s=$s$(printf '\360\235\225\245')
|
||||
set -- "$s"
|
||||
[ ${#s} = 9 ] && [ ${#1} = 9 ]
|
Loading…
x
Reference in New Issue
Block a user