This fixes an obscure endless loop seen with case-insensitive patterns containing characters in the 128-255 range; it was originally found while running the GNU grep test suite. Our regex implementation, being kludgy, translates each character in a case-insensitive pattern into a bracket expression containing both cases of the character, and does not correctly handle the case when the original character is in the bitmap and the other case is not, falling into an endless loop through p_bracket(), ordinary(), and bothcases(). Reducing the bitmap to the 0-127 range for multibyte locales solves this, as none of these characters have an other-case mapping outside of the bitmap. We are also safe in the case when an original character outside of the bitmap has an other-case mapping inside the bitmap (there are several of those in our current ctype maps, with a unidirectional mapping into the bitmap). Reviewed by: bapt, kevans, pfg Differential revision: https://reviews.freebsd.org/D18302
54 lines
1.2 KiB
Bash
Executable File
54 lines
1.2 KiB
Bash
Executable File
# $FreeBSD$

# Declare the "bmpat" test case to ATF; its metadata and logic are supplied by
# the bmpat_head and bmpat_body functions.
atf_test_case bmpat
# Head of the "bmpat" test case: records the human-readable description
# via atf_set.
bmpat_head()
{
	atf_set "descr" "Check matching multibyte characters (PR153502)"
}
# Body of the "bmpat" test case.
#
# Switches LC_CTYPE to C.UTF-8, then feeds short strings containing the
# multibyte character 'é' (no trailing newline) to sed through a pipe.
# Every sed script uses '.' where the 'é' occurs in the input, and each
# atf_check invocation expects sed to print the input back unchanged.
bmpat_body()
{
	export LC_CTYPE="C.UTF-8"

	# 'é' alone, anchored at both ends.
	printf 'é' | atf_check -o "inline:é" \
	    sed -ne '/^.$/p'
	printf 'éé' | atf_check -o "inline:éé" \
	    sed -ne '/^..$/p'

	# 'é' surrounded by, preceding, or following ASCII characters.
	printf 'aéa' | atf_check -o "inline:aéa" \
	    sed -ne '/a.a/p'
	printf 'aéa' | atf_check -o "inline:aéa" \
	    sed -ne '/a.*a/p'
	printf 'aaéaa' | atf_check -o "inline:aaéaa" \
	    sed -ne '/aa.aa/p'
	printf 'aéaéa' | atf_check -o "inline:aéaéa" \
	    sed -ne '/a.a.a/p'
	printf 'éa' | atf_check -o "inline:éa" \
	    sed -ne '/.a/p'
	printf 'aéaa' | atf_check -o "inline:aéaa" \
	    sed -ne '/a.aa/p'
	printf 'éaé' | atf_check -o "inline:éaé" \
	    sed -ne '/.a./p'
}
# Declare the "icase" test case to ATF; its metadata and logic are supplied by
# the icase_head and icase_body functions.
atf_test_case icase
# Head of the "icase" test case: records the human-readable description
# via atf_set.
icase_head()
{
	atf_set "descr" "Check case-insensitive matching for characters 128-255"
}
# Body of the "icase" test case.
#
# Switches LC_CTYPE to C.UTF-8 and uses U+00B5 as a case-insensitive sed
# pattern (the I flag).  The test expects that pattern to match, and sed to
# print back, a line holding U+039C and a line holding U+03BC.
icase_body()
{
	export LC_CTYPE="C.UTF-8"

	# UTF-8 encodings written as octal escapes so the script stays ASCII.
	micro_sign=$(printf '\302\265')		# U+00B5
	capital_mu=$(printf '\316\234')		# U+039C
	small_mu=$(printf '\316\274')		# U+03BC

	# printf with quoted expansions instead of unquoted echo, so the bytes
	# reach sed untouched by word splitting or echo option parsing.
	printf '%s\n' "$capital_mu" |
	    atf_check -o "inline:$capital_mu\n" sed -ne "/$micro_sign/Ip"
	printf '%s\n' "$small_mu" |
	    atf_check -o "inline:$small_mu\n" sed -ne "/$micro_sign/Ip"
}
# Adds the test cases defined in this file, bmpat then icase, via
# atf_add_test_case.
atf_init_test_cases()
{
	atf_add_test_case bmpat
	atf_add_test_case icase
}