freebsd-dev/usr.bin/wc/tests/wc_test.sh
Dag-Erling Smørgrav 5c870e1b48 wc: Improve test coverage.
* Add a test that causes mbrtowc() to return 0.

* Add a test that causes mbrtowc() to return -2.

Sponsored by:	Klara, Inc.
Reviewed by:	emaste
Differential Revision:	https://reviews.freebsd.org/D38605
2023-02-16 00:40:45 +00:00

249 lines
5.1 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#
# Copyright (c) 2023 Klara, Inc.
#
# SPDX-License-Identifier: BSD-2-Clause
#
#
# wc has to see a multibyte character set, and its diagnostics have to
# come out untranslated so that the tests can match on their text.
#
LC_CTYPE=C.UTF-8
LC_MESSAGES=C
export LC_CTYPE LC_MESSAGES
#
# Size, in bytes, of the buffer wc reads its input into (64 KiB).
#
MAXBSIZE=$((64 * 1024))
#
# Sample text containing multibyte characters.
#
# The expected counts that follow describe the text as the tests write
# it out, i.e. followed by one extra newline: printf "%s\n" "$tv".
# The three en dashes (U+2013, three bytes each in UTF-8) are part of
# the sample; the word, byte, character and longest-line counts all
# depend on them.
#
tv="Der bode en underlig gråsprængt en
på den yderste nøgne ø; –
han gjorde visst intet menneske mén
hverken på land eller sjø;
dog stundom gnistred hans øjne stygt, –
helst mod uroligt vejr, –
og da mente folk, at han var forrykt,
og da var der få, som uden frykt
kom Terje Vigen nær.
"
tvl=10		# lines
tvw=55		# words
tvc=300		# bytes
tvm=283		# characters
tvcL=42		# length of the longest line, in bytes
tvmL=39		# length of the longest line, in characters
#
# Check wc's output for a single input file in every supported way:
# with the default set of counters and with each of -l, -w, -c and -m,
# first feeding the file to wc on standard input and then passing it
# by name (in which case the name is expected after the counts).
#
#   $1  name of the input file
#   $2  expected number of lines
#   $3  expected number of words
#   $4  expected number of bytes
#   $5  expected number of characters; optional, defaults to $4
#
atf_check_wc() {
	local input="$1"
	local lines="$2" words="$3" bytes="$4"
	local chars="${5-$4}"
	local summary="${lines} +${words} +${bytes}"
	local pair opt expect

	# Input on stdin: counts only.
	atf_check -o match:"^ +${summary}\$" wc <"${input}"
	for pair in "l:${lines}" "w:${words}" "c:${bytes}" "m:${chars}" ; do
		opt="${pair%%:*}"
		expect="${pair#*:}"
		atf_check -o match:"^ +${expect}\$" wc "-${opt}" <"${input}"
	done

	# Input by name: counts followed by the file name.
	atf_check -o match:"^ +${summary} ${input}\$" wc "${input}"
	for pair in "l:${lines}" "w:${words}" "c:${bytes}" "m:${chars}" ; do
		opt="${pair%%:*}"
		expect="${pair#*:}"
		atf_check -o match:"^ +${expect} ${input}\$" wc "-${opt}" "${input}"
	done
}
atf_test_case basic
basic_head() {
	atf_set descr 'Basic test case'
}
basic_body() {
	# A single line holding two one-letter words: 1 line, 2 words,
	# 4 bytes including the newline.
	printf 'a b\n' >foo
	atf_check_wc foo 1 2 4
}
atf_test_case blank
blank_head() {
	atf_set descr 'Input containing only blank lines'
}
blank_body() {
	# Three newlines and nothing else: 3 lines, 0 words, 3 bytes.
	printf '\n\n\n' >foo
	atf_check_wc foo 3 0 3
}
atf_test_case empty
empty_head() {
	atf_set descr 'Empty input'
}
empty_body() {
	# Create (or truncate to) a zero-length file; every count is 0.
	: >foo
	atf_check_wc foo 0 0 0
}
atf_test_case invalid
invalid_head() {
	atf_set descr 'Invalid multibyte input'
}
invalid_body() {
	# Four bytes, one of which (octal 377) is placed between two
	# ASCII letters.  wc -m is expected to complain on stderr and
	# still report a count of 4 for the file.
	printf 'a\377b\n' >foo
	atf_check -e match:'Illegal byte sequence' -o match:'^ +4 foo$' \
	    wc -m foo
}
atf_test_case multiline
multiline_head() {
	atf_set descr 'Multiline, multibyte input'
}
multiline_body() {
	# Write the sample text followed by one extra newline and run the
	# standard series of checks against it.
	printf '%s\n' "$tv" >foo
	atf_check_wc foo "$tvl" "$tvw" "$tvc" "$tvm"

	# The -L checks are anchored at the end of the line as well as the
	# start, so that a longest-line figure with extra trailing digits
	# (or any other trailing output) cannot slip through.

	# longest line in bytes
	atf_check -o match:"^ +${tvc} +${tvcL} foo\$" wc -cL foo
	atf_check -o match:"^ +${tvc} +${tvcL}\$" wc -cL <foo
	# longest line in characters
	atf_check -o match:"^ +${tvm} +${tvmL} foo\$" wc -mL foo
	atf_check -o match:"^ +${tvm} +${tvmL}\$" wc -mL <foo
}
atf_test_case multiline_repeated
multiline_repeated_head() {
	atf_set descr 'Multiline input exceeding the input buffer size'
}
multiline_repeated_body() {
	local copies=0

	# Write the sample text 1000 times, five copies per printf call.
	# printf reuses its format for every remaining operand, so passing
	# the text five times produces five "text + newline" records
	# without relying on positional (%1$s) conversions, which not
	# every printf implementation accepts.
	while [ "${copies}" -lt 1000 ] ; do
		printf '%s\n' "$tv" "$tv" "$tv" "$tv" "$tv"
		copies=$((copies + 5))
	done >foo

	# Every expected count is the single-copy count times the number
	# of copies written.
	atf_check_wc foo \
	    $((tvl * copies)) $((tvw * copies)) \
	    $((tvc * copies)) $((tvm * copies))
}
atf_test_case nul
nul_head() {
	atf_set descr 'Input containing NUL'
}
nul_body() {
	# A NUL byte between two letters; the expected counts are
	# 1 line, 1 word, 4 bytes.
	printf 'a\0b\n' >foo
	atf_check_wc foo 1 1 4
}
atf_test_case poop
poop_head() {
	atf_set descr 'Multibyte sequence across buffer boundary'
}
poop_body() {
	local lines=0 words=0 bytes=0 chars=0
	local phase=$((MAXBSIZE % 5))

	# Each line written below is a run of 4-byte UTF-8 characters,
	# every one followed by a single-byte separator, so a new
	# character starts every 5 bytes.  The first full read of
	# MAXBSIZE bytes therefore stops in the middle of a character,
	# with two exceptions:
	#  - MAXBSIZE is a multiple of 5.  That cannot happen for a power
	#    of 2 and is asserted against here.
	#  - MAXBSIZE is one less than a multiple of 5 (for instance
	#    2^18 = 262,144 = 52429 * 5 - 1).  A single leading newline
	#    then shifts the whole stream by one byte.
	atf_check_not_equal 0 ${phase}
	: >foo
	if [ ${phase} -eq 4 ] ; then
		printf "\n" >>foo
		lines=1 bytes=1 chars=1
	fi

	# Keep appending lines until the file is larger than the buffer.
	until [ ${bytes} -gt ${MAXBSIZE} ] ; do
		printf "💩.💩.💩.💩.💩.💩.💩.💩.💩.💩.💩.💩.💩.💩.💩.💩\n"
		lines=$((lines + 1))
		words=$((words + 1))
		bytes=$((bytes + 80))	# 80 bytes per line
		chars=$((chars + 32))	# 32 characters per line
	done >>foo

	atf_check_wc foo ${lines} ${words} ${bytes} ${chars}
}
atf_test_case total
total_head() {
	atf_set descr 'Multiple inputs'
}
total_body() {
	local f

	# Two identical copies of the sample text ...
	for f in foo bar ; do
		printf '%s\n' "$tv" >"${f}"
	done
	# ... so the "total" line must show each count doubled.
	atf_check \
	    -o match:"^ +$((tvl * 2)) +$((tvw * 2)) +$((tvc * 2)) total\$" \
	    wc foo bar
}
atf_test_case unterminated
unterminated_head() {
	atf_set descr 'Input not ending in newline'
}
unterminated_body() {
	# Two words but no newline at all: 0 lines, 2 words, 3 bytes.
	printf 'a b' >foo
	atf_check_wc foo 0 2 3
}
atf_test_case usage
usage_head() {
	atf_set descr 'Trigger usage message'
}
usage_body() {
	# "-?" is passed as an option; wc is expected to exit with
	# status 1 and print its usage string on stderr.
	atf_check -s exit:1 -e match:'usage: wc' wc '-?'
}
atf_test_case whitespace
whitespace_head() {
	atf_set descr 'Input containing only whitespace and newlines'
}
whitespace_body() {
	# An empty line, a line holding a space and a line holding a
	# tab: 3 lines, 0 words, 5 bytes.
	printf '\n \n\t\n' >foo
	atf_check_wc foo 3 0 5
}
#
# Register every test case defined in this file, in alphabetical order.
#
atf_init_test_cases() {
	local tc

	for tc in \
	    basic blank empty invalid multiline multiline_repeated \
	    nul poop total unterminated usage whitespace ; do
		atf_add_test_case "${tc}"
	done
}