75c3ca1ebf
This should cover all of the basic functionality, as well as the recent enhancement to use a dynamic buffer size rather than limiting patterns and lines to MAXBSIZE. Reviewed by: bapt Sponsored by: Klara, Inc. Differential Revision: https://reviews.freebsd.org/D36324
214 lines
5.7 KiB
Bash
Executable File
214 lines
5.7 KiB
Bash
Executable File
#
|
|
# SPDX-License-Identifier: BSD-2-Clause-FreeBSD
|
|
#
|
|
# Copyright (c) 2022 Klara Systems
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions
|
|
# are met:
|
|
# 1. Redistributions of source code must retain the above copyright
|
|
# notice, this list of conditions and the following disclaimer.
|
|
# 2. Redistributions in binary form must reproduce the above copyright
|
|
# notice, this list of conditions and the following disclaimer in the
|
|
# documentation and/or other materials provided with the distribution.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
# SUCH DAMAGE.
|
|
#
|
|
# $FreeBSD$
|
|
|
|
# sys/param.h
|
|
: ${MAXBSIZE:=65536}
|
|
|
|
atf_test_case bytes
|
|
bytes_body()
|
|
{
|
|
printf "aaaa" > foo-aa
|
|
printf "bb\nc" > foo-ab
|
|
printf "ccc\n" > foo-ac
|
|
|
|
cat foo-* > foo
|
|
atf_check split -b 4 foo split-
|
|
atf_check -o file:foo-aa cat split-aa
|
|
atf_check -o file:foo-ab cat split-ab
|
|
atf_check -o file:foo-ac cat split-ac
|
|
|
|
# MAXBSIZE is the default buffer size, so we'll split at just a little
|
|
# bit past the buffer size to make sure that it still properly splits
|
|
# even when it needs to read again to hit the limit.
|
|
bsize=$((MAXBSIZE + 12))
|
|
rm foo-* foo
|
|
jot -ns "" -b "a" ${bsize} > foo-aa
|
|
jot -ns "" -b "b" ${bsize} > foo-ab
|
|
jot -ns "" -b "c" 12 > foo-ac
|
|
|
|
cat foo-* > foo
|
|
atf_check split -b ${bsize} foo split-
|
|
atf_check -o file:foo-aa cat split-aa
|
|
atf_check -o file:foo-ab cat split-ab
|
|
atf_check -o file:foo-ac cat split-ac
|
|
}
|
|
|
|
atf_test_case chunks
|
|
chunks_body()
|
|
{
|
|
jot -ns "" -b "a" 4096 > foo
|
|
jot -ns "" -b "b" 4096 >> foo
|
|
jot -ns "" -b "c" 4104 >> foo
|
|
|
|
chunks=3
|
|
jot -ns "" -b "a" 4096 > foo-aa
|
|
jot -ns "" -b "b" 2 >> foo-aa
|
|
jot -ns "" -b "b" 4094 > foo-ab
|
|
jot -ns "" -b "c" 4 >> foo-ab
|
|
jot -ns "" -b "c" 4100 > foo-ac
|
|
|
|
atf_check split -n ${chunks} foo split-
|
|
atf_check -o file:foo-aa cat split-aa
|
|
atf_check -o file:foo-ab cat split-ab
|
|
atf_check -o file:foo-ac cat split-ac
|
|
}
|
|
|
|
atf_test_case sensible_lines
|
|
sensible_lines_body()
|
|
{
|
|
echo "The quick brown fox" > foo-aa
|
|
echo "jumps over" > foo-ab
|
|
echo "the lazy dog" > foo-ac
|
|
|
|
cat foo-* > foo
|
|
atf_check split -l 1 foo split-
|
|
atf_check -o file:foo-aa cat split-aa
|
|
atf_check -o file:foo-ab cat split-ab
|
|
atf_check -o file:foo-ac cat split-ac
|
|
|
|
# Try again, make sure that `-` uses stdin as documented.
|
|
atf_check rm split-*
|
|
atf_check -x 'split -l 1 - split- < foo'
|
|
atf_check -o file:foo-aa cat split-aa
|
|
atf_check -o file:foo-ab cat split-ab
|
|
atf_check -o file:foo-ac cat split-ac
|
|
|
|
# Finally, try with -l == 2; we should see a 2/1 split instead of the
|
|
# previous 1/1/1.
|
|
cat foo-aa foo-ab > foo-aa-ng
|
|
cat foo-ac > foo-ab-ng
|
|
|
|
atf_check rm split-*
|
|
atf_check split -l 2 foo split-
|
|
|
|
atf_check -o file:foo-aa-ng cat split-aa
|
|
atf_check -o file:foo-ab-ng cat split-ab
|
|
}
|
|
|
|
atf_test_case long_lines
|
|
long_lines_body()
|
|
{
|
|
|
|
# Test file lines will be:
|
|
# a x MAXBSIZE
|
|
# b x MAXBSIZE + c x MAXBSIZE
|
|
# d x 1024
|
|
#
|
|
# The historical split(1) implementation wouldn't grow its internal
|
|
# buffer, so we'd end up with 2/3 split- files being wrong with -l 1.
|
|
# Notably, split-aa would include most of the first two lines, split-ab
|
|
# a tiny fraction of the second line, and split-ac the third line.
|
|
#
|
|
# Recent split(1) instead grows the buffer until we can either fit the
|
|
# line or we run out of memory.
|
|
jot -s "" -b "a" ${MAXBSIZE} > foo-aa
|
|
jot -ns "" -b "b" ${MAXBSIZE} > foo-ab
|
|
jot -s "" -b "c" ${MAXBSIZE} >> foo-ab
|
|
jot -s "" -b "d" 1024 > foo-ac
|
|
|
|
cat foo-* > foo
|
|
atf_check split -l 1 foo split-
|
|
|
|
atf_check -o file:foo-aa cat split-aa
|
|
atf_check -o file:foo-ab cat split-ab
|
|
atf_check -o file:foo-ac cat split-ac
|
|
}
|
|
|
|
atf_test_case numeric_suffix
|
|
numeric_suffix_body()
|
|
{
|
|
echo "The quick brown fox" > foo-00
|
|
echo "jumps over" > foo-01
|
|
echo "the lazy dog" > foo-02
|
|
|
|
cat foo-* > foo
|
|
atf_check split -d -l 1 foo split-
|
|
|
|
atf_check -o file:foo-00 cat split-00
|
|
atf_check -o file:foo-01 cat split-01
|
|
atf_check -o file:foo-02 cat split-02
|
|
}
|
|
|
|
atf_test_case larger_suffix_length
|
|
larger_suffix_length_body()
|
|
{
|
|
:> foo
|
|
|
|
# Generate foo-000 through foo-009, then foo-010 and foo-011
|
|
for i in $(seq -w 0 11); do
|
|
len=$((${i##0} + 1))
|
|
file="foo-0${i}"
|
|
jot -s "" -b "a" ${len} > ${file}
|
|
cat ${file} >> foo
|
|
done
|
|
|
|
atf_check split -a 3 -d -l 1 foo split-
|
|
for i in $(seq -w 0 11); do
|
|
srcfile="foo-0${i}"
|
|
splitfile="split-0${i}"
|
|
atf_check -o file:"${srcfile}" cat "${splitfile}"
|
|
done
|
|
}
|
|
|
|
atf_test_case pattern
|
|
pattern_body()
|
|
{
|
|
|
|
# Some fake yaml gives us a good realistic use-case for -p, as we can
|
|
# split on top-level stanzas.
|
|
cat <<EOF > foo-aa
|
|
cat:
|
|
aa: true
|
|
ab: true
|
|
ac: true
|
|
EOF
|
|
cat <<EOF > foo-ab
|
|
dog:
|
|
ba: true
|
|
bb: true
|
|
bc: true
|
|
EOF
|
|
|
|
cat foo-* > foo
|
|
|
|
atf_check split -p "^[^[:space:]]+:" foo split-
|
|
atf_check -o file:foo-aa cat split-aa
|
|
atf_check -o file:foo-ab cat split-ab
|
|
}
|
|
|
|
atf_init_test_cases()
|
|
{
|
|
atf_add_test_case bytes
|
|
atf_add_test_case chunks
|
|
atf_add_test_case sensible_lines
|
|
atf_add_test_case long_lines
|
|
atf_add_test_case numeric_suffix
|
|
atf_add_test_case larger_suffix_length
|
|
atf_add_test_case pattern
|
|
}
|