split: switch to getline() for line/pattern matching
Get rid of split's home-grown logic for growing the buffer; arbitrarily breaking at LONG_MAX bytes instead of 65536 bytes gives us much more wiggle room. Additionally, we'll actually fail out entirely if we can't fit a line, which makes noticing this class of problem much easier. Reviewed by: bapt, emaste, pauamma Sponsored by: Klara, Inc. Differential Revision: https://reviews.freebsd.org/D36323
This commit is contained in:
parent
8ba2beacfd
commit
5c053aa3c5
@ -28,7 +28,7 @@
|
||||
.\" @(#)split.1 8.3 (Berkeley) 4/16/94
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd May 9, 2013
|
||||
.Dd October 25, 2022
|
||||
.Dt SPLIT 1
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -213,5 +213,7 @@ A
|
||||
.Nm
|
||||
command appeared in
|
||||
.At v3 .
|
||||
.Sh BUGS
|
||||
The maximum line length for matching patterns is 65536.
|
||||
.Pp
|
||||
Before
|
||||
.Fx 14 ,
|
||||
pattern and line matching only operated on lines shorter than 65,536 bytes.
|
||||
|
@ -70,7 +70,6 @@ static off_t chunks = 0; /* Chunks count to split into. */
|
||||
static long numlines; /* Line count to split on. */
|
||||
static int file_open; /* If a file open. */
|
||||
static int ifd = -1, ofd = -1; /* Input/output file descriptors. */
|
||||
static char bfr[MAXBSIZE]; /* I/O buffer. */
|
||||
static char fname[MAXPATHLEN]; /* File name prefix. */
|
||||
static regex_t rgx;
|
||||
static int pflag;
|
||||
@ -203,6 +202,7 @@ main(int argc, char **argv)
|
||||
static void
|
||||
split1(void)
|
||||
{
|
||||
static char bfr[MAXBSIZE];
|
||||
off_t bcnt;
|
||||
char *C;
|
||||
ssize_t dist, len;
|
||||
@ -211,7 +211,7 @@ split1(void)
|
||||
nfiles = 0;
|
||||
|
||||
for (bcnt = 0;;)
|
||||
switch ((len = read(ifd, bfr, MAXBSIZE))) {
|
||||
switch ((len = read(ifd, bfr, sizeof(bfr)))) {
|
||||
case 0:
|
||||
exit(0);
|
||||
case -1:
|
||||
@ -264,46 +264,45 @@ split1(void)
|
||||
static void
|
||||
split2(void)
|
||||
{
|
||||
char *buf;
|
||||
size_t bufsize;
|
||||
ssize_t len;
|
||||
long lcnt = 0;
|
||||
FILE *infp;
|
||||
|
||||
buf = NULL;
|
||||
bufsize = 0;
|
||||
|
||||
/* Stick a stream on top of input file descriptor */
|
||||
if ((infp = fdopen(ifd, "r")) == NULL)
|
||||
err(EX_NOINPUT, "fdopen");
|
||||
|
||||
/* Process input one line at a time */
|
||||
while (fgets(bfr, sizeof(bfr), infp) != NULL) {
|
||||
const int len = strlen(bfr);
|
||||
|
||||
/* If line is too long to deal with, just write it out */
|
||||
if (bfr[len - 1] != '\n')
|
||||
goto writeit;
|
||||
|
||||
while ((len = getline(&buf, &bufsize, infp)) > 0) {
|
||||
/* Check if we need to start a new file */
|
||||
if (pflag) {
|
||||
regmatch_t pmatch;
|
||||
|
||||
pmatch.rm_so = 0;
|
||||
pmatch.rm_eo = len - 1;
|
||||
if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0)
|
||||
if (regexec(&rgx, buf, 0, &pmatch, REG_STARTEND) == 0)
|
||||
newfile();
|
||||
} else if (lcnt++ == numlines) {
|
||||
newfile();
|
||||
lcnt = 1;
|
||||
}
|
||||
|
||||
writeit:
|
||||
/* Open output file if needed */
|
||||
if (!file_open)
|
||||
newfile();
|
||||
|
||||
/* Write out line */
|
||||
if (write(ofd, bfr, len) != len)
|
||||
if (write(ofd, buf, len) != len)
|
||||
err(EX_IOERR, "write");
|
||||
}
|
||||
|
||||
/* EOF or error? */
|
||||
if (ferror(infp))
|
||||
if ((len == -1 && errno != 0) || ferror(infp))
|
||||
err(EX_IOERR, "read");
|
||||
else
|
||||
exit(0);
|
||||
|
Loading…
Reference in New Issue
Block a user