split: switch to getline() for line/pattern matching
Get rid of split's home-grown logic for growing the buffer; arbitrarily breaking at LONG_MAX bytes instead of 65536 bytes gives us much more wiggle room. Additionally, we'll actually fail out entirely if we can't fit a line, which makes noticing this class of problem much easier.

Reviewed by: bapt, emaste, pauamma
Sponsored by: Klara, Inc.
Differential Revision: https://reviews.freebsd.org/D36323
This commit is contained in:
parent
8ba2beacfd
commit
5c053aa3c5
@ -28,7 +28,7 @@
|
|||||||
.\" @(#)split.1 8.3 (Berkeley) 4/16/94
|
.\" @(#)split.1 8.3 (Berkeley) 4/16/94
|
||||||
.\" $FreeBSD$
|
.\" $FreeBSD$
|
||||||
.\"
|
.\"
|
||||||
.Dd May 9, 2013
|
.Dd October 25, 2022
|
||||||
.Dt SPLIT 1
|
.Dt SPLIT 1
|
||||||
.Os
|
.Os
|
||||||
.Sh NAME
|
.Sh NAME
|
||||||
@ -213,5 +213,7 @@ A
|
|||||||
.Nm
|
.Nm
|
||||||
command appeared in
|
command appeared in
|
||||||
.At v3 .
|
.At v3 .
|
||||||
.Sh BUGS
|
.Pp
|
||||||
The maximum line length for matching patterns is 65536.
|
Before
|
||||||
|
.Fx 14 ,
|
||||||
|
pattern and line matching only operated on lines shorter than 65,536 bytes.
|
||||||
|
@ -70,7 +70,6 @@ static off_t chunks = 0; /* Chunks count to split into. */
|
|||||||
static long numlines; /* Line count to split on. */
|
static long numlines; /* Line count to split on. */
|
||||||
static int file_open; /* If a file open. */
|
static int file_open; /* If a file open. */
|
||||||
static int ifd = -1, ofd = -1; /* Input/output file descriptors. */
|
static int ifd = -1, ofd = -1; /* Input/output file descriptors. */
|
||||||
static char bfr[MAXBSIZE]; /* I/O buffer. */
|
|
||||||
static char fname[MAXPATHLEN]; /* File name prefix. */
|
static char fname[MAXPATHLEN]; /* File name prefix. */
|
||||||
static regex_t rgx;
|
static regex_t rgx;
|
||||||
static int pflag;
|
static int pflag;
|
||||||
@ -203,6 +202,7 @@ main(int argc, char **argv)
|
|||||||
static void
|
static void
|
||||||
split1(void)
|
split1(void)
|
||||||
{
|
{
|
||||||
|
static char bfr[MAXBSIZE];
|
||||||
off_t bcnt;
|
off_t bcnt;
|
||||||
char *C;
|
char *C;
|
||||||
ssize_t dist, len;
|
ssize_t dist, len;
|
||||||
@ -211,7 +211,7 @@ split1(void)
|
|||||||
nfiles = 0;
|
nfiles = 0;
|
||||||
|
|
||||||
for (bcnt = 0;;)
|
for (bcnt = 0;;)
|
||||||
switch ((len = read(ifd, bfr, MAXBSIZE))) {
|
switch ((len = read(ifd, bfr, sizeof(bfr)))) {
|
||||||
case 0:
|
case 0:
|
||||||
exit(0);
|
exit(0);
|
||||||
case -1:
|
case -1:
|
||||||
@ -264,46 +264,45 @@ split1(void)
|
|||||||
static void
|
static void
|
||||||
split2(void)
|
split2(void)
|
||||||
{
|
{
|
||||||
|
char *buf;
|
||||||
|
size_t bufsize;
|
||||||
|
ssize_t len;
|
||||||
long lcnt = 0;
|
long lcnt = 0;
|
||||||
FILE *infp;
|
FILE *infp;
|
||||||
|
|
||||||
|
buf = NULL;
|
||||||
|
bufsize = 0;
|
||||||
|
|
||||||
/* Stick a stream on top of input file descriptor */
|
/* Stick a stream on top of input file descriptor */
|
||||||
if ((infp = fdopen(ifd, "r")) == NULL)
|
if ((infp = fdopen(ifd, "r")) == NULL)
|
||||||
err(EX_NOINPUT, "fdopen");
|
err(EX_NOINPUT, "fdopen");
|
||||||
|
|
||||||
/* Process input one line at a time */
|
/* Process input one line at a time */
|
||||||
while (fgets(bfr, sizeof(bfr), infp) != NULL) {
|
while ((len = getline(&buf, &bufsize, infp)) > 0) {
|
||||||
const int len = strlen(bfr);
|
|
||||||
|
|
||||||
/* If line is too long to deal with, just write it out */
|
|
||||||
if (bfr[len - 1] != '\n')
|
|
||||||
goto writeit;
|
|
||||||
|
|
||||||
/* Check if we need to start a new file */
|
/* Check if we need to start a new file */
|
||||||
if (pflag) {
|
if (pflag) {
|
||||||
regmatch_t pmatch;
|
regmatch_t pmatch;
|
||||||
|
|
||||||
pmatch.rm_so = 0;
|
pmatch.rm_so = 0;
|
||||||
pmatch.rm_eo = len - 1;
|
pmatch.rm_eo = len - 1;
|
||||||
if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0)
|
if (regexec(&rgx, buf, 0, &pmatch, REG_STARTEND) == 0)
|
||||||
newfile();
|
newfile();
|
||||||
} else if (lcnt++ == numlines) {
|
} else if (lcnt++ == numlines) {
|
||||||
newfile();
|
newfile();
|
||||||
lcnt = 1;
|
lcnt = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
writeit:
|
|
||||||
/* Open output file if needed */
|
/* Open output file if needed */
|
||||||
if (!file_open)
|
if (!file_open)
|
||||||
newfile();
|
newfile();
|
||||||
|
|
||||||
/* Write out line */
|
/* Write out line */
|
||||||
if (write(ofd, bfr, len) != len)
|
if (write(ofd, buf, len) != len)
|
||||||
err(EX_IOERR, "write");
|
err(EX_IOERR, "write");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* EOF or error? */
|
/* EOF or error? */
|
||||||
if (ferror(infp))
|
if ((len == -1 && errno != 0) || ferror(infp))
|
||||||
err(EX_IOERR, "read");
|
err(EX_IOERR, "read");
|
||||||
else
|
else
|
||||||
exit(0);
|
exit(0);
|
||||||
|
Loading…
Reference in New Issue
Block a user