Add new option '-p pattern' for splitting files based on matching lines in
the file with a regular expression. Useful for e.g. 'cvs diff' output.
Also compile cleanly with -Wall and fix a few style bugs.
PR:		bin/9405
This commit is contained in:
Archie Cobbs 1999-02-01 21:16:45 +00:00
parent 9cbac9cee4
commit 2fa6610f1e
3 changed files with 94 additions and 52 deletions

View File

@ -1,5 +1,6 @@
# @(#)Makefile 8.1 (Berkeley) 6/6/93
PROG= split
COPTS+= -Wall
.include <bsd.prog.mk>

View File

@ -30,6 +30,7 @@
.\" SUCH DAMAGE.
.\"
.\" @(#)split.1 8.3 (Berkeley) 4/16/94
.\" $Id$
.\"
.Dd April 16, 1994
.Dt SPLIT 1
@ -41,6 +42,7 @@
.Nm split
.Op Fl b Ar byte_count[k|m]
.Op Fl l Ar line_count
.Op Fl p Ar pattern
.Op Ar file Op Ar name
.Sh DESCRIPTION
The
@ -70,6 +72,16 @@ megabyte pieces.
Create smaller files
.Ar n
lines in length.
.It Fl p Ar pattern
The file is split whenever an input line matches
.Ar pattern ,
which is interpreted as an extended regular expression.
The matching line will be the first line of the next output file.
This option is incompatible with the
.Fl b
and
.Fl l
options.
.El
.Pp
If additional arguments are specified, the first is used as the name
@ -92,6 +104,10 @@ For historical reasons, if you specify
can only create 676 separate
files.
The default naming convention allows 2028 separate files.
.Pp
The maximum line length for matching patterns is 65536 bytes.
.Sh SEE ALSO
.Xr re_format 7 .
.Sh HISTORY
A
.Nm split

View File

@ -44,6 +44,7 @@ static char sccsid[] = "@(#)split.c 8.2 (Berkeley) 4/16/94";
#endif /* not lint */
#include <sys/param.h>
#include <sys/types.h>
#include <ctype.h>
#include <err.h>
@ -52,6 +53,8 @@ static char sccsid[] = "@(#)split.c 8.2 (Berkeley) 4/16/94";
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <regex.h>
#include <sysexits.h>
#define DEFLINE 1000 /* Default num lines per file. */
@ -61,6 +64,8 @@ int file_open; /* If a file open. */
int ifd = -1, ofd = -1; /* Input/output file descriptors. */
char bfr[MAXBSIZE]; /* I/O buffer. */
char fname[MAXPATHLEN]; /* File name prefix. */
regex_t rgx;
int pflag;
void newfile __P((void));
void split1 __P((void));
@ -75,7 +80,7 @@ main(argc, argv)
int ch;
char *ep, *p;
while ((ch = getopt(argc, argv, "-0123456789b:l:")) != -1)
while ((ch = getopt(argc, argv, "-0123456789b:l:p:")) != -1)
switch (ch) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
@ -91,7 +96,8 @@ main(argc, argv)
numlines =
strtol(argv[optind] + 1, &ep, 10);
if (numlines <= 0 || *ep)
errx(1, "%s: illegal line count", optarg);
errx(EX_USAGE,
"%s: illegal line count", optarg);
}
break;
case '-': /* Undocumented: historic stdin flag. */
@ -102,17 +108,24 @@ main(argc, argv)
case 'b': /* Byte count. */
if ((bytecnt = strtol(optarg, &ep, 10)) <= 0 ||
(*ep != '\0' && *ep != 'k' && *ep != 'm'))
errx(1, "%s: illegal byte count", optarg);
errx(EX_USAGE,
"%s: illegal byte count", optarg);
if (*ep == 'k')
bytecnt *= 1024;
else if (*ep == 'm')
bytecnt *= 1048576;
break;
case 'p' : /* pattern matching. */
if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0)
errx(EX_USAGE, "%s: illegal regexp", optarg);
pflag = 1;
break;
case 'l': /* Line count. */
if (numlines != 0)
usage();
if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep)
errx(1, "%s: illegal line count", optarg);
errx(EX_USAGE,
"%s: illegal line count", optarg);
break;
default:
usage();
@ -123,7 +136,7 @@ main(argc, argv)
if (*argv != NULL)
if (ifd == -1) { /* Input file. */
if ((ifd = open(*argv, O_RDONLY, 0)) < 0)
err(1, "%s", *argv);
err(EX_NOINPUT, "%s", *argv);
++argv;
}
if (*argv != NULL) /* File name prefix. */
@ -131,9 +144,12 @@ main(argc, argv)
if (*argv != NULL)
usage();
if (pflag && (numlines != 0 || bytecnt != 0))
usage();
if (numlines == 0)
numlines = DEFLINE;
else if (bytecnt)
else if (bytecnt != 0)
usage();
if (ifd == -1) /* Stdin by default. */
@ -144,6 +160,8 @@ main(argc, argv)
exit (0);
}
split2();
if (pflag)
regfree(&rgx);
exit(0);
}
@ -159,40 +177,38 @@ split1()
char *C;
for (bcnt = 0;;)
switch (len = read(ifd, bfr, MAXBSIZE)) {
switch ((len = read(ifd, bfr, MAXBSIZE))) {
case 0:
exit(0);
case -1:
err(1, "read");
err(EX_IOERR, "read");
/* NOTREACHED */
default:
if (!file_open) {
if (!file_open)
newfile();
file_open = 1;
}
if (bcnt + len >= bytecnt) {
dist = bytecnt - bcnt;
if (write(ofd, bfr, dist) != dist)
err(1, "write");
err(EX_IOERR, "write");
len -= dist;
for (C = bfr + dist; len >= bytecnt;
len -= bytecnt, C += bytecnt) {
newfile();
if (write(ofd,
C, (int)bytecnt) != bytecnt)
err(1, "write");
err(EX_IOERR, "write");
}
if (len) {
if (len != 0) {
newfile();
if (write(ofd, C, len) != len)
err(1, "write");
err(EX_IOERR, "write");
} else
file_open = 0;
bcnt = len;
} else {
bcnt += len;
if (write(ofd, bfr, len) != len)
err(1, "write");
err(EX_IOERR, "write");
}
}
}
@ -204,40 +220,49 @@ split1()
void
split2()
{
long lcnt;
int len, bcnt;
char *Ce, *Cs;
long lcnt = 0;
FILE *infp;
for (lcnt = 0;;)
switch (len = read(ifd, bfr, MAXBSIZE)) {
case 0:
exit(0);
case -1:
err(1, "read");
/* NOTREACHED */
default:
if (!file_open) {
/* Stick a stream on top of input file descriptor */
if ((infp = fdopen(ifd, "r")) == NULL)
err(EX_NOINPUT, "fdopen");
/* Process input one line at a time */
while (fgets(bfr, sizeof(bfr), infp) != NULL) {
const int len = strlen(bfr);
/* If line is too long to deal with, just write it out */
if (bfr[len - 1] != '\n')
goto writeit;
/* Check if we need to start a new file */
if (pflag) {
regmatch_t pmatch;
pmatch.rm_so = 0;
pmatch.rm_eo = len - 1;
if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0)
newfile();
file_open = 1;
}
for (Cs = Ce = bfr; len--; Ce++)
if (*Ce == '\n' && ++lcnt == numlines) {
bcnt = Ce - Cs + 1;
if (write(ofd, Cs, bcnt) != bcnt)
err(1, "write");
lcnt = 0;
Cs = Ce + 1;
if (len)
newfile();
else
file_open = 0;
}
if (Cs < Ce) {
bcnt = Ce - Cs;
if (write(ofd, Cs, bcnt) != bcnt)
err(1, "write");
}
} else if (lcnt++ == numlines) {
newfile();
lcnt = 1;
}
writeit:
/* Open output file if needed */
if (!file_open)
newfile();
/* Write out line */
if (write(ofd, bfr, len) != len)
err(EX_IOERR, "write");
}
/* EOF or error? */
if (ferror(infp))
err(EX_IOERR, "read");
else
exit(0);
}
/*
@ -269,7 +294,7 @@ newfile()
#define MAXFILES 676
if (fnum == MAXFILES) {
if (!defname || fname[0] == 'z')
errx(1, "too many files");
errx(EX_DATAERR, "too many files");
++fname[0];
fnum = 0;
}
@ -277,13 +302,13 @@ newfile()
fpnt[1] = fnum % 26 + 'a';
++fnum;
if (!freopen(fname, "w", stdout))
err(1, "%s", fname);
err(EX_IOERR, "%s", fname);
file_open = 1;
}
static void
usage()
{
(void)fprintf(stderr,
"usage: split [-b byte_count] [-l line_count] [file [prefix]]\n");
exit(1);
errx(EX_USAGE,
"usage: split [-b byte_count] [-l line_count] [-p pattern] [file [prefix]]");
}