Add new option '-p pattern' for splitting files based on matching lines in
the file with a regular expression. Useful for e.g. 'cvs diff' output. Also compile cleanly with -Wall and fix a few style bugs. PR: bin/9405
This commit is contained in:
parent
9cbac9cee4
commit
2fa6610f1e
@ -1,5 +1,6 @@
|
||||
# @(#)Makefile 8.1 (Berkeley) 6/6/93
|
||||
|
||||
PROG= split
|
||||
COPTS+= -Wall
|
||||
|
||||
.include <bsd.prog.mk>
|
||||
|
@ -30,6 +30,7 @@
|
||||
.\" SUCH DAMAGE.
|
||||
.\"
|
||||
.\" @(#)split.1 8.3 (Berkeley) 4/16/94
|
||||
.\" $Id$
|
||||
.\"
|
||||
.Dd April 16, 1994
|
||||
.Dt SPLIT 1
|
||||
@ -41,6 +42,7 @@
|
||||
.Nm split
|
||||
.Op Fl b Ar byte_count[k|m]
|
||||
.Op Fl l Ar line_count
|
||||
.Op Fl p Ar pattern
|
||||
.Op Ar file Op Ar name
|
||||
.Sh DESCRIPTION
|
||||
The
|
||||
@ -70,6 +72,16 @@ megabyte pieces.
|
||||
Create smaller files
|
||||
.Ar n
|
||||
lines in length.
|
||||
.It Fl p Ar pattern
|
||||
The file is split whenever an input line matches
|
||||
.Ar pattern ,
|
||||
which is interpreted as an extended regular expression.
|
||||
The matching line will be the first line of the next output file.
|
||||
This option is incompatible with the
|
||||
.Fl b
|
||||
and
|
||||
.Fl l
|
||||
options.
|
||||
.El
|
||||
.Pp
|
||||
If additional arguments are specified, the first is used as the name
|
||||
@ -92,6 +104,10 @@ For historical reasons, if you specify
|
||||
can only create 676 separate
|
||||
files.
|
||||
The default naming convention allows 2028 separate files.
|
||||
.Pp
|
||||
The maximum line length for matching patterns is 65536.
|
||||
.Sh SEE ALSO
|
||||
.Xr re_format 7 .
|
||||
.Sh HISTORY
|
||||
A
|
||||
.Nm split
|
||||
|
@ -44,6 +44,7 @@ static char sccsid[] = "@(#)split.c 8.2 (Berkeley) 4/16/94";
|
||||
#endif /* not lint */
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <ctype.h>
|
||||
#include <err.h>
|
||||
@ -52,6 +53,8 @@ static char sccsid[] = "@(#)split.c 8.2 (Berkeley) 4/16/94";
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <regex.h>
|
||||
#include <sysexits.h>
|
||||
|
||||
#define DEFLINE 1000 /* Default num lines per file. */
|
||||
|
||||
@ -61,6 +64,8 @@ int file_open; /* If a file open. */
|
||||
int ifd = -1, ofd = -1; /* Input/output file descriptors. */
|
||||
char bfr[MAXBSIZE]; /* I/O buffer. */
|
||||
char fname[MAXPATHLEN]; /* File name prefix. */
|
||||
regex_t rgx;
|
||||
int pflag;
|
||||
|
||||
void newfile __P((void));
|
||||
void split1 __P((void));
|
||||
@ -75,7 +80,7 @@ main(argc, argv)
|
||||
int ch;
|
||||
char *ep, *p;
|
||||
|
||||
while ((ch = getopt(argc, argv, "-0123456789b:l:")) != -1)
|
||||
while ((ch = getopt(argc, argv, "-0123456789b:l:p:")) != -1)
|
||||
switch (ch) {
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
@ -91,7 +96,8 @@ main(argc, argv)
|
||||
numlines =
|
||||
strtol(argv[optind] + 1, &ep, 10);
|
||||
if (numlines <= 0 || *ep)
|
||||
errx(1, "%s: illegal line count", optarg);
|
||||
errx(EX_USAGE,
|
||||
"%s: illegal line count", optarg);
|
||||
}
|
||||
break;
|
||||
case '-': /* Undocumented: historic stdin flag. */
|
||||
@ -102,17 +108,24 @@ main(argc, argv)
|
||||
case 'b': /* Byte count. */
|
||||
if ((bytecnt = strtol(optarg, &ep, 10)) <= 0 ||
|
||||
(*ep != '\0' && *ep != 'k' && *ep != 'm'))
|
||||
errx(1, "%s: illegal byte count", optarg);
|
||||
errx(EX_USAGE,
|
||||
"%s: illegal byte count", optarg);
|
||||
if (*ep == 'k')
|
||||
bytecnt *= 1024;
|
||||
else if (*ep == 'm')
|
||||
bytecnt *= 1048576;
|
||||
break;
|
||||
case 'p' : /* pattern matching. */
|
||||
if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0)
|
||||
errx(EX_USAGE, "%s: illegal regexp", optarg);
|
||||
pflag = 1;
|
||||
break;
|
||||
case 'l': /* Line count. */
|
||||
if (numlines != 0)
|
||||
usage();
|
||||
if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep)
|
||||
errx(1, "%s: illegal line count", optarg);
|
||||
errx(EX_USAGE,
|
||||
"%s: illegal line count", optarg);
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
@ -123,7 +136,7 @@ main(argc, argv)
|
||||
if (*argv != NULL)
|
||||
if (ifd == -1) { /* Input file. */
|
||||
if ((ifd = open(*argv, O_RDONLY, 0)) < 0)
|
||||
err(1, "%s", *argv);
|
||||
err(EX_NOINPUT, "%s", *argv);
|
||||
++argv;
|
||||
}
|
||||
if (*argv != NULL) /* File name prefix. */
|
||||
@ -131,9 +144,12 @@ main(argc, argv)
|
||||
if (*argv != NULL)
|
||||
usage();
|
||||
|
||||
if (pflag && (numlines != 0 || bytecnt != 0))
|
||||
usage();
|
||||
|
||||
if (numlines == 0)
|
||||
numlines = DEFLINE;
|
||||
else if (bytecnt)
|
||||
else if (bytecnt != 0)
|
||||
usage();
|
||||
|
||||
if (ifd == -1) /* Stdin by default. */
|
||||
@ -144,6 +160,8 @@ main(argc, argv)
|
||||
exit (0);
|
||||
}
|
||||
split2();
|
||||
if (pflag)
|
||||
regfree(&rgx);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
@ -159,40 +177,38 @@ split1()
|
||||
char *C;
|
||||
|
||||
for (bcnt = 0;;)
|
||||
switch (len = read(ifd, bfr, MAXBSIZE)) {
|
||||
switch ((len = read(ifd, bfr, MAXBSIZE))) {
|
||||
case 0:
|
||||
exit(0);
|
||||
case -1:
|
||||
err(1, "read");
|
||||
err(EX_IOERR, "read");
|
||||
/* NOTREACHED */
|
||||
default:
|
||||
if (!file_open) {
|
||||
if (!file_open)
|
||||
newfile();
|
||||
file_open = 1;
|
||||
}
|
||||
if (bcnt + len >= bytecnt) {
|
||||
dist = bytecnt - bcnt;
|
||||
if (write(ofd, bfr, dist) != dist)
|
||||
err(1, "write");
|
||||
err(EX_IOERR, "write");
|
||||
len -= dist;
|
||||
for (C = bfr + dist; len >= bytecnt;
|
||||
len -= bytecnt, C += bytecnt) {
|
||||
newfile();
|
||||
if (write(ofd,
|
||||
C, (int)bytecnt) != bytecnt)
|
||||
err(1, "write");
|
||||
err(EX_IOERR, "write");
|
||||
}
|
||||
if (len) {
|
||||
if (len != 0) {
|
||||
newfile();
|
||||
if (write(ofd, C, len) != len)
|
||||
err(1, "write");
|
||||
err(EX_IOERR, "write");
|
||||
} else
|
||||
file_open = 0;
|
||||
bcnt = len;
|
||||
} else {
|
||||
bcnt += len;
|
||||
if (write(ofd, bfr, len) != len)
|
||||
err(1, "write");
|
||||
err(EX_IOERR, "write");
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -204,40 +220,49 @@ split1()
|
||||
void
|
||||
split2()
|
||||
{
|
||||
long lcnt;
|
||||
int len, bcnt;
|
||||
char *Ce, *Cs;
|
||||
long lcnt = 0;
|
||||
FILE *infp;
|
||||
|
||||
for (lcnt = 0;;)
|
||||
switch (len = read(ifd, bfr, MAXBSIZE)) {
|
||||
case 0:
|
||||
exit(0);
|
||||
case -1:
|
||||
err(1, "read");
|
||||
/* NOTREACHED */
|
||||
default:
|
||||
if (!file_open) {
|
||||
/* Stick a stream on top of input file descriptor */
|
||||
if ((infp = fdopen(ifd, "r")) == NULL)
|
||||
err(EX_NOINPUT, "fdopen");
|
||||
|
||||
/* Process input one line at a time */
|
||||
while (fgets(bfr, sizeof(bfr), infp) != NULL) {
|
||||
const int len = strlen(bfr);
|
||||
|
||||
/* If line is too long to deal with, just write it out */
|
||||
if (bfr[len - 1] != '\n')
|
||||
goto writeit;
|
||||
|
||||
/* Check if we need to start a new file */
|
||||
if (pflag) {
|
||||
regmatch_t pmatch;
|
||||
|
||||
pmatch.rm_so = 0;
|
||||
pmatch.rm_eo = len - 1;
|
||||
if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0)
|
||||
newfile();
|
||||
file_open = 1;
|
||||
}
|
||||
for (Cs = Ce = bfr; len--; Ce++)
|
||||
if (*Ce == '\n' && ++lcnt == numlines) {
|
||||
bcnt = Ce - Cs + 1;
|
||||
if (write(ofd, Cs, bcnt) != bcnt)
|
||||
err(1, "write");
|
||||
lcnt = 0;
|
||||
Cs = Ce + 1;
|
||||
if (len)
|
||||
newfile();
|
||||
else
|
||||
file_open = 0;
|
||||
}
|
||||
if (Cs < Ce) {
|
||||
bcnt = Ce - Cs;
|
||||
if (write(ofd, Cs, bcnt) != bcnt)
|
||||
err(1, "write");
|
||||
}
|
||||
} else if (lcnt++ == numlines) {
|
||||
newfile();
|
||||
lcnt = 1;
|
||||
}
|
||||
|
||||
writeit:
|
||||
/* Open output file if needed */
|
||||
if (!file_open)
|
||||
newfile();
|
||||
|
||||
/* Write out line */
|
||||
if (write(ofd, bfr, len) != len)
|
||||
err(EX_IOERR, "write");
|
||||
}
|
||||
|
||||
/* EOF or error? */
|
||||
if (ferror(infp))
|
||||
err(EX_IOERR, "read");
|
||||
else
|
||||
exit(0);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -269,7 +294,7 @@ newfile()
|
||||
#define MAXFILES 676
|
||||
if (fnum == MAXFILES) {
|
||||
if (!defname || fname[0] == 'z')
|
||||
errx(1, "too many files");
|
||||
errx(EX_DATAERR, "too many files");
|
||||
++fname[0];
|
||||
fnum = 0;
|
||||
}
|
||||
@ -277,13 +302,13 @@ newfile()
|
||||
fpnt[1] = fnum % 26 + 'a';
|
||||
++fnum;
|
||||
if (!freopen(fname, "w", stdout))
|
||||
err(1, "%s", fname);
|
||||
err(EX_IOERR, "%s", fname);
|
||||
file_open = 1;
|
||||
}
|
||||
|
||||
static void
|
||||
usage()
|
||||
{
|
||||
(void)fprintf(stderr,
|
||||
"usage: split [-b byte_count] [-l line_count] [file [prefix]]\n");
|
||||
exit(1);
|
||||
errx(EX_USAGE,
|
||||
"usage: split [-b byte_count] [-l line_count] [-p pattern] [file [prefix]]");
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user