Add -a option (SUSv3) to split(1).

Submitted by:	Tim J. Robbins <tim@robbins.dropbear.id.au>
MFC after:	1 month
This commit is contained in:
Mike Barcroft 2002-02-01 06:55:18 +00:00
parent d9bfecab53
commit 4185049582
2 changed files with 49 additions and 11 deletions

View File

@ -40,6 +40,7 @@
.Nd split a file into pieces
.Sh SYNOPSIS
.Nm
.Op Fl a Ar suffix_length
.Op Fl b Ar byte_count[k|m]
.Op Fl l Ar line_count
.Op Fl p Ar pattern
@ -54,6 +55,10 @@ and breaks it up into files of 1000 lines each.
.Pp
The options are as follows:
.Bl -tag -width Ds
.It Fl a
Use
.Ar suffix_length
letters to form the suffix of the file name.
.It Fl b
Create smaller files
.Ar byte_count
@ -89,14 +94,20 @@ of the input file which is to be split.
If a second additional argument is specified, it is used as a prefix
for the names of the files into which the file is split.
In this case, each file into which the file is split is named by the
prefix followed by a lexically ordered suffix in the range of
.Dq Li aa-zz .
prefix followed by a lexically ordered suffix using
.Ar suffix_length
characters in the range
.Dq Li a-z .
If
.Fl a
is not specified, two letters are used as the suffix.
.Pp
If the
.Ar name
argument is not specified, the file is split into lexically ordered
files named in the range of
.Dq Li xaa-zzz .
files named with prefixes in the range of
.Dq Li x-z
and with suffixes as above.
.Sh BUGS
For historical reasons, if you specify
.Ar name ,
@ -104,6 +115,9 @@ For historical reasons, if you specify
can only create 676 separate
files.
The default naming convention allows 2028 separate files.
The
.Fl a
option can be used to work around this limitation.
.Pp
The maximum line length for matching patterns is 65536.
.Sh SEE ALSO

View File

@ -67,6 +67,7 @@ char bfr[MAXBSIZE]; /* I/O buffer. */
char fname[MAXPATHLEN]; /* File name prefix. */
regex_t rgx;
int pflag;
long sufflen = 2; /* File name suffix length. */
void newfile __P((void));
void split1 __P((void));
@ -81,7 +82,7 @@ main(argc, argv)
int ch;
char *ep, *p;
while ((ch = getopt(argc, argv, "-0123456789b:l:p:")) != -1)
while ((ch = getopt(argc, argv, "-0123456789a:b:l:p:")) != -1)
switch (ch) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
@ -106,6 +107,11 @@ main(argc, argv)
usage();
ifd = 0;
break;
case 'a': /* Suffix length */
if ((sufflen = strtol(optarg, &ep, 10)) <= 0 || *ep)
errx(EX_USAGE,
"%s: illegal suffix length", optarg);
break;
case 'b': /* Byte count. */
if ((bytecnt = strtoq(optarg, &ep, 10)) <= 0 ||
(*ep != '\0' && *ep != 'k' && *ep != 'm'))
@ -145,6 +151,8 @@ main(argc, argv)
if (*argv != NULL)
usage();
if (strlen(fname) + (unsigned long)sufflen >= sizeof(fname))
errx(EX_USAGE, "suffix is too long");
if (pflag && (numlines != 0 || bytecnt != 0))
usage();
@ -273,6 +281,7 @@ split2()
void
newfile()
{
long i, maxfiles, tfnum;
static long fnum;
static int defname;
static char *fpnt;
@ -288,19 +297,32 @@ newfile()
}
ofd = fileno(stdout);
}
/* maxfiles = 26^sufflen, but don't use libm. */
for (maxfiles = 1, i = 0; i < sufflen; i++)
if ((maxfiles *= 26) <= 0)
errx(EX_USAGE, "suffix is too long (max %ld)", i);
/*
* Hack to increase max files; original code wandered through
* magic characters. Maximum files is 3 * 26 * 26 == 2028
* magic characters.
*/
#define MAXFILES 676
if (fnum == MAXFILES) {
if (fnum == maxfiles) {
if (!defname || fname[0] == 'z')
errx(EX_DATAERR, "too many files");
++fname[0];
fnum = 0;
}
fpnt[0] = fnum / 26 + 'a';
fpnt[1] = fnum % 26 + 'a';
/* Generate suffix of sufflen letters */
tfnum = fnum;
i = sufflen - 1;
do {
fpnt[i] = tfnum % 26 + 'a';
tfnum /= 26;
} while (i-- > 0);
fpnt[sufflen] = '\0';
++fnum;
if (!freopen(fname, "w", stdout))
err(EX_IOERR, "%s", fname);
@ -311,6 +333,8 @@ static void
usage()
{
(void)fprintf(stderr,
"usage: split [-b byte_count] [-l line_count] [-p pattern] [file [prefix]]\n");
"usage: split [-a sufflen] [-b byte_count] [-l line_count] [-p pattern]\n");
(void)fprintf(stderr,
" [file [prefix]]\n");
exit(EX_USAGE);
}