Add a -n option to split(1) to split files into N chunks rather than

having to specify the right number of bytes.

Obtained from:	NetBSD
Submitted by:	Jan Schaumann <jschauma@netmeister.org>
PR:		113175
This commit is contained in:
David Schultz 2008-03-16 15:15:32 +00:00
parent cc456a74ab
commit 0e286f086c
2 changed files with 78 additions and 12 deletions

View File

@ -32,7 +32,7 @@
.\" @(#)split.1 8.3 (Berkeley) 4/16/94
.\" $FreeBSD$
.\"
.Dd August 10, 2006
.Dd March 16, 2008
.Dt SPLIT 1
.Os
.Sh NAME
@ -53,6 +53,10 @@
.Op Fl a Ar suffix_length
.Op Ar file Op Ar prefix
.Nm
.Fl n Ar chunk_count
.Op Fl a Ar suffix_length
.Op Ar file Op Ar prefix
.Nm
.Fl p Ar pattern
.Op Fl a Ar suffix_length
.Op Ar file Op Ar prefix
@ -112,6 +116,10 @@ gigabyte pieces.
Create smaller files
.Ar line_count
lines in length.
.It Fl n Ar chunk_count
Split file into
.Ar chunk_count
smaller files.
.It Fl p Ar pattern
The file is split whenever an input line matches
.Ar pattern ,

View File

@ -45,6 +45,8 @@ static const char sccsid[] = "@(#)split.c 8.2 (Berkeley) 4/16/94";
#endif
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <ctype.h>
#include <err.h>
@ -64,6 +66,7 @@ static const char sccsid[] = "@(#)split.c 8.2 (Berkeley) 4/16/94";
#define DEFLINE 1000 /* Default num lines per file. */
off_t bytecnt; /* Byte count to split on. */
off_t chunks = 0; /* Chunks count to split into. */
long numlines; /* Line count to split on. */
int file_open; /* If a file open. */
int ifd = -1, ofd = -1; /* Input/output file descriptors. */
@ -73,9 +76,10 @@ regex_t rgx;
int pflag;
long sufflen = 2; /* File name suffix length. */
void newfile(void);
void split1(void);
void split2(void);
static void newfile(void);
static void split1(void);
static void split2(void);
static void split3(void);
static void usage(void);
int
@ -88,7 +92,7 @@ main(int argc, char **argv)
setlocale(LC_ALL, "");
while ((ch = getopt(argc, argv, "0123456789a:b:l:p:")) != -1)
while ((ch = getopt(argc, argv, "0123456789a:b:l:n:p:")) != -1)
switch (ch) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
@ -138,6 +142,15 @@ main(int argc, char **argv)
errx(EX_USAGE,
"%s: illegal line count", optarg);
break;
case 'n': /* Chunks. */
if (!isdigit((unsigned char)optarg[0]) ||
(chunks = (size_t)strtoul(optarg, &ep, 10)) == 0 ||
*ep != '\0') {
errx(EX_USAGE, "%s: illegal number of chunks",
optarg);
}
break;
case 'p': /* pattern matching. */
if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0)
errx(EX_USAGE, "%s: illegal regexp", optarg);
@ -164,12 +177,15 @@ main(int argc, char **argv)
if (strlen(fname) + (unsigned long)sufflen >= sizeof(fname))
errx(EX_USAGE, "suffix is too long");
if (pflag && (numlines != 0 || bytecnt != 0))
if (pflag && (numlines != 0 || bytecnt != 0 || chunks != 0))
usage();
if (numlines == 0)
numlines = DEFLINE;
else if (bytecnt != 0)
else if (bytecnt != 0 || chunks != 0)
usage();
if (bytecnt && chunks)
usage();
if (ifd == -1) /* Stdin by default. */
@ -178,6 +194,9 @@ main(int argc, char **argv)
if (bytecnt) {
split1();
exit (0);
} else if (chunks) {
split3();
exit (0);
}
split2();
if (pflag)
@ -189,12 +208,15 @@ main(int argc, char **argv)
* split1 --
* Split the input by bytes.
*/
void
static void
split1(void)
{
off_t bcnt;
char *C;
ssize_t dist, len;
int nfiles;
nfiles = 0;
for (bcnt = 0;;)
switch ((len = read(ifd, bfr, MAXBSIZE))) {
@ -204,8 +226,12 @@ split1(void)
err(EX_IOERR, "read");
/* NOTREACHED */
default:
if (!file_open)
newfile();
if (!file_open) {
if (!chunks || (nfiles < chunks)) {
newfile();
nfiles++;
}
}
if (bcnt + len >= bytecnt) {
dist = bytecnt - bcnt;
if (write(ofd, bfr, dist) != dist)
@ -213,13 +239,19 @@ split1(void)
len -= dist;
for (C = bfr + dist; len >= bytecnt;
len -= bytecnt, C += bytecnt) {
if (!chunks || (nfiles < chunks)) {
newfile();
nfiles++;
}
if (write(ofd,
C, bytecnt) != bytecnt)
err(EX_IOERR, "write");
}
if (len != 0) {
if (!chunks || (nfiles < chunks)) {
newfile();
nfiles++;
}
if (write(ofd, C, len) != len)
err(EX_IOERR, "write");
} else
@ -237,7 +269,7 @@ split1(void)
* split2 --
* Split the input by lines.
*/
void
static void
split2(void)
{
long lcnt = 0;
@ -285,11 +317,36 @@ writeit:
exit(0);
}
/*
 * split3 --
 *	Split the input into a specified number of chunks.
 *
 *	Derives the per-chunk byte count from the size fstat(2) reports for
 *	the input descriptor, stores it in bytecnt, and hands the actual
 *	splitting off to split1().  Exits with an error if the chunk count is
 *	not positive, if fstat(2) fails, or if more chunks were requested than
 *	the reported size has bytes.
 */
static void
split3(void)
{
	struct stat sb;

	/*
	 * chunks is a signed off_t; reject zero and negative values up
	 * front so the division below can neither fault nor produce a
	 * negative byte count.
	 */
	if (chunks <= 0) {
		errx(EX_USAGE, "illegal number of chunks");
		/* NOTREACHED */
	}

	if (fstat(ifd, &sb) == -1) {
		err(1, "stat");
		/* NOTREACHED */
	}

	if (chunks > sb.st_size) {
		/*
		 * st_size is an off_t and may exceed INT_MAX; print it at
		 * full width instead of truncating it through an int.
		 */
		errx(1, "can't split into more than %lld files",
		    (long long)sb.st_size);
		/* NOTREACHED */
	}

	/* Integer division: any remainder is left for split1() to place. */
	bytecnt = sb.st_size / chunks;
	split1();
}
/*
* newfile --
* Open a new output file.
*/
void
static void
newfile(void)
{
long i, maxfiles, tfnum;
@ -338,6 +395,7 @@ usage(void)
(void)fprintf(stderr,
"usage: split [-l line_count] [-a suffix_length] [file [prefix]]\n"
" split -b byte_count[K|k|M|m|G|g] [-a suffix_length] [file [prefix]]\n"
" split -n chunk_count [-a suffix_length] [file [prefix]]\n"
" split -p pattern [-a suffix_length] [file [prefix]]\n");
exit(EX_USAGE);
}