Add a -n option to split(1) to split files into N chunks rather than
having to specify the right number of bytes. Obtained from: NetBSD Submitted by: Jan Schaumann <jschauma@netmeister.org> PR: 113175
This commit is contained in:
parent
cc456a74ab
commit
0e286f086c
@ -32,7 +32,7 @@
|
||||
.\" @(#)split.1 8.3 (Berkeley) 4/16/94
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd August 10, 2006
|
||||
.Dd March 16, 2008
|
||||
.Dt SPLIT 1
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -53,6 +53,10 @@
|
||||
.Op Fl a Ar suffix_length
|
||||
.Op Ar file Op Ar prefix
|
||||
.Nm
|
||||
.Fl n Ar chunk_count
|
||||
.Op Fl a Ar suffix_length
|
||||
.Op Ar file Op Ar prefix
|
||||
.Nm
|
||||
.Fl p Ar pattern
|
||||
.Op Fl a Ar suffix_length
|
||||
.Op Ar file Op Ar prefix
|
||||
@ -112,6 +116,10 @@ gigabyte pieces.
|
||||
Create smaller files
|
||||
.Ar line_count
|
||||
lines in length.
|
||||
.It Fl n Ar chunk_count
|
||||
Split file into
|
||||
.Ar chunk_count
|
||||
smaller files.
|
||||
.It Fl p Ar pattern
|
||||
The file is split whenever an input line matches
|
||||
.Ar pattern ,
|
||||
|
@ -45,6 +45,8 @@ static const char sccsid[] = "@(#)split.c 8.2 (Berkeley) 4/16/94";
|
||||
#endif
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <ctype.h>
|
||||
#include <err.h>
|
||||
@ -64,6 +66,7 @@ static const char sccsid[] = "@(#)split.c 8.2 (Berkeley) 4/16/94";
|
||||
#define DEFLINE 1000 /* Default num lines per file. */
|
||||
|
||||
off_t bytecnt; /* Byte count to split on. */
|
||||
off_t chunks = 0; /* Chunks count to split into. */
|
||||
long numlines; /* Line count to split on. */
|
||||
int file_open; /* If a file open. */
|
||||
int ifd = -1, ofd = -1; /* Input/output file descriptors. */
|
||||
@ -73,9 +76,10 @@ regex_t rgx;
|
||||
int pflag;
|
||||
long sufflen = 2; /* File name suffix length. */
|
||||
|
||||
void newfile(void);
|
||||
void split1(void);
|
||||
void split2(void);
|
||||
static void newfile(void);
|
||||
static void split1(void);
|
||||
static void split2(void);
|
||||
static void split3(void);
|
||||
static void usage(void);
|
||||
|
||||
int
|
||||
@ -88,7 +92,7 @@ main(int argc, char **argv)
|
||||
|
||||
setlocale(LC_ALL, "");
|
||||
|
||||
while ((ch = getopt(argc, argv, "0123456789a:b:l:p:")) != -1)
|
||||
while ((ch = getopt(argc, argv, "0123456789a:b:l:n:p:")) != -1)
|
||||
switch (ch) {
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
@ -138,6 +142,15 @@ main(int argc, char **argv)
|
||||
errx(EX_USAGE,
|
||||
"%s: illegal line count", optarg);
|
||||
break;
|
||||
case 'n': /* Chunks. */
|
||||
if (!isdigit((unsigned char)optarg[0]) ||
|
||||
(chunks = (size_t)strtoul(optarg, &ep, 10)) == 0 ||
|
||||
*ep != '\0') {
|
||||
errx(EX_USAGE, "%s: illegal number of chunks",
|
||||
optarg);
|
||||
}
|
||||
break;
|
||||
|
||||
case 'p': /* pattern matching. */
|
||||
if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0)
|
||||
errx(EX_USAGE, "%s: illegal regexp", optarg);
|
||||
@ -164,12 +177,15 @@ main(int argc, char **argv)
|
||||
|
||||
if (strlen(fname) + (unsigned long)sufflen >= sizeof(fname))
|
||||
errx(EX_USAGE, "suffix is too long");
|
||||
if (pflag && (numlines != 0 || bytecnt != 0))
|
||||
if (pflag && (numlines != 0 || bytecnt != 0 || chunks != 0))
|
||||
usage();
|
||||
|
||||
if (numlines == 0)
|
||||
numlines = DEFLINE;
|
||||
else if (bytecnt != 0)
|
||||
else if (bytecnt != 0 || chunks != 0)
|
||||
usage();
|
||||
|
||||
if (bytecnt && chunks)
|
||||
usage();
|
||||
|
||||
if (ifd == -1) /* Stdin by default. */
|
||||
@ -178,6 +194,9 @@ main(int argc, char **argv)
|
||||
if (bytecnt) {
|
||||
split1();
|
||||
exit (0);
|
||||
} else if (chunks) {
|
||||
split3();
|
||||
exit (0);
|
||||
}
|
||||
split2();
|
||||
if (pflag)
|
||||
@ -189,12 +208,15 @@ main(int argc, char **argv)
|
||||
* split1 --
|
||||
* Split the input by bytes.
|
||||
*/
|
||||
void
|
||||
static void
|
||||
split1(void)
|
||||
{
|
||||
off_t bcnt;
|
||||
char *C;
|
||||
ssize_t dist, len;
|
||||
int nfiles;
|
||||
|
||||
nfiles = 0;
|
||||
|
||||
for (bcnt = 0;;)
|
||||
switch ((len = read(ifd, bfr, MAXBSIZE))) {
|
||||
@ -204,8 +226,12 @@ split1(void)
|
||||
err(EX_IOERR, "read");
|
||||
/* NOTREACHED */
|
||||
default:
|
||||
if (!file_open)
|
||||
newfile();
|
||||
if (!file_open) {
|
||||
if (!chunks || (nfiles < chunks)) {
|
||||
newfile();
|
||||
nfiles++;
|
||||
}
|
||||
}
|
||||
if (bcnt + len >= bytecnt) {
|
||||
dist = bytecnt - bcnt;
|
||||
if (write(ofd, bfr, dist) != dist)
|
||||
@ -213,13 +239,19 @@ split1(void)
|
||||
len -= dist;
|
||||
for (C = bfr + dist; len >= bytecnt;
|
||||
len -= bytecnt, C += bytecnt) {
|
||||
if (!chunks || (nfiles < chunks)) {
|
||||
newfile();
|
||||
nfiles++;
|
||||
}
|
||||
if (write(ofd,
|
||||
C, bytecnt) != bytecnt)
|
||||
err(EX_IOERR, "write");
|
||||
}
|
||||
if (len != 0) {
|
||||
if (!chunks || (nfiles < chunks)) {
|
||||
newfile();
|
||||
nfiles++;
|
||||
}
|
||||
if (write(ofd, C, len) != len)
|
||||
err(EX_IOERR, "write");
|
||||
} else
|
||||
@ -237,7 +269,7 @@ split1(void)
|
||||
* split2 --
|
||||
* Split the input by lines.
|
||||
*/
|
||||
void
|
||||
static void
|
||||
split2(void)
|
||||
{
|
||||
long lcnt = 0;
|
||||
@ -285,11 +317,36 @@ split2(void)
|
||||
exit(0);
|
||||
}
|
||||
|
||||
/*
|
||||
* split3 --
|
||||
* Split the input into specified number of chunks
|
||||
*/
|
||||
static void
|
||||
split3(void)
|
||||
{
|
||||
struct stat sb;
|
||||
|
||||
if (fstat(ifd, &sb) == -1) {
|
||||
err(1, "stat");
|
||||
/* NOTREACHED */
|
||||
}
|
||||
|
||||
if (chunks > sb.st_size) {
|
||||
errx(1, "can't split into more than %d files",
|
||||
(int)sb.st_size);
|
||||
/* NOTREACHED */
|
||||
}
|
||||
|
||||
bytecnt = sb.st_size / chunks;
|
||||
split1();
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* newfile --
|
||||
* Open a new output file.
|
||||
*/
|
||||
void
|
||||
static void
|
||||
newfile(void)
|
||||
{
|
||||
long i, maxfiles, tfnum;
|
||||
@ -338,6 +395,7 @@ usage(void)
|
||||
(void)fprintf(stderr,
|
||||
"usage: split [-l line_count] [-a suffix_length] [file [prefix]]\n"
|
||||
" split -b byte_count[K|k|M|m|G|g] [-a suffix_length] [file [prefix]]\n"
|
||||
" split -n chunk_count [-a suffix_length] [file [prefix]]\n"
|
||||
" split -p pattern [-a suffix_length] [file [prefix]]\n");
|
||||
exit(EX_USAGE);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user