Vendor import NETBSD's sort(1). This will be a replacement for

our GNU sort, as discussed 6 months or more ago.
This commit is contained in:
Mark Murray 2002-03-22 13:54:24 +00:00
parent 1220246fc5
commit 7a0a395027
14 changed files with 3219 additions and 0 deletions

7
usr.bin/sort/Makefile Normal file
View File

@ -0,0 +1,7 @@
# $NetBSD: Makefile,v 1.3 2001/01/08 19:16:49 jdolecek Exp $
# from: @(#)Makefile 8.1 (Berkeley) 6/6/93
PROG= sort
SRCS= append.c fields.c files.c fsort.c init.c msort.c sort.c tmp.c
.include <bsd.prog.mk>

205
usr.bin/sort/append.c Normal file
View File

@ -0,0 +1,205 @@
/* $NetBSD: append.c,v 1.9 2001/01/18 20:59:43 jdolecek Exp $ */
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "sort.h"
#ifndef lint
__RCSID("$NetBSD: append.c,v 1.9 2001/01/18 20:59:43 jdolecek Exp $");
__SCCSID("@(#)append.c 8.1 (Berkeley) 6/6/93");
#endif /* not lint */
#include <stdlib.h>
#include <string.h>
#define OUTPUT { \
if ((n = cpos - ppos) > 1) { \
for (; ppos < cpos; ++ppos) \
*ppos -= odepth; \
ppos -= n; \
if (stable_sort) \
sradixsort(ppos, n, wts1, REC_D); \
else \
radixsort(ppos, n, wts1, REC_D); \
for (; ppos < cpos; ppos++) { \
prec = (const RECHEADER *) (*ppos - sizeof(TRECHEADER));\
put(prec, fp); \
} \
} else put(prec, fp); \
}
/*
* copy sorted lines to output; check for uniqueness
*/
void
append(keylist, nelem, depth, fp, put, ftbl)
const u_char **keylist;
int nelem;
int depth;
FILE *fp;
put_func_t put;
struct field *ftbl;
{
u_char *wts, *wts1;
int n, odepth;
const u_char **cpos, **ppos, **lastkey;
const u_char *cend, *pend, *start;
const struct recheader *crec, *prec;
if (*keylist == '\0' && UNIQUE)
return;
wts1 = wts = ftbl[0].weights;
if ((!UNIQUE) && SINGL_FLD) {
if ((ftbl[0].flags & F) && (ftbl[0].flags & R))
wts1 = Rascii;
else if (ftbl[0].flags & F)
wts1 = ascii;
odepth = depth;
}
lastkey = keylist + nelem;
depth += sizeof(TRECHEADER);
if (SINGL_FLD && (UNIQUE || wts1 != wts)) {
ppos = keylist;
prec = (const RECHEADER *) (*ppos - depth);
if (UNIQUE)
put(prec, fp);
for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
crec = (const RECHEADER *) (*cpos - depth);
if (crec->length == prec->length) {
/*
* Set pend and cend so that trailing NUL and
* record separator is ignored.
*/
pend = (const u_char *) &prec->data + prec->length - 2;
cend = (const u_char *) &crec->data + crec->length - 2;
for (start = *cpos; cend >= start; cend--) {
if (wts[*cend] != wts[*pend])
break;
pend--;
}
if (pend + 1 != *ppos) {
if (!UNIQUE) {
OUTPUT;
} else
put(crec, fp);
ppos = cpos;
prec = crec;
}
} else {
if (!UNIQUE) {
OUTPUT;
} else
put(crec, fp);
ppos = cpos;
prec = crec;
}
}
if (!UNIQUE) { OUTPUT; }
} else if (UNIQUE) {
ppos = keylist;
prec = (const RECHEADER *) (*ppos - depth);
put(prec, fp);
for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
crec = (const RECHEADER *) (*cpos - depth);
if (crec->offset == prec->offset) {
/*
* Set pend and cend so that trailing NUL and
* record separator is ignored.
*/
pend = (const u_char *) &prec->data + prec->offset - 2;
cend = (const u_char *) &crec->data + crec->offset - 2;
for (start = *cpos; cend >= start; cend--) {
if (wts[*cend] != wts[*pend])
break;
pend--;
}
if (pend + 1 != *ppos) {
ppos = cpos;
prec = crec;
put(prec, fp);
}
} else {
ppos = cpos;
prec = crec;
put(prec, fp);
}
}
} else for (cpos = keylist; cpos < lastkey; cpos++) {
crec = (const RECHEADER *) (*cpos - depth);
put(crec, fp);
}
}
/*
* output the already sorted eol bin.
*/
void
rd_append(binno, infl0, nfiles, outfp, buffer, bufend)
u_char *buffer;
int infl0;
int binno, nfiles;
FILE *outfp;
u_char *bufend;
{
RECHEADER *rec;
rec = (RECHEADER *) buffer;
if (!getnext(binno, infl0, NULL, nfiles,
(RECHEADER *) buffer, bufend, 0)) {
putline(rec, outfp);
while (getnext(binno, infl0, NULL, nfiles, (RECHEADER *) buffer,
bufend, 0) == 0) {
if (!UNIQUE)
putline(rec, outfp);
}
}
}
/*
* append plain text--used after sorting the biggest bin.
*/
void
concat(a, b)
FILE *a, *b;
{
int nread;
char buffer[4096];
rewind(b);
while ((nread = fread(buffer, 1, 4096, b)) > 0)
EWRITE(buffer, 1, nread, a);
}

69
usr.bin/sort/extern.h Normal file
View File

@ -0,0 +1,69 @@
/* $NetBSD: extern.h,v 1.5 2001/01/12 19:31:25 jdolecek Exp $ */
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)extern.h 8.1 (Berkeley) 6/6/93
*/
void append __P((const u_char **, int, int, FILE *,
void (*)(const RECHEADER *, FILE *), struct field *));
void concat __P((FILE *, FILE *));
length_t enterkey __P((RECHEADER *, DBT *, int, struct field *));
void fixit __P((int *, char **));
void fldreset __P((struct field *));
FILE *ftmp __P((void));
void fmerge __P((int, int, struct filelist *, int,
get_func_t, FILE *, put_func_t, struct field *));
void fsort __P((int, int, int, struct filelist *, int, FILE *,
struct field *));
int geteasy __P((int, int, struct filelist *,
int, RECHEADER *, u_char *, struct field *));
int getnext __P((int, int, struct filelist *,
int, RECHEADER *, u_char *, struct field *));
int makekey __P((int, int, struct filelist *,
int, RECHEADER *, u_char *, struct field *));
int makeline __P((int, int, struct filelist *,
int, RECHEADER *, u_char *, struct field *));
void merge __P((int, int, get_func_t, FILE *, put_func_t, struct field *));
void num_init __P((void));
void onepass __P((const u_char **, int, long, long *, u_char *, FILE *));
int optval __P((int, int));
void order __P((struct filelist *, get_func_t, struct field *));
void putline __P((const RECHEADER *, FILE *));
void putrec __P((const RECHEADER *, FILE *));
void rd_append __P((int, int, int, FILE *, u_char *, u_char *));
int setfield __P((const char *, struct field *, int));
void settables __P((int));

339
usr.bin/sort/fields.c Normal file
View File

@ -0,0 +1,339 @@
/* $NetBSD: fields.c,v 1.9 2001/02/19 19:52:27 jdolecek Exp $ */
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* Subroutines to generate sort keys. */
#include "sort.h"
#ifndef lint
__RCSID("$NetBSD: fields.c,v 1.9 2001/02/19 19:52:27 jdolecek Exp $");
__SCCSID("@(#)fields.c 8.1 (Berkeley) 6/6/93");
#endif /* not lint */
#define blancmange(ptr) { \
if (BLANK & d_mask[*(ptr)]) \
while (BLANK & d_mask[*(++(ptr))]); \
}
#define NEXTCOL(pos) { \
if (!SEP_FLAG) \
while (BLANK & l_d_mask[*(++pos)]); \
while (!((FLD_D | REC_D_F) & l_d_mask[*++pos])); \
}
static u_char *enterfield __P((u_char *, u_char *, struct field *, int));
static u_char *number __P((u_char *, u_char *, u_char *, u_char *, int));
extern struct coldesc clist[(ND+1)*2];
extern int ncols;
#define DECIMAL '.'
#define OFFSET 128
u_char TENS[10]; /* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */
u_char NEGTENS[10]; /* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */
u_char *OFF_TENS, *OFF_NTENS; /* TENS - '0', NEGTENS - '0' */
u_char fnum[NBINS], rnum[NBINS];
/*
* constructs sort key with leading recheader, followed by the key,
* followed by the original line.
*/
length_t
enterkey(keybuf, line, size, fieldtable)
RECHEADER *keybuf; /* pointer to start of key */
DBT *line;
int size;
struct field fieldtable[];
{
int i;
u_char *l_d_mask;
u_char *lineend, *pos;
u_char *endkey, *keypos;
struct coldesc *clpos;
int col = 1;
struct field *ftpos;
l_d_mask = d_mask;
pos = (u_char *) line->data - 1;
lineend = (u_char *) line->data + line->size-1;
/* don't include rec_delimiter */
for (i = 0; i < ncols; i++) {
clpos = clist + i;
for (; (col < clpos->num) && (pos < lineend); col++) {
NEXTCOL(pos);
}
if (pos >= lineend)
break;
clpos->start = SEP_FLAG ? pos + 1 : pos;
NEXTCOL(pos);
clpos->end = pos;
col++;
if (pos >= lineend) {
clpos->end = lineend;
i++;
break;
}
}
for (; i <= ncols; i++)
clist[i].start = clist[i].end = lineend;
if (clist[0].start < (u_char *) line->data)
clist[0].start++;
keypos = keybuf->data;
endkey = (u_char *) keybuf + size - line->size;
for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++)
if ((keypos = enterfield(keypos, endkey, ftpos,
fieldtable->flags)) == NULL)
return (1);
keybuf->offset = keypos - keybuf->data;
keybuf->length = keybuf->offset + line->size;
if (keybuf->length + sizeof(TRECHEADER) > size) {
/* line too long for buffer */
return (1);
}
/*
* Make [s]radixsort() only sort by relevant part of key if:
* 1. we want to choose unique items by relevant field[s]
* 2. we want stable sort and so the items should be sorted only by
* the relevant field[s]
*/
if (UNIQUE || (stable_sort && keybuf->offset < line->size))
keypos[-1] = REC_D;
memcpy(keybuf->data + keybuf->offset, line->data, line->size);
return (0);
}
/*
* constructs a field (as defined by -k) within a key
*/
static u_char *
enterfield(tablepos, endkey, cur_fld, gflags)
struct field *cur_fld;
u_char *tablepos, *endkey;
int gflags;
{
u_char *start, *end, *lineend, *mask, *lweight;
struct column icol, tcol;
u_int flags;
u_int Rflag;
icol = cur_fld->icol;
tcol = cur_fld->tcol;
flags = cur_fld->flags;
start = icol.p->start;
lineend = clist[ncols].end;
if (flags & BI)
blancmange(start);
start += icol.indent;
start = min(start, lineend);
if (!tcol.num)
end = lineend;
else {
if (tcol.indent) {
end = tcol.p->start;
if (flags & BT)
blancmange(end);
end += tcol.indent;
end = min(end, lineend);
} else
end = tcol.p->end;
}
if (flags & N) {
Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0;
return number(tablepos, endkey, start, end, Rflag);
}
mask = cur_fld->mask;
lweight = cur_fld->weights;
for (; start < end; start++)
if (mask[*start]) {
if (*start <= 1) {
if (tablepos+2 >= endkey)
return (NULL);
*tablepos++ = lweight[1];
*tablepos++ = lweight[*start ? 2 : 1];
} else {
if (tablepos+1 >= endkey)
return (NULL);
*tablepos++ = lweight[*start];
}
}
*tablepos++ = lweight[0];
return (tablepos == endkey ? NULL : tablepos);
}
/* Uses the first bin to assign sign, expsign, 0, and the first
* 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ).
* When sorting in forward order:
* use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128;
* else use (0-99)->(2-102).
* If the exponent is >=61, use another byte for each additional 253
* in the exponent. Cutoff is at 567.
* To avoid confusing the exponent and the mantissa, use a field delimiter
* if the exponent is exactly 61, 61+252, etc--this is ok, since it's the
* only time a field delimiter can come in that position.
* Reverse order is done analagously.
*/
static u_char *
number(pos, bufend, line, lineend, Rflag)
u_char *line, *pos, *bufend, *lineend;
int Rflag;
{
int or_sign, parity = 0;
int expincr = 1, exponent = -1;
int bite, expsign = 1, sign = 1;
u_char lastvalue, *nonzero, *tline, *C_TENS;
u_char *nweights;
if (Rflag)
nweights = rnum;
else
nweights = fnum;
if (pos > bufend - 8)
return (NULL);
/*
* or_sign sets the sort direction:
* (-r: +/-)(sign: +/-)(expsign: +/-)
*/
or_sign = sign ^ expsign ^ Rflag;
blancmange(line);
if (*line == '-') { /* set the sign */
or_sign ^= 1;
sign = 0;
line++;
}
/* eat initial zeroes */
for (; *line == '0' && line < lineend; line++)
;
/* calculate exponents < 0 */
if (*line == DECIMAL) {
exponent = 1;
while (*++line == '0' && line < lineend)
exponent++;
expincr = 0;
expsign = 0;
}
/* next character better be a digit */
if (*line < '1' || *line > '9' || line >= lineend) {
*pos++ = nweights[127];
return (pos);
}
if (expincr) {
for (tline = line-1; *++tline >= '0' &&
*tline <= '9' && tline < lineend;)
exponent++;
}
if (exponent > 567) {
*pos++ = nweights[sign ? (expsign ? 254 : 128)
: (expsign ? 0 : 126)];
warnx("exponent out of bounds");
return (pos);
}
bite = min(exponent, 61);
*pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite)
: (expsign ? 64-bite : 64+bite)];
if (bite >= 61) {
do {
exponent -= bite;
bite = min(exponent, 254);
*pos++ = nweights[or_sign ? 254-bite : bite];
} while (bite == 254);
}
C_TENS = or_sign ? OFF_NTENS : OFF_TENS;
for (; line < lineend; line++) {
if (*line >= '0' && *line <= '9') {
if (parity) {
*pos++ = C_TENS[lastvalue] + (or_sign ? - *line
: *line);
if (pos == bufend)
return (NULL);
if (*line != '0' || lastvalue != '0')
nonzero = pos;
} else
lastvalue = *line;
parity ^= 1;
} else if(*line == DECIMAL) {
if(!expincr) /* a decimal already occurred once */
break;
expincr = 0;
} else
break;
}
if (parity && lastvalue != '0') {
*pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' :
OFF_TENS[lastvalue] + '0';
} else
pos = nonzero;
if (pos > bufend-1)
return (NULL);
*pos++ = or_sign ? nweights[254] : nweights[0];
return (pos);
}
/* This forces a gap around the record delimiter
* Thus fnum has vaues over (0,254) -> ((0,REC_D-1),(REC_D+1,255));
* rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0))
*/
void
num_init()
{
int i;
TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0';
NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0';
OFF_TENS = TENS - '0';
OFF_NTENS = NEGTENS - '0';
for (i = 1; i < 10; i++) {
TENS[i] = TENS[i - 1] + 10;
NEGTENS[i] = NEGTENS[i - 1] - 10;
}
for (i = 0; i < REC_D; i++) {
fnum[i] = i;
rnum[255 - i] = i;
}
for (i = REC_D; i <255; i++) {
fnum[i] = i + 1;
rnum[255 - i] = i - 1;
}
}

372
usr.bin/sort/files.c Normal file
View File

@ -0,0 +1,372 @@
/* $NetBSD: files.c,v 1.16 2001/02/19 20:50:17 jdolecek Exp $ */
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "sort.h"
#include "fsort.h"
#ifndef lint
__RCSID("$NetBSD: files.c,v 1.16 2001/02/19 20:50:17 jdolecek Exp $");
__SCCSID("@(#)files.c 8.1 (Berkeley) 6/6/93");
#endif /* not lint */
#include <string.h>
static int seq __P((FILE *, DBT *, DBT *));
/*
* this is the subroutine for file management for fsort().
* It keeps the buffers for all temporary files.
*/
int
getnext(binno, infl0, filelist, nfiles, pos, end, dummy)
int binno, infl0;
struct filelist *filelist;
int nfiles;
RECHEADER *pos;
u_char *end;
struct field *dummy;
{
int i;
u_char *hp;
static size_t nleft = 0;
static int cnt = 0, flag = -1;
static u_char maxb = 0;
static FILE *fp;
if (nleft == 0) {
if (binno < 0) /* reset files. */ {
for (i = 0; i < nfiles; i++) {
rewind(fstack[infl0 + i].fp);
fstack[infl0 + i].max_o = 0;
}
flag = -1;
nleft = cnt = 0;
return (-1);
}
maxb = fstack[infl0].maxb;
for (; nleft == 0; cnt++) {
if (cnt >= nfiles) {
cnt = 0;
return (EOF);
}
fp = fstack[infl0 + cnt].fp;
fread(&nleft, sizeof(nleft), 1, fp);
if (binno < maxb)
fstack[infl0+cnt].max_o
+= sizeof(nleft) + nleft;
else if (binno == maxb) {
if (binno != fstack[infl0].lastb) {
fseek(fp, fstack[infl0+
cnt].max_o, SEEK_SET);
fread(&nleft, sizeof(nleft), 1, fp);
}
if (nleft == 0)
fclose(fp);
} else if (binno == maxb + 1) { /* skip a bin */
fseek(fp, nleft, SEEK_CUR);
fread(&nleft, sizeof(nleft), 1, fp);
flag = cnt;
}
}
}
if ((u_char *) pos > end - sizeof(TRECHEADER))
return (BUFFEND);
fread(pos, sizeof(TRECHEADER), 1, fp);
if (end - pos->data < pos->length) {
hp = ((u_char *)pos) + sizeof(TRECHEADER);
for (i = sizeof(TRECHEADER); i ; i--)
ungetc(*--hp, fp);
return (BUFFEND);
}
fread(pos->data, pos->length, 1, fp);
nleft -= pos->length + sizeof(TRECHEADER);
if (nleft == 0 && binno == fstack[infl0].maxb)
fclose(fp);
return (0);
}
/*
* this is called when there is no special key. It's only called
* in the first fsort pass.
*/
int
makeline(flno, top, filelist, nfiles, recbuf, bufend, dummy2)
int flno, top;
struct filelist *filelist;
int nfiles;
RECHEADER *recbuf;
u_char *bufend;
struct field *dummy2;
{
static u_char *obufend;
static size_t osz;
char *pos;
static int filenum = 0, overflow = 0;
static FILE *fp = 0;
int c;
pos = (char *) recbuf->data;
if (overflow) {
/*
* Buffer shortage is solved by either of two ways:
* o flush previous buffered data and start using the
* buffer from start (see fsort())
* o realloc buffer and bump bufend
*
* The former is preferred, realloc is only done when
* there is exactly one item in buffer which does not fit.
*/
if (bufend == obufend)
memmove(pos, bufend - osz, osz);
pos += osz;
overflow = 0;
}
for (;;) {
if (flno >= 0 && (fp = fstack[flno].fp) == NULL)
return (EOF);
else if (fp == NULL) {
if (filenum >= nfiles)
return (EOF);
if (!(fp = fopen(filelist->names[filenum], "r")))
err(2, "%s", filelist->names[filenum]);
filenum++;
}
while ((pos < (char *)bufend) && ((c = getc(fp)) != EOF)) {
if ((*pos++ = c) == REC_D) {
recbuf->offset = 0;
recbuf->length = pos - (char *) recbuf->data;
return (0);
}
}
if (pos >= (char *)bufend) {
if (recbuf->data < bufend) {
overflow = 1;
obufend = bufend;
osz = (pos - (char *) recbuf->data);
}
return (BUFFEND);
} else if (c == EOF) {
if (recbuf->data != (u_char *) pos) {
*pos++ = REC_D;
recbuf->offset = 0;
recbuf->length = pos - (char *) recbuf->data;
return (0);
}
FCLOSE(fp);
fp = 0;
if (flno >= 0)
fstack[flno].fp = 0;
} else {
warnx("makeline: line too long: ignoring '%.100s...'", recbuf->data);
/* Consume the rest of line from input */
while((c = getc(fp)) != REC_D && c != EOF)
;
recbuf->offset = 0;
recbuf->length = 0;
return (BUFFEND);
}
}
}
/*
* This generates keys. It's only called in the first fsort pass
*/
int
makekey(flno, top, filelist, nfiles, recbuf, bufend, ftbl)
int flno, top;
struct filelist *filelist;
int nfiles;
RECHEADER *recbuf;
u_char *bufend;
struct field *ftbl;
{
static int filenum = 0;
static FILE *dbdesc = 0;
static DBT dbkey[1], line[1];
static int overflow = 0;
int c;
if (overflow) {
overflow = enterkey(recbuf, line, bufend - (u_char *)recbuf,
ftbl);
if (overflow)
return (BUFFEND);
else
return (0);
}
for (;;) {
if (flno >= 0) {
if (!(dbdesc = fstack[flno].fp))
return (EOF);
} else if (!dbdesc) {
if (filenum >= nfiles)
return (EOF);
dbdesc = fopen(filelist->names[filenum], "r");
if (!dbdesc)
err(2, "%s", filelist->names[filenum]);
filenum++;
}
if (!(c = seq(dbdesc, line, dbkey))) {
if ((signed)line->size > bufend - recbuf->data) {
overflow = 1;
} else {
overflow = enterkey(recbuf, line,
bufend - (u_char *) recbuf, ftbl);
}
if (overflow)
return (BUFFEND);
else
return (0);
}
if (c == EOF) {
FCLOSE(dbdesc);
dbdesc = 0;
if (flno >= 0)
fstack[flno].fp = 0;
} else {
((char *) line->data)[60] = '\000';
warnx("makekey: line too long: ignoring %.100s...",
(char *)line->data);
}
}
}
/*
* get a key/line pair from fp
*/
static int
seq(fp, line, key)
FILE *fp;
DBT *key, *line;
{
static char *buf, flag = 1;
char *end, *pos;
int c;
if (flag) {
flag = 0;
buf = (char *) linebuf;
end = buf + linebuf_size;
line->data = buf;
}
pos = buf;
while ((c = getc(fp)) != EOF) {
if ((*pos++ = c) == REC_D) {
line->size = pos - buf;
return (0);
}
if (pos == end) {
linebuf_size *= 2;
linebuf = realloc(linebuf, linebuf_size);
if (!linebuf)
err(2, "realloc of linebuf to %lu bytes failed",
(unsigned long)linebuf_size);
end = linebuf + linebuf_size;
pos = linebuf + (pos - buf);
line->data = buf = (char *)linebuf;
continue;
}
}
if (pos != buf) {
*pos++ = REC_D;
line->size = pos - buf;
return (0);
} else
return (EOF);
}
/*
* write a key/line pair to a temporary file
*/
void
putrec(rec, fp)
const RECHEADER *rec;
FILE *fp;
{
EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp);
}
/*
* write a line to output
*/
void
putline(rec, fp)
const RECHEADER *rec;
FILE *fp;
{
EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp);
}
/*
* get a record from a temporary file. (Used by merge sort.)
*/
int
geteasy(flno, top, filelist, nfiles, rec, end, dummy2)
int flno, top;
struct filelist *filelist;
int nfiles;
RECHEADER *rec;
u_char *end;
struct field *dummy2;
{
int i;
FILE *fp;
fp = fstack[flno].fp;
if ((u_char *) rec > end - sizeof(TRECHEADER))
return (BUFFEND);
if (!fread(rec, 1, sizeof(TRECHEADER), fp)) {
fclose(fp);
fstack[flno].fp = 0;
return (EOF);
}
if (end - rec->data < rec->length) {
for (i = sizeof(TRECHEADER) - 1; i >= 0; i--)
ungetc(*((char *) rec + i), fp);
return (BUFFEND);
}
fread(rec->data, rec->length, 1, fp);
return (0);
}

358
usr.bin/sort/fsort.c Normal file
View File

@ -0,0 +1,358 @@
/* $NetBSD: fsort.c,v 1.19 2001/05/15 11:19:45 jdolecek Exp $ */
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Read in the next bin. If it fits in one segment sort it;
* otherwise refine it by segment deeper by one character,
* and try again on smaller bins. Sort the final bin at this level
* of recursion to keep the head of fstack at 0.
* After PANIC passes, abort to merge sort.
*/
#include "sort.h"
#include "fsort.h"
#ifndef lint
__RCSID("$NetBSD: fsort.c,v 1.19 2001/05/15 11:19:45 jdolecek Exp $");
__SCCSID("@(#)fsort.c 8.1 (Berkeley) 6/6/93");
#endif /* not lint */
#include <stdlib.h>
#include <string.h>
static const u_char **keylist = 0;
u_char *buffer = 0, *linebuf = 0;
size_t bufsize = DEFBUFSIZE;
size_t linebuf_size;
struct tempfile fstack[MAXFCT];
extern char *toutpath;
#define FSORTMAX 4
int PANIC = FSORTMAX;
#define MSTART (MAXFCT - MERGE_FNUM)
#define SALIGN(n) ((n+sizeof(length_t)-1) & ~(sizeof(length_t)-1))
void
fsort(binno, depth, top, filelist, nfiles, outfp, ftbl)
int binno, depth, top;
struct filelist *filelist;
int nfiles;
FILE *outfp;
struct field *ftbl;
{
const u_char **keypos;
u_char *bufend, *tmpbuf;
u_char *weights;
int ntfiles, mfct = 0, total, i, maxb, lastb, panic = 0;
int c, nelem, base;
long sizes [NBINS+1];
get_func_t get;
struct recheader *crec;
struct field tfield[2];
FILE *prevfp, *tailfp[FSORTMAX+1];
memset(tailfp, 0, sizeof(tailfp));
prevfp = outfp;
memset(tfield, 0, sizeof(tfield));
if (ftbl[0].flags & R)
tfield[0].weights = Rascii;
else
tfield[0].weights = ascii;
tfield[0].icol.num = 1;
weights = ftbl[0].weights;
if (!buffer) {
buffer = malloc(bufsize);
keylist = malloc(MAXNUM * sizeof(u_char *));
memset(keylist, 0, MAXNUM * sizeof(u_char *));
if (!SINGL_FLD) {
linebuf_size = DEFLLEN;
if ((linebuf = malloc(linebuf_size)) == NULL)
errx(2, "cannot allocate memory");
}
}
bufend = buffer + bufsize;
if (binno >= 0) {
base = top + nfiles;
get = getnext;
} else {
base = 0;
if (SINGL_FLD)
get = makeline;
else
get = makekey;
}
for (;;) {
memset(sizes, 0, sizeof(sizes));
c = ntfiles = 0;
if (binno == weights[REC_D] &&
!(SINGL_FLD && ftbl[0].flags & F)) { /* pop */
rd_append(weights[REC_D], top,
nfiles, prevfp, buffer, bufend);
break;
} else if (binno == weights[REC_D]) {
depth = 0; /* start over on flat weights */
ftbl = tfield;
weights = ftbl[0].weights;
}
while (c != EOF) {
keypos = keylist;
nelem = 0;
crec = (RECHEADER *) buffer;
do_read:
while((c = get(binno, top, filelist, nfiles, crec,
bufend, ftbl)) == 0) {
*keypos++ = crec->data + depth;
if (++nelem == MAXNUM) {
c = BUFFEND;
break;
}
crec =(RECHEADER *) ((char *) crec +
SALIGN(crec->length) + sizeof(TRECHEADER));
}
if (c == BUFFEND && nelem < MAXNUM
&& bufsize < MAXBUFSIZE) {
const u_char **keyp;
u_char *oldb = buffer;
/* buffer was too small for data, allocate
* bigger buffer */
bufsize *= 2;
buffer = realloc(buffer, bufsize);
if (!buffer) {
err(2, "failed to realloc buffer to %ld bytes",
(unsigned long) bufsize);
}
bufend = buffer + bufsize;
/* patch up keylist[] */
for(keyp = &keypos[-1]; keyp >= keylist; keyp--)
*keyp = buffer + (*keyp - oldb);
crec = (RECHEADER *) (buffer + ((u_char *)crec - oldb));
goto do_read;
}
if (c != BUFFEND && !ntfiles && !mfct) {
/* do not push */
continue;
}
/* push */
if (panic >= PANIC) {
fstack[MSTART + mfct].fp = ftmp();
if ((stable_sort)
? sradixsort(keylist, nelem,
weights, REC_D)
: radixsort(keylist, nelem,
weights, REC_D) )
err(2, NULL);
append(keylist, nelem, depth,
fstack[MSTART + mfct].fp, putrec,
ftbl);
mfct++;
/* reduce number of open files */
if (mfct == MERGE_FNUM ||(c == EOF && ntfiles)) {
/*
* Only copy extra incomplete crec
* data if there are any.
*/
int nodata = (bufend >= (u_char *)crec
&& bufend <= crec->data);
if (!nodata) {
tmpbuf = malloc(bufend -
crec->data);
memmove(tmpbuf, crec->data,
bufend - crec->data);
}
fstack[base + ntfiles].fp = ftmp();
fmerge(0, MSTART, filelist,
mfct, geteasy, fstack[base].fp,
putrec, ftbl);
ntfiles++;
mfct = 0;
if (!nodata) {
memmove(crec->data, tmpbuf,
bufend - crec->data);
free(tmpbuf);
}
}
} else {
fstack[base + ntfiles].fp= ftmp();
onepass(keylist, depth, nelem, sizes,
weights, fstack[base + ntfiles].fp);
ntfiles++;
}
}
if (!ntfiles && !mfct) { /* everything in memory--pop */
if (nelem > 1
&& ((stable_sort)
? sradixsort(keylist, nelem, weights, REC_D)
: radixsort(keylist, nelem, weights, REC_D) ))
err(2, NULL);
if (nelem > 0)
append(keylist, nelem, depth, outfp, putline, ftbl);
break; /* pop */
}
if (panic >= PANIC) {
if (!ntfiles)
fmerge(0, MSTART, filelist, mfct, geteasy,
outfp, putline, ftbl);
else
fmerge(0, base, filelist, ntfiles, geteasy,
outfp, putline, ftbl);
break;
}
total = maxb = lastb = 0; /* find if one bin dominates */
for (i = 0; i < NBINS; i++)
if (sizes[i]) {
if (sizes[i] > sizes[maxb])
maxb = i;
lastb = i;
total += sizes[i];
}
if (sizes[maxb] < max((total / 2) , BUFSIZE))
maxb = lastb; /* otherwise pop after last bin */
fstack[base].lastb = lastb;
fstack[base].maxb = maxb;
/* start refining next level. */
getnext(-1, base, NULL, ntfiles, crec, bufend, 0); /* rewind */
for (i = 0; i < maxb; i++) {
if (!sizes[i]) /* bin empty; step ahead file offset */
getnext(i, base, NULL,ntfiles, crec, bufend, 0);
else
fsort(i, depth+1, base, filelist, ntfiles,
outfp, ftbl);
}
get = getnext;
if (lastb != maxb) {
if (prevfp != outfp)
tailfp[panic] = prevfp;
prevfp = ftmp();
for (i = maxb+1; i <= lastb; i++)
if (!sizes[i])
getnext(i, base, NULL, ntfiles, crec,
bufend,0);
else
fsort(i, depth+1, base, filelist,
ntfiles, prevfp, ftbl);
}
/* sort biggest (or last) bin at this level */
depth++;
panic++;
binno = maxb;
top = base;
nfiles = ntfiles; /* so overwrite them */
}
if (prevfp != outfp) {
concat(outfp, prevfp);
fclose(prevfp);
}
for (i = panic; i >= 0; --i)
if (tailfp[i]) {
concat(outfp, tailfp[i]);
fclose(tailfp[i]);
}
/* If on top level, free our structures */
if (depth == 0) {
free(keylist), keylist = NULL;
free(buffer), buffer = NULL;
}
}
/*
* This is one pass of radix exchange, dumping the bins to disk.
*/
#define swap(a, b, t) t = a, a = b, b = t
void
onepass(a, depth, n, sizes, tr, fp)
const u_char **a;
int depth;
long n, sizes[];
u_char *tr;
FILE *fp;
{
size_t tsizes[NBINS+1];
const u_char **bin[257], ***bp, ***bpmax, **top[256], ***tp;
static int histo[256];
int *hp;
int c;
const u_char **an, *t, **aj;
const u_char **ak, *r;
memset(tsizes, 0, sizeof(tsizes));
depth += sizeof(TRECHEADER);
an = &a[n];
for (ak = a; ak < an; ak++) {
histo[c = tr[**ak]]++;
tsizes[c] += ((const RECHEADER *) (*ak -= depth))->length;
}
bin[0] = a;
bpmax = bin + 256;
tp = top, hp = histo;
for (bp = bin; bp < bpmax; bp++) {
*tp++ = *(bp+1) = *bp + (c = *hp);
*hp++ = 0;
if (c <= 1)
continue;
}
for (aj = a; aj < an; *aj = r, aj = bin[c+1])
for (r = *aj; aj < (ak = --top[c = tr[r[depth]]]) ;)
swap(*ak, r, t);
for (ak = a, c = 0; c < 256; c++) {
an = bin[c+1];
n = an - ak;
tsizes[c] += n * sizeof(TRECHEADER);
/* tell getnext how many elements in this bin, this segment. */
EWRITE(&tsizes[c], sizeof(size_t), 1, fp);
sizes[c] += tsizes[c];
for (; ak < an; ++ak)
putrec((const RECHEADER *) *ak, fp);
}
}

77
usr.bin/sort/fsort.h Normal file
View File

@ -0,0 +1,77 @@
/* $NetBSD: fsort.h,v 1.8 2001/02/19 20:50:17 jdolecek Exp $ */
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)fsort.h 8.1 (Berkeley) 6/6/93
*/
#define BUFSIZE (1<<20)
#define MAXNUM 131072 /* low guess at average record count */
#define BUFFEND (EOF-2)
#define MAXFCT 1000
#define DEFLLEN 65536
/*
* Default (initial) and maximum size of record buffer for fsort().
* Note that no more than MAXNUM records are stored in the buffer,
* even if the buffer is not full yet.
*/
#define DEFBUFSIZE (1 << 20) /* 1MB */
#define MAXBUFSIZE (8 << 20) /* 10 MB */
/*
* Number of files merge() can merge in one pass.
* This should be power of two so that it's possible to use this value
* for rouding.
*/
#define MERGE_FNUM 16
extern u_char *buffer, *linebuf;
extern size_t bufsize, linebuf_size;
/* temp files in the stack have a file descriptor, a largest bin (maxb)
* which becomes the last non-empty bin (lastb) when the actual largest
* bin is smaller than max(half the total file, BUFSIZE)
* Max_o is the offset of maxb so it can be sought after the other bins
* are sorted.
*/
struct tempfile {
FILE *fp;
u_char maxb;
u_char lastb;
int max_o;
};
extern struct tempfile fstack[MAXFCT];

355
usr.bin/sort/init.c Normal file
View File

@ -0,0 +1,355 @@
/* $NetBSD: init.c,v 1.5 2001/02/19 20:50:17 jdolecek Exp $ */
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "sort.h"
#ifndef lint
__RCSID("$NetBSD: init.c,v 1.5 2001/02/19 20:50:17 jdolecek Exp $");
__SCCSID("@(#)init.c 8.1 (Berkeley) 6/6/93");
#endif /* not lint */
#include <ctype.h>
#include <string.h>
static void insertcol __P((struct field *));
static const char *setcolumn __P((const char *, struct field *, int));
int setfield __P((const char *, struct field *, int));
extern struct coldesc clist[(ND+1)*2];
extern int ncols;
u_char gweights[NBINS];
/*
* masks of ignored characters. Alltable is 256 ones.
*/
static u_char alltable[NBINS], dtable[NBINS], itable[NBINS];
/*
* clist (list of columns which correspond to one or more icol or tcol)
* is in increasing order of columns.
* Fields are kept in increasing order of fields.
*/
/*
* keep clist in order--inserts a column in a sorted array
*/
static void
insertcol(field)
struct field *field;
{
int i;
for (i = 0; i < ncols; i++)
if (field->icol.num <= clist[i].num)
break;
if (field->icol.num != clist[i].num) {
memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i));
clist[i].num = field->icol.num;
ncols++;
}
if (field->tcol.num && field->tcol.num != field->icol.num) {
for (i = 0; i < ncols; i++)
if (field->tcol.num <= clist[i].num)
break;
if (field->tcol.num != clist[i].num) {
memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i));
clist[i].num = field->tcol.num;
ncols++;
}
}
}
/*
* matches fields with the appropriate columns--n^2 but who cares?
*/
void
fldreset(fldtab)
struct field *fldtab;
{
int i;
fldtab[0].tcol.p = clist+ncols-1;
for (++fldtab; fldtab->icol.num; ++fldtab) {
for (i = 0; fldtab->icol.num != clist[i].num; i++)
;
fldtab->icol.p = clist + i;
if (!fldtab->tcol.num)
continue;
for (i = 0; fldtab->tcol.num != clist[i].num; i++)
;
fldtab->tcol.p = clist + i;
}
}
/*
* interprets a column in a -k field
*/
static const char *
setcolumn(pos, cur_fld, gflag)
const char *pos;
struct field *cur_fld;
int gflag;
{
struct column *col;
int tmp;
col = cur_fld->icol.num ? (&(*cur_fld).tcol) : (&(*cur_fld).icol);
pos += sscanf(pos, "%d", &(col->num));
while (isdigit(*pos))
pos++;
if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol)))
errx(2, "field numbers must be positive");
if (*pos == '.') {
if (!col->num)
errx(2, "cannot indent end of line");
++pos;
pos += sscanf(pos, "%d", &(col->indent));
while (isdigit(*pos))
pos++;
if (&cur_fld->icol == col)
col->indent--;
if (col->indent < 0)
errx(2, "illegal offset");
}
if (optval(*pos, cur_fld->tcol.num))
while ((tmp = optval(*pos, cur_fld->tcol.num))) {
cur_fld->flags |= tmp;
pos++;
}
if (cur_fld->icol.num == 0)
cur_fld->icol.num = 1;
return (pos);
}
int
setfield(pos, cur_fld, gflag)
const char *pos;
struct field *cur_fld;
int gflag;
{
static int nfields = 0;
int tmp;
if (++nfields == ND)
errx(2, "too many sort keys. (Limit is %d)", ND-1);
cur_fld->weights = ascii;
cur_fld->mask = alltable;
pos = setcolumn(pos, cur_fld, gflag);
if (*pos == '\0') /* key extends to EOL. */
cur_fld->tcol.num = 0;
else {
if (*pos != ',')
errx(2, "illegal field descriptor");
setcolumn((++pos), cur_fld, gflag);
}
if (!cur_fld->flags)
cur_fld->flags = gflag;
tmp = cur_fld->flags;
/*
* Assign appropriate mask table and weight table.
* If the global weights are reversed, the local field
* must be "re-reversed".
*/
if (((tmp & R) ^ (gflag & R)) && (tmp & F))
cur_fld->weights = RFtable;
else if (tmp & F)
cur_fld->weights = Ftable;
else if ((tmp & R) ^ (gflag & R))
cur_fld->weights = Rascii;
if (tmp & I)
cur_fld->mask = itable;
else if (tmp & D)
cur_fld->mask = dtable;
cur_fld->flags |= (gflag & (BI | BT));
if (!cur_fld->tcol.indent) /* BT has no meaning at end of field */
cur_fld->flags &= ~BT;
if (cur_fld->tcol.num && !(!(cur_fld->flags & BI)
&& cur_fld->flags & BT) && (cur_fld->tcol.num <= cur_fld->icol.num
&& cur_fld->tcol.indent < cur_fld->icol.indent))
errx(2, "fields out of order");
insertcol(cur_fld);
return (cur_fld->tcol.num);
}
int
optval(desc, tcolflag)
int desc, tcolflag;
{
switch(desc) {
case 'b':
if (!tcolflag)
return (BI);
else
return (BT);
case 'd': return (D);
case 'f': return (F);
case 'i': return (I);
case 'n': return (N);
case 'r': return (R);
default: return (0);
}
}
void
fixit(argc, argv)
int *argc;
char **argv;
{
int i, j, v, w, x;
static char vbuf[ND*20], *vpos, *tpos;
vpos = vbuf;
for (i = 1; i < *argc; i++) {
if (argv[i][0] == '+') {
tpos = argv[i]+1;
argv[i] = vpos;
vpos += sprintf(vpos, "-k");
tpos += sscanf(tpos, "%d", &v);
while (isdigit(*tpos))
tpos++;
vpos += sprintf(vpos, "%d", v+1);
if (*tpos == '.') {
++tpos;
tpos += sscanf(tpos, "%d", &x);
vpos += sprintf(vpos, ".%d", x+1);
}
while (*tpos)
*vpos++ = *tpos++;
vpos += sprintf(vpos, ",");
if (argv[i+1] &&
argv[i+1][0] == '-' && isdigit(argv[i+1][1])) {
tpos = argv[i+1] + 1;
tpos += sscanf(tpos, "%d", &w);
while (isdigit(*tpos))
tpos++;
x = 0;
if (*tpos == '.') {
++tpos;
tpos += sscanf(tpos, "%d", &x);
while (isdigit(*tpos))
tpos++;
}
if (x) {
vpos += sprintf(vpos, "%d", w+1);
vpos += sprintf(vpos, ".%d", x);
} else
vpos += sprintf(vpos, "%d", w);
while (*tpos)
*vpos++ = *tpos++;
for (j= i+1; j < *argc; j++)
argv[j] = argv[j+1];
*argc -= 1;
}
}
}
}
/*
* ascii, Rascii, Ftable, and RFtable map
* REC_D -> REC_D; {not REC_D} -> {not REC_D}.
* gweights maps REC_D -> (0 or 255); {not REC_D} -> {not gweights[REC_D]}.
* Note: when sorting in forward order, to encode character zero in a key,
* use \001\001; character 1 becomes \001\002. In this case, character 0
* is reserved for the field delimiter. Analagously for -r (fld_d = 255).
* Note: this is only good for ASCII sorting. For different LC 's,
* all bets are off. See also num_init in number.c
*/
void
settables(gflags)
int gflags;
{
u_char *wts;
int i, incr;
for (i=0; i < 256; i++) {
ascii[i] = i;
if (i > REC_D && i < 255 - REC_D+1)
Rascii[i] = 255 - i + 1;
else
Rascii[i] = 255 - i;
if (islower(i)) {
Ftable[i] = Ftable[toupper(i)];
RFtable[i] = RFtable[toupper(i)];
} else if (REC_D>= 'A' && REC_D < 'Z' && i < 'a' && i > REC_D) {
Ftable[i] = i + 1;
RFtable[i] = Rascii[i] - 1;
} else {
Ftable[i] = i;
RFtable[i] = Rascii[i];
}
alltable[i] = 1;
if (i == '\n' || isprint(i))
itable[i] = 1;
else
itable[i] = 0;
if (i == '\n' || i == '\t' || i == ' ' || isalnum(i))
dtable[i] = 1;
else
dtable[i] = 0;
}
Rascii[REC_D] = RFtable[REC_D] = REC_D;
if (isupper(REC_D))
Ftable[tolower(REC_D)]++;
if ((gflags & R) && !((gflags & F) && SINGL_FLD))
wts = Rascii;
else if (!((gflags & F) && SINGL_FLD))
wts = ascii;
else if (gflags & R)
wts = RFtable;
else
wts = Ftable;
memmove(gweights, wts, sizeof(gweights));
incr = (gflags & R) ? -1 : 1;
for (i = 0; i < REC_D; i++)
gweights[i] += incr;
gweights[REC_D] = ((gflags & R) ? 255 : 0);
if (SINGL_FLD && (gflags & F)) {
for (i = 0; i < REC_D; i++) {
ascii[i] += incr;
Rascii[i] += incr;
}
ascii[REC_D] = Rascii[REC_D] = gweights[REC_D];
}
}

390
usr.bin/sort/msort.c Normal file
View File

@ -0,0 +1,390 @@
/* $NetBSD: msort.c,v 1.9 2001/01/19 10:50:31 jdolecek Exp $ */
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "sort.h"
#include "fsort.h"
#ifndef lint
__RCSID("$NetBSD: msort.c,v 1.9 2001/01/19 10:50:31 jdolecek Exp $");
__SCCSID("@(#)msort.c 8.1 (Berkeley) 6/6/93");
#endif /* not lint */
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/* Subroutines using comparisons: merge sort and check order */
#define DELETE (1)
typedef struct mfile {
u_char *end;
short flno;
struct recheader rec[1];
} MFILE;
static u_char *wts, *wts1 = NULL;
static int cmp __P((RECHEADER *, RECHEADER *));
static int insert __P((struct mfile **, struct mfile **, int, int));
void
fmerge(binno, top, filelist, nfiles, get, outfp, fput, ftbl)
int binno, top;
struct filelist *filelist;
int nfiles;
get_func_t get;
FILE *outfp;
put_func_t fput;
struct field *ftbl;
{
FILE *tout;
int i, j, last;
put_func_t put;
struct tempfile *l_fstack;
wts = ftbl->weights;
if (!UNIQUE && SINGL_FLD && ftbl->flags & F)
wts1 = (ftbl->flags & R) ? Rascii : ascii;
if (!buffer) {
buffer = malloc(bufsize);
if (!buffer)
err(2, "fmerge(): realloc");
if (!linebuf && !SINGL_FLD) {
linebuf_size = DEFLLEN;
linebuf = malloc(linebuf_size);
}
}
if (binno >= 0)
l_fstack = fstack + top;
else
l_fstack = fstack;
while (nfiles) {
put = putrec;
for (j = 0; j < nfiles; j += MERGE_FNUM) {
if (nfiles <= MERGE_FNUM) {
tout = outfp;
put = fput;
}
else
tout = ftmp();
last = min(MERGE_FNUM, nfiles - j);
if (binno < 0) {
for (i = 0; i < last; i++)
if (!(l_fstack[i+MAXFCT-1-MERGE_FNUM].fp =
fopen(filelist->names[j+i], "r")))
err(2, "%s",
filelist->names[j+i]);
merge(MAXFCT-1-MERGE_FNUM, last, get, tout, put, ftbl);
} else {
for (i = 0; i< last; i++)
rewind(l_fstack[i+j].fp);
merge(top+j, last, get, tout, put, ftbl);
}
if (nfiles > MERGE_FNUM)
l_fstack[j/MERGE_FNUM].fp = tout;
}
nfiles = (nfiles + (MERGE_FNUM - 1)) / MERGE_FNUM;
if (nfiles == 1)
nfiles = 0;
if (binno < 0) {
binno = 0;
get = geteasy;
top = 0;
}
}
}
void
merge(infl0, nfiles, get, outfp, put, ftbl)
int infl0, nfiles;
get_func_t get;
put_func_t put;
FILE *outfp;
struct field *ftbl;
{
int c, i, j, nf = nfiles;
struct mfile *flist[MERGE_FNUM], *cfile;
size_t availsz = bufsize;
static void *bufs[MERGE_FNUM+1];
static size_t bufs_sz[MERGE_FNUM+1];
/*
* We need nfiles + 1 buffers. One is 'buffer', the
* rest needs to be allocated.
*/
bufs[0] = buffer;
bufs_sz[0] = bufsize;
for(i=1; i < nfiles+1; i++) {
if (bufs[i])
continue;
bufs[i] = malloc(DEFLLEN);
if (!bufs[i])
err(2, "merge(): realloc");
bufs_sz[i] = DEFLLEN;
}
for (i = j = 0; i < nfiles; i++) {
cfile = (struct mfile *) bufs[j];
cfile->flno = infl0 + j;
cfile->end = (u_char *) bufs[j] + bufs_sz[j];
for (c = 1; c == 1;) {
if (EOF == (c = get(cfile->flno, 0, NULL, nfiles,
cfile->rec, cfile->end, ftbl))) {
--i;
--nfiles;
break;
}
if (c == BUFFEND) {
cfile = realloc(bufs[j], bufs_sz[j] *= 2);
bufs[j] = (void *) cfile;
if (!cfile)
err(2, "merge(): realloc");
cfile->end = (u_char *)cfile + bufs_sz[j];
c = 1;
continue;
}
if (i)
c = insert(flist, &cfile, i, !DELETE);
else
flist[0] = cfile;
}
j++;
}
cfile = (struct mfile *) bufs[nf];
cfile->flno = flist[0]->flno;
cfile->end = (u_char *) cfile + bufs_sz[nf];
while (nfiles) {
for (c = 1; c == 1;) {
if (EOF == (c = get(cfile->flno, 0, NULL, nfiles,
cfile->rec, cfile->end, ftbl))) {
put(flist[0]->rec, outfp);
memmove(flist, flist + 1,
sizeof(MFILE *) * (--nfiles));
cfile->flno = flist[0]->flno;
break;
}
if (c == BUFFEND) {
char *oldbuf = (char *) cfile;
availsz = (char *) cfile->end - oldbuf;
availsz *= 2;
cfile = realloc(oldbuf, availsz);
for(i=0; i < nf+1; i++) {
if (bufs[i] == oldbuf) {
bufs[i] = (char *)cfile;
bufs_sz[i] = availsz;
break;
}
}
if (!cfile)
err(2, "merge: realloc");
cfile->end = (u_char *)cfile + availsz;
c = 1;
continue;
}
if (!(c = insert(flist, &cfile, nfiles, DELETE)))
put(cfile->rec, outfp);
}
}
if (bufs_sz[0] > bufsize) {
buffer = bufs[0];
bufsize = bufs_sz[0];
}
}
/*
* if delete: inserts *rec in flist, deletes flist[0], and leaves it in *rec;
* otherwise just inserts *rec in flist.
*/
static int
insert(flist, rec, ttop, delete)
struct mfile **flist, **rec;
int delete, ttop; /* delete = 0 or 1 */
{
struct mfile *tmprec = *rec;
int mid, top = ttop, bot = 0, cmpv = 1;
for (mid = top/2; bot +1 != top; mid = (bot+top)/2) {
cmpv = cmp(tmprec->rec, flist[mid]->rec);
if (cmpv < 0)
top = mid;
else if (cmpv > 0)
bot = mid;
else {
if (UNIQUE)
break;
if (stable_sort) {
/*
* Apply sort by fileno, to give priority
* to earlier specified files, hence providing
* more stable sort.
* If fileno is same, the new record should
* be put _after_ the previous entry.
*/
cmpv = tmprec->flno - flist[mid]->flno;
if (cmpv >= 0)
bot = mid;
else /* cmpv == 0 */
bot = mid - 1;
} else {
/* non-stable sort */
bot = mid - 1;
}
break;
}
}
if (delete) {
if (UNIQUE) {
if (!bot && cmpv)
cmpv = cmp(tmprec->rec, flist[0]->rec);
if (!cmpv)
return (1);
}
tmprec = flist[0];
if (bot)
memmove(flist, flist+1, bot * sizeof(MFILE **));
flist[bot] = *rec;
*rec = tmprec;
(*rec)->flno = flist[0]->flno;
return (0);
} else {
if (!bot && !(UNIQUE && !cmpv)) {
cmpv = cmp(tmprec->rec, flist[0]->rec);
if (cmpv < 0)
bot = -1;
}
if (UNIQUE && !cmpv)
return (1);
bot++;
memmove(flist + bot+1, flist + bot,
(ttop - bot) * sizeof(MFILE **));
flist[bot] = *rec;
return (0);
}
}
/*
* check order on one file
*/
void
order(filelist, get, ftbl)
struct filelist *filelist;
get_func_t get;
struct field *ftbl;
{
u_char *crec_end, *prec_end, *trec_end;
int c;
RECHEADER *crec, *prec, *trec;
if (!SINGL_FLD)
linebuf = malloc(DEFLLEN);
buffer = malloc(2 * (DEFLLEN + sizeof(TRECHEADER)));
crec = (RECHEADER *) buffer;
crec_end = buffer + DEFLLEN + sizeof(TRECHEADER);
prec = (RECHEADER *) (buffer + DEFLLEN + sizeof(TRECHEADER));
prec_end = buffer + 2*(DEFLLEN + sizeof(TRECHEADER));
wts = ftbl->weights;
if (SINGL_FLD && (ftbl->flags & F))
wts1 = (ftbl->flags & R) ? Rascii : ascii;
else
wts1 = NULL;
if (0 == get(-1, 0, filelist, 1, prec, prec_end, ftbl))
while (0 == get(-1, 0, filelist, 1, crec, crec_end, ftbl)) {
if (0 < (c = cmp(prec, crec))) {
crec->data[crec->length-1] = 0;
errx(1, "found disorder: %s", crec->data+crec->offset);
}
if (UNIQUE && !c) {
crec->data[crec->length-1] = 0;
errx(1, "found non-uniqueness: %s",
crec->data+crec->offset);
}
/*
* Swap pointers so that this record is on place pointed
* to by prec and new record is read to place pointed to by
* crec.
*/
trec = prec;
prec = crec;
crec = trec;
trec_end = prec_end;
prec_end = crec_end;
crec_end = trec_end;
}
exit(0);
}
static int
cmp(rec1, rec2)
RECHEADER *rec1, *rec2;
{
int r;
u_char *pos1, *pos2, *end;
u_char *cwts;
for (cwts = wts; cwts; cwts = (cwts == wts1 ? NULL : wts1)) {
pos1 = rec1->data;
pos2 = rec2->data;
if (!SINGL_FLD && (UNIQUE || stable_sort))
end = pos1 + min(rec1->offset, rec2->offset);
else
end = pos1 + min(rec1->length, rec2->length);
for (; pos1 < end; ) {
if ((r = cwts[*pos1++] - cwts[*pos2++]))
return (r);
}
}
return (0);
}

41
usr.bin/sort/pathnames.h Normal file
View File

@ -0,0 +1,41 @@
/* $NetBSD: pathnames.h,v 1.2 2000/10/07 18:37:10 bjh21 Exp $ */
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)pathnames.h 8.1 (Berkeley) 6/6/93
*/
#define _PATH_STDIN "/dev/stdin"

441
usr.bin/sort/sort.1 Normal file
View File

@ -0,0 +1,441 @@
.\" $NetBSD: sort.1,v 1.17 2001/12/08 19:16:07 wiz Exp $
.\"
.\" Copyright (c) 1991, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
.\" This code is derived from software contributed to Berkeley by
.\" the Institute of Electrical and Electronics Engineers, Inc.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. All advertising materials mentioning features or use of this software
.\" must display the following acknowledgement:
.\" This product includes software developed by the University of
.\" California, Berkeley and its contributors.
.\" 4. Neither the name of the University nor the names of its contributors
.\" may be used to endorse or promote products derived from this software
.\" without specific prior written permission.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" @(#)sort.1 8.1 (Berkeley) 6/6/93
.\"
.Dd January 13, 2001
.Dt SORT 1
.Os
.Sh NAME
.Nm sort
.Nd sort or merge text files
.Sh SYNOPSIS
.Nm sort
.Op Fl cmubdfHinrsS
.Op Fl t Ar char
.Op Fl R Ar char
.Oo
.Fl k
.Ar field1 Ns Op Li \&, Ns Ar field2
.Oc
.Op Fl T Ar dir
.Op Fl o Ar output
.Op Ar
.Sh DESCRIPTION
The
.Nm
utility sorts text files by lines.
Comparisons are based on one or more sort keys extracted
from each line of input, and are performed lexicographically.
By default, if keys are not given,
.Nm
regards each input line as a single field.
.Pp
The following options are available:
.Bl -tag -width Fl
.It Fl c
Check that the single input file is sorted.
If the file is not sorted,
.Nm
produces the appropriate error messages and exits with code 1; otherwise,
.Nm
returns 0.
.Nm
.Fl c
produces no output.
.It Fl m
Merge only; the input files are assumed to be pre-sorted.
.It Fl o Ar output
The argument given is the name of an
.Ar output
file to be used instead of the standard output.
This file can be the same as one of the input files.
.It Fl T Ar dir
Use
.Ar dir
as the directory for temporary files.
The default is the value specified in the environment variable
.Ev TMPDIR or
.Pa /tmp
if
.Ev TMPDIR
is not defined.
.It Fl u
Unique: suppress all but one in each set of lines having equal keys.
If used with the
.Fl c
option, check that there are no lines with duplicate keys.
.El
.Pp
The following options override the default ordering rules.
When ordering options appear independent of key field
specifications, the requested field ordering rules are
applied globally to all sort keys.
When attached to a specific key (see
.Fl k ) ,
the ordering options override
all global ordering options for that key.
.Bl -tag -width Fl
.It Fl d
Only blank space and alphanumeric characters
.\" according
.\" to the current setting of LC_CTYPE
are used
in making comparisons.
.It Fl f
Considers all lowercase characters that have uppercase
equivalents to be the same for purposes of comparison.
.It Fl i
Ignore all non-printable characters.
.It Fl n
An initial numeric string, consisting of optional blank space, optional
minus sign, and zero or more digits (including decimal point)
.\" with
.\" optional radix character and thousands
.\" separator
.\" (as defined in the current locale),
is sorted by arithmetic value.
(The
.Fl n
option no longer implies the
.Fl b
option.)
.It Fl r
Reverse the sense of comparisons.
.It Fl S
Don't use stable sort.
Default is to use stable sort.
.It Fl s
Use stable sort.
This is the default.
Provided for compatiblity with other
.Nm
implementations only.
.It Fl H
Use a merge sort instead of a radix sort.
This option should be used for files larger than 60Mb.
.El
.Pp
The treatment of field separators can be altered using these options:
.Bl -tag -width Fl
.It Fl b
Ignores leading blank space when determining the start
and end of a restricted sort key.
A
.Fl b
option specified before the first
.Fl k
option applies globally to all
.Fl k
options.
Otherwise, the
.Fl b
option can be attached independently to each
.Ar field
argument of the
.Fl k
option (see below).
Note that the
.Fl b
option has no effect unless key fields are specified.
.It Fl t Ar char
.Ar char
is used as the field separator character.
The initial
.Ar char
is not considered to be part of a field when determining
key offsets (see below).
Each occurrence of
.Ar char
is significant (for example,
.Dq Ar charchar
delimits an empty field).
If
.Fl t
is not specified, the default field separator is a sequence of
blank-space characters, and consecutive blank spaces do
.Em not
delimit an empty field; further, the initial blank space
.Em is
considered part of a field when determining key offsets.
.It Fl R Ar char
.Ar char
is used as the record separator character.
This should be used with discretion;
.Fl R Ar \*[Lt]alphanumeric\*[Gt]
usually produces undesirable results.
The default record separator is newline.
.It Xo
.Fl k
.Ar field1 Ns Op Li \&, Ns Ar field2
.Xc
Designates the starting position,
.Ar field1 ,
and optional ending position,
.Ar field2 ,
of a key field.
The
.Fl k
option replaces the obsolescent options
.Cm \(pl Ns Ar pos1
and
.Fl Ns Ar pos2 .
.El
.Pp
The following operands are available:
.Bl -tag -width Ar
.It Ar file
The pathname of a file to be sorted, merged, or checked.
If no
.Ar file
operands are specified, or if
a
.Ar file
operand is
.Fl ,
the standard input is used.
.El
.Pp
A field is defined as a minimal sequence of characters followed by a
field separator or a newline character.
By default, the first
blank space of a sequence of blank spaces acts as the field separator.
All blank spaces in a sequence of blank spaces are considered
as part of the next field; for example, all blank spaces at
the beginning of a line are considered to be part of the
first field.
.Pp
Fields are specified
by the
.Fl k
.Ar field1 Ns Op \&, Ns Ar field2
argument.
A missing
.Ar field2
argument defaults to the end of a line.
.Pp
The arguments
.Ar field1
and
.Ar field2
have the form
.Ar m Ns Li \&. Ns Ar n
and can be followed by one or more of the letters
.Cm b , d , f , i ,
.Cm n ,
and
.Cm r ,
which correspond to the options discussed above.
A
.Ar field1
position specified by
.Ar m Ns Li \&. Ns Ar n
.Pq Ar m , n No \*[Gt] 0
is interpreted as the
.Ar n Ns th
character in the
.Ar m Ns th
field.
A missing
.Li \&. Ns Ar n
in
.Ar field1
means
.Ql \&.1 ,
indicating the first character of the
.Ar m Ns th
field; if the
.Fl b
option is in effect,
.Ar n
is counted from the first non-blank character in the
.Ar m Ns th
field;
.Ar m Ns Li \&.1b
refers to the first non-blank character in the
.Ar m Ns th
field.
.Pp
A
.Ar field2
position specified by
.Ar m Ns Li \&. Ns Ar n
is interpreted as
the
.Ar n Ns th
character (including separators) of the
.Ar m Ns th
field.
A missing
.Li \&. Ns Ar n
indicates the last character of the
.Ar m Ns th
field;
.Ar m
= \&0
designates the end of a line.
Thus the option
.Fl k
.Sm off
.Xo
.Ar v Li \&. Ar x Li \&,
.Ar w Li \&. Ar y
.Xc
.Sm on
is synonymous with the obsolescent option
.Sm off
.Cm \(pl Ar v-\&1 Li \&. Ar x-\&1
.Fl Ar w-\&1 Li \&. Ar y ;
.Sm on
when
.Ar y
is omitted,
.Fl k
.Sm off
.Ar v Li \&. Ar x Li \&, Ar w
.Sm on
is synonymous with
.Sm off
.Cm \(pl Ar v-\&1 Li \&. Ar x-\&1
.Fl Ar w+1 Li \&.0 .
.Sm on
The obsolescent
.Cm \(pl Ns Ar pos1
.Fl Ns Ar pos2
option is still supported, except for
.Fl Ns Ar w Ns Li \&.0b ,
which has no
.Fl k
equivalent.
.Sh RETURN VALUES
Sort exits with one of the following values:
.Bl -tag -width flag -compact
.It 0
Normal behavior.
.It 1
On disorder (or non-uniqueness) with the
.Fl c
option
.It 2
An error occurred.
.El
.Sh ENVIRONMENT
If the following environment variable exists, it is utilized by
.Nm "" .
.Bl -tag -width Ev
.It Ev TMPDIR
.Nm
uses the contents of the
.Ev TMPDIR
environment variable as the path in which to store
temporary files.
.El
.Sh FILES
.Bl -tag -width outputNUMBER+some -compact
.It Pa /tmp/sort.*
Default temporary files.
.It Pa Ar output Ns NUMBER
Temporary file which is used for output if
.Ar output
already exists.
Once sorting is finished, this file replaces
.Ar output
(via
.Xr link 2
and
.Xr unlink 2 ) .
.El
.Sh SEE ALSO
.Xr comm 1 ,
.Xr join 1 ,
.Xr uniq 1 ,
.Xr qsort 3 ,
.Xr radixsort 3
.Sh HISTORY
A
.Nm
command appeared in
.At v5 .
This
.Nm
implementation appeared in
.Bx 4.4
and is used since
.Nx 1.6 .
.Sh BUGS
To sort files larger than 60Mb, use
.Nm
.Fl H ;
files larger than 704Mb must be sorted in smaller pieces, then merged.
.Sh NOTES
This
.Nm
has no limits on input line length (other than imposed by available
memory) or any restrictions on bytes allowed within lines.
.Pp
To protect data
.Nm
.Fl o
calls
.Xr link 2
and
.Xr unlink 2 ,
and thus fails on protected directories.
.Pp
Input files should be text files.
If file doesn't end with record separator (which is typically newline), the
.Nm
utility silently supplies one.
.Pp
The current
.Nm
uses lexicographic radix sorting, which requires
that sort keys be kept in memory (as opposed to previous versions which used quick
and merge sorts and did not.)
Thus performance depends highly on efficient choice of sort keys, and the
.Fl b
option and the
.Ar field2
argument of the
.Fl k
option should be used whenever possible.
Similarly,
.Nm
.Fl k1f
is equivalent to
.Nm
.Fl f
and may take twice as long.

332
usr.bin/sort/sort.c Normal file
View File

@ -0,0 +1,332 @@
/* $NetBSD: sort.c,v 1.26 2001/04/30 00:25:09 ross Exp $ */
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* Sort sorts a file using an optional user-defined key.
* Sort uses radix sort for internal sorting, and allows
* a choice of merge sort and radix sort for external sorting.
*/
#include "sort.h"
#include "fsort.h"
#include "pathnames.h"
#ifndef lint
__COPYRIGHT("@(#) Copyright (c) 1993\n\
The Regents of the University of California. All rights reserved.\n");
#endif /* not lint */
#ifndef lint
__RCSID("$NetBSD: sort.c,v 1.26 2001/04/30 00:25:09 ross Exp $");
__SCCSID("@(#)sort.c 8.1 (Berkeley) 6/6/93");
#endif /* not lint */
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <paths.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <locale.h>
int REC_D = '\n';
u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */
/*
* weight tables. Gweights is one of ascii, Rascii..
* modified to weight rec_d = 0 (or 255)
*/
u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS];
int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0;
struct coldesc clist[(ND+1)*2];
int ncols = 0;
extern struct coldesc clist[(ND+1)*2];
extern int ncols;
/*
* Default to stable sort.
*/
int stable_sort = 1;
char toutpath[MAXPATHLEN];
const char *tmpdir; /* where temporary files should be put */
static void cleanup __P((void));
static void onsignal __P((int));
static void usage __P((const char *));
static void many_files __P((void));
int main __P((int argc, char **argv));
int
main(argc, argv)
int argc;
char *argv[];
{
get_func_t get;
int ch, i, stdinflag = 0, tmp = 0;
char cflag = 0, mflag = 0;
char *outfile, *outpath = 0;
struct field fldtab[ND+2], *ftpos;
struct filelist filelist;
FILE *outfp = NULL;
setlocale(LC_ALL, "");
memset(fldtab, 0, (ND+2)*sizeof(struct field));
memset(d_mask, 0, NBINS);
d_mask[REC_D = '\n'] = REC_D_F;
SINGL_FLD = SEP_FLAG = 0;
d_mask['\t'] = d_mask[' '] = BLANK | FLD_D;
ftpos = fldtab;
many_files();
fixit(&argc, argv);
if (!(tmpdir = getenv("TMPDIR")))
tmpdir = _PATH_TMP;
while ((ch = getopt(argc, argv, "bcdfik:mHno:rR:sSt:T:ux")) != -1) {
switch (ch) {
case 'b':
fldtab->flags |= BI | BT;
break;
case 'c':
cflag = 1;
break;
case 'd': case 'f': case 'i': case 'n': case 'r':
tmp |= optval(ch, 0);
if ((tmp & R) && (tmp & F))
fldtab->weights = RFtable;
else if (tmp & F)
fldtab->weights = Ftable;
else if (tmp & R)
fldtab->weights = Rascii;
fldtab->flags |= tmp;
break;
case 'H':
PANIC = 0;
break;
case 'k':
setfield(optarg, ++ftpos, fldtab->flags);
break;
case 'm':
mflag = 1;
break;
case 'o':
outpath = optarg;
break;
case 's':
/* for GNU sort compatibility (this is our default) */
stable_sort = 1;
break;
case 'S':
stable_sort = 0;
break;
case 't':
if (SEP_FLAG)
usage("multiple field delimiters");
SEP_FLAG = 1;
d_mask[' '] &= ~FLD_D;
d_mask['\t'] &= ~FLD_D;
d_mask[(u_char)*optarg] |= FLD_D;
if (d_mask[(u_char)*optarg] & REC_D_F)
errx(2, "record/field delimiter clash");
break;
case 'R':
if (REC_D != '\n')
usage("multiple record delimiters");
if ('\n' == (REC_D = *optarg))
break;
d_mask['\n'] = d_mask[' '];
d_mask[REC_D] = REC_D_F;
break;
case 'T':
/* -T tmpdir */
tmpdir = optarg;
break;
case 'u':
UNIQUE = 1;
break;
case '?':
default:
usage(NULL);
}
}
if (cflag && argc > optind+1)
errx(2, "too many input files for -c option");
if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) {
outpath = argv[argc-1];
argc -= 2;
}
if (mflag && argc - optind > (MAXFCT - (16+1))*16)
errx(2, "too many input files for -m option");
for (i = optind; i < argc; i++) {
/* allow one occurrence of /dev/stdin */
if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) {
if (stdinflag)
warnx("ignoring extra \"%s\" in file list",
argv[i]);
else
stdinflag = 1;
/* change to /dev/stdin if '-' */
if (argv[i][0] == '-')
argv[i] = _PATH_STDIN;
} else if ((ch = access(argv[i], R_OK)))
err(2, "%s", argv[i]);
}
if (!(fldtab->flags & (I|D|N) || fldtab[1].icol.num)) {
SINGL_FLD = 1;
fldtab[0].icol.num = 1;
} else {
if (!fldtab[1].icol.num) {
fldtab[0].flags &= ~(BI|BT);
setfield("1", ++ftpos, fldtab->flags);
}
fldreset(fldtab);
fldtab[0].flags &= ~F;
}
settables(fldtab[0].flags);
num_init();
fldtab->weights = gweights;
if (optind == argc) {
static const char * const names[] = { _PATH_STDIN, NULL };
filelist.names = names;
optind--;
} else
filelist.names = (const char * const *) &argv[optind];
if (SINGL_FLD)
get = makeline;
else
get = makekey;
if (cflag) {
order(&filelist, get, fldtab);
/* NOT REACHED */
}
if (!outpath) {
(void)snprintf(toutpath,
sizeof(toutpath), "%sstdout", _PATH_DEV);
outfile = outpath = toutpath;
outfp = stdout;
} else if (!(ch = access(outpath, 0)) &&
strncmp(_PATH_DEV, outpath, 5)) {
static const struct sigaction act =
{ onsignal, {{0}}, SA_RESTART | SA_RESETHAND };
static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE,
SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0};
int outfd;
errno = 0;
if (access(outpath, W_OK))
err(2, "%s", outpath);
(void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXX",
outpath);
if ((outfd = mkstemp(toutpath)) == -1)
err(2, "Cannot create temporary file `%s'", toutpath);
if ((outfp = fdopen(outfd, "w")) == NULL)
err(2, "Cannot open temporary file `%s'", toutpath);
outfile = toutpath;
(void)atexit(cleanup);
for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */
sigaction(sigtable[i], &act, 0);
} else
outfile = outpath;
if (outfp == NULL && (outfp = fopen(outfile, "w")) == NULL)
err(2, "output file %s", outfile);
if (mflag) {
fmerge(-1, 0, &filelist, argc-optind, get, outfp, putline,
fldtab);
} else
fsort(-1, 0, 0, &filelist, argc-optind, outfp, fldtab);
if (outfile != outpath) {
if (access(outfile, 0))
err(2, "%s", outfile);
(void)unlink(outpath);
if (link(outfile, outpath))
err(2, "cannot link %s: output left in %s",
outpath, outfile);
(void)unlink(outfile);
}
exit(0);
}
static void
onsignal(sig)
int sig;
{
cleanup();
}
static void
cleanup()
{
if (toutpath[0])
(void)unlink(toutpath);
}
static void
usage(msg)
const char *msg;
{
if (msg != NULL)
(void)fprintf(stderr, "sort: %s\n", msg);
(void)fprintf(stderr, "usage: [-o output] [-cmubdfinrsS] [-t char] ");
(void)fprintf(stderr, "[-R char] [-k keydef] ... [files]\n");
exit(2);
}
static void
many_files()
{
#if 0
struct rlimit rlp_many_files[1];
if (getrlimit(RLIMIT_NOFILE, rlp_many_files) == 0) {
rlp_many_files->rlim_cur = rlp_many_files->rlim_max;
setrlimit(RLIMIT_NOFILE, rlp_many_files);
}
#endif
}

149
usr.bin/sort/sort.h Normal file
View File

@ -0,0 +1,149 @@
/* $NetBSD: sort.h,v 1.11 2001/01/19 10:14:31 jdolecek Exp $ */
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)sort.h 8.1 (Berkeley) 6/6/93
*/
#include <sys/param.h>
#include <db.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define NBINS 256
#define MAXMERGE 16
/* values for masks, weights, and other flags. */
#define I 1 /* mask out non-printable characters */
#define D 2 /* sort alphanumeric characters only */
#define N 4 /* Field is a number */
#define F 8 /* weight lower and upper case the same */
#define R 16 /* Field is reversed with respect to the global weight */
#define BI 32 /* ignore blanks in icol */
#define BT 64 /* ignore blanks in tcol */
/* masks for delimiters: blanks, fields, and termination. */
#define BLANK 1 /* ' ', '\t'; '\n' if -T is invoked */
#define FLD_D 2 /* ' ', '\t' default; from -t otherwise */
#define REC_D_F 4 /* '\n' default; from -T otherwise */
#define ND 10 /* limit on number of -k options. */
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))
#define FCLOSE(file) { \
if (EOF == fclose(file)) \
err(2, "%p", file); \
}
#define EWRITE(ptr, size, n, f) { \
if (!fwrite(ptr, size, n, f)) \
err(2, NULL); \
}
/* length of record is currently limited to maximum string length (size_t) */
typedef size_t length_t;
/* a record is a key/line pair starting at rec.data. It has a total length
* and an offset to the start of the line half of the pair.
*/
typedef struct recheader {
length_t length;
length_t offset;
u_char data[1];
} RECHEADER;
typedef struct trecheader {
length_t length;
length_t offset;
} TRECHEADER;
/* This is the column as seen by struct field. It is used by enterfield.
* They are matched with corresponding coldescs during initialization.
*/
struct column {
struct coldesc *p;
int num;
int indent;
};
/* a coldesc has a number and pointers to the beginning and end of the
* corresponding column in the current line. This is determined in enterkey.
*/
typedef struct coldesc {
u_char *start;
u_char *end;
int num;
} COLDESC;
/* A field has an initial and final column; an omitted final column
* implies the end of the line. Flags regulate omission of blanks and
* numerical sorts; mask determines which characters are ignored (from -i, -d);
* weights determines the sort weights of a character (from -f, -r).
*/
struct field {
struct column icol;
struct column tcol;
u_int flags;
u_char *mask;
u_char *weights;
};
struct filelist {
const char * const * names;
};
typedef int (*get_func_t) __P((int, int, struct filelist *, int,
RECHEADER *, u_char *, struct field *));
typedef void (*put_func_t) __P((const struct recheader *, FILE *));
extern int PANIC; /* maximum depth of fsort before fmerge is called */
extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS];
extern u_char d_mask[NBINS];
extern int SINGL_FLD, SEP_FLAG, UNIQUE;
extern int REC_D;
extern const char *tmpdir;
extern int stable_sort;
extern u_char gweights[NBINS];
#include "extern.h"

84
usr.bin/sort/tmp.c Normal file
View File

@ -0,0 +1,84 @@
/* $NetBSD: tmp.c,v 1.7 2001/02/19 15:45:45 jdolecek Exp $ */
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <ctype.h>
#ifndef lint
__RCSID("$NetBSD: tmp.c,v 1.7 2001/02/19 15:45:45 jdolecek Exp $");
__SCCSID("@(#)tmp.c 8.1 (Berkeley) 6/6/93");
#endif /* not lint */
#include <sys/param.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "sort.h"
#include "pathnames.h"
#define _NAME_TMP "sort.XXXXXXXX"
FILE *
ftmp()
{
sigset_t set, oset;
FILE *fp;
int fd;
char pathb[MAXPATHLEN], *path;
path = pathb;
(void)snprintf(path, sizeof(pathb), "%s%s%s", tmpdir,
(tmpdir[strlen(tmpdir)-1] != '/') ? "/" : "", _NAME_TMP);
sigfillset(&set);
(void)sigprocmask(SIG_BLOCK, &set, &oset);
if ((fd = mkstemp(path)) < 0)
err(2, "ftmp: mkstemp(\"%s\")", path);
if (!(fp = fdopen(fd, "w+")))
err(2, "ftmp: fdopen(\"%s\")", path);
(void)unlink(path);
(void)sigprocmask(SIG_SETMASK, &oset, NULL);
return (fp);
}