1993-06-18 04:22:21 +00:00
|
|
|
/*
|
|
|
|
* iop.c - do i/o related things.
|
|
|
|
*/
|
|
|
|
|
1995-05-30 05:05:38 +00:00
|
|
|
/*
|
1994-05-31 18:58:00 +00:00
|
|
|
* Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
|
1995-05-30 05:05:38 +00:00
|
|
|
*
|
1993-06-18 04:22:21 +00:00
|
|
|
* This file is part of GAWK, the GNU implementation of the
|
|
|
|
* AWK Programming Language.
|
1995-05-30 05:05:38 +00:00
|
|
|
*
|
1993-06-18 04:22:21 +00:00
|
|
|
* GAWK is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
* (at your option) any later version.
|
1995-05-30 05:05:38 +00:00
|
|
|
*
|
1993-06-18 04:22:21 +00:00
|
|
|
* GAWK is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
1995-05-30 05:05:38 +00:00
|
|
|
*
|
1993-06-18 04:22:21 +00:00
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with GAWK; see the file COPYING. If not, write to
|
|
|
|
* the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "awk.h"
|
|
|
|
|
|
|
|
#ifndef atarist
|
|
|
|
#define INVALID_HANDLE (-1)
|
|
|
|
#else
|
|
|
|
#include <stddef.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#define INVALID_HANDLE (__SMALLEST_VALID_HANDLE - 1)
|
|
|
|
#endif /* atarist */
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef TEST
/* Value handed back by optimal_bufsize() in the stand-alone test build. */
int bufsize = 8192;

/*
 * fatal --- minimal stand-in used only by the test build.
 *
 * Writes the message s, followed by a newline, to standard output and
 * terminates the process with exit status 1.  Only the first argument
 * is looked at; s is printed verbatim.
 */
void
fatal(s)
char *s;
{
	fprintf(stdout, "%s\n", s);
	exit(1);
}
#endif
|
|
|
|
|
|
|
|
/*
 * optimal_bufsize --- pick a read-buffer size for the descriptor fd.
 *
 * fd:	an open file descriptor.
 *
 * Returns the buffer size, in bytes, to use when reading fd:
 *	- VMS: BUFSIZ for a terminal, otherwise a fixed multiple of 512.
 *	- TEST build: the global bufsize.
 *	- otherwise: BUFSIZ for a terminal; the file's own size when fd is
 *	  seekable and the file is non-empty but smaller than one block;
 *	  the block size (DEFBLKSIZE) in every other case.
 *
 * Calls fatal() if fstat() fails (non-VMS, non-TEST builds only).
 */
int
optimal_bufsize(fd)
int fd;
{
	struct stat stb;

#ifdef VMS
	/*
	 * These values correspond with the RMS multi-block count used by
	 * vms_open() in vms/vms_misc.c.
	 */
	if (isatty(fd) > 0)
		return BUFSIZ;
	else if (fstat(fd, &stb) < 0)
		return 8*512;	/* conservative in case of DECnet access */
	else
		return 32*512;

#else
	/*
	 * System V doesn't have the file system block size in the
	 * stat structure. So we have to make some sort of reasonable
	 * guess. We use stdio's BUFSIZ, since that is what it was
	 * meant for in the first place.
	 */
#ifdef BLKSIZE_MISSING
#define	DEFBLKSIZE	BUFSIZ
#else
#define DEFBLKSIZE	(stb.st_blksize ? stb.st_blksize : BUFSIZ)
#endif

#ifdef TEST
	return bufsize;
#else
#ifndef atarist
	if (isatty(fd))
#else
	/*
	 * On ST redirected stdin does not have a name attached
	 * (this could be hard to do too) and fstat would fail
	 */
	if (0 == fd || isatty(fd))
#endif	/*atarist */
		return BUFSIZ;
#ifndef BLKSIZE_MISSING
	/* VMS POSIX 1.0: st_blksize is never assigned a value, so zero it */
	stb.st_blksize = 0;
#endif
	if (fstat(fd, &stb) == -1)
		fatal("can't stat fd %d (%s)", fd, strerror(errno));

	/*
	 * Seekability probe: if fd cannot be seeked, st_size is not
	 * consulted and the block size is used.
	 * NOTE(review): whence 0 is SEEK_SET, so a successful probe also
	 * repositions fd to offset 0 --- confirm no caller passes a
	 * descriptor that has already been partly consumed.
	 */
	if (lseek(fd, (off_t)0, 0) == -1)
		return (int) DEFBLKSIZE;

	/*
	 * Small file: size the buffer to the file.  A reported size of
	 * zero must NOT be used: get_a_record() would then ask read() for
	 * zero bytes and take the resulting 0 as end of file, even when
	 * the descriptor actually has data to deliver.
	 */
	if (stb.st_size > 0 && stb.st_size < DEFBLKSIZE)
		return (int) stb.st_size;
	return (int) DEFBLKSIZE;
#endif	/*! TEST */
#endif	/*! VMS */
}
|
|
|
|
|
|
|
|
IOBUF *
|
|
|
|
iop_alloc(fd)
|
|
|
|
int fd;
|
|
|
|
{
|
|
|
|
IOBUF *iop;
|
|
|
|
|
|
|
|
if (fd == INVALID_HANDLE)
|
|
|
|
return NULL;
|
|
|
|
emalloc(iop, IOBUF *, sizeof(IOBUF), "iop_alloc");
|
|
|
|
iop->flag = 0;
|
|
|
|
if (isatty(fd))
|
|
|
|
iop->flag |= IOP_IS_TTY;
|
|
|
|
iop->size = optimal_bufsize(fd);
|
|
|
|
iop->secsiz = -2;
|
|
|
|
errno = 0;
|
|
|
|
iop->fd = fd;
|
|
|
|
iop->off = iop->buf = NULL;
|
|
|
|
iop->cnt = 0;
|
|
|
|
return iop;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get the next record. Uses a "split buffer" where the latter part is
|
|
|
|
* the normal read buffer and the head part is an "overflow" area that is used
|
|
|
|
* when a record spans the end of the normal buffer, in which case the first
|
|
|
|
* part of the record is copied into the overflow area just before the
|
|
|
|
* normal buffer. Thus, the eventual full record can be returned as a
|
|
|
|
* contiguous area of memory with a minimum of copying. The overflow area
|
|
|
|
* is expanded as needed, so that records are unlimited in length.
|
|
|
|
* We also mark both the end of the buffer and the end of the read() with
|
|
|
|
* a sentinel character (the current record separator) so that the inside
|
|
|
|
* loop can run as a single test.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
get_a_record(out, iop, grRS, errcode)
|
|
|
|
char **out;
|
|
|
|
IOBUF *iop;
|
|
|
|
register int grRS;
|
|
|
|
int *errcode;
|
|
|
|
{
|
|
|
|
register char *bp = iop->off;
|
|
|
|
char *bufend;
|
|
|
|
char *start = iop->off; /* beginning of record */
|
|
|
|
char rs;
|
1994-05-31 18:58:00 +00:00
|
|
|
int saw_newline = 0, eat_whitespace = 0; /* used iff grRS==0 */
|
1993-06-18 04:22:21 +00:00
|
|
|
|
1994-05-31 18:58:00 +00:00
|
|
|
if (iop->cnt == EOF) { /* previous read hit EOF */
|
|
|
|
*out = NULL;
|
1993-06-18 04:22:21 +00:00
|
|
|
return EOF;
|
1994-05-31 18:58:00 +00:00
|
|
|
}
|
1993-06-18 04:22:21 +00:00
|
|
|
|
|
|
|
if (grRS == 0) { /* special case: grRS == "" */
|
|
|
|
rs = '\n';
|
|
|
|
} else
|
|
|
|
rs = (char) grRS;
|
|
|
|
|
|
|
|
/* set up sentinel */
|
|
|
|
if (iop->buf) {
|
|
|
|
bufend = iop->buf + iop->size + iop->secsiz;
|
|
|
|
*bufend = rs;
|
|
|
|
} else
|
|
|
|
bufend = NULL;
|
|
|
|
|
|
|
|
for (;;) { /* break on end of record, read error or EOF */
|
|
|
|
|
|
|
|
/* Following code is entered on the first call of this routine
|
|
|
|
* for a new iop, or when we scan to the end of the buffer.
|
|
|
|
* In the latter case, we copy the current partial record to
|
|
|
|
* the space preceding the normal read buffer. If necessary,
|
|
|
|
* we expand this space. This is done so that we can return
|
|
|
|
* the record as a contiguous area of memory.
|
|
|
|
*/
|
|
|
|
if ((iop->flag & IOP_IS_INTERNAL) == 0 && bp >= bufend) {
|
|
|
|
char *oldbuf = NULL;
|
|
|
|
char *oldsplit = iop->buf + iop->secsiz;
|
|
|
|
long len; /* record length so far */
|
|
|
|
|
|
|
|
len = bp - start;
|
|
|
|
if (len > iop->secsiz) {
|
|
|
|
/* expand secondary buffer */
|
|
|
|
if (iop->secsiz == -2)
|
|
|
|
iop->secsiz = 256;
|
|
|
|
while (len > iop->secsiz)
|
|
|
|
iop->secsiz *= 2;
|
|
|
|
oldbuf = iop->buf;
|
|
|
|
emalloc(iop->buf, char *,
|
|
|
|
iop->size+iop->secsiz+2, "get_a_record");
|
|
|
|
bufend = iop->buf + iop->size + iop->secsiz;
|
|
|
|
*bufend = rs;
|
|
|
|
}
|
|
|
|
if (len > 0) {
|
|
|
|
char *newsplit = iop->buf + iop->secsiz;
|
|
|
|
|
|
|
|
if (start < oldsplit) {
|
|
|
|
memcpy(newsplit - len, start,
|
|
|
|
oldsplit - start);
|
|
|
|
memcpy(newsplit - (bp - oldsplit),
|
|
|
|
oldsplit, bp - oldsplit);
|
|
|
|
} else
|
|
|
|
memcpy(newsplit - len, start, len);
|
|
|
|
}
|
|
|
|
bp = iop->end = iop->off = iop->buf + iop->secsiz;
|
|
|
|
start = bp - len;
|
|
|
|
if (oldbuf) {
|
|
|
|
free(oldbuf);
|
|
|
|
oldbuf = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/* Following code is entered whenever we have no more data to
|
|
|
|
* scan. In most cases this will read into the beginning of
|
|
|
|
* the main buffer, but in some cases (terminal, pipe etc.)
|
|
|
|
* we may be doing smallish reads into more advanced positions.
|
|
|
|
*/
|
|
|
|
if (bp >= iop->end) {
|
|
|
|
if ((iop->flag & IOP_IS_INTERNAL) != 0) {
|
|
|
|
iop->cnt = EOF;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
iop->cnt = read(iop->fd, iop->end, bufend - iop->end);
|
|
|
|
if (iop->cnt == -1) {
|
|
|
|
if (! do_unix && errcode != NULL) {
|
|
|
|
*errcode = errno;
|
|
|
|
iop->cnt = EOF;
|
|
|
|
break;
|
|
|
|
} else
|
|
|
|
fatal("error reading input: %s",
|
|
|
|
strerror(errno));
|
|
|
|
} else if (iop->cnt == 0) {
|
|
|
|
iop->cnt = EOF;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
iop->end += iop->cnt;
|
|
|
|
*iop->end = rs;
|
|
|
|
}
|
|
|
|
if (grRS == 0) {
|
|
|
|
extern int default_FS;
|
|
|
|
|
|
|
|
if (default_FS && (bp == start || eat_whitespace)) {
|
1994-05-31 18:58:00 +00:00
|
|
|
while (bp < iop->end
|
|
|
|
&& (*bp == ' ' || *bp == '\t' || *bp == '\n'))
|
1993-06-18 04:22:21 +00:00
|
|
|
bp++;
|
|
|
|
if (bp == iop->end) {
|
|
|
|
eat_whitespace = 1;
|
|
|
|
continue;
|
|
|
|
} else
|
|
|
|
eat_whitespace = 0;
|
|
|
|
}
|
|
|
|
if (saw_newline && *bp == rs) {
|
|
|
|
bp++;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
saw_newline = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (*bp++ != rs)
|
|
|
|
;
|
|
|
|
|
|
|
|
if (bp <= iop->end) {
|
|
|
|
if (grRS == 0)
|
|
|
|
saw_newline = 1;
|
|
|
|
else
|
|
|
|
break;
|
|
|
|
} else
|
|
|
|
bp--;
|
|
|
|
|
|
|
|
if ((iop->flag & IOP_IS_INTERNAL) != 0)
|
|
|
|
iop->cnt = bp - start;
|
|
|
|
}
|
|
|
|
if (iop->cnt == EOF
|
1994-05-31 18:58:00 +00:00
|
|
|
&& (((iop->flag & IOP_IS_INTERNAL) != 0) || start == bp)) {
|
|
|
|
*out = NULL;
|
1993-06-18 04:22:21 +00:00
|
|
|
return EOF;
|
1994-05-31 18:58:00 +00:00
|
|
|
}
|
1993-06-18 04:22:21 +00:00
|
|
|
|
|
|
|
iop->off = bp;
|
|
|
|
bp--;
|
|
|
|
if (*bp != rs)
|
|
|
|
bp++;
|
|
|
|
*bp = '\0';
|
|
|
|
if (grRS == 0) {
|
1994-05-31 18:58:00 +00:00
|
|
|
/* there could be more newlines left, clean 'em out now */
|
|
|
|
while (*(iop->off) == rs && iop->off <= iop->end)
|
|
|
|
(iop->off)++;
|
|
|
|
|
1993-06-18 04:22:21 +00:00
|
|
|
if (*--bp == rs)
|
|
|
|
*bp = '\0';
|
|
|
|
else
|
|
|
|
bp++;
|
|
|
|
}
|
|
|
|
|
|
|
|
*out = start;
|
|
|
|
return bp - start;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef TEST
/*
 * Stand-alone test driver: copies standard input to standard output one
 * record at a time, writing the separator character after each record.
 *
 * argv[1], if given, sets the buffer size returned by optimal_bufsize().
 * argv[2], if given, supplies the record separator (its first character);
 * without it the separator is 0, i.e. the RS == "" mode of get_a_record().
 *
 * Returns 0 so that the exit status is well defined.
 */
int
main(argc, argv)
int argc;
char *argv[];
{
	IOBUF *iop;
	char *out;
	int cnt;
	char rs[2];

	rs[0] = 0;
	if (argc > 1)
		bufsize = atoi(argv[1]);
	if (argc > 2)
		rs[0] = *argv[2];
	iop = iop_alloc(0);
	/* stops on EOF and also on the first zero-length record */
	while ((cnt = get_a_record(&out, iop, rs[0], NULL)) > 0) {
		fwrite(out, 1, cnt, stdout);
		fwrite(rs, 1, 1, stdout);
	}
	return 0;
}
#endif
|