Vendor import of bwk's 2002-02-18 release. The most significant update is the
inclusion of my character class patch.
This commit is contained in:
Dag-Erling Smørgrav 2002-02-19 09:35:25 +00:00
parent 82b279f73e
commit 007c6572bf
7 changed files with 124 additions and 28 deletions

View File

@ -25,6 +25,38 @@ THIS SOFTWARE.
This file lists all bug fixes, changes, etc., made since the AWK book This file lists all bug fixes, changes, etc., made since the AWK book
was sent to the printers in August, 1987. was sent to the printers in August, 1987.
Feb 10, 2002:
changed types in posix chars structure to quiet solaris cc.
Jan 1, 2002:
fflush() or fflush("") flushes all files and pipes.
length(arrayname) returns number of elements; thanks to
arnold robbins for suggestion.
added a makefile.win to make it easier to build on windows.
based on dan allen's buildwin.bat.
Nov 16, 2001:
added support for posix character class names like [:digit:],
which are not exactly shorter than [0-9] and perhaps no more
portable. thanks to dag-erling smorgrav for code.
Feb 16, 2001:
removed -m option; no longer needed, and it was actually
broken (noted thanks to volker kiefel).
Feb 10, 2001:
fixed an appalling bug in gettok: any sequence of digits, +,-, E, e,
and period was accepted as a valid number if it started with a period.
this would never have happened with the lex version.
other 1-character botches, now fixed, include a bare $ and a
bare " at the end of the input.
Feb 7, 2001:
more (const char *) casts in b.c and tran.c to silence warnings.
Nov 15, 2000: Nov 15, 2000:
fixed a bug introduced in august 1997 that caused expressions fixed a bug introduced in august 1997 that caused expressions
like $f[1] to be syntax errors. thanks to arnold robbins for like $f[1] to be syntax errors. thanks to arnold robbins for

View File

@ -68,7 +68,7 @@ compilers on a variety of systems, but new systems or compilers
may raise some new complaint; reports of difficulties are may raise some new complaint; reports of difficulties are
welcome. welcome.
This also compiles with Visual C++ on Windows 95 and Windows NT, This also compiles with Visual C++ on all flavors of Windows,
*if* you provide versions of popen and pclose. The file *if* you provide versions of popen and pclose. The file
missing95.c contains versions that can be used to get started missing95.c contains versions that can be used to get started
with, though the underlying support has mysterious properties, with, though the underlying support has mysterious properties,
@ -76,8 +76,7 @@ the symptom of which can be truncated pipe output. Beware.
This is also said to compile on Macintosh systems, using the This is also said to compile on Macintosh systems, using the
file "buildmac" provided by Dan Allen (danallen@microsoft.com), file "buildmac" provided by Dan Allen (danallen@microsoft.com),
to whom many thanks. Dan also provided buildwin.bat, a simple to whom many thanks.
script for compiling on NT if you prefer.
The version of malloc that comes with some systems is sometimes The version of malloc that comes with some systems is sometimes
astonishingly slow. If awk seems slow, you might try fixing that. astonishingly slow. If awk seems slow, you might try fixing that.

View File

@ -93,7 +93,7 @@ fa *makedfa(char *s, int anchor) /* returns dfa for reg expr s */
return mkdfa(s, anchor); return mkdfa(s, anchor);
for (i = 0; i < nfatab; i++) /* is it there already? */ for (i = 0; i < nfatab; i++) /* is it there already? */
if (fatab[i]->anchor == anchor if (fatab[i]->anchor == anchor
&& strcmp(fatab[i]->restr, s) == 0) { && strcmp((const char *) fatab[i]->restr, s) == 0) {
fatab[i]->use = now++; fatab[i]->use = now++;
return fatab[i]; return fatab[i];
} }
@ -683,6 +683,37 @@ Node *unary(Node *np)
} }
} }
/*
 * Character class definitions conformant to the POSIX locale as
 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
 * and operating character sets are both ASCII (ISO646) or supersets
 * thereof.
 *
 * Each entry maps a POSIX class name (as written between "[:" and
 * ":]") to the bracket-expression text that relex() copies into its
 * temporary buffer in place of the name.
 *
 * Constraints on cc_expand:
 *
 *  - relex() sizes its buffer at twice the length of the remaining
 *    pattern, so an expansion (prior to range expansion) must be
 *    shorter than twice the length of the "[:name:]" text it replaces.
 *
 *  - relex() copies the expansion as a NUL-terminated string, so it
 *    must not contain a NUL byte.  For that reason "cntrl" starts at
 *    \001 rather than \000: a leading NUL would make the whole class
 *    expand to nothing.
 */
struct charclass {
	const char *cc_name;	/* class name without the "[:" ":]" delimiters */
	int cc_namelen;		/* strlen(cc_name), precomputed for strncmp() */
	const char *cc_expand;	/* replacement text; NUL-terminated, never empty */
} charclasses[] = {
	{ "alnum",	5,	"0-9A-Za-z" },
	{ "alpha",	5,	"A-Za-z" },
	{ "blank",	5,	" \t" },
	{ "cntrl",	5,	"\001-\037\177" },	/* NUL deliberately excluded, see above */
	{ "digit",	5,	"0-9" },
	{ "graph",	5,	"\041-\176" },
	{ "lower",	5,	"a-z" },
	{ "print",	5,	" \041-\176" },
	{ "punct",	5,	"\041-\057\072-\100\133-\140\173-\176" },
	{ "space",	5,	" \f\n\r\t\v" },
	{ "upper",	5,	"A-Z" },
	{ "xdigit",	6,	"0-9A-Fa-f" },
	{ NULL,		0,	NULL },		/* sentinel: terminates lookups */
};
int relex(void) /* lexical analyzer for reparse */ int relex(void) /* lexical analyzer for reparse */
{ {
int c, n; int c, n;
@ -690,6 +721,8 @@ int relex(void) /* lexical analyzer for reparse */
static uschar *buf = 0; static uschar *buf = 0;
static int bufsz = 100; static int bufsz = 100;
uschar *bp; uschar *bp;
struct charclass *cc;
const uschar *p;
switch (c = *prestr++) { switch (c = *prestr++) {
case '|': return OR; case '|': return OR;
@ -719,7 +752,7 @@ int relex(void) /* lexical analyzer for reparse */
} }
else else
cflag = 0; cflag = 0;
n = 2 * strlen(prestr)+1; n = 2 * strlen((const char *) prestr)+1;
if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0)) if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0))
FATAL("out of space for reg expr %.10s...", lastre); FATAL("out of space for reg expr %.10s...", lastre);
for (; ; ) { for (; ; ) {
@ -730,6 +763,18 @@ int relex(void) /* lexical analyzer for reparse */
*bp++ = c; *bp++ = c;
/* } else if (c == '\n') { */ /* } else if (c == '\n') { */
/* FATAL("newline in character class %.20s...", lastre); */ /* FATAL("newline in character class %.20s...", lastre); */
} else if (c == '[' && *prestr == ':') {
/* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
for (cc = charclasses; cc->cc_name; cc++)
if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
break;
if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
prestr[2 + cc->cc_namelen] == ']') {
prestr += cc->cc_namelen + 3;
for (p = (const uschar *) cc->cc_expand; *p; p++)
*bp++ = *p;
} else
*bp++ = c;
} else if (c == '\0') { } else if (c == '\0') {
FATAL("nonterminated character class %.20s", lastre); FATAL("nonterminated character class %.20s", lastre);
} else if (bp == buf) { /* 1st char is special */ } else if (bp == buf) { /* 1st char is special */

View File

@ -105,7 +105,7 @@ int peek(void)
int gettok(char **pbuf, int *psz) /* get next input token */ int gettok(char **pbuf, int *psz) /* get next input token */
{ {
int c; int c, retc;
char *buf = *pbuf; char *buf = *pbuf;
int sz = *psz; int sz = *psz;
char *bp = buf; char *bp = buf;
@ -133,6 +133,7 @@ int gettok(char **pbuf, int *psz) /* get next input token */
} }
} }
*bp = 0; *bp = 0;
retc = 'a'; /* alphanumeric */
} else { /* it's a number */ } else { /* it's a number */
char *rem; char *rem;
/* read input until can't be a number */ /* read input until can't be a number */
@ -151,11 +152,17 @@ int gettok(char **pbuf, int *psz) /* get next input token */
*bp = 0; *bp = 0;
strtod(buf, &rem); /* parse the number */ strtod(buf, &rem); /* parse the number */
unputstr(rem); /* put rest back for later */ unputstr(rem); /* put rest back for later */
rem[0] = 0; if (rem == buf) { /* it wasn't a valid number at all */
buf[1] = 0; /* so return one character as token */
retc = buf[0]; /* character is its own type */
} else { /* some prefix was a number */
rem[0] = 0; /* so truncate where failure started */
retc = '0'; /* number */
}
} }
*pbuf = buf; *pbuf = buf;
*psz = sz; *psz = sz;
return buf[0]; return retc;
} }
int word(char *); int word(char *);
@ -186,7 +193,7 @@ int yylex(void)
return 0; return 0;
if (isalpha(c) || c == '_') if (isalpha(c) || c == '_')
return word(buf); return word(buf);
if (isdigit(c) || c == '.') { if (isdigit(c)) {
yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab); yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
/* should this also have STR set? */ /* should this also have STR set? */
RET(NUMBER); RET(NUMBER);
@ -311,6 +318,9 @@ int yylex(void)
} }
yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab); yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
RET(IVAR); RET(IVAR);
} else if (c == 0) { /* */
SYNTAX( "unexpected end of input after $" );
RET(';');
} else { } else {
unputstr(buf); unputstr(buf);
RET(INDIRECT); RET(INDIRECT);
@ -366,6 +376,8 @@ int string(void)
case 0: case 0:
SYNTAX( "non-terminated string %.10s...", buf ); SYNTAX( "non-terminated string %.10s...", buf );
lineno++; lineno++;
if (c == 0) /* hopeless */
FATAL( "giving up" );
break; break;
case '\\': case '\\':
c = input(); c = input();

View File

@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE. THIS SOFTWARE.
****************************************************************/ ****************************************************************/
char *version = "version 20001115"; char *version = "version 20020101";
#define DEBUG #define DEBUG
#include <stdio.h> #include <stdio.h>
@ -52,8 +52,7 @@ int safe = 0; /* 1 => "safe" mode */
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
char *fs = NULL, *marg; char *fs = NULL;
int temp;
cmdname = argv[0]; cmdname = argv[0];
if (argc == 1) { if (argc == 1) {
@ -102,19 +101,8 @@ int main(int argc, char *argv[])
setclvar(argv[1]); setclvar(argv[1]);
break; break;
case 'm': /* more memory: -mr=record, -mf=fields */ case 'm': /* more memory: -mr=record, -mf=fields */
/* no longer needed */ /* no longer supported */
marg = argv[1]; WARNING("obsolete option %s ignored", argv[1]);
if (argv[1][3])
temp = atoi(&argv[1][3]);
else {
argv++; argc--;
temp = atoi(&argv[1][0]);
}
switch (marg[2]) {
case 'r': recsize = temp; break;
case 'f': nfields = temp; break;
default: FATAL("unknown option %s\n", marg);
}
break; break;
case 'd': case 'd':
dbg = atoi(&argv[1][2]); dbg = atoi(&argv[1][2]);

View File

@ -1448,13 +1448,18 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
char *p, *buf; char *p, *buf;
Node *nextarg; Node *nextarg;
FILE *fp; FILE *fp;
void flush_all(void);
t = ptoi(a[0]); t = ptoi(a[0]);
x = execute(a[1]); x = execute(a[1]);
nextarg = a[1]->nnext; nextarg = a[1]->nnext;
switch (t) { switch (t) {
case FLENGTH: case FLENGTH:
u = strlen(getsval(x)); break; if (isarr(x))
u = ((Array *) x->sval)->nelem; /* GROT. should be function*/
else
u = strlen(getsval(x));
break;
case FLOG: case FLOG:
u = errcheck(log(getfval(x)), "log"); break; u = errcheck(log(getfval(x)), "log"); break;
case FINT: case FINT:
@ -1511,7 +1516,10 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
free(buf); free(buf);
return x; return x;
case FFLUSH: case FFLUSH:
if ((fp = openfile(FFLUSH, getsval(x))) == NULL) if (isrec(x) || strlen(getsval(x)) == 0) {
flush_all(); /* fflush() or fflush("") -> all */
u = 0;
} else if ((fp = openfile(FFLUSH, getsval(x))) == NULL)
u = EOF; u = EOF;
else else
u = fflush(fp); u = fflush(fp);
@ -1702,6 +1710,15 @@ void closeall(void)
} }
} }
/* flush_all: call fflush() on every entry of files[] that has an open stream */
void flush_all(void)
{
	int slot = 0;

	while (slot < FOPEN_MAX) {
		/* unused table entries have a null stream pointer; skip them */
		if (files[slot].fp != NULL)
			fflush(files[slot].fp);
		slot++;
	}
}
void backsub(char **pb_ptr, char **sptr_ptr); void backsub(char **pb_ptr, char **sptr_ptr);
Cell *sub(Node **a, int nnn) /* substitute command */ Cell *sub(Node **a, int nnn) /* substitute command */

View File

@ -170,9 +170,12 @@ void freesymtab(Cell *ap) /* free a symbol table */
xfree(cp->sval); xfree(cp->sval);
temp = cp->cnext; /* avoids freeing then using */ temp = cp->cnext; /* avoids freeing then using */
free(cp); free(cp);
tp->nelem--;
} }
tp->tab[i] = 0; tp->tab[i] = 0;
} }
if (tp->nelem != 0)
WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
free(tp->tab); free(tp->tab);
free(tp); free(tp);
} }
@ -396,7 +399,7 @@ char *qstring(char *is, int delim) /* collect string up to next delim */
uschar *s = (uschar *) is; uschar *s = (uschar *) is;
uschar *buf, *bp; uschar *buf, *bp;
if ((buf = (uschar *) malloc(strlen(s)+3)) == NULL) if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL)
FATAL( "out of space in qstring(%s)", s); FATAL( "out of space in qstring(%s)", s);
for (bp = buf; (c = *s) != delim; s++) { for (bp = buf; (c = *s) != delim; s++) {
if (c == '\n') if (c == '\n')