Vendor import of bwk's 14-March-2003 release.

This commit is contained in:
David E. O'Brien 2003-03-17 07:59:59 +00:00
parent 72969a2212
commit fc6b1dfe95
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/vendor/one-true-awk/dist/; revision=112336
7 changed files with 99 additions and 26 deletions

View File

@ -25,6 +25,21 @@ THIS SOFTWARE.
This file lists all bug fixes, changes, etc., made since the AWK book
was sent to the printers in August, 1987.
Mar 14, 2003:
the internationalization changes, somewhat modified, are now
reinstated. in theory awk will now do character comparisons
and case conversions in national language, but "." will always
be the decimal point separator on input and output regardless
of national language. the local definition of isblank() is
now guarded by #ifndef HAS_ISBLANK.
this no longer compiles on windows: LC_MESSAGES isn't defined
in Visual C++ 6.
fixed subtle behavior in field and record splitting: if FS is
a single character and RS is not empty, \n is NOT a separator.
this tortuous reading is found in the awk book; behavior now
matches gawk and mawk.
Dec 13, 2002:
for the moment, the internationalization changes of nov 29 are
rolled back -- programs like x = 1.2 don't work in some locales,

View File

@ -72,8 +72,14 @@ This also compiles with Visual C++ on all flavors of Windows,
*if* you provide versions of popen and pclose. The file
missing95.c contains versions that can be used to get started
with, though the underlying support has mysterious properties,
the symptom of which can be truncated pipe output. Beware.
The file makefile.win gives hints on how to proceed.
the symptom of which can be truncated pipe output. Beware. The
file makefile.win gives hints on how to proceed; if you run
vcvars32.bat, it will set up necessary paths and parameters so
you can subsequently run nmake -f makefile.win. Beware also that
when running on Windows under command.com, various quoting
conventions are different from Unix systems: single quotes won't
work around arguments, and various characters like % are
interpreted within double quotes.
This compiles without change on Macintosh OS X using gcc and
the standard developer tools.

View File

@ -282,9 +282,24 @@ int quoted(char **pp) /* pick up next thing after a \\ */
return c;
}
static int collate_range_cmp(int a, int b)
{
int r;
static char s[2][2];
if ((uschar)a == (uschar)b)
return 0;
s[0][0] = a;
s[1][0] = b;
if ((r = strcoll(s[0], s[1])) == 0)
r = (uschar)a - (uschar)b;
return r;
}
char *cclenter(const char *argp) /* add a character class */
{
int i, c, c2;
int j;
uschar *p = (uschar *) argp;
uschar *op, *bp;
static uschar *buf = 0;
@ -303,15 +318,18 @@ char *cclenter(const char *argp) /* add a character class */
c2 = *p++;
if (c2 == '\\')
c2 = quoted((char **) &p);
if (c > c2) { /* empty; ignore */
if (collate_range_cmp(c, c2) > 0) { /* empty; ignore */
bp--;
i--;
continue;
}
while (c < c2) {
for (j = 0; j < NCHARS; j++) {
if ((collate_range_cmp(c, j) > 0) ||
collate_range_cmp(j, c2) > 0)
continue;
if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
FATAL("out of space for character class [%.10s...] 2", p);
*bp++ = ++c;
*bp++ = j;
i++;
}
continue;
@ -695,23 +713,39 @@ Node *unary(Node *np)
* relex(), the expanded character class (prior to range expansion)
* must be less than twice the size of their full name.
*/
/*
 * Fallback isblank() for systems whose headers do not declare one.
 * It treats only space and horizontal tab as blank.  If a locale needs a
 * richer definition of "blank", define HAS_ISBLANK and supply your own
 * version; this one is then compiled out.
 *
 * Returns 1 when c is ' ' or '\t', 0 otherwise.
 */
#ifndef HAS_ISBLANK
int isblank(int c)
{
	switch (c) {
	case ' ':
	case '\t':
		return 1;
	default:
		return 0;
	}
}
#endif
/*
 * Table of the named character classes (alnum, alpha, ...) that relex()
 * searches by name.  The list ends with an entry whose cc_name is NULL.
 *
 * NOTE(review): this listing carries two sets of rows for the same twelve
 * names -- one initializing cc_expand with a literal range string and one
 * supplying a <ctype.h> predicate in the third position.  They look like
 * the before/after sides of the change (relex() is shown calling
 * cc->cc_func(i)); confirm which members and rows the real file keeps.
 */
struct charclass {
const char *cc_name; /* class name, e.g. "alnum" */
int cc_namelen; /* length of cc_name; relex() uses it to index past the name */
const char *cc_expand; /* literal expansion string copied byte by byte */
int (*cc_func)(int); /* predicate relex() calls for each value below NCHARS */
} charclasses[] = {
/* rows giving a literal expansion string */
{ "alnum", 5, "0-9A-Za-z" },
{ "alpha", 5, "A-Za-z" },
{ "blank", 5, " \t" },
{ "cntrl", 5, "\000-\037\177" },
{ "digit", 5, "0-9" },
{ "graph", 5, "\041-\176" },
{ "lower", 5, "a-z" },
{ "print", 5, " \041-\176" },
{ "punct", 5, "\041-\057\072-\100\133-\140\173-\176" },
{ "space", 5, " \f\n\r\t\v" },
{ "upper", 5, "A-Z" },
{ "xdigit", 6, "0-9A-Fa-f" },
/* rows giving a classification function */
{ "alnum", 5, isalnum },
{ "alpha", 5, isalpha },
{ "blank", 5, isblank },
{ "cntrl", 5, iscntrl },
{ "digit", 5, isdigit },
{ "graph", 5, isgraph },
{ "lower", 5, islower },
{ "print", 5, isprint },
{ "punct", 5, ispunct },
{ "space", 5, isspace },
{ "upper", 5, isupper },
{ "xdigit", 6, isxdigit },
{ NULL, 0, NULL }, /* end-of-table marker */
};
@ -724,7 +758,7 @@ int relex(void) /* lexical analyzer for reparse */
static int bufsz = 100;
uschar *bp;
struct charclass *cc;
const uschar *p;
int i;
switch (c = *prestr++) {
case '|': return OR;
@ -773,8 +807,14 @@ int relex(void) /* lexical analyzer for reparse */
if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
prestr[2 + cc->cc_namelen] == ']') {
prestr += cc->cc_namelen + 3;
for (p = (const uschar *) cc->cc_expand; *p; p++)
*bp++ = *p;
for (i = 0; i < NCHARS; i++) {
if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, 0))
FATAL("out of space for reg expr %.10s...", lastre);
if (cc->cc_func(i)) {
*bp++ = i;
n++;
}
}
} else
*bp++ = c;
} else if (c == '\0') {

View File

@ -308,6 +308,13 @@ void fldbld(void) /* create fields from current record */
}
*fr = 0;
} else if (*r != 0) { /* if 0, it's a null field */
/* subtle case: if length(FS) == 1 && length(RS) > 0,
* \n is NOT a field separator (cf awk book 61,84).
* this variable is tested in the inner while loop.
*/
int rtest = '\n'; /* normal case */
if (strlen(*RS) > 0)
rtest = '\0';
for (;;) {
i++;
if (i > nfields)
@ -316,7 +323,7 @@ void fldbld(void) /* create fields from current record */
xfree(fldtab[i]->sval);
fldtab[i]->sval = fr;
fldtab[i]->tval = FLD | STR | DONTFREE;
while (*r != sep && *r != '\n' && *r != '\0') /* \n is always a separator */
while (*r != sep && *r != rtest && *r != '\0') /* \n is a separator only when RS is empty */
*fr++ = *r++;
*fr++ = 0;
if (*r++ == 0)

View File

@ -22,11 +22,12 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
const char *version = "version 20021213";
const char *version = "version 20030314";
#define DEBUG
#include <stdio.h>
#include <ctype.h>
#include <locale.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
@ -54,6 +55,10 @@ int main(int argc, char *argv[])
{
const char *fs = NULL;
setlocale(LC_ALL, "");
setlocale(LC_COLLATE, "");
setlocale(LC_CTYPE, "");
setlocale(LC_MESSAGES, "");
cmdname = argv[0];
if (argc == 1) {
fprintf(stderr, "Usage: %s [-f programfile | 'program'] [-Ffieldsep] [-v var=value] [files]\n", cmdname);

View File

@ -45,7 +45,7 @@ LISTING = awk.h proto.h awkgram.y lex.c b.c main.c maketab.c parse.c \
lib.c run.c tran.c missing95.c
SHIP = README FIXES $(SOURCE) ytab[ch].bak makefile makefile.win \
buildwin.bat mac.code awk.1
vcvars32.bat buildwin.bat mac.code awk.1
a.out: ytab.o $(OFILES)
$(CC) $(CFLAGS) ytab.o $(OFILES) $(ALLOC) -lm

View File

@ -1509,11 +1509,11 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
if (t == FTOUPPER) {
for (p = buf; *p; p++)
if (islower((uschar) *p))
*p = toupper(*p);
*p = toupper((uschar)*p);
} else {
for (p = buf; *p; p++)
if (isupper((uschar) *p))
*p = tolower(*p);
*p = tolower((uschar)*p);
}
tempfree(x);
x = gettemp();