diff --git a/contrib/one-true-awk/ChangeLog b/contrib/one-true-awk/ChangeLog new file mode 100644 index 000000000000..fd03b2bbca0b --- /dev/null +++ b/contrib/one-true-awk/ChangeLog @@ -0,0 +1,245 @@ +2019-05-29 Arnold D. Robbins + + * lib.c (isclvar): Remove check for additional '=' after + first one. No longer needed. + +2019-01-26 Arnold D. Robbins + + * main.c (version): Updated. + +2019-01-25 Arnold D. Robbins + + * run.c (awkgetline): Check for numeric value in all getline + variants. See the numeric-getline.* files in bugs-fixed directory. + +2018-08-29 Arnold D. Robbins + + * REGRESS: Check for existence of a.out. If not there, run + make. Enable core dumps for T.arnold system status test + to work on MacOS X. + +2018-08-22 Arnold D. Robbins + + * awktest.tar (testdir/T.expr): Fix test for unary plus. + +2018-08-22 Arnold D. Robbins + + * REGRESS: Extract tests if necessary, set PATH to include '.'. + * regdir/beebe.tar (Makefile): Fix longwrds test to prefix + sort with LC_ALL=C. + * awktest.tar: Updated from fixed test suite, directory + it extracts is now called 'testdir' to match what's in top-level + REGRESS script. + * regdir: Removed, as Brian wants to keep the test suite in + the tar file. + +2018-08-22 Arnold D. Robbins + + * FIXES, lib.c, run.c, makefile, main.c: Merge from Brian's tree. + * REGRESS: New file, from Brian. + * awktest.tar: Restored from Brian's tree. + +2018-08-22 Arnold D. Robbins + + * awkgram.y (UPLUS): New token. In the grammar, call op1() + with it. + * maketab.c (proc): Add entry for UPLUS. + * run.c (arith): Handle UPLUS. + * main.c (version): Updated. + * bugs-fixed/unary-plus.awk, bugs-fixed/unary-plus.bad, + bugs-fixed/unary-plus.ok: New files. + +2018-08-10 Arnold D. Robbins + + * TODO: Updated. + * awk.1: Improve use of macros, add some additional explanation + in a few places, alphabetize list of variables. + +2018-08-08 Arnold D. 
Robbins + + * awk.h (Cell): Add new field `fmt' to track xFMT value used + for a string conversion. + [CONVC, CONVO]: New flag macros. + * bugs-fixed/README: Updated. + * bugs-fixed/string-conv.awk, bugs-fixed/string-conv.bad, + bugs-fixed/string-conv.ok: New files. + * main.c (version): Updated. + * proto.h (flags2str): Add declaration. + * tran.c (setfval): Clear CONVC and CONVO flags and set vp->fmt + to NULL. + (setsval): Ditto. Add large comment and new code to manage + correct conversion of number to string based on various flags + and the value of vp->fmt. The idea is to not convert again + if xFMT is the same as before and we're doing the same conversion. + Otherwise, clear the old flags, set the new, and reconvert. + (flags2str): New function. For debug prints and for use from a debugger. + +2018-08-05 Arnold D. Robbins + + Fix filename conflicts in regdir where the only difference was + in letter case. This caused problems on Windows systems. + + * regdir/Compare.T1: Renamed from regdir/Compare.T. + * regdir/t.delete0: Renamed from regdir/t.delete. + * regdir/t.getline1: Renamed from regdir/t.getline. + * regdir/t.redir1: Renamed from regdir/t.redir. + * regdir/t.split1: Renamed from regdir/t.split. + * regdir/t.sub0: Renamed from regdir/t.sub. + * regdir/REGRESS: Adjusted. + +2018-08-04 Arnold D. Robbins + + With scalpel, tweezers, magnifying glass and bated breath, + borrow code from the NetBSD version of nawk to fix the years-old + bug whereby decrementing the value of NF did not change the + record. + + * lib.c (fldbld): Set donerec to 1 when done. + (setlastfld): New function. + * proto.h (setlastfld): Add declaration. + * run.c (copycell): Make code smarter about flags (from NetBSD code). + * tran.c (setfree): New function. + * tran.c (setfval): Normalize negative zero to positive zero. + If setting NF, clear donerec and call setlastfld(). + (setsval): Remove call to save_old_OFS(). If setting OFS, call + recbld(). 
If setting NF, clear donerec and call setlastfld(). + + As part of the process, revert OFS-related changes of 2018-05-22: + + * awk.h (saveOFS, saveOFSlen, save_old_OFS): Remove declarations. + * lib.c (recbld): Use *OFS instead of saveOFS. + * run.c (saveOFS, saveOFSlen, save_old_OFS): Remove. + * tran.c (syminit): Remove initialization of saveOFS and saveOFSlen. + + General stuff that goes along with all this: + + * bugs-fixed/README: Updated. + * bugs-fixed/decr-NF.awk, bugs-fixed/decr-NF.bad, + bugs-fixed/decr-NF.ok: New files. + * main.c (version): Updated. + * regdir/README.TESTS: Fix awk book title. + * regdir/T.misc: Revise test to match fixed code. + * run.c (format): Increase size of buffer used for %a test. (Unrelated + to NF or OFS, but fixes a compiler complaint.) + +2018-06-07 Arnold D. Robbins + + * regdir/beebe.tar: Fix longwrds.ok so that the test will pass. + The file was incorrectly sorted. + +2018-06-06 Arnold D. Robbins + + * regdir/T.lilly: Fix the bug again in the second instance + of the code. Thanks to BWK for pointing this out. + +2018-05-31 Arnold D. Robbins + + * regdir/T.lilly: Fix a syntax error and ordering bug + in creating the 'foo' file. + +2018-05-23 Arnold D. Robbins + + * awk.1: Remove standalone 'awk' at the top of file, it messed up + the formatting. Arrange built-in variable list in alphabetical + order. + +2018-05-23 Arnold D. Robbins + + * main.c (version): Add my email address and a date so that + users can tell this isn't straight BWK awk. + * README.md: Minor updates. + * TODO: Updated. + +2018-05-22 Arnold D. Robbins + + Add POSIX-required formats %a and %A. + + * run.c (format): Check for %a support in C library. If there, + allow %a and %A as valid formats. + * TODO: Updated. + * bugs-fixed/README: Updated. + * bugs-fixed/a-format.awk, bugs-fixed/a-format.bad, + bugs-fixed/a-format.ok: New files. + +2018-05-22 Arnold D. 
Robbins + + * FIXES: Restored a line from a much earlier version that + apparently got lost when the dates were reordered. + * TODO: Updated. + +2018-05-22 Arnold D. Robbins + + * README.md: New file. + +2018-05-22 Arnold D. Robbins + + * regdir/echo.c, regdir/time.c: Minor fixes to compile without + warning on current GCC / Linux. + +2018-05-22 Arnold D. Robbins + + * TODO: New file. + +2018-05-22 Arnold D. Robbins + + * makefile (gitadd, gitpush): Remove these targets. They + should not be automated and were incorrect for things that + would be done regularly. + +2018-05-22 Arnold D. Robbins + + Fix nawk so that [[:blank:]] only matches space and tab instead + of any whitespace character, originally made May 10, 2018. + See bugs-fixed/space.awk. + + This appears to have been a thinko on Brian's part. + + * b.c (charclasses): Use xisblank() function for [[:blank:]]. + * bugs-fixed/README: Updated. + * bugs-fixed/space.awk, bugs-fixed/space.bad, + bugs-fixed/space.ok: New files. + +2018-05-22 Arnold D. Robbins + + * .gitignore: New file. + +2018-05-22 Arnold D. Robbins + + Fix nawk to provide reasonable exit status for system(), + a la gawk, originally made March 12, 2016. See + bugs-fixed/system-status.awk. + + * run.c (bltin): For FSYSTEM, use the macros defined for wait(2) + to produce a reasonable exit value, instead of doing a floating-point + division by 256. + * awk.1: Document the return status values. + * bugs-fixed/README: Updated. + * bugs-fixed/system-status.awk, bugs-fixed/system-status.bad, + bugs-fixed/system-status.ok: New files. + +2018-05-22 Arnold D. Robbins + + Bug fix with respect to rebuilding a record, originally + made August 19, 2014. See bugs-fixed/ofs-rebuild.awk. + + * awk.h (saveOFS, saveOFSlen): Declare new variables. + * lib.c (recbld): Use them when rebuilding the record. + * run.c (saveOFS, saveOFSlen): Define new variables. + (save_old_OFS): New function to save OFS aside. + * tran.c (syminit): Initialize saveOFS and saveOFSlen. 
+ (setsval): If setting a field, call save_old_OFS(). + * bugs-fixed/README, bugs-fixed/ofs-rebuild.awk, + bugs-fixed/ofs-rebuild.bad, bugs-fixed/ofs-rebuild.ok: New files. + +2018-05-22 Arnold D. Robbins + + * makefile (YACC): Use bison. + +2018-05-22 Arnold D. Robbins + + * ChangeLog: Created. + * regdir: Created. Based on contents of awktest.a. + * .gitattributes: Created, to preserve CR LF in regdir/t.crlf. + * awktest.a: Removed. + * regdir/T.gawk, regdir/T.latin1: Updated from awktest.tar. + * awktest.tar: Removed. diff --git a/contrib/one-true-awk/FIXES b/contrib/one-true-awk/FIXES index c78aabc511f3..183eaedee47d 100644 --- a/contrib/one-true-awk/FIXES +++ b/contrib/one-true-awk/FIXES @@ -25,6 +25,113 @@ THIS SOFTWARE. This file lists all bug fixes, changes, etc., made since the AWK book was sent to the printers in August, 1987. +May 29,2019: + Fix check for command line arguments to no longer require that + first character after '=' not be another '='. Reverts change of + August 11, 1989. Thanks to GitHub user Jamie Landeg Jones for + pointing out the issue; from Issue #38. + +Apr 7, 2019: + Update awktest.tar(p.50) to use modern options to sort. Needed + for Android development. Thanks to GitHub user mohd-akram (Mohamed + Akram). From Comment #33. + +Mar 12, 2019: + Added very simplistic support for cross-compiling in the + makefile. We are NOT going to go in the direction of the + autotools, though. Thanks to GitHub user nee-san for + the basic change. (Merged from PR #34.) + +Mar 5, 2019: + Added support for POSIX-standard interval expressions (a.k.a. + bounds, a.k.a. repetition expressions) in regular expressions, + backported (via NetBSD) from Apple awk-24 (20070501). + Thanks to Martijn Dekker for the port. + (Merged from PR #30.) + +Mar 3, 2019: + Merge PRs as follows: + #12: Avoid undefined behaviour when using ctype(3) functions in + relex(). Thanks to GitHub user iamleot. + #31: Make getline handle numeric strings, and update FIXES. 
Thanks + to GitHub user arnoldrobbins + #32: maketab: support build systems with read-only source. Thanks + to GitHub user enh. + +Jan 25, 2019: + Make getline handle numeric strings properly in all cases. + (Thanks, Arnold.) + +Jan 21, 2019: + Merged a number of small fixes from GitHub pull requests. + Thanks to GitHub users Arnold Robbins (arnoldrobbins), + Cody Mello (melloc) and Christoph Junghans (junghans). + PR numbers: 13-21, 23, 24, 27. + +Oct 25, 2018: + Added test in maketab.c to prevent generating a proctab entry + for YYSTYPE_IS_DEFINED. It was harmless but some gcc settings + generated a warning message. Thanks to Nan Xiao for report. + +Aug 27, 2018: + Disallow '$' in printf formats; arguments evaluated in order + and printed in order. + + Added some casts to silence warnings on debugging printfs. + (Thanks, Arnold.) + +Aug 23, 2018: + A long list of fixes courtesy of Arnold Robbins, + to whom profound thanks. + + 1. ofs-rebuild: OFS value used to rebuild the record was incorrect. + Fixed August 19, 2014. Revised fix August 2018. + + 2. system-status: Instead of a floating-point division by 256, use + the wait(2) macros to create a reasonable exit status. + Fixed March 12, 2016. + + 3. space: Use provided xisblank() function instead of isspace() for + matching [[:blank:]]. + + 4. a-format: Add POSIX standard %a and %A to supported formats. Check + at runtime that this format is available. + + 5. decr-NF: Decrementing NF did not change $0. This is a decades-old + bug. There are interactions with the old and new value of OFS as well. + Most of the fix came from the NetBSD awk. + + 6. string-conv: String conversions of scalars were sticky. Once a + conversion to string happened, even with OFMT, that value was used until + a new numeric value was assigned, even if OFMT differed from CONVFMT, + and also if CONVFMT changed. + + 7. unary-plus: Unary plus on a string constant returned the string. 
+ Instead, it should convert the value to numeric and give that value. + + Also added Arnold's tests for these to awktest.tar as T.arnold. + +Aug 15, 2018: + fixed mangled awktest.tar (thanks, Arnold), posted all + current (very minor) fixes to github / onetrueawk + +Jun 7, 2018: + (yes, a long layoff) + Updated some broken tests (beebe.tar, T.lilly) + [thanks to Arnold Robbins] + +Mar 26, 2015: + buffer overflow in error reporting; thanks to tobias ulmer + and john-mark gurney for spotting it and the fix. + +Feb 4, 2013: + cleaned up a handful of tests that didn't seem to actually + test for correct behavior: T.latin1, T.gawk. + +Jan 5, 2013: + added ,NULL initializer to static Cells in run.c; not really + needed but cleaner. Thanks to Michael Bombardieri. + Dec 20, 2012: fiddled makefile to get correct yacc and bison flags. pick yacc (linux) or bison (mac) as necessary. @@ -493,6 +600,8 @@ May 12, 1998: Mar 12, 1998: added -V to print version number and die. +[notify dave kerns, dkerns@dacsoup.ih.lucent.com] + Feb 11, 1998: subtle silent bug in lex.c: if the program ended with a number longer than 1 digit, part of the input would be pushed back and diff --git a/contrib/one-true-awk/LICENSE b/contrib/one-true-awk/LICENSE new file mode 100644 index 000000000000..07dfd7b73b11 --- /dev/null +++ b/contrib/one-true-awk/LICENSE @@ -0,0 +1,23 @@ +/**************************************************************** +Copyright (C) Lucent Technologies 1997 +All Rights Reserved + +Permission to use, copy, modify, and distribute this software and +its documentation for any purpose and without fee is hereby +granted, provided that the above copyright notice appear in all +copies and that both that the copyright notice and this +permission notice and warranty disclaimer appear in supporting +documentation, and that the name Lucent Technologies or any of +its entities not be used in advertising or publicity pertaining +to distribution of the software without specific, written prior 
+permission. + +LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, +INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. +IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY +SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER +IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, +ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. +****************************************************************/ diff --git a/contrib/one-true-awk/REGRESS b/contrib/one-true-awk/REGRESS new file mode 100755 index 000000000000..7d3ded69d536 --- /dev/null +++ b/contrib/one-true-awk/REGRESS @@ -0,0 +1,35 @@ +#! /bin/sh + +case `uname` in +CYGWIN) EXE=a.exe ;; +*) EXE=a.out ;; +esac + +if [ ! -f $EXE ] +then + make || exit 1 +fi + +if [ -d testdir ] +then + true # do nothing +elif [ -f awktest.tar ] +then + echo extracting testdir + tar -xpf awktest.tar +else + echo $0: No testdir directory and no awktest.tar to extract it from! >&2 + exit 1 +fi + +cd testdir +pwd +PATH=.:$PATH +export PATH +if (ulimit -c unlimited > /dev/null 2>&1) +then + # Workaround broken default on MacOS X + ulimit -c unlimited +fi + +REGRESS diff --git a/contrib/one-true-awk/awk.1 b/contrib/one-true-awk/awk.1 index b2c36558fc83..b8e00cb60449 100644 --- a/contrib/one-true-awk/awk.1 +++ b/contrib/one-true-awk/awk.1 @@ -7,7 +7,6 @@ .fi .ft 1 .. -awk .TH AWK 1 .CT 1 files prog_other .SH NAME @@ -36,7 +35,7 @@ awk \- pattern-directed scanning and processing language scans each input .I file for lines that match any of a set of patterns specified literally in -.IR prog +.I prog or in one or more files specified as .B \-f @@ -53,7 +52,7 @@ The file name .B \- means the standard input. Any -.IR file +.I file of the form .I var=value is treated as an assignment, not a filename, @@ -70,12 +69,12 @@ any number of options may be present. 
The .B \-F -.IR fs +.I fs option defines the input field separator to be the regular expression -.IR fs. +.IR fs . .PP An input line is normally made up of fields separated by white space, -or by regular expression +or by the regular expression .BR FS . The fields are denoted .BR $1 , @@ -87,7 +86,7 @@ If .BR FS is null, the input line is split into one field per character. .PP -A pattern-action statement has the form +A pattern-action statement has the form: .IP .IB pattern " { " action " } .PP @@ -101,7 +100,7 @@ An action is a sequence of statements. A statement can be one of the following: .PP .EX -.ta \w'\f(CWdelete array[expression]'u +.ta \w'\f(CWdelete array[expression]\fR'u .RS .nf .ft CW @@ -145,7 +144,7 @@ The operators are also available in expressions. Variables may be scalars, array elements (denoted -.IB x [ i ] ) +.IB x [ i ] \fR) or fields. Variables are initialized to the null string. Array subscripts may be any string, @@ -161,11 +160,11 @@ The .B print statement prints its arguments on the standard output (or on a file if -.BI > file +.BI > " file or -.BI >> file +.BI >> " file is present or on a pipe if -.BI | cmd +.BI | " cmd is present), separated by the current output field separator, and terminated by the output record separator. .I file @@ -176,9 +175,10 @@ identical string values in different statements denote the same open file. The .B printf -statement formats its expression list according to the format +statement formats its expression list according to the +.I format (see -.IR printf (3)) . +.IR printf (3)). The built-in function .BI close( expr ) closes the file or pipe @@ -189,13 +189,13 @@ flushes any buffered output for the file or pipe .IR expr . .PP The mathematical functions +.BR atan2 , +.BR cos , .BR exp , .BR log , -.BR sqrt , .BR sin , -.BR cos , and -.BR atan2 +.B sqrt are built in. 
Other built-in functions: .TF length @@ -203,7 +203,8 @@ Other built-in functions: .B length the length of its argument taken as a string, -or of +number of elements in an array for an array argument, +or length of .B $0 if no argument. .TP @@ -218,14 +219,18 @@ and returns the previous seed. .B int truncates to an integer value .TP -.BI substr( s , " m" , " n\fB) +\fBsubstr(\fIs\fB, \fIm\fR [\fB, \fIn\^\fR]\fB)\fR the .IR n -character substring of .I s that begins at position -.IR m +.I m counted from 1. +If no +.IR n , +use the rest of the string +.IR s . .TP .BI index( s , " t" ) the position in @@ -246,14 +251,14 @@ and .B RLENGTH are set to the position and length of the matched string. .TP -.BI split( s , " a" , " fs\fB) +\fBsplit(\fIs\fB, \fIa \fR[\fB, \fIfs\^\fR]\fB)\fR splits the string .I s into array elements -.IB a [1] , -.IB a [2] , +.IB a [1] \fR, +.IB a [2] \fR, \&..., -.IB a [ n ] , +.IB a [ n ] \fR, and returns .IR n . The separation is done with the regular expression @@ -266,7 +271,7 @@ is not given. An empty string as field separator splits the string into one array element per character. .TP -.BI sub( r , " t" , " s\fB) +\fBsub(\fIr\fB, \fIt \fR[, \fIs\^\fR]\fB) substitutes .I t for the first occurrence of the regular expression @@ -279,7 +284,7 @@ is not given, .B $0 is used. .TP -.B gsub +\fBgsub(\fIr\fB, \fIt \fR[, \fIs\^\fR]\fB) same as .B sub except that all occurrences of the regular expression @@ -289,18 +294,28 @@ and .B gsub return the number of replacements. .TP -.BI sprintf( fmt , " expr" , " ...\fB ) +.BI sprintf( fmt , " expr" , " ...\fB) the string resulting from formatting .I expr ... according to the .IR printf (3) format -.I fmt +.IR fmt . .TP .BI system( cmd ) executes .I cmd -and returns its exit status +and returns its exit status. 
This will be \-1 upon error, +.IR cmd 's +exit status upon a normal exit, +256 + +.I sig +upon death-by-signal, where +.I sig +is the number of the murdering signal, +or 512 + +.I sig +if there was a core dump. .TP .BI tolower( str ) returns a copy of @@ -321,7 +336,7 @@ sets .B $0 to the next input record from the current input file; .B getline -.BI < file +.BI < " file sets .B $0 to the next record from @@ -363,7 +378,7 @@ Isolated regular expressions in a pattern apply to the entire line. Regular expressions may also occur in relational expressions, using the operators -.BR ~ +.B ~ and .BR !~ . .BI / re / @@ -387,8 +402,12 @@ A relational expression is one of the following: .br .BI ( expr , expr,... ") in " array-name .PP -where a relop is any of the six relational operators in C, -and a matchop is either +where a +.I relop +is any of the six relational operators in C, +and a +.I matchop +is either .B ~ (matches) or @@ -409,57 +428,68 @@ and after the last. and .B END do not combine with other patterns. +They may appear multiple times in a program and execute +in the order they are read by +.IR awk . .PP Variable names with special meanings: .TF FILENAME .TP +.B ARGC +argument count, assignable. +.TP +.B ARGV +argument array, assignable; +non-null members are taken as filenames. +.TP .B CONVFMT conversion format used when converting numbers (default -.BR "%.6g" ) +.BR "%.6g" ). +.TP +.B ENVIRON +array of environment variables; subscripts are names. +.TP +.B FILENAME +the name of the current input file. +.TP +.B FNR +ordinal number of the current record in the current file. .TP .B FS regular expression used to separate fields; also settable by option -.BI \-F fs. +.BI \-F fs\fR. .TP .BR NF -number of fields in the current record +number of fields in the current record. 
.TP .B NR -ordinal number of the current record -.TP -.B FNR -ordinal number of the current record in the current file -.TP -.B FILENAME -the name of the current input file -.TP -.B RS -input record separator (default newline) -.TP -.B OFS -output field separator (default blank) -.TP -.B ORS -output record separator (default newline) +ordinal number of the current record. .TP .B OFMT output format for numbers (default -.BR "%.6g" ) +.BR "%.6g" ). +.TP +.B OFS +output field separator (default space). +.TP +.B ORS +output record separator (default newline). +.TP +.B RLENGTH +the length of a string matched by +.BR match . +.TP +.B RS +input record separator (default newline). +.TP +.B RSTART +the start position of a string matched by +.BR match . .TP .B SUBSEP -separates multiple subscripts (default 034) -.TP -.B ARGC -argument count, assignable -.TP -.B ARGV -argument array, assignable; -non-null members are taken as filenames -.TP -.B ENVIRON -array of environment variables; subscripts are names. +separates multiple subscripts (default 034). .PD .PP Functions may be defined (at the position of a pattern-action statement) thus: @@ -490,7 +520,7 @@ BEGIN { FS = ",[ \et]*|[ \et]+" } .EE .ns .IP -Same, with input fields separated by comma and/or blanks and tabs. +Same, with input fields separated by comma and/or spaces and tabs. .PP .EX .nf @@ -516,13 +546,13 @@ BEGIN { # Simulate echo(1) .fi .EE .SH SEE ALSO +.IR grep (1), .IR lex (1), .IR sed (1) .br A. V. Aho, B. W. Kernighan, P. J. Weinberger, -.I -The AWK Programming Language, -Addison-Wesley, 1988. ISBN 0-201-07981-X +.IR "The AWK Programming Language" , +Addison-Wesley, 1988. ISBN 0-201-07981-X. .SH BUGS There are no explicit conversions between numbers and strings. To force an expression to be treated as a number add 0 to it; @@ -531,3 +561,5 @@ to force it to be treated as a string concatenate .br The scope rules for variables in functions are a botch; the syntax is worse. 
+.br +Only eight-bit character sets are handled correctly. diff --git a/contrib/one-true-awk/awk.h b/contrib/one-true-awk/awk.h index 5564af7619a4..b16c2f36f828 100644 --- a/contrib/one-true-awk/awk.h +++ b/contrib/one-true-awk/awk.h @@ -81,7 +81,8 @@ typedef struct Cell { char *nval; /* name, for variables only */ char *sval; /* string value */ Awkfloat fval; /* value as number */ - int tval; /* type info: STR|NUM|ARR|FCN|FLD|CON|DONTFREE */ + int tval; /* type info: STR|NUM|ARR|FCN|FLD|CON|DONTFREE|CONVC|CONVO */ + char *fmt; /* CONVFMT/OFMT value used to convert from number */ struct Cell *cnext; /* ptr to next if chained */ } Cell; @@ -96,9 +97,14 @@ extern Array *symtab; extern Cell *nrloc; /* NR */ extern Cell *fnrloc; /* FNR */ +extern Cell *fsloc; /* FS */ extern Cell *nfloc; /* NF */ +extern Cell *ofsloc; /* OFS */ +extern Cell *orsloc; /* ORS */ +extern Cell *rsloc; /* RS */ extern Cell *rstartloc; /* RSTART */ extern Cell *rlengthloc; /* RLENGTH */ +extern Cell *subseploc; /* SUBSEP */ /* Cell.tval values: */ #define NUM 01 /* number value is valid */ @@ -109,6 +115,8 @@ extern Cell *rlengthloc; /* RLENGTH */ #define FCN 040 /* this is a function name */ #define FLD 0100 /* this is a field $1, $2, ... 
*/ #define REC 0200 /* this is $0 */ +#define CONVC 0400 /* string was converted from number via CONVFMT */ +#define CONVO 01000 /* string was converted from number via OFMT */ /* function types */ diff --git a/contrib/one-true-awk/awkgram.y b/contrib/one-true-awk/awkgram.y index 5b5c461b3eed..e4abeeddcb6a 100644 --- a/contrib/one-true-awk/awkgram.y +++ b/contrib/one-true-awk/awkgram.y @@ -86,7 +86,7 @@ Node *arglist = 0; /* list of args for current function */ %left CAT %left '+' '-' %left '*' '/' '%' -%left NOT UMINUS +%left NOT UMINUS UPLUS %right POWER %right DECR INCR %left INDIRECT @@ -357,7 +357,7 @@ term: | term '%' term { $$ = op2(MOD, $1, $3); } | term POWER term { $$ = op2(POWER, $1, $3); } | '-' term %prec UMINUS { $$ = op1(UMINUS, $2); } - | '+' term %prec UMINUS { $$ = $2; } + | '+' term %prec UMINUS { $$ = op1(UPLUS, $2); } | NOT term %prec UMINUS { $$ = op1(NOT, notnull($2)); } | BLTIN '(' ')' { $$ = op2(BLTIN, itonp($1), rectonode()); } | BLTIN '(' patlist ')' { $$ = op2(BLTIN, itonp($1), $3); } diff --git a/contrib/one-true-awk/b.c b/contrib/one-true-awk/b.c index f9dae52a0a95..6bf473820488 100644 --- a/contrib/one-true-awk/b.c +++ b/contrib/one-true-awk/b.c @@ -30,6 +30,7 @@ __FBSDID("$FreeBSD$"); #define DEBUG #include +#include #include #include #include @@ -68,6 +69,11 @@ int rlxval; static uschar *rlxstr; static uschar *prestr; /* current position in current re */ static uschar *lastre; /* origin of last re */ +static uschar *lastatom; /* origin of last Atom */ +static uschar *starttok; +static uschar *basestr; /* starts with original, replaced during + repetition processing */ +static uschar *firstbasestr; static int setcnt; static int poscnt; @@ -85,11 +91,11 @@ fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */ fa *pfa; static int now = 1; - if (setvec == NULL) { /* first time through any RE */ + if (setvec == 0) { /* first time through any RE */ maxsetvec = MAXLIN; setvec = (int *) malloc(maxsetvec * sizeof(int)); 
tmpset = (int *) malloc(maxsetvec * sizeof(int)); - if (setvec == NULL || tmpset == NULL) + if (setvec == 0 || tmpset == 0) overflo("out of space initializing makedfa"); } @@ -127,6 +133,8 @@ fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */ Node *p, *p1; fa *f; + firstbasestr = (uschar *) s; + basestr = firstbasestr; p = reparse(s); p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p); /* put ALL STAR in front of reg. exp. */ @@ -140,7 +148,7 @@ fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */ f->accept = poscnt-1; /* penter has computed number of positions in re */ cfoll(f, p1); /* set up follow sets */ freetr(p1); - if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL) + if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL) overflo("out of space in makedfa"); if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL) overflo("out of space in makedfa"); @@ -148,6 +156,10 @@ fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */ f->initstat = makeinit(f, anchor); f->anchor = anchor; f->restr = (uschar *) tostring(s); + if (firstbasestr != basestr) { + if (basestr) + xfree(basestr); + } return f; } @@ -160,7 +172,7 @@ int makeinit(fa *f, int anchor) f->reset = 0; k = *(f->re[0].lfollow); xfree(f->posns[2]); - if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) + if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) overflo("out of space in makeinit"); for (i=0; i <= k; i++) { (f->posns[2])[i] = (f->re[0].lfollow)[i]; @@ -305,11 +317,11 @@ char *cclenter(const char *argp) /* add a character class */ int j; uschar *p = (uschar *) argp; uschar *op, *bp; - static uschar *buf = NULL; + static uschar *buf = 0; static int bufsz = 100; op = p; - if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL) + if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) FATAL("out of space for character class [%.10s...] 
1", p); bp = buf; for (i = 0; (c = *p++) != 0; ) { @@ -368,14 +380,14 @@ void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfo maxsetvec *= 4; setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); - if (setvec == NULL || tmpset == NULL) + if (setvec == 0 || tmpset == 0) overflo("out of space in cfoll()"); } for (i = 0; i <= f->accept; i++) setvec[i] = 0; setcnt = 0; follow(v); /* computes setvec and setcnt */ - if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL) + if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) overflo("out of space building follow set"); f->re[info(v)].lfollow = p; *p = setcnt; @@ -409,7 +421,7 @@ int first(Node *p) /* collects initially active leaves of p into setvec */ maxsetvec *= 4; setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); - if (setvec == NULL || tmpset == NULL) + if (setvec == 0 || tmpset == 0) overflo("out of space in first()"); } if (type(p) == EMPTYRE) { @@ -549,7 +561,7 @@ int pmatch(fa *f, const char *p0) /* longest match, for sub */ for (i = 2; i <= f->curstat; i++) xfree(f->posns[i]); k = *f->posns[0]; - if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) + if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) overflo("out of space in pmatch"); for (i = 0; i <= k; i++) (f->posns[2])[i] = (f->posns[0])[i]; @@ -606,7 +618,7 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */ for (i = 2; i <= f->curstat; i++) xfree(f->posns[i]); k = *f->posns[0]; - if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) + if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) overflo("out of state space"); for (i = 0; i <= k; i++) (f->posns[2])[i] = (f->posns[0])[i]; @@ -646,9 +658,11 @@ Node *regexp(void) /* top-level parse of reg expr */ Node *primary(void) { Node *np; + int savelastatom; switch (rtok) { case CHAR: 
+ lastatom = starttok; np = op2(CHAR, NIL, itonp(rlxval)); rtok = relex(); return (unary(np)); @@ -657,16 +671,19 @@ Node *primary(void) return (unary(op2(ALL, NIL, NIL))); case EMPTYRE: rtok = relex(); - return (unary(op2(ALL, NIL, NIL))); + return (unary(op2(EMPTYRE, NIL, NIL))); case DOT: + lastatom = starttok; rtok = relex(); return (unary(op2(DOT, NIL, NIL))); case CCL: np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr)); + lastatom = starttok; rtok = relex(); return (unary(np)); case NCCL: np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr)); + lastatom = starttok; rtok = relex(); return (unary(np)); case '^': @@ -676,6 +693,8 @@ Node *primary(void) rtok = relex(); return (unary(op2(CHAR, NIL, NIL))); case '(': + lastatom = starttok; + savelastatom = starttok - basestr; /* Retain over recursion */ rtok = relex(); if (rtok == ')') { /* special pleading for () */ rtok = relex(); @@ -683,6 +702,7 @@ Node *primary(void) } np = regexp(); if (rtok == ')') { + lastatom = basestr + savelastatom; /* Restore */ rtok = relex(); return (unary(np)); } @@ -697,8 +717,12 @@ Node *primary(void) Node *concat(Node *np) { switch (rtok) { - case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(': + case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(': return (concat(op2(CAT, np, primary()))); + case EMPTYRE: + rtok = relex(); + return (concat(op2(CAT, op2(CCL, NIL, (Node *) tostring("")), + primary()))); } return (np); } @@ -767,7 +791,7 @@ struct charclass { { "alnum", 5, isalnum }, { "alpha", 5, isalpha }, #ifndef HAS_ISBLANK - { "blank", 5, isspace }, /* was isblank */ + { "blank", 5, xisblank }, #else { "blank", 5, isblank }, #endif @@ -783,16 +807,132 @@ struct charclass { { NULL, 0, NULL }, }; +#define REPEAT_SIMPLE 0 +#define REPEAT_PLUS_APPENDED 1 +#define REPEAT_WITH_Q 2 +#define REPEAT_ZERO 3 + +static int +replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom, + int atomlen, int firstnum, int 
secondnum, int special_case) +{ + int i, j; + uschar *buf = 0; + int ret = 1; + int init_q = (firstnum==0); /* first added char will be ? */ + int n_q_reps = secondnum-firstnum; /* m>n, so reduce until {1,m-n} left */ + int prefix_length = reptok - basestr; /* prefix includes first rep */ + int suffix_length = strlen((char *) reptok) - reptoklen; /* string after rep specifier */ + int size = prefix_length + suffix_length; + + if (firstnum > 1) { /* add room for reps 2 through firstnum */ + size += atomlen*(firstnum-1); + } + + /* Adjust size of buffer for special cases */ + if (special_case == REPEAT_PLUS_APPENDED) { + size++; /* for the final + */ + } else if (special_case == REPEAT_WITH_Q) { + size += init_q + (atomlen+1)* n_q_reps; + } else if (special_case == REPEAT_ZERO) { + size += 2; /* just a null ERE: () */ + } + if ((buf = (uschar *) malloc(size+1)) == NULL) + FATAL("out of space in reg expr %.10s..", lastre); + memcpy(buf, basestr, prefix_length); /* copy prefix */ + j = prefix_length; + if (special_case == REPEAT_ZERO) { + j -= atomlen; + buf[j++] = '('; + buf[j++] = ')'; + } + for (i=1; i < firstnum; i++) { /* copy x reps */ + memcpy(&buf[j], atom, atomlen); + j += atomlen; + } + if (special_case == REPEAT_PLUS_APPENDED) { + buf[j++] = '+'; + } else if (special_case == REPEAT_WITH_Q) { + if (init_q) buf[j++] = '?'; + for (i=0; i < n_q_reps; i++) { /* copy x? 
reps */ + memcpy(&buf[j], atom, atomlen); + j += atomlen; + buf[j++] = '?'; + } + } + memcpy(&buf[j], reptok+reptoklen, suffix_length); + if (special_case == REPEAT_ZERO) { + buf[j+suffix_length] = '\0'; + } else { + buf[size] = '\0'; + } + /* free old basestr */ + if (firstbasestr != basestr) { + if (basestr) + xfree(basestr); + } + basestr = buf; + prestr = buf + prefix_length; + if (special_case == REPEAT_ZERO) { + prestr -= atomlen; + ret++; + } + return ret; +} + +static int repeat(const uschar *reptok, int reptoklen, const uschar *atom, + int atomlen, int firstnum, int secondnum) +{ + /* + In general, the repetition specifier or "bound" is replaced here + by an equivalent ERE string, repeating the immediately previous atom + and appending ? and + as needed. Note that the first copy of the + atom is left in place, except in the special_case of a zero-repeat + (i.e., {0}). + */ + if (secondnum < 0) { /* means {n,} -> repeat n-1 times followed by PLUS */ + if (firstnum < 2) { + /* 0 or 1: should be handled before you get here */ + FATAL("internal error"); + } else { + return replace_repeat(reptok, reptoklen, atom, atomlen, + firstnum, secondnum, REPEAT_PLUS_APPENDED); + } + } else if (firstnum == secondnum) { /* {n} or {n,n} -> simply repeat n-1 times */ + if (firstnum == 0) { /* {0} or {0,0} */ + /* This case is unusual because the resulting + replacement string might actually be SMALLER than + the original ERE */ + return replace_repeat(reptok, reptoklen, atom, atomlen, + firstnum, secondnum, REPEAT_ZERO); + } else { /* (firstnum >= 1) */ + return replace_repeat(reptok, reptoklen, atom, atomlen, + firstnum, secondnum, REPEAT_SIMPLE); + } + } else if (firstnum < secondnum) { /* {n,m} -> repeat n-1 times then alternate */ + /* x{n,m} => xx...x{1, m-n+1} => xx...x?x?x?..x? 
*/ + return replace_repeat(reptok, reptoklen, atom, atomlen, + firstnum, secondnum, REPEAT_WITH_Q); + } else { /* Error - shouldn't be here (n>m) */ + FATAL("internal error"); + } + return 0; +} int relex(void) /* lexical analyzer for reparse */ { int c, n; int cflag; - static uschar *buf = NULL; + static uschar *buf = 0; static int bufsz = 100; uschar *bp; struct charclass *cc; int i; + int num, m, commafound, digitfound; + const uschar *startreptok; + +rescan: + starttok = prestr; switch (c = *prestr++) { case '|': return OR; @@ -813,7 +953,7 @@ int relex(void) /* lexical analyzer for reparse */ rlxval = c; return CHAR; case '[': - if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL) + if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) FATAL("out of space in reg expr %.10s..", lastre); bp = buf; if (*prestr == '^') { @@ -841,7 +981,15 @@ int relex(void) /* lexical analyzer for reparse */ if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' && prestr[2 + cc->cc_namelen] == ']') { prestr += cc->cc_namelen + 3; - for (i = 1; i < NCHARS; i++) { + /* + * BUG: We begin at 1, instead of 0, since we + * would otherwise prematurely terminate the + * string for classes like [[:cntrl:]]. This + * means that we can't match the NUL character, + * not without first adapting the entire + * program to track each string's length. + */ + for (i = 1; i <= UCHAR_MAX; i++) { if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2")) FATAL("out of space for reg expr %.10s...", lastre); if (cc->cc_func(i)) { @@ -851,6 +999,40 @@ int relex(void) /* lexical analyzer for reparse */ } } else *bp++ = c; + } else if (c == '[' && *prestr == '.') { + char collate_char; + prestr++; + collate_char = *prestr++; + if (*prestr == '.' && prestr[1] == ']') { + prestr += 2; + /* Found it: map via locale TBD: for + now, simply return this char. 
This + is sufficient to pass conformance + test awk.ex 156 + */ + if (*prestr == ']') { + prestr++; + rlxval = collate_char; + return CHAR; + } + } + } else if (c == '[' && *prestr == '=') { + char equiv_char; + prestr++; + equiv_char = *prestr++; + if (*prestr == '=' && prestr[1] == ']') { + prestr += 2; + /* Found it: map via locale TBD: for now + simply return this char. This is + sufficient to pass conformance test + awk.ex 156 + */ + if (*prestr == ']') { + prestr++; + rlxval = equiv_char; + return CHAR; + } + } } else if (c == '\0') { FATAL("nonterminated character class %.20s", lastre); } else if (bp == buf) { /* 1st char is special */ @@ -865,6 +1047,75 @@ int relex(void) /* lexical analyzer for reparse */ } else *bp++ = c; } + break; + case '{': + if (isdigit(*(prestr))) { + num = 0; /* Process as a repetition */ + n = -1; m = -1; + commafound = 0; + digitfound = 0; + startreptok = prestr-1; + /* Remember start of previous atom here ? */ + } else { /* just a { char, not a repetition */ + rlxval = c; + return CHAR; + } + for (; ; ) { + if ((c = *prestr++) == '}') { + if (commafound) { + if (digitfound) { /* {n,m} */ + m = num; + if (m<n) + FATAL("illegal repetition expression: class %.20s", + lastre); + if ((n==0) && (m==1)) { + return QUEST; + } + } else { /* {n,} */ + if (n==0) return STAR; + if (n==1) return PLUS; + } + } else { + if (digitfound) { /* {n} same as {n,n} */ + n = num; + m = num; + } else { /* {} */ + FATAL("illegal repetition expression: class %.20s", + lastre); + } + } + if (repeat(starttok, prestr-starttok, lastatom, + startreptok - lastatom, n, m) > 0) { + if ((n==0) && (m==0)) { + return EMPTYRE; + } + /* must rescan input for next token */ + goto rescan; + } + /* Failed to replace: eat up {...} characters + and treat like just PLUS */ + return PLUS; + } else if (c == '\0') { + FATAL("nonterminated character class %.20s", + lastre); + } else if (isdigit(c)) { + num = 10 * num + c - '0'; + digitfound = 1; + } else if (c == ',') { + if (commafound) + FATAL("illegal repetition expression: class %.20s", + lastre); + /* looking for {n,} or {n,m} */ + commafound = 1; + n = num; + digitfound = 0; /* reset */ + num = 0; + } else { + FATAL("illegal repetition expression: class %.20s", + lastre); + } + } + break; } } @@ -878,7 +1129,7 @@ int cgoto(fa *f, int s, int c) maxsetvec *= 4; setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); tmpset = (int *) realloc(tmpset,
maxsetvec * sizeof(int)); - if (setvec == NULL || tmpset == NULL) + if (setvec == 0 || tmpset == 0) overflo("out of space in cgoto()"); } for (i = 0; i <= f->accept; i++) @@ -900,7 +1151,7 @@ int cgoto(fa *f, int s, int c) maxsetvec *= 4; setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); - if (setvec == NULL || tmpset == NULL) + if (setvec == 0 || tmpset == 0) overflo("cgoto overflow"); } if (setvec[q[j]] == 0) { @@ -943,7 +1194,7 @@ int cgoto(fa *f, int s, int c) for (i = 0; i < NCHARS; i++) f->gototab[f->curstat][i] = 0; xfree(f->posns[f->curstat]); - if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL) + if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) overflo("out of space in cgoto"); f->posns[f->curstat] = p; diff --git a/contrib/one-true-awk/bugs-fixed/README b/contrib/one-true-awk/bugs-fixed/README new file mode 100644 index 000000000000..2f27c1039873 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/README @@ -0,0 +1,57 @@ +List of bugs fixed. + +1. ofs-rebuild: OFS value used to rebuild the record was incorrect. +Fixed August 19, 2014. Revised fix August 2018. + +2. system-status: Instead of a floating-point division by 256, use +the wait(2) macros to create a reasonable exit status. Fixed March 12, 2016. + +3. space: Use provided xisblank() function instead of isspace() for +matching [[:blank:]]. + +4. a-format: Add POSIX standard %a and %A to supported formats. Check +at runtime that this format is available. + +5. decr-NF: Decrementing NF did not change $0. This is a decades-old +bug. There are interactions with the old and new value of OFS as well. +Most of the fix came from the NetBSD awk. + +6. string-conv: String conversions of scalars were sticky. Once a +conversion to string happened, even with OFMT, that value was used until +a new numeric value was assigned, even if OFMT differed from CONVFMT, +and also if CONVFMT changed. + +7.
unary-plus: Unary plus on a string constant returned the string. +Instead, it should convert the value to numeric and give that value. + +8. concat-assign-same: Concatenation previously evaluated both sides of the +expression before doing its work, which, since assign() evaluates to the cell +being assigned to, meant that expressions like "print (a = 1) (a = 2)" would +print "22" rather than "12". + +9. missing-precision: When using the format string "%*s", the precision +argument was used without checking if it was present first. + +10. missing-precision: When using the format string "%*s", the precision +argument was used without checking if it was present first. + +11. fmt-overflow: The buffer used for OFMT/CONVFMT conversions was written +to with sprintf(), which meant that some conversions could write past the +end. + +12. numeric-subsep, numeric-fs, numeric-output-seps, numeric-rs: If SUBSEP, +FS, RS, OFS, or ORS were set to a numeric value, then their string values +wouldn't always be generated before being needed. + +13. subsep-overflow: The length of SUBSEP needs to be rechecked after +calling execute(), in case SUBSEP itself has been changed. + +14. split-fs-from-array: If the third argument to split() comes from the +array passed as the second argument, then split() would previously read +from the freed memory and possibly produce incorrect results (depending +on the system's malloc()/free() behaviour.) + +15. getline-numeric: The `getline xx < file' syntax did not check if +values were numeric, in discordance with POSIX. Test case adapted from +one posted by Ben Bacarisse in comp.lang.awk, +January 2019.
diff --git a/contrib/one-true-awk/bugs-fixed/a-format.awk b/contrib/one-true-awk/bugs-fixed/a-format.awk new file mode 100644 index 000000000000..5b7929ee3eea --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/a-format.awk @@ -0,0 +1,3 @@ +BEGIN { + printf("%a\n", 42) +} diff --git a/contrib/one-true-awk/bugs-fixed/a-format.bad b/contrib/one-true-awk/bugs-fixed/a-format.bad new file mode 100644 index 000000000000..1281825b1111 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/a-format.bad @@ -0,0 +1,3 @@ +nawk: weird printf conversion %a + source line number 2 +%a42 diff --git a/contrib/one-true-awk/bugs-fixed/a-format.ok b/contrib/one-true-awk/bugs-fixed/a-format.ok new file mode 100644 index 000000000000..e421e2d01ba6 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/a-format.ok @@ -0,0 +1 @@ +0x1.5p+5 diff --git a/contrib/one-true-awk/bugs-fixed/concat-assign-same.awk b/contrib/one-true-awk/bugs-fixed/concat-assign-same.awk new file mode 100644 index 000000000000..ed19f35ca835 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/concat-assign-same.awk @@ -0,0 +1,4 @@ +BEGIN { + print (a = 1) (a = 2) (a = 3) (a = 4) (a = 5); + print (a = 1), (a = 2), (a = 3), (a = 4), (a = 5); +} diff --git a/contrib/one-true-awk/bugs-fixed/concat-assign-same.bad b/contrib/one-true-awk/bugs-fixed/concat-assign-same.bad new file mode 100644 index 000000000000..294725b28a97 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/concat-assign-same.bad @@ -0,0 +1,2 @@ +22345 +1 2 3 4 5 diff --git a/contrib/one-true-awk/bugs-fixed/concat-assign-same.ok b/contrib/one-true-awk/bugs-fixed/concat-assign-same.ok new file mode 100644 index 000000000000..447505259d02 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/concat-assign-same.ok @@ -0,0 +1,2 @@ +12345 +1 2 3 4 5 diff --git a/contrib/one-true-awk/bugs-fixed/decr-NF.awk b/contrib/one-true-awk/bugs-fixed/decr-NF.awk new file mode 100644 index 000000000000..7474991d196e --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/decr-NF.awk 
@@ -0,0 +1,11 @@ +BEGIN { + $0 = "a b c d e f" + print NF + OFS = ":" + NF-- + print $0 + print NF + NF++ + print $0 + print NF +} diff --git a/contrib/one-true-awk/bugs-fixed/decr-NF.bad b/contrib/one-true-awk/bugs-fixed/decr-NF.bad new file mode 100644 index 000000000000..b634e065954c --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/decr-NF.bad @@ -0,0 +1,5 @@ +6 +a b c d e f +5 +a b c d e f +6 diff --git a/contrib/one-true-awk/bugs-fixed/decr-NF.ok b/contrib/one-true-awk/bugs-fixed/decr-NF.ok new file mode 100644 index 000000000000..3359cf2312d1 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/decr-NF.ok @@ -0,0 +1,5 @@ +6 +a:b:c:d:e +5 +a:b:c:d:e: +6 diff --git a/contrib/one-true-awk/bugs-fixed/fmt-overflow.awk b/contrib/one-true-awk/bugs-fixed/fmt-overflow.awk new file mode 100644 index 000000000000..bf5877e4abac --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/fmt-overflow.awk @@ -0,0 +1 @@ +BEGIN { OFMT = "%.1000f"; print 1.25; } diff --git a/contrib/one-true-awk/bugs-fixed/fmt-overflow.ok b/contrib/one-true-awk/bugs-fixed/fmt-overflow.ok new file mode 100644 index 000000000000..5f7449e68073 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/fmt-overflow.ok @@ -0,0 +1 @@ +1.2500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 diff --git a/contrib/one-true-awk/bugs-fixed/fs-overflow.awk b/contrib/one-true-awk/bugs-fixed/fs-overflow.awk new file mode 100644 index 000000000000..be10f5a46f0d --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/fs-overflow.awk @@ -0,0 +1,13 @@ +function foo() { + a = ""; + for (i = 0; i < 10000; i++) { + a = a "c"; + } + return a; +} + +BEGIN { + FS = foo(); + $0="foo"; + print $1; +} diff --git a/contrib/one-true-awk/bugs-fixed/getline-numeric.awk b/contrib/one-true-awk/bugs-fixed/getline-numeric.awk new file mode 100644 
index 000000000000..5571a9589a3d --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/getline-numeric.awk @@ -0,0 +1,6 @@ +{ + print $0, ($0 <= 50 ? "<=" : ">"), 50 + getline dd < ARGV[1] + print dd, (dd <= 50 ? "<=" : ">"), 50 + if (dd == $0) print "same" +} diff --git a/contrib/one-true-awk/bugs-fixed/getline-numeric.bad b/contrib/one-true-awk/bugs-fixed/getline-numeric.bad new file mode 100644 index 000000000000..d911c774fa9a --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/getline-numeric.bad @@ -0,0 +1,3 @@ +120 > 50 +120 <= 50 +same diff --git a/contrib/one-true-awk/bugs-fixed/getline-numeric.in b/contrib/one-true-awk/bugs-fixed/getline-numeric.in new file mode 100644 index 000000000000..52bd8e43afb0 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/getline-numeric.in @@ -0,0 +1 @@ +120 diff --git a/contrib/one-true-awk/bugs-fixed/getline-numeric.ok b/contrib/one-true-awk/bugs-fixed/getline-numeric.ok new file mode 100644 index 000000000000..f7efd3db506f --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/getline-numeric.ok @@ -0,0 +1,3 @@ +120 > 50 +120 > 50 +same diff --git a/contrib/one-true-awk/bugs-fixed/missing-precision.awk b/contrib/one-true-awk/bugs-fixed/missing-precision.awk new file mode 100644 index 000000000000..4e7a74b2c964 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/missing-precision.awk @@ -0,0 +1 @@ +BEGIN { printf("%*s"); } diff --git a/contrib/one-true-awk/bugs-fixed/missing-precision.ok b/contrib/one-true-awk/bugs-fixed/missing-precision.ok new file mode 100644 index 000000000000..608b4fa48666 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/missing-precision.ok @@ -0,0 +1,2 @@ +./a.out: not enough args in printf(%*s) + source line number 1 diff --git a/contrib/one-true-awk/bugs-fixed/negative-nf.awk b/contrib/one-true-awk/bugs-fixed/negative-nf.awk new file mode 100644 index 000000000000..6caeee4602b5 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/negative-nf.awk @@ -0,0 +1 @@ +BEGIN { NF = -5; } diff --git 
a/contrib/one-true-awk/bugs-fixed/negative-nf.ok b/contrib/one-true-awk/bugs-fixed/negative-nf.ok new file mode 100644 index 000000000000..71c860468cc0 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/negative-nf.ok @@ -0,0 +1,2 @@ +./a.out: cannot set NF to a negative value + source line number 1 diff --git a/contrib/one-true-awk/bugs-fixed/nf-self-assign.awk b/contrib/one-true-awk/bugs-fixed/nf-self-assign.awk new file mode 100644 index 000000000000..6ae29eef916d --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/nf-self-assign.awk @@ -0,0 +1,6 @@ +BEGIN { + $0="a b c"; + OFS=","; + NF = NF; + print; +} diff --git a/contrib/one-true-awk/bugs-fixed/nf-self-assign.bad b/contrib/one-true-awk/bugs-fixed/nf-self-assign.bad new file mode 100644 index 000000000000..3774da60e546 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/nf-self-assign.bad @@ -0,0 +1 @@ +a b c diff --git a/contrib/one-true-awk/bugs-fixed/nf-self-assign.ok b/contrib/one-true-awk/bugs-fixed/nf-self-assign.ok new file mode 100644 index 000000000000..b2ffb02521e6 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/nf-self-assign.ok @@ -0,0 +1 @@ +a,b,c diff --git a/contrib/one-true-awk/bugs-fixed/numeric-fs.awk b/contrib/one-true-awk/bugs-fixed/numeric-fs.awk new file mode 100644 index 000000000000..01e438d4aa28 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/numeric-fs.awk @@ -0,0 +1,5 @@ +BEGIN { + FS = 0; split("20202", a); print a[1]; + FS = 1; $0="31313"; print $1; + FS = 2; "echo 42424" | getline; print $1; +} diff --git a/contrib/one-true-awk/bugs-fixed/numeric-fs.ok b/contrib/one-true-awk/bugs-fixed/numeric-fs.ok new file mode 100644 index 000000000000..dcf37cd5e262 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/numeric-fs.ok @@ -0,0 +1,3 @@ +2 +3 +4 diff --git a/contrib/one-true-awk/bugs-fixed/numeric-output-seps.awk b/contrib/one-true-awk/bugs-fixed/numeric-output-seps.awk new file mode 100644 index 000000000000..daa0f72aa6ff --- /dev/null +++ 
b/contrib/one-true-awk/bugs-fixed/numeric-output-seps.awk @@ -0,0 +1,8 @@ +BEGIN { + $0 = "a b c"; + OFS = 1; + ORS = 2; + NF = 2; + print; + print "d", "e"; +} diff --git a/contrib/one-true-awk/bugs-fixed/numeric-output-seps.bad b/contrib/one-true-awk/bugs-fixed/numeric-output-seps.bad new file mode 100644 index 000000000000..95310f78a7f3 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/numeric-output-seps.bad @@ -0,0 +1,2 @@ +a b +d e diff --git a/contrib/one-true-awk/bugs-fixed/numeric-output-seps.ok b/contrib/one-true-awk/bugs-fixed/numeric-output-seps.ok new file mode 100644 index 000000000000..de6b2026e539 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/numeric-output-seps.ok @@ -0,0 +1 @@ +a1b2d1e2 \ No newline at end of file diff --git a/contrib/one-true-awk/bugs-fixed/numeric-rs.awk b/contrib/one-true-awk/bugs-fixed/numeric-rs.awk new file mode 100644 index 000000000000..cc7a0a0c08c2 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/numeric-rs.awk @@ -0,0 +1,6 @@ +BEGIN { + RS = 1; + while ("echo a1b1c1d" | getline > 0) { + print $1; + } +} diff --git a/contrib/one-true-awk/bugs-fixed/numeric-rs.bad b/contrib/one-true-awk/bugs-fixed/numeric-rs.bad new file mode 100644 index 000000000000..2027bc6f27c9 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/numeric-rs.bad @@ -0,0 +1 @@ +a1b1c1d diff --git a/contrib/one-true-awk/bugs-fixed/numeric-rs.ok b/contrib/one-true-awk/bugs-fixed/numeric-rs.ok new file mode 100644 index 000000000000..d68dd4031d2a --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/numeric-rs.ok @@ -0,0 +1,4 @@ +a +b +c +d diff --git a/contrib/one-true-awk/bugs-fixed/numeric-subsep.awk b/contrib/one-true-awk/bugs-fixed/numeric-subsep.awk new file mode 100644 index 000000000000..1252e4a99607 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/numeric-subsep.awk @@ -0,0 +1,5 @@ +BEGIN { + SUBSEP = 123.456; + a["hello", "world"] = "foo"; + print a["hello" SUBSEP "world"]; +} diff --git 
a/contrib/one-true-awk/bugs-fixed/numeric-subsep.bad b/contrib/one-true-awk/bugs-fixed/numeric-subsep.bad new file mode 100644 index 000000000000..8b137891791f --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/numeric-subsep.bad @@ -0,0 +1 @@ + diff --git a/contrib/one-true-awk/bugs-fixed/numeric-subsep.ok b/contrib/one-true-awk/bugs-fixed/numeric-subsep.ok new file mode 100644 index 000000000000..257cc5642cb1 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/numeric-subsep.ok @@ -0,0 +1 @@ +foo diff --git a/contrib/one-true-awk/bugs-fixed/ofs-rebuild.awk b/contrib/one-true-awk/bugs-fixed/ofs-rebuild.awk new file mode 100644 index 000000000000..dd2700031524 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/ofs-rebuild.awk @@ -0,0 +1,17 @@ +# The bug here is that nawk should use the value of OFS that +# was current when $0 became invalid to rebuild the record. + +BEGIN { + OFS = ":" + $0 = "a b c d e f g" + $3 = "3333" + # Conceptually, $0 should now be "a:b:3333:d:e:f:g" + + # Change OFS after (conceptually) rebuilding the record + OFS = "<>" + + # Unmodified nawk prints "a<>b<>3333<>d<>e<>f<>g" because + # it delays rebuilding $0 until it's needed, and then it uses + # the current value of OFS. Oops.
+ print +} diff --git a/contrib/one-true-awk/bugs-fixed/ofs-rebuild.bad b/contrib/one-true-awk/bugs-fixed/ofs-rebuild.bad new file mode 100644 index 000000000000..7570811e2c16 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/ofs-rebuild.bad @@ -0,0 +1 @@ +a<>b<>3333<>d<>e<>f<>g diff --git a/contrib/one-true-awk/bugs-fixed/ofs-rebuild.ok b/contrib/one-true-awk/bugs-fixed/ofs-rebuild.ok new file mode 100644 index 000000000000..26892181f91b --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/ofs-rebuild.ok @@ -0,0 +1 @@ +a:b:3333:d:e:f:g diff --git a/contrib/one-true-awk/bugs-fixed/space.awk b/contrib/one-true-awk/bugs-fixed/space.awk new file mode 100644 index 000000000000..6aa87d2e6259 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/space.awk @@ -0,0 +1,22 @@ +BEGIN { + c[" "] = "\" \"" + c["\a"] = "\\a" + c["\b"] = "\\b" + c["\f"] = "\\f" + c["\n"] = "\\n" + c["\r"] = "\\r" + c["\t"] = "\\t" + c["\v"] = "\\v" + + sort = "LC_ALL=C sort" + + for (i in c) + printf("%s %s [[:space:]]\n", c[i], + i ~ /[[:space:]]/ ? "~" : "!~") | sort + + for (i in c) + printf("%s %s [[:blank:]]\n", c[i], + i ~ /[[:blank:]]/ ? 
"~" : "!~") | sort + + close(sort) +} diff --git a/contrib/one-true-awk/bugs-fixed/space.bad b/contrib/one-true-awk/bugs-fixed/space.bad new file mode 100644 index 000000000000..f92055fd0c26 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/space.bad @@ -0,0 +1,16 @@ +" " ~ [[:blank:]] +" " ~ [[:space:]] +\a !~ [[:blank:]] +\a !~ [[:space:]] +\b !~ [[:blank:]] +\b !~ [[:space:]] +\f ~ [[:blank:]] +\f ~ [[:space:]] +\n ~ [[:blank:]] +\n ~ [[:space:]] +\r ~ [[:blank:]] +\r ~ [[:space:]] +\t ~ [[:blank:]] +\t ~ [[:space:]] +\v ~ [[:blank:]] +\v ~ [[:space:]] diff --git a/contrib/one-true-awk/bugs-fixed/space.ok b/contrib/one-true-awk/bugs-fixed/space.ok new file mode 100644 index 000000000000..4278c5c9df3b --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/space.ok @@ -0,0 +1,16 @@ +" " ~ [[:blank:]] +" " ~ [[:space:]] +\a !~ [[:blank:]] +\a !~ [[:space:]] +\b !~ [[:blank:]] +\b !~ [[:space:]] +\f !~ [[:blank:]] +\f ~ [[:space:]] +\n !~ [[:blank:]] +\n ~ [[:space:]] +\r !~ [[:blank:]] +\r ~ [[:space:]] +\t ~ [[:blank:]] +\t ~ [[:space:]] +\v !~ [[:blank:]] +\v ~ [[:space:]] diff --git a/contrib/one-true-awk/bugs-fixed/split-fs-from-array.awk b/contrib/one-true-awk/bugs-fixed/split-fs-from-array.awk new file mode 100644 index 000000000000..fce1607c2a97 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/split-fs-from-array.awk @@ -0,0 +1,5 @@ +BEGIN { + a[1] = "elephantie" + a[2] = "e" + print split(a[1],a,a[2]), a[2], a[3], split(a[2],a,a[2]) +} diff --git a/contrib/one-true-awk/bugs-fixed/split-fs-from-array.ok b/contrib/one-true-awk/bugs-fixed/split-fs-from-array.ok new file mode 100644 index 000000000000..9402b94f4fae --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/split-fs-from-array.ok @@ -0,0 +1 @@ +4 l phanti 2 diff --git a/contrib/one-true-awk/bugs-fixed/string-conv.awk b/contrib/one-true-awk/bugs-fixed/string-conv.awk new file mode 100644 index 000000000000..a1f04aba354b --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/string-conv.awk @@ -0,0 +1,13 
@@ +BEGIN { + OFMT = ">>%.6g<<" + a = 12.1234 + print "a =", a + b = a "" + print "1 ->", b + CONVFMT = "%2.2f" + b = a "" + print "2 ->", b + CONVFMT = "%.12g" + b = a "" + print "3 ->", b +} diff --git a/contrib/one-true-awk/bugs-fixed/string-conv.bad b/contrib/one-true-awk/bugs-fixed/string-conv.bad new file mode 100644 index 000000000000..2ab95e87d0a8 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/string-conv.bad @@ -0,0 +1,4 @@ +a = >>12.1234<< +1 -> >>12.1234<< +2 -> >>12.1234<< +3 -> >>12.1234<< diff --git a/contrib/one-true-awk/bugs-fixed/string-conv.ok b/contrib/one-true-awk/bugs-fixed/string-conv.ok new file mode 100644 index 000000000000..7c097113207a --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/string-conv.ok @@ -0,0 +1,4 @@ +a = >>12.1234<< +1 -> 12.1234 +2 -> 12.12 +3 -> 12.1234 diff --git a/contrib/one-true-awk/bugs-fixed/subsep-overflow.awk b/contrib/one-true-awk/bugs-fixed/subsep-overflow.awk new file mode 100644 index 000000000000..66c7c24db0e6 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/subsep-overflow.awk @@ -0,0 +1,24 @@ +function foo(c, n) { + s = ""; + for (i = 0; i < n; i++) { + s = s c; + } + return s; +} + +BEGIN { + str1 = foo("a", 4500); + str2 = foo("b", 9000); + + a[(SUBSEP = str1), (SUBSEP = str2), "c"] = 1; + + for (k in a) { + print length(k); + } + + print (((SUBSEP = str1), (SUBSEP = str2), "c") in a); + print (((SUBSEP = str1) SUBSEP (SUBSEP = str2) SUBSEP "c") in a); + delete a[(SUBSEP = str1), (SUBSEP = str2), "c"]; + print (((SUBSEP = str1), (SUBSEP = str2), "c") in a); + print (((SUBSEP = str1) SUBSEP (SUBSEP = str2) SUBSEP "c") in a); +} diff --git a/contrib/one-true-awk/bugs-fixed/subsep-overflow.ok b/contrib/one-true-awk/bugs-fixed/subsep-overflow.ok new file mode 100644 index 000000000000..ddbbd78707ee --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/subsep-overflow.ok @@ -0,0 +1,5 @@ +27001 +1 +1 +0 +0 diff --git a/contrib/one-true-awk/bugs-fixed/system-status.awk 
b/contrib/one-true-awk/bugs-fixed/system-status.awk new file mode 100644 index 000000000000..8daf563e6f4f --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/system-status.awk @@ -0,0 +1,19 @@ +# Unmodified nawk prints the 16 bit exit status divided by 256, but +# does so using floating point arithmetic, yielding strange results. +# +# The fix is to use the various macros defined for wait(2) and to +# use the signal number + 256 for death by signal, or signal number + 512 +# for death by signal with core dump. + +BEGIN { + status = system("exit 42") + print "normal status", status + + status = system("kill -HUP $$") + print "death by signal status", status + + status = system("kill -ABRT $$") + print "death by signal with core dump status", status + + system("rm -f core*") +} diff --git a/contrib/one-true-awk/bugs-fixed/system-status.bad b/contrib/one-true-awk/bugs-fixed/system-status.bad new file mode 100644 index 000000000000..a1317dba54a8 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/system-status.bad @@ -0,0 +1,3 @@ +normal status 42 +death by signal status 0.00390625 +death by signal with core dump status 0.523438 diff --git a/contrib/one-true-awk/bugs-fixed/system-status.ok b/contrib/one-true-awk/bugs-fixed/system-status.ok new file mode 100644 index 000000000000..737828f5ed7a --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/system-status.ok @@ -0,0 +1,3 @@ +normal status 42 +death by signal status 257 +death by signal with core dump status 518 diff --git a/contrib/one-true-awk/bugs-fixed/unary-plus.awk b/contrib/one-true-awk/bugs-fixed/unary-plus.awk new file mode 100644 index 000000000000..ba6185b96704 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/unary-plus.awk @@ -0,0 +1,4 @@ +BEGIN { + print +"q" + print +"43.12345678912345678" +} diff --git a/contrib/one-true-awk/bugs-fixed/unary-plus.bad b/contrib/one-true-awk/bugs-fixed/unary-plus.bad new file mode 100644 index 000000000000..76f57d5d580c --- /dev/null +++ 
b/contrib/one-true-awk/bugs-fixed/unary-plus.bad @@ -0,0 +1,2 @@ +q +43.12345678912345678 diff --git a/contrib/one-true-awk/bugs-fixed/unary-plus.ok b/contrib/one-true-awk/bugs-fixed/unary-plus.ok new file mode 100644 index 000000000000..90f97afc5c44 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/unary-plus.ok @@ -0,0 +1,2 @@ +0 +43.1235 diff --git a/contrib/one-true-awk/lex.c b/contrib/one-true-awk/lex.c index 3aeffb7f0539..18927f842b23 100644 --- a/contrib/one-true-awk/lex.c +++ b/contrib/one-true-awk/lex.c @@ -176,10 +176,10 @@ int reg = 0; /* 1 => return a REGEXPR now */ int yylex(void) { int c; - static char *buf = NULL; + static char *buf = 0; static int bufsize = 5; /* BUG: setting this small causes core dump! */ - if (buf == NULL && (buf = (char *) malloc(bufsize)) == NULL) + if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL) FATAL( "out of space in yylex" ); if (sc) { sc = 0; @@ -204,6 +204,7 @@ int yylex(void) yylval.i = c; switch (c) { case '\n': /* {EOL} */ + lineno++; RET(NL); case '\r': /* assume \n is coming */ case ' ': /* {WS}+ */ @@ -219,6 +220,7 @@ int yylex(void) case '\\': if (peek() == '\n') { input(); + lineno++; } else if (peek() == '\r') { input(); input(); /* \n */ lineno++; @@ -364,10 +366,10 @@ int string(void) { int c, n; char *s, *bp; - static char *buf = NULL; + static char *buf = 0; static int bufsz = 500; - if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL) + if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL) FATAL("out of space for strings"); for (bp = buf; (c = input()) != '"'; ) { if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string")) @@ -376,10 +378,11 @@ int string(void) case '\n': case '\r': case 0: + *bp = '\0'; SYNTAX( "non-terminated string %.10s...", buf ); - lineno++; if (c == 0) /* hopeless */ FATAL( "giving up" ); + lineno++; break; case '\\': c = input(); @@ -510,17 +513,18 @@ void startreg(void) /* next call to yylex will return a regular expression */ int regexpr(void) { int c; - static 
char *buf = NULL; + static char *buf = 0; static int bufsz = 500; char *bp; - if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL) + if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL) FATAL("out of space for rex expr"); bp = buf; for ( ; (c = input()) != '/' && c != 0; ) { if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr")) FATAL("out of space for reg expr %.10s...", buf); if (c == '\n') { + *bp = '\0'; SYNTAX( "newline in regular expression %.10s...", buf ); unput('\n'); break; @@ -545,7 +549,7 @@ char ebuf[300]; char *ep = ebuf; char yysbuf[100]; /* pushback buffer */ char *yysptr = yysbuf; -FILE *yyin = NULL; +FILE *yyin = 0; int input(void) /* get next lexical input character */ { @@ -559,19 +563,19 @@ int input(void) /* get next lexical input character */ lexprog++; } else /* awk -f ... */ c = pgetc(); - if (c == '\n') - lineno++; - else if (c == EOF) + if (c == EOF) c = 0; if (ep >= ebuf + sizeof ebuf) ep = ebuf; - return *ep++ = c; + *ep = c; + if (c != 0) { + ep++; + } + return (c); } void unput(int c) /* put lexical character back on input */ { - if (c == '\n') - lineno--; if (yysptr >= yysbuf + sizeof(yysbuf)) FATAL("pushed back too much: %.20s...", yysbuf); *yysptr++ = c; diff --git a/contrib/one-true-awk/lib.c b/contrib/one-true-awk/lib.c index e2688a2debe4..3d33c20d6096 100644 --- a/contrib/one-true-awk/lib.c +++ b/contrib/one-true-awk/lib.c @@ -59,7 +59,7 @@ void recinit(unsigned int n) { if ( (record = (char *) malloc(n)) == NULL || (fields = (char *) malloc(n+1)) == NULL - || (fldtab = (Cell **) malloc((nfields+1) * sizeof(Cell *))) == NULL + || (fldtab = (Cell **) malloc((nfields+2) * sizeof(Cell *))) == NULL || (fldtab[0] = (Cell *) malloc(sizeof(Cell))) == NULL ) FATAL("out of space for $0 and fields"); *record = '\0'; @@ -190,12 +190,13 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf * int sep, c; char *rr, *buf = *pbuf; int bufsize = *pbufsize; + char *rs = getsval(rsloc); - if (strlen(*FS) >= 
sizeof(inputFS)) + if (strlen(getsval(fsloc)) >= sizeof (inputFS)) FATAL("field separator %.10s... is too long", *FS); /*fflush(stdout); avoids some buffering problem but makes it 25% slower*/ strcpy(inputFS, *FS); /* for subsequent field splitting */ - if ((sep = **RS) == 0) { + if ((sep = *rs) == 0) { sep = '\n'; while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */ ; @@ -209,7 +210,7 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf * FATAL("input record `%.30s...' too long", buf); *rr++ = c; } - if (**RS == sep || c == EOF) + if (*rs == sep || c == EOF) break; if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */ break; @@ -284,6 +285,8 @@ void fldbld(void) /* create fields from current record */ } fr = fields; i = 0; /* number of fields accumulated here */ + if (strlen(getsval(fsloc)) >= sizeof (inputFS)) + FATAL("field separator %.10s... is too long", *FS); strcpy(inputFS, *FS); if (strlen(inputFS) > 1) { /* it's a regular expression */ i = refldbld(r, inputFS); @@ -357,6 +360,7 @@ void fldbld(void) /* create fields from current record */ } } setfval(nfloc, (Awkfloat) lastfld); + donerec = 1; /* restore */ if (dbg) { for (j = 0; j <= lastfld; j++) { p = fldtab[j]; @@ -388,6 +392,21 @@ void newfld(int n) /* add field n after end of existing lastfld */ setfval(nfloc, (Awkfloat) n); } +void setlastfld(int n) /* set lastfld cleaning fldtab cells if necessary */ +{ + if (n < 0) + FATAL("cannot set NF to a negative value"); + if (n > nfields) + growfldtab(n); + + if (lastfld < n) + cleanfld(lastfld+1, n); + else + cleanfld(n+1, lastfld); + + lastfld = n; +} + Cell *fieldadr(int n) /* get nth field */ { if (n < 0) @@ -466,6 +485,7 @@ void recbld(void) /* create $0 from $1..$NF if necessary */ { int i; char *r, *p; + char *sep = getsval(ofsloc); if (donerec == 1) return; @@ -477,9 +497,9 @@ void recbld(void) /* create $0 from $1..$NF if necessary */ while ((*r = *p++) != 0) r++; if (i < *NF) { - if (!adjbuf(&record, 
&recsize, 2+strlen(*OFS)+r-record, recsize, &r, "recbld 2")) + if (!adjbuf(&record, &recsize, 2+strlen(sep)+r-record, recsize, &r, "recbld 2")) FATAL("created $0 `%.30s...' too long", record); - for (p = *OFS; (*r = *p++) != 0; ) + for (p = sep; (*r = *p++) != 0; ) r++; } } @@ -619,6 +639,8 @@ void eprint(void) /* try to print context around error */ if (compile_time == 2 || compile_time == 0 || been_here++ > 0 || ebuf == ep) return; + if (ebuf == ep) + return; p = ep - 1; if (p > ebuf && *p == '\n') p--; @@ -682,7 +704,7 @@ int isclvar(const char *s) /* is s of form var=something ? */ for ( ; *s; s++) if (!(isalnum((uschar) *s) || *s == '_')) break; - return *s == '=' && s > os && *(s+1) != '='; + return *s == '=' && s > os; } /* strtod is supposed to be a proper test of what's a valid number */ diff --git a/contrib/one-true-awk/main.c b/contrib/one-true-awk/main.c index ec7029454386..e45df738a624 100644 --- a/contrib/one-true-awk/main.c +++ b/contrib/one-true-awk/main.c @@ -24,8 +24,7 @@ THIS SOFTWARE. #include __FBSDID("$FreeBSD$"); - -const char *version = "version 20121220 (FreeBSD)"; +const char *version = "version 20190529 (FreeBSD)"; #define DEBUG #include @@ -57,6 +56,13 @@ int curpfile = 0; /* current filename */ int safe = 0; /* 1 => "safe" mode */ +/* Can this work with recursive calls? I don't think so. +void segvcatch(int n) +{ + FATAL("segfault. 
Do you have an unbounded recursive call?", n); +} +*/ + int main(int argc, char *argv[]) { const char *fs = NULL; @@ -72,6 +78,7 @@ int main(int argc, char *argv[]) exit(1); } signal(SIGFPE, fpecatch); + /*signal(SIGSEGV, segvcatch); experiment */ srand_seed = 1; srandom((unsigned long) srand_seed); @@ -84,7 +91,7 @@ int main(int argc, char *argv[]) exit(0); break; } - if (strncmp(argv[1], "--", 2) == 0) { /* explicit end of args */ + if (strcmp(argv[1], "--") == 0) { /* explicit end of args */ argc--; argv++; break; diff --git a/contrib/one-true-awk/makefile b/contrib/one-true-awk/makefile index 88f992421561..3c0b62e1df9e 100644 --- a/contrib/one-true-awk/makefile +++ b/contrib/one-true-awk/makefile @@ -23,18 +23,21 @@ # ****************************************************************/ CFLAGS = -g -CFLAGS = -O2 CFLAGS = +CFLAGS = -O2 -CC = gcc -Wall -g -Wwrite-strings -CC = gcc -fprofile-arcs -ftest-coverage # then gcov f1.c; cat f1.c.gcov -CC = gcc -g -Wall -pedantic -CC = gcc -O4 -Wall -pedantic -fno-strict-aliasing +# compiler options +#CC = gcc -Wall -g -Wwrite-strings +#CC = gcc -O4 -Wall -pedantic -fno-strict-aliasing +#CC = gcc -fprofile-arcs -ftest-coverage # then gcov f1.c; cat f1.c.gcov +HOSTCC = gcc -g -Wall -pedantic +CC = $(HOSTCC) # change this is cross-compiling. -YACC = bison -d -y -YACC = yacc -d -S +# yacc options. pick one; this varies a lot by system. 
#YFLAGS = -d -S - # -S uses sprintf in yacc parser instead of sprint +YACC = bison -d -y +#YACC = yacc -d +# -S uses sprintf in yacc parser instead of sprint OFILES = b.o main.o parse.o proctab.o tran.o lib.o run.o lex.o @@ -44,7 +47,7 @@ SOURCE = awk.h ytab.c ytab.h proto.h awkgram.y lex.c b.c main.c \ LISTING = awk.h proto.h awkgram.y lex.c b.c main.c maketab.c parse.c \ lib.c run.c tran.c -SHIP = README FIXES $(SOURCE) ytab[ch].bak makefile \ +SHIP = README LICENSE FIXES $(SOURCE) ytab[ch].bak makefile \ awk.1 a.out: ytab.o $(OFILES) @@ -52,17 +55,23 @@ a.out: ytab.o $(OFILES) $(OFILES): awk.h ytab.h proto.h -ytab.o: awk.h proto.h awkgram.y +#Clear dependency for parallel build: (make -j) +#YACC generated y.tab.c and y.tab.h at the same time +#this needs to be a static pattern rules otherwise multiple target +#are mapped onto multiple executions of yacc, which overwrite +#each others outputs. +y%.c y%.h: awk.h proto.h awkgram.y $(YACC) $(YFLAGS) awkgram.y - mv y.tab.c ytab.c - mv y.tab.h ytab.h - $(CC) $(CFLAGS) -c ytab.c + mv y.$*.c y$*.c + mv y.$*.h y$*.h + +ytab.h: ytab.c proctab.c: maketab - ./maketab >proctab.c + ./maketab ytab.h >proctab.c maketab: ytab.h maketab.c - $(CC) $(CFLAGS) maketab.c -o maketab + $(HOSTCC) $(CFLAGS) maketab.c -o maketab bundle: @cp ytab.h ytabh.bak @@ -79,8 +88,22 @@ tar: @zip awk.zip $(SHIP) ls -l awk.zip +gitadd: + git add README LICENSE FIXES \ + awk.h proto.h awkgram.y lex.c b.c main.c maketab.c parse.c \ + lib.c run.c tran.c \ + makefile awk.1 awktest.tar + +gitpush: + # only do this once: + # git remote add origin https://github.com/onetrueawk/awk.git + git push -u origin master + names: @echo $(LISTING) clean: rm -f a.out *.o *.obj maketab maketab.exe *.bb *.bbg *.da *.gcov *.gcno *.gcda # proctab.c + +cleaner: + rm -f a.out *.o *.obj maketab maketab.exe *.bb *.bbg *.da *.gcov *.gcno *.gcda proctab.c ytab* diff --git a/contrib/one-true-awk/maketab.c b/contrib/one-true-awk/maketab.c index b59e81592fdb..dbe3d241fcc8 100644 
--- a/contrib/one-true-awk/maketab.c +++ b/contrib/one-true-awk/maketab.c @@ -62,6 +62,7 @@ struct xx { DIVIDE, "arith", " / " }, { MOD, "arith", " % " }, { UMINUS, "arith", " -" }, + { UPLUS, "arith", " +" }, { POWER, "arith", " **" }, { PREINCR, "incrdecr", "++" }, { POSTINCR, "incrdecr", "++" }, @@ -124,8 +125,12 @@ int main(int argc, char *argv[]) for (i = SIZE; --i >= 0; ) names[i] = ""; - if ((fp = fopen("ytab.h", "r")) == NULL) { - fprintf(stderr, "maketab can't open ytab.h!\n"); + if (argc != 2) { + fprintf(stderr, "usage: maketab YTAB_H\n"); + exit(1); + } + if ((fp = fopen(argv[1], "r")) == NULL) { + fprintf(stderr, "maketab can't open %s!\n", argv[1]); exit(1); } printf("static char *printname[%d] = {\n", SIZE); @@ -134,6 +139,8 @@ int main(int argc, char *argv[]) n = sscanf(buf, "%1c %s %s %d", &c, def, name, &tok); if (c != '#' || (n != 4 && strcmp(def,"define") != 0)) /* not a valid #define */ continue; + if (strcmp(name, "YYSTYPE_IS_DECLARED") == 0) + continue; if (tok < FIRSTTOKEN || tok > LASTTOKEN) { /* fprintf(stderr, "maketab funny token %d %s ignored\n", tok, buf); */ continue; @@ -149,7 +156,7 @@ int main(int argc, char *argv[]) table[p->token-FIRSTTOKEN] = p->name; printf("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE); for (i=0; innext, n++) + for (n = 0; p != 0; p = p->nnext, n++) if (strcmp(((Cell *)(p->narg[0]))->nval, s) == 0) return n; return -1; diff --git a/contrib/one-true-awk/proctab.c b/contrib/one-true-awk/proctab.c new file mode 100644 index 000000000000..ff212c416c3a --- /dev/null +++ b/contrib/one-true-awk/proctab.c @@ -0,0 +1,209 @@ +#include +#include "awk.h" +#include "ytab.h" + +static char *printname[94] = { + (char *) "FIRSTTOKEN", /* 258 */ + (char *) "PROGRAM", /* 259 */ + (char *) "PASTAT", /* 260 */ + (char *) "PASTAT2", /* 261 */ + (char *) "XBEGIN", /* 262 */ + (char *) "XEND", /* 263 */ + (char *) "NL", /* 264 */ + (char *) "ARRAY", /* 265 */ + (char *) "MATCH", /* 266 */ + (char *) "NOTMATCH", /* 267 */ + 
(char *) "MATCHOP", /* 268 */ + (char *) "FINAL", /* 269 */ + (char *) "DOT", /* 270 */ + (char *) "ALL", /* 271 */ + (char *) "CCL", /* 272 */ + (char *) "NCCL", /* 273 */ + (char *) "CHAR", /* 274 */ + (char *) "OR", /* 275 */ + (char *) "STAR", /* 276 */ + (char *) "QUEST", /* 277 */ + (char *) "PLUS", /* 278 */ + (char *) "EMPTYRE", /* 279 */ + (char *) "AND", /* 280 */ + (char *) "BOR", /* 281 */ + (char *) "APPEND", /* 282 */ + (char *) "EQ", /* 283 */ + (char *) "GE", /* 284 */ + (char *) "GT", /* 285 */ + (char *) "LE", /* 286 */ + (char *) "LT", /* 287 */ + (char *) "NE", /* 288 */ + (char *) "IN", /* 289 */ + (char *) "ARG", /* 290 */ + (char *) "BLTIN", /* 291 */ + (char *) "BREAK", /* 292 */ + (char *) "CLOSE", /* 293 */ + (char *) "CONTINUE", /* 294 */ + (char *) "DELETE", /* 295 */ + (char *) "DO", /* 296 */ + (char *) "EXIT", /* 297 */ + (char *) "FOR", /* 298 */ + (char *) "FUNC", /* 299 */ + (char *) "SUB", /* 300 */ + (char *) "GSUB", /* 301 */ + (char *) "IF", /* 302 */ + (char *) "INDEX", /* 303 */ + (char *) "LSUBSTR", /* 304 */ + (char *) "MATCHFCN", /* 305 */ + (char *) "NEXT", /* 306 */ + (char *) "NEXTFILE", /* 307 */ + (char *) "ADD", /* 308 */ + (char *) "MINUS", /* 309 */ + (char *) "MULT", /* 310 */ + (char *) "DIVIDE", /* 311 */ + (char *) "MOD", /* 312 */ + (char *) "ASSIGN", /* 313 */ + (char *) "ASGNOP", /* 314 */ + (char *) "ADDEQ", /* 315 */ + (char *) "SUBEQ", /* 316 */ + (char *) "MULTEQ", /* 317 */ + (char *) "DIVEQ", /* 318 */ + (char *) "MODEQ", /* 319 */ + (char *) "POWEQ", /* 320 */ + (char *) "PRINT", /* 321 */ + (char *) "PRINTF", /* 322 */ + (char *) "SPRINTF", /* 323 */ + (char *) "ELSE", /* 324 */ + (char *) "INTEST", /* 325 */ + (char *) "CONDEXPR", /* 326 */ + (char *) "POSTINCR", /* 327 */ + (char *) "PREINCR", /* 328 */ + (char *) "POSTDECR", /* 329 */ + (char *) "PREDECR", /* 330 */ + (char *) "VAR", /* 331 */ + (char *) "IVAR", /* 332 */ + (char *) "VARNF", /* 333 */ + (char *) "CALL", /* 334 */ + (char *) 
"NUMBER", /* 335 */ + (char *) "STRING", /* 336 */ + (char *) "REGEXPR", /* 337 */ + (char *) "GETLINE", /* 338 */ + (char *) "RETURN", /* 339 */ + (char *) "SPLIT", /* 340 */ + (char *) "SUBSTR", /* 341 */ + (char *) "WHILE", /* 342 */ + (char *) "CAT", /* 343 */ + (char *) "NOT", /* 344 */ + (char *) "UMINUS", /* 345 */ + (char *) "UPLUS", /* 346 */ + (char *) "POWER", /* 347 */ + (char *) "DECR", /* 348 */ + (char *) "INCR", /* 349 */ + (char *) "INDIRECT", /* 350 */ + (char *) "LASTTOKEN", /* 351 */ +}; + + +Cell *(*proctab[94])(Node **, int) = { + nullproc, /* FIRSTTOKEN */ + program, /* PROGRAM */ + pastat, /* PASTAT */ + dopa2, /* PASTAT2 */ + nullproc, /* XBEGIN */ + nullproc, /* XEND */ + nullproc, /* NL */ + array, /* ARRAY */ + matchop, /* MATCH */ + matchop, /* NOTMATCH */ + nullproc, /* MATCHOP */ + nullproc, /* FINAL */ + nullproc, /* DOT */ + nullproc, /* ALL */ + nullproc, /* CCL */ + nullproc, /* NCCL */ + nullproc, /* CHAR */ + nullproc, /* OR */ + nullproc, /* STAR */ + nullproc, /* QUEST */ + nullproc, /* PLUS */ + nullproc, /* EMPTYRE */ + boolop, /* AND */ + boolop, /* BOR */ + nullproc, /* APPEND */ + relop, /* EQ */ + relop, /* GE */ + relop, /* GT */ + relop, /* LE */ + relop, /* LT */ + relop, /* NE */ + instat, /* IN */ + arg, /* ARG */ + bltin, /* BLTIN */ + jump, /* BREAK */ + closefile, /* CLOSE */ + jump, /* CONTINUE */ + awkdelete, /* DELETE */ + dostat, /* DO */ + jump, /* EXIT */ + forstat, /* FOR */ + nullproc, /* FUNC */ + sub, /* SUB */ + gsub, /* GSUB */ + ifstat, /* IF */ + sindex, /* INDEX */ + nullproc, /* LSUBSTR */ + matchop, /* MATCHFCN */ + jump, /* NEXT */ + jump, /* NEXTFILE */ + arith, /* ADD */ + arith, /* MINUS */ + arith, /* MULT */ + arith, /* DIVIDE */ + arith, /* MOD */ + assign, /* ASSIGN */ + nullproc, /* ASGNOP */ + assign, /* ADDEQ */ + assign, /* SUBEQ */ + assign, /* MULTEQ */ + assign, /* DIVEQ */ + assign, /* MODEQ */ + assign, /* POWEQ */ + printstat, /* PRINT */ + awkprintf, /* PRINTF */ + awksprintf, 
/* SPRINTF */ + nullproc, /* ELSE */ + intest, /* INTEST */ + condexpr, /* CONDEXPR */ + incrdecr, /* POSTINCR */ + incrdecr, /* PREINCR */ + incrdecr, /* POSTDECR */ + incrdecr, /* PREDECR */ + nullproc, /* VAR */ + nullproc, /* IVAR */ + getnf, /* VARNF */ + call, /* CALL */ + nullproc, /* NUMBER */ + nullproc, /* STRING */ + nullproc, /* REGEXPR */ + awkgetline, /* GETLINE */ + jump, /* RETURN */ + split, /* SPLIT */ + substr, /* SUBSTR */ + whilestat, /* WHILE */ + cat, /* CAT */ + boolop, /* NOT */ + arith, /* UMINUS */ + arith, /* UPLUS */ + arith, /* POWER */ + nullproc, /* DECR */ + nullproc, /* INCR */ + indirect, /* INDIRECT */ + nullproc, /* LASTTOKEN */ +}; + +char *tokname(int n) +{ + static char buf[100]; + + if (n < FIRSTTOKEN || n > LASTTOKEN) { + sprintf(buf, "token %d", n); + return buf; + } + return printname[n-FIRSTTOKEN]; +} diff --git a/contrib/one-true-awk/proto.h b/contrib/one-true-awk/proto.h index 9a657ef73ec1..ad6f2e80a594 100644 --- a/contrib/one-true-awk/proto.h +++ b/contrib/one-true-awk/proto.h @@ -124,6 +124,7 @@ extern void setclvar(char *); extern void fldbld(void); extern void cleanfld(int, int); extern void newfld(int); +extern void setlastfld(int); extern int refldbld(const char *, const char *); extern void recbld(void); extern Cell *fieldadr(int); @@ -193,3 +194,5 @@ extern Cell *gsub(Node **, int); extern FILE *popen(const char *, const char *); extern int pclose(FILE *); + +extern const char *flags2str(int flags); diff --git a/contrib/one-true-awk/run.c b/contrib/one-true-awk/run.c index 1b02a79fb46b..c3a3e5f4751a 100644 --- a/contrib/one-true-awk/run.c +++ b/contrib/one-true-awk/run.c @@ -34,6 +34,8 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include +#include #include "awk.h" #include "ytab.h" @@ -74,23 +76,23 @@ extern Awkfloat srand_seed; Node *winner = NULL; /* root of parse tree */ Cell *tmps; /* free temporary cells for execution */ -static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM }; +static Cell 
truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL }; Cell *True = &truecell; -static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM }; +static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL }; Cell *False = &falsecell; -static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM }; +static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL }; Cell *jbreak = &breakcell; -static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM }; +static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL }; Cell *jcont = &contcell; -static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM }; +static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL }; Cell *jnext = &nextcell; -static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM }; +static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL }; Cell *jnextfile = &nextfilecell; -static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM }; +static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL }; Cell *jexit = &exitcell; -static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM }; +static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL }; Cell *jret = &retcell; -static Cell tempcell ={ OCELL, CTEMP, 0, "", 0.0, NUM|STR|DONTFREE }; +static Cell tempcell ={ OCELL, CTEMP, 0, "", 0.0, NUM|STR|DONTFREE, NULL }; Node *curnode = NULL; /* the node being executed, for debugging */ @@ -115,7 +117,7 @@ int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, if (rminlen) minlen += quantum - rminlen; tbuf = (char *) realloc(*pbuf, minlen); - dprintf( ("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, *pbuf, tbuf) ); + dprintf( ("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void *) *pbuf, (void *) tbuf) ); if (tbuf == NULL) { if (whatrtn) FATAL("out of memory in %s", whatrtn); @@ -224,7 +226,7 @@ struct Frame *fp = NULL; /* frame pointer. bottom level unused */ Cell *call(Node **a, int n) /* function call. 
very kludgy and fragile */ { - static Cell newcopycell = { OCELL, CCOPY, 0, "", 0.0, NUM|STR|DONTFREE }; + static Cell newcopycell = { OCELL, CCOPY, 0, "", 0.0, NUM|STR|DONTFREE, NULL }; int i, ncall, ndef; int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ Node *x; @@ -326,14 +328,18 @@ Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ { Cell *y; + /* copy is not constant or field */ + y = gettemp(); + y->tval = x->tval & ~(CON|FLD|REC); y->csub = CCOPY; /* prevents freeing until call is over */ y->nval = x->nval; /* BUG? */ - if (isstr(x)) + if (isstr(x) /* || x->ctype == OCELL */) { y->sval = tostring(x->sval); + y->tval &= ~DONTFREE; + } else + y->tval |= DONTFREE; y->fval = x->fval; - y->tval = x->tval & ~(CON|FLD|REC|DONTFREE); /* copy is not constant or field */ - /* is DONTFREE right? */ return y; } @@ -422,6 +428,10 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */ } else if (a[0] != NULL) { /* getline var sval)) { + x->fval = atof(x->sval); + x->tval |= NUM; + } tempfree(x); } else { /* getline sval)) { + x->fval = atof(x->sval); + x->tval |= NUM; + } tempfree(x); } } @@ -459,7 +473,7 @@ Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ Node *np; char *buf; int bufsz = recsize; - int nsub = strlen(*SUBSEP); + int nsub; if ((buf = (char *) malloc(bufsz)) == NULL) FATAL("out of memory in array"); @@ -469,6 +483,7 @@ Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ for (np = a[1]; np; np = np->nnext) { y = execute(np); /* subscript */ s = getsval(y); + nsub = strlen(getsval(subseploc)); if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "array")) FATAL("out of memory for %s[%s...]", x->nval, buf); strcat(buf, s); @@ -497,12 +512,12 @@ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts * Cell *x, *y; Node *np; char *s; - int nsub = strlen(*SUBSEP); + int nsub; x = execute(a[0]); /* 
Cell* for symbol table */ if (!isarr(x)) return True; - if (a[1] == NULL) { /* delete the elements, not the table */ + if (a[1] == 0) { /* delete the elements, not the table */ freesymtab(x); x->tval &= ~STR; x->tval |= ARR; @@ -516,9 +531,10 @@ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts * for (np = a[1]; np; np = np->nnext) { y = execute(np); /* subscript */ s = getsval(y); + nsub = strlen(getsval(subseploc)); if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "awkdelete")) FATAL("out of memory deleting %s[%s...]", x->nval, buf); - strcat(buf, s); + strcat(buf, s); if (np->nnext) strcat(buf, *SUBSEP); tempfree(y); @@ -537,7 +553,7 @@ Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ char *buf; char *s; int bufsz = recsize; - int nsub = strlen(*SUBSEP); + int nsub; ap = execute(a[1]); /* array name */ if (!isarr(ap)) { @@ -555,6 +571,7 @@ Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ for (p = a[0]; p; p = p->nnext) { x = execute(p); /* expr */ s = getsval(x); + nsub = strlen(getsval(subseploc)); if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "intest")) FATAL("out of memory deleting %s[%s...]", x->nval, buf); strcat(buf, s); @@ -586,7 +603,7 @@ Cell *matchop(Node **a, int n) /* ~ and match() */ } x = execute(a[1]); /* a[1] = target text */ s = getsval(x); - if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */ + if (a[0] == 0) /* a[1] == 0: already-compiled reg expr */ i = (*mf)((fa *) a[2], s); else { y = execute(a[2]); /* a[2] = regular expr */ @@ -702,7 +719,7 @@ Cell *gettemp(void) /* get a tempcell */ FATAL("out of space for temporaries"); for(i = 1; i < 100; i++) tmps[i-1].cnext = &tmps[i]; - tmps[i-1].cnext = NULL; + tmps[i-1].cnext = 0; } x = tmps; tmps = x->cnext; @@ -737,18 +754,18 @@ Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ int k, m, n; char *s; int temp; - Cell *x, *y, *z = NULL; + Cell *x, *y, *z = 0; x 
= execute(a[0]); y = execute(a[1]); - if (a[2] != NULL) + if (a[2] != 0) z = execute(a[2]); s = getsval(x); k = strlen(s) + 1; if (k <= 1) { tempfree(x); tempfree(y); - if (a[2] != NULL) { + if (a[2] != 0) { tempfree(z); } x = gettemp(); @@ -761,7 +778,7 @@ Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ else if (m > k) m = k; tempfree(y); - if (a[2] != NULL) { + if (a[2] != 0) { n = (int) getfval(z); tempfree(z); } else @@ -820,6 +837,17 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co char *buf = *pbuf; int bufsize = *pbufsize; + static int first = 1; + static int have_a_format = 0; + + if (first) { + char buf[100]; + + sprintf(buf, "%a", 42.0); + have_a_format = (strcmp(buf, "0x1.5p+5") == 0); + first = 0; + } + os = s; p = buf; if ((fmt = (char *) malloc(fmtsz)) == NULL) @@ -845,7 +873,13 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co FATAL("format item %.30s... ran format() out of memory", os); if (isalpha((uschar)*s) && *s != 'l' && *s != 'h' && *s != 'L') break; /* the ansi panoply */ + if (*s == '$') { + FATAL("'$' not permitted in awk formats"); + } if (*s == '*') { + if (a == NULL) { + FATAL("not enough args in printf(%s)", os); + } x = execute(a); a = a->nnext; sprintf(t-1, "%d", fmtwd=(int) getfval(x)); @@ -860,8 +894,13 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co if (fmtwd < 0) fmtwd = -fmtwd; adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4"); - switch (*s) { + case 'a': case 'A': + if (have_a_format) + flag = *s; + else + flag = 'f'; + break; case 'f': case 'e': case 'g': case 'E': case 'G': flag = 'f'; break; @@ -904,6 +943,8 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co p += strlen(p); sprintf(p, "%s", t); break; + case 'a': + case 'A': case 'f': sprintf(p, fmt, getfval(x)); break; case 'd': sprintf(p, fmt, (long) getfval(x)); break; case 'u': sprintf(p, fmt, (int) 
getfval(x)); break; @@ -1006,7 +1047,7 @@ Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */ x = execute(a[0]); i = getfval(x); tempfree(x); - if (n != UMINUS) { + if (n != UMINUS && n != UPLUS) { y = execute(a[1]); j = getfval(y); tempfree(y); @@ -1036,6 +1077,8 @@ Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */ case UMINUS: i = -i; break; + case UPLUS: /* handled by getfval(), above */ + break; case POWER: if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ i = ipow(i, (int) j); @@ -1091,8 +1134,8 @@ Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ y = execute(a[1]); x = execute(a[0]); if (n == ASSIGN) { /* ordinary assignment */ - if (x == y && !(x->tval & (FLD|REC))) /* self-assignment: */ - ; /* leave alone unless it's a field */ + if (x == y && !(x->tval & (FLD|REC)) && x != nfloc) + ; /* self-assignment: leave alone unless it's a field or NF */ else if ((y->tval & (STR|NUM)) == (STR|NUM)) { setsval(x, getsval(y)); x->fval = getfval(y); @@ -1149,25 +1192,26 @@ Cell *cat(Node **a, int q) /* a[0] cat a[1] */ { Cell *x, *y, *z; int n1, n2; - char *s; + char *s = NULL; + int ssz = 0; x = execute(a[0]); + n1 = strlen(getsval(x)); + adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1"); + (void) strncpy(s, x->sval, ssz); + y = execute(a[1]); - getsval(x); - getsval(y); - n1 = strlen(x->sval); - n2 = strlen(y->sval); - s = (char *) malloc(n1 + n2 + 1); - if (s == NULL) - FATAL("out of space concatenating %.15s... 
and %.15s...", - x->sval, y->sval); - strcpy(s, x->sval); - strcpy(s+n1, y->sval); + n2 = strlen(getsval(y)); + adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2"); + (void) strncpy(s + n1, y->sval, ssz - n1); + tempfree(x); tempfree(y); + z = gettemp(); z->sval = s; z->tval = STR; + return(z); } @@ -1175,7 +1219,7 @@ Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ { Cell *x; - if (a[0] == NULL) + if (a[0] == 0) x = execute(a[1]); else { x = execute(a[0]); @@ -1212,20 +1256,22 @@ Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ { - Cell *x = NULL, *y, *ap; + Cell *x = 0, *y, *ap; char *s, *origs; + char *fs, *origfs = NULL; int sep; - char *t, temp, num[50], *fs = NULL; + char *t, temp, num[50]; int n, tempstat, arg3type; y = execute(a[0]); /* source string */ origs = s = strdup(getsval(y)); arg3type = ptoi(a[3]); - if (a[2] == NULL) /* fs string */ - fs = *FS; + if (a[2] == 0) /* fs string */ + fs = getsval(fsloc); else if (arg3type == STRING) { /* split(str,arr,"string") */ x = execute(a[2]); - fs = getsval(x); + origfs = fs = strdup(getsval(x)); + tempfree(x); } else if (arg3type == REGEXPR) fs = "(regexpr)"; /* split(str,arr,/regexpr/) */ else @@ -1340,9 +1386,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ tempfree(ap); tempfree(y); free(origs); - if (a[2] != NULL && arg3type == STRING) { - tempfree(x); - } + free(origfs); x = gettemp(); x->tval = NUM; x->fval = n; @@ -1372,7 +1416,7 @@ Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ if (istrue(x)) { tempfree(x); x = execute(a[1]); - } else if (a[2] != NULL) { + } else if (a[2] != 0) { tempfree(x); x = execute(a[2]); } @@ -1424,7 +1468,7 @@ Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ x = execute(a[0]); tempfree(x); for (;;) { - if (a[1]!=NULL) { + if (a[1]!=0) { x = execute(a[1]); if (!istrue(x)) return(x); else tempfree(x); @@ -1482,6 +1526,7 @@ Cell *bltin(Node 
**a, int n) /* builtin functions. a[0] is type, a[1] is arg lis Node *nextarg; FILE *fp; void flush_all(void); + int status = 0; t = ptoi(a[0]); x = execute(a[1]); @@ -1506,7 +1551,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis case FCOS: u = cos(getfval(x)); break; case FATAN: - if (nextarg == NULL) { + if (nextarg == 0) { WARNING("atan2 requires two arguments; returning 1.0"); u = 1.0; } else { @@ -1588,7 +1633,20 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis break; case FSYSTEM: fflush(stdout); /* in case something is buffered already */ - u = (Awkfloat) system(getsval(x)) / 256; /* 256 is unix-dep */ + status = system(getsval(x)); + u = status; + if (status != -1) { + if (WIFEXITED(status)) { + u = WEXITSTATUS(status); + } else if (WIFSIGNALED(status)) { + u = WTERMSIG(status) + 256; +#ifdef WCOREDUMP + if (WCOREDUMP(status)) + u += 256; +#endif + } else /* something else?!? */ + u = 0; + } break; case FRAND: /* random() returns numbers in [0..2^31-1] @@ -1639,7 +1697,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. 
a[0] is type, a[1] is arg lis tempfree(x); x = gettemp(); setfval(x, u); - if (nextarg != NULL) { + if (nextarg != 0) { WARNING("warning: function has too many arguments"); for ( ; nextarg; nextarg = nextarg->nnext) execute(nextarg); @@ -1653,7 +1711,7 @@ Cell *printstat(Node **a, int n) /* print a[0] */ Cell *y; FILE *fp; - if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */ + if (a[1] == 0) /* a[1] is redirection operator, a[2] is file */ fp = stdout; else fp = redirect(ptoi(a[1]), a[2]); @@ -1662,11 +1720,11 @@ Cell *printstat(Node **a, int n) /* print a[0] */ fputs(getpssval(y), fp); tempfree(y); if (x->nnext == NULL) - fputs(*ORS, fp); + fputs(getsval(orsloc), fp); else - fputs(*OFS, fp); + fputs(getsval(ofsloc), fp); } - if (a[1] != NULL) + if (a[1] != 0) fflush(fp); if (ferror(fp)) FATAL("write error on %s", filename(fp)); @@ -1675,8 +1733,6 @@ Cell *printstat(Node **a, int n) /* print a[0] */ Cell *nullproc(Node **a, int n) { - n = n; - a = a; return 0; } @@ -1725,7 +1781,7 @@ FILE *openfile(int a, const char *us) { const char *s = us; int i, m; - FILE *fp = NULL; + FILE *fp = 0; if (*s == '\0') FATAL("null file name in print or getline"); @@ -1740,7 +1796,7 @@ FILE *openfile(int a, const char *us) return NULL; for (i=0; i < nfiles; i++) - if (files[i].fp == NULL) + if (files[i].fp == 0) break; if (i >= nfiles) { struct files *nf; @@ -1790,7 +1846,6 @@ Cell *closefile(Node **a, int n) Cell *x; int i, stat; - n = n; x = execute(a[0]); getsval(x); stat = -1; @@ -1857,7 +1912,7 @@ Cell *sub(Node **a, int nnn) /* substitute command */ FATAL("out of memory in sub"); x = execute(a[3]); /* target string */ t = getsval(x); - if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ + if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ pfa = (fa *) a[1]; /* regular expression */ else { y = execute(a[1]); @@ -1897,7 +1952,7 @@ Cell *sub(Node **a, int nnn) /* substitute command */ if (pb > buf + bufsz) FATAL("sub result2 %.30s too big; can't 
happen", buf); setsval(x, buf); /* BUG: should be able to avoid copy */ - result = True; + result = True;; } tempfree(x); tempfree(y); @@ -1920,7 +1975,7 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ num = 0; x = execute(a[3]); /* target string */ t = getsval(x); - if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ + if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ pfa = (fa *) a[1]; /* regular expression */ else { y = execute(a[1]); diff --git a/contrib/one-true-awk/tran.c b/contrib/one-true-awk/tran.c index e364ebd81928..d1dfe2b2f176 100644 --- a/contrib/one-true-awk/tran.c +++ b/contrib/one-true-awk/tran.c @@ -55,10 +55,14 @@ Cell *fsloc; /* FS */ Cell *nrloc; /* NR */ Cell *nfloc; /* NF */ Cell *fnrloc; /* FNR */ +Cell *ofsloc; /* OFS */ +Cell *orsloc; /* ORS */ +Cell *rsloc; /* RS */ Array *ARGVtab; /* symbol table containing ARGV[...] */ Array *ENVtab; /* symbol table containing ENVIRON[...] */ Cell *rstartloc; /* RSTART */ Cell *rlengthloc; /* RLENGTH */ +Cell *subseploc; /* SUBSEP */ Cell *symtabloc; /* SYMTAB */ Cell *nullloc; /* a guaranteed empty cell */ @@ -67,6 +71,18 @@ Cell *literal0; extern Cell **fldtab; +static void +setfree(Cell *vp) +{ + if (&vp->sval == FS || &vp->sval == RS || + &vp->sval == OFS || &vp->sval == ORS || + &vp->sval == OFMT || &vp->sval == CONVFMT || + &vp->sval == FILENAME || &vp->sval == SUBSEP) + vp->tval |= DONTFREE; + else + vp->tval &= ~DONTFREE; +} + void syminit(void) /* initialize symbol table with builtin vars */ { literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab); @@ -76,9 +92,12 @@ void syminit(void) /* initialize symbol table with builtin vars */ fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab); FS = &fsloc->sval; - RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval; - OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval; - ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval; + rsloc = setsymtab("RS", "\n", 0.0, STR|DONTFREE, 
symtab); + RS = &rsloc->sval; + ofsloc = setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab); + OFS = &ofsloc->sval; + orsloc = setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab); + ORS = &orsloc->sval; OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval; @@ -88,7 +107,8 @@ void syminit(void) /* initialize symbol table with builtin vars */ NR = &nrloc->fval; fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab); FNR = &fnrloc->fval; - SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval; + subseploc = setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab); + SUBSEP = &subseploc->sval; rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab); RSTART = &rstartloc->fval; rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab); @@ -174,7 +194,7 @@ void freesymtab(Cell *ap) /* free a symbol table */ free(cp); tp->nelem--; } - tp->tab[i] = NULL; + tp->tab[i] = 0; } if (tp->nelem != 0) WARNING("can't happen: inconsistent element count freeing %s", ap->nval); @@ -282,6 +302,7 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */ { int fldno; + f += 0.0; /* normalise negative zero to positive zero */ if ((vp->tval & (NUM | STR)) == 0) funnyvar(vp, "assign to"); if (isfld(vp)) { @@ -290,13 +311,21 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */ if (fldno > *NF) newfld(fldno); dprintf( ("setting field %d to %g\n", fldno, f) ); + } else if (&vp->fval == NF) { + donerec = 0; /* mark $0 invalid */ + setlastfld(f); + dprintf( ("setting NF to %g\n", f) ); } else if (isrec(vp)) { donefld = 0; /* mark $1... 
invalid */ donerec = 1; + } else if (vp == ofsloc) { + if (donerec == 0) + recbld(); } if (freeable(vp)) xfree(vp->sval); /* free any previous string */ - vp->tval &= ~STR; /* mark string invalid */ + vp->tval &= ~(STR|CONVC|CONVO); /* mark string invalid */ + vp->fmt = NULL; vp->tval |= NUM; /* mark number ok */ if (f == -0) /* who would have thought this possible? */ f = 0; @@ -318,6 +347,7 @@ char *setsval(Cell *vp, const char *s) /* set string val of a Cell */ { char *t; int fldno; + Awkfloat f; dprintf( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n", (void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld) ); @@ -328,20 +358,32 @@ char *setsval(Cell *vp, const char *s) /* set string val of a Cell */ fldno = atoi(vp->nval); if (fldno > *NF) newfld(fldno); - dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) ); + dprintf( ("setting field %d to %s (%p)\n", fldno, s, (void *) s) ); } else if (isrec(vp)) { donefld = 0; /* mark $1... invalid */ donerec = 1; + } else if (vp == ofsloc) { + if (donerec == 0) + recbld(); } - t = tostring(s); /* in case it's self-assign */ + t = s ? 
tostring(s) : tostring(""); /* in case it's self-assign */ if (freeable(vp)) xfree(vp->sval); - vp->tval &= ~NUM; + vp->tval &= ~(NUM|CONVC|CONVO); vp->tval |= STR; - vp->tval &= ~DONTFREE; + vp->fmt = NULL; + setfree(vp); dprintf( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n", - (void*)vp, NN(vp->nval), t,t, vp->tval, donerec, donefld) ); - return(vp->sval = t); + (void*)vp, NN(vp->nval), t, (void *) t, vp->tval, donerec, donefld) ); + vp->sval = t; + if (&vp->fval == NF) { + donerec = 0; /* mark $0 invalid */ + f = getfval(vp); + setlastfld(f); + dprintf( ("setting NF to %g\n", f) ); + } + + return(vp->sval); } Awkfloat getfval(Cell *vp) /* get float val of a Cell */ @@ -364,7 +406,7 @@ Awkfloat getfval(Cell *vp) /* get float val of a Cell */ static char *get_str_val(Cell *vp, char **fmt) /* get string val of a Cell */ { - char s[100]; /* BUG: unchecked */ + char s[256]; double dtemp; if ((vp->tval & (NUM | STR)) == 0) @@ -373,19 +415,80 @@ static char *get_str_val(Cell *vp, char **fmt) /* get string val of a Cel fldbld(); else if (isrec(vp) && donerec == 0) recbld(); - if (isstr(vp) == 0) { - if (freeable(vp)) - xfree(vp->sval); - if (modf(vp->fval, &dtemp) == 0) /* it's integral */ - sprintf(s, "%.30g", vp->fval); - else - sprintf(s, *fmt, vp->fval); - vp->sval = tostring(s); - vp->tval &= ~DONTFREE; - vp->tval |= STR; + + /* + * ADR: This is complicated and more fragile than is desirable. + * Retrieving a string value for a number associates the string + * value with the scalar. Previously, the string value was + * sticky, meaning if converted via OFMT that became the value + * (even though POSIX wants it to be via CONVFMT). Or if CONVFMT + * changed after a string value was retrieved, the original value + * was maintained and used. Also not per POSIX. 
+ * + * We work around this design by adding two additional flags, + * CONVC and CONVO, indicating how the string value was + * obtained (via CONVFMT or OFMT) and _also_ maintaining a copy + * of the pointer to the xFMT format string used for the + * conversion. This pointer is only read, **never** dereferenced. + * The next time we do a conversion, if it's coming from the same + * xFMT as last time, and the pointer value is different, we + * know that the xFMT format string changed, and we need to + * redo the conversion. If it's the same, we don't have to. + * + * There are also several cases where we don't do a conversion, + * such as for a field (see the checks below). + */ + + /* Don't duplicate the code for actually updating the value */ +#define update_str_val(vp) \ + { \ + if (freeable(vp)) \ + xfree(vp->sval); \ + if (modf(vp->fval, &dtemp) == 0) /* it's integral */ \ + snprintf(s, sizeof (s), "%.30g", vp->fval); \ + else \ + snprintf(s, sizeof (s), *fmt, vp->fval); \ + vp->sval = tostring(s); \ + vp->tval &= ~DONTFREE; \ + vp->tval |= STR; \ } + + if (isstr(vp) == 0) { + update_str_val(vp); + if (fmt == OFMT) { + vp->tval &= ~CONVC; + vp->tval |= CONVO; + } else { + /* CONVFMT */ + vp->tval &= ~CONVO; + vp->tval |= CONVC; + } + vp->fmt = *fmt; + } else if ((vp->tval & DONTFREE) != 0 || ! 
isnum(vp) || isfld(vp)) { + goto done; + } else if (isstr(vp)) { + if (fmt == OFMT) { + if ((vp->tval & CONVC) != 0 + || ((vp->tval & CONVO) != 0 && vp->fmt != *fmt)) { + update_str_val(vp); + vp->tval &= ~CONVC; + vp->tval |= CONVO; + vp->fmt = *fmt; + } + } else { + /* CONVFMT */ + if ((vp->tval & CONVO) != 0 + || ((vp->tval & CONVC) != 0 && vp->fmt != *fmt)) { + update_str_val(vp); + vp->tval &= ~CONVO; + vp->tval |= CONVC; + vp->fmt = *fmt; + } + } + } +done: dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", - (void*)vp, NN(vp->nval), vp->sval, vp->sval, vp->tval) ); + (void*)vp, NN(vp->nval), vp->sval, (void *) vp->sval, vp->tval) ); return(vp->sval); } @@ -457,3 +560,37 @@ char *qstring(const char *is, int delim) /* collect string up to next delim */ *bp++ = 0; return (char *) buf; } + +const char *flags2str(int flags) +{ + static const struct ftab { + const char *name; + int value; + } flagtab[] = { + { "NUM", NUM }, + { "STR", STR }, + { "DONTFREE", DONTFREE }, + { "CON", CON }, + { "ARR", ARR }, + { "FCN", FCN }, + { "FLD", FLD }, + { "REC", REC }, + { "CONVC", CONVC }, + { "CONVO", CONVO }, + { NULL, 0 } + }; + static char buf[100]; + int i; + char *cp = buf; + + for (i = 0; flagtab[i].name != NULL; i++) { + if ((flags & flagtab[i].value) != 0) { + if (cp > buf) + *cp++ = '|'; + strcpy(cp, flagtab[i].name); + cp += strlen(cp); + } + } + + return buf; +}