Import latest one-true-awk from upstream
Import git hash 4189ef5d from https://github.com/onetrueawk/awk.git, as there has not been a release in a while. Upstream one-true-awk woke up! Time to catch up. This may also revert FreeBSD changes that we'd placed in the vendor branch in anticipation of their inclusion in upstream. That's not yet the case, and these will be resolved in the merge. See FIXES for a complete list of bugs fixed (starting with the Jun 7, 2018 entry).
This commit is contained in:
parent
3a4488f93f
commit
03ee4d05f1
245
ChangeLog
Normal file
245
ChangeLog
Normal file
@ -0,0 +1,245 @@
|
||||
2019-05-29 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
* lib.c (isclvar): Remove check for additional '=' after
|
||||
first one. No longer needed.
|
||||
|
||||
2019-01-26 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
* main.c (version): Updated.
|
||||
|
||||
2019-01-25 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
* run.c (awkgetline): Check for numeric value in all getline
|
||||
variants. See the numeric-getline.* files in bugs-fixed directory.
|
||||
|
||||
2018-08-29 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
* REGRESS: Check for existence of a.out. If not there, run
|
||||
make. Enable core dumps for T.arnold system status test
|
||||
to work on MacOS X.
|
||||
|
||||
2018-08-22 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
* awktest.tar (testdir/T.expr): Fix test for unary plus.
|
||||
|
||||
2018-08-22 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
* REGRESS: Extract tests if necessary, set PATH to include '.'.
|
||||
* regdir/beebe.tar (Makefile): Fix longwrds test to prefix
|
||||
sort with LC_ALL=C.
|
||||
* awktest.tar: Updated from fixed test suite, directory
|
||||
it extracts is now called 'testdir' to match what's in top-level
|
||||
REGRESS script.
|
||||
* regdir: Removed, as Brian wants to keep the test suite in
|
||||
the tar file.
|
||||
|
||||
2018-08-22 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
* FIXES, lib.c, run.c, makefile, main.c: Merge from Brian's tree.
|
||||
* REGRESS: New file, from Brian.
|
||||
* awktest.tar: Restored from Brian's tree.
|
||||
|
||||
2018-08-22 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
* awkgram.y (UPLUS): New token. In the grammar, call op1()
|
||||
with it.
|
||||
* maketab.c (proc): Add entry for UPLUS.
|
||||
* run.c (arith): Handle UPLUS.
|
||||
* main.c (version): Updated.
|
||||
* bugs-fixed/unary-plus.awk, bugs-fixed/unary-plus.bad,
|
||||
bugs-fixed/unary-plus.ok: New files.
|
||||
|
||||
2018-08-10 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
* TODO: Updated.
|
||||
* awk.1: Improve use of macros, add some additional explanation
|
||||
in a few places, alphabetize list of variables.
|
||||
|
||||
2018-08-08 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
* awk.h (Cell): Add new field `fmt' to track xFMT value used
|
||||
for a string conversion.
|
||||
[CONVC, CONVO]: New flag macros.
|
||||
* bugs-fixed/README: Updated.
|
||||
* bugs-fixed/string-conv.awk, bugs-fixed/string-conv.bad,
|
||||
bugs-fixed/string-conv.ok: New files.
|
||||
* main.c (version): Updated.
|
||||
* proto.h (flags2str): Add declaration.
|
||||
* tran.c (setfval): Clear CONVC and CONVO flags and set vp->fmt
|
||||
to NULL.
|
||||
(setsval): Ditto. Add large comment and new code to manage
|
||||
correct conversion of number to string based on various flags
|
||||
and the value of vp->fmt. The idea is to not convert again
|
||||
if xFMT is the same as before and we're doing the same conversion.
|
||||
Otherwise, clear the old flags, set the new, and reconvert.
|
||||
(flags2str): New function. For debug prints and for use from a debugger.
|
||||
|
||||
2018-08-05 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
Fix filename conflicts in regdir where the only difference was
|
||||
in letter case. This caused problems on Windows systems.
|
||||
|
||||
* regdir/Compare.T1: Renamed from regdir/Compare.T.
|
||||
* regdir/t.delete0: Renamed from regdir/t.delete.
|
||||
* regdir/t.getline1: Renamed from regdir/t.getline.
|
||||
* regdir/t.redir1: Renamed from regdir/t.redir.
|
||||
* regdir/t.split1: Renamed from regdir/t.split.
|
||||
* regdir/t.sub0: Renamed from regdir/t.sub.
|
||||
* regdir/REGRESS: Adjusted.
|
||||
|
||||
2018-08-04 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
With scalpel, tweezers, magnifying glass and bated breath,
|
||||
borrow code from the NetBSD version of nawk to fix the years-old
|
||||
bug whereby decrementing the value of NF did not change the
|
||||
record.
|
||||
|
||||
* lib.c (fldbld): Set donerec to 1 when done.
|
||||
(setlastfld): New function.
|
||||
* proto.h (setlastfld): Add declaration.
|
||||
* run.c (copycell): Make code smarter about flags (from NetBSD code).
|
||||
* tran.c (setfree): New function.
|
||||
* tran.c (setfval): Normalize negative zero to positive zero.
|
||||
If setting NF, clear donerec and call setlastfld().
|
||||
(setsval): Remove call to save_old_OFS(). If setting OFS, call
|
||||
recbld(). If setting NF, clear donerec and call setlastfld().
|
||||
|
||||
As part of the process, revert OFS-related changes of 2018-05-22:
|
||||
|
||||
* awk.h (saveOFS, saveOFSlen, save_old_OFS): Remove declarations.
|
||||
* lib.c (recbld): Use *OFS instead of saveOFS.
|
||||
* run.c (saveOFS, saveOFSlen, save_old_OFS): Remove.
|
||||
* tran.c (syminit): Remove initialization of saveOFS and saveOFSlen.
|
||||
|
||||
General stuff that goes along with all this:
|
||||
|
||||
* bugs-fixed/README: Updated.
|
||||
* bugs-fixed/decr-NF.awk, bugs-fixed/decr-NF.bad,
|
||||
bugs-fixed/decr-NF.ok: New files.
|
||||
* main.c (version): Updated.
|
||||
* regdir/README.TESTS: Fix awk book title.
|
||||
* regdir/T.misc: Revise test to match fixed code.
|
||||
* run.c (format): Increase size of buffer used for %a test. (Unrelated
|
||||
to NF or OFS, but fixes a compiler complaint.)
|
||||
|
||||
2018-06-07 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
* regdir/beebe.tar: Fix longwrds.ok so that the test will pass.
|
||||
The file was incorrectly sorted.
|
||||
|
||||
2018-06-06 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
* regdir/T.lilly: Fix the bug again in the second instance
|
||||
of the code. Thanks to BWK for pointing this out.
|
||||
|
||||
2018-05-31 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
* regdir/T.lilly: Fix a syntax error and ordering bug
|
||||
in creating the 'foo' file.
|
||||
|
||||
2018-05-23 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
* awk.1: Remove standalone 'awk' at the top of file, it messed up
|
||||
the formatting. Arrange built-in variable list in alphabetical
|
||||
order.
|
||||
|
||||
2018-05-23 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
* main.c (version): Add my email address and a date so that
|
||||
users can tell this isn't straight BWK awk.
|
||||
* README.md: Minor updates.
|
||||
* TODO: Updated.
|
||||
|
||||
2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
Add POSIX-required formats %a and %A.
|
||||
|
||||
* run.c (format): Check for %a support in C library. If there,
|
||||
allow %a and %A as valid formats.
|
||||
* TODO: Updated.
|
||||
* bugs-fixed/README: Updated.
|
||||
* bugs-fixed/a-format.awk, bugs-fixed/a-format.bad,
|
||||
bugs-fixed/a-format.ok: New files.
|
||||
|
||||
2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
* FIXES: Restored a line from a much earlier version that
|
||||
apparently got lost when the dates were reordered.
|
||||
* TODO: Updated.
|
||||
|
||||
2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
* README.md: New file.
|
||||
|
||||
2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
* regdir/echo.c, regdir/time.c: Minor fixes to compile without
|
||||
warning on current GCC / Linux.
|
||||
|
||||
2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
* TODO: New file.
|
||||
|
||||
2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
* makefile (gitadd, gitpush): Remove these targets. They
|
||||
should not be automated and were incorrect for things that
|
||||
would be done regularly.
|
||||
|
||||
2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
Fix nawk so that [[:blank:]] only matches space and tab instead
|
||||
of any whitespace character, originally made May 10, 2018.
|
||||
See bugs-fixed/space.awk.
|
||||
|
||||
This appears to have been a thinko on Brian's part.
|
||||
|
||||
* b.c (charclasses): Use xisblank() function for [[:blank:]].
|
||||
* bugs-fixed/README: Updated.
|
||||
* bugs-fixed/space.awk, bugs-fixed/space.bad,
|
||||
bugs-fixed/space.ok: New files.
|
||||
|
||||
2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
* .gitignore: New file.
|
||||
|
||||
2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
Fix nawk to provide reasonable exit status for system(),
|
||||
a la gawk, originally made March 12, 2016. See
|
||||
bugs-fixed/system-status.awk.
|
||||
|
||||
* run.c (bltin): For FSYSTEM, use the macros defined for wait(2)
|
||||
to produce a reasonable exit value, instead of doing a floating-point
|
||||
division by 256.
|
||||
* awk.1: Document the return status values.
|
||||
* bugs-fixed/README: Updated.
|
||||
* bugs-fixed/system-status.awk, bugs-fixed/system-status.bad,
|
||||
bugs-fixed/system-status.ok: New files.
|
||||
|
||||
2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
Bug fix with respect to rebuilding a record, originally
|
||||
made August 19, 2014. See bugs-fixed/ofs-rebuild.awk.
|
||||
|
||||
* awk.h (saveOFS, saveOFSlen): Declare new variables.
|
||||
* lib.c (recbld): Use them when rebuilding the record.
|
||||
* run.c (saveOFS, saveOFSlen): Define new variables.
|
||||
(save_old_OFS): New function to save OFS aside.
|
||||
* tran.c (syminit): Initialize saveOFS and saveOFSlen.
|
||||
(setsval): If setting a field, call save_old_OFS().
|
||||
* bugs-fixed/README, bugs-fixed/ofs-rebuild.awk,
|
||||
bugs-fixed/ofs-rebuild.bad, bugs-fixed/ofs-rebuild.ok: New files.
|
||||
|
||||
2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
* makefile (YACC): Use bison.
|
||||
|
||||
2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
|
||||
|
||||
* ChangeLog: Created.
|
||||
* regdir: Created. Based on contents of awktest.a.
|
||||
* .gitattributes: Created, to preserve CR LF in regdir/t.crlf.
|
||||
* awktest.a: Removed.
|
||||
* regdir/T.gawk, regdir/T.latin1: Updated from awktest.tar.
|
||||
* awktest.tar: Removed.
|
109
FIXES
109
FIXES
@ -25,6 +25,113 @@ THIS SOFTWARE.
|
||||
This file lists all bug fixes, changes, etc., made since the AWK book
|
||||
was sent to the printers in August, 1987.
|
||||
|
||||
May 29, 2019:
|
||||
Fix check for command line arguments to no longer require that
|
||||
first character after '=' not be another '='. Reverts change of
|
||||
August 11, 1989. Thanks to GitHub user Jamie Landeg Jones for
|
||||
pointing out the issue; from Issue #38.
|
||||
|
||||
Apr 7, 2019:
|
||||
Update awktest.tar(p.50) to use modern options to sort. Needed
|
||||
for Android development. Thanks to GitHub user mohd-akram (Mohamed
|
||||
Akram). From Comment #33.
|
||||
|
||||
Mar 12, 2019:
|
||||
Added very simplistic support for cross-compiling in the
|
||||
makefile. We are NOT going to go in the direction of the
|
||||
autotools, though. Thanks to GitHub user nee-san for
|
||||
the basic change. (Merged from PR #34.)
|
||||
|
||||
Mar 5, 2019:
|
||||
Added support for POSIX-standard interval expressions (a.k.a.
|
||||
bounds, a.k.a. repetition expressions) in regular expressions,
|
||||
backported (via NetBSD) from Apple awk-24 (20070501).
|
||||
Thanks to Martijn Dekker <martijn@inlv.org> for the port.
|
||||
(Merged from PR #30.)
|
||||
|
||||
Mar 3, 2019:
|
||||
Merge PRs as follows:
|
||||
#12: Avoid undefined behaviour when using ctype(3) functions in
|
||||
relex(). Thanks to GitHub user iamleot.
|
||||
#31: Make getline handle numeric strings, and update FIXES. Thanks
|
||||
to GitHub user arnoldrobbins.
|
||||
#32: maketab: support build systems with read-only source. Thanks
|
||||
to GitHub user enh.
|
||||
|
||||
Jan 25, 2019:
|
||||
Make getline handle numeric strings properly in all cases.
|
||||
(Thanks, Arnold.)
|
||||
|
||||
Jan 21, 2019:
|
||||
Merged a number of small fixes from GitHub pull requests.
|
||||
Thanks to GitHub users Arnold Robbins (arnoldrobbins),
|
||||
Cody Mello (melloc) and Christoph Junghans (junghans).
|
||||
PR numbers: 13-21, 23, 24, 27.
|
||||
|
||||
Oct 25, 2018:
|
||||
Added test in maketab.c to prevent generating a proctab entry
|
||||
for YYSTYPE_IS_DEFINED. It was harmless but some gcc settings
|
||||
generated a warning message. Thanks to Nan Xiao for report.
|
||||
|
||||
Aug 27, 2018:
|
||||
Disallow '$' in printf formats; arguments evaluated in order
|
||||
and printed in order.
|
||||
|
||||
Added some casts to silence warnings on debugging printfs.
|
||||
(Thanks, Arnold.)
|
||||
|
||||
Aug 23, 2018:
|
||||
A long list of fixes courtesy of Arnold Robbins,
|
||||
to whom profound thanks.
|
||||
|
||||
1. ofs-rebuild: OFS value used to rebuild the record was incorrect.
|
||||
Fixed August 19, 2014. Revised fix August 2018.
|
||||
|
||||
2. system-status: Instead of a floating-point division by 256, use
|
||||
the wait(2) macros to create a reasonable exit status.
|
||||
Fixed March 12, 2016.
|
||||
|
||||
3. space: Use provided xisblank() function instead of isspace() for
|
||||
matching [[:blank:]].
|
||||
|
||||
4. a-format: Add POSIX standard %a and %A to supported formats. Check
|
||||
at runtime that this format is available.
|
||||
|
||||
5. decr-NF: Decrementing NF did not change $0. This is a decades-old
|
||||
bug. There are interactions with the old and new value of OFS as well.
|
||||
Most of the fix came from the NetBSD awk.
|
||||
|
||||
6. string-conv: String conversions of scalars were sticky. Once a
|
||||
conversion to string happened, even with OFMT, that value was used until
|
||||
a new numeric value was assigned, even if OFMT differed from CONVFMT,
|
||||
and also if CONVFMT changed.
|
||||
|
||||
7. unary-plus: Unary plus on a string constant returned the string.
|
||||
Instead, it should convert the value to numeric and give that value.
|
||||
|
||||
Also added Arnold's tests for these to awktest.tar as T.arnold.
|
||||
|
||||
Aug 15, 2018:
|
||||
fixed mangled awktest.tar (thanks, Arnold), posted all
|
||||
current (very minor) fixes to github / onetrueawk
|
||||
|
||||
Jun 7, 2018:
|
||||
(yes, a long layoff)
|
||||
Updated some broken tests (beebe.tar, T.lilly)
|
||||
[thanks to Arnold Robbins]
|
||||
|
||||
Mar 26, 2015:
|
||||
buffer overflow in error reporting; thanks to tobias ulmer
|
||||
and john-mark gurney for spotting it and the fix.
|
||||
|
||||
Feb 4, 2013:
|
||||
cleaned up a handful of tests that didn't seem to actually
|
||||
test for correct behavior: T.latin1, T.gawk.
|
||||
|
||||
Jan 5, 2013:
|
||||
added ,NULL initializer to static Cells in run.c; not really
|
||||
needed but cleaner. Thanks to Michael Bombardieri.
|
||||
|
||||
Dec 20, 2012:
|
||||
fiddled makefile to get correct yacc and bison flags. pick yacc
|
||||
(linux) or bison (mac) as necessary.
|
||||
@ -493,6 +600,8 @@ May 12, 1998:
|
||||
Mar 12, 1998:
|
||||
added -V to print version number and die.
|
||||
|
||||
[notify dave kerns, dkerns@dacsoup.ih.lucent.com]
|
||||
|
||||
Feb 11, 1998:
|
||||
subtle silent bug in lex.c: if the program ended with a number
|
||||
longer than 1 digit, part of the input would be pushed back and
|
||||
|
23
LICENSE
Normal file
23
LICENSE
Normal file
@ -0,0 +1,23 @@
|
||||
/****************************************************************
|
||||
Copyright (C) Lucent Technologies 1997
|
||||
All Rights Reserved
|
||||
|
||||
Permission to use, copy, modify, and distribute this software and
|
||||
its documentation for any purpose and without fee is hereby
|
||||
granted, provided that the above copyright notice appear in all
|
||||
copies and that both that the copyright notice and this
|
||||
permission notice and warranty disclaimer appear in supporting
|
||||
documentation, and that the name Lucent Technologies or any of
|
||||
its entities not be used in advertising or publicity pertaining
|
||||
to distribution of the software without specific, written prior
|
||||
permission.
|
||||
|
||||
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
|
||||
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
|
||||
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
|
||||
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
|
||||
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
||||
THIS SOFTWARE.
|
||||
****************************************************************/
|
35
REGRESS
Executable file
35
REGRESS
Executable file
@ -0,0 +1,35 @@
|
||||
#! /bin/sh
|
||||
|
||||
case `uname` in
|
||||
CYGWIN) EXE=a.exe ;;
|
||||
*) EXE=a.out ;;
|
||||
esac
|
||||
|
||||
if [ ! -f $EXE ]
|
||||
then
|
||||
make || exit 1
|
||||
fi
|
||||
|
||||
if [ -d testdir ]
|
||||
then
|
||||
true # do nothing
|
||||
elif [ -f awktest.tar ]
|
||||
then
|
||||
echo extracting testdir
|
||||
tar -xpf awktest.tar
|
||||
else
|
||||
echo $0: No testdir directory and no awktest.tar to extract it from! >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
cd testdir
|
||||
pwd
|
||||
PATH=.:$PATH
|
||||
export PATH
|
||||
if (ulimit -c unlimited > /dev/null 2>&1)
|
||||
then
|
||||
# Workaround broken default on MacOS X
|
||||
ulimit -c unlimited
|
||||
fi
|
||||
|
||||
REGRESS
|
168
awk.1
168
awk.1
@ -7,7 +7,6 @@
|
||||
.fi
|
||||
.ft 1
|
||||
..
|
||||
awk
|
||||
.TH AWK 1
|
||||
.CT 1 files prog_other
|
||||
.SH NAME
|
||||
@ -36,7 +35,7 @@ awk \- pattern-directed scanning and processing language
|
||||
scans each input
|
||||
.I file
|
||||
for lines that match any of a set of patterns specified literally in
|
||||
.IR prog
|
||||
.I prog
|
||||
or in one or more files
|
||||
specified as
|
||||
.B \-f
|
||||
@ -53,7 +52,7 @@ The file name
|
||||
.B \-
|
||||
means the standard input.
|
||||
Any
|
||||
.IR file
|
||||
.I file
|
||||
of the form
|
||||
.I var=value
|
||||
is treated as an assignment, not a filename,
|
||||
@ -70,12 +69,12 @@ any number of
|
||||
options may be present.
|
||||
The
|
||||
.B \-F
|
||||
.IR fs
|
||||
.I fs
|
||||
option defines the input field separator to be the regular expression
|
||||
.IR fs.
|
||||
.IR fs .
|
||||
.PP
|
||||
An input line is normally made up of fields separated by white space,
|
||||
or by regular expression
|
||||
or by the regular expression
|
||||
.BR FS .
|
||||
The fields are denoted
|
||||
.BR $1 ,
|
||||
@ -87,7 +86,7 @@ If
|
||||
.BR FS
|
||||
is null, the input line is split into one field per character.
|
||||
.PP
|
||||
A pattern-action statement has the form
|
||||
A pattern-action statement has the form:
|
||||
.IP
|
||||
.IB pattern " { " action " }
|
||||
.PP
|
||||
@ -101,7 +100,7 @@ An action is a sequence of statements.
|
||||
A statement can be one of the following:
|
||||
.PP
|
||||
.EX
|
||||
.ta \w'\f(CWdelete array[expression]'u
|
||||
.ta \w'\f(CWdelete array[expression]\fR'u
|
||||
.RS
|
||||
.nf
|
||||
.ft CW
|
||||
@ -145,7 +144,7 @@ The operators
|
||||
are also available in expressions.
|
||||
Variables may be scalars, array elements
|
||||
(denoted
|
||||
.IB x [ i ] )
|
||||
.IB x [ i ] \fR)
|
||||
or fields.
|
||||
Variables are initialized to the null string.
|
||||
Array subscripts may be any string,
|
||||
@ -161,11 +160,11 @@ The
|
||||
.B print
|
||||
statement prints its arguments on the standard output
|
||||
(or on a file if
|
||||
.BI > file
|
||||
.BI > " file
|
||||
or
|
||||
.BI >> file
|
||||
.BI >> " file
|
||||
is present or on a pipe if
|
||||
.BI | cmd
|
||||
.BI | " cmd
|
||||
is present), separated by the current output field separator,
|
||||
and terminated by the output record separator.
|
||||
.I file
|
||||
@ -176,9 +175,10 @@ identical string values in different statements denote
|
||||
the same open file.
|
||||
The
|
||||
.B printf
|
||||
statement formats its expression list according to the format
|
||||
statement formats its expression list according to the
|
||||
.I format
|
||||
(see
|
||||
.IR printf (3)) .
|
||||
.IR printf (3)).
|
||||
The built-in function
|
||||
.BI close( expr )
|
||||
closes the file or pipe
|
||||
@ -189,13 +189,13 @@ flushes any buffered output for the file or pipe
|
||||
.IR expr .
|
||||
.PP
|
||||
The mathematical functions
|
||||
.BR atan2 ,
|
||||
.BR cos ,
|
||||
.BR exp ,
|
||||
.BR log ,
|
||||
.BR sqrt ,
|
||||
.BR sin ,
|
||||
.BR cos ,
|
||||
and
|
||||
.BR atan2
|
||||
.B sqrt
|
||||
are built in.
|
||||
Other built-in functions:
|
||||
.TF length
|
||||
@ -203,7 +203,8 @@ Other built-in functions:
|
||||
.B length
|
||||
the length of its argument
|
||||
taken as a string,
|
||||
or of
|
||||
number of elements in an array for an array argument,
|
||||
or length of
|
||||
.B $0
|
||||
if no argument.
|
||||
.TP
|
||||
@ -218,14 +219,18 @@ and returns the previous seed.
|
||||
.B int
|
||||
truncates to an integer value
|
||||
.TP
|
||||
.BI substr( s , " m" , " n\fB)
|
||||
\fBsubstr(\fIs\fB, \fIm\fR [\fB, \fIn\^\fR]\fB)\fR
|
||||
the
|
||||
.IR n -character
|
||||
substring of
|
||||
.I s
|
||||
that begins at position
|
||||
.IR m
|
||||
.I m
|
||||
counted from 1.
|
||||
If no
|
||||
.IR n ,
|
||||
use the rest of the string.
|
||||
.I
|
||||
.TP
|
||||
.BI index( s , " t" )
|
||||
the position in
|
||||
@ -246,14 +251,14 @@ and
|
||||
.B RLENGTH
|
||||
are set to the position and length of the matched string.
|
||||
.TP
|
||||
.BI split( s , " a" , " fs\fB)
|
||||
\fBsplit(\fIs\fB, \fIa \fR[\fB, \fIfs\^\fR]\fB)\fR
|
||||
splits the string
|
||||
.I s
|
||||
into array elements
|
||||
.IB a [1] ,
|
||||
.IB a [2] ,
|
||||
.IB a [1] \fR,
|
||||
.IB a [2] \fR,
|
||||
\&...,
|
||||
.IB a [ n ] ,
|
||||
.IB a [ n ] \fR,
|
||||
and returns
|
||||
.IR n .
|
||||
The separation is done with the regular expression
|
||||
@ -266,7 +271,7 @@ is not given.
|
||||
An empty string as field separator splits the string
|
||||
into one array element per character.
|
||||
.TP
|
||||
.BI sub( r , " t" , " s\fB)
|
||||
\fBsub(\fIr\fB, \fIt \fR[, \fIs\^\fR]\fB)
|
||||
substitutes
|
||||
.I t
|
||||
for the first occurrence of the regular expression
|
||||
@ -279,7 +284,7 @@ is not given,
|
||||
.B $0
|
||||
is used.
|
||||
.TP
|
||||
.B gsub
|
||||
\fBgsub(\fIr\fB, \fIt \fR[, \fIs\^\fR]\fB)
|
||||
same as
|
||||
.B sub
|
||||
except that all occurrences of the regular expression
|
||||
@ -289,18 +294,28 @@ and
|
||||
.B gsub
|
||||
return the number of replacements.
|
||||
.TP
|
||||
.BI sprintf( fmt , " expr" , " ...\fB )
|
||||
.BI sprintf( fmt , " expr" , " ...\fB)
|
||||
the string resulting from formatting
|
||||
.I expr ...
|
||||
according to the
|
||||
.IR printf (3)
|
||||
format
|
||||
.I fmt
|
||||
.IR fmt .
|
||||
.TP
|
||||
.BI system( cmd )
|
||||
executes
|
||||
.I cmd
|
||||
and returns its exit status
|
||||
and returns its exit status. This will be \-1 upon error,
|
||||
.IR cmd 's
|
||||
exit status upon a normal exit,
|
||||
256 +
|
||||
.I sig
|
||||
upon death-by-signal, where
|
||||
.I sig
|
||||
is the number of the murdering signal,
|
||||
or 512 +
|
||||
.I sig
|
||||
if there was a core dump.
|
||||
.TP
|
||||
.BI tolower( str )
|
||||
returns a copy of
|
||||
@ -321,7 +336,7 @@ sets
|
||||
.B $0
|
||||
to the next input record from the current input file;
|
||||
.B getline
|
||||
.BI < file
|
||||
.BI < " file
|
||||
sets
|
||||
.B $0
|
||||
to the next record from
|
||||
@ -359,7 +374,7 @@ Isolated regular expressions
|
||||
in a pattern apply to the entire line.
|
||||
Regular expressions may also occur in
|
||||
relational expressions, using the operators
|
||||
.BR ~
|
||||
.B ~
|
||||
and
|
||||
.BR !~ .
|
||||
.BI / re /
|
||||
@ -383,8 +398,12 @@ A relational expression is one of the following:
|
||||
.br
|
||||
.BI ( expr , expr,... ") in " array-name
|
||||
.PP
|
||||
where a relop is any of the six relational operators in C,
|
||||
and a matchop is either
|
||||
where a
|
||||
.I relop
|
||||
is any of the six relational operators in C,
|
||||
and a
|
||||
.I matchop
|
||||
is either
|
||||
.B ~
|
||||
(matches)
|
||||
or
|
||||
@ -405,57 +424,68 @@ and after the last.
|
||||
and
|
||||
.B END
|
||||
do not combine with other patterns.
|
||||
They may appear multiple times in a program and execute
|
||||
in the order they are read by
|
||||
.IR awk .
|
||||
.PP
|
||||
Variable names with special meanings:
|
||||
.TF FILENAME
|
||||
.TP
|
||||
.B ARGC
|
||||
argument count, assignable.
|
||||
.TP
|
||||
.B ARGV
|
||||
argument array, assignable;
|
||||
non-null members are taken as filenames.
|
||||
.TP
|
||||
.B CONVFMT
|
||||
conversion format used when converting numbers
|
||||
(default
|
||||
.BR "%.6g" )
|
||||
.BR "%.6g" ).
|
||||
.TP
|
||||
.B ENVIRON
|
||||
array of environment variables; subscripts are names.
|
||||
.TP
|
||||
.B FILENAME
|
||||
the name of the current input file.
|
||||
.TP
|
||||
.B FNR
|
||||
ordinal number of the current record in the current file.
|
||||
.TP
|
||||
.B FS
|
||||
regular expression used to separate fields; also settable
|
||||
by option
|
||||
.BI \-F fs.
|
||||
.BI \-F fs\fR.
|
||||
.TP
|
||||
.BR NF
|
||||
number of fields in the current record
|
||||
number of fields in the current record.
|
||||
.TP
|
||||
.B NR
|
||||
ordinal number of the current record
|
||||
.TP
|
||||
.B FNR
|
||||
ordinal number of the current record in the current file
|
||||
.TP
|
||||
.B FILENAME
|
||||
the name of the current input file
|
||||
.TP
|
||||
.B RS
|
||||
input record separator (default newline)
|
||||
.TP
|
||||
.B OFS
|
||||
output field separator (default blank)
|
||||
.TP
|
||||
.B ORS
|
||||
output record separator (default newline)
|
||||
ordinal number of the current record.
|
||||
.TP
|
||||
.B OFMT
|
||||
output format for numbers (default
|
||||
.BR "%.6g" )
|
||||
.BR "%.6g" ).
|
||||
.TP
|
||||
.B OFS
|
||||
output field separator (default space).
|
||||
.TP
|
||||
.B ORS
|
||||
output record separator (default newline).
|
||||
.TP
|
||||
.B RLENGTH
|
||||
the length of a string matched by
|
||||
.BR match .
|
||||
.TP
|
||||
.B RS
|
||||
input record separator (default newline).
|
||||
.TP
|
||||
.B RSTART
|
||||
the start position of a string matched by
|
||||
.BR match .
|
||||
.TP
|
||||
.B SUBSEP
|
||||
separates multiple subscripts (default 034)
|
||||
.TP
|
||||
.B ARGC
|
||||
argument count, assignable
|
||||
.TP
|
||||
.B ARGV
|
||||
argument array, assignable;
|
||||
non-null members are taken as filenames
|
||||
.TP
|
||||
.B ENVIRON
|
||||
array of environment variables; subscripts are names.
|
||||
separates multiple subscripts (default 034).
|
||||
.PD
|
||||
.PP
|
||||
Functions may be defined (at the position of a pattern-action statement) thus:
|
||||
@ -486,7 +516,7 @@ BEGIN { FS = ",[ \et]*|[ \et]+" }
|
||||
.EE
|
||||
.ns
|
||||
.IP
|
||||
Same, with input fields separated by comma and/or blanks and tabs.
|
||||
Same, with input fields separated by comma and/or spaces and tabs.
|
||||
.PP
|
||||
.EX
|
||||
.nf
|
||||
@ -512,13 +542,13 @@ BEGIN { # Simulate echo(1)
|
||||
.fi
|
||||
.EE
|
||||
.SH SEE ALSO
|
||||
.IR grep (1),
|
||||
.IR lex (1),
|
||||
.IR sed (1)
|
||||
.br
|
||||
A. V. Aho, B. W. Kernighan, P. J. Weinberger,
|
||||
.I
|
||||
The AWK Programming Language,
|
||||
Addison-Wesley, 1988. ISBN 0-201-07981-X
|
||||
.IR "The AWK Programming Language" ,
|
||||
Addison-Wesley, 1988. ISBN 0-201-07981-X.
|
||||
.SH BUGS
|
||||
There are no explicit conversions between numbers and strings.
|
||||
To force an expression to be treated as a number add 0 to it;
|
||||
@ -527,3 +557,5 @@ to force it to be treated as a string concatenate
|
||||
.br
|
||||
The scope rules for variables in functions are a botch;
|
||||
the syntax is worse.
|
||||
.br
|
||||
Only eight-bit character sets are handled correctly.
|
||||
|
10
awk.h
10
awk.h
@ -81,7 +81,8 @@ typedef struct Cell {
|
||||
char *nval; /* name, for variables only */
|
||||
char *sval; /* string value */
|
||||
Awkfloat fval; /* value as number */
|
||||
int tval; /* type info: STR|NUM|ARR|FCN|FLD|CON|DONTFREE */
|
||||
int tval; /* type info: STR|NUM|ARR|FCN|FLD|CON|DONTFREE|CONVC|CONVO */
|
||||
char *fmt; /* CONVFMT/OFMT value used to convert from number */
|
||||
struct Cell *cnext; /* ptr to next if chained */
|
||||
} Cell;
|
||||
|
||||
@ -96,9 +97,14 @@ extern Array *symtab;
|
||||
|
||||
extern Cell *nrloc; /* NR */
|
||||
extern Cell *fnrloc; /* FNR */
|
||||
extern Cell *fsloc; /* FS */
|
||||
extern Cell *nfloc; /* NF */
|
||||
extern Cell *ofsloc; /* OFS */
|
||||
extern Cell *orsloc; /* ORS */
|
||||
extern Cell *rsloc; /* RS */
|
||||
extern Cell *rstartloc; /* RSTART */
|
||||
extern Cell *rlengthloc; /* RLENGTH */
|
||||
extern Cell *subseploc; /* SUBSEP */
|
||||
|
||||
/* Cell.tval values: */
|
||||
#define NUM 01 /* number value is valid */
|
||||
@ -109,6 +115,8 @@ extern Cell *rlengthloc; /* RLENGTH */
|
||||
#define FCN 040 /* this is a function name */
|
||||
#define FLD 0100 /* this is a field $1, $2, ... */
|
||||
#define REC 0200 /* this is $0 */
|
||||
#define CONVC 0400 /* string was converted from number via CONVFMT */
|
||||
#define CONVO 01000 /* string was converted from number via OFMT */
|
||||
|
||||
|
||||
/* function types */
|
||||
|
@ -86,7 +86,7 @@ Node *arglist = 0; /* list of args for current function */
|
||||
%left CAT
|
||||
%left '+' '-'
|
||||
%left '*' '/' '%'
|
||||
%left NOT UMINUS
|
||||
%left NOT UMINUS UPLUS
|
||||
%right POWER
|
||||
%right DECR INCR
|
||||
%left INDIRECT
|
||||
@ -357,7 +357,7 @@ term:
|
||||
| term '%' term { $$ = op2(MOD, $1, $3); }
|
||||
| term POWER term { $$ = op2(POWER, $1, $3); }
|
||||
| '-' term %prec UMINUS { $$ = op1(UMINUS, $2); }
|
||||
| '+' term %prec UMINUS { $$ = $2; }
|
||||
| '+' term %prec UMINUS { $$ = op1(UPLUS, $2); }
|
||||
| NOT term %prec UMINUS { $$ = op1(NOT, notnull($2)); }
|
||||
| BLTIN '(' ')' { $$ = op2(BLTIN, itonp($1), rectonode()); }
|
||||
| BLTIN '(' patlist ')' { $$ = op2(BLTIN, itonp($1), $3); }
|
||||
|
291
b.c
291
b.c
@ -27,6 +27,7 @@ THIS SOFTWARE.
|
||||
#define DEBUG
|
||||
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
@ -65,6 +66,11 @@ int rlxval;
|
||||
static uschar *rlxstr;
|
||||
static uschar *prestr; /* current position in current re */
|
||||
static uschar *lastre; /* origin of last re */
|
||||
static uschar *lastatom; /* origin of last Atom */
|
||||
static uschar *starttok;
|
||||
static uschar *basestr; /* starts with original, replaced during
|
||||
repetition processing */
|
||||
static uschar *firstbasestr;
|
||||
|
||||
static int setcnt;
|
||||
static int poscnt;
|
||||
@ -82,11 +88,11 @@ fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
|
||||
fa *pfa;
|
||||
static int now = 1;
|
||||
|
||||
if (setvec == NULL) { /* first time through any RE */
|
||||
if (setvec == 0) { /* first time through any RE */
|
||||
maxsetvec = MAXLIN;
|
||||
setvec = (int *) malloc(maxsetvec * sizeof(int));
|
||||
tmpset = (int *) malloc(maxsetvec * sizeof(int));
|
||||
if (setvec == NULL || tmpset == NULL)
|
||||
if (setvec == 0 || tmpset == 0)
|
||||
overflo("out of space initializing makedfa");
|
||||
}
|
||||
|
||||
@ -124,6 +130,8 @@ fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
|
||||
Node *p, *p1;
|
||||
fa *f;
|
||||
|
||||
firstbasestr = (uschar *) s;
|
||||
basestr = firstbasestr;
|
||||
p = reparse(s);
|
||||
p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
|
||||
/* put ALL STAR in front of reg. exp. */
|
||||
@ -137,7 +145,7 @@ fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
|
||||
f->accept = poscnt-1; /* penter has computed number of positions in re */
|
||||
cfoll(f, p1); /* set up follow sets */
|
||||
freetr(p1);
|
||||
if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL)
|
||||
if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
|
||||
overflo("out of space in makedfa");
|
||||
if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
|
||||
overflo("out of space in makedfa");
|
||||
@ -145,6 +153,10 @@ fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
|
||||
f->initstat = makeinit(f, anchor);
|
||||
f->anchor = anchor;
|
||||
f->restr = (uschar *) tostring(s);
|
||||
if (firstbasestr != basestr) {
|
||||
if (basestr)
|
||||
xfree(basestr);
|
||||
}
|
||||
return f;
|
||||
}
|
||||
|
||||
@ -157,7 +169,7 @@ int makeinit(fa *f, int anchor)
|
||||
f->reset = 0;
|
||||
k = *(f->re[0].lfollow);
|
||||
xfree(f->posns[2]);
|
||||
if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
|
||||
if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
|
||||
overflo("out of space in makeinit");
|
||||
for (i=0; i <= k; i++) {
|
||||
(f->posns[2])[i] = (f->re[0].lfollow)[i];
|
||||
@ -290,11 +302,11 @@ char *cclenter(const char *argp) /* add a character class */
|
||||
int i, c, c2;
|
||||
uschar *p = (uschar *) argp;
|
||||
uschar *op, *bp;
|
||||
static uschar *buf = NULL;
|
||||
static uschar *buf = 0;
|
||||
static int bufsz = 100;
|
||||
|
||||
op = p;
|
||||
if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
|
||||
if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
|
||||
FATAL("out of space for character class [%.10s...] 1", p);
|
||||
bp = buf;
|
||||
for (i = 0; (c = *p++) != 0; ) {
|
||||
@ -350,14 +362,14 @@ void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfo
|
||||
maxsetvec *= 4;
|
||||
setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
|
||||
tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
|
||||
if (setvec == NULL || tmpset == NULL)
|
||||
if (setvec == 0 || tmpset == 0)
|
||||
overflo("out of space in cfoll()");
|
||||
}
|
||||
for (i = 0; i <= f->accept; i++)
|
||||
setvec[i] = 0;
|
||||
setcnt = 0;
|
||||
follow(v); /* computes setvec and setcnt */
|
||||
if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
|
||||
if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
|
||||
overflo("out of space building follow set");
|
||||
f->re[info(v)].lfollow = p;
|
||||
*p = setcnt;
|
||||
@ -391,7 +403,7 @@ int first(Node *p) /* collects initially active leaves of p into setvec */
|
||||
maxsetvec *= 4;
|
||||
setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
|
||||
tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
|
||||
if (setvec == NULL || tmpset == NULL)
|
||||
if (setvec == 0 || tmpset == 0)
|
||||
overflo("out of space in first()");
|
||||
}
|
||||
if (type(p) == EMPTYRE) {
|
||||
@ -531,7 +543,7 @@ int pmatch(fa *f, const char *p0) /* longest match, for sub */
|
||||
for (i = 2; i <= f->curstat; i++)
|
||||
xfree(f->posns[i]);
|
||||
k = *f->posns[0];
|
||||
if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
|
||||
if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
|
||||
overflo("out of space in pmatch");
|
||||
for (i = 0; i <= k; i++)
|
||||
(f->posns[2])[i] = (f->posns[0])[i];
|
||||
@ -588,7 +600,7 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */
|
||||
for (i = 2; i <= f->curstat; i++)
|
||||
xfree(f->posns[i]);
|
||||
k = *f->posns[0];
|
||||
if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
|
||||
if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
|
||||
overflo("out of state space");
|
||||
for (i = 0; i <= k; i++)
|
||||
(f->posns[2])[i] = (f->posns[0])[i];
|
||||
@ -628,9 +640,11 @@ Node *regexp(void) /* top-level parse of reg expr */
|
||||
Node *primary(void)
|
||||
{
|
||||
Node *np;
|
||||
int savelastatom;
|
||||
|
||||
switch (rtok) {
|
||||
case CHAR:
|
||||
lastatom = starttok;
|
||||
np = op2(CHAR, NIL, itonp(rlxval));
|
||||
rtok = relex();
|
||||
return (unary(np));
|
||||
@ -639,16 +653,19 @@ Node *primary(void)
|
||||
return (unary(op2(ALL, NIL, NIL)));
|
||||
case EMPTYRE:
|
||||
rtok = relex();
|
||||
return (unary(op2(ALL, NIL, NIL)));
|
||||
return (unary(op2(EMPTYRE, NIL, NIL)));
|
||||
case DOT:
|
||||
lastatom = starttok;
|
||||
rtok = relex();
|
||||
return (unary(op2(DOT, NIL, NIL)));
|
||||
case CCL:
|
||||
np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
|
||||
lastatom = starttok;
|
||||
rtok = relex();
|
||||
return (unary(np));
|
||||
case NCCL:
|
||||
np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
|
||||
lastatom = starttok;
|
||||
rtok = relex();
|
||||
return (unary(np));
|
||||
case '^':
|
||||
@ -658,6 +675,8 @@ Node *primary(void)
|
||||
rtok = relex();
|
||||
return (unary(op2(CHAR, NIL, NIL)));
|
||||
case '(':
|
||||
lastatom = starttok;
|
||||
savelastatom = starttok - basestr; /* Retain over recursion */
|
||||
rtok = relex();
|
||||
if (rtok == ')') { /* special pleading for () */
|
||||
rtok = relex();
|
||||
@ -665,6 +684,7 @@ Node *primary(void)
|
||||
}
|
||||
np = regexp();
|
||||
if (rtok == ')') {
|
||||
lastatom = basestr + savelastatom; /* Restore */
|
||||
rtok = relex();
|
||||
return (unary(np));
|
||||
}
|
||||
@ -679,8 +699,12 @@ Node *primary(void)
|
||||
Node *concat(Node *np)
|
||||
{
|
||||
switch (rtok) {
|
||||
case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
|
||||
case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
|
||||
return (concat(op2(CAT, np, primary())));
|
||||
case EMPTYRE:
|
||||
rtok = relex();
|
||||
return (concat(op2(CAT, op2(CCL, NIL, (Node *) tostring("")),
|
||||
primary())));
|
||||
}
|
||||
return (np);
|
||||
}
|
||||
@ -749,7 +773,7 @@ struct charclass {
|
||||
{ "alnum", 5, isalnum },
|
||||
{ "alpha", 5, isalpha },
|
||||
#ifndef HAS_ISBLANK
|
||||
{ "blank", 5, isspace }, /* was isblank */
|
||||
{ "blank", 5, xisblank },
|
||||
#else
|
||||
{ "blank", 5, isblank },
|
||||
#endif
|
||||
@ -765,16 +789,132 @@ struct charclass {
|
||||
{ NULL, 0, NULL },
|
||||
};
|
||||
|
||||
#define REPEAT_SIMPLE 0
|
||||
#define REPEAT_PLUS_APPENDED 1
|
||||
#define REPEAT_WITH_Q 2
|
||||
#define REPEAT_ZERO 3
|
||||
|
||||
static int
|
||||
replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
|
||||
int atomlen, int firstnum, int secondnum, int special_case)
|
||||
{
|
||||
int i, j;
|
||||
uschar *buf = 0;
|
||||
int ret = 1;
|
||||
int init_q = (firstnum==0); /* first added char will be ? */
|
||||
int n_q_reps = secondnum-firstnum; /* m>n, so reduce until {1,m-n} left */
|
||||
int prefix_length = reptok - basestr; /* prefix includes first rep */
|
||||
int suffix_length = strlen((char *) reptok) - reptoklen; /* string after rep specifier */
|
||||
int size = prefix_length + suffix_length;
|
||||
|
||||
if (firstnum > 1) { /* add room for reps 2 through firstnum */
|
||||
size += atomlen*(firstnum-1);
|
||||
}
|
||||
|
||||
/* Adjust size of buffer for special cases */
|
||||
if (special_case == REPEAT_PLUS_APPENDED) {
|
||||
size++; /* for the final + */
|
||||
} else if (special_case == REPEAT_WITH_Q) {
|
||||
size += init_q + (atomlen+1)* n_q_reps;
|
||||
} else if (special_case == REPEAT_ZERO) {
|
||||
size += 2; /* just a null ERE: () */
|
||||
}
|
||||
if ((buf = (uschar *) malloc(size+1)) == NULL)
|
||||
FATAL("out of space in reg expr %.10s..", lastre);
|
||||
memcpy(buf, basestr, prefix_length); /* copy prefix */
|
||||
j = prefix_length;
|
||||
if (special_case == REPEAT_ZERO) {
|
||||
j -= atomlen;
|
||||
buf[j++] = '(';
|
||||
buf[j++] = ')';
|
||||
}
|
||||
for (i=1; i < firstnum; i++) { /* copy x reps */
|
||||
memcpy(&buf[j], atom, atomlen);
|
||||
j += atomlen;
|
||||
}
|
||||
if (special_case == REPEAT_PLUS_APPENDED) {
|
||||
buf[j++] = '+';
|
||||
} else if (special_case == REPEAT_WITH_Q) {
|
||||
if (init_q) buf[j++] = '?';
|
||||
for (i=0; i < n_q_reps; i++) { /* copy x? reps */
|
||||
memcpy(&buf[j], atom, atomlen);
|
||||
j += atomlen;
|
||||
buf[j++] = '?';
|
||||
}
|
||||
}
|
||||
memcpy(&buf[j], reptok+reptoklen, suffix_length);
|
||||
if (special_case == REPEAT_ZERO) {
|
||||
buf[j+suffix_length] = '\0';
|
||||
} else {
|
||||
buf[size] = '\0';
|
||||
}
|
||||
/* free old basestr */
|
||||
if (firstbasestr != basestr) {
|
||||
if (basestr)
|
||||
xfree(basestr);
|
||||
}
|
||||
basestr = buf;
|
||||
prestr = buf + prefix_length;
|
||||
if (special_case == REPEAT_ZERO) {
|
||||
prestr -= atomlen;
|
||||
ret++;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int repeat(const uschar *reptok, int reptoklen, const uschar *atom,
|
||||
int atomlen, int firstnum, int secondnum)
|
||||
{
|
||||
/*
|
||||
In general, the repetition specifier or "bound" is replaced here
|
||||
by an equivalent ERE string, repeating the immediately previous atom
|
||||
and appending ? and + as needed. Note that the first copy of the
|
||||
atom is left in place, except in the special_case of a zero-repeat
|
||||
(i.e., {0}).
|
||||
*/
|
||||
if (secondnum < 0) { /* means {n,} -> repeat n-1 times followed by PLUS */
|
||||
if (firstnum < 2) {
|
||||
/* 0 or 1: should be handled before you get here */
|
||||
FATAL("internal error");
|
||||
} else {
|
||||
return replace_repeat(reptok, reptoklen, atom, atomlen,
|
||||
firstnum, secondnum, REPEAT_PLUS_APPENDED);
|
||||
}
|
||||
} else if (firstnum == secondnum) { /* {n} or {n,n} -> simply repeat n-1 times */
|
||||
if (firstnum == 0) { /* {0} or {0,0} */
|
||||
/* This case is unusual because the resulting
|
||||
replacement string might actually be SMALLER than
|
||||
the original ERE */
|
||||
return replace_repeat(reptok, reptoklen, atom, atomlen,
|
||||
firstnum, secondnum, REPEAT_ZERO);
|
||||
} else { /* (firstnum >= 1) */
|
||||
return replace_repeat(reptok, reptoklen, atom, atomlen,
|
||||
firstnum, secondnum, REPEAT_SIMPLE);
|
||||
}
|
||||
} else if (firstnum < secondnum) { /* {n,m} -> repeat n-1 times then alternate */
|
||||
/* x{n,m} => xx...x{1, m-n+1} => xx...x?x?x?..x? */
|
||||
return replace_repeat(reptok, reptoklen, atom, atomlen,
|
||||
firstnum, secondnum, REPEAT_WITH_Q);
|
||||
} else { /* Error - shouldn't be here (n>m) */
|
||||
FATAL("internal error");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int relex(void) /* lexical analyzer for reparse */
|
||||
{
|
||||
int c, n;
|
||||
int cflag;
|
||||
static uschar *buf = NULL;
|
||||
static uschar *buf = 0;
|
||||
static int bufsz = 100;
|
||||
uschar *bp;
|
||||
struct charclass *cc;
|
||||
int i;
|
||||
int num, m, commafound, digitfound;
|
||||
const uschar *startreptok;
|
||||
|
||||
rescan:
|
||||
starttok = prestr;
|
||||
|
||||
switch (c = *prestr++) {
|
||||
case '|': return OR;
|
||||
@ -795,7 +935,7 @@ int relex(void) /* lexical analyzer for reparse */
|
||||
rlxval = c;
|
||||
return CHAR;
|
||||
case '[':
|
||||
if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
|
||||
if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
|
||||
FATAL("out of space in reg expr %.10s..", lastre);
|
||||
bp = buf;
|
||||
if (*prestr == '^') {
|
||||
@ -823,7 +963,15 @@ int relex(void) /* lexical analyzer for reparse */
|
||||
if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
|
||||
prestr[2 + cc->cc_namelen] == ']') {
|
||||
prestr += cc->cc_namelen + 3;
|
||||
for (i = 0; i < NCHARS; i++) {
|
||||
/*
|
||||
* BUG: We begin at 1, instead of 0, since we
|
||||
* would otherwise prematurely terminate the
|
||||
* string for classes like [[:cntrl:]]. This
|
||||
* means that we can't match the NUL character,
|
||||
* not without first adapting the entire
|
||||
* program to track each string's length.
|
||||
*/
|
||||
for (i = 1; i <= UCHAR_MAX; i++) {
|
||||
if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
|
||||
FATAL("out of space for reg expr %.10s...", lastre);
|
||||
if (cc->cc_func(i)) {
|
||||
@ -833,6 +981,40 @@ int relex(void) /* lexical analyzer for reparse */
|
||||
}
|
||||
} else
|
||||
*bp++ = c;
|
||||
} else if (c == '[' && *prestr == '.') {
|
||||
char collate_char;
|
||||
prestr++;
|
||||
collate_char = *prestr++;
|
||||
if (*prestr == '.' && prestr[1] == ']') {
|
||||
prestr += 2;
|
||||
/* Found it: map via locale TBD: for
|
||||
now, simply return this char. This
|
||||
is sufficient to pass conformance
|
||||
test awk.ex 156
|
||||
*/
|
||||
if (*prestr == ']') {
|
||||
prestr++;
|
||||
rlxval = collate_char;
|
||||
return CHAR;
|
||||
}
|
||||
}
|
||||
} else if (c == '[' && *prestr == '=') {
|
||||
char equiv_char;
|
||||
prestr++;
|
||||
equiv_char = *prestr++;
|
||||
if (*prestr == '=' && prestr[1] == ']') {
|
||||
prestr += 2;
|
||||
/* Found it: map via locale TBD: for now
|
||||
simply return this char. This is
|
||||
sufficient to pass conformance test
|
||||
awk.ex 156
|
||||
*/
|
||||
if (*prestr == ']') {
|
||||
prestr++;
|
||||
rlxval = equiv_char;
|
||||
return CHAR;
|
||||
}
|
||||
}
|
||||
} else if (c == '\0') {
|
||||
FATAL("nonterminated character class %.20s", lastre);
|
||||
} else if (bp == buf) { /* 1st char is special */
|
||||
@ -847,6 +1029,75 @@ int relex(void) /* lexical analyzer for reparse */
|
||||
} else
|
||||
*bp++ = c;
|
||||
}
|
||||
break;
|
||||
case '{':
|
||||
if (isdigit(*(prestr))) {
|
||||
num = 0; /* Process as a repetition */
|
||||
n = -1; m = -1;
|
||||
commafound = 0;
|
||||
digitfound = 0;
|
||||
startreptok = prestr-1;
|
||||
/* Remember start of previous atom here ? */
|
||||
} else { /* just a { char, not a repetition */
|
||||
rlxval = c;
|
||||
return CHAR;
|
||||
}
|
||||
for (; ; ) {
|
||||
if ((c = *prestr++) == '}') {
|
||||
if (commafound) {
|
||||
if (digitfound) { /* {n,m} */
|
||||
m = num;
|
||||
if (m<n)
|
||||
FATAL("illegal repetition expression: class %.20s",
|
||||
lastre);
|
||||
if ((n==0) && (m==1)) {
|
||||
return QUEST;
|
||||
}
|
||||
} else { /* {n,} */
|
||||
if (n==0) return STAR;
|
||||
if (n==1) return PLUS;
|
||||
}
|
||||
} else {
|
||||
if (digitfound) { /* {n} same as {n,n} */
|
||||
n = num;
|
||||
m = num;
|
||||
} else { /* {} */
|
||||
FATAL("illegal repetition expression: class %.20s",
|
||||
lastre);
|
||||
}
|
||||
}
|
||||
if (repeat(starttok, prestr-starttok, lastatom,
|
||||
startreptok - lastatom, n, m) > 0) {
|
||||
if ((n==0) && (m==0)) {
|
||||
return EMPTYRE;
|
||||
}
|
||||
/* must rescan input for next token */
|
||||
goto rescan;
|
||||
}
|
||||
/* Failed to replace: eat up {...} characters
|
||||
and treat like just PLUS */
|
||||
return PLUS;
|
||||
} else if (c == '\0') {
|
||||
FATAL("nonterminated character class %.20s",
|
||||
lastre);
|
||||
} else if (isdigit(c)) {
|
||||
num = 10 * num + c - '0';
|
||||
digitfound = 1;
|
||||
} else if (c == ',') {
|
||||
if (commafound)
|
||||
FATAL("illegal repetition expression: class %.20s",
|
||||
lastre);
|
||||
/* looking for {n,} or {n,m} */
|
||||
commafound = 1;
|
||||
n = num;
|
||||
digitfound = 0; /* reset */
|
||||
num = 0;
|
||||
} else {
|
||||
FATAL("illegal repetition expression: class %.20s",
|
||||
lastre);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -860,7 +1111,7 @@ int cgoto(fa *f, int s, int c)
|
||||
maxsetvec *= 4;
|
||||
setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
|
||||
tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
|
||||
if (setvec == NULL || tmpset == NULL)
|
||||
if (setvec == 0 || tmpset == 0)
|
||||
overflo("out of space in cgoto()");
|
||||
}
|
||||
for (i = 0; i <= f->accept; i++)
|
||||
@ -882,7 +1133,7 @@ int cgoto(fa *f, int s, int c)
|
||||
maxsetvec *= 4;
|
||||
setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
|
||||
tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
|
||||
if (setvec == NULL || tmpset == NULL)
|
||||
if (setvec == 0 || tmpset == 0)
|
||||
overflo("cgoto overflow");
|
||||
}
|
||||
if (setvec[q[j]] == 0) {
|
||||
@ -925,7 +1176,7 @@ int cgoto(fa *f, int s, int c)
|
||||
for (i = 0; i < NCHARS; i++)
|
||||
f->gototab[f->curstat][i] = 0;
|
||||
xfree(f->posns[f->curstat]);
|
||||
if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
|
||||
if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
|
||||
overflo("out of space in cgoto");
|
||||
|
||||
f->posns[f->curstat] = p;
|
||||
|
57
bugs-fixed/README
Normal file
57
bugs-fixed/README
Normal file
@ -0,0 +1,57 @@
|
||||
List of bugs fixed.
|
||||
|
||||
1. ofs-rebuild: OFS value used to rebuild the record was incorrect.
|
||||
Fixed August 19, 2014. Revised fix August 2018.
|
||||
|
||||
2. system-status: Instead of a floating-point division by 256, use
|
||||
the wait(2) macros to create a reasonable exit status. Fixed March 12, 2016.
|
||||
|
||||
3. space: Use provided xisblank() function instead of isspace() for
|
||||
matching [[:blank:]].
|
||||
|
||||
4. a-format: Add POSIX standard %a and %A to supported formats. Check
|
||||
at runtime that this format is available.
|
||||
|
||||
5. decr-NF: Decrementing NF did not change $0. This is a decades-old
|
||||
bug. There are interactions with the old and new value of OFS as well.
|
||||
Most of the fix came from the NetBSD awk.
|
||||
|
||||
6. string-conv: String conversions of scalars were sticky. Once a
|
||||
conversion to string happened, even with OFMT, that value was used until
|
||||
a new numeric value was assigned, even if OFMT differed from CONVFMT,
|
||||
and also if CONVFMT changed.
|
||||
|
||||
7. unary-plus: Unary plus on a string constant returned the string.
|
||||
Instead, it should convert the value to numeric and give that value.
|
||||
|
||||
8. concat-assign-same: Concatenation previously evaluated both sides of the
|
||||
expression before doing its work, which, since assign() evaluates to the cell
|
||||
being assigned to, meant that expressions like "print (a = 1) (a = 2)" would
|
||||
print "22" rather than "12".
|
||||
|
||||
9. missing-precision: When using the format string "%*s", the precision
|
||||
argument was used without checking if it was present first.
|
||||
|
||||
10. missing-precision: When using the format string "%*s", the precision
|
||||
argument was used without checking if it was present first.
|
||||
|
||||
11. fmt-overflow: The buffer used for OFMT/CONVFMT conversions was written
|
||||
to with sprintf(), which meant that some conversions could write past the
|
||||
end.
|
||||
|
||||
12. numeric-subsep, numeric-fs, numeric-output-seps, numeric-rs: If SUBSEP,
|
||||
FS, RS, OFS, or ORS were set to a numeric value, then their string values
|
||||
wouldn't always be generated before being needed.
|
||||
|
||||
13. subsep-overflow: The length of SUBSEP needs to be rechecked after
|
||||
calling execute(), in case SUBSEP itself has been changed.
|
||||
|
||||
14. split-fs-from-array: If the third argument to split() comes from the
|
||||
array passed as the second argument, then split() would previously read
|
||||
from the freed memory and possibly produce incorrect results (depending
|
||||
on the system's malloc()/free() behaviour.)
|
||||
|
||||
15. getline-numeric: The `getline xx < file' syntax did not check if
|
||||
values were numeric, in discordance with POSIX. Test case adapted from
|
||||
one posted by Ben Bacarisse <ben.usenet@bsb.me.uk> in comp.lang.awk,
|
||||
January 2019.
|
3
bugs-fixed/a-format.awk
Normal file
3
bugs-fixed/a-format.awk
Normal file
@ -0,0 +1,3 @@
|
||||
BEGIN {
|
||||
printf("%a\n", 42)
|
||||
}
|
3
bugs-fixed/a-format.bad
Normal file
3
bugs-fixed/a-format.bad
Normal file
@ -0,0 +1,3 @@
|
||||
nawk: weird printf conversion %a
|
||||
source line number 2
|
||||
%a42
|
1
bugs-fixed/a-format.ok
Normal file
1
bugs-fixed/a-format.ok
Normal file
@ -0,0 +1 @@
|
||||
0x1.5p+5
|
4
bugs-fixed/concat-assign-same.awk
Normal file
4
bugs-fixed/concat-assign-same.awk
Normal file
@ -0,0 +1,4 @@
|
||||
BEGIN {
|
||||
print (a = 1) (a = 2) (a = 3) (a = 4) (a = 5);
|
||||
print (a = 1), (a = 2), (a = 3), (a = 4), (a = 5);
|
||||
}
|
2
bugs-fixed/concat-assign-same.bad
Normal file
2
bugs-fixed/concat-assign-same.bad
Normal file
@ -0,0 +1,2 @@
|
||||
22345
|
||||
1 2 3 4 5
|
2
bugs-fixed/concat-assign-same.ok
Normal file
2
bugs-fixed/concat-assign-same.ok
Normal file
@ -0,0 +1,2 @@
|
||||
12345
|
||||
1 2 3 4 5
|
11
bugs-fixed/decr-NF.awk
Normal file
11
bugs-fixed/decr-NF.awk
Normal file
@ -0,0 +1,11 @@
|
||||
BEGIN {
|
||||
$0 = "a b c d e f"
|
||||
print NF
|
||||
OFS = ":"
|
||||
NF--
|
||||
print $0
|
||||
print NF
|
||||
NF++
|
||||
print $0
|
||||
print NF
|
||||
}
|
5
bugs-fixed/decr-NF.bad
Normal file
5
bugs-fixed/decr-NF.bad
Normal file
@ -0,0 +1,5 @@
|
||||
6
|
||||
a b c d e f
|
||||
5
|
||||
a b c d e f
|
||||
6
|
5
bugs-fixed/decr-NF.ok
Normal file
5
bugs-fixed/decr-NF.ok
Normal file
@ -0,0 +1,5 @@
|
||||
6
|
||||
a:b:c:d:e
|
||||
5
|
||||
a:b:c:d:e:
|
||||
6
|
1
bugs-fixed/fmt-overflow.awk
Normal file
1
bugs-fixed/fmt-overflow.awk
Normal file
@ -0,0 +1 @@
|
||||
BEGIN { OFMT = "%.1000f"; print 1.25; }
|
1
bugs-fixed/fmt-overflow.ok
Normal file
1
bugs-fixed/fmt-overflow.ok
Normal file
@ -0,0 +1 @@
|
||||
1.2500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
|
13
bugs-fixed/fs-overflow.awk
Normal file
13
bugs-fixed/fs-overflow.awk
Normal file
@ -0,0 +1,13 @@
|
||||
function foo() {
|
||||
a = "";
|
||||
for (i = 0; i < 10000; i++) {
|
||||
a = a "c";
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
BEGIN {
|
||||
FS = foo();
|
||||
$0="foo";
|
||||
print $1;
|
||||
}
|
6
bugs-fixed/getline-numeric.awk
Normal file
6
bugs-fixed/getline-numeric.awk
Normal file
@ -0,0 +1,6 @@
|
||||
{
|
||||
print $0, ($0 <= 50 ? "<=" : ">"), 50
|
||||
getline dd < ARGV[1]
|
||||
print dd, (dd <= 50 ? "<=" : ">"), 50
|
||||
if (dd == $0) print "same"
|
||||
}
|
3
bugs-fixed/getline-numeric.bad
Normal file
3
bugs-fixed/getline-numeric.bad
Normal file
@ -0,0 +1,3 @@
|
||||
120 > 50
|
||||
120 <= 50
|
||||
same
|
1
bugs-fixed/getline-numeric.in
Normal file
1
bugs-fixed/getline-numeric.in
Normal file
@ -0,0 +1 @@
|
||||
120
|
3
bugs-fixed/getline-numeric.ok
Normal file
3
bugs-fixed/getline-numeric.ok
Normal file
@ -0,0 +1,3 @@
|
||||
120 > 50
|
||||
120 > 50
|
||||
same
|
1
bugs-fixed/missing-precision.awk
Normal file
1
bugs-fixed/missing-precision.awk
Normal file
@ -0,0 +1 @@
|
||||
BEGIN { printf("%*s"); }
|
2
bugs-fixed/missing-precision.ok
Normal file
2
bugs-fixed/missing-precision.ok
Normal file
@ -0,0 +1,2 @@
|
||||
./a.out: not enough args in printf(%*s)
|
||||
source line number 1
|
1
bugs-fixed/negative-nf.awk
Normal file
1
bugs-fixed/negative-nf.awk
Normal file
@ -0,0 +1 @@
|
||||
BEGIN { NF = -5; }
|
2
bugs-fixed/negative-nf.ok
Normal file
2
bugs-fixed/negative-nf.ok
Normal file
@ -0,0 +1,2 @@
|
||||
./a.out: cannot set NF to a negative value
|
||||
source line number 1
|
6
bugs-fixed/nf-self-assign.awk
Normal file
6
bugs-fixed/nf-self-assign.awk
Normal file
@ -0,0 +1,6 @@
|
||||
BEGIN {
|
||||
$0="a b c";
|
||||
OFS=",";
|
||||
NF = NF;
|
||||
print;
|
||||
}
|
1
bugs-fixed/nf-self-assign.bad
Normal file
1
bugs-fixed/nf-self-assign.bad
Normal file
@ -0,0 +1 @@
|
||||
a b c
|
1
bugs-fixed/nf-self-assign.ok
Normal file
1
bugs-fixed/nf-self-assign.ok
Normal file
@ -0,0 +1 @@
|
||||
a,b,c
|
5
bugs-fixed/numeric-fs.awk
Normal file
5
bugs-fixed/numeric-fs.awk
Normal file
@ -0,0 +1,5 @@
|
||||
BEGIN {
|
||||
FS = 0; split("20202", a); print a[1];
|
||||
FS = 1; $0="31313"; print $1;
|
||||
FS = 2; "echo 42424" | getline; print $1;
|
||||
}
|
3
bugs-fixed/numeric-fs.ok
Normal file
3
bugs-fixed/numeric-fs.ok
Normal file
@ -0,0 +1,3 @@
|
||||
2
|
||||
3
|
||||
4
|
8
bugs-fixed/numeric-output-seps.awk
Normal file
8
bugs-fixed/numeric-output-seps.awk
Normal file
@ -0,0 +1,8 @@
|
||||
BEGIN {
|
||||
$0 = "a b c";
|
||||
OFS = 1;
|
||||
ORS = 2;
|
||||
NF = 2;
|
||||
print;
|
||||
print "d", "e";
|
||||
}
|
2
bugs-fixed/numeric-output-seps.bad
Normal file
2
bugs-fixed/numeric-output-seps.bad
Normal file
@ -0,0 +1,2 @@
|
||||
a b
|
||||
d e
|
1
bugs-fixed/numeric-output-seps.ok
Normal file
1
bugs-fixed/numeric-output-seps.ok
Normal file
@ -0,0 +1 @@
|
||||
a1b2d1e2
|
6
bugs-fixed/numeric-rs.awk
Normal file
6
bugs-fixed/numeric-rs.awk
Normal file
@ -0,0 +1,6 @@
|
||||
BEGIN {
|
||||
RS = 1;
|
||||
while ("echo a1b1c1d" | getline > 0) {
|
||||
print $1;
|
||||
}
|
||||
}
|
1
bugs-fixed/numeric-rs.bad
Normal file
1
bugs-fixed/numeric-rs.bad
Normal file
@ -0,0 +1 @@
|
||||
a1b1c1d
|
4
bugs-fixed/numeric-rs.ok
Normal file
4
bugs-fixed/numeric-rs.ok
Normal file
@ -0,0 +1,4 @@
|
||||
a
|
||||
b
|
||||
c
|
||||
d
|
5
bugs-fixed/numeric-subsep.awk
Normal file
5
bugs-fixed/numeric-subsep.awk
Normal file
@ -0,0 +1,5 @@
|
||||
BEGIN {
|
||||
SUBSEP = 123.456;
|
||||
a["hello", "world"] = "foo";
|
||||
print a["hello" SUBSEP "world"];
|
||||
}
|
1
bugs-fixed/numeric-subsep.bad
Normal file
1
bugs-fixed/numeric-subsep.bad
Normal file
@ -0,0 +1 @@
|
||||
|
1
bugs-fixed/numeric-subsep.ok
Normal file
1
bugs-fixed/numeric-subsep.ok
Normal file
@ -0,0 +1 @@
|
||||
foo
|
17
bugs-fixed/ofs-rebuild.awk
Normal file
17
bugs-fixed/ofs-rebuild.awk
Normal file
@ -0,0 +1,17 @@
|
||||
# The bug here is that nawk should use the value of OFS that
|
||||
# was current when $0 became invalid to rebuild the record.
|
||||
|
||||
BEGIN {
|
||||
OFS = ":"
|
||||
$0 = "a b c d e f g"
|
||||
$3 = "3333"
|
||||
# Conceptually, $0 should now be "a:b:3333:d:e:f:g"
|
||||
|
||||
# Change OFS after (conceptually) rebuilding the record
|
||||
OFS = "<>"
|
||||
|
||||
# Unmodified nawk prints "a<>b<>3333<>d<>e<>f<>g" because
|
||||
# it delays rebuilding $0 until it's needed, and then it uses
|
||||
# the current value of OFS. Oops.
|
||||
print
|
||||
}
|
1
bugs-fixed/ofs-rebuild.bad
Normal file
1
bugs-fixed/ofs-rebuild.bad
Normal file
@ -0,0 +1 @@
|
||||
a<>b<>3333<>d<>e<>f<>g
|
1
bugs-fixed/ofs-rebuild.ok
Normal file
1
bugs-fixed/ofs-rebuild.ok
Normal file
@ -0,0 +1 @@
|
||||
a:b:3333:d:e:f:g
|
22
bugs-fixed/space.awk
Normal file
22
bugs-fixed/space.awk
Normal file
@ -0,0 +1,22 @@
|
||||
BEGIN {
|
||||
c[" "] = "\" \""
|
||||
c["\a"] = "\\a"
|
||||
c["\b"] = "\\b"
|
||||
c["\f"] = "\\f"
|
||||
c["\n"] = "\\n"
|
||||
c["\r"] = "\\r"
|
||||
c["\t"] = "\\t"
|
||||
c["\v"] = "\\v"
|
||||
|
||||
sort = "LC_ALL=C sort"
|
||||
|
||||
for (i in c)
|
||||
printf("%s %s [[:space:]]\n", c[i],
|
||||
i ~ /[[:space:]]/ ? "~" : "!~") | sort
|
||||
|
||||
for (i in c)
|
||||
printf("%s %s [[:blank:]]\n", c[i],
|
||||
i ~ /[[:blank:]]/ ? "~" : "!~") | sort
|
||||
|
||||
close(sort)
|
||||
}
|
16
bugs-fixed/space.bad
Normal file
16
bugs-fixed/space.bad
Normal file
@ -0,0 +1,16 @@
|
||||
" " ~ [[:blank:]]
|
||||
" " ~ [[:space:]]
|
||||
\a !~ [[:blank:]]
|
||||
\a !~ [[:space:]]
|
||||
\b !~ [[:blank:]]
|
||||
\b !~ [[:space:]]
|
||||
\f ~ [[:blank:]]
|
||||
\f ~ [[:space:]]
|
||||
\n ~ [[:blank:]]
|
||||
\n ~ [[:space:]]
|
||||
\r ~ [[:blank:]]
|
||||
\r ~ [[:space:]]
|
||||
\t ~ [[:blank:]]
|
||||
\t ~ [[:space:]]
|
||||
\v ~ [[:blank:]]
|
||||
\v ~ [[:space:]]
|
16
bugs-fixed/space.ok
Normal file
16
bugs-fixed/space.ok
Normal file
@ -0,0 +1,16 @@
|
||||
" " ~ [[:blank:]]
|
||||
" " ~ [[:space:]]
|
||||
\a !~ [[:blank:]]
|
||||
\a !~ [[:space:]]
|
||||
\b !~ [[:blank:]]
|
||||
\b !~ [[:space:]]
|
||||
\f !~ [[:blank:]]
|
||||
\f ~ [[:space:]]
|
||||
\n !~ [[:blank:]]
|
||||
\n ~ [[:space:]]
|
||||
\r !~ [[:blank:]]
|
||||
\r ~ [[:space:]]
|
||||
\t ~ [[:blank:]]
|
||||
\t ~ [[:space:]]
|
||||
\v !~ [[:blank:]]
|
||||
\v ~ [[:space:]]
|
5
bugs-fixed/split-fs-from-array.awk
Normal file
5
bugs-fixed/split-fs-from-array.awk
Normal file
@ -0,0 +1,5 @@
|
||||
BEGIN {
|
||||
a[1] = "elephantie"
|
||||
a[2] = "e"
|
||||
print split(a[1],a,a[2]), a[2], a[3], split(a[2],a,a[2])
|
||||
}
|
1
bugs-fixed/split-fs-from-array.ok
Normal file
1
bugs-fixed/split-fs-from-array.ok
Normal file
@ -0,0 +1 @@
|
||||
4 l phanti 2
|
13
bugs-fixed/string-conv.awk
Normal file
13
bugs-fixed/string-conv.awk
Normal file
@ -0,0 +1,13 @@
|
||||
BEGIN {
|
||||
OFMT = ">>%.6g<<"
|
||||
a = 12.1234
|
||||
print "a =", a
|
||||
b = a ""
|
||||
print "1 ->", b
|
||||
CONVFMT = "%2.2f"
|
||||
b = a ""
|
||||
print "2 ->", b
|
||||
CONVFMT = "%.12g"
|
||||
b = a ""
|
||||
print "3 ->", b
|
||||
}
|
4
bugs-fixed/string-conv.bad
Normal file
4
bugs-fixed/string-conv.bad
Normal file
@ -0,0 +1,4 @@
|
||||
a = >>12.1234<<
|
||||
1 -> >>12.1234<<
|
||||
2 -> >>12.1234<<
|
||||
3 -> >>12.1234<<
|
4
bugs-fixed/string-conv.ok
Normal file
4
bugs-fixed/string-conv.ok
Normal file
@ -0,0 +1,4 @@
|
||||
a = >>12.1234<<
|
||||
1 -> 12.1234
|
||||
2 -> 12.12
|
||||
3 -> 12.1234
|
24
bugs-fixed/subsep-overflow.awk
Normal file
24
bugs-fixed/subsep-overflow.awk
Normal file
@ -0,0 +1,24 @@
|
||||
function foo(c, n) {
|
||||
s = "";
|
||||
for (i = 0; i < n; i++) {
|
||||
s = s c;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
BEGIN {
|
||||
str1 = foo("a", 4500);
|
||||
str2 = foo("b", 9000);
|
||||
|
||||
a[(SUBSEP = str1), (SUBSEP = str2), "c"] = 1;
|
||||
|
||||
for (k in a) {
|
||||
print length(k);
|
||||
}
|
||||
|
||||
print (((SUBSEP = str1), (SUBSEP = str2), "c") in a);
|
||||
print (((SUBSEP = str1) SUBSEP (SUBSEP = str2) SUBSEP "c") in a);
|
||||
delete a[(SUBSEP = str1), (SUBSEP = str2), "c"];
|
||||
print (((SUBSEP = str1), (SUBSEP = str2), "c") in a);
|
||||
print (((SUBSEP = str1) SUBSEP (SUBSEP = str2) SUBSEP "c") in a);
|
||||
}
|
5
bugs-fixed/subsep-overflow.ok
Normal file
5
bugs-fixed/subsep-overflow.ok
Normal file
@ -0,0 +1,5 @@
|
||||
27001
|
||||
1
|
||||
1
|
||||
0
|
||||
0
|
19
bugs-fixed/system-status.awk
Normal file
19
bugs-fixed/system-status.awk
Normal file
@ -0,0 +1,19 @@
|
||||
# Unmodified nawk prints the 16 bit exit status divided by 256, but
|
||||
# does so using floating point arithmetic, yielding strange results.
|
||||
#
|
||||
# The fix is to use the various macros defined for wait(2) and to
|
||||
# use the signal number + 256 for death by signal, or signal number + 512
|
||||
# for death by signal with core dump.
|
||||
|
||||
BEGIN {
|
||||
status = system("exit 42")
|
||||
print "normal status", status
|
||||
|
||||
status = system("kill -HUP $$")
|
||||
print "death by signal status", status
|
||||
|
||||
status = system("kill -ABRT $$")
|
||||
print "death by signal with core dump status", status
|
||||
|
||||
system("rm -f core*")
|
||||
}
|
3
bugs-fixed/system-status.bad
Normal file
3
bugs-fixed/system-status.bad
Normal file
@ -0,0 +1,3 @@
|
||||
normal status 42
|
||||
death by signal status 0.00390625
|
||||
death by signal with core dump status 0.523438
|
3
bugs-fixed/system-status.ok
Normal file
3
bugs-fixed/system-status.ok
Normal file
@ -0,0 +1,3 @@
|
||||
normal status 42
|
||||
death by signal status 257
|
||||
death by signal with core dump status 518
|
4
bugs-fixed/unary-plus.awk
Normal file
4
bugs-fixed/unary-plus.awk
Normal file
@ -0,0 +1,4 @@
|
||||
BEGIN {
|
||||
print +"q"
|
||||
print +"43.12345678912345678"
|
||||
}
|
2
bugs-fixed/unary-plus.bad
Normal file
2
bugs-fixed/unary-plus.bad
Normal file
@ -0,0 +1,2 @@
|
||||
q
|
||||
43.12345678912345678
|
2
bugs-fixed/unary-plus.ok
Normal file
2
bugs-fixed/unary-plus.ok
Normal file
@ -0,0 +1,2 @@
|
||||
0
|
||||
43.1235
|
32
lex.c
32
lex.c
@ -170,10 +170,10 @@ int reg = 0; /* 1 => return a REGEXPR now */
|
||||
int yylex(void)
|
||||
{
|
||||
int c;
|
||||
static char *buf = NULL;
|
||||
static char *buf = 0;
|
||||
static int bufsize = 5; /* BUG: setting this small causes core dump! */
|
||||
|
||||
if (buf == NULL && (buf = (char *) malloc(bufsize)) == NULL)
|
||||
if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
|
||||
FATAL( "out of space in yylex" );
|
||||
if (sc) {
|
||||
sc = 0;
|
||||
@ -198,6 +198,7 @@ int yylex(void)
|
||||
yylval.i = c;
|
||||
switch (c) {
|
||||
case '\n': /* {EOL} */
|
||||
lineno++;
|
||||
RET(NL);
|
||||
case '\r': /* assume \n is coming */
|
||||
case ' ': /* {WS}+ */
|
||||
@ -213,6 +214,7 @@ int yylex(void)
|
||||
case '\\':
|
||||
if (peek() == '\n') {
|
||||
input();
|
||||
lineno++;
|
||||
} else if (peek() == '\r') {
|
||||
input(); input(); /* \n */
|
||||
lineno++;
|
||||
@ -358,10 +360,10 @@ int string(void)
|
||||
{
|
||||
int c, n;
|
||||
char *s, *bp;
|
||||
static char *buf = NULL;
|
||||
static char *buf = 0;
|
||||
static int bufsz = 500;
|
||||
|
||||
if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
|
||||
if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
|
||||
FATAL("out of space for strings");
|
||||
for (bp = buf; (c = input()) != '"'; ) {
|
||||
if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
|
||||
@ -370,10 +372,11 @@ int string(void)
|
||||
case '\n':
|
||||
case '\r':
|
||||
case 0:
|
||||
*bp = '\0';
|
||||
SYNTAX( "non-terminated string %.10s...", buf );
|
||||
lineno++;
|
||||
if (c == 0) /* hopeless */
|
||||
FATAL( "giving up" );
|
||||
lineno++;
|
||||
break;
|
||||
case '\\':
|
||||
c = input();
|
||||
@ -504,17 +507,18 @@ void startreg(void) /* next call to yylex will return a regular expression */
|
||||
int regexpr(void)
|
||||
{
|
||||
int c;
|
||||
static char *buf = NULL;
|
||||
static char *buf = 0;
|
||||
static int bufsz = 500;
|
||||
char *bp;
|
||||
|
||||
if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
|
||||
if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
|
||||
FATAL("out of space for rex expr");
|
||||
bp = buf;
|
||||
for ( ; (c = input()) != '/' && c != 0; ) {
|
||||
if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
|
||||
FATAL("out of space for reg expr %.10s...", buf);
|
||||
if (c == '\n') {
|
||||
*bp = '\0';
|
||||
SYNTAX( "newline in regular expression %.10s...", buf );
|
||||
unput('\n');
|
||||
break;
|
||||
@ -539,7 +543,7 @@ char ebuf[300];
|
||||
char *ep = ebuf;
|
||||
char yysbuf[100]; /* pushback buffer */
|
||||
char *yysptr = yysbuf;
|
||||
FILE *yyin = NULL;
|
||||
FILE *yyin = 0;
|
||||
|
||||
int input(void) /* get next lexical input character */
|
||||
{
|
||||
@ -553,19 +557,19 @@ int input(void) /* get next lexical input character */
|
||||
lexprog++;
|
||||
} else /* awk -f ... */
|
||||
c = pgetc();
|
||||
if (c == '\n')
|
||||
lineno++;
|
||||
else if (c == EOF)
|
||||
if (c == EOF)
|
||||
c = 0;
|
||||
if (ep >= ebuf + sizeof ebuf)
|
||||
ep = ebuf;
|
||||
return *ep++ = c;
|
||||
*ep = c;
|
||||
if (c != 0) {
|
||||
ep++;
|
||||
}
|
||||
return (c);
|
||||
}
|
||||
|
||||
void unput(int c) /* put lexical character back on input */
|
||||
{
|
||||
if (c == '\n')
|
||||
lineno--;
|
||||
if (yysptr >= yysbuf + sizeof(yysbuf))
|
||||
FATAL("pushed back too much: %.20s...", yysbuf);
|
||||
*yysptr++ = c;
|
||||
|
36
lib.c
36
lib.c
@ -59,7 +59,7 @@ void recinit(unsigned int n)
|
||||
{
|
||||
if ( (record = (char *) malloc(n)) == NULL
|
||||
|| (fields = (char *) malloc(n+1)) == NULL
|
||||
|| (fldtab = (Cell **) malloc((nfields+1) * sizeof(Cell *))) == NULL
|
||||
|| (fldtab = (Cell **) malloc((nfields+2) * sizeof(Cell *))) == NULL
|
||||
|| (fldtab[0] = (Cell *) malloc(sizeof(Cell))) == NULL )
|
||||
FATAL("out of space for $0 and fields");
|
||||
*fldtab[0] = dollar0;
|
||||
@ -189,12 +189,13 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf *
|
||||
int sep, c;
|
||||
char *rr, *buf = *pbuf;
|
||||
int bufsize = *pbufsize;
|
||||
char *rs = getsval(rsloc);
|
||||
|
||||
if (strlen(*FS) >= sizeof(inputFS))
|
||||
if (strlen(getsval(fsloc)) >= sizeof (inputFS))
|
||||
FATAL("field separator %.10s... is too long", *FS);
|
||||
/*fflush(stdout); avoids some buffering problem but makes it 25% slower*/
|
||||
strcpy(inputFS, *FS); /* for subsequent field splitting */
|
||||
if ((sep = **RS) == 0) {
|
||||
if ((sep = *rs) == 0) {
|
||||
sep = '\n';
|
||||
while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */
|
||||
;
|
||||
@ -208,7 +209,7 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf *
|
||||
FATAL("input record `%.30s...' too long", buf);
|
||||
*rr++ = c;
|
||||
}
|
||||
if (**RS == sep || c == EOF)
|
||||
if (*rs == sep || c == EOF)
|
||||
break;
|
||||
if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
|
||||
break;
|
||||
@ -283,6 +284,8 @@ void fldbld(void) /* create fields from current record */
|
||||
}
|
||||
fr = fields;
|
||||
i = 0; /* number of fields accumulated here */
|
||||
if (strlen(getsval(fsloc)) >= sizeof (inputFS))
|
||||
FATAL("field separator %.10s... is too long", *FS);
|
||||
strcpy(inputFS, *FS);
|
||||
if (strlen(inputFS) > 1) { /* it's a regular expression */
|
||||
i = refldbld(r, inputFS);
|
||||
@ -356,6 +359,7 @@ void fldbld(void) /* create fields from current record */
|
||||
}
|
||||
}
|
||||
setfval(nfloc, (Awkfloat) lastfld);
|
||||
donerec = 1; /* restore */
|
||||
if (dbg) {
|
||||
for (j = 0; j <= lastfld; j++) {
|
||||
p = fldtab[j];
|
||||
@ -387,6 +391,21 @@ void newfld(int n) /* add field n after end of existing lastfld */
|
||||
setfval(nfloc, (Awkfloat) n);
|
||||
}
|
||||
|
||||
void setlastfld(int n) /* set lastfld cleaning fldtab cells if necessary */
|
||||
{
|
||||
if (n < 0)
|
||||
FATAL("cannot set NF to a negative value");
|
||||
if (n > nfields)
|
||||
growfldtab(n);
|
||||
|
||||
if (lastfld < n)
|
||||
cleanfld(lastfld+1, n);
|
||||
else
|
||||
cleanfld(n+1, lastfld);
|
||||
|
||||
lastfld = n;
|
||||
}
|
||||
|
||||
Cell *fieldadr(int n) /* get nth field */
|
||||
{
|
||||
if (n < 0)
|
||||
@ -465,6 +484,7 @@ void recbld(void) /* create $0 from $1..$NF if necessary */
|
||||
{
|
||||
int i;
|
||||
char *r, *p;
|
||||
char *sep = getsval(ofsloc);
|
||||
|
||||
if (donerec == 1)
|
||||
return;
|
||||
@ -476,9 +496,9 @@ void recbld(void) /* create $0 from $1..$NF if necessary */
|
||||
while ((*r = *p++) != 0)
|
||||
r++;
|
||||
if (i < *NF) {
|
||||
if (!adjbuf(&record, &recsize, 2+strlen(*OFS)+r-record, recsize, &r, "recbld 2"))
|
||||
if (!adjbuf(&record, &recsize, 2+strlen(sep)+r-record, recsize, &r, "recbld 2"))
|
||||
FATAL("created $0 `%.30s...' too long", record);
|
||||
for (p = *OFS; (*r = *p++) != 0; )
|
||||
for (p = sep; (*r = *p++) != 0; )
|
||||
r++;
|
||||
}
|
||||
}
|
||||
@ -618,6 +638,8 @@ void eprint(void) /* try to print context around error */
|
||||
|
||||
if (compile_time == 2 || compile_time == 0 || been_here++ > 0)
|
||||
return;
|
||||
if (ebuf == ep)
|
||||
return;
|
||||
p = ep - 1;
|
||||
if (p > ebuf && *p == '\n')
|
||||
p--;
|
||||
@ -681,7 +703,7 @@ int isclvar(const char *s) /* is s of form var=something ? */
|
||||
for ( ; *s; s++)
|
||||
if (!(isalnum((uschar) *s) || *s == '_'))
|
||||
break;
|
||||
return *s == '=' && s > os && *(s+1) != '=';
|
||||
return *s == '=' && s > os;
|
||||
}
|
||||
|
||||
/* strtod is supposed to be a proper test of what's a valid number */
|
||||
|
12
main.c
12
main.c
@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
||||
THIS SOFTWARE.
|
||||
****************************************************************/
|
||||
|
||||
const char *version = "version 20121220";
|
||||
const char *version = "version 20190529";
|
||||
|
||||
#define DEBUG
|
||||
#include <stdio.h>
|
||||
@ -54,6 +54,13 @@ int curpfile = 0; /* current filename */
|
||||
|
||||
int safe = 0; /* 1 => "safe" mode */
|
||||
|
||||
/* Can this work with recursive calls? I don't think so.
|
||||
void segvcatch(int n)
|
||||
{
|
||||
FATAL("segfault. Do you have an unbounded recursive call?", n);
|
||||
}
|
||||
*/
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
const char *fs = NULL;
|
||||
@ -68,6 +75,7 @@ int main(int argc, char *argv[])
|
||||
exit(1);
|
||||
}
|
||||
signal(SIGFPE, fpecatch);
|
||||
/*signal(SIGSEGV, segvcatch); experiment */
|
||||
|
||||
srand_seed = 1;
|
||||
srand(srand_seed);
|
||||
@ -80,7 +88,7 @@ int main(int argc, char *argv[])
|
||||
exit(0);
|
||||
break;
|
||||
}
|
||||
if (strncmp(argv[1], "--", 2) == 0) { /* explicit end of args */
|
||||
if (strcmp(argv[1], "--") == 0) { /* explicit end of args */
|
||||
argc--;
|
||||
argv++;
|
||||
break;
|
||||
|
53
makefile
53
makefile
@ -23,18 +23,21 @@
|
||||
# ****************************************************************/
|
||||
|
||||
CFLAGS = -g
|
||||
CFLAGS = -O2
|
||||
CFLAGS =
|
||||
CFLAGS = -O2
|
||||
|
||||
CC = gcc -Wall -g -Wwrite-strings
|
||||
CC = gcc -fprofile-arcs -ftest-coverage # then gcov f1.c; cat f1.c.gcov
|
||||
CC = gcc -g -Wall -pedantic
|
||||
CC = gcc -O4 -Wall -pedantic -fno-strict-aliasing
|
||||
# compiler options
|
||||
#CC = gcc -Wall -g -Wwrite-strings
|
||||
#CC = gcc -O4 -Wall -pedantic -fno-strict-aliasing
|
||||
#CC = gcc -fprofile-arcs -ftest-coverage # then gcov f1.c; cat f1.c.gcov
|
||||
HOSTCC = gcc -g -Wall -pedantic
|
||||
CC = $(HOSTCC) # change this if cross-compiling.
|
||||
|
||||
YACC = bison -d -y
|
||||
YACC = yacc -d -S
|
||||
# yacc options. pick one; this varies a lot by system.
|
||||
#YFLAGS = -d -S
|
||||
# -S uses sprintf in yacc parser instead of sprint
|
||||
YACC = bison -d -y
|
||||
#YACC = yacc -d
|
||||
# -S uses sprintf in yacc parser instead of sprint
|
||||
|
||||
OFILES = b.o main.o parse.o proctab.o tran.o lib.o run.o lex.o
|
||||
|
||||
@ -44,7 +47,7 @@ SOURCE = awk.h ytab.c ytab.h proto.h awkgram.y lex.c b.c main.c \
|
||||
LISTING = awk.h proto.h awkgram.y lex.c b.c main.c maketab.c parse.c \
|
||||
lib.c run.c tran.c
|
||||
|
||||
SHIP = README FIXES $(SOURCE) ytab[ch].bak makefile \
|
||||
SHIP = README LICENSE FIXES $(SOURCE) ytab[ch].bak makefile \
|
||||
awk.1
|
||||
|
||||
a.out: ytab.o $(OFILES)
|
||||
@ -52,17 +55,23 @@ a.out: ytab.o $(OFILES)
|
||||
|
||||
$(OFILES): awk.h ytab.h proto.h
|
||||
|
||||
ytab.o: awk.h proto.h awkgram.y
|
||||
#Clear dependency for parallel build: (make -j)
|
||||
#YACC generates y.tab.c and y.tab.h at the same time
|
||||
#this needs to be a static pattern rule, otherwise multiple targets
|
||||
#are mapped onto multiple executions of yacc, which overwrite
|
||||
#each other's outputs.
|
||||
y%.c y%.h: awk.h proto.h awkgram.y
|
||||
$(YACC) $(YFLAGS) awkgram.y
|
||||
mv y.tab.c ytab.c
|
||||
mv y.tab.h ytab.h
|
||||
$(CC) $(CFLAGS) -c ytab.c
|
||||
mv y.$*.c y$*.c
|
||||
mv y.$*.h y$*.h
|
||||
|
||||
ytab.h: ytab.c
|
||||
|
||||
proctab.c: maketab
|
||||
./maketab >proctab.c
|
||||
./maketab ytab.h >proctab.c
|
||||
|
||||
maketab: ytab.h maketab.c
|
||||
$(CC) $(CFLAGS) maketab.c -o maketab
|
||||
$(HOSTCC) $(CFLAGS) maketab.c -o maketab
|
||||
|
||||
bundle:
|
||||
@cp ytab.h ytabh.bak
|
||||
@ -79,8 +88,22 @@ tar:
|
||||
@zip awk.zip $(SHIP)
|
||||
ls -l awk.zip
|
||||
|
||||
gitadd:
|
||||
git add README LICENSE FIXES \
|
||||
awk.h proto.h awkgram.y lex.c b.c main.c maketab.c parse.c \
|
||||
lib.c run.c tran.c \
|
||||
makefile awk.1 awktest.tar
|
||||
|
||||
gitpush:
|
||||
# only do this once:
|
||||
# git remote add origin https://github.com/onetrueawk/awk.git
|
||||
git push -u origin master
|
||||
|
||||
names:
|
||||
@echo $(LISTING)
|
||||
|
||||
clean:
|
||||
rm -f a.out *.o *.obj maketab maketab.exe *.bb *.bbg *.da *.gcov *.gcno *.gcda # proctab.c
|
||||
|
||||
cleaner:
|
||||
rm -f a.out *.o *.obj maketab maketab.exe *.bb *.bbg *.da *.gcov *.gcno *.gcda proctab.c ytab*
|
||||
|
13
maketab.c
13
maketab.c
@ -62,6 +62,7 @@ struct xx
|
||||
{ DIVIDE, "arith", " / " },
|
||||
{ MOD, "arith", " % " },
|
||||
{ UMINUS, "arith", " -" },
|
||||
{ UPLUS, "arith", " +" },
|
||||
{ POWER, "arith", " **" },
|
||||
{ PREINCR, "incrdecr", "++" },
|
||||
{ POSTINCR, "incrdecr", "++" },
|
||||
@ -124,8 +125,12 @@ int main(int argc, char *argv[])
|
||||
for (i = SIZE; --i >= 0; )
|
||||
names[i] = "";
|
||||
|
||||
if ((fp = fopen("ytab.h", "r")) == NULL) {
|
||||
fprintf(stderr, "maketab can't open ytab.h!\n");
|
||||
if (argc != 2) {
|
||||
fprintf(stderr, "usage: maketab YTAB_H\n");
|
||||
exit(1);
|
||||
}
|
||||
if ((fp = fopen(argv[1], "r")) == NULL) {
|
||||
fprintf(stderr, "maketab can't open %s!\n", argv[1]);
|
||||
exit(1);
|
||||
}
|
||||
printf("static char *printname[%d] = {\n", SIZE);
|
||||
@ -134,6 +139,8 @@ int main(int argc, char *argv[])
|
||||
n = sscanf(buf, "%1c %s %s %d", &c, def, name, &tok);
|
||||
if (c != '#' || (n != 4 && strcmp(def,"define") != 0)) /* not a valid #define */
|
||||
continue;
|
||||
if (strcmp(name, "YYSTYPE_IS_DECLARED") == 0)
|
||||
continue;
|
||||
if (tok < FIRSTTOKEN || tok > LASTTOKEN) {
|
||||
/* fprintf(stderr, "maketab funny token %d %s ignored\n", tok, buf); */
|
||||
continue;
|
||||
@ -149,7 +156,7 @@ int main(int argc, char *argv[])
|
||||
table[p->token-FIRSTTOKEN] = p->name;
|
||||
printf("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE);
|
||||
for (i=0; i<SIZE; i++)
|
||||
if (table[i]==NULL)
|
||||
if (table[i]==0)
|
||||
printf("\tnullproc,\t/* %s */\n", names[i]);
|
||||
else
|
||||
printf("\t%s,\t/* %s */\n", table[i], names[i]);
|
||||
|
2
parse.c
2
parse.c
@ -259,7 +259,7 @@ int isarg(const char *s) /* is s in argument list for current function? */
|
||||
Node *p = arglist;
|
||||
int n;
|
||||
|
||||
for (n = 0; p != NULL; p = p->nnext, n++)
|
||||
for (n = 0; p != 0; p = p->nnext, n++)
|
||||
if (strcmp(((Cell *)(p->narg[0]))->nval, s) == 0)
|
||||
return n;
|
||||
return -1;
|
||||
|
209
proctab.c
Normal file
209
proctab.c
Normal file
@ -0,0 +1,209 @@
|
||||
#include <stdio.h>
|
||||
#include "awk.h"
|
||||
#include "ytab.h"
|
||||
|
||||
/*
 * Printable name of every token, indexed by (token - FIRSTTOKEN); the
 * comment after each entry is that token's numeric value.  tokname()
 * reads this table.  The order must not be changed by hand: maketab
 * writes this file (see the proctab.c rule in the makefile).
 */
static char *printname[94] = {
|
||||
(char *) "FIRSTTOKEN", /* 258 */
|
||||
(char *) "PROGRAM", /* 259 */
|
||||
(char *) "PASTAT", /* 260 */
|
||||
(char *) "PASTAT2", /* 261 */
|
||||
(char *) "XBEGIN", /* 262 */
|
||||
(char *) "XEND", /* 263 */
|
||||
(char *) "NL", /* 264 */
|
||||
(char *) "ARRAY", /* 265 */
|
||||
(char *) "MATCH", /* 266 */
|
||||
(char *) "NOTMATCH", /* 267 */
|
||||
(char *) "MATCHOP", /* 268 */
|
||||
(char *) "FINAL", /* 269 */
|
||||
(char *) "DOT", /* 270 */
|
||||
(char *) "ALL", /* 271 */
|
||||
(char *) "CCL", /* 272 */
|
||||
(char *) "NCCL", /* 273 */
|
||||
(char *) "CHAR", /* 274 */
|
||||
(char *) "OR", /* 275 */
|
||||
(char *) "STAR", /* 276 */
|
||||
(char *) "QUEST", /* 277 */
|
||||
(char *) "PLUS", /* 278 */
|
||||
(char *) "EMPTYRE", /* 279 */
|
||||
(char *) "AND", /* 280 */
|
||||
(char *) "BOR", /* 281 */
|
||||
(char *) "APPEND", /* 282 */
|
||||
(char *) "EQ", /* 283 */
|
||||
(char *) "GE", /* 284 */
|
||||
(char *) "GT", /* 285 */
|
||||
(char *) "LE", /* 286 */
|
||||
(char *) "LT", /* 287 */
|
||||
(char *) "NE", /* 288 */
|
||||
(char *) "IN", /* 289 */
|
||||
(char *) "ARG", /* 290 */
|
||||
(char *) "BLTIN", /* 291 */
|
||||
(char *) "BREAK", /* 292 */
|
||||
(char *) "CLOSE", /* 293 */
|
||||
(char *) "CONTINUE", /* 294 */
|
||||
(char *) "DELETE", /* 295 */
|
||||
(char *) "DO", /* 296 */
|
||||
(char *) "EXIT", /* 297 */
|
||||
(char *) "FOR", /* 298 */
|
||||
(char *) "FUNC", /* 299 */
|
||||
(char *) "SUB", /* 300 */
|
||||
(char *) "GSUB", /* 301 */
|
||||
(char *) "IF", /* 302 */
|
||||
(char *) "INDEX", /* 303 */
|
||||
(char *) "LSUBSTR", /* 304 */
|
||||
(char *) "MATCHFCN", /* 305 */
|
||||
(char *) "NEXT", /* 306 */
|
||||
(char *) "NEXTFILE", /* 307 */
|
||||
(char *) "ADD", /* 308 */
|
||||
(char *) "MINUS", /* 309 */
|
||||
(char *) "MULT", /* 310 */
|
||||
(char *) "DIVIDE", /* 311 */
|
||||
(char *) "MOD", /* 312 */
|
||||
(char *) "ASSIGN", /* 313 */
|
||||
(char *) "ASGNOP", /* 314 */
|
||||
(char *) "ADDEQ", /* 315 */
|
||||
(char *) "SUBEQ", /* 316 */
|
||||
(char *) "MULTEQ", /* 317 */
|
||||
(char *) "DIVEQ", /* 318 */
|
||||
(char *) "MODEQ", /* 319 */
|
||||
(char *) "POWEQ", /* 320 */
|
||||
(char *) "PRINT", /* 321 */
|
||||
(char *) "PRINTF", /* 322 */
|
||||
(char *) "SPRINTF", /* 323 */
|
||||
(char *) "ELSE", /* 324 */
|
||||
(char *) "INTEST", /* 325 */
|
||||
(char *) "CONDEXPR", /* 326 */
|
||||
(char *) "POSTINCR", /* 327 */
|
||||
(char *) "PREINCR", /* 328 */
|
||||
(char *) "POSTDECR", /* 329 */
|
||||
(char *) "PREDECR", /* 330 */
|
||||
(char *) "VAR", /* 331 */
|
||||
(char *) "IVAR", /* 332 */
|
||||
(char *) "VARNF", /* 333 */
|
||||
(char *) "CALL", /* 334 */
|
||||
(char *) "NUMBER", /* 335 */
|
||||
(char *) "STRING", /* 336 */
|
||||
(char *) "REGEXPR", /* 337 */
|
||||
(char *) "GETLINE", /* 338 */
|
||||
(char *) "RETURN", /* 339 */
|
||||
(char *) "SPLIT", /* 340 */
|
||||
(char *) "SUBSTR", /* 341 */
|
||||
(char *) "WHILE", /* 342 */
|
||||
(char *) "CAT", /* 343 */
|
||||
(char *) "NOT", /* 344 */
|
||||
(char *) "UMINUS", /* 345 */
|
||||
(char *) "UPLUS", /* 346 */
|
||||
(char *) "POWER", /* 347 */
|
||||
(char *) "DECR", /* 348 */
|
||||
(char *) "INCR", /* 349 */
|
||||
(char *) "INDIRECT", /* 350 */
|
||||
(char *) "LASTTOKEN", /* 351 */
|
||||
};
|
||||
|
||||
|
||||
/*
 * Handler function for every token, one slot per token in
 * FIRSTTOKEN..LASTTOKEN order; the comment after each entry names the
 * token the slot belongs to.  maketab emits nullproc for any token that
 * has no handler of its own.  Generated output: keep the order intact.
 */
Cell *(*proctab[94])(Node **, int) = {
|
||||
nullproc, /* FIRSTTOKEN */
|
||||
program, /* PROGRAM */
|
||||
pastat, /* PASTAT */
|
||||
dopa2, /* PASTAT2 */
|
||||
nullproc, /* XBEGIN */
|
||||
nullproc, /* XEND */
|
||||
nullproc, /* NL */
|
||||
array, /* ARRAY */
|
||||
matchop, /* MATCH */
|
||||
matchop, /* NOTMATCH */
|
||||
nullproc, /* MATCHOP */
|
||||
nullproc, /* FINAL */
|
||||
nullproc, /* DOT */
|
||||
nullproc, /* ALL */
|
||||
nullproc, /* CCL */
|
||||
nullproc, /* NCCL */
|
||||
nullproc, /* CHAR */
|
||||
nullproc, /* OR */
|
||||
nullproc, /* STAR */
|
||||
nullproc, /* QUEST */
|
||||
nullproc, /* PLUS */
|
||||
nullproc, /* EMPTYRE */
|
||||
boolop, /* AND */
|
||||
boolop, /* BOR */
|
||||
nullproc, /* APPEND */
|
||||
relop, /* EQ */
|
||||
relop, /* GE */
|
||||
relop, /* GT */
|
||||
relop, /* LE */
|
||||
relop, /* LT */
|
||||
relop, /* NE */
|
||||
instat, /* IN */
|
||||
arg, /* ARG */
|
||||
bltin, /* BLTIN */
|
||||
jump, /* BREAK */
|
||||
closefile, /* CLOSE */
|
||||
jump, /* CONTINUE */
|
||||
awkdelete, /* DELETE */
|
||||
dostat, /* DO */
|
||||
jump, /* EXIT */
|
||||
forstat, /* FOR */
|
||||
nullproc, /* FUNC */
|
||||
sub, /* SUB */
|
||||
gsub, /* GSUB */
|
||||
ifstat, /* IF */
|
||||
sindex, /* INDEX */
|
||||
nullproc, /* LSUBSTR */
|
||||
matchop, /* MATCHFCN */
|
||||
jump, /* NEXT */
|
||||
jump, /* NEXTFILE */
|
||||
arith, /* ADD */
|
||||
arith, /* MINUS */
|
||||
arith, /* MULT */
|
||||
arith, /* DIVIDE */
|
||||
arith, /* MOD */
|
||||
assign, /* ASSIGN */
|
||||
nullproc, /* ASGNOP */
|
||||
assign, /* ADDEQ */
|
||||
assign, /* SUBEQ */
|
||||
assign, /* MULTEQ */
|
||||
assign, /* DIVEQ */
|
||||
assign, /* MODEQ */
|
||||
assign, /* POWEQ */
|
||||
printstat, /* PRINT */
|
||||
awkprintf, /* PRINTF */
|
||||
awksprintf, /* SPRINTF */
|
||||
nullproc, /* ELSE */
|
||||
intest, /* INTEST */
|
||||
condexpr, /* CONDEXPR */
|
||||
incrdecr, /* POSTINCR */
|
||||
incrdecr, /* PREINCR */
|
||||
incrdecr, /* POSTDECR */
|
||||
incrdecr, /* PREDECR */
|
||||
nullproc, /* VAR */
|
||||
nullproc, /* IVAR */
|
||||
getnf, /* VARNF */
|
||||
call, /* CALL */
|
||||
nullproc, /* NUMBER */
|
||||
nullproc, /* STRING */
|
||||
nullproc, /* REGEXPR */
|
||||
awkgetline, /* GETLINE */
|
||||
jump, /* RETURN */
|
||||
split, /* SPLIT */
|
||||
substr, /* SUBSTR */
|
||||
whilestat, /* WHILE */
|
||||
cat, /* CAT */
|
||||
boolop, /* NOT */
|
||||
arith, /* UMINUS */
|
||||
arith, /* UPLUS */
|
||||
arith, /* POWER */
|
||||
nullproc, /* DECR */
|
||||
nullproc, /* INCR */
|
||||
indirect, /* INDIRECT */
|
||||
nullproc, /* LASTTOKEN */
|
||||
};
|
||||
|
||||
char *tokname(int n)
|
||||
{
|
||||
static char buf[100];
|
||||
|
||||
if (n < FIRSTTOKEN || n > LASTTOKEN) {
|
||||
sprintf(buf, "token %d", n);
|
||||
return buf;
|
||||
}
|
||||
return printname[n-FIRSTTOKEN];
|
||||
}
|
3
proto.h
3
proto.h
@ -124,6 +124,7 @@ extern void setclvar(char *);
|
||||
extern void fldbld(void);
|
||||
extern void cleanfld(int, int);
|
||||
extern void newfld(int);
|
||||
extern void setlastfld(int);
|
||||
extern int refldbld(const char *, const char *);
|
||||
extern void recbld(void);
|
||||
extern Cell *fieldadr(int);
|
||||
@ -193,3 +194,5 @@ extern Cell *gsub(Node **, int);
|
||||
|
||||
extern FILE *popen(const char *, const char *);
|
||||
extern int pclose(FILE *);
|
||||
|
||||
extern const char *flags2str(int flags);
|
||||
|
187
run.c
187
run.c
@ -31,6 +31,8 @@ THIS SOFTWARE.
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include "awk.h"
|
||||
#include "ytab.h"
|
||||
|
||||
@ -71,23 +73,23 @@ extern Awkfloat srand_seed;
|
||||
Node *winner = NULL; /* root of parse tree */
|
||||
Cell *tmps; /* free temporary cells for execution */
|
||||
|
||||
static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM };
|
||||
static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL };
|
||||
Cell *True = &truecell;
|
||||
static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM };
|
||||
static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL };
|
||||
Cell *False = &falsecell;
|
||||
static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM };
|
||||
static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL };
|
||||
Cell *jbreak = &breakcell;
|
||||
static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM };
|
||||
static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL };
|
||||
Cell *jcont = &contcell;
|
||||
static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM };
|
||||
static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL };
|
||||
Cell *jnext = &nextcell;
|
||||
static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM };
|
||||
static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL };
|
||||
Cell *jnextfile = &nextfilecell;
|
||||
static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM };
|
||||
static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL };
|
||||
Cell *jexit = &exitcell;
|
||||
static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM };
|
||||
static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL };
|
||||
Cell *jret = &retcell;
|
||||
static Cell tempcell ={ OCELL, CTEMP, 0, "", 0.0, NUM|STR|DONTFREE };
|
||||
static Cell tempcell ={ OCELL, CTEMP, 0, "", 0.0, NUM|STR|DONTFREE, NULL };
|
||||
|
||||
Node *curnode = NULL; /* the node being executed, for debugging */
|
||||
|
||||
@ -112,7 +114,7 @@ int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
|
||||
if (rminlen)
|
||||
minlen += quantum - rminlen;
|
||||
tbuf = (char *) realloc(*pbuf, minlen);
|
||||
dprintf( ("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, *pbuf, tbuf) );
|
||||
dprintf( ("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void *) *pbuf, (void *) tbuf) );
|
||||
if (tbuf == NULL) {
|
||||
if (whatrtn)
|
||||
FATAL("out of memory in %s", whatrtn);
|
||||
@ -221,7 +223,7 @@ struct Frame *fp = NULL; /* frame pointer. bottom level unused */
|
||||
|
||||
Cell *call(Node **a, int n) /* function call. very kludgy and fragile */
|
||||
{
|
||||
static Cell newcopycell = { OCELL, CCOPY, 0, "", 0.0, NUM|STR|DONTFREE };
|
||||
static Cell newcopycell = { OCELL, CCOPY, 0, "", 0.0, NUM|STR|DONTFREE, NULL };
|
||||
int i, ncall, ndef;
|
||||
int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
|
||||
Node *x;
|
||||
@ -323,14 +325,18 @@ Cell *copycell(Cell *x) /* make a copy of a cell in a temp */
|
||||
{
|
||||
Cell *y;
|
||||
|
||||
/* copy is not constant or field */
|
||||
|
||||
y = gettemp();
|
||||
y->tval = x->tval & ~(CON|FLD|REC);
|
||||
y->csub = CCOPY; /* prevents freeing until call is over */
|
||||
y->nval = x->nval; /* BUG? */
|
||||
if (isstr(x))
|
||||
if (isstr(x) /* || x->ctype == OCELL */) {
|
||||
y->sval = tostring(x->sval);
|
||||
y->tval &= ~DONTFREE;
|
||||
} else
|
||||
y->tval |= DONTFREE;
|
||||
y->fval = x->fval;
|
||||
y->tval = x->tval & ~(CON|FLD|REC|DONTFREE); /* copy is not constant or field */
|
||||
/* is DONTFREE right? */
|
||||
return y;
|
||||
}
|
||||
|
||||
@ -419,6 +425,10 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */
|
||||
} else if (a[0] != NULL) { /* getline var <file */
|
||||
x = execute(a[0]);
|
||||
setsval(x, buf);
|
||||
if (is_number(x->sval)) {
|
||||
x->fval = atof(x->sval);
|
||||
x->tval |= NUM;
|
||||
}
|
||||
tempfree(x);
|
||||
} else { /* getline <file */
|
||||
setsval(fldtab[0], buf);
|
||||
@ -434,6 +444,10 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */
|
||||
n = getrec(&buf, &bufsize, 0);
|
||||
x = execute(a[0]);
|
||||
setsval(x, buf);
|
||||
if (is_number(x->sval)) {
|
||||
x->fval = atof(x->sval);
|
||||
x->tval |= NUM;
|
||||
}
|
||||
tempfree(x);
|
||||
}
|
||||
}
|
||||
@ -456,7 +470,7 @@ Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
|
||||
Node *np;
|
||||
char *buf;
|
||||
int bufsz = recsize;
|
||||
int nsub = strlen(*SUBSEP);
|
||||
int nsub;
|
||||
|
||||
if ((buf = (char *) malloc(bufsz)) == NULL)
|
||||
FATAL("out of memory in array");
|
||||
@ -466,6 +480,7 @@ Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
|
||||
for (np = a[1]; np; np = np->nnext) {
|
||||
y = execute(np); /* subscript */
|
||||
s = getsval(y);
|
||||
nsub = strlen(getsval(subseploc));
|
||||
if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "array"))
|
||||
FATAL("out of memory for %s[%s...]", x->nval, buf);
|
||||
strcat(buf, s);
|
||||
@ -494,12 +509,12 @@ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts *
|
||||
Cell *x, *y;
|
||||
Node *np;
|
||||
char *s;
|
||||
int nsub = strlen(*SUBSEP);
|
||||
int nsub;
|
||||
|
||||
x = execute(a[0]); /* Cell* for symbol table */
|
||||
if (!isarr(x))
|
||||
return True;
|
||||
if (a[1] == NULL) { /* delete the elements, not the table */
|
||||
if (a[1] == 0) { /* delete the elements, not the table */
|
||||
freesymtab(x);
|
||||
x->tval &= ~STR;
|
||||
x->tval |= ARR;
|
||||
@ -513,9 +528,10 @@ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts *
|
||||
for (np = a[1]; np; np = np->nnext) {
|
||||
y = execute(np); /* subscript */
|
||||
s = getsval(y);
|
||||
nsub = strlen(getsval(subseploc));
|
||||
if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "awkdelete"))
|
||||
FATAL("out of memory deleting %s[%s...]", x->nval, buf);
|
||||
strcat(buf, s);
|
||||
strcat(buf, s);
|
||||
if (np->nnext)
|
||||
strcat(buf, *SUBSEP);
|
||||
tempfree(y);
|
||||
@ -534,7 +550,7 @@ Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
|
||||
char *buf;
|
||||
char *s;
|
||||
int bufsz = recsize;
|
||||
int nsub = strlen(*SUBSEP);
|
||||
int nsub;
|
||||
|
||||
ap = execute(a[1]); /* array name */
|
||||
if (!isarr(ap)) {
|
||||
@ -552,6 +568,7 @@ Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
|
||||
for (p = a[0]; p; p = p->nnext) {
|
||||
x = execute(p); /* expr */
|
||||
s = getsval(x);
|
||||
nsub = strlen(getsval(subseploc));
|
||||
if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "intest"))
|
||||
FATAL("out of memory deleting %s[%s...]", x->nval, buf);
|
||||
strcat(buf, s);
|
||||
@ -583,7 +600,7 @@ Cell *matchop(Node **a, int n) /* ~ and match() */
|
||||
}
|
||||
x = execute(a[1]); /* a[1] = target text */
|
||||
s = getsval(x);
|
||||
if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */
|
||||
if (a[0] == 0) /* a[0] == 0: already-compiled reg expr */
|
||||
i = (*mf)((fa *) a[2], s);
|
||||
else {
|
||||
y = execute(a[2]); /* a[2] = regular expr */
|
||||
@ -699,7 +716,7 @@ Cell *gettemp(void) /* get a tempcell */
|
||||
FATAL("out of space for temporaries");
|
||||
for(i = 1; i < 100; i++)
|
||||
tmps[i-1].cnext = &tmps[i];
|
||||
tmps[i-1].cnext = NULL;
|
||||
tmps[i-1].cnext = 0;
|
||||
}
|
||||
x = tmps;
|
||||
tmps = x->cnext;
|
||||
@ -734,18 +751,18 @@ Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
|
||||
int k, m, n;
|
||||
char *s;
|
||||
int temp;
|
||||
Cell *x, *y, *z = NULL;
|
||||
Cell *x, *y, *z = 0;
|
||||
|
||||
x = execute(a[0]);
|
||||
y = execute(a[1]);
|
||||
if (a[2] != NULL)
|
||||
if (a[2] != 0)
|
||||
z = execute(a[2]);
|
||||
s = getsval(x);
|
||||
k = strlen(s) + 1;
|
||||
if (k <= 1) {
|
||||
tempfree(x);
|
||||
tempfree(y);
|
||||
if (a[2] != NULL) {
|
||||
if (a[2] != 0) {
|
||||
tempfree(z);
|
||||
}
|
||||
x = gettemp();
|
||||
@ -758,7 +775,7 @@ Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
|
||||
else if (m > k)
|
||||
m = k;
|
||||
tempfree(y);
|
||||
if (a[2] != NULL) {
|
||||
if (a[2] != 0) {
|
||||
n = (int) getfval(z);
|
||||
tempfree(z);
|
||||
} else
|
||||
@ -817,6 +834,17 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
|
||||
char *buf = *pbuf;
|
||||
int bufsize = *pbufsize;
|
||||
|
||||
static int first = 1;
|
||||
static int have_a_format = 0;
|
||||
|
||||
if (first) {
|
||||
char buf[100];
|
||||
|
||||
sprintf(buf, "%a", 42.0);
|
||||
have_a_format = (strcmp(buf, "0x1.5p+5") == 0);
|
||||
first = 0;
|
||||
}
|
||||
|
||||
os = s;
|
||||
p = buf;
|
||||
if ((fmt = (char *) malloc(fmtsz)) == NULL)
|
||||
@ -842,7 +870,13 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
|
||||
FATAL("format item %.30s... ran format() out of memory", os);
|
||||
if (isalpha((uschar)*s) && *s != 'l' && *s != 'h' && *s != 'L')
|
||||
break; /* the ansi panoply */
|
||||
if (*s == '$') {
|
||||
FATAL("'$' not permitted in awk formats");
|
||||
}
|
||||
if (*s == '*') {
|
||||
if (a == NULL) {
|
||||
FATAL("not enough args in printf(%s)", os);
|
||||
}
|
||||
x = execute(a);
|
||||
a = a->nnext;
|
||||
sprintf(t-1, "%d", fmtwd=(int) getfval(x));
|
||||
@ -857,8 +891,13 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
|
||||
if (fmtwd < 0)
|
||||
fmtwd = -fmtwd;
|
||||
adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
|
||||
|
||||
switch (*s) {
|
||||
case 'a': case 'A':
|
||||
if (have_a_format)
|
||||
flag = *s;
|
||||
else
|
||||
flag = 'f';
|
||||
break;
|
||||
case 'f': case 'e': case 'g': case 'E': case 'G':
|
||||
flag = 'f';
|
||||
break;
|
||||
@ -901,6 +940,8 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
|
||||
p += strlen(p);
|
||||
sprintf(p, "%s", t);
|
||||
break;
|
||||
case 'a':
|
||||
case 'A':
|
||||
case 'f': sprintf(p, fmt, getfval(x)); break;
|
||||
case 'd': sprintf(p, fmt, (long) getfval(x)); break;
|
||||
case 'u': sprintf(p, fmt, (int) getfval(x)); break;
|
||||
@ -1003,7 +1044,7 @@ Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */
|
||||
x = execute(a[0]);
|
||||
i = getfval(x);
|
||||
tempfree(x);
|
||||
if (n != UMINUS) {
|
||||
if (n != UMINUS && n != UPLUS) {
|
||||
y = execute(a[1]);
|
||||
j = getfval(y);
|
||||
tempfree(y);
|
||||
@ -1033,6 +1074,8 @@ Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */
|
||||
case UMINUS:
|
||||
i = -i;
|
||||
break;
|
||||
case UPLUS: /* handled by getfval(), above */
|
||||
break;
|
||||
case POWER:
|
||||
if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */
|
||||
i = ipow(i, (int) j);
|
||||
@ -1088,8 +1131,8 @@ Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */
|
||||
y = execute(a[1]);
|
||||
x = execute(a[0]);
|
||||
if (n == ASSIGN) { /* ordinary assignment */
|
||||
if (x == y && !(x->tval & (FLD|REC))) /* self-assignment: */
|
||||
; /* leave alone unless it's a field */
|
||||
if (x == y && !(x->tval & (FLD|REC)) && x != nfloc)
|
||||
; /* self-assignment: leave alone unless it's a field or NF */
|
||||
else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
|
||||
setsval(x, getsval(y));
|
||||
x->fval = getfval(y);
|
||||
@ -1146,25 +1189,26 @@ Cell *cat(Node **a, int q) /* a[0] cat a[1] */
|
||||
{
|
||||
Cell *x, *y, *z;
|
||||
int n1, n2;
|
||||
char *s;
|
||||
char *s = NULL;
|
||||
int ssz = 0;
|
||||
|
||||
x = execute(a[0]);
|
||||
n1 = strlen(getsval(x));
|
||||
adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1");
|
||||
(void) strncpy(s, x->sval, ssz);
|
||||
|
||||
y = execute(a[1]);
|
||||
getsval(x);
|
||||
getsval(y);
|
||||
n1 = strlen(x->sval);
|
||||
n2 = strlen(y->sval);
|
||||
s = (char *) malloc(n1 + n2 + 1);
|
||||
if (s == NULL)
|
||||
FATAL("out of space concatenating %.15s... and %.15s...",
|
||||
x->sval, y->sval);
|
||||
strcpy(s, x->sval);
|
||||
strcpy(s+n1, y->sval);
|
||||
n2 = strlen(getsval(y));
|
||||
adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
|
||||
(void) strncpy(s + n1, y->sval, ssz - n1);
|
||||
|
||||
tempfree(x);
|
||||
tempfree(y);
|
||||
|
||||
z = gettemp();
|
||||
z->sval = s;
|
||||
z->tval = STR;
|
||||
|
||||
return(z);
|
||||
}
|
||||
|
||||
@ -1172,7 +1216,7 @@ Cell *pastat(Node **a, int n) /* a[0] { a[1] } */
|
||||
{
|
||||
Cell *x;
|
||||
|
||||
if (a[0] == NULL)
|
||||
if (a[0] == 0)
|
||||
x = execute(a[1]);
|
||||
else {
|
||||
x = execute(a[0]);
|
||||
@ -1209,20 +1253,22 @@ Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */
|
||||
|
||||
Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
|
||||
{
|
||||
Cell *x = NULL, *y, *ap;
|
||||
Cell *x = 0, *y, *ap;
|
||||
char *s, *origs;
|
||||
char *fs, *origfs = NULL;
|
||||
int sep;
|
||||
char *t, temp, num[50], *fs = NULL;
|
||||
char *t, temp, num[50];
|
||||
int n, tempstat, arg3type;
|
||||
|
||||
y = execute(a[0]); /* source string */
|
||||
origs = s = strdup(getsval(y));
|
||||
arg3type = ptoi(a[3]);
|
||||
if (a[2] == NULL) /* fs string */
|
||||
fs = *FS;
|
||||
if (a[2] == 0) /* fs string */
|
||||
fs = getsval(fsloc);
|
||||
else if (arg3type == STRING) { /* split(str,arr,"string") */
|
||||
x = execute(a[2]);
|
||||
fs = getsval(x);
|
||||
origfs = fs = strdup(getsval(x));
|
||||
tempfree(x);
|
||||
} else if (arg3type == REGEXPR)
|
||||
fs = "(regexpr)"; /* split(str,arr,/regexpr/) */
|
||||
else
|
||||
@ -1337,9 +1383,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
|
||||
tempfree(ap);
|
||||
tempfree(y);
|
||||
free(origs);
|
||||
if (a[2] != NULL && arg3type == STRING) {
|
||||
tempfree(x);
|
||||
}
|
||||
free(origfs);
|
||||
x = gettemp();
|
||||
x->tval = NUM;
|
||||
x->fval = n;
|
||||
@ -1369,7 +1413,7 @@ Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */
|
||||
if (istrue(x)) {
|
||||
tempfree(x);
|
||||
x = execute(a[1]);
|
||||
} else if (a[2] != NULL) {
|
||||
} else if (a[2] != 0) {
|
||||
tempfree(x);
|
||||
x = execute(a[2]);
|
||||
}
|
||||
@ -1421,7 +1465,7 @@ Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */
|
||||
x = execute(a[0]);
|
||||
tempfree(x);
|
||||
for (;;) {
|
||||
if (a[1]!=NULL) {
|
||||
if (a[1]!=0) {
|
||||
x = execute(a[1]);
|
||||
if (!istrue(x)) return(x);
|
||||
else tempfree(x);
|
||||
@ -1479,6 +1523,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
|
||||
Node *nextarg;
|
||||
FILE *fp;
|
||||
void flush_all(void);
|
||||
int status = 0;
|
||||
|
||||
t = ptoi(a[0]);
|
||||
x = execute(a[1]);
|
||||
@ -1503,7 +1548,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
|
||||
case FCOS:
|
||||
u = cos(getfval(x)); break;
|
||||
case FATAN:
|
||||
if (nextarg == NULL) {
|
||||
if (nextarg == 0) {
|
||||
WARNING("atan2 requires two arguments; returning 1.0");
|
||||
u = 1.0;
|
||||
} else {
|
||||
@ -1515,7 +1560,20 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
|
||||
break;
|
||||
case FSYSTEM:
|
||||
fflush(stdout); /* in case something is buffered already */
|
||||
u = (Awkfloat) system(getsval(x)) / 256; /* 256 is unix-dep */
|
||||
status = system(getsval(x));
|
||||
u = status;
|
||||
if (status != -1) {
|
||||
if (WIFEXITED(status)) {
|
||||
u = WEXITSTATUS(status);
|
||||
} else if (WIFSIGNALED(status)) {
|
||||
u = WTERMSIG(status) + 256;
|
||||
#ifdef WCOREDUMP
|
||||
if (WCOREDUMP(status))
|
||||
u += 256;
|
||||
#endif
|
||||
} else /* something else?!? */
|
||||
u = 0;
|
||||
}
|
||||
break;
|
||||
case FRAND:
|
||||
/* in principle, rand() returns something in 0..RAND_MAX */
|
||||
@ -1564,7 +1622,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
|
||||
tempfree(x);
|
||||
x = gettemp();
|
||||
setfval(x, u);
|
||||
if (nextarg != NULL) {
|
||||
if (nextarg != 0) {
|
||||
WARNING("warning: function has too many arguments");
|
||||
for ( ; nextarg; nextarg = nextarg->nnext)
|
||||
execute(nextarg);
|
||||
@ -1578,7 +1636,7 @@ Cell *printstat(Node **a, int n) /* print a[0] */
|
||||
Cell *y;
|
||||
FILE *fp;
|
||||
|
||||
if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */
|
||||
if (a[1] == 0) /* a[1] is redirection operator, a[2] is file */
|
||||
fp = stdout;
|
||||
else
|
||||
fp = redirect(ptoi(a[1]), a[2]);
|
||||
@ -1587,11 +1645,11 @@ Cell *printstat(Node **a, int n) /* print a[0] */
|
||||
fputs(getpssval(y), fp);
|
||||
tempfree(y);
|
||||
if (x->nnext == NULL)
|
||||
fputs(*ORS, fp);
|
||||
fputs(getsval(orsloc), fp);
|
||||
else
|
||||
fputs(*OFS, fp);
|
||||
fputs(getsval(ofsloc), fp);
|
||||
}
|
||||
if (a[1] != NULL)
|
||||
if (a[1] != 0)
|
||||
fflush(fp);
|
||||
if (ferror(fp))
|
||||
FATAL("write error on %s", filename(fp));
|
||||
@ -1600,8 +1658,6 @@ Cell *printstat(Node **a, int n) /* print a[0] */
|
||||
|
||||
/*
 * nullproc: placeholder action that does nothing.
 *
 * Both parameters are ignored and a null Cell pointer is returned.
 */
Cell *nullproc(Node **a, int n)
{
	/*
	 * Mark the parameters as deliberately unused.  A (void) cast does
	 * this without the self-assignment diagnostics that "n = n" and
	 * "a = a" provoke.
	 */
	(void) a;
	(void) n;
	return NULL;
}
|
||||
|
||||
@ -1650,7 +1706,7 @@ FILE *openfile(int a, const char *us)
|
||||
{
|
||||
const char *s = us;
|
||||
int i, m;
|
||||
FILE *fp = NULL;
|
||||
FILE *fp = 0;
|
||||
|
||||
if (*s == '\0')
|
||||
FATAL("null file name in print or getline");
|
||||
@ -1665,7 +1721,7 @@ FILE *openfile(int a, const char *us)
|
||||
return NULL;
|
||||
|
||||
for (i=0; i < nfiles; i++)
|
||||
if (files[i].fp == NULL)
|
||||
if (files[i].fp == 0)
|
||||
break;
|
||||
if (i >= nfiles) {
|
||||
struct files *nf;
|
||||
@ -1715,7 +1771,6 @@ Cell *closefile(Node **a, int n)
|
||||
Cell *x;
|
||||
int i, stat;
|
||||
|
||||
n = n;
|
||||
x = execute(a[0]);
|
||||
getsval(x);
|
||||
stat = -1;
|
||||
@ -1782,7 +1837,7 @@ Cell *sub(Node **a, int nnn) /* substitute command */
|
||||
FATAL("out of memory in sub");
|
||||
x = execute(a[3]); /* target string */
|
||||
t = getsval(x);
|
||||
if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */
|
||||
if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
|
||||
pfa = (fa *) a[1]; /* regular expression */
|
||||
else {
|
||||
y = execute(a[1]);
|
||||
@ -1822,7 +1877,7 @@ Cell *sub(Node **a, int nnn) /* substitute command */
|
||||
if (pb > buf + bufsz)
|
||||
FATAL("sub result2 %.30s too big; can't happen", buf);
|
||||
setsval(x, buf); /* BUG: should be able to avoid copy */
|
||||
result = True;
|
||||
result = True;;
|
||||
}
|
||||
tempfree(x);
|
||||
tempfree(y);
|
||||
@ -1845,7 +1900,7 @@ Cell *gsub(Node **a, int nnn) /* global substitute */
|
||||
num = 0;
|
||||
x = execute(a[3]); /* target string */
|
||||
t = getsval(x);
|
||||
if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */
|
||||
if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
|
||||
pfa = (fa *) a[1]; /* regular expression */
|
||||
else {
|
||||
y = execute(a[1]);
|
||||
|
185
tran.c
185
tran.c
@ -55,10 +55,14 @@ Cell *fsloc; /* FS */
|
||||
Cell *nrloc; /* NR */
|
||||
Cell *nfloc; /* NF */
|
||||
Cell *fnrloc; /* FNR */
|
||||
Cell *ofsloc; /* OFS */
|
||||
Cell *orsloc; /* ORS */
|
||||
Cell *rsloc; /* RS */
|
||||
Array *ARGVtab; /* symbol table containing ARGV[...] */
|
||||
Array *ENVtab; /* symbol table containing ENVIRON[...] */
|
||||
Cell *rstartloc; /* RSTART */
|
||||
Cell *rlengthloc; /* RLENGTH */
|
||||
Cell *subseploc; /* SUBSEP */
|
||||
Cell *symtabloc; /* SYMTAB */
|
||||
|
||||
Cell *nullloc; /* a guaranteed empty cell */
|
||||
@ -67,6 +71,18 @@ Cell *literal0;
|
||||
|
||||
extern Cell **fldtab;
|
||||
|
||||
static void
|
||||
setfree(Cell *vp)
|
||||
{
|
||||
if (&vp->sval == FS || &vp->sval == RS ||
|
||||
&vp->sval == OFS || &vp->sval == ORS ||
|
||||
&vp->sval == OFMT || &vp->sval == CONVFMT ||
|
||||
&vp->sval == FILENAME || &vp->sval == SUBSEP)
|
||||
vp->tval |= DONTFREE;
|
||||
else
|
||||
vp->tval &= ~DONTFREE;
|
||||
}
|
||||
|
||||
void syminit(void) /* initialize symbol table with builtin vars */
|
||||
{
|
||||
literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
|
||||
@ -76,9 +92,12 @@ void syminit(void) /* initialize symbol table with builtin vars */
|
||||
|
||||
fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
|
||||
FS = &fsloc->sval;
|
||||
RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
|
||||
OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
|
||||
ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
|
||||
rsloc = setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab);
|
||||
RS = &rsloc->sval;
|
||||
ofsloc = setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab);
|
||||
OFS = &ofsloc->sval;
|
||||
orsloc = setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab);
|
||||
ORS = &orsloc->sval;
|
||||
OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
|
||||
CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
|
||||
FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
|
||||
@ -88,7 +107,8 @@ void syminit(void) /* initialize symbol table with builtin vars */
|
||||
NR = &nrloc->fval;
|
||||
fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
|
||||
FNR = &fnrloc->fval;
|
||||
SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
|
||||
subseploc = setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab);
|
||||
SUBSEP = &subseploc->sval;
|
||||
rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
|
||||
RSTART = &rstartloc->fval;
|
||||
rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
|
||||
@ -174,7 +194,7 @@ void freesymtab(Cell *ap) /* free a symbol table */
|
||||
free(cp);
|
||||
tp->nelem--;
|
||||
}
|
||||
tp->tab[i] = NULL;
|
||||
tp->tab[i] = 0;
|
||||
}
|
||||
if (tp->nelem != 0)
|
||||
WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
|
||||
@ -282,6 +302,7 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
|
||||
{
|
||||
int fldno;
|
||||
|
||||
f += 0.0; /* normalise negative zero to positive zero */
|
||||
if ((vp->tval & (NUM | STR)) == 0)
|
||||
funnyvar(vp, "assign to");
|
||||
if (isfld(vp)) {
|
||||
@ -290,13 +311,21 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
|
||||
if (fldno > *NF)
|
||||
newfld(fldno);
|
||||
dprintf( ("setting field %d to %g\n", fldno, f) );
|
||||
} else if (&vp->fval == NF) {
|
||||
donerec = 0; /* mark $0 invalid */
|
||||
setlastfld(f);
|
||||
dprintf( ("setting NF to %g\n", f) );
|
||||
} else if (isrec(vp)) {
|
||||
donefld = 0; /* mark $1... invalid */
|
||||
donerec = 1;
|
||||
} else if (vp == ofsloc) {
|
||||
if (donerec == 0)
|
||||
recbld();
|
||||
}
|
||||
if (freeable(vp))
|
||||
xfree(vp->sval); /* free any previous string */
|
||||
vp->tval &= ~STR; /* mark string invalid */
|
||||
vp->tval &= ~(STR|CONVC|CONVO); /* mark string invalid */
|
||||
vp->fmt = NULL;
|
||||
vp->tval |= NUM; /* mark number ok */
|
||||
if (f == -0) /* who would have thought this possible? */
|
||||
f = 0;
|
||||
@ -318,6 +347,7 @@ char *setsval(Cell *vp, const char *s) /* set string val of a Cell */
|
||||
{
|
||||
char *t;
|
||||
int fldno;
|
||||
Awkfloat f;
|
||||
|
||||
dprintf( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n",
|
||||
(void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld) );
|
||||
@ -328,20 +358,32 @@ char *setsval(Cell *vp, const char *s) /* set string val of a Cell */
|
||||
fldno = atoi(vp->nval);
|
||||
if (fldno > *NF)
|
||||
newfld(fldno);
|
||||
dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
|
||||
dprintf( ("setting field %d to %s (%p)\n", fldno, s, (void *) s) );
|
||||
} else if (isrec(vp)) {
|
||||
donefld = 0; /* mark $1... invalid */
|
||||
donerec = 1;
|
||||
} else if (vp == ofsloc) {
|
||||
if (donerec == 0)
|
||||
recbld();
|
||||
}
|
||||
t = tostring(s); /* in case it's self-assign */
|
||||
t = s ? tostring(s) : tostring(""); /* in case it's self-assign */
|
||||
if (freeable(vp))
|
||||
xfree(vp->sval);
|
||||
vp->tval &= ~NUM;
|
||||
vp->tval &= ~(NUM|CONVC|CONVO);
|
||||
vp->tval |= STR;
|
||||
vp->tval &= ~DONTFREE;
|
||||
vp->fmt = NULL;
|
||||
setfree(vp);
|
||||
dprintf( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n",
|
||||
(void*)vp, NN(vp->nval), t,t, vp->tval, donerec, donefld) );
|
||||
return(vp->sval = t);
|
||||
(void*)vp, NN(vp->nval), t, (void *) t, vp->tval, donerec, donefld) );
|
||||
vp->sval = t;
|
||||
if (&vp->fval == NF) {
|
||||
donerec = 0; /* mark $0 invalid */
|
||||
f = getfval(vp);
|
||||
setlastfld(f);
|
||||
dprintf( ("setting NF to %g\n", f) );
|
||||
}
|
||||
|
||||
return(vp->sval);
|
||||
}
|
||||
|
||||
Awkfloat getfval(Cell *vp) /* get float val of a Cell */
|
||||
@ -364,7 +406,7 @@ Awkfloat getfval(Cell *vp) /* get float val of a Cell */
|
||||
|
||||
static char *get_str_val(Cell *vp, char **fmt) /* get string val of a Cell */
|
||||
{
|
||||
char s[100]; /* BUG: unchecked */
|
||||
char s[256];
|
||||
double dtemp;
|
||||
|
||||
if ((vp->tval & (NUM | STR)) == 0)
|
||||
@ -373,19 +415,80 @@ static char *get_str_val(Cell *vp, char **fmt) /* get string val of a Cel
|
||||
fldbld();
|
||||
else if (isrec(vp) && donerec == 0)
|
||||
recbld();
|
||||
if (isstr(vp) == 0) {
|
||||
if (freeable(vp))
|
||||
xfree(vp->sval);
|
||||
if (modf(vp->fval, &dtemp) == 0) /* it's integral */
|
||||
sprintf(s, "%.30g", vp->fval);
|
||||
else
|
||||
sprintf(s, *fmt, vp->fval);
|
||||
vp->sval = tostring(s);
|
||||
vp->tval &= ~DONTFREE;
|
||||
vp->tval |= STR;
|
||||
|
||||
/*
|
||||
* ADR: This is complicated and more fragile than is desirable.
|
||||
* Retrieving a string value for a number associates the string
|
||||
* value with the scalar. Previously, the string value was
|
||||
* sticky, meaning if converted via OFMT that became the value
|
||||
* (even though POSIX wants it to be via CONVFMT). Or if CONVFMT
|
||||
* changed after a string value was retrieved, the original value
|
||||
* was maintained and used. Also not per POSIX.
|
||||
*
|
||||
* We work around this design by adding two additional flags,
|
||||
* CONVC and CONVO, indicating how the string value was
|
||||
* obtained (via CONVFMT or OFMT) and _also_ maintaining a copy
|
||||
* of the pointer to the xFMT format string used for the
|
||||
* conversion. This pointer is only read, **never** dereferenced.
|
||||
* The next time we do a conversion, if it's coming from the same
|
||||
* xFMT as last time, and the pointer value is different, we
|
||||
* know that the xFMT format string changed, and we need to
|
||||
* redo the conversion. If it's the same, we don't have to.
|
||||
*
|
||||
* There are also several cases where we don't do a conversion,
|
||||
* such as for a field (see the checks below).
|
||||
*/
|
||||
|
||||
/* Don't duplicate the code for actually updating the value */
|
||||
#define update_str_val(vp) \
|
||||
{ \
|
||||
if (freeable(vp)) \
|
||||
xfree(vp->sval); \
|
||||
if (modf(vp->fval, &dtemp) == 0) /* it's integral */ \
|
||||
snprintf(s, sizeof (s), "%.30g", vp->fval); \
|
||||
else \
|
||||
snprintf(s, sizeof (s), *fmt, vp->fval); \
|
||||
vp->sval = tostring(s); \
|
||||
vp->tval &= ~DONTFREE; \
|
||||
vp->tval |= STR; \
|
||||
}
|
||||
|
||||
if (isstr(vp) == 0) {
|
||||
update_str_val(vp);
|
||||
if (fmt == OFMT) {
|
||||
vp->tval &= ~CONVC;
|
||||
vp->tval |= CONVO;
|
||||
} else {
|
||||
/* CONVFMT */
|
||||
vp->tval &= ~CONVO;
|
||||
vp->tval |= CONVC;
|
||||
}
|
||||
vp->fmt = *fmt;
|
||||
} else if ((vp->tval & DONTFREE) != 0 || ! isnum(vp) || isfld(vp)) {
|
||||
goto done;
|
||||
} else if (isstr(vp)) {
|
||||
if (fmt == OFMT) {
|
||||
if ((vp->tval & CONVC) != 0
|
||||
|| ((vp->tval & CONVO) != 0 && vp->fmt != *fmt)) {
|
||||
update_str_val(vp);
|
||||
vp->tval &= ~CONVC;
|
||||
vp->tval |= CONVO;
|
||||
vp->fmt = *fmt;
|
||||
}
|
||||
} else {
|
||||
/* CONVFMT */
|
||||
if ((vp->tval & CONVO) != 0
|
||||
|| ((vp->tval & CONVC) != 0 && vp->fmt != *fmt)) {
|
||||
update_str_val(vp);
|
||||
vp->tval &= ~CONVO;
|
||||
vp->tval |= CONVC;
|
||||
vp->fmt = *fmt;
|
||||
}
|
||||
}
|
||||
}
|
||||
done:
|
||||
dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n",
|
||||
(void*)vp, NN(vp->nval), vp->sval, vp->sval, vp->tval) );
|
||||
(void*)vp, NN(vp->nval), vp->sval, (void *) vp->sval, vp->tval) );
|
||||
return(vp->sval);
|
||||
}
|
||||
|
||||
@ -457,3 +560,37 @@ char *qstring(const char *is, int delim) /* collect string up to next delim */
|
||||
*bp++ = 0;
|
||||
return (char *) buf;
|
||||
}
|
||||
|
||||
const char *flags2str(int flags)
|
||||
{
|
||||
static const struct ftab {
|
||||
const char *name;
|
||||
int value;
|
||||
} flagtab[] = {
|
||||
{ "NUM", NUM },
|
||||
{ "STR", STR },
|
||||
{ "DONTFREE", DONTFREE },
|
||||
{ "CON", CON },
|
||||
{ "ARR", ARR },
|
||||
{ "FCN", FCN },
|
||||
{ "FLD", FLD },
|
||||
{ "REC", REC },
|
||||
{ "CONVC", CONVC },
|
||||
{ "CONVO", CONVO },
|
||||
{ NULL, 0 }
|
||||
};
|
||||
static char buf[100];
|
||||
int i;
|
||||
char *cp = buf;
|
||||
|
||||
for (i = 0; flagtab[i].name != NULL; i++) {
|
||||
if ((flags & flagtab[i].value) != 0) {
|
||||
if (cp > buf)
|
||||
*cp++ = '|';
|
||||
strcpy(cp, flagtab[i].name);
|
||||
cp += strlen(cp);
|
||||
}
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user