Vendor import of expat 2.2.0 (trimmed).
This commit is contained in:
parent
36e3acbc34
commit
17c9c52d9a
5
COPYING
5
COPYING
@ -1,6 +1,5 @@
|
||||
Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
|
||||
and Clark Cooper
|
||||
Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Expat maintainers.
|
||||
Copyright (c) 1998-2000 Thai Open Source Software Center Ltd and Clark Cooper
|
||||
Copyright (c) 2001-2016 Expat maintainers
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
|
66
Changes
66
Changes
@ -1,3 +1,67 @@
|
||||
Release 2.2.0 Tue June 21 2016
|
||||
Security fixes:
|
||||
#537 CVE-2016-0718 -- Fix crash on malformed input
|
||||
CVE-2016-4472 -- Improve insufficient fix to CVE-2015-1283 /
|
||||
CVE-2015-2716 introduced with Expat 2.1.1
|
||||
#499 CVE-2016-5300 -- Use more entropy for hash initialization
|
||||
than the original fix to CVE-2012-0876
|
||||
#519 CVE-2012-6702 -- Resolve troublesome internal call to srand
|
||||
that was introduced with Expat 2.1.0
|
||||
when addressing CVE-2012-0876 (issue #496)
|
||||
|
||||
Bug fixes:
|
||||
Fix uninitialized reads of size 1
|
||||
(e.g. in little2_updatePosition)
|
||||
Fix detection of UTF-8 character boundaries
|
||||
|
||||
Other changes:
|
||||
#532 Fix compilation for Visual Studio 2010 (keyword "C99")
|
||||
Autotools: Resolve use of "$<" to better support bmake
|
||||
Autotools: Add QA script "qa.sh" (and make target "qa")
|
||||
Autotools: Respect CXXFLAGS if given
|
||||
Autotools: Fix "make run-xmltest"
|
||||
Autotools: Have "make run-xmltest" check for expected output
|
||||
p90 CMake: Fix static build (BUILD_shared=OFF) on Windows
|
||||
#536 CMake: Add soversion, support -DNO_SONAME=yes to bypass
|
||||
#323 CMake: Add suffix "d" to differentiate debug from release
|
||||
CMake: Define WIN32 with CMake on Windows
|
||||
Annotate memory allocators for GCC
|
||||
Address all currently known compile warnings
|
||||
Make sure that API symbols remain visible despite
|
||||
-fvisibility=hidden
|
||||
Remove executable flag from source files
|
||||
Resolve COMPILED_FROM_DSP in favor of WIN32
|
||||
|
||||
Special thanks to:
|
||||
Björn Lindahl
|
||||
Christian Heimes
|
||||
Cristian Rodríguez
|
||||
Daniel Krügler
|
||||
Gustavo Grieco
|
||||
Karl Waclawek
|
||||
László Böszörményi
|
||||
Marco Grassi
|
||||
Pascal Cuoq
|
||||
Sergei Nikulov
|
||||
Thomas Beutlich
|
||||
Warren Young
|
||||
Yann Droneaud
|
||||
|
||||
Release 2.1.1 Sat March 12 2016
|
||||
Security fixes:
|
||||
#582: CVE-2015-1283 - Multiple integer overflows in XML_GetBuffer
|
||||
|
||||
Bug fixes:
|
||||
#502: Fix potential null pointer dereference
|
||||
#520: Symbol XML_SetHashSalt was not exported
|
||||
Output of "xmlwf -h" was incomplete
|
||||
|
||||
Other changes:
|
||||
#503: Document behavior of calling XML_SetHashSalt with salt 0
|
||||
Minor improvements to man page xmlwf(1)
|
||||
Improvements to the experimental CMake build system
|
||||
libtool now invoked with --verbose
|
||||
|
||||
Release 2.1.0 Sat March 24 2012
|
||||
- Bug Fixes:
|
||||
#1742315: Harmful XML_ParserCreateNS suggestion.
|
||||
@ -23,7 +87,7 @@ Release 2.1.0 Sat March 24 2012
|
||||
#3312568: CMake support.
|
||||
#3446384: Report byte offsets for attr names and values.
|
||||
- New Features / API changes:
|
||||
Added new API member XML_SetHashSalt() that allows setting an intial
|
||||
Added new API member XML_SetHashSalt() that allows setting an initial
|
||||
value (salt) for hash calculations. This is part of the fix for
|
||||
bug #3496608 to randomize hash parameters.
|
||||
When compiled with XML_ATTR_INFO defined, adds new API member
|
||||
|
4
MANIFEST
4
MANIFEST
@ -44,7 +44,7 @@ doc/reference.html
|
||||
doc/style.css
|
||||
doc/valid-xhtml10.png
|
||||
doc/xmlwf.1
|
||||
doc/xmlwf.sgml
|
||||
doc/xmlwf.xml
|
||||
CMakeLists.txt
|
||||
CMake.README
|
||||
COPYING
|
||||
@ -54,7 +54,7 @@ MANIFEST
|
||||
Makefile.in
|
||||
README
|
||||
configure
|
||||
configure.in
|
||||
configure.ac
|
||||
expat_config.h.in
|
||||
expat_config.h.cmake
|
||||
expat.pc.in
|
||||
|
29
Makefile.in
29
Makefile.in
@ -42,7 +42,7 @@ INSTALL_DATA = @INSTALL_DATA@
|
||||
mkinstalldirs = $(SHELL) $(top_srcdir)/conftools/mkinstalldirs
|
||||
|
||||
MANFILE = $(srcdir)/doc/xmlwf.1
|
||||
APIHEADER = $(srcdir)/lib/expat.h $(srcdir)/lib/expat_external.h
|
||||
APIHEADER = $(srcdir)/lib/expat.h $(srcdir)/lib/expat_external.h expat_config.h
|
||||
LIBRARY = libexpat.la
|
||||
|
||||
DESTDIR = $(INSTALL_ROOT)
|
||||
@ -51,7 +51,7 @@ default: buildlib xmlwf/xmlwf@EXEEXT@
|
||||
|
||||
buildlib: $(LIBRARY) expat.pc
|
||||
|
||||
all: $(LIBRARY) expat.pc xmlwf/xmlwf@EXEEXT@ examples/elements examples/outline
|
||||
all: $(LIBRARY) expat.pc xmlwf/xmlwf@EXEEXT@ examples/elements examples/outline $(MANFILE)
|
||||
|
||||
clean:
|
||||
cd lib && rm -f $(LIBRARY) *.@OBJEXT@ *.lo && rm -rf .libs _libs
|
||||
@ -77,7 +77,10 @@ check: tests/runtests tests/runtestspp
|
||||
tests/runtests
|
||||
tests/runtestspp
|
||||
|
||||
install: xmlwf/xmlwf@EXEEXT@ installlib
|
||||
$(MANFILE):
|
||||
$(MAKE) -C doc xmlwf.1
|
||||
|
||||
install: xmlwf/xmlwf@EXEEXT@ installlib $(MANFILE)
|
||||
$(mkinstalldirs) $(DESTDIR)$(bindir) $(DESTDIR)$(man1dir)
|
||||
$(LIBTOOL) --mode=install $(INSTALL_PROGRAM) xmlwf/xmlwf@EXEEXT@ $(DESTDIR)$(bindir)/xmlwf
|
||||
$(INSTALL_DATA) $(MANFILE) $(DESTDIR)$(man1dir)
|
||||
@ -116,7 +119,7 @@ CXXFLAGS = @CXXFLAGS@
|
||||
VSNFLAG = -version-info @LIBCURRENT@:@LIBREVISION@:@LIBAGE@
|
||||
|
||||
### autoconf this?
|
||||
LTFLAGS = --silent
|
||||
LTFLAGS = --verbose
|
||||
|
||||
COMPILE = $(CC) $(INCLUDES) $(CFLAGS) $(DEFS) $(CPPFLAGS)
|
||||
CXXCOMPILE = $(CXX) $(INCLUDES) $(CXXFLAGS) $(DEFS) $(CPPFLAGS)
|
||||
@ -154,11 +157,11 @@ xmlwf/xmlwf@EXEEXT@: $(XMLWF_OBJS) $(LIBRARY)
|
||||
|
||||
examples/elements.@OBJEXT@: examples/elements.c
|
||||
examples/elements: examples/elements.@OBJEXT@ $(LIBRARY)
|
||||
$(LINK_EXE) $< $(LIBRARY)
|
||||
$(LINK_EXE) examples/elements.@OBJEXT@ $(LIBRARY)
|
||||
|
||||
examples/outline.@OBJEXT@: examples/outline.c
|
||||
examples/outline: examples/outline.@OBJEXT@ $(LIBRARY)
|
||||
$(LINK_EXE) $< $(LIBRARY)
|
||||
$(LINK_EXE) examples/outline.@OBJEXT@ $(LIBRARY)
|
||||
|
||||
tests/chardata.@OBJEXT@: tests/chardata.c tests/chardata.h
|
||||
tests/minicheck.@OBJEXT@: tests/minicheck.c tests/minicheck.h
|
||||
@ -180,11 +183,19 @@ tests/xmlts.zip:
|
||||
wget --output-document=tests/xmlts.zip \
|
||||
http://www.w3.org/XML/Test/xmlts20080827.zip
|
||||
|
||||
tests/XML-Test-Suite: tests/xmlts.zip
|
||||
tests/xmlconf: tests/xmlts.zip
|
||||
cd tests && unzip -q xmlts.zip
|
||||
|
||||
run-xmltest: xmlwf/xmlwf@EXEEXT@ tests/XML-Test-Suite
|
||||
tests/xmltest.sh
|
||||
run-xmltest: xmlwf/xmlwf@EXEEXT@ tests/xmlconf
|
||||
tests/xmltest.sh 2>&1 | tee tests/xmltest.log
|
||||
diff -u tests/xmltest.log.expected tests/xmltest.log
|
||||
|
||||
.PHONY: qa
|
||||
qa:
|
||||
./qa.sh address
|
||||
./qa.sh memory
|
||||
./qa.sh undefined
|
||||
./qa.sh coverage
|
||||
|
||||
.SUFFIXES: .c .cpp .lo .@OBJEXT@
|
||||
|
||||
|
4
README
4
README
@ -1,5 +1,5 @@
|
||||
|
||||
Expat, Release 2.1.0
|
||||
Expat, Release 2.2.0
|
||||
|
||||
This is Expat, a C library for parsing XML, written by James Clark.
|
||||
Expat is a stream-oriented XML parser. This means that you register
|
||||
@ -114,7 +114,7 @@ Note for Solaris users: The "ar" command is usually located in
|
||||
"/usr/ccs/bin", which is not in the default PATH. You will need to
|
||||
add this to your path for the "make" command, and probably also switch
|
||||
to GNU make (the "make" found in /usr/ccs/bin does not seem to work
|
||||
properly -- appearantly it does not understand .PHONY directives). If
|
||||
properly -- apparently it does not understand .PHONY directives). If
|
||||
you're using ksh or bash, use this command to build:
|
||||
|
||||
PATH=/usr/ccs/bin:$PATH make
|
||||
|
@ -25,7 +25,7 @@ dnl test. I believe this test will work, but I don't have a place with non-
|
||||
dnl GNU M4 to test it right now.
|
||||
define([expat_version], ifdef([__gnu__],
|
||||
[esyscmd(conftools/get-version.sh lib/expat.h)],
|
||||
[2.1.x]))
|
||||
[2.2.x]))
|
||||
AC_INIT(expat, expat_version, expat-bugs@libexpat.org)
|
||||
undefine([expat_version])
|
||||
|
||||
@ -45,9 +45,9 @@ dnl
|
||||
dnl If the API changes incompatibly set LIBAGE back to 0
|
||||
dnl
|
||||
|
||||
LIBCURRENT=7
|
||||
LIBREVISION=0
|
||||
LIBAGE=6
|
||||
LIBCURRENT=7 # sync
|
||||
LIBREVISION=2 # with
|
||||
LIBAGE=6 # CMakeLists.txt!
|
||||
|
||||
AC_CONFIG_HEADER(expat_config.h)
|
||||
|
||||
@ -77,7 +77,9 @@ if test "$GCC" = yes ; then
|
||||
AC_TRY_LINK( , ,
|
||||
AC_MSG_RESULT(yes),
|
||||
AC_MSG_RESULT(no); CFLAGS="$OLDCFLAGS")
|
||||
if test "x$CXXFLAGS" = x ; then
|
||||
CXXFLAGS=`echo "$CFLAGS" | sed 's/ -Wmissing-prototypes -Wstrict-prototypes//'`
|
||||
fi
|
||||
fi
|
||||
|
||||
dnl Checks for header files.
|
BIN
doc/expat.png
BIN
doc/expat.png
Binary file not shown.
Before Width: | Height: | Size: 1.0 KiB After Width: | Height: | Size: 1.0 KiB |
@ -2151,8 +2151,12 @@ Helps in preventing DoS attacks based on predicting hash
|
||||
function behavior. In order to have an effect this must be called
|
||||
before parsing has started. Returns 1 if successful, 0 when called
|
||||
after <code>XML_Parse</code> or <code>XML_ParseBuffer</code>.
|
||||
<p><b>Note:</b> This call is optional, as the parser will auto-generate a new
|
||||
random salt value if no value has been set at the start of parsing.</p>
|
||||
<p><b>Note:</b> This call is optional, as the parser will auto-generate
|
||||
a new random salt value if no value has been set at the start of parsing.</p>
|
||||
<p><b>Note:</b> One should not call <code>XML_SetHashSalt</code> with a
|
||||
hash salt value of 0, as this value is used as sentinel value to indicate
|
||||
that <code>XML_SetHashSalt</code> has <b>not</b> been called. Consequently
|
||||
such a call will have no effect, even if it returns 1.</p>
|
||||
</div>
|
||||
|
||||
<pre class="fcndec" id="XML_UseForeignDTD">
|
||||
|
214
doc/xmlwf.1
214
doc/xmlwf.1
@ -1,33 +1,40 @@
|
||||
.\" This manpage has been automatically generated by docbook2man
|
||||
.\" from a DocBook document. This tool can be found at:
|
||||
.\" <http://shell.ipoline.com/~elmert/comp/docbook2X/>
|
||||
.\" Please send any bug reports, improvements, comments, patches,
|
||||
.\" etc. to Steve Cheng <steve@ggi-project.org>.
|
||||
.TH "XMLWF" "1" "24 January 2003" "" ""
|
||||
'\" -*- coding: us-ascii -*-
|
||||
.if \n(.g .ds T< \\FC
|
||||
.if \n(.g .ds T> \\F[\n[.fam]]
|
||||
.de URL
|
||||
\\$2 \(la\\$1\(ra\\$3
|
||||
..
|
||||
.if \n(.g .mso www.tmac
|
||||
.TH XMLWF 1 "March 11, 2016" "" ""
|
||||
.SH NAME
|
||||
xmlwf \- Determines if an XML document is well-formed
|
||||
.SH SYNOPSIS
|
||||
|
||||
\fBxmlwf\fR [ \fB-s\fR] [ \fB-n\fR] [ \fB-p\fR] [ \fB-x\fR] [ \fB-e \fIencoding\fB\fR] [ \fB-w\fR] [ \fB-d \fIoutput-dir\fB\fR] [ \fB-c\fR] [ \fB-m\fR] [ \fB-r\fR] [ \fB-t\fR] [ \fB-v\fR] [ \fBfile ...\fR]
|
||||
|
||||
.SH "DESCRIPTION"
|
||||
.PP
|
||||
'nh
|
||||
.fi
|
||||
.ad l
|
||||
\fBxmlwf\fR \kx
|
||||
.if (\nx>(\n(.l/2)) .nr x (\n(.l/5)
|
||||
'in \n(.iu+\nxu
|
||||
[\fB-s\fR] [\fB-n\fR] [\fB-p\fR] [\fB-x\fR] [\fB-e \fIencoding\fB\fR] [\fB-w\fR] [\fB-d \fIoutput-dir\fB\fR] [\fB-c\fR] [\fB-m\fR] [\fB-r\fR] [\fB-t\fR] [\fB-v\fR] [file ...]
|
||||
'in \n(.iu-\nxu
|
||||
.ad b
|
||||
'hy
|
||||
.SH DESCRIPTION
|
||||
\fBxmlwf\fR uses the Expat library to
|
||||
determine if an XML document is well-formed. It is
|
||||
determine if an XML document is well-formed. It is
|
||||
non-validating.
|
||||
.PP
|
||||
If you do not specify any files on the command-line, and you
|
||||
have a recent version of \fBxmlwf\fR, the
|
||||
input file will be read from standard input.
|
||||
.SH "WELL-FORMED DOCUMENTS"
|
||||
.PP
|
||||
A well-formed document must adhere to the
|
||||
following rules:
|
||||
.TP 0.2i
|
||||
\(bu
|
||||
The file begins with an XML declaration. For instance,
|
||||
<?xml version="1.0" standalone="yes"?>.
|
||||
\fBNOTE:\fR
|
||||
The file begins with an XML declaration. For instance,
|
||||
\*(T<<?xml version="1.0" standalone="yes"?>\*(T>.
|
||||
\fINOTE:\fR
|
||||
\fBxmlwf\fR does not currently
|
||||
check for a valid XML declaration.
|
||||
.TP 0.2i
|
||||
@ -36,8 +43,8 @@ Every start tag is either empty (<tag/>)
|
||||
or has a corresponding end tag.
|
||||
.TP 0.2i
|
||||
\(bu
|
||||
There is exactly one root element. This element must contain
|
||||
all other elements in the document. Only comments, white
|
||||
There is exactly one root element. This element must contain
|
||||
all other elements in the document. Only comments, white
|
||||
space, and processing instructions may come after the close
|
||||
of the root element.
|
||||
.TP 0.2i
|
||||
@ -49,39 +56,38 @@ All attribute values are enclosed in quotes (either single
|
||||
or double).
|
||||
.PP
|
||||
If the document has a DTD, and it strictly complies with that
|
||||
DTD, then the document is also considered \fBvalid\fR.
|
||||
DTD, then the document is also considered \fIvalid\fR.
|
||||
\fBxmlwf\fR is a non-validating parser --
|
||||
it does not check the DTD. However, it does support
|
||||
external entities (see the \fB-x\fR option).
|
||||
.SH "OPTIONS"
|
||||
.PP
|
||||
it does not check the DTD. However, it does support
|
||||
external entities (see the \*(T<\fB\-x\fR\*(T> option).
|
||||
.SH OPTIONS
|
||||
When an option includes an argument, you may specify the argument either
|
||||
separately ("\fB-d\fR output") or concatenated with the
|
||||
option ("\fB-d\fRoutput"). \fBxmlwf\fR
|
||||
separately ("\*(T<\fB\-d\fR\*(T> output") or concatenated with the
|
||||
option ("\*(T<\fB\-d\fR\*(T>output"). \fBxmlwf\fR
|
||||
supports both.
|
||||
.TP
|
||||
\fB-c\fR
|
||||
.TP
|
||||
\*(T<\fB\-c\fR\*(T>
|
||||
If the input file is well-formed and \fBxmlwf\fR
|
||||
doesn't encounter any errors, the input file is simply copied to
|
||||
the output directory unchanged.
|
||||
This implies no namespaces (turns off \fB-n\fR) and
|
||||
requires \fB-d\fR to specify an output file.
|
||||
.TP
|
||||
\fB-d output-dir\fR
|
||||
This implies no namespaces (turns off \*(T<\fB\-n\fR\*(T>) and
|
||||
requires \*(T<\fB\-d\fR\*(T> to specify an output file.
|
||||
.TP
|
||||
\*(T<\fB\-d output\-dir\fR\*(T>
|
||||
Specifies a directory to contain transformed
|
||||
representations of the input files.
|
||||
By default, \fB-d\fR outputs a canonical representation
|
||||
By default, \*(T<\fB\-d\fR\*(T> outputs a canonical representation
|
||||
(described below).
|
||||
You can select different output formats using \fB-c\fR
|
||||
and \fB-m\fR.
|
||||
You can select different output formats using \*(T<\fB\-c\fR\*(T>
|
||||
and \*(T<\fB\-m\fR\*(T>.
|
||||
|
||||
The output filenames will
|
||||
be exactly the same as the input filenames or "STDIN" if the input is
|
||||
coming from standard input. Therefore, you must be careful that the
|
||||
coming from standard input. Therefore, you must be careful that the
|
||||
output file does not go into the same directory as the input
|
||||
file. Otherwise, \fBxmlwf\fR will delete the
|
||||
file. Otherwise, \fBxmlwf\fR will delete the
|
||||
input file before it generates the output file (just like running
|
||||
cat < file > file in most shells).
|
||||
\*(T<cat < file > file\*(T> in most shells).
|
||||
|
||||
Two structurally equivalent XML documents have a byte-for-byte
|
||||
identical canonical XML representation.
|
||||
@ -89,39 +95,39 @@ Note that ignorable white space is considered significant and
|
||||
is treated equivalently to data.
|
||||
More on canonical XML can be found at
|
||||
http://www.jclark.com/xml/canonxml.html .
|
||||
.TP
|
||||
\fB-e encoding\fR
|
||||
.TP
|
||||
\*(T<\fB\-e encoding\fR\*(T>
|
||||
Specifies the character encoding for the document, overriding
|
||||
any document encoding declaration. \fBxmlwf\fR
|
||||
any document encoding declaration. \fBxmlwf\fR
|
||||
supports four built-in encodings:
|
||||
US-ASCII,
|
||||
UTF-8,
|
||||
UTF-16, and
|
||||
ISO-8859-1.
|
||||
Also see the \fB-w\fR option.
|
||||
.TP
|
||||
\fB-m\fR
|
||||
\*(T<US\-ASCII\*(T>,
|
||||
\*(T<UTF\-8\*(T>,
|
||||
\*(T<UTF\-16\*(T>, and
|
||||
\*(T<ISO\-8859\-1\*(T>.
|
||||
Also see the \*(T<\fB\-w\fR\*(T> option.
|
||||
.TP
|
||||
\*(T<\fB\-m\fR\*(T>
|
||||
Outputs some strange sort of XML file that completely
|
||||
describes the input file, including character positions.
|
||||
Requires \fB-d\fR to specify an output file.
|
||||
.TP
|
||||
\fB-n\fR
|
||||
Turns on namespace processing. (describe namespaces)
|
||||
\fB-c\fR disables namespaces.
|
||||
.TP
|
||||
\fB-p\fR
|
||||
Requires \*(T<\fB\-d\fR\*(T> to specify an output file.
|
||||
.TP
|
||||
\*(T<\fB\-n\fR\*(T>
|
||||
Turns on namespace processing. (describe namespaces)
|
||||
\*(T<\fB\-c\fR\*(T> disables namespaces.
|
||||
.TP
|
||||
\*(T<\fB\-p\fR\*(T>
|
||||
Tells xmlwf to process external DTDs and parameter
|
||||
entities.
|
||||
|
||||
Normally \fBxmlwf\fR never parses parameter
|
||||
entities. \fB-p\fR tells it to always parse them.
|
||||
\fB-p\fR implies \fB-x\fR.
|
||||
.TP
|
||||
\fB-r\fR
|
||||
entities. \*(T<\fB\-p\fR\*(T> tells it to always parse them.
|
||||
\*(T<\fB\-p\fR\*(T> implies \*(T<\fB\-x\fR\*(T>.
|
||||
.TP
|
||||
\*(T<\fB\-r\fR\*(T>
|
||||
Normally \fBxmlwf\fR memory-maps the XML file
|
||||
before parsing; this can result in faster parsing on many
|
||||
platforms.
|
||||
\fB-r\fR turns off memory-mapping and uses normal file
|
||||
\*(T<\fB\-r\fR\*(T> turns off memory-mapping and uses normal file
|
||||
IO calls instead.
|
||||
Of course, memory-mapping is automatically turned off
|
||||
when reading from standard input.
|
||||
@ -131,34 +137,33 @@ substantially higher memory usage for
|
||||
\fBxmlwf\fR, but this appears to be a matter of
|
||||
the operating system reporting memory in a strange way; there is
|
||||
not a leak in \fBxmlwf\fR.
|
||||
.TP
|
||||
\fB-s\fR
|
||||
.TP
|
||||
\*(T<\fB\-s\fR\*(T>
|
||||
Prints an error if the document is not standalone.
|
||||
A document is standalone if it has no external subset and no
|
||||
references to parameter entities.
|
||||
.TP
|
||||
\fB-t\fR
|
||||
Turns on timings. This tells Expat to parse the entire file,
|
||||
.TP
|
||||
\*(T<\fB\-t\fR\*(T>
|
||||
Turns on timings. This tells Expat to parse the entire file,
|
||||
but not perform any processing.
|
||||
This gives a fairly accurate idea of the raw speed of Expat itself
|
||||
without client overhead.
|
||||
\fB-t\fR turns off most of the output options
|
||||
(\fB-d\fR, \fB-m\fR, \fB-c\fR,
|
||||
\&...).
|
||||
.TP
|
||||
\fB-v\fR
|
||||
\*(T<\fB\-t\fR\*(T> turns off most of the output options
|
||||
(\*(T<\fB\-d\fR\*(T>, \*(T<\fB\-m\fR\*(T>, \*(T<\fB\-c\fR\*(T>, ...).
|
||||
.TP
|
||||
\*(T<\fB\-v\fR\*(T>
|
||||
Prints the version of the Expat library being used, including some
|
||||
information on the compile-time configuration of the library, and
|
||||
then exits.
|
||||
.TP
|
||||
\fB-w\fR
|
||||
.TP
|
||||
\*(T<\fB\-w\fR\*(T>
|
||||
Enables support for Windows code pages.
|
||||
Normally, \fBxmlwf\fR will throw an error if it
|
||||
runs across an encoding that it is not equipped to handle itself. With
|
||||
\fB-w\fR, xmlwf will try to use a Windows code
|
||||
page. See also \fB-e\fR.
|
||||
.TP
|
||||
\fB-x\fR
|
||||
runs across an encoding that it is not equipped to handle itself. With
|
||||
\*(T<\fB\-w\fR\*(T>, xmlwf will try to use a Windows code
|
||||
page. See also \*(T<\fB\-e\fR\*(T>.
|
||||
.TP
|
||||
\*(T<\fB\-x\fR\*(T>
|
||||
Turns on parsing external entities.
|
||||
|
||||
Non-validating parsers are not required to resolve external
|
||||
@ -172,80 +177,75 @@ data from outside the XML file currently being parsed.
|
||||
This is an example of an internal entity:
|
||||
|
||||
.nf
|
||||
|
||||
<!ENTITY vers '1.0.2'>
|
||||
.fi
|
||||
|
||||
And here are some examples of external entities:
|
||||
|
||||
.nf
|
||||
<!ENTITY header SYSTEM "header-&vers;.xml"> (parsed)
|
||||
|
||||
<!ENTITY header SYSTEM "header\-&vers;.xml"> (parsed)
|
||||
<!ENTITY logo SYSTEM "logo.png" PNG> (unparsed)
|
||||
.fi
|
||||
.TP
|
||||
\fB--\fR
|
||||
.TP
|
||||
\*(T<\fB\-\-\fR\*(T>
|
||||
(Two hyphens.)
|
||||
Terminates the list of options. This is only needed if a filename
|
||||
starts with a hyphen. For example:
|
||||
Terminates the list of options. This is only needed if a filename
|
||||
starts with a hyphen. For example:
|
||||
|
||||
.nf
|
||||
xmlwf -- -myfile.xml
|
||||
|
||||
xmlwf \-\- \-myfile.xml
|
||||
.fi
|
||||
|
||||
will run \fBxmlwf\fR on the file
|
||||
\fI-myfile.xml\fR.
|
||||
\*(T<\fI\-myfile.xml\fR\*(T>.
|
||||
.PP
|
||||
Older versions of \fBxmlwf\fR do not support
|
||||
reading from standard input.
|
||||
.SH "OUTPUT"
|
||||
.PP
|
||||
.SH OUTPUT
|
||||
If an input file is not well-formed,
|
||||
\fBxmlwf\fR prints a single line describing
|
||||
the problem to standard output. If a file is well formed,
|
||||
the problem to standard output. If a file is well formed,
|
||||
\fBxmlwf\fR outputs nothing.
|
||||
Note that the result code is \fBnot\fR set.
|
||||
.SH "BUGS"
|
||||
.PP
|
||||
According to the W3C standard, an XML file without a
|
||||
declaration at the beginning is not considered well-formed.
|
||||
However, \fBxmlwf\fR allows this to pass.
|
||||
.PP
|
||||
Note that the result code is \fInot\fR set.
|
||||
.SH BUGS
|
||||
\fBxmlwf\fR returns a 0 - noerr result,
|
||||
even if the file is not well-formed. There is no good way for
|
||||
even if the file is not well-formed. There is no good way for
|
||||
a program to use \fBxmlwf\fR to quickly
|
||||
check a file -- it must parse \fBxmlwf\fR's
|
||||
standard output.
|
||||
.PP
|
||||
The errors should go to standard error, not standard output.
|
||||
.PP
|
||||
There should be a way to get \fB-d\fR to send its
|
||||
There should be a way to get \*(T<\fB\-d\fR\*(T> to send its
|
||||
output to standard output rather than forcing the user to send
|
||||
it to a file.
|
||||
.PP
|
||||
I have no idea why anyone would want to use the
|
||||
\fB-d\fR, \fB-c\fR, and
|
||||
\fB-m\fR options. If someone could explain it to
|
||||
\*(T<\fB\-d\fR\*(T>, \*(T<\fB\-c\fR\*(T>, and
|
||||
\*(T<\fB\-m\fR\*(T> options. If someone could explain it to
|
||||
me, I'd like to add this information to this manpage.
|
||||
.SH "ALTERNATIVES"
|
||||
.PP
|
||||
.SH ALTERNATIVES
|
||||
Here are some XML validators on the web:
|
||||
|
||||
.nf
|
||||
http://www.hcrc.ed.ac.uk/~richard/xml-check.html
|
||||
|
||||
http://www.hcrc.ed.ac.uk/~richard/xml\-check.html
|
||||
http://www.stg.brown.edu/service/xmlvalid/
|
||||
http://www.scripting.com/frontier5/xml/code/xmlValidator.html
|
||||
http://www.xml.com/pub/a/tools/ruwf/check.html
|
||||
.fi
|
||||
.SH "SEE ALSO"
|
||||
.PP
|
||||
|
||||
.nf
|
||||
|
||||
The Expat home page: http://www.libexpat.org/
|
||||
The W3 XML specification: http://www.w3.org/TR/REC-xml
|
||||
The W3 XML specification: http://www.w3.org/TR/REC\-xml
|
||||
.fi
|
||||
.SH "AUTHOR"
|
||||
.PP
|
||||
This manual page was written by Scott Bronson <bronson@rinspin.com> for
|
||||
the Debian GNU/Linux system (but may be used by others). Permission is
|
||||
.SH AUTHOR
|
||||
This manual page was written by Scott Bronson <\*(T<bronson@rinspin.com\*(T>> for
|
||||
the Debian GNU/Linux system (but may be used by others). Permission is
|
||||
granted to copy, distribute and/or modify this document under
|
||||
the terms of the GNU Free Documentation
|
||||
License, Version 1.1.
|
||||
|
@ -1,19 +1,9 @@
|
||||
<!doctype refentry PUBLIC "-//OASIS//DTD DocBook V4.1//EN" [
|
||||
|
||||
<!-- Process this file with docbook-to-man to generate an nroff manual
|
||||
page: `docbook-to-man manpage.sgml > manpage.1'. You may view
|
||||
the manual page with: `docbook-to-man manpage.sgml | nroff -man |
|
||||
less'. A typical entry in a Makefile or Makefile.am is:
|
||||
|
||||
manpage.1: manpage.sgml
|
||||
docbook-to-man $< > $@
|
||||
-->
|
||||
|
||||
<!DOCTYPE refentry [
|
||||
<!-- Fill in your name for FIRSTNAME and SURNAME. -->
|
||||
<!ENTITY dhfirstname "<firstname>Scott</firstname>">
|
||||
<!ENTITY dhsurname "<surname>Bronson</surname>">
|
||||
<!-- Please adjust the date whenever revising the manpage. -->
|
||||
<!ENTITY dhdate "<date>December 5, 2001</date>">
|
||||
<!ENTITY dhdate "<date>March 11, 2016</date>">
|
||||
<!-- SECTION should be 1-8, maybe w/ subsection; other parameters are
|
||||
allowed: see man(7), man(1). -->
|
||||
<!ENTITY dhsection "<manvolnum>1</manvolnum>">
|
||||
@ -213,7 +203,7 @@ supports both.
|
||||
<listitem>
|
||||
<para>
|
||||
Outputs some strange sort of XML file that completely
|
||||
describes the the input file, including character postitions.
|
||||
describes the input file, including character positions.
|
||||
Requires <option>-d</option> to specify an output file.
|
||||
</para>
|
||||
</listitem>
|
||||
@ -286,8 +276,7 @@ supports both.
|
||||
This gives a fairly accurate idea of the raw speed of Expat itself
|
||||
without client overhead.
|
||||
<option>-t</option> turns off most of the output options
|
||||
(<option>-d</option>, <option>-m</option>, <option>-c</option>,
|
||||
...).
|
||||
(<option>-d</option>, <option>-m</option>, <option>-c</option>, ...).
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
@ -449,20 +438,3 @@ The W3 XML specification: http://www.w3.org/TR/REC-xml
|
||||
</para>
|
||||
</refsect1>
|
||||
</refentry>
|
||||
|
||||
<!-- Keep this comment at the end of the file
|
||||
Local variables:
|
||||
mode: sgml
|
||||
sgml-omittag:t
|
||||
sgml-shorttag:t
|
||||
sgml-minimize-attributes:nil
|
||||
sgml-always-quote-attributes:t
|
||||
sgml-indent-step:2
|
||||
sgml-indent-data:t
|
||||
sgml-parent-document:nil
|
||||
sgml-default-dtd-file:nil
|
||||
sgml-exposed-tags:nil
|
||||
sgml-local-catalogs:nil
|
||||
sgml-local-ecat-files:nil
|
||||
End:
|
||||
-->
|
@ -27,6 +27,8 @@ startElement(void *userData, const char *name, const char **atts)
|
||||
{
|
||||
int i;
|
||||
int *depthPtr = (int *)userData;
|
||||
(void)atts;
|
||||
|
||||
for (i = 0; i < *depthPtr; i++)
|
||||
putchar('\t');
|
||||
puts(name);
|
||||
@ -37,6 +39,8 @@ static void XMLCALL
|
||||
endElement(void *userData, const char *name)
|
||||
{
|
||||
int *depthPtr = (int *)userData;
|
||||
(void)name;
|
||||
|
||||
*depthPtr -= 1;
|
||||
}
|
||||
|
||||
@ -47,10 +51,13 @@ main(int argc, char *argv[])
|
||||
XML_Parser parser = XML_ParserCreate(NULL);
|
||||
int done;
|
||||
int depth = 0;
|
||||
(void)argc;
|
||||
(void)argv;
|
||||
|
||||
XML_SetUserData(parser, &depth);
|
||||
XML_SetElementHandler(parser, startElement, endElement);
|
||||
do {
|
||||
int len = (int)fread(buf, 1, sizeof(buf), stdin);
|
||||
size_t len = fread(buf, 1, sizeof(buf), stdin);
|
||||
done = len < sizeof(buf);
|
||||
if (XML_Parse(parser, buf, len, done) == XML_STATUS_ERROR) {
|
||||
fprintf(stderr,
|
||||
|
@ -49,6 +49,7 @@ static void XMLCALL
|
||||
start(void *data, const char *el, const char **attr)
|
||||
{
|
||||
int i;
|
||||
(void)data;
|
||||
|
||||
for (i = 0; i < Depth; i++)
|
||||
printf(" ");
|
||||
@ -66,6 +67,9 @@ start(void *data, const char *el, const char **attr)
|
||||
static void XMLCALL
|
||||
end(void *data, const char *el)
|
||||
{
|
||||
(void)data;
|
||||
(void)el;
|
||||
|
||||
Depth--;
|
||||
}
|
||||
|
||||
@ -73,6 +77,9 @@ int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
XML_Parser p = XML_ParserCreate(NULL);
|
||||
(void)argc;
|
||||
(void)argv;
|
||||
|
||||
if (! p) {
|
||||
fprintf(stderr, "Couldn't allocate memory for parser\n");
|
||||
exit(-1);
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* expat_config.h.in. Generated from configure.in by autoheader. */
|
||||
/* expat_config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
/* 1234 = LIL_ENDIAN, 4321 = BIGENDIAN */
|
||||
#undef BYTEORDER
|
||||
@ -51,8 +51,7 @@
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#undef HAVE_UNISTD_H
|
||||
|
||||
/* Define to the sub-directory in which libtool stores uninstalled libraries.
|
||||
*/
|
||||
/* Define to the sub-directory where libtool stores uninstalled libraries. */
|
||||
#undef LT_OBJDIR
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
|
13
lib/expat.h
13
lib/expat.h
@ -342,7 +342,7 @@ XML_SetEntityDeclHandler(XML_Parser parser,
|
||||
XML_EntityDeclHandler handler);
|
||||
|
||||
/* OBSOLETE -- OBSOLETE -- OBSOLETE
|
||||
This handler has been superceded by the EntityDeclHandler above.
|
||||
This handler has been superseded by the EntityDeclHandler above.
|
||||
It is provided here for backward compatibility.
|
||||
|
||||
This is called for a declaration of an unparsed (NDATA) entity.
|
||||
@ -973,9 +973,12 @@ XML_FreeContentModel(XML_Parser parser, XML_Content *model);
|
||||
|
||||
/* Exposing the memory handling functions used in Expat */
|
||||
XMLPARSEAPI(void *)
|
||||
XML_ATTR_MALLOC
|
||||
XML_ATTR_ALLOC_SIZE(2)
|
||||
XML_MemMalloc(XML_Parser parser, size_t size);
|
||||
|
||||
XMLPARSEAPI(void *)
|
||||
XML_ATTR_ALLOC_SIZE(3)
|
||||
XML_MemRealloc(XML_Parser parser, void *ptr, size_t size);
|
||||
|
||||
XMLPARSEAPI(void)
|
||||
@ -1031,13 +1034,11 @@ XMLPARSEAPI(const XML_Feature *)
|
||||
XML_GetFeatureList(void);
|
||||
|
||||
|
||||
/* Expat follows the GNU/Linux convention of odd number minor version for
|
||||
beta/development releases and even number minor version for stable
|
||||
releases. Micro is bumped with each release, and set to 0 with each
|
||||
change to major or minor version.
|
||||
/* Expat follows the semantic versioning convention.
|
||||
See http://semver.org.
|
||||
*/
|
||||
#define XML_MAJOR_VERSION 2
|
||||
#define XML_MINOR_VERSION 1
|
||||
#define XML_MINOR_VERSION 2
|
||||
#define XML_MICRO_VERSION 0
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -65,12 +65,26 @@
|
||||
#endif
|
||||
#endif /* not defined XML_STATIC */
|
||||
|
||||
#if !defined(XMLIMPORT) && defined(__GNUC__) && (__GNUC__ >= 4)
|
||||
#define XMLIMPORT __attribute__ ((visibility ("default")))
|
||||
#endif
|
||||
|
||||
/* If we didn't define it above, define it away: */
|
||||
#ifndef XMLIMPORT
|
||||
#define XMLIMPORT
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96))
|
||||
#define XML_ATTR_MALLOC __attribute__((__malloc__))
|
||||
#else
|
||||
#define XML_ATTR_MALLOC
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
|
||||
#define XML_ATTR_ALLOC_SIZE(x) __attribute__((__alloc_size__(x)))
|
||||
#else
|
||||
#define XML_ATTR_ALLOC_SIZE(x)
|
||||
#endif
|
||||
|
||||
#define XMLPARSEAPI(type) XMLIMPORT type XMLCALL
|
||||
|
||||
|
@ -71,3 +71,25 @@
|
||||
#define inline
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef UNUSED_P
|
||||
# ifdef __GNUC__
|
||||
# define UNUSED_P(p) UNUSED_ ## p __attribute__((__unused__))
|
||||
# else
|
||||
# define UNUSED_P(p) UNUSED_ ## p
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
void
|
||||
align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
127
lib/xmlparse.c
127
lib/xmlparse.c
@ -6,11 +6,18 @@
|
||||
#include <string.h> /* memset(), memcpy() */
|
||||
#include <assert.h>
|
||||
#include <limits.h> /* UINT_MAX */
|
||||
#include <time.h> /* time() */
|
||||
|
||||
#ifdef WIN32
|
||||
#define getpid GetCurrentProcessId
|
||||
#else
|
||||
#include <sys/time.h> /* gettimeofday() */
|
||||
#include <sys/types.h> /* getpid() */
|
||||
#include <unistd.h> /* getpid() */
|
||||
#endif
|
||||
|
||||
#define XML_BUILDING_EXPAT 1
|
||||
|
||||
#ifdef COMPILED_FROM_DSP
|
||||
#ifdef WIN32
|
||||
#include "winconfig.h"
|
||||
#elif defined(MACOS_CLASSIC)
|
||||
#include "macconfig.h"
|
||||
@ -20,7 +27,7 @@
|
||||
#include "watcomconfig.h"
|
||||
#elif defined(HAVE_EXPAT_CONFIG_H)
|
||||
#include <expat_config.h>
|
||||
#endif /* ndef COMPILED_FROM_DSP */
|
||||
#endif /* ndef WIN32 */
|
||||
|
||||
#include "ascii.h"
|
||||
#include "expat.h"
|
||||
@ -432,7 +439,7 @@ static ELEMENT_TYPE *
|
||||
getElementType(XML_Parser parser, const ENCODING *enc,
|
||||
const char *ptr, const char *end);
|
||||
|
||||
static unsigned long generate_hash_secret_salt(void);
|
||||
static unsigned long generate_hash_secret_salt(XML_Parser parser);
|
||||
static XML_Bool startParsing(XML_Parser parser);
|
||||
|
||||
static XML_Parser
|
||||
@ -691,11 +698,38 @@ static const XML_Char implicitContext[] = {
|
||||
};
|
||||
|
||||
/* Returns a small amount of clock-derived entropy for seeding the hash salt.
 *
 * WIN32:  XOR of the high and low halves of the current FILETIME.
 * others: the microsecond field reported by gettimeofday().
 *
 * If gettimeofday() fails, debug builds abort on the assert; builds with
 * NDEBUG return 0 instead of reading an uninitialised struct timeval.
 */
static unsigned long
gather_time_entropy(void)
{
#ifdef WIN32
  FILETIME ft;
  GetSystemTimeAsFileTime(&ft); /* never fails */
  return ft.dwHighDateTime ^ ft.dwLowDateTime;
#else
  struct timeval tv;
  const int gettimeofday_res = gettimeofday(&tv, NULL);

  assert (gettimeofday_res == 0);
  if (gettimeofday_res != 0) {
    /* Only reachable when NDEBUG strips the assert above: tv was not
     * filled in, so contribute no time entropy rather than garbage.
     * This also keeps gettimeofday_res "used" in NDEBUG builds. */
    return 0;
  }

  /* Microseconds time is <20 bits entropy */
  return (unsigned long)tv.tv_usec;
#endif
}
|
||||
|
||||
static unsigned long
|
||||
generate_hash_secret_salt(XML_Parser parser)
|
||||
{
|
||||
/* Process ID is 0 bits entropy if attacker has local access
|
||||
* XML_Parser address is few bits of entropy if attacker has local access */
|
||||
const unsigned long entropy =
|
||||
gather_time_entropy() ^ getpid() ^ (unsigned long)parser;
|
||||
|
||||
/* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
|
||||
if (sizeof(unsigned long) == 4) {
|
||||
return entropy * 2147483647;
|
||||
} else {
|
||||
return entropy * (unsigned long)2305843009213693951;
|
||||
}
|
||||
}
|
||||
|
||||
static XML_Bool /* only valid for root parser */
|
||||
@ -703,7 +737,7 @@ startParsing(XML_Parser parser)
|
||||
{
|
||||
/* hash functions must be initialized before setContext() is called */
|
||||
if (hash_secret_salt == 0)
|
||||
hash_secret_salt = generate_hash_secret_salt();
|
||||
hash_secret_salt = generate_hash_secret_salt(parser);
|
||||
if (ns) {
|
||||
/* implicit context only set for root parser, since child
|
||||
parsers (i.e. external entity parsers) will inherit it
|
||||
@ -1550,7 +1584,7 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
|
||||
else if (bufferPtr == bufferEnd) {
|
||||
const char *end;
|
||||
int nLeftOver;
|
||||
enum XML_Error result;
|
||||
enum XML_Status result;
|
||||
parseEndByteIndex += len;
|
||||
positionPtr = s;
|
||||
ps_finalBuffer = (XML_Bool)isFinal;
|
||||
@ -1678,6 +1712,10 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
|
||||
void * XMLCALL
|
||||
XML_GetBuffer(XML_Parser parser, int len)
|
||||
{
|
||||
if (len < 0) {
|
||||
errorCode = XML_ERROR_NO_MEMORY;
|
||||
return NULL;
|
||||
}
|
||||
switch (ps_parsing) {
|
||||
case XML_SUSPENDED:
|
||||
errorCode = XML_ERROR_SUSPENDED;
|
||||
@ -1689,11 +1727,17 @@ XML_GetBuffer(XML_Parser parser, int len)
|
||||
}
|
||||
|
||||
if (len > bufferLim - bufferEnd) {
|
||||
/* FIXME avoid integer overflow */
|
||||
int neededSize = len + (int)(bufferEnd - bufferPtr);
|
||||
#ifdef XML_CONTEXT_BYTES
|
||||
int keep = (int)(bufferPtr - buffer);
|
||||
|
||||
int keep;
|
||||
#endif /* defined XML_CONTEXT_BYTES */
|
||||
/* Do not invoke signed arithmetic overflow: */
|
||||
int neededSize = (int) ((unsigned)len + (unsigned)(bufferEnd - bufferPtr));
|
||||
if (neededSize < 0) {
|
||||
errorCode = XML_ERROR_NO_MEMORY;
|
||||
return NULL;
|
||||
}
|
||||
#ifdef XML_CONTEXT_BYTES
|
||||
keep = (int)(bufferPtr - buffer);
|
||||
if (keep > XML_CONTEXT_BYTES)
|
||||
keep = XML_CONTEXT_BYTES;
|
||||
neededSize += keep;
|
||||
@ -1718,8 +1762,13 @@ XML_GetBuffer(XML_Parser parser, int len)
|
||||
if (bufferSize == 0)
|
||||
bufferSize = INIT_BUFFER_SIZE;
|
||||
do {
|
||||
bufferSize *= 2;
|
||||
} while (bufferSize < neededSize);
|
||||
/* Do not invoke signed arithmetic overflow: */
|
||||
bufferSize = (int) (2U * (unsigned) bufferSize);
|
||||
} while (bufferSize < neededSize && bufferSize > 0);
|
||||
if (bufferSize <= 0) {
|
||||
errorCode = XML_ERROR_NO_MEMORY;
|
||||
return NULL;
|
||||
}
|
||||
newBuf = (char *)MALLOC(bufferSize);
|
||||
if (newBuf == 0) {
|
||||
errorCode = XML_ERROR_NO_MEMORY;
|
||||
@ -1841,7 +1890,7 @@ XML_Index XMLCALL
|
||||
XML_GetCurrentByteIndex(XML_Parser parser)
|
||||
{
|
||||
if (eventPtr)
|
||||
return parseEndByteIndex - (parseEndPtr - eventPtr);
|
||||
return (XML_Index)(parseEndByteIndex - (parseEndPtr - eventPtr));
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -2415,11 +2464,11 @@ doContent(XML_Parser parser,
|
||||
for (;;) {
|
||||
int bufSize;
|
||||
int convLen;
|
||||
XmlConvert(enc,
|
||||
const enum XML_Convert_Result convert_res = XmlConvert(enc,
|
||||
&fromPtr, rawNameEnd,
|
||||
(ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
|
||||
convLen = (int)(toPtr - (XML_Char *)tag->buf);
|
||||
if (fromPtr == rawNameEnd) {
|
||||
if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
|
||||
tag->name.strLen = convLen;
|
||||
break;
|
||||
}
|
||||
@ -2640,11 +2689,11 @@ doContent(XML_Parser parser,
|
||||
if (MUST_CONVERT(enc, s)) {
|
||||
for (;;) {
|
||||
ICHAR *dataPtr = (ICHAR *)dataBuf;
|
||||
XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
|
||||
const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
|
||||
*eventEndPP = s;
|
||||
charDataHandler(handlerArg, dataBuf,
|
||||
(int)(dataPtr - (ICHAR *)dataBuf));
|
||||
if (s == next)
|
||||
if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
|
||||
break;
|
||||
*eventPP = s;
|
||||
}
|
||||
@ -2911,6 +2960,8 @@ storeAtts(XML_Parser parser, const ENCODING *enc,
|
||||
unsigned long uriHash = hash_secret_salt;
|
||||
((XML_Char *)s)[-1] = 0; /* clear flag */
|
||||
id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
|
||||
if (!id || !id->prefix)
|
||||
return XML_ERROR_NO_MEMORY;
|
||||
b = id->prefix->binding;
|
||||
if (!b)
|
||||
return XML_ERROR_UNBOUND_PREFIX;
|
||||
@ -3248,11 +3299,11 @@ doCdataSection(XML_Parser parser,
|
||||
if (MUST_CONVERT(enc, s)) {
|
||||
for (;;) {
|
||||
ICHAR *dataPtr = (ICHAR *)dataBuf;
|
||||
XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
|
||||
const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
|
||||
*eventEndPP = next;
|
||||
charDataHandler(handlerArg, dataBuf,
|
||||
(int)(dataPtr - (ICHAR *)dataBuf));
|
||||
if (s == next)
|
||||
if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
|
||||
break;
|
||||
*eventPP = s;
|
||||
}
|
||||
@ -4911,9 +4962,9 @@ internalEntityProcessor(XML_Parser parser,
|
||||
|
||||
static enum XML_Error PTRCALL
|
||||
errorProcessor(XML_Parser parser,
|
||||
const char *s,
|
||||
const char *end,
|
||||
const char **nextPtr)
|
||||
const char *UNUSED_P(s),
|
||||
const char *UNUSED_P(end),
|
||||
const char **UNUSED_P(nextPtr))
|
||||
{
|
||||
return errorCode;
|
||||
}
|
||||
@ -5329,6 +5380,7 @@ reportDefault(XML_Parser parser, const ENCODING *enc,
|
||||
const char *s, const char *end)
|
||||
{
|
||||
if (MUST_CONVERT(enc, s)) {
|
||||
enum XML_Convert_Result convert_res;
|
||||
const char **eventPP;
|
||||
const char **eventEndPP;
|
||||
if (enc == encoding) {
|
||||
@ -5341,11 +5393,11 @@ reportDefault(XML_Parser parser, const ENCODING *enc,
|
||||
}
|
||||
do {
|
||||
ICHAR *dataPtr = (ICHAR *)dataBuf;
|
||||
XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
|
||||
convert_res = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
|
||||
*eventEndPP = s;
|
||||
defaultHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf));
|
||||
*eventPP = s;
|
||||
} while (s != end);
|
||||
} while ((convert_res != XML_CONVERT_COMPLETED) && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
|
||||
}
|
||||
else
|
||||
defaultHandler(handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s));
|
||||
@ -5475,6 +5527,8 @@ getAttributeId(XML_Parser parser, const ENCODING *enc,
|
||||
return NULL;
|
||||
id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
|
||||
sizeof(PREFIX));
|
||||
if (!id->prefix)
|
||||
return NULL;
|
||||
if (id->prefix->name == poolStart(&dtd->pool))
|
||||
poolFinish(&dtd->pool);
|
||||
else
|
||||
@ -6148,8 +6202,8 @@ poolAppend(STRING_POOL *pool, const ENCODING *enc,
|
||||
if (!pool->ptr && !poolGrow(pool))
|
||||
return NULL;
|
||||
for (;;) {
|
||||
XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
|
||||
if (ptr == end)
|
||||
const enum XML_Convert_Result convert_res = XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
|
||||
if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
|
||||
break;
|
||||
if (!poolGrow(pool))
|
||||
return NULL;
|
||||
@ -6233,8 +6287,13 @@ poolGrow(STRING_POOL *pool)
|
||||
}
|
||||
}
|
||||
if (pool->blocks && pool->start == pool->blocks->s) {
|
||||
int blockSize = (int)(pool->end - pool->start)*2;
|
||||
BLOCK *temp = (BLOCK *)
|
||||
BLOCK *temp;
|
||||
int blockSize = (int)((unsigned)(pool->end - pool->start)*2U);
|
||||
|
||||
if (blockSize < 0)
|
||||
return XML_FALSE;
|
||||
|
||||
temp = (BLOCK *)
|
||||
pool->mem->realloc_fcn(pool->blocks,
|
||||
(offsetof(BLOCK, s)
|
||||
+ blockSize * sizeof(XML_Char)));
|
||||
@ -6249,6 +6308,10 @@ poolGrow(STRING_POOL *pool)
|
||||
else {
|
||||
BLOCK *tem;
|
||||
int blockSize = (int)(pool->end - pool->start);
|
||||
|
||||
if (blockSize < 0)
|
||||
return XML_FALSE;
|
||||
|
||||
if (blockSize < INIT_BLOCK_SIZE)
|
||||
blockSize = INIT_BLOCK_SIZE;
|
||||
else
|
||||
|
224
lib/xmlrole.c
224
lib/xmlrole.c
@ -4,7 +4,7 @@
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef COMPILED_FROM_DSP
|
||||
#ifdef WIN32
|
||||
#include "winconfig.h"
|
||||
#elif defined(MACOS_CLASSIC)
|
||||
#include "macconfig.h"
|
||||
@ -16,7 +16,7 @@
|
||||
#ifdef HAVE_EXPAT_CONFIG_H
|
||||
#include <expat_config.h>
|
||||
#endif
|
||||
#endif /* ndef COMPILED_FROM_DSP */
|
||||
#endif /* ndef WIN32 */
|
||||
|
||||
#include "expat_external.h"
|
||||
#include "internal.h"
|
||||
@ -195,9 +195,9 @@ prolog1(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
prolog2(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -216,9 +216,9 @@ prolog2(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
doctype0(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -264,9 +264,9 @@ doctype1(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
doctype2(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -281,9 +281,9 @@ doctype2(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
doctype3(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -298,9 +298,9 @@ doctype3(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
doctype4(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -318,9 +318,9 @@ doctype4(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
doctype5(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -437,9 +437,9 @@ externalSubset1(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
entity0(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -457,9 +457,9 @@ entity0(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
entity1(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -502,9 +502,9 @@ entity2(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
entity3(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -519,9 +519,9 @@ entity3(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
entity4(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -559,9 +559,9 @@ entity5(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
entity6(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -605,9 +605,9 @@ entity7(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
entity8(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -622,9 +622,9 @@ entity8(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
entity9(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -639,9 +639,9 @@ entity9(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
entity10(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -656,9 +656,9 @@ entity10(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
notation0(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -697,9 +697,9 @@ notation1(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
notation2(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -714,9 +714,9 @@ notation2(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
notation3(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -732,9 +732,9 @@ notation3(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
notation4(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -753,9 +753,9 @@ notation4(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
attlist0(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -771,9 +771,9 @@ attlist0(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
attlist1(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -833,9 +833,9 @@ attlist2(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
attlist3(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -852,9 +852,9 @@ attlist3(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
attlist4(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -872,9 +872,9 @@ attlist4(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
attlist5(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -889,9 +889,9 @@ attlist5(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
attlist6(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -906,9 +906,9 @@ attlist6(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
attlist7(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -967,9 +967,9 @@ attlist8(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
attlist9(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -984,9 +984,9 @@ attlist9(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
element0(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -1072,9 +1072,9 @@ element2(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
element3(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -1097,9 +1097,9 @@ element3(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
element4(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -1115,9 +1115,9 @@ element4(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
element5(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -1136,9 +1136,9 @@ element5(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
element6(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -1166,9 +1166,9 @@ element6(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
element7(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -1240,9 +1240,9 @@ condSect0(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
condSect1(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -1258,9 +1258,9 @@ condSect1(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
condSect2(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -1277,9 +1277,9 @@ condSect2(PROLOG_STATE *state,
|
||||
static int PTRCALL
|
||||
declClose(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
switch (tok) {
|
||||
case XML_TOK_PROLOG_S:
|
||||
@ -1292,11 +1292,11 @@ declClose(PROLOG_STATE *state,
|
||||
}
|
||||
|
||||
static int PTRCALL
|
||||
error(PROLOG_STATE *state,
|
||||
int tok,
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const ENCODING *enc)
|
||||
error(PROLOG_STATE *UNUSED_P(state),
|
||||
int UNUSED_P(tok),
|
||||
const char *UNUSED_P(ptr),
|
||||
const char *UNUSED_P(end),
|
||||
const ENCODING *UNUSED_P(enc))
|
||||
{
|
||||
return XML_ROLE_NONE;
|
||||
}
|
||||
|
230
lib/xmltok.c
230
lib/xmltok.c
@ -4,7 +4,7 @@
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef COMPILED_FROM_DSP
|
||||
#ifdef WIN32
|
||||
#include "winconfig.h"
|
||||
#elif defined(MACOS_CLASSIC)
|
||||
#include "macconfig.h"
|
||||
@ -16,7 +16,7 @@
|
||||
#ifdef HAVE_EXPAT_CONFIG_H
|
||||
#include <expat_config.h>
|
||||
#endif
|
||||
#endif /* ndef COMPILED_FROM_DSP */
|
||||
#endif /* ndef WIN32 */
|
||||
|
||||
#include "expat_external.h"
|
||||
#include "internal.h"
|
||||
@ -46,7 +46,7 @@
|
||||
#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
|
||||
|
||||
#define UCS2_GET_NAMING(pages, hi, lo) \
|
||||
(namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F)))
|
||||
(namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
|
||||
|
||||
/* A 2 byte UTF-8 representation splits the characters 11 bits between
|
||||
the bottom 5 and 6 bits of the bytes. We need 8 bits to index into
|
||||
@ -56,7 +56,7 @@
|
||||
(namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
|
||||
+ ((((byte)[0]) & 3) << 1) \
|
||||
+ ((((byte)[1]) >> 5) & 1)] \
|
||||
& (1 << (((byte)[1]) & 0x1F)))
|
||||
& (1u << (((byte)[1]) & 0x1F)))
|
||||
|
||||
/* A 3 byte UTF-8 representation splits the characters 16 bits between
|
||||
the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index
|
||||
@ -69,7 +69,7 @@
|
||||
<< 3) \
|
||||
+ ((((byte)[1]) & 3) << 1) \
|
||||
+ ((((byte)[2]) >> 5) & 1)] \
|
||||
& (1 << (((byte)[2]) & 0x1F)))
|
||||
& (1u << (((byte)[2]) & 0x1F)))
|
||||
|
||||
#define UTF8_GET_NAMING(pages, p, n) \
|
||||
((n) == 2 \
|
||||
@ -122,19 +122,19 @@
|
||||
((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))
|
||||
|
||||
static int PTRFASTCALL
|
||||
isNever(const ENCODING *enc, const char *p)
|
||||
isNever(const ENCODING *UNUSED_P(enc), const char *UNUSED_P(p))
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int PTRFASTCALL
|
||||
utf8_isName2(const ENCODING *enc, const char *p)
|
||||
utf8_isName2(const ENCODING *UNUSED_P(enc), const char *p)
|
||||
{
|
||||
return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
|
||||
}
|
||||
|
||||
static int PTRFASTCALL
|
||||
utf8_isName3(const ENCODING *enc, const char *p)
|
||||
utf8_isName3(const ENCODING *UNUSED_P(enc), const char *p)
|
||||
{
|
||||
return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
|
||||
}
|
||||
@ -142,13 +142,13 @@ utf8_isName3(const ENCODING *enc, const char *p)
|
||||
#define utf8_isName4 isNever
|
||||
|
||||
static int PTRFASTCALL
|
||||
utf8_isNmstrt2(const ENCODING *enc, const char *p)
|
||||
utf8_isNmstrt2(const ENCODING *UNUSED_P(enc), const char *p)
|
||||
{
|
||||
return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
|
||||
}
|
||||
|
||||
static int PTRFASTCALL
|
||||
utf8_isNmstrt3(const ENCODING *enc, const char *p)
|
||||
utf8_isNmstrt3(const ENCODING *UNUSED_P(enc), const char *p)
|
||||
{
|
||||
return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
|
||||
}
|
||||
@ -156,19 +156,19 @@ utf8_isNmstrt3(const ENCODING *enc, const char *p)
|
||||
#define utf8_isNmstrt4 isNever
|
||||
|
||||
static int PTRFASTCALL
|
||||
utf8_isInvalid2(const ENCODING *enc, const char *p)
|
||||
utf8_isInvalid2(const ENCODING *UNUSED_P(enc), const char *p)
|
||||
{
|
||||
return UTF8_INVALID2((const unsigned char *)p);
|
||||
}
|
||||
|
||||
static int PTRFASTCALL
|
||||
utf8_isInvalid3(const ENCODING *enc, const char *p)
|
||||
utf8_isInvalid3(const ENCODING *UNUSED_P(enc), const char *p)
|
||||
{
|
||||
return UTF8_INVALID3((const unsigned char *)p);
|
||||
}
|
||||
|
||||
static int PTRFASTCALL
|
||||
utf8_isInvalid4(const ENCODING *enc, const char *p)
|
||||
utf8_isInvalid4(const ENCODING *UNUSED_P(enc), const char *p)
|
||||
{
|
||||
return UTF8_INVALID4((const unsigned char *)p);
|
||||
}
|
||||
@ -222,6 +222,17 @@ struct normal_encoding {
|
||||
E ## isInvalid3, \
|
||||
E ## isInvalid4
|
||||
|
||||
#define NULL_VTABLE \
|
||||
/* isName2 */ NULL, \
|
||||
/* isName3 */ NULL, \
|
||||
/* isName4 */ NULL, \
|
||||
/* isNmstrt2 */ NULL, \
|
||||
/* isNmstrt3 */ NULL, \
|
||||
/* isNmstrt4 */ NULL, \
|
||||
/* isInvalid2 */ NULL, \
|
||||
/* isInvalid3 */ NULL, \
|
||||
/* isInvalid4 */ NULL
|
||||
|
||||
static int FASTCALL checkCharRefNumber(int);
|
||||
|
||||
#include "xmltok_impl.h"
|
||||
@ -318,39 +329,89 @@ enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */
|
||||
UTF8_cval4 = 0xf0
|
||||
};
|
||||
|
||||
static void PTRCALL
|
||||
utf8_toUtf8(const ENCODING *enc,
|
||||
/* Moves *fromLimRef backwards, if necessary, so that the range
 * [from, *fromLimRef) does not end in the middle of a multi-byte UTF-8
 * character.
 *
 * The scan walks back from the limit one byte at a time:
 *   - an ASCII byte (0xxxxxxx) ends the scan with the limit just after it;
 *   - a lead byte (110xxxxx / 1110xxxx / 11110xxx) ends the scan when enough
 *     bytes were walked for its whole sequence, placing the limit just after
 *     that sequence; otherwise the lead byte is dropped and the scan goes on;
 *   - any other byte is stepped over.
 * If the scan reaches `from`, the resulting range is empty.
 *
 * NOTE(review): the walked-byte counter restarts at 1, not 0, after an
 * incomplete lead byte is dropped. That is harmless when the preceding bytes
 * are well-formed UTF-8, but malformed input such as E2 82 C3 is returned
 * untrimmed. Behaviour is kept exactly as it was.
 */
void
align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef)
{
  const char *limit = *fromLimRef;
  size_t trailing = 0;  /* bytes stepped over since the last restart */

  while (limit > from) {
    const unsigned char last = (unsigned char)limit[-1];
    size_t seqLen = 0;  /* stays 0 for bytes that are not a lead byte */

    if ((last & 0x80u) == 0x00u)
      break;  /* 1-byte character, matching 0b0xxxxxxx */

    if ((last & 0xe0u) == 0xc0u)
      seqLen = 2;  /* lead byte 0b110xxxxx */
    else if ((last & 0xf0u) == 0xe0u)
      seqLen = 3;  /* lead byte 0b1110xxxx */
    else if ((last & 0xf8u) == 0xf0u)
      seqLen = 4;  /* lead byte 0b11110xxx */

    if (seqLen != 0) {
      if (trailing + 1 >= seqLen) {
        /* Whole sequence is present: put the limit right behind it. */
        limit += seqLen - 1;
        break;
      }
      /* Sequence is cut off: drop its lead byte and keep scanning. */
      trailing = 0;
    }

    limit--;
    trailing++;
  }

  *fromLimRef = limit;
}
|
||||
|
||||
static enum XML_Convert_Result PTRCALL
|
||||
utf8_toUtf8(const ENCODING *UNUSED_P(enc),
|
||||
const char **fromP, const char *fromLim,
|
||||
char **toP, const char *toLim)
|
||||
{
|
||||
enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
|
||||
char *to;
|
||||
const char *from;
|
||||
if (fromLim - *fromP > toLim - *toP) {
|
||||
/* Avoid copying partial characters. */
|
||||
for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--)
|
||||
if (((unsigned char)fromLim[-1] & 0xc0) != 0x80)
|
||||
break;
|
||||
res = XML_CONVERT_OUTPUT_EXHAUSTED;
|
||||
fromLim = *fromP + (toLim - *toP);
|
||||
align_limit_to_full_utf8_characters(*fromP, &fromLim);
|
||||
}
|
||||
for (to = *toP, from = *fromP; from != fromLim; from++, to++)
|
||||
for (to = *toP, from = *fromP; (from < fromLim) && (to < toLim); from++, to++)
|
||||
*to = *from;
|
||||
*fromP = from;
|
||||
*toP = to;
|
||||
|
||||
if ((to == toLim) && (from < fromLim))
|
||||
return XML_CONVERT_OUTPUT_EXHAUSTED;
|
||||
else
|
||||
return res;
|
||||
}
|
||||
|
||||
static void PTRCALL
|
||||
static enum XML_Convert_Result PTRCALL
|
||||
utf8_toUtf16(const ENCODING *enc,
|
||||
const char **fromP, const char *fromLim,
|
||||
unsigned short **toP, const unsigned short *toLim)
|
||||
{
|
||||
enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
|
||||
unsigned short *to = *toP;
|
||||
const char *from = *fromP;
|
||||
while (from != fromLim && to != toLim) {
|
||||
while (from < fromLim && to < toLim) {
|
||||
switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
|
||||
case BT_LEAD2:
|
||||
if (fromLim - from < 2) {
|
||||
res = XML_CONVERT_INPUT_INCOMPLETE;
|
||||
break;
|
||||
}
|
||||
*to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
|
||||
from += 2;
|
||||
break;
|
||||
case BT_LEAD3:
|
||||
if (fromLim - from < 3) {
|
||||
res = XML_CONVERT_INPUT_INCOMPLETE;
|
||||
break;
|
||||
}
|
||||
*to++ = (unsigned short)(((from[0] & 0xf) << 12)
|
||||
| ((from[1] & 0x3f) << 6) | (from[2] & 0x3f));
|
||||
from += 3;
|
||||
@ -358,8 +419,14 @@ utf8_toUtf16(const ENCODING *enc,
|
||||
case BT_LEAD4:
|
||||
{
|
||||
unsigned long n;
|
||||
if (to + 1 == toLim)
|
||||
if (toLim - to < 2) {
|
||||
res = XML_CONVERT_OUTPUT_EXHAUSTED;
|
||||
goto after;
|
||||
}
|
||||
if (fromLim - from < 4) {
|
||||
res = XML_CONVERT_INPUT_INCOMPLETE;
|
||||
goto after;
|
||||
}
|
||||
n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
|
||||
| ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
|
||||
n -= 0x10000;
|
||||
@ -377,6 +444,7 @@ utf8_toUtf16(const ENCODING *enc,
|
||||
after:
|
||||
*fromP = from;
|
||||
*toP = to;
|
||||
return res;
|
||||
}
|
||||
|
||||
#ifdef XML_NS
|
||||
@ -425,38 +493,43 @@ static const struct normal_encoding internal_utf8_encoding = {
|
||||
STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
|
||||
};
|
||||
|
||||
static void PTRCALL
|
||||
latin1_toUtf8(const ENCODING *enc,
|
||||
static enum XML_Convert_Result PTRCALL
|
||||
latin1_toUtf8(const ENCODING *UNUSED_P(enc),
|
||||
const char **fromP, const char *fromLim,
|
||||
char **toP, const char *toLim)
|
||||
{
|
||||
for (;;) {
|
||||
unsigned char c;
|
||||
if (*fromP == fromLim)
|
||||
break;
|
||||
return XML_CONVERT_COMPLETED;
|
||||
c = (unsigned char)**fromP;
|
||||
if (c & 0x80) {
|
||||
if (toLim - *toP < 2)
|
||||
break;
|
||||
return XML_CONVERT_OUTPUT_EXHAUSTED;
|
||||
*(*toP)++ = (char)((c >> 6) | UTF8_cval2);
|
||||
*(*toP)++ = (char)((c & 0x3f) | 0x80);
|
||||
(*fromP)++;
|
||||
}
|
||||
else {
|
||||
if (*toP == toLim)
|
||||
break;
|
||||
return XML_CONVERT_OUTPUT_EXHAUSTED;
|
||||
*(*toP)++ = *(*fromP)++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void PTRCALL
|
||||
latin1_toUtf16(const ENCODING *enc,
|
||||
static enum XML_Convert_Result PTRCALL
|
||||
latin1_toUtf16(const ENCODING *UNUSED_P(enc),
|
||||
const char **fromP, const char *fromLim,
|
||||
unsigned short **toP, const unsigned short *toLim)
|
||||
{
|
||||
while (*fromP != fromLim && *toP != toLim)
|
||||
while (*fromP < fromLim && *toP < toLim)
|
||||
*(*toP)++ = (unsigned char)*(*fromP)++;
|
||||
|
||||
if ((*toP == toLim) && (*fromP < fromLim))
|
||||
return XML_CONVERT_OUTPUT_EXHAUSTED;
|
||||
else
|
||||
return XML_CONVERT_COMPLETED;
|
||||
}
|
||||
|
||||
#ifdef XML_NS
|
||||
@ -467,7 +540,7 @@ static const struct normal_encoding latin1_encoding_ns = {
|
||||
#include "asciitab.h"
|
||||
#include "latin1tab.h"
|
||||
},
|
||||
STANDARD_VTABLE(sb_)
|
||||
STANDARD_VTABLE(sb_) NULL_VTABLE
|
||||
};
|
||||
|
||||
#endif
|
||||
@ -480,16 +553,21 @@ static const struct normal_encoding latin1_encoding = {
|
||||
#undef BT_COLON
|
||||
#include "latin1tab.h"
|
||||
},
|
||||
STANDARD_VTABLE(sb_)
|
||||
STANDARD_VTABLE(sb_) NULL_VTABLE
|
||||
};
|
||||
|
||||
static void PTRCALL
|
||||
ascii_toUtf8(const ENCODING *enc,
|
||||
static enum XML_Convert_Result PTRCALL
|
||||
ascii_toUtf8(const ENCODING *UNUSED_P(enc),
|
||||
const char **fromP, const char *fromLim,
|
||||
char **toP, const char *toLim)
|
||||
{
|
||||
while (*fromP != fromLim && *toP != toLim)
|
||||
while (*fromP < fromLim && *toP < toLim)
|
||||
*(*toP)++ = *(*fromP)++;
|
||||
|
||||
if ((*toP == toLim) && (*fromP < fromLim))
|
||||
return XML_CONVERT_OUTPUT_EXHAUSTED;
|
||||
else
|
||||
return XML_CONVERT_COMPLETED;
|
||||
}
|
||||
|
||||
#ifdef XML_NS
|
||||
@ -500,7 +578,7 @@ static const struct normal_encoding ascii_encoding_ns = {
|
||||
#include "asciitab.h"
|
||||
/* BT_NONXML == 0 */
|
||||
},
|
||||
STANDARD_VTABLE(sb_)
|
||||
STANDARD_VTABLE(sb_) NULL_VTABLE
|
||||
};
|
||||
|
||||
#endif
|
||||
@ -513,7 +591,7 @@ static const struct normal_encoding ascii_encoding = {
|
||||
#undef BT_COLON
|
||||
/* BT_NONXML == 0 */
|
||||
},
|
||||
STANDARD_VTABLE(sb_)
|
||||
STANDARD_VTABLE(sb_) NULL_VTABLE
|
||||
};
|
||||
|
||||
static int PTRFASTCALL
|
||||
@ -536,13 +614,14 @@ unicode_byte_type(char hi, char lo)
|
||||
}
|
||||
|
||||
#define DEFINE_UTF16_TO_UTF8(E) \
|
||||
static void PTRCALL \
|
||||
E ## toUtf8(const ENCODING *enc, \
|
||||
static enum XML_Convert_Result PTRCALL \
|
||||
E ## toUtf8(const ENCODING *UNUSED_P(enc), \
|
||||
const char **fromP, const char *fromLim, \
|
||||
char **toP, const char *toLim) \
|
||||
{ \
|
||||
const char *from; \
|
||||
for (from = *fromP; from != fromLim; from += 2) { \
|
||||
const char *from = *fromP; \
|
||||
fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \
|
||||
for (; from < fromLim; from += 2) { \
|
||||
int plane; \
|
||||
unsigned char lo2; \
|
||||
unsigned char lo = GET_LO(from); \
|
||||
@ -552,7 +631,7 @@ E ## toUtf8(const ENCODING *enc, \
|
||||
if (lo < 0x80) { \
|
||||
if (*toP == toLim) { \
|
||||
*fromP = from; \
|
||||
return; \
|
||||
return XML_CONVERT_OUTPUT_EXHAUSTED; \
|
||||
} \
|
||||
*(*toP)++ = lo; \
|
||||
break; \
|
||||
@ -562,7 +641,7 @@ E ## toUtf8(const ENCODING *enc, \
|
||||
case 0x4: case 0x5: case 0x6: case 0x7: \
|
||||
if (toLim - *toP < 2) { \
|
||||
*fromP = from; \
|
||||
return; \
|
||||
return XML_CONVERT_OUTPUT_EXHAUSTED; \
|
||||
} \
|
||||
*(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \
|
||||
*(*toP)++ = ((lo & 0x3f) | 0x80); \
|
||||
@ -570,7 +649,7 @@ E ## toUtf8(const ENCODING *enc, \
|
||||
default: \
|
||||
if (toLim - *toP < 3) { \
|
||||
*fromP = from; \
|
||||
return; \
|
||||
return XML_CONVERT_OUTPUT_EXHAUSTED; \
|
||||
} \
|
||||
/* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
|
||||
*(*toP)++ = ((hi >> 4) | UTF8_cval3); \
|
||||
@ -580,7 +659,11 @@ E ## toUtf8(const ENCODING *enc, \
|
||||
case 0xD8: case 0xD9: case 0xDA: case 0xDB: \
|
||||
if (toLim - *toP < 4) { \
|
||||
*fromP = from; \
|
||||
return; \
|
||||
return XML_CONVERT_OUTPUT_EXHAUSTED; \
|
||||
} \
|
||||
if (fromLim - from < 4) { \
|
||||
*fromP = from; \
|
||||
return XML_CONVERT_INPUT_INCOMPLETE; \
|
||||
} \
|
||||
plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
|
||||
*(*toP)++ = ((plane >> 2) | UTF8_cval4); \
|
||||
@ -596,20 +679,32 @@ E ## toUtf8(const ENCODING *enc, \
|
||||
} \
|
||||
} \
|
||||
*fromP = from; \
|
||||
if (from < fromLim) \
|
||||
return XML_CONVERT_INPUT_INCOMPLETE; \
|
||||
else \
|
||||
return XML_CONVERT_COMPLETED; \
|
||||
}
|
||||
|
||||
/* Generates E##toUtf16: copies 2-byte input units (read through GET_HI and
   GET_LO) into 16-bit output units. An odd trailing input byte is ignored.
   If the input is larger than the output and its last unit has a high byte
   in the 0xD8-0xDF range, that unit is held back and the result becomes
   XML_CONVERT_INPUT_INCOMPLETE unless the output fills up first. */
#define DEFINE_UTF16_TO_UTF16(E) \
static enum XML_Convert_Result PTRCALL \
E ## toUtf16(const ENCODING *UNUSED_P(enc), \
             const char **fromP, const char *fromLim, \
             unsigned short **toP, const unsigned short *toLim) \
{ \
  enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \
  /* shrink to even */ \
  fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); \
  /* Avoid copying first half only of surrogate */ \
  if (fromLim - *fromP > ((toLim - *toP) << 1) \
      && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \
    fromLim -= 2; \
    res = XML_CONVERT_INPUT_INCOMPLETE; \
  } \
  while (*fromP < fromLim && *toP < toLim) { \
    *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
    *fromP += 2; \
  } \
  if ((*toP == toLim) && (*fromP < fromLim)) \
    return XML_CONVERT_OUTPUT_EXHAUSTED; \
  return res; \
}
|
||||
|
||||
#define SET2(ptr, ch) \
|
||||
@ -726,7 +821,7 @@ static const struct normal_encoding little2_encoding_ns = {
|
||||
#include "asciitab.h"
|
||||
#include "latin1tab.h"
|
||||
},
|
||||
STANDARD_VTABLE(little2_)
|
||||
STANDARD_VTABLE(little2_) NULL_VTABLE
|
||||
};
|
||||
|
||||
#endif
|
||||
@ -745,7 +840,7 @@ static const struct normal_encoding little2_encoding = {
|
||||
#undef BT_COLON
|
||||
#include "latin1tab.h"
|
||||
},
|
||||
STANDARD_VTABLE(little2_)
|
||||
STANDARD_VTABLE(little2_) NULL_VTABLE
|
||||
};
|
||||
|
||||
#if BYTEORDER != 4321
|
||||
@ -758,7 +853,7 @@ static const struct normal_encoding internal_little2_encoding_ns = {
|
||||
#include "iasciitab.h"
|
||||
#include "latin1tab.h"
|
||||
},
|
||||
STANDARD_VTABLE(little2_)
|
||||
STANDARD_VTABLE(little2_) NULL_VTABLE
|
||||
};
|
||||
|
||||
#endif
|
||||
@ -771,7 +866,7 @@ static const struct normal_encoding internal_little2_encoding = {
|
||||
#undef BT_COLON
|
||||
#include "latin1tab.h"
|
||||
},
|
||||
STANDARD_VTABLE(little2_)
|
||||
STANDARD_VTABLE(little2_) NULL_VTABLE
|
||||
};
|
||||
|
||||
#endif
|
||||
@ -867,7 +962,7 @@ static const struct normal_encoding big2_encoding_ns = {
|
||||
#include "asciitab.h"
|
||||
#include "latin1tab.h"
|
||||
},
|
||||
STANDARD_VTABLE(big2_)
|
||||
STANDARD_VTABLE(big2_) NULL_VTABLE
|
||||
};
|
||||
|
||||
#endif
|
||||
@ -886,7 +981,7 @@ static const struct normal_encoding big2_encoding = {
|
||||
#undef BT_COLON
|
||||
#include "latin1tab.h"
|
||||
},
|
||||
STANDARD_VTABLE(big2_)
|
||||
STANDARD_VTABLE(big2_) NULL_VTABLE
|
||||
};
|
||||
|
||||
#if BYTEORDER != 1234
|
||||
@ -899,7 +994,7 @@ static const struct normal_encoding internal_big2_encoding_ns = {
|
||||
#include "iasciitab.h"
|
||||
#include "latin1tab.h"
|
||||
},
|
||||
STANDARD_VTABLE(big2_)
|
||||
STANDARD_VTABLE(big2_) NULL_VTABLE
|
||||
};
|
||||
|
||||
#endif
|
||||
@ -912,7 +1007,7 @@ static const struct normal_encoding internal_big2_encoding = {
|
||||
#undef BT_COLON
|
||||
#include "latin1tab.h"
|
||||
},
|
||||
STANDARD_VTABLE(big2_)
|
||||
STANDARD_VTABLE(big2_) NULL_VTABLE
|
||||
};
|
||||
|
||||
#endif
|
||||
@ -938,7 +1033,7 @@ streqci(const char *s1, const char *s2)
|
||||
}
|
||||
|
||||
static void PTRCALL
|
||||
initUpdatePosition(const ENCODING *enc, const char *ptr,
|
||||
initUpdatePosition(const ENCODING *UNUSED_P(enc), const char *ptr,
|
||||
const char *end, POSITION *pos)
|
||||
{
|
||||
normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
|
||||
@ -1288,7 +1383,7 @@ unknown_isInvalid(const ENCODING *enc, const char *p)
|
||||
return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
|
||||
}
|
||||
|
||||
static void PTRCALL
|
||||
static enum XML_Convert_Result PTRCALL
|
||||
unknown_toUtf8(const ENCODING *enc,
|
||||
const char **fromP, const char *fromLim,
|
||||
char **toP, const char *toLim)
|
||||
@ -1299,21 +1394,21 @@ unknown_toUtf8(const ENCODING *enc,
|
||||
const char *utf8;
|
||||
int n;
|
||||
if (*fromP == fromLim)
|
||||
break;
|
||||
return XML_CONVERT_COMPLETED;
|
||||
utf8 = uenc->utf8[(unsigned char)**fromP];
|
||||
n = *utf8++;
|
||||
if (n == 0) {
|
||||
int c = uenc->convert(uenc->userData, *fromP);
|
||||
n = XmlUtf8Encode(c, buf);
|
||||
if (n > toLim - *toP)
|
||||
break;
|
||||
return XML_CONVERT_OUTPUT_EXHAUSTED;
|
||||
utf8 = buf;
|
||||
*fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
|
||||
- (BT_LEAD2 - 2));
|
||||
}
|
||||
else {
|
||||
if (n > toLim - *toP)
|
||||
break;
|
||||
return XML_CONVERT_OUTPUT_EXHAUSTED;
|
||||
(*fromP)++;
|
||||
}
|
||||
do {
|
||||
@ -1322,13 +1417,13 @@ unknown_toUtf8(const ENCODING *enc,
|
||||
}
|
||||
}
|
||||
|
||||
static void PTRCALL
|
||||
static enum XML_Convert_Result PTRCALL
|
||||
unknown_toUtf16(const ENCODING *enc,
|
||||
const char **fromP, const char *fromLim,
|
||||
unsigned short **toP, const unsigned short *toLim)
|
||||
{
|
||||
const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
|
||||
while (*fromP != fromLim && *toP != toLim) {
|
||||
while (*fromP < fromLim && *toP < toLim) {
|
||||
unsigned short c = uenc->utf16[(unsigned char)**fromP];
|
||||
if (c == 0) {
|
||||
c = (unsigned short)
|
||||
@ -1340,6 +1435,11 @@ unknown_toUtf16(const ENCODING *enc,
|
||||
(*fromP)++;
|
||||
*(*toP)++ = c;
|
||||
}
|
||||
|
||||
if ((*toP == toLim) && (*fromP < fromLim))
|
||||
return XML_CONVERT_OUTPUT_EXHAUSTED;
|
||||
else
|
||||
return XML_CONVERT_COMPLETED;
|
||||
}
|
||||
|
||||
ENCODING *
|
||||
@ -1503,7 +1603,7 @@ initScan(const ENCODING * const *encodingTable,
|
||||
{
|
||||
const ENCODING **encPtr;
|
||||
|
||||
if (ptr == end)
|
||||
if (ptr >= end)
|
||||
return XML_TOK_NONE;
|
||||
encPtr = enc->encPtr;
|
||||
if (ptr + 1 == end) {
|
||||
|
10
lib/xmltok.h
10
lib/xmltok.h
@ -130,6 +130,12 @@ typedef int (PTRCALL *SCANNER)(const ENCODING *,
|
||||
const char *,
|
||||
const char **);
|
||||
|
||||
/* Result of a utf8Convert / utf16Convert call. */
enum XML_Convert_Result {
  /* All input up to fromLim was converted. */
  XML_CONVERT_COMPLETED = 0,

  /* The input ended in the middle of a character. */
  XML_CONVERT_INPUT_INCOMPLETE = 1,

  /* The output buffer is full,
     and therefore potentially input remaining as well. */
  XML_CONVERT_OUTPUT_EXHAUSTED = 2
};
|
||||
|
||||
struct encoding {
|
||||
SCANNER scanners[XML_N_STATES];
|
||||
SCANNER literalScanners[XML_N_LITERAL_TYPES];
|
||||
@ -158,12 +164,12 @@ struct encoding {
|
||||
const char *ptr,
|
||||
const char *end,
|
||||
const char **badPtr);
|
||||
void (PTRCALL *utf8Convert)(const ENCODING *enc,
|
||||
enum XML_Convert_Result (PTRCALL *utf8Convert)(const ENCODING *enc,
|
||||
const char **fromP,
|
||||
const char *fromLim,
|
||||
char **toP,
|
||||
const char *toLim);
|
||||
void (PTRCALL *utf16Convert)(const ENCODING *enc,
|
||||
enum XML_Convert_Result (PTRCALL *utf16Convert)(const ENCODING *enc,
|
||||
const char **fromP,
|
||||
const char *fromLim,
|
||||
unsigned short **toP,
|
||||
|
@ -87,27 +87,45 @@
|
||||
#define PREFIX(ident) ident
|
||||
#endif
|
||||
|
||||
|
||||
/* Bounds-check helpers for the scanners.

   HAS_CHARS is true when at least `count` characters, each MINBPC(enc)
   bytes wide, lie between ptr and end. Every argument is parenthesized so
   that expression arguments (e.g. a count of `n + 1`) are multiplied and
   compared as a whole instead of being re-associated by operator
   precedence. */
#define HAS_CHARS(enc, ptr, end, count) \
    ((end) - (ptr) >= ((count) * MINBPC(enc)))

/* True when at least one character is available. */
#define HAS_CHAR(enc, ptr, end) \
    HAS_CHARS(enc, ptr, end, 1)

/* Returns XML_TOK_PARTIAL from the enclosing function unless `count`
   characters are available. */
#define REQUIRE_CHARS(enc, ptr, end, count) \
    { \
      if (! HAS_CHARS(enc, ptr, end, count)) { \
        return XML_TOK_PARTIAL; \
      } \
    }

/* Single-character form of REQUIRE_CHARS. */
#define REQUIRE_CHAR(enc, ptr, end) \
    REQUIRE_CHARS(enc, ptr, end, 1)
|
||||
|
||||
|
||||
/* ptr points to character following "<!-" */
|
||||
|
||||
static int PTRCALL
|
||||
PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
|
||||
const char *end, const char **nextTokPtr)
|
||||
{
|
||||
if (ptr != end) {
|
||||
if (HAS_CHAR(enc, ptr, end)) {
|
||||
if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
|
||||
*nextTokPtr = ptr;
|
||||
return XML_TOK_INVALID;
|
||||
}
|
||||
ptr += MINBPC(enc);
|
||||
while (ptr != end) {
|
||||
while (HAS_CHAR(enc, ptr, end)) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
INVALID_CASES(ptr, nextTokPtr)
|
||||
case BT_MINUS:
|
||||
if ((ptr += MINBPC(enc)) == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
ptr += MINBPC(enc);
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
|
||||
if ((ptr += MINBPC(enc)) == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
ptr += MINBPC(enc);
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
|
||||
*nextTokPtr = ptr;
|
||||
return XML_TOK_INVALID;
|
||||
@ -131,8 +149,7 @@ static int PTRCALL
|
||||
PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
|
||||
const char *end, const char **nextTokPtr)
|
||||
{
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
case BT_MINUS:
|
||||
return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
||||
@ -147,11 +164,10 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
|
||||
*nextTokPtr = ptr;
|
||||
return XML_TOK_INVALID;
|
||||
}
|
||||
while (ptr != end) {
|
||||
while (HAS_CHAR(enc, ptr, end)) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
case BT_PERCNT:
|
||||
if (ptr + MINBPC(enc) == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
REQUIRE_CHARS(enc, ptr, end, 2);
|
||||
/* don't allow <!ENTITY% foo "whatever"> */
|
||||
switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
|
||||
case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
|
||||
@ -175,7 +191,7 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
|
||||
}
|
||||
|
||||
static int PTRCALL
|
||||
PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr,
|
||||
PREFIX(checkPiTarget)(const ENCODING *UNUSED_P(enc), const char *ptr,
|
||||
const char *end, int *tokPtr)
|
||||
{
|
||||
int upper = 0;
|
||||
@ -225,15 +241,14 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
|
||||
{
|
||||
int tok;
|
||||
const char *target = ptr;
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
||||
default:
|
||||
*nextTokPtr = ptr;
|
||||
return XML_TOK_INVALID;
|
||||
}
|
||||
while (ptr != end) {
|
||||
while (HAS_CHAR(enc, ptr, end)) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
||||
case BT_S: case BT_CR: case BT_LF:
|
||||
@ -242,13 +257,12 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
|
||||
return XML_TOK_INVALID;
|
||||
}
|
||||
ptr += MINBPC(enc);
|
||||
while (ptr != end) {
|
||||
while (HAS_CHAR(enc, ptr, end)) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
INVALID_CASES(ptr, nextTokPtr)
|
||||
case BT_QUEST:
|
||||
ptr += MINBPC(enc);
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
|
||||
*nextTokPtr = ptr + MINBPC(enc);
|
||||
return tok;
|
||||
@ -266,8 +280,7 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
|
||||
return XML_TOK_INVALID;
|
||||
}
|
||||
ptr += MINBPC(enc);
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
|
||||
*nextTokPtr = ptr + MINBPC(enc);
|
||||
return tok;
|
||||
@ -282,15 +295,14 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
|
||||
}
|
||||
|
||||
static int PTRCALL
|
||||
PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr,
|
||||
PREFIX(scanCdataSection)(const ENCODING *UNUSED_P(enc), const char *ptr,
|
||||
const char *end, const char **nextTokPtr)
|
||||
{
|
||||
static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
|
||||
ASCII_T, ASCII_A, ASCII_LSQB };
|
||||
int i;
|
||||
/* CDATA[ */
|
||||
if (end - ptr < 6 * MINBPC(enc))
|
||||
return XML_TOK_PARTIAL;
|
||||
REQUIRE_CHARS(enc, ptr, end, 6);
|
||||
for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
|
||||
if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
|
||||
*nextTokPtr = ptr;
|
||||
@ -305,7 +317,7 @@ static int PTRCALL
|
||||
PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
|
||||
const char *end, const char **nextTokPtr)
|
||||
{
|
||||
if (ptr == end)
|
||||
if (ptr >= end)
|
||||
return XML_TOK_NONE;
|
||||
if (MINBPC(enc) > 1) {
|
||||
size_t n = end - ptr;
|
||||
@ -319,13 +331,11 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
case BT_RSQB:
|
||||
ptr += MINBPC(enc);
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
|
||||
break;
|
||||
ptr += MINBPC(enc);
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
|
||||
ptr -= MINBPC(enc);
|
||||
break;
|
||||
@ -334,8 +344,7 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
|
||||
return XML_TOK_CDATA_SECT_CLOSE;
|
||||
case BT_CR:
|
||||
ptr += MINBPC(enc);
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
if (BYTE_TYPE(enc, ptr) == BT_LF)
|
||||
ptr += MINBPC(enc);
|
||||
*nextTokPtr = ptr;
|
||||
@ -348,7 +357,7 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
|
||||
ptr += MINBPC(enc);
|
||||
break;
|
||||
}
|
||||
while (ptr != end) {
|
||||
while (HAS_CHAR(enc, ptr, end)) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
#define LEAD_CASE(n) \
|
||||
case BT_LEAD ## n: \
|
||||
@ -383,19 +392,18 @@ static int PTRCALL
|
||||
PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
|
||||
const char *end, const char **nextTokPtr)
|
||||
{
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
||||
default:
|
||||
*nextTokPtr = ptr;
|
||||
return XML_TOK_INVALID;
|
||||
}
|
||||
while (ptr != end) {
|
||||
while (HAS_CHAR(enc, ptr, end)) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
||||
case BT_S: case BT_CR: case BT_LF:
|
||||
for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
|
||||
for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
case BT_S: case BT_CR: case BT_LF:
|
||||
break;
|
||||
@ -432,7 +440,7 @@ static int PTRCALL
|
||||
PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
|
||||
const char *end, const char **nextTokPtr)
|
||||
{
|
||||
if (ptr != end) {
|
||||
if (HAS_CHAR(enc, ptr, end)) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
case BT_DIGIT:
|
||||
case BT_HEX:
|
||||
@ -441,7 +449,7 @@ PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
|
||||
*nextTokPtr = ptr;
|
||||
return XML_TOK_INVALID;
|
||||
}
|
||||
for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
|
||||
for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
case BT_DIGIT:
|
||||
case BT_HEX:
|
||||
@ -464,7 +472,7 @@ static int PTRCALL
|
||||
PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
|
||||
const char *end, const char **nextTokPtr)
|
||||
{
|
||||
if (ptr != end) {
|
||||
if (HAS_CHAR(enc, ptr, end)) {
|
||||
if (CHAR_MATCHES(enc, ptr, ASCII_x))
|
||||
return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
@ -474,7 +482,7 @@ PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
|
||||
*nextTokPtr = ptr;
|
||||
return XML_TOK_INVALID;
|
||||
}
|
||||
for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
|
||||
for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
case BT_DIGIT:
|
||||
break;
|
||||
@ -496,8 +504,7 @@ static int PTRCALL
|
||||
PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
const char **nextTokPtr)
|
||||
{
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
||||
case BT_NUM:
|
||||
@ -506,7 +513,7 @@ PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
*nextTokPtr = ptr;
|
||||
return XML_TOK_INVALID;
|
||||
}
|
||||
while (ptr != end) {
|
||||
while (HAS_CHAR(enc, ptr, end)) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
||||
case BT_SEMI:
|
||||
@ -529,7 +536,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
#ifdef XML_NS
|
||||
int hadColon = 0;
|
||||
#endif
|
||||
while (ptr != end) {
|
||||
while (HAS_CHAR(enc, ptr, end)) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
||||
#ifdef XML_NS
|
||||
@ -540,8 +547,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
}
|
||||
hadColon = 1;
|
||||
ptr += MINBPC(enc);
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
||||
default:
|
||||
@ -555,8 +561,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
int t;
|
||||
|
||||
ptr += MINBPC(enc);
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
t = BYTE_TYPE(enc, ptr);
|
||||
if (t == BT_EQUALS)
|
||||
break;
|
||||
@ -579,8 +584,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
#endif
|
||||
for (;;) {
|
||||
ptr += MINBPC(enc);
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
open = BYTE_TYPE(enc, ptr);
|
||||
if (open == BT_QUOT || open == BT_APOS)
|
||||
break;
|
||||
@ -598,8 +602,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
/* in attribute value */
|
||||
for (;;) {
|
||||
int t;
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
t = BYTE_TYPE(enc, ptr);
|
||||
if (t == open)
|
||||
break;
|
||||
@ -624,8 +627,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
}
|
||||
}
|
||||
ptr += MINBPC(enc);
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
case BT_S:
|
||||
case BT_CR:
|
||||
@ -642,8 +644,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
/* ptr points to closing quote */
|
||||
for (;;) {
|
||||
ptr += MINBPC(enc);
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
||||
case BT_S: case BT_CR: case BT_LF:
|
||||
@ -655,8 +656,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
case BT_SOL:
|
||||
sol:
|
||||
ptr += MINBPC(enc);
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
|
||||
*nextTokPtr = ptr;
|
||||
return XML_TOK_INVALID;
|
||||
@ -688,13 +688,12 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
#ifdef XML_NS
|
||||
int hadColon;
|
||||
#endif
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
||||
case BT_EXCL:
|
||||
if ((ptr += MINBPC(enc)) == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
ptr += MINBPC(enc);
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
case BT_MINUS:
|
||||
return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
||||
@ -716,7 +715,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
hadColon = 0;
|
||||
#endif
|
||||
/* we have a start-tag */
|
||||
while (ptr != end) {
|
||||
while (HAS_CHAR(enc, ptr, end)) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
||||
#ifdef XML_NS
|
||||
@ -727,8 +726,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
}
|
||||
hadColon = 1;
|
||||
ptr += MINBPC(enc);
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
||||
default:
|
||||
@ -740,7 +738,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
case BT_S: case BT_CR: case BT_LF:
|
||||
{
|
||||
ptr += MINBPC(enc);
|
||||
while (ptr != end) {
|
||||
while (HAS_CHAR(enc, ptr, end)) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
||||
case BT_GT:
|
||||
@ -765,8 +763,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
case BT_SOL:
|
||||
sol:
|
||||
ptr += MINBPC(enc);
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
|
||||
*nextTokPtr = ptr;
|
||||
return XML_TOK_INVALID;
|
||||
@ -785,7 +782,7 @@ static int PTRCALL
|
||||
PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
const char **nextTokPtr)
|
||||
{
|
||||
if (ptr == end)
|
||||
if (ptr >= end)
|
||||
return XML_TOK_NONE;
|
||||
if (MINBPC(enc) > 1) {
|
||||
size_t n = end - ptr;
|
||||
@ -803,7 +800,7 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
||||
case BT_CR:
|
||||
ptr += MINBPC(enc);
|
||||
if (ptr == end)
|
||||
if (! HAS_CHAR(enc, ptr, end))
|
||||
return XML_TOK_TRAILING_CR;
|
||||
if (BYTE_TYPE(enc, ptr) == BT_LF)
|
||||
ptr += MINBPC(enc);
|
||||
@ -814,12 +811,12 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
return XML_TOK_DATA_NEWLINE;
|
||||
case BT_RSQB:
|
||||
ptr += MINBPC(enc);
|
||||
if (ptr == end)
|
||||
if (! HAS_CHAR(enc, ptr, end))
|
||||
return XML_TOK_TRAILING_RSQB;
|
||||
if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
|
||||
break;
|
||||
ptr += MINBPC(enc);
|
||||
if (ptr == end)
|
||||
if (! HAS_CHAR(enc, ptr, end))
|
||||
return XML_TOK_TRAILING_RSQB;
|
||||
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
|
||||
ptr -= MINBPC(enc);
|
||||
@ -832,7 +829,7 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
ptr += MINBPC(enc);
|
||||
break;
|
||||
}
|
||||
while (ptr != end) {
|
||||
while (HAS_CHAR(enc, ptr, end)) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
#define LEAD_CASE(n) \
|
||||
case BT_LEAD ## n: \
|
||||
@ -845,12 +842,12 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
|
||||
#undef LEAD_CASE
|
||||
case BT_RSQB:
|
||||
if (ptr + MINBPC(enc) != end) {
|
||||
if (HAS_CHARS(enc, ptr, end, 2)) {
|
||||
if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
|
||||
ptr += MINBPC(enc);
|
||||
break;
|
||||
}
|
||||
if (ptr + 2*MINBPC(enc) != end) {
|
||||
if (HAS_CHARS(enc, ptr, end, 3)) {
|
||||
if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
|
||||
ptr += MINBPC(enc);
|
||||
break;
|
||||
@ -884,8 +881,7 @@ static int PTRCALL
|
||||
PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
const char **nextTokPtr)
|
||||
{
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
||||
case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
|
||||
@ -895,7 +891,7 @@ PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
*nextTokPtr = ptr;
|
||||
return XML_TOK_INVALID;
|
||||
}
|
||||
while (ptr != end) {
|
||||
while (HAS_CHAR(enc, ptr, end)) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
||||
case BT_SEMI:
|
||||
@ -913,15 +909,14 @@ static int PTRCALL
|
||||
PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
const char **nextTokPtr)
|
||||
{
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
||||
default:
|
||||
*nextTokPtr = ptr;
|
||||
return XML_TOK_INVALID;
|
||||
}
|
||||
while (ptr != end) {
|
||||
while (HAS_CHAR(enc, ptr, end)) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
||||
case BT_CR: case BT_LF: case BT_S:
|
||||
@ -941,7 +936,7 @@ PREFIX(scanLit)(int open, const ENCODING *enc,
|
||||
const char *ptr, const char *end,
|
||||
const char **nextTokPtr)
|
||||
{
|
||||
while (ptr != end) {
|
||||
while (HAS_CHAR(enc, ptr, end)) {
|
||||
int t = BYTE_TYPE(enc, ptr);
|
||||
switch (t) {
|
||||
INVALID_CASES(ptr, nextTokPtr)
|
||||
@ -950,7 +945,7 @@ PREFIX(scanLit)(int open, const ENCODING *enc,
|
||||
ptr += MINBPC(enc);
|
||||
if (t != open)
|
||||
break;
|
||||
if (ptr == end)
|
||||
if (! HAS_CHAR(enc, ptr, end))
|
||||
return -XML_TOK_LITERAL;
|
||||
*nextTokPtr = ptr;
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
@ -973,7 +968,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
const char **nextTokPtr)
|
||||
{
|
||||
int tok;
|
||||
if (ptr == end)
|
||||
if (ptr >= end)
|
||||
return XML_TOK_NONE;
|
||||
if (MINBPC(enc) > 1) {
|
||||
size_t n = end - ptr;
|
||||
@ -992,8 +987,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
case BT_LT:
|
||||
{
|
||||
ptr += MINBPC(enc);
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
case BT_EXCL:
|
||||
return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
||||
@ -1021,7 +1015,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
case BT_S: case BT_LF:
|
||||
for (;;) {
|
||||
ptr += MINBPC(enc);
|
||||
if (ptr == end)
|
||||
if (! HAS_CHAR(enc, ptr, end))
|
||||
break;
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
case BT_S: case BT_LF:
|
||||
@ -1048,11 +1042,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
return XML_TOK_OPEN_BRACKET;
|
||||
case BT_RSQB:
|
||||
ptr += MINBPC(enc);
|
||||
if (ptr == end)
|
||||
if (! HAS_CHAR(enc, ptr, end))
|
||||
return -XML_TOK_CLOSE_BRACKET;
|
||||
if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
|
||||
if (ptr + MINBPC(enc) == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
REQUIRE_CHARS(enc, ptr, end, 2);
|
||||
if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
|
||||
*nextTokPtr = ptr + 2*MINBPC(enc);
|
||||
return XML_TOK_COND_SECT_CLOSE;
|
||||
@ -1065,7 +1058,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
return XML_TOK_OPEN_PAREN;
|
||||
case BT_RPAR:
|
||||
ptr += MINBPC(enc);
|
||||
if (ptr == end)
|
||||
if (! HAS_CHAR(enc, ptr, end))
|
||||
return -XML_TOK_CLOSE_PAREN;
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
case BT_AST:
|
||||
@ -1141,7 +1134,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
*nextTokPtr = ptr;
|
||||
return XML_TOK_INVALID;
|
||||
}
|
||||
while (ptr != end) {
|
||||
while (HAS_CHAR(enc, ptr, end)) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
||||
case BT_GT: case BT_RPAR: case BT_COMMA:
|
||||
@ -1154,8 +1147,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
ptr += MINBPC(enc);
|
||||
switch (tok) {
|
||||
case XML_TOK_NAME:
|
||||
if (ptr == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
tok = XML_TOK_PREFIXED_NAME;
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
||||
@ -1204,10 +1196,12 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
|
||||
const char *end, const char **nextTokPtr)
|
||||
{
|
||||
const char *start;
|
||||
if (ptr == end)
|
||||
if (ptr >= end)
|
||||
return XML_TOK_NONE;
|
||||
else if (! HAS_CHAR(enc, ptr, end))
|
||||
return XML_TOK_PARTIAL;
|
||||
start = ptr;
|
||||
while (ptr != end) {
|
||||
while (HAS_CHAR(enc, ptr, end)) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
#define LEAD_CASE(n) \
|
||||
case BT_LEAD ## n: ptr += n; break;
|
||||
@ -1232,7 +1226,7 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
|
||||
case BT_CR:
|
||||
if (ptr == start) {
|
||||
ptr += MINBPC(enc);
|
||||
if (ptr == end)
|
||||
if (! HAS_CHAR(enc, ptr, end))
|
||||
return XML_TOK_TRAILING_CR;
|
||||
if (BYTE_TYPE(enc, ptr) == BT_LF)
|
||||
ptr += MINBPC(enc);
|
||||
@ -1262,10 +1256,12 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
|
||||
const char *end, const char **nextTokPtr)
|
||||
{
|
||||
const char *start;
|
||||
if (ptr == end)
|
||||
if (ptr >= end)
|
||||
return XML_TOK_NONE;
|
||||
else if (! HAS_CHAR(enc, ptr, end))
|
||||
return XML_TOK_PARTIAL;
|
||||
start = ptr;
|
||||
while (ptr != end) {
|
||||
while (HAS_CHAR(enc, ptr, end)) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
#define LEAD_CASE(n) \
|
||||
case BT_LEAD ## n: ptr += n; break;
|
||||
@ -1294,7 +1290,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
|
||||
case BT_CR:
|
||||
if (ptr == start) {
|
||||
ptr += MINBPC(enc);
|
||||
if (ptr == end)
|
||||
if (! HAS_CHAR(enc, ptr, end))
|
||||
return XML_TOK_TRAILING_CR;
|
||||
if (BYTE_TYPE(enc, ptr) == BT_LF)
|
||||
ptr += MINBPC(enc);
|
||||
@ -1326,15 +1322,15 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
|
||||
end = ptr + n;
|
||||
}
|
||||
}
|
||||
while (ptr != end) {
|
||||
while (HAS_CHAR(enc, ptr, end)) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
INVALID_CASES(ptr, nextTokPtr)
|
||||
case BT_LT:
|
||||
if ((ptr += MINBPC(enc)) == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
ptr += MINBPC(enc);
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
|
||||
if ((ptr += MINBPC(enc)) == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
ptr += MINBPC(enc);
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
|
||||
++level;
|
||||
ptr += MINBPC(enc);
|
||||
@ -1342,11 +1338,11 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
|
||||
}
|
||||
break;
|
||||
case BT_RSQB:
|
||||
if ((ptr += MINBPC(enc)) == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
ptr += MINBPC(enc);
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
|
||||
if ((ptr += MINBPC(enc)) == end)
|
||||
return XML_TOK_PARTIAL;
|
||||
ptr += MINBPC(enc);
|
||||
REQUIRE_CHAR(enc, ptr, end);
|
||||
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
|
||||
ptr += MINBPC(enc);
|
||||
if (level == 0) {
|
||||
@ -1373,7 +1369,7 @@ PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
|
||||
{
|
||||
ptr += MINBPC(enc);
|
||||
end -= MINBPC(enc);
|
||||
for (; ptr != end; ptr += MINBPC(enc)) {
|
||||
for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
case BT_DIGIT:
|
||||
case BT_HEX:
|
||||
@ -1521,7 +1517,7 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
|
||||
}
|
||||
|
||||
static int PTRFASTCALL
|
||||
PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
|
||||
PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr)
|
||||
{
|
||||
int result = 0;
|
||||
/* skip &# */
|
||||
@ -1565,7 +1561,7 @@ PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
|
||||
}
|
||||
|
||||
static int PTRCALL
|
||||
PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
|
||||
PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr,
|
||||
const char *end)
|
||||
{
|
||||
switch ((end - ptr)/MINBPC(enc)) {
|
||||
@ -1683,11 +1679,11 @@ PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
|
||||
}
|
||||
|
||||
static int PTRCALL
|
||||
PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
|
||||
PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1,
|
||||
const char *end1, const char *ptr2)
|
||||
{
|
||||
for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
|
||||
if (ptr1 == end1)
|
||||
if (end1 - ptr1 < MINBPC(enc))
|
||||
return 0;
|
||||
if (!CHAR_MATCHES(enc, ptr1, *ptr2))
|
||||
return 0;
|
||||
@ -1744,7 +1740,7 @@ PREFIX(updatePosition)(const ENCODING *enc,
|
||||
const char *end,
|
||||
POSITION *pos)
|
||||
{
|
||||
while (ptr < end) {
|
||||
while (HAS_CHAR(enc, ptr, end)) {
|
||||
switch (BYTE_TYPE(enc, ptr)) {
|
||||
#define LEAD_CASE(n) \
|
||||
case BT_LEAD ## n: \
|
||||
@ -1760,7 +1756,7 @@ PREFIX(updatePosition)(const ENCODING *enc,
|
||||
case BT_CR:
|
||||
pos->lineNumber++;
|
||||
ptr += MINBPC(enc);
|
||||
if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
|
||||
if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
|
||||
ptr += MINBPC(enc);
|
||||
pos->columnNumber = (XML_Size)-1;
|
||||
break;
|
||||
|
@ -13,4 +13,4 @@ The command line arguments are:
|
||||
Returns:
|
||||
|
||||
The time (in seconds) it takes to parse the test file,
|
||||
averaged over the number of iterations.
|
||||
averaged over the number of iterations.@
|
||||
|
@ -7,11 +7,7 @@
|
||||
#ifdef HAVE_EXPAT_CONFIG_H
|
||||
#include <expat_config.h>
|
||||
#endif
|
||||
#ifdef HAVE_CHECK_H
|
||||
#include <check.h>
|
||||
#else
|
||||
#include "minicheck.h"
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
@ -51,7 +47,7 @@ CharData_AppendString(CharData *storage, const char *s)
|
||||
if ((len + storage->count) > maxchars) {
|
||||
len = (maxchars - storage->count);
|
||||
}
|
||||
if (len + storage->count < sizeof(storage->data)) {
|
||||
if (len + storage->count < (int)sizeof(storage->data)) {
|
||||
memcpy(storage->data + storage->count, s, len);
|
||||
storage->count += len;
|
||||
}
|
||||
@ -72,7 +68,7 @@ CharData_AppendXMLChars(CharData *storage, const XML_Char *s, int len)
|
||||
if ((len + storage->count) > maxchars) {
|
||||
len = (maxchars - storage->count);
|
||||
}
|
||||
if (len + storage->count < sizeof(storage->data)) {
|
||||
if (len + storage->count < (int)sizeof(storage->data)) {
|
||||
memcpy(storage->data + storage->count, s,
|
||||
len * sizeof(storage->data[0]));
|
||||
storage->count += len;
|
||||
|
@ -10,10 +10,11 @@
|
||||
#include <setjmp.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "internal.h" /* for UNUSED_P only */
|
||||
#include "minicheck.h"
|
||||
|
||||
Suite *
|
||||
suite_create(char *name)
|
||||
suite_create(const char *name)
|
||||
{
|
||||
Suite *suite = (Suite *) calloc(1, sizeof(Suite));
|
||||
if (suite != NULL) {
|
||||
@ -23,7 +24,7 @@ suite_create(char *name)
|
||||
}
|
||||
|
||||
TCase *
|
||||
tcase_create(char *name)
|
||||
tcase_create(const char *name)
|
||||
{
|
||||
TCase *tc = (TCase *) calloc(1, sizeof(TCase));
|
||||
if (tc != NULL) {
|
||||
@ -156,7 +157,7 @@ srunner_run_all(SRunner *runner, int verbosity)
|
||||
}
|
||||
|
||||
void
|
||||
_fail_unless(int condition, const char *file, int line, char *msg)
|
||||
_fail_unless(int UNUSED_P(condition), const char *UNUSED_P(file), int UNUSED_P(line), const char *msg)
|
||||
{
|
||||
/* Always print the error message so it isn't lost. In this case,
|
||||
we have a failure, so there's no reason to be quiet about what
|
||||
|
@ -26,6 +26,11 @@ extern "C" {
|
||||
#define __func__ __FUNCTION__
|
||||
#endif
|
||||
|
||||
/* ISO C90 does not support '__func__' predefined identifier */
|
||||
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ < 199901)
|
||||
# define __func__ "(unknown)"
|
||||
#endif
|
||||
|
||||
#define START_TEST(testname) static void testname(void) { \
|
||||
_check_set_test_info(__func__, __FILE__, __LINE__); \
|
||||
{
|
||||
@ -48,12 +53,12 @@ struct SRunner {
|
||||
};
|
||||
|
||||
struct Suite {
|
||||
char *name;
|
||||
const char *name;
|
||||
TCase *tests;
|
||||
};
|
||||
|
||||
struct TCase {
|
||||
char *name;
|
||||
const char *name;
|
||||
tcase_setup_function setup;
|
||||
tcase_teardown_function teardown;
|
||||
tcase_test_function *tests;
|
||||
@ -72,9 +77,9 @@ void _check_set_test_info(char const *function,
|
||||
* Prototypes for the actual implementation.
|
||||
*/
|
||||
|
||||
void _fail_unless(int condition, const char *file, int line, char *msg);
|
||||
Suite *suite_create(char *name);
|
||||
TCase *tcase_create(char *name);
|
||||
void _fail_unless(int condition, const char *file, int line, const char *msg);
|
||||
Suite *suite_create(const char *name);
|
||||
TCase *tcase_create(const char *name);
|
||||
void suite_add_tcase(Suite *suite, TCase *tc);
|
||||
void tcase_add_checked_fixture(TCase *,
|
||||
tcase_setup_function,
|
||||
|
479
tests/runtests.c
479
tests/runtests.c
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,4 @@
|
||||
#! /bin/sh
|
||||
#! /usr/bin/env bash
|
||||
|
||||
# EXPAT TEST SCRIPT FOR W3C XML TEST SUITE
|
||||
|
||||
@ -20,12 +20,14 @@
|
||||
# produced by xmlwf conforms to an older definition of canonical XML
|
||||
# and does not generate notation declarations.
|
||||
|
||||
shopt -s nullglob
|
||||
|
||||
MYDIR="`dirname \"$0\"`"
|
||||
cd "$MYDIR"
|
||||
MYDIR="`pwd`"
|
||||
XMLWF="`dirname \"$MYDIR\"`/xmlwf/xmlwf"
|
||||
# XMLWF=/usr/local/bin/xmlwf
|
||||
TS="$MYDIR/XML-Test-Suite"
|
||||
TS="$MYDIR"
|
||||
# OUTPUT must terminate with the directory separator.
|
||||
OUTPUT="$TS/out/"
|
||||
# OUTPUT=/home/tmp/xml-testsuite-out/
|
||||
@ -100,7 +102,7 @@ for xmldir in ibm/valid/P* \
|
||||
RunXmlwfWF "$xmlfile" "$xmldir/"
|
||||
UpdateStatus $?
|
||||
done
|
||||
rm outfile
|
||||
rm -f outfile
|
||||
done
|
||||
|
||||
cd "$TS/xmlconf/oasis"
|
||||
|
@ -3,6 +3,7 @@
|
||||
*/
|
||||
|
||||
#include "codepage.h"
|
||||
#include "internal.h" /* for UNUSED_P only */
|
||||
|
||||
#if (defined(WIN32) || (defined(__WATCOMC__) && defined(__NT__)))
|
||||
#define STRICT 1
|
||||
@ -54,13 +55,13 @@ codepageConvert(int cp, const char *p)
|
||||
#else /* not WIN32 */
|
||||
|
||||
int
|
||||
codepageMap(int cp, int *map)
|
||||
codepageMap(int UNUSED_P(cp), int *UNUSED_P(map))
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
codepageConvert(int cp, const char *p)
|
||||
codepageConvert(int UNUSED_P(cp), const char *UNUSED_P(p))
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
@ -8,16 +8,17 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/* Functions close(2) and read(2) */
|
||||
#ifdef __WATCOMC__
|
||||
#ifndef __LINUX__
|
||||
#include <io.h>
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __BEOS__
|
||||
#include <unistd.h>
|
||||
#else
|
||||
# if !defined(WIN32) && !defined(_WIN32) && !defined(_WIN64)
|
||||
# include <unistd.h>
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef S_ISREG
|
||||
|
@ -51,7 +51,7 @@ filemap(const char *name,
|
||||
close(fd);
|
||||
return 1;
|
||||
}
|
||||
p = (void *)mmap((caddr_t)0, (size_t)nbytes, PROT_READ,
|
||||
p = (void *)mmap((void *)0, (size_t)nbytes, PROT_READ,
|
||||
MAP_FILE|MAP_PRIVATE, fd, (off_t)0);
|
||||
if (p == (void *)-1) {
|
||||
perror(name);
|
||||
@ -59,7 +59,7 @@ filemap(const char *name,
|
||||
return 0;
|
||||
}
|
||||
processor(p, nbytes, name, arg);
|
||||
munmap((caddr_t)p, nbytes);
|
||||
munmap((void *)p, nbytes);
|
||||
close(fd);
|
||||
return 1;
|
||||
}
|
||||
|
@ -8,7 +8,7 @@
|
||||
#include <string.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#ifdef COMPILED_FROM_DSP
|
||||
#ifdef WIN32
|
||||
#include "winconfig.h"
|
||||
#elif defined(MACOS_CLASSIC)
|
||||
#include "macconfig.h"
|
||||
@ -18,9 +18,10 @@
|
||||
#include "watcomconfig.h"
|
||||
#elif defined(HAVE_EXPAT_CONFIG_H)
|
||||
#include <expat_config.h>
|
||||
#endif /* ndef COMPILED_FROM_DSP */
|
||||
#endif /* ndef WIN32 */
|
||||
|
||||
#include "expat.h"
|
||||
#include "internal.h" /* for UNUSED_P only */
|
||||
#include "xmlfile.h"
|
||||
#include "xmltchar.h"
|
||||
#include "filemap.h"
|
||||
@ -132,7 +133,7 @@ externalEntityRefFilemap(XML_Parser parser,
|
||||
const XML_Char *context,
|
||||
const XML_Char *base,
|
||||
const XML_Char *systemId,
|
||||
const XML_Char *publicId)
|
||||
const XML_Char *UNUSED_P(publicId))
|
||||
{
|
||||
int result;
|
||||
XML_Char *s;
|
||||
@ -200,7 +201,7 @@ externalEntityRefStream(XML_Parser parser,
|
||||
const XML_Char *context,
|
||||
const XML_Char *base,
|
||||
const XML_Char *systemId,
|
||||
const XML_Char *publicId)
|
||||
const XML_Char *UNUSED_P(publicId))
|
||||
{
|
||||
XML_Char *s;
|
||||
const XML_Char *filename;
|
||||
|
@ -9,6 +9,7 @@
|
||||
|
||||
#include "expat.h"
|
||||
#include "codepage.h"
|
||||
#include "internal.h" /* for UNUSED_P only */
|
||||
#include "xmlfile.h"
|
||||
#include "xmltchar.h"
|
||||
|
||||
@ -248,49 +249,49 @@ processingInstruction(void *userData, const XML_Char *target,
|
||||
#endif /* not W3C14N */
|
||||
|
||||
static void XMLCALL
|
||||
defaultCharacterData(void *userData, const XML_Char *s, int len)
|
||||
defaultCharacterData(void *userData, const XML_Char *UNUSED_P(s), int UNUSED_P(len))
|
||||
{
|
||||
XML_DefaultCurrent((XML_Parser) userData);
|
||||
}
|
||||
|
||||
static void XMLCALL
|
||||
defaultStartElement(void *userData, const XML_Char *name,
|
||||
const XML_Char **atts)
|
||||
defaultStartElement(void *userData, const XML_Char *UNUSED_P(name),
|
||||
const XML_Char **UNUSED_P(atts))
|
||||
{
|
||||
XML_DefaultCurrent((XML_Parser) userData);
|
||||
}
|
||||
|
||||
static void XMLCALL
|
||||
defaultEndElement(void *userData, const XML_Char *name)
|
||||
defaultEndElement(void *userData, const XML_Char *UNUSED_P(name))
|
||||
{
|
||||
XML_DefaultCurrent((XML_Parser) userData);
|
||||
}
|
||||
|
||||
static void XMLCALL
|
||||
defaultProcessingInstruction(void *userData, const XML_Char *target,
|
||||
const XML_Char *data)
|
||||
defaultProcessingInstruction(void *userData, const XML_Char *UNUSED_P(target),
|
||||
const XML_Char *UNUSED_P(data))
|
||||
{
|
||||
XML_DefaultCurrent((XML_Parser) userData);
|
||||
}
|
||||
|
||||
static void XMLCALL
|
||||
nopCharacterData(void *userData, const XML_Char *s, int len)
|
||||
nopCharacterData(void *UNUSED_P(userData), const XML_Char *UNUSED_P(s), int UNUSED_P(len))
|
||||
{
|
||||
}
|
||||
|
||||
static void XMLCALL
|
||||
nopStartElement(void *userData, const XML_Char *name, const XML_Char **atts)
|
||||
nopStartElement(void *UNUSED_P(userData), const XML_Char *UNUSED_P(name), const XML_Char **UNUSED_P(atts))
|
||||
{
|
||||
}
|
||||
|
||||
static void XMLCALL
|
||||
nopEndElement(void *userData, const XML_Char *name)
|
||||
nopEndElement(void *UNUSED_P(userData), const XML_Char *UNUSED_P(name))
|
||||
{
|
||||
}
|
||||
|
||||
static void XMLCALL
|
||||
nopProcessingInstruction(void *userData, const XML_Char *target,
|
||||
const XML_Char *data)
|
||||
nopProcessingInstruction(void *UNUSED_P(userData), const XML_Char *UNUSED_P(target),
|
||||
const XML_Char *UNUSED_P(data))
|
||||
{
|
||||
}
|
||||
|
||||
@ -434,9 +435,9 @@ metaCharacterData(void *userData, const XML_Char *s, int len)
|
||||
static void XMLCALL
|
||||
metaStartDoctypeDecl(void *userData,
|
||||
const XML_Char *doctypeName,
|
||||
const XML_Char *sysid,
|
||||
const XML_Char *pubid,
|
||||
int has_internal_subset)
|
||||
const XML_Char *UNUSED_P(sysid),
|
||||
const XML_Char *UNUSED_P(pubid),
|
||||
int UNUSED_P(has_internal_subset))
|
||||
{
|
||||
XML_Parser parser = (XML_Parser) userData;
|
||||
FILE *fp = (FILE *)XML_GetUserData(parser);
|
||||
@ -458,7 +459,7 @@ metaEndDoctypeDecl(void *userData)
|
||||
static void XMLCALL
|
||||
metaNotationDecl(void *userData,
|
||||
const XML_Char *notationName,
|
||||
const XML_Char *base,
|
||||
const XML_Char *UNUSED_P(base),
|
||||
const XML_Char *systemId,
|
||||
const XML_Char *publicId)
|
||||
{
|
||||
@ -480,10 +481,10 @@ metaNotationDecl(void *userData,
|
||||
static void XMLCALL
|
||||
metaEntityDecl(void *userData,
|
||||
const XML_Char *entityName,
|
||||
int is_param,
|
||||
int UNUSED_P(is_param),
|
||||
const XML_Char *value,
|
||||
int value_length,
|
||||
const XML_Char *base,
|
||||
const XML_Char *UNUSED_P(base),
|
||||
const XML_Char *systemId,
|
||||
const XML_Char *publicId,
|
||||
const XML_Char *notationName)
|
||||
@ -558,7 +559,7 @@ unknownEncodingConvert(void *data, const char *p)
|
||||
}
|
||||
|
||||
static int XMLCALL
|
||||
unknownEncoding(void *userData, const XML_Char *name, XML_Encoding *info)
|
||||
unknownEncoding(void *UNUSED_P(userData), const XML_Char *name, XML_Encoding *info)
|
||||
{
|
||||
int cp;
|
||||
static const XML_Char prefixL[] = T("windows-");
|
||||
@ -594,7 +595,7 @@ unknownEncoding(void *userData, const XML_Char *name, XML_Encoding *info)
|
||||
}
|
||||
|
||||
static int XMLCALL
|
||||
notStandalone(void *userData)
|
||||
notStandalone(void *UNUSED_P(userData))
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@ -634,8 +635,7 @@ static void
|
||||
usage(const XML_Char *prog, int rc)
|
||||
{
|
||||
ftprintf(stderr,
|
||||
T("usage: %s [-n] [-p] [-r] [-s] [-w] [-x] [-d output-dir] "
|
||||
"[-e encoding] file ...\n"), prog);
|
||||
T("usage: %s [-s] [-n] [-p] [-x] [-e encoding] [-w] [-d output-dir] [-c] [-m] [-r] [-t] [file ...]\n"), prog);
|
||||
exit(rc);
|
||||
}
|
||||
|
||||
@ -760,6 +760,12 @@ tmain(int argc, XML_Char **argv)
|
||||
parser = XML_ParserCreateNS(encoding, NSSEP);
|
||||
else
|
||||
parser = XML_ParserCreate(encoding);
|
||||
|
||||
if (! parser) {
|
||||
tperror("Could not instantiate parser");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (requireStandalone)
|
||||
XML_SetNotStandaloneHandler(parser, notStandalone);
|
||||
XML_SetParamEntityParsing(parser, paramEntityParsing);
|
||||
|
Loading…
Reference in New Issue
Block a user