Vendor import of expat 2.4.7.

This commit is contained in:
Xin LI 2022-03-27 22:19:28 -07:00
parent d1b143ee9a
commit 45916e7c91
11 changed files with 232 additions and 32 deletions

34
Changes
View File

@ -2,6 +2,40 @@ NOTE: We are looking for help with a few things:
https://github.com/libexpat/libexpat/labels/help%20wanted
If you can help, please get in touch. Thanks!
Release 2.4.7 Fri March 4 2022
Bug fixes:
#572 #577 Relax fix to CVE-2022-25236 (introduced with release 2.4.5)
with regard to all valid URI characters (RFC 3986),
i.e. the following set (excluding whitespace):
ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz
0123456789 % -._~ :/?#[]@ !$&'()*+,;=
Other changes:
#555 #570 #581 CMake|Windows: Store Expat version in the DLL
#577 Document consequences of namespace separator choices not just
in doc/reference.html but also in header <expat.h>
#577 Document Expat's lack of validation of namespace URIs against
RFC 3986, and that the XML 1.0r4 specification doesn't
require Expat to validate namespace URIs, and that Expat
may do more in that regard in future releases.
If you find need for strict RFC 3986 URI validation on
application level today, https://uriparser.github.io/ may
be of interest.
#579 Fix documentation of XML_EndDoctypeDeclHandler in <expat.h>
#575 Document that a call to XML_FreeContentModel can be done at
a later time from outside the element declaration handler
#574 Make hardcoded namespace URIs easier to find in code
#573 Update documentation on use of XML_POOR_ENTOPY on Solaris
#569 #571 tests: Resolve use of macros NAN and INFINITY for GNU G++
4.8.2 on Solaris.
#578 #580 Version info bumped from 9:6:8 to 9:7:8;
see https://verbump.de/ for what these numbers do
Special thanks to:
Jeffrey Walton
Johnny Jazeix
Thijs Schreijer
Release 2.4.6 Sun February 20 2022
Bug fixes:
#566 Fix a regression introduced by the fix for CVE-2022-25313

View File

@ -8,6 +8,7 @@
#
# Copyright (c) 2017-2021 Sebastian Pipping <sebastian@pipping.org>
# Copyright (c) 2018 KangLin <kl222@126.com>
# Copyright (c) 2022 Johnny Jazeix <jazeix@gmail.com>
# Licensed under the MIT license:
#
# Permission is hereby granted, free of charge, to any person obtaining
@ -82,7 +83,8 @@ _EXTRA_DIST_WINDOWS = \
win32/build_expat_iss.bat \
win32/expat.iss \
win32/MANIFEST.txt \
win32/README.txt
win32/README.txt \
win32/version.rc
EXTRA_DIST = \
$(_EXTRA_DIST_CMAKE) \

View File

@ -24,6 +24,7 @@
#
# Copyright (c) 2017-2021 Sebastian Pipping <sebastian@pipping.org>
# Copyright (c) 2018 KangLin <kl222@126.com>
# Copyright (c) 2022 Johnny Jazeix <jazeix@gmail.com>
# Licensed under the MIT license:
#
# Permission is hereby granted, free of charge, to any person obtaining
@ -475,7 +476,8 @@ _EXTRA_DIST_WINDOWS = \
win32/build_expat_iss.bat \
win32/expat.iss \
win32/MANIFEST.txt \
win32/README.txt
win32/README.txt \
win32/version.rc
EXTRA_DIST = \
$(_EXTRA_DIST_CMAKE) \

View File

@ -5,7 +5,7 @@
[![Downloads GitHub](https://img.shields.io/github/downloads/libexpat/libexpat/total?label=Downloads%20GitHub)](https://github.com/libexpat/libexpat/releases)
# Expat, Release 2.4.6
# Expat, Release 2.4.7
This is Expat, a C library for parsing XML, started by
[James Clark](https://en.wikipedia.org/wiki/James_Clark_%28programmer%29) in 1997.

View File

@ -82,7 +82,7 @@ dnl If the API changes incompatibly set LIBAGE back to 0
dnl
LIBCURRENT=9 # sync
LIBREVISION=6 # with
LIBREVISION=7 # with
LIBAGE=8 # CMakeLists.txt!
AC_CONFIG_HEADERS([expat_config.h])

View File

@ -18,6 +18,7 @@
Copyright (c) 2017 Jakub Wilk <jwilk@jwilk.net>
Copyright (c) 2021 Tomas Korbar <tkorbar@redhat.com>
Copyright (c) 2021 Nicolas Cavallari <nicolas.cavallari@green-communications.fr>
Copyright (c) 2022 Thijs Schreijer <thijs@thijsschreijer.nl>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@ -49,7 +50,7 @@
<div>
<h1>
The Expat XML Parser
<small>Release 2.4.6</small>
<small>Release 2.4.7</small>
</h1>
</div>
<div class="content">
@ -974,6 +975,14 @@ the local part will be concatenated without any separator - this is intended
to support RDF processors. It is a programming error to use the null separator
with <a href= "#XML_SetReturnNSTriplet">namespace triplets</a>.</div>
<p><strong>Note:</strong>
Expat does not validate namespace URIs (beyond encoding)
against RFC 3986 today (and is not required to do so with regard to
the XML 1.0 namespaces specification) but it may start doing that
in future releases. Before that, an application using Expat must
be ready to receive namespace URIs containing non-URI characters.
</p>
<h4 id="XML_ParserCreate_MM">XML_ParserCreate_MM</h4>
<pre class="fcndec">
XML_Parser XMLCALL
@ -1808,10 +1817,11 @@ struct XML_cp {
</pre>
<p>Sets a handler for element declarations in a DTD. The handler gets
called with the name of the element in the declaration and a pointer
to a structure that contains the element model. It is the
application's responsibility to free this data structure using
<code><a href="#XML_FreeContentModel"
>XML_FreeContentModel</a></code>.</p>
to a structure that contains the element model. It's the user code's
responsibility to free model when finished with it. See <code>
<a href="#XML_FreeContentModel">XML_FreeContentModel</a></code>.
There is no need to free the model from the handler, it can be kept
around and freed at a later stage.</p>
<p>The <code>model</code> argument is the root of a tree of
<code>XML_Content</code> nodes. If <code>type</code> equals

View File

@ -5,7 +5,7 @@
\\$2 \(la\\$1\(ra\\$3
..
.if \n(.g .mso www.tmac
.TH XMLWF 1 "February 20, 2022" "" ""
.TH XMLWF 1 "March 4, 2022" "" ""
.SH NAME
xmlwf \- Determines if an XML document is well-formed
.SH SYNOPSIS

View File

@ -21,7 +21,7 @@
"http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" [
<!ENTITY dhfirstname "<firstname>Scott</firstname>">
<!ENTITY dhsurname "<surname>Bronson</surname>">
<!ENTITY dhdate "<date>February 20, 2022</date>">
<!ENTITY dhdate "<date>March 4, 2022</date>">
<!-- Please adjust this^^ date whenever cutting a new release. -->
<!ENTITY dhsection "<manvolnum>1</manvolnum>">
<!ENTITY dhemail "<email>bronson@rinspin.com</email>">

View File

@ -15,6 +15,7 @@
Copyright (c) 2016 Cristian Rodríguez <crrodriguez@opensuse.org>
Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de>
Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
Copyright (c) 2022 Thijs Schreijer <thijs@thijsschreijer.nl>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@ -174,8 +175,10 @@ struct XML_cp {
};
/* This is called for an element declaration. See above for
description of the model argument. It's the caller's responsibility
to free model when finished with it.
description of the model argument. It's the user code's responsibility
to free model when finished with it. See XML_FreeContentModel.
There is no need to free the model from the handler, it can be kept
around and freed at a later stage.
*/
typedef void(XMLCALL *XML_ElementDeclHandler)(void *userData,
const XML_Char *name,
@ -237,6 +240,17 @@ XML_ParserCreate(const XML_Char *encoding);
and the local part will be concatenated without any separator.
It is a programming error to use the separator '\0' with namespace
triplets (see XML_SetReturnNSTriplet).
If a namespace separator is chosen that can be part of a URI or
part of an XML name, splitting an expanded name back into its
1, 2 or 3 original parts on application level in the element handler
may end up vulnerable, so these are advised against; sane choices for
a namespace separator are e.g. '\n' (line feed) and '|' (pipe).
Note that Expat does not validate namespace URIs (beyond encoding)
against RFC 3986 today (and is not required to do so with regard to
the XML 1.0 namespaces specification) but it may start doing that
in future releases. Before that, an application using Expat must
be ready to receive namespace URIs containing non-URI characters.
*/
XMLPARSEAPI(XML_Parser)
XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator);
@ -317,7 +331,7 @@ typedef void(XMLCALL *XML_StartDoctypeDeclHandler)(void *userData,
const XML_Char *pubid,
int has_internal_subset);
/* This is called for the start of the DOCTYPE declaration when the
/* This is called for the end of the DOCTYPE declaration when the
closing > is encountered, but after processing any external
subset.
*/
@ -1041,7 +1055,7 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold(
*/
#define XML_MAJOR_VERSION 2
#define XML_MINOR_VERSION 4
#define XML_MICRO_VERSION 6
#define XML_MICRO_VERSION 7
#ifdef __cplusplus
}

View File

@ -1,4 +1,4 @@
/* a30d2613dcfdef81475a9d1a349134d2d42722172fdaa7d5bb12ed2aa74b9596 (2.4.6+)
/* fcb1a62fefa945567301146eb98e3ad3413e823a41c4378e84e8b6b6f308d824 (2.4.7+)
__ __ _
___\ \/ /_ __ __ _| |_
/ _ \\ /| '_ \ / _` | __|
@ -34,6 +34,7 @@
Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org>
Copyright (c) 2021 Dong-hee Na <donghee.na@python.org>
Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net>
Copyright (c) 2022 Jeffrey Walton <noloader@gmail.com>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@ -133,7 +134,7 @@
* BSD / macOS (including <10.7) (arc4random): HAVE_ARC4RANDOM, \
* libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
* libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
* Linux (including <3.17) / BSD / macOS (including <10.7) (/dev/urandom): XML_DEV_URANDOM, \
* Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
* Windows >=Vista (rand_s): _WIN32. \
\
If insist on not using any of these, bypass this error by defining \
@ -722,6 +723,7 @@ XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
return XML_ParserCreate_MM(encodingName, NULL, tmp);
}
// "xml=http://www.w3.org/XML/1998/namespace"
static const XML_Char implicitContext[]
= {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h,
ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
@ -3704,12 +3706,124 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
return XML_ERROR_NONE;
}
static XML_Bool
is_rfc3986_uri_char(XML_Char candidate) {
// For the RFC 3986 ANBF grammar see
// https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
switch (candidate) {
// From rule "ALPHA" (uppercase half)
case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
case 'H':
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
case 'O':
case 'P':
case 'Q':
case 'R':
case 'S':
case 'T':
case 'U':
case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
// From rule "ALPHA" (lowercase half)
case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
case 'u':
case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
// From rule "DIGIT"
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
// From rule "pct-encoded"
case '%':
// From rule "unreserved"
case '-':
case '.':
case '_':
case '~':
// From rule "gen-delims"
case ':':
case '/':
case '?':
case '#':
case '[':
case ']':
case '@':
// From rule "sub-delims"
case '!':
case '$':
case '&':
case '\'':
case '(':
case ')':
case '*':
case '+':
case ',':
case ';':
case '=':
return XML_TRUE;
default:
return XML_FALSE;
}
}
/* addBinding() overwrites the value of prefix->binding without checking.
Therefore one must keep track of the old value outside of addBinding().
*/
static enum XML_Error
addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
const XML_Char *uri, BINDING **bindingsPtr) {
// "http://www.w3.org/XML/1998/namespace"
static const XML_Char xmlNamespace[]
= {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
@ -3720,6 +3834,7 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
ASCII_e, '\0'};
static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
// "http://www.w3.org/2000/xmlns/"
static const XML_Char xmlnsNamespace[]
= {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
@ -3760,14 +3875,26 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
&& (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
isXMLNS = XML_FALSE;
// NOTE: While Expat does not validate namespace URIs against RFC 3986,
// we have to at least make sure that the XML processor on top of
// Expat (that is splitting tag names by namespace separator into
// 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
// by an attacker putting additional namespace separator characters
// into namespace declarations. That would be ambiguous and not to
// be expected.
if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) {
// NOTE: While Expat does not validate namespace URIs against RFC 3986
// today (and is not REQUIRED to do so with regard to the XML 1.0
// namespaces specification) we have to at least make sure, that
// the application on top of Expat (that is likely splitting expanded
// element names ("qualified names") of form
// "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
// in its element handler code) cannot be confused by an attacker
// putting additional namespace separator characters into namespace
// declarations. That would be ambiguous and not to be expected.
//
// While the HTML API docs of function XML_ParserCreateNS have been
// advising against use of a namespace separator character that can
// appear in a URI for >20 years now, some widespread applications
// are using URI characters (':' (colon) in particular) for a
// namespace separator, in practice. To keep these applications
// functional, we only reject namespaces URIs containing the
// application-chosen namespace separator if the chosen separator
// is a non-URI character with regard to RFC 3986.
if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
&& ! is_rfc3986_uri_char(uri[len])) {
return XML_ERROR_SYNTAX;
}
}

View File

@ -54,7 +54,6 @@
#include <ctype.h>
#include <limits.h>
#include <stdint.h> /* intptr_t uint64_t */
#include <math.h> /* NAN, INFINITY, isnan */
#if ! defined(__cplusplus)
# include <stdbool.h>
@ -7407,16 +7406,18 @@ START_TEST(test_ns_separator_in_uri) {
struct test_case {
enum XML_Status expectedStatus;
const char *doc;
XML_Char namesep;
};
struct test_case cases[] = {
{XML_STATUS_OK, "<doc xmlns='one_two' />"},
{XML_STATUS_ERROR, "<doc xmlns='one&#x0A;two' />"},
{XML_STATUS_OK, "<doc xmlns='one_two' />", XCS('\n')},
{XML_STATUS_ERROR, "<doc xmlns='one&#x0A;two' />", XCS('\n')},
{XML_STATUS_OK, "<doc xmlns='one:two' />", XCS(':')},
};
size_t i = 0;
size_t failCount = 0;
for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
XML_Parser parser = XML_ParserCreateNS(NULL, '\n');
XML_Parser parser = XML_ParserCreateNS(NULL, cases[i].namesep);
XML_SetElementHandler(parser, dummy_start_element, dummy_end_element);
if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc),
/*isFinal*/ XML_TRUE)
@ -7588,7 +7589,7 @@ START_TEST(test_misc_version) {
fail("Version mismatch");
#if ! defined(XML_UNICODE) || defined(XML_UNICODE_WCHAR_T)
if (xcstrcmp(version_text, XCS("expat_2.4.6"))) /* needs bump on releases */
if (xcstrcmp(version_text, XCS("expat_2.4.7"))) /* needs bump on releases */
fail("XML_*_VERSION in expat.h out of sync?\n");
#else
/* If we have XML_UNICODE defined but not XML_UNICODE_WCHAR_T
@ -11762,6 +11763,16 @@ START_TEST(test_accounting_precision) {
}
END_TEST
static float
portableNAN() {
return strtof("nan", NULL);
}
static float
portableINFINITY() {
return strtof("infinity", NULL);
}
START_TEST(test_billion_laughs_attack_protection_api) {
XML_Parser parserWithoutParent = XML_ParserCreate(NULL);
XML_Parser parserWithParent
@ -11780,7 +11791,7 @@ START_TEST(test_billion_laughs_attack_protection_api) {
== XML_TRUE)
fail("Call with non-root parser is NOT supposed to succeed");
if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
parserWithoutParent, NAN)
parserWithoutParent, portableNAN())
== XML_TRUE)
fail("Call with NaN limit is NOT supposed to succeed");
if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
@ -11802,7 +11813,7 @@ START_TEST(test_billion_laughs_attack_protection_api) {
== XML_FALSE)
fail("Call with positive limit >=1.0 is supposed to succeed");
if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
parserWithoutParent, INFINITY)
parserWithoutParent, portableINFINITY())
== XML_FALSE)
fail("Call with positive limit >=1.0 is supposed to succeed");