Make UTF-8 parsing and generation more strict.

- In mbrtowc() we need to disallow codepoints above 0x10ffff.
- In wcrtomb() we need to disallow surrogate codepoints between 0xd800 and 0xdfff.

Reviewed by:	bapt
Differential Revision:	https://reviews.freebsd.org/D3399
This commit is contained in:
Ed Schouten 2015-08-25 09:16:09 +00:00
parent 2bf1d4880d
commit 57c69b1478
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=287125

View File

@@ -191,7 +191,7 @@ _UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
errno = EILSEQ;
return ((size_t)-1);
}
- if (wch >= 0xd800 && wch <= 0xdfff) {
+ if ((wch >= 0xd800 && wch <= 0xdfff) || wch > 0x10ffff) {
/*
* Malformed input; invalid code points.
*/
@@ -318,6 +318,10 @@ _UTF8_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
lead = 0xc0;
len = 2;
} else if ((wc & ~0xffff) == 0) {
+ if (wc >= 0xd800 && wc <= 0xdfff) {
+ errno = EILSEQ;
+ return ((size_t)-1);
+ }
lead = 0xe0;
len = 3;
} else if (wc >= 0 && wc <= 0x10ffff) {