iconv_std: complete the //IGNORE support

Previously, //IGNORE only suppressed errors caused by csmapper conversion
failures.  The input string may also contain invalid sequences, and those
need to be ignored as well.

A good example of //IGNORE application is sanitizing user- or remotely-
specified strings that are expected to be UTF-8, perhaps as part of a
pipeline that will feed the result into a system that is less tested
against, or less tolerant of, illegal UTF-8 sequences.

Sponsored by:	Klara, Inc.
Differential Revision:	https://reviews.freebsd.org/D34345
This commit is contained in:
Kyle Evans 2022-02-22 01:15:04 -06:00
parent 2300a22c97
commit 693f88c9da

View File

@ -472,7 +472,7 @@ _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,
_csid_t csid;
_index_t idx;
char *tmpin;
size_t inval, szrin, szrout;
size_t inval, in_mb_cur_min, szrin, szrout;
int ret, state = 0;
inval = 0;
@ -504,6 +504,8 @@ _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,
return (0);
}
in_mb_cur_min = _stdenc_get_mb_cur_min(is->is_src_encoding);
/* normal case */
for (;;) {
if (*inbytes == 0) {
@ -522,8 +524,20 @@ _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,
szrin = szrout = 0;
ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx, &tmpin,
*inbytes, &szrin, cv->cv_shared->ci_hooks);
if (ret)
if (ret != 0 && (ret != EILSEQ ||
!cv->cv_shared->ci_discard_ilseq)) {
goto err;
} else if (ret == EILSEQ) {
/*
* If //IGNORE was specified, we'll just keep crunching
* through invalid characters.
*/
*in += in_mb_cur_min;
*inbytes -= in_mb_cur_min;
restore_encoding_state(&sc->sc_src_encoding);
restore_encoding_state(&sc->sc_dst_encoding);
continue;
}
if (szrin == (size_t)-2) {
/* incompleted character */