Previously, vfscanf()'s wide character processing functions were
reading wide characters manually.  With this change, they now use
fgetwc().  To make this work, we use an internal version of fgetwc()
with a few extensions: it takes an mbstate_t * because non-wide streams
don't have a built-in conversion state, and it reports the number of
bytes read.

vfscanf() now resembles vfwscanf() more closely.  Minor functional
improvements include working xlocale support in vfscanf(), setting the
stream error indicator on encoding errors, and proper handling of
shift-based encodings.  (Actually, making shift-based encodings work
with non-wide streams is hopeless, but the implementation now matches
the broken specification.)
This commit is contained in:
David Schultz 2012-04-29 16:28:39 +00:00
parent 35225ae651
commit d7af8cf14b
3 changed files with 82 additions and 149 deletions

View File

@ -59,6 +59,7 @@ fgetwc_l(FILE *fp, locale_t locale)
return (r);
}
wint_t
fgetwc(FILE *fp)
{
@ -66,40 +67,45 @@ fgetwc(FILE *fp)
}
/*
* Non-MT-safe version.
* Internal (non-MPSAFE) version of fgetwc(). This version takes an
* mbstate_t argument specifying the initial conversion state. For
* wide streams, this should always be fp->_mbstate. On return, *nread
* is set to the number of bytes read.
*/
wint_t
__fgetwc(FILE *fp, locale_t locale)
wint_t
__fgetwc_mbs(FILE *fp, mbstate_t *mbs, int *nread, locale_t locale)
{
wchar_t wc;
size_t nconv;
struct xlocale_ctype *l = XLOCALE_CTYPE(locale);
if (fp->_r <= 0 && __srefill(fp))
if (fp->_r <= 0 && __srefill(fp)) {
*nread = 0;
return (WEOF);
}
if (MB_CUR_MAX == 1) {
/* Fast path for single-byte encodings. */
wc = *fp->_p++;
fp->_r--;
*nread = 1;
return (wc);
}
*nread = 0;
do {
nconv = l->__mbrtowc(&wc, fp->_p, fp->_r, &fp->_mbstate);
nconv = l->__mbrtowc(&wc, fp->_p, fp->_r, mbs);
if (nconv == (size_t)-1)
break;
else if (nconv == (size_t)-2)
continue;
else if (nconv == 0) {
/*
* Assume that the only valid representation of
* the null wide character is a single null byte.
*/
fp->_p++;
fp->_r--;
(*nread)++;
return (L'\0');
} else {
fp->_p += nconv;
fp->_r -= nconv;
*nread += nconv;
return (wc);
}
} while (__srefill(fp) == 0);

View File

@ -56,7 +56,7 @@ extern int _ftello(FILE *, fpos_t *);
extern int _fseeko(FILE *, off_t, int, int);
extern int __fflush(FILE *fp);
extern void __fcloseall(void);
extern wint_t __fgetwc(FILE *, locale_t);
extern wint_t __fgetwc_mbs(FILE *, mbstate_t *, int *, locale_t);
extern wint_t __fputwc(wchar_t, FILE *, locale_t);
extern int __sflush(FILE *);
extern FILE *__sfp(void);
@ -85,6 +85,13 @@ extern size_t __fread(void * __restrict buf, size_t size, size_t count,
FILE * __restrict fp);
extern int __sdidinit;
/*
 * Convenience wrapper around __fgetwc_mbs(): reads one wide character
 * from fp using the conversion state stored in the stream itself
 * (fp->_mbstate).  The byte count reported by __fgetwc_mbs() is not
 * needed by callers of this wrapper and is discarded.
 */
static inline wint_t
__fgetwc(FILE *fp, locale_t locale)
{
	int bytes_consumed;	/* required by __fgetwc_mbs(), unused here */
	mbstate_t *stream_state = &fp->_mbstate;

	return (__fgetwc_mbs(fp, stream_state, &bytes_consumed, locale));
}
/*
* Prepare the given FILE for writing, and return 0 iff it

View File

@ -127,9 +127,8 @@ static const mbstate_t initial_mbs;
static __inline int
convert_char(FILE *fp, char * __restrict p, int width)
{
int n, nread;
int n;
nread = 0;
if (p == SUPPRESS_PTR) {
size_t sum = 0;
for (;;) {
@ -149,59 +148,34 @@ convert_char(FILE *fp, char * __restrict p, int width)
break;
}
}
nread += sum;
return (sum);
} else {
size_t r = __fread(p, 1, width, fp);
if (r == 0)
return (-1);
nread += r;
return (r);
}
return (nread);
}
static __inline int
convert_wchar(FILE *fp, wchar_t *wcp, int width)
convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale)
{
mbstate_t mbs;
size_t nconv;
int n, nread;
char buf[MB_CUR_MAX];
wint_t wi;
nread = 0;
mbs = initial_mbs;
n = 0;
while (width != 0) {
if (n == MB_CUR_MAX) {
fp->_flags |= __SERR;
return (-1);
}
buf[n++] = *fp->_p;
fp->_p++;
fp->_r--;
mbs = initial_mbs;
nconv = mbrtowc(wcp, buf, n, &mbs);
if (nconv == (size_t)-1) {
fp->_flags |= __SERR;
return (-1);
}
if (nconv == 0 && wcp != SUPPRESS_PTR)
*wcp = L'\0';
if (nconv != (size_t)-2) {
nread += n;
width--;
if (wcp != SUPPRESS_PTR)
wcp++;
n = 0;
}
if (fp->_r <= 0 && __srefill(fp)) {
if (n != 0) {
fp->_flags |= __SERR;
return (-1);
}
break;
}
while (width-- != 0 &&
(wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF) {
if (wcp != SUPPRESS_PTR)
*wcp++ = (wchar_t)wi;
n += nread;
}
return (nread);
if (n == 0)
return (-1);
return (n);
}
static __inline int
@ -244,63 +218,34 @@ convert_ccl(FILE *fp, char * __restrict p, int width, const char *ccltab)
}
static __inline int
convert_wccl(FILE *fp, wchar_t *wcp, int width, const char *ccltab)
convert_wccl(FILE *fp, wchar_t *wcp, int width, const char *ccltab,
locale_t locale)
{
mbstate_t mbs;
wchar_t twc;
int n, nchars, nconv;
char buf[MB_CUR_MAX];
wint_t wi;
int n, nread;
if (wcp == SUPPRESS_PTR)
wcp = &twc;
mbs = initial_mbs;
n = 0;
nchars = 0;
while (width != 0) {
if (n == MB_CUR_MAX) {
fp->_flags |= __SERR;
return (-1);
}
buf[n++] = *fp->_p;
fp->_p++;
fp->_r--;
mbs = initial_mbs;
nconv = mbrtowc(wcp, buf, n, &mbs);
if (nconv == (size_t)-1) {
fp->_flags |= __SERR;
return (-1);
}
if (nconv == 0)
*wcp = L'\0';
if (nconv != (size_t)-2) {
if (wctob(*wcp) != EOF && !ccltab[wctob(*wcp)]) {
while (n != 0) {
n--;
__ungetc(buf[n], fp);
}
break;
}
width--;
if (wcp != &twc)
wcp++;
nchars++;
n = 0;
}
if (fp->_r <= 0 && __srefill(fp)) {
if (n != 0) {
fp->_flags |= __SERR;
return (-1);
}
break;
if (wcp == SUPPRESS_PTR) {
while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
width-- != 0 && ccltab[wctob(wi)])
n += nread;
if (wi != WEOF)
__ungetwc(wi, fp, __get_locale());
} else {
while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
width-- != 0 && ccltab[wctob(wi)]) {
*wcp++ = (wchar_t)wi;
n += nread;
}
if (wi != WEOF)
__ungetwc(wi, fp, __get_locale());
if (n == 0)
return (0);
*wcp = 0;
}
if (n != 0) {
fp->_flags |= __SERR;
return (-1);
}
if (nchars == 0)
return (0);
*wcp = L'\0';
return (nchars);
return (n);
}
static __inline int
@ -335,56 +280,31 @@ convert_string(FILE *fp, char * __restrict p, int width)
}
static __inline int
convert_wstring(FILE *fp, wchar_t *wcp, int width)
convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
{
mbstate_t mbs;
wchar_t twc;
int n, nconv, nread;
char buf[MB_CUR_MAX];
wint_t wi;
int n, nread;
if (wcp == SUPPRESS_PTR)
wcp = &twc;
n = nread = 0;
while (!isspace(*fp->_p) && width != 0) {
if (n == MB_CUR_MAX) {
fp->_flags |= __SERR;
return (-1);
}
buf[n++] = *fp->_p;
fp->_p++;
fp->_r--;
mbs = initial_mbs;
nconv = mbrtowc(wcp, buf, n, &mbs);
if (nconv == (size_t)-1) {
fp->_flags |= __SERR;
return (-1);
}
if (nconv == 0)
*wcp = L'\0';
if (nconv != (size_t)-2) {
if (iswspace(*wcp)) {
while (n != 0) {
n--;
__ungetc(buf[n], fp);
}
break;
}
nread += n;
width--;
if (wcp != &twc)
wcp++;
n = 0;
}
if (fp->_r <= 0 && __srefill(fp)) {
if (n != 0) {
fp->_flags |= __SERR;
return (-1);
}
break;
mbs = initial_mbs;
n = 0;
if (wcp == SUPPRESS_PTR) {
while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
width-- != 0 && !iswspace(wi))
n += nread;
if (wi != WEOF)
__ungetwc(wi, fp, __get_locale());
} else {
while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
width-- != 0 && !iswspace(wi)) {
*wcp++ = (wchar_t)wi;
n += nread;
}
if (wi != WEOF)
__ungetwc(wi, fp, __get_locale());
*wcp = '\0';
}
*wcp = L'\0';
return (nread);
return (n);
}
/*
@ -766,7 +686,7 @@ again: c = *fmt++;
width = 1;
if (flags & LONG) {
nr = convert_wchar(fp, GETARG(wchar_t *),
width);
width, locale);
} else {
nr = convert_char(fp, GETARG(char *), width);
}
@ -780,7 +700,7 @@ again: c = *fmt++;
width = (size_t)~0; /* `infinity' */
if (flags & LONG) {
nr = convert_wccl(fp, GETARG(wchar_t *), width,
ccltab);
ccltab, locale);
} else {
nr = convert_ccl(fp, GETARG(char *), width,
ccltab);
@ -799,7 +719,7 @@ again: c = *fmt++;
width = (size_t)~0;
if (flags & LONG) {
nr = convert_wstring(fp, GETARG(wchar_t *),
width);
width, locale);
} else {
nr = convert_string(fp, GETARG(char *), width);
}