Implement the %lc, %ls and %l[ conversions, which read sequences of wide
characters, non-whitespace wide character strings and wide character
strings in a scanset.
This commit is contained in:
Tim J. Robbins 2002-09-23 11:35:50 +00:00
parent adc106840c
commit 4712aa3b59
2 changed files with 137 additions and 16 deletions
lib/libc/stdio

@ -178,9 +178,10 @@ and the next pointer is a pointer to
(rather than
.Vt float ) ,
or that the conversion will be one of
.Cm c
or
.Cm c ,
.Cm s
or
.Cm \&[
and the next pointer is a pointer to an array of
.Vt wchar_t
(rather than
@ -254,8 +255,15 @@ If no width is given,
a default of
.Dq infinity
is used (with one exception, below);
otherwise at most this many characters are scanned
otherwise at most this many bytes are scanned
in processing the conversion.
In the case of the
.Cm lc ,
.Cm ls
and
.Cm l[
conversions, the field width specifies the maximum number
of multibyte characters that will be scanned.
Before conversion begins,
most conversions skip white space;
this white space is not counted against the field width.
@ -334,6 +342,13 @@ terminating
character.
The input string stops at white space
or at the maximum field width, whichever occurs first.
.Pp
If an
.Cm l
qualifier is present, the next pointer must be a pointer to
.Vt wchar_t ,
into which the input will be placed after conversion by
.Xr mbrtowc 3 .
.It Cm S
The same as
.Cm ls .
@ -350,6 +365,13 @@ and there must be enough room for all the characters
is added).
The usual skip of leading white space is suppressed.
To skip white space first, use an explicit space in the format.
.Pp
If an
.Cm l
qualifier is present, the next pointer must be a pointer to
.Vt wchar_t ,
into which the input will be placed after conversion by
.Xr mbrtowc 3 .
.It Cm C
The same as
.Cm lc .
@ -395,6 +417,13 @@ means the set
The string ends with the appearance of a character not in the
(or, with a circumflex, in) set
or when the field width runs out.
.Pp
If an
.Cm l
qualifier is present, the next pointer must be a pointer to
.Vt wchar_t ,
into which the input will be placed after conversion by
.Xr mbrtowc 3 .
.It Cm p
Matches a pointer value (as printed by
.Ql %p
@ -492,18 +521,6 @@ The
modifiers for positional arguments are not implemented.
.Pp
The
.Cm l
modifier for
.Cm %c
and
.Cm %s
(and
.Cm %C
and
.Cm %S )
to specify wide characters and strings is not implemented.
.Pp
The
.Cm \&%a
and
.Cm \&%A

@ -48,6 +48,8 @@ __FBSDID("$FreeBSD$");
#include <stddef.h>
#include <stdarg.h>
#include <string.h>
#include <wchar.h>
#include <wctype.h>
#include "un-namespace.h"
#include "collate.h"
@ -136,7 +138,11 @@ __svfscanf(FILE *fp, const char *fmt0, va_list ap)
int nread; /* number of characters consumed from fp */
int base; /* base argument to conversion function */
char ccltab[256]; /* character class table for %[...] */
char buf[BUF]; /* buffer for numeric conversions */
char buf[BUF]; /* buffer for numeric and mb conversions */
wchar_t *wcp; /* handy wide character pointer */
wchar_t *wcp0; /* saves original value of wcp */
mbstate_t mbs; /* multibyte conversion state */
size_t nconv; /* length of multibyte sequence converted */
/* `basefix' is used to avoid `if' tests in the integer scanner */
static short basefix[17] =
@ -371,6 +377,32 @@ literal:
}
}
nread += sum;
} else if (flags & LONG) {
wcp = va_arg(ap, wchar_t *);
n = 0;
while (width != 0) {
if (n == MB_CUR_MAX)
goto input_failure;
buf[n++] = *fp->_p;
fp->_p++;
fp->_r--;
memset(&mbs, 0, sizeof(mbs));
nconv = mbrtowc(wcp, buf, n, &mbs);
if (nconv == 0 || nconv == (size_t)-1)
goto input_failure;
if (nconv != (size_t)-2) {
nread += n;
width--;
wcp++;
n = 0;
}
if (fp->_r <= 0 && __srefill(fp)) {
if (n != 0)
goto input_failure;
break;
}
}
nassigned++;
} else {
size_t r = fread((void *)va_arg(ap, char *), 1,
width, fp);
@ -402,6 +434,45 @@ literal:
}
if (n == 0)
goto match_failure;
} else if (flags & LONG) {
wcp = wcp0 = va_arg(ap, wchar_t *);
n = 0;
while (width != 0) {
if (n == MB_CUR_MAX)
goto input_failure;
buf[n++] = *fp->_p;
fp->_p++;
fp->_r--;
memset(&mbs, 0, sizeof(mbs));
nconv = mbrtowc(wcp, buf, n, &mbs);
if (nconv == 0 || nconv == (size_t)-1)
goto input_failure;
if (nconv != (size_t)-2) {
if (wctob(*wcp) != EOF &&
!ccltab[wctob(*wcp)]) {
while (--n > 0)
__ungetc(buf[n],
fp);
break;
}
nread += n;
width--;
wcp++;
n = 0;
}
if (fp->_r <= 0 && __srefill(fp)) {
if (n != 0)
goto input_failure;
break;
}
}
if (n != 0)
goto input_failure;
n = wcp - wcp0;
if (n == 0)
goto match_failure;
*wcp = L'\0';
nassigned++;
} else {
p0 = p = va_arg(ap, char *);
while (ccltab[*fp->_p]) {
@ -439,6 +510,39 @@ literal:
break;
}
nread += n;
} else if (flags & LONG) {
wcp = va_arg(ap, wchar_t *);
n = 0;
while (!isspace(*fp->_p) && width != 0) {
if (n == MB_CUR_MAX)
goto input_failure;
buf[n++] = *fp->_p;
fp->_p++;
fp->_r--;
memset(&mbs, 0, sizeof(mbs));
nconv = mbrtowc(wcp, buf, n, &mbs);
if (nconv == 0 || nconv == (size_t)-1)
goto input_failure;
if (nconv != (size_t)-2) {
if (iswspace(*wcp)) {
while (--n > 0)
__ungetc(buf[n],
fp);
break;
}
nread += n;
width--;
wcp++;
n = 0;
}
if (fp->_r <= 0 && __srefill(fp)) {
if (n != 0)
goto input_failure;
break;
}
}
*wcp = L'\0';
nassigned++;
} else {
p0 = p = va_arg(ap, char *);
while (!isspace(*fp->_p)) {