- Add support for multibyte decimal_point encodings, e.g., U+066B
  (ARABIC DECIMAL SEPARATOR). A forthcoming gdtoa import is needed
  to make this fully work.
- Improve the way "nan(...)" is parsed: replace the infnanpos == -1
  kludge with an explicit S_DONE state.
This commit is contained in:
David Schultz 2009-01-19 06:19:51 +00:00
parent 5004a238c3
commit f8f571931d
2 changed files with 50 additions and 24 deletions

View File

@ -911,13 +911,13 @@ static int
parsefloat(FILE *fp, char *buf, char *end)
{
char *commit, *p;
int infnanpos = 0;
int infnanpos = 0, decptpos = 0;
enum {
S_START, S_GOTSIGN, S_INF, S_NAN, S_MAYBEHEX,
S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX,
S_DIGITS, S_DECPT, S_FRAC, S_EXP, S_EXPDIGITS
} state = S_START;
unsigned char c;
char decpt = *localeconv()->decimal_point;
const char *decpt = localeconv()->decimal_point;
_Bool gotmantdig = 0, ishex = 0;
/*
@ -970,8 +970,6 @@ parsefloat(FILE *fp, char *buf, char *end)
break;
case S_NAN:
switch (infnanpos) {
case -1: /* XXX kludge to deal with nan(...) */
goto parsedone;
case 0:
if (c != 'A' && c != 'a')
goto parsedone;
@ -989,13 +987,15 @@ parsefloat(FILE *fp, char *buf, char *end)
default:
if (c == ')') {
commit = p;
infnanpos = -2;
state = S_DONE;
} else if (!isalnum(c) && c != '_')
goto parsedone;
break;
}
infnanpos++;
break;
case S_DONE:
goto parsedone;
case S_MAYBEHEX:
state = S_DIGITS;
if (c == 'X' || c == 'x') {
@ -1006,16 +1006,34 @@ parsefloat(FILE *fp, char *buf, char *end)
goto reswitch;
}
case S_DIGITS:
if ((ishex && isxdigit(c)) || isdigit(c))
if ((ishex && isxdigit(c)) || isdigit(c)) {
gotmantdig = 1;
else {
state = S_FRAC;
if (c != decpt)
goto reswitch;
}
if (gotmantdig)
commit = p;
break;
break;
} else {
state = S_DECPT;
goto reswitch;
}
case S_DECPT:
if (c == decpt[decptpos]) {
if (decpt[++decptpos] == '\0') {
/* We read the complete decpt seq. */
state = S_FRAC;
if (gotmantdig)
commit = p;
}
break;
} else if (!decptpos) {
/* We didn't read any decpt characters. */
state = S_FRAC;
goto reswitch;
} else {
/*
* We read part of a multibyte decimal point,
* but the rest is invalid, so bail.
*/
goto parsedone;
}
case S_FRAC:
if (((c == 'E' || c == 'e') && !ishex) ||
((c == 'P' || c == 'p') && ishex)) {

View File

@ -103,6 +103,8 @@ static int parsefloat(FILE *, wchar_t *, wchar_t *);
(cclcompl ? (wmemchr(ccls, (_c), ccle - ccls) == NULL) : \
(wmemchr(ccls, (_c), ccle - ccls) != NULL))
static const mbstate_t initial_mbs;
/*
* MT-safe version.
*/
@ -142,7 +144,6 @@ __vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, va_list ap)
char *mbp; /* multibyte string pointer for %c %s %[ */
size_t nconv; /* number of bytes in mb. conversion */
char mbbuf[MB_LEN_MAX]; /* temporary mb. character buffer */
static const mbstate_t initial;
mbstate_t mbs;
/* `basefix' is used to avoid `if' tests in the integer scanner */
@ -375,7 +376,7 @@ again: c = *fmt++;
if (!(flags & SUPPRESS))
mbp = va_arg(ap, char *);
n = 0;
mbs = initial;
mbs = initial_mbs;
while (width != 0 &&
(wi = __fgetwc(fp)) != WEOF) {
if (width >= MB_CUR_MAX &&
@ -440,7 +441,7 @@ again: c = *fmt++;
if (!(flags & SUPPRESS))
mbp = va_arg(ap, char *);
n = 0;
mbs = initial;
mbs = initial_mbs;
while ((wi = __fgetwc(fp)) != WEOF &&
width != 0 && INCCL(wi)) {
if (width >= MB_CUR_MAX &&
@ -501,7 +502,7 @@ again: c = *fmt++;
} else {
if (!(flags & SUPPRESS))
mbp = va_arg(ap, char *);
mbs = initial;
mbs = initial_mbs;
while ((wi = __fgetwc(fp)) != WEOF &&
width != 0 &&
!iswspace(wi)) {
@ -721,16 +722,23 @@ again: c = *fmt++;
static int
parsefloat(FILE *fp, wchar_t *buf, wchar_t *end)
{
mbstate_t mbs;
size_t nconv;
wchar_t *commit, *p;
int infnanpos = 0;
enum {
S_START, S_GOTSIGN, S_INF, S_NAN, S_MAYBEHEX,
S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX,
S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
} state = S_START;
wchar_t c;
wchar_t decpt = (wchar_t)(unsigned char)*localeconv()->decimal_point;
wchar_t decpt;
_Bool gotmantdig = 0, ishex = 0;
mbs = initial_mbs;
nconv = mbrtowc(&decpt, localeconv()->decimal_point, MB_CUR_MAX, &mbs);
if (nconv == (size_t)-1 || nconv == (size_t)-2)
decpt = '.'; /* failsafe */
/*
* We set commit = p whenever the string we have read so far
* constitutes a valid representation of a floating point
@ -783,8 +791,6 @@ parsefloat(FILE *fp, wchar_t *buf, wchar_t *end)
break;
case S_NAN:
switch (infnanpos) {
case -1: /* XXX kludge to deal with nan(...) */
goto parsedone;
case 0:
if (c != 'A' && c != 'a')
goto parsedone;
@ -802,13 +808,15 @@ parsefloat(FILE *fp, wchar_t *buf, wchar_t *end)
default:
if (c == ')') {
commit = p;
infnanpos = -2;
state = S_DONE;
} else if (!iswalnum(c) && c != '_')
goto parsedone;
break;
}
infnanpos++;
break;
case S_DONE:
goto parsedone;
case S_MAYBEHEX:
state = S_DIGITS;
if (c == 'X' || c == 'x') {