Add support for multibyte thousands_sep encodings, e.g., U+066C.

The integer thousands' separator code is rewritten in order to
avoid having to preallocate a buffer for the largest possible
digit string with the most possible instances of the longest
possible multibyte thousands' separator. The new version inserts
thousands' separators for integers using the same code as floating point.
This commit is contained in:
David Schultz 2009-01-22 08:14:28 +00:00
parent b62baf95fe
commit 21ca178ece
3 changed files with 225 additions and 173 deletions

View File

@ -54,10 +54,8 @@ static int exponent(CHAR *, int, CHAR);
#endif /* !NO_FLOATING_POINT */
static CHAR *__ujtoa(uintmax_t, CHAR *, int, int, const char *, int, char,
const char *);
static CHAR *__ultoa(u_long, CHAR *, int, int, const char *, int, char,
const char *);
static CHAR *__ujtoa(uintmax_t, CHAR *, int, int, const char *);
static CHAR *__ultoa(u_long, CHAR *, int, int, const char *);
#define NIOV 8
struct io_state {
@ -158,12 +156,10 @@ io_flush(struct io_state *iop)
* use the given digits.
*/
static CHAR *
__ultoa(u_long val, CHAR *endp, int base, int octzero, const char *xdigs,
int needgrp, char thousep, const char *grp)
__ultoa(u_long val, CHAR *endp, int base, int octzero, const char *xdigs)
{
CHAR *cp = endp;
long sval;
int ndig;
/*
* Handle the three cases separately, in the hope of getting
@ -175,7 +171,6 @@ __ultoa(u_long val, CHAR *endp, int base, int octzero, const char *xdigs,
*--cp = to_char(val);
return (cp);
}
ndig = 0;
/*
* On many machines, unsigned arithmetic is harder than
* signed arithmetic, so we do at most one unsigned mod and
@ -184,29 +179,11 @@ __ultoa(u_long val, CHAR *endp, int base, int octzero, const char *xdigs,
*/
if (val > LONG_MAX) {
*--cp = to_char(val % 10);
ndig++;
sval = val / 10;
} else
sval = val;
do {
*--cp = to_char(sval % 10);
ndig++;
/*
* If (*grp == CHAR_MAX) then no more grouping
* should be performed.
*/
if (needgrp && ndig == *grp && *grp != CHAR_MAX
&& sval > 9) {
*--cp = thousep;
ndig = 0;
/*
* If (*(grp+1) == '\0') then we have to
* use *grp character (last grouping rule)
* for all next cases
*/
if (*(grp+1) != '\0')
grp++;
}
sval /= 10;
} while (sval != 0);
break;
@ -235,50 +212,28 @@ __ultoa(u_long val, CHAR *endp, int base, int octzero, const char *xdigs,
/* Identical to __ultoa, but for intmax_t. */
static CHAR *
__ujtoa(uintmax_t val, CHAR *endp, int base, int octzero, const char *xdigs,
int needgrp, char thousep, const char *grp)
__ujtoa(uintmax_t val, CHAR *endp, int base, int octzero, const char *xdigs)
{
CHAR *cp = endp;
intmax_t sval;
int ndig;
/* quick test for small values; __ultoa is typically much faster */
/* (perhaps instead we should run until small, then call __ultoa?) */
if (val <= ULONG_MAX)
return (__ultoa((u_long)val, endp, base, octzero, xdigs,
needgrp, thousep, grp));
return (__ultoa((u_long)val, endp, base, octzero, xdigs));
switch (base) {
case 10:
if (val < 10) {
*--cp = to_char(val % 10);
return (cp);
}
ndig = 0;
if (val > INTMAX_MAX) {
*--cp = to_char(val % 10);
ndig++;
sval = val / 10;
} else
sval = val;
do {
*--cp = to_char(sval % 10);
ndig++;
/*
* If (*grp == CHAR_MAX) then no more grouping
* should be performed.
*/
if (needgrp && *grp != CHAR_MAX && ndig == *grp
&& sval > 9) {
*--cp = thousep;
ndig = 0;
/*
* If (*(grp+1) == '\0') then we have to
* use *grp character (last grouping rule)
* for all next cases
*/
if (*(grp+1) != '\0')
grp++;
}
sval /= 10;
} while (sval != 0);
break;

View File

@ -72,6 +72,75 @@ static char *__wcsconv(wchar_t *, int);
#define CHAR char
#include "printfcommon.h"
struct grouping_state {
char *thousands_sep; /* locale-specific thousands separator */
int thousep_len; /* length of thousands_sep */
const char *grouping; /* locale-specific numeric grouping rules */
int lead; /* sig figs before decimal or group sep */
int nseps; /* number of group separators with ' */
int nrepeats; /* number of repeats of the last group */
};
/*
* Initialize the thousands' grouping state in preparation to print a
* number with ndigits digits. This routine returns the total number
* of bytes that will be needed.
*/
static int
grouping_init(struct grouping_state *gs, int ndigits)
{
struct lconv *locale;
locale = localeconv();
gs->grouping = locale->grouping;
gs->thousands_sep = locale->thousands_sep;
gs->thousep_len = strlen(gs->thousands_sep);
gs->nseps = gs->nrepeats = 0;
gs->lead = ndigits;
while (*gs->grouping != CHAR_MAX) {
if (gs->lead <= *gs->grouping)
break;
gs->lead -= *gs->grouping;
if (*(gs->grouping+1)) {
gs->nseps++;
gs->grouping++;
} else
gs->nrepeats++;
}
return ((gs->nseps + gs->nrepeats) * gs->thousep_len);
}
/*
* Print a number with thousands' separators.
*/
static int
grouping_print(struct grouping_state *gs, struct io_state *iop,
const CHAR *cp, const CHAR *ep)
{
const CHAR *cp0 = cp;
if (io_printandpad(iop, cp, ep, gs->lead, zeroes))
return (-1);
cp += gs->lead;
while (gs->nseps > 0 || gs->nrepeats > 0) {
if (gs->nrepeats > 0)
gs->nrepeats--;
else {
gs->grouping--;
gs->nseps--;
}
if (io_print(iop, gs->thousands_sep, gs->thousep_len))
return (-1);
if (io_printandpad(iop, cp, ep, *gs->grouping, zeroes))
return (-1);
cp += *gs->grouping;
}
if (cp > ep)
cp = ep;
return (cp - cp0);
}
/*
* Flush out all the vectors defined by the given uio,
* then reset it so that it can be reused.
@ -210,12 +279,14 @@ vfprintf(FILE * __restrict fp, const char * __restrict fmt0, va_list ap)
/*
* The size of the buffer we use as scratch space for integer
* conversions, among other things. Technically, we would need the
* most space for base 10 conversions with thousands' grouping
* characters between each pair of digits. 100 bytes is a
* conservative overestimate even for a 128-bit uintmax_t.
* conversions, among other things. We need enough space to
* write a uintmax_t in octal (plus one byte).
*/
#define BUF 100
#if UINTMAX_MAX <= UINT64_MAX
#define BUF 32
#else
#error "BUF must be large enough to format a uintmax_t"
#endif
/*
* Non-MT-safe version
@ -232,8 +303,7 @@ __vfprintf(FILE *fp, const char *fmt0, va_list ap)
int width; /* width from format (%8d), or 0 */
int prec; /* precision from format; <0 for N/A */
char sign; /* sign prefix (' ', '+', '-', or \0) */
char thousands_sep; /* locale specific thousands separator */
const char *grouping; /* locale specific numeric grouping rules */
struct grouping_state gs; /* thousands' grouping info */
#ifndef NO_FLOATING_POINT
/*
@ -261,12 +331,9 @@ __vfprintf(FILE *fp, const char *fmt0, va_list ap)
char expchar; /* exponent character: [eEpP\0] */
char *dtoaend; /* pointer to end of converted digits */
int expsize; /* character count for expstr */
int lead; /* sig figs before decimal or group sep */
int ndig; /* actual number of digits returned by dtoa */
char expstr[MAXEXPDIG+2]; /* buffer for exponent string: e+ZZZ */
char *dtoaresult; /* buffer allocated by dtoa */
int nseps; /* number of group separators with ' */
int nrepeats; /* number of repeats of the last group */
#endif
u_long ulval; /* integer arguments %[diouxX] */
uintmax_t ujval; /* %j, %ll, %q, %t, %z integers */
@ -378,8 +445,6 @@ __vfprintf(FILE *fp, const char *fmt0, va_list ap)
if (prepwrite(fp) != 0)
return (EOF);
thousands_sep = '\0';
grouping = NULL;
convbuf = NULL;
fmt = (char *)fmt0;
argtable = NULL;
@ -416,6 +481,7 @@ __vfprintf(FILE *fp, const char *fmt0, va_list ap)
dprec = 0;
width = 0;
prec = -1;
gs.grouping = NULL;
sign = '\0';
ox[1] = '\0';
@ -453,8 +519,6 @@ reswitch: switch (ch) {
goto rflag;
case '\'':
flags |= GROUPING;
thousands_sep = *(localeconv()->thousands_sep);
grouping = localeconv()->grouping;
goto rflag;
case '.':
if ((ch = *fmt++) == '*') {
@ -685,23 +749,8 @@ fp_common:
/* space for decimal pt and following digits */
if (prec || flags & ALT)
size += prec + decpt_len;
if (grouping && expt > 0) {
/* space for thousands' grouping */
nseps = nrepeats = 0;
lead = expt;
while (*grouping != CHAR_MAX) {
if (lead <= *grouping)
break;
lead -= *grouping;
if (*(grouping+1)) {
nseps++;
grouping++;
} else
nrepeats++;
}
size += nseps + nrepeats;
} else
lead = expt;
if ((flags & GROUPING) && expt > 0)
size += grouping_init(&gs, expt);
}
break;
#endif /* !NO_FLOATING_POINT */
@ -842,20 +891,18 @@ number: if ((dprec = prec) >= 0)
if (ujval != 0 || prec != 0 ||
(flags & ALT && base == 8))
cp = __ujtoa(ujval, cp, base,
flags & ALT, xdigs,
flags & GROUPING, thousands_sep,
grouping);
flags & ALT, xdigs);
} else {
if (ulval != 0 || prec != 0 ||
(flags & ALT && base == 8))
cp = __ultoa(ulval, cp, base,
flags & ALT, xdigs,
flags & GROUPING, thousands_sep,
grouping);
flags & ALT, xdigs);
}
size = buf + BUF - cp;
if (size > BUF) /* should never happen */
abort();
if ((flags & GROUPING) && size != 0)
size += grouping_init(&gs, size);
break;
default: /* "%?" prints ?, unless ? is NUL */
if (ch == '\0')
@ -911,13 +958,19 @@ number: if ((dprec = prec) >= 0)
if ((flags & (LADJUST|ZEROPAD)) == ZEROPAD)
PAD(width - realsz, zeroes);
/* leading zeroes from decimal precision */
PAD(dprec - size, zeroes);
/* the string or number proper */
#ifndef NO_FLOATING_POINT
if ((flags & FPT) == 0) {
PRINT(cp, size);
#endif
/* leading zeroes from decimal precision */
PAD(dprec - size, zeroes);
if (gs.grouping) {
if (grouping_print(&gs, &io, cp, buf+BUF) < 0)
goto error;
} else {
PRINT(cp, size);
}
#ifndef NO_FLOATING_POINT
} else { /* glue together f_p fragments */
if (!expchar) { /* %[fF] or sufficiently short %[gG] */
if (expt <= 0) {
@ -928,24 +981,16 @@ number: if ((dprec = prec) >= 0)
/* already handled initial 0's */
prec += expt;
} else {
PRINTANDPAD(cp, dtoaend, lead, zeroes);
cp += lead;
if (grouping) {
while (nseps>0 || nrepeats>0) {
if (nrepeats > 0)
nrepeats--;
else {
grouping--;
nseps--;
}
PRINT(&thousands_sep,
1);
PRINTANDPAD(cp,dtoaend,
*grouping, zeroes);
cp += *grouping;
}
if (cp > dtoaend)
cp = dtoaend;
if (gs.grouping) {
n = grouping_print(&gs, &io,
cp, dtoaend);
if (n < 0)
goto error;
cp += n;
} else {
PRINTANDPAD(cp, dtoaend,
expt, zeroes);
cp += expt;
}
if (prec || flags & ALT)
PRINT(decimal_point,decpt_len);
@ -962,8 +1007,6 @@ number: if ((dprec = prec) >= 0)
PRINT(expstr, expsize);
}
}
#else
PRINT(cp, size);
#endif
/* left-adjusting padding (always blank) */
if (flags & LADJUST)

View File

@ -74,6 +74,14 @@ static wchar_t *__mbsconv(char *, int);
#define CHAR wchar_t
#include "printfcommon.h"
struct grouping_state {
wchar_t thousands_sep; /* locale-specific thousands separator */
const char *grouping; /* locale-specific numeric grouping rules */
int lead; /* sig figs before decimal or group sep */
int nseps; /* number of group separators with ' */
int nrepeats; /* number of repeats of the last group */
};
static const mbstate_t initial_mbs;
static inline wchar_t
@ -90,6 +98,79 @@ get_decpt(void)
return (decpt);
}
static inline wchar_t
get_thousep(void)
{
mbstate_t mbs;
wchar_t thousep;
int nconv;
mbs = initial_mbs;
nconv = mbrtowc(&thousep, localeconv()->thousands_sep,
MB_CUR_MAX, &mbs);
if (nconv == (size_t)-1 || nconv == (size_t)-2)
thousep = '\0'; /* failsafe */
return (thousep);
}
/*
* Initialize the thousands' grouping state in preparation to print a
* number with ndigits digits. This routine returns the total number
* of wide characters that will be printed.
*/
static int
grouping_init(struct grouping_state *gs, int ndigits)
{
gs->grouping = localeconv()->grouping;
gs->thousands_sep = get_thousep();
gs->nseps = gs->nrepeats = 0;
gs->lead = ndigits;
while (*gs->grouping != CHAR_MAX) {
if (gs->lead <= *gs->grouping)
break;
gs->lead -= *gs->grouping;
if (*(gs->grouping+1)) {
gs->nseps++;
gs->grouping++;
} else
gs->nrepeats++;
}
return (gs->nseps + gs->nrepeats);
}
/*
* Print a number with thousands' separators.
*/
static int
grouping_print(struct grouping_state *gs, struct io_state *iop,
const CHAR *cp, const CHAR *ep)
{
const CHAR *cp0 = cp;
if (io_printandpad(iop, cp, ep, gs->lead, zeroes))
return (-1);
cp += gs->lead;
while (gs->nseps > 0 || gs->nrepeats > 0) {
if (gs->nrepeats > 0)
gs->nrepeats--;
else {
gs->grouping--;
gs->nseps--;
}
if (io_print(iop, &gs->thousands_sep, 1))
return (-1);
if (io_printandpad(iop, cp, ep, *gs->grouping, zeroes))
return (-1);
cp += *gs->grouping;
}
if (cp > ep)
cp = ep;
return (cp - cp0);
}
/*
* Flush out all the vectors defined by the given uio,
* then reset it so that it can be reused.
@ -280,12 +361,14 @@ vfwprintf(FILE * __restrict fp, const wchar_t * __restrict fmt0, va_list ap)
/*
* The size of the buffer we use as scratch space for integer
* conversions, among other things. Technically, we would need the
* most space for base 10 conversions with thousands' grouping
* characters between each pair of digits. 100 bytes is a
* conservative overestimate even for a 128-bit uintmax_t.
* conversions, among other things. We need enough space to
* write a uintmax_t in octal (plus one byte).
*/
#define BUF 100
#if UINTMAX_MAX <= UINT64_MAX
#define BUF 32
#else
#error "BUF must be large enough to format a uintmax_t"
#endif
/*
* Non-MT-safe version
@ -302,8 +385,7 @@ __vfwprintf(FILE *fp, const wchar_t *fmt0, va_list ap)
int width; /* width from format (%8d), or 0 */
int prec; /* precision from format; <0 for N/A */
wchar_t sign; /* sign prefix (' ', '+', '-', or \0) */
wchar_t thousands_sep; /* locale specific thousands separator */
const char *grouping; /* locale specific numeric grouping rules */
struct grouping_state gs; /* thousands' grouping info */
#ifndef NO_FLOATING_POINT
/*
* We can decompose the printed representation of floating
@ -329,12 +411,9 @@ __vfwprintf(FILE *fp, const wchar_t *fmt0, va_list ap)
char expchar; /* exponent character: [eEpP\0] */
char *dtoaend; /* pointer to end of converted digits */
int expsize; /* character count for expstr */
int lead; /* sig figs before decimal or group sep */
int ndig; /* actual number of digits returned by dtoa */
wchar_t expstr[MAXEXPDIG+2]; /* buffer for exponent string: e+ZZZ */
char *dtoaresult; /* buffer allocated by dtoa */
int nseps; /* number of group separators with ' */
int nrepeats; /* number of repeats of the last group */
#endif
u_long ulval; /* integer arguments %[diouxX] */
uintmax_t ujval; /* %j, %ll, %q, %t, %z integers */
@ -442,8 +521,6 @@ __vfwprintf(FILE *fp, const wchar_t *fmt0, va_list ap)
if (prepwrite(fp) != 0)
return (EOF);
thousands_sep = '\0';
grouping = NULL;
convbuf = NULL;
fmt = (wchar_t *)fmt0;
argtable = NULL;
@ -477,6 +554,7 @@ __vfwprintf(FILE *fp, const wchar_t *fmt0, va_list ap)
dprec = 0;
width = 0;
prec = -1;
gs.grouping = NULL;
sign = '\0';
ox[1] = '\0';
@ -514,8 +592,6 @@ reswitch: switch (ch) {
goto rflag;
case '\'':
flags |= GROUPING;
thousands_sep = *(localeconv()->thousands_sep);
grouping = localeconv()->grouping;
goto rflag;
case '.':
if ((ch = *fmt++) == '*') {
@ -739,23 +815,8 @@ fp_common:
/* space for decimal pt and following digits */
if (prec || flags & ALT)
size += prec + 1;
if (grouping && expt > 0) {
/* space for thousands' grouping */
nseps = nrepeats = 0;
lead = expt;
while (*grouping != CHAR_MAX) {
if (lead <= *grouping)
break;
lead -= *grouping;
if (*(grouping+1)) {
nseps++;
grouping++;
} else
nrepeats++;
}
size += nseps + nrepeats;
} else
lead = expt;
if ((flags & GROUPING) && expt > 0)
size += grouping_init(&gs, expt);
}
break;
#endif /* !NO_FLOATING_POINT */
@ -899,20 +960,18 @@ number: if ((dprec = prec) >= 0)
if (ujval != 0 || prec != 0 ||
(flags & ALT && base == 8))
cp = __ujtoa(ujval, cp, base,
flags & ALT, xdigs,
flags & GROUPING, thousands_sep,
grouping);
flags & ALT, xdigs);
} else {
if (ulval != 0 || prec != 0 ||
(flags & ALT && base == 8))
cp = __ultoa(ulval, cp, base,
flags & ALT, xdigs,
flags & GROUPING, thousands_sep,
grouping);
flags & ALT, xdigs);
}
size = buf + BUF - cp;
if (size > BUF) /* should never happen */
abort();
if ((flags & GROUPING) && size != 0)
size += grouping_init(&gs, size);
break;
default: /* "%?" prints ?, unless ? is NUL */
if (ch == '\0')
@ -968,13 +1027,19 @@ number: if ((dprec = prec) >= 0)
if ((flags & (LADJUST|ZEROPAD)) == ZEROPAD)
PAD(width - realsz, zeroes);
/* leading zeroes from decimal precision */
PAD(dprec - size, zeroes);
/* the string or number proper */
#ifndef NO_FLOATING_POINT
if ((flags & FPT) == 0) {
PRINT(cp, size);
#endif
/* leading zeroes from decimal precision */
PAD(dprec - size, zeroes);
if (gs.grouping) {
if (grouping_print(&gs, &io, cp, buf+BUF) < 0)
goto error;
} else {
PRINT(cp, size);
}
#ifndef NO_FLOATING_POINT
} else { /* glue together f_p fragments */
if (!expchar) { /* %[fF] or sufficiently short %[gG] */
if (expt <= 0) {
@ -985,25 +1050,16 @@ number: if ((dprec = prec) >= 0)
/* already handled initial 0's */
prec += expt;
} else {
PRINTANDPAD(cp, convbuf + ndig, lead, zeroes);
cp += lead;
if (grouping) {
while (nseps>0 || nrepeats>0) {
if (nrepeats > 0)
nrepeats--;
else {
grouping--;
nseps--;
}
PRINT(&thousands_sep,
1);
PRINTANDPAD(cp,
convbuf + ndig,
*grouping, zeroes);
cp += *grouping;
}
if (cp > convbuf + ndig)
cp = convbuf + ndig;
if (gs.grouping) {
n = grouping_print(&gs, &io,
cp, convbuf + ndig);
if (n < 0)
goto error;
cp += n;
} else {
PRINTANDPAD(cp, convbuf + ndig,
expt, zeroes);
cp += expt;
}
if (prec || flags & ALT)
PRINT(&decimal_point, 1);
@ -1021,8 +1077,6 @@ number: if ((dprec = prec) >= 0)
PRINT(expstr, expsize);
}
}
#else
PRINT(cp, size);
#endif
/* left-adjusting padding (always blank) */
if (flags & LADJUST)