printf: Allow multibyte characters for '<char> form, avoid negative codes.

Examples:
  LC_ALL=en_US.UTF-8 printf '%d\n' $(printf \'\\303\\244)
  LC_ALL=en_US.ISO8859-1 printf '%d\n' $(printf \'\\344)
Both of these should print 228.

As in some other shells, incomplete or invalid multibyte characters yield the
value of the first byte without a warning.

Note that there is no general way to go back from the character code to the
character.
This commit is contained in:
jilles 2011-05-28 11:37:47 +00:00
parent 36cd7cef5e
commit 979af05e77
5 changed files with 24 additions and 7 deletions

View File

@ -0,0 +1 @@
228

View File

@ -0,0 +1 @@
228

View File

@ -2,11 +2,13 @@
REGRESSION_START($1)
echo '1..9'
echo '1..11'
REGRESSION_TEST(`b', `printf "abc%b%b" "def\n" "\cghi"')
REGRESSION_TEST(`d', `printf "%d,%5d,%.5d,%0*d,%.*d\n" 123 123 123 5 123 5 123')
REGRESSION_TEST(`f', `printf "%f,%-8.3f,%f,%f\n" +42.25 -42.25 inf nan')
REGRESSION_TEST(`l1', `LC_ALL=en_US.ISO8859-1 printf "%d\n" $(printf \"\\344)')
REGRESSION_TEST(`l2', `LC_ALL=en_US.UTF-8 printf "%d\n" $(printf \"\\303\\244)')
REGRESSION_TEST(`m1', `printf "%c%%%d\0\045\n" abc \"abc')
REGRESSION_TEST(`m2', `printf "abc\n\cdef"')
REGRESSION_TEST(`m3', `printf "%%%s\n" abc def ghi jkl')

View File

@ -31,7 +31,7 @@
.\" @(#)printf.1 8.1 (Berkeley) 6/6/93
.\" $FreeBSD$
.\"
.Dd April 25, 2011
.Dd May 28, 2011
.Dt PRINTF 1
.Os
.Sh NAME
@ -68,8 +68,7 @@ otherwise it is evaluated as a C constant, with the following extensions:
A leading plus or minus sign is allowed.
.It
If the leading character is a single or double quote, the value is the
.Tn ASCII
code of the next character.
character code of the next character.
.El
.Pp
The format string is reused as often as necessary to satisfy the

View File

@ -58,6 +58,7 @@ static const char rcsid[] =
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>
#ifdef SHELL
#define main printfcmd
@ -537,10 +538,23 @@ static int
asciicode(void)
{
int ch;
wchar_t wch;
mbstate_t mbs;
ch = **gargv;
if (ch == '\'' || ch == '"')
ch = (*gargv)[1];
ch = (unsigned char)**gargv;
if (ch == '\'' || ch == '"') {
memset(&mbs, 0, sizeof(mbs));
switch (mbrtowc(&wch, *gargv + 1, MB_LEN_MAX, &mbs)) {
case (size_t)-2:
case (size_t)-1:
wch = (unsigned char)gargv[0][1];
break;
case 0:
wch = 0;
break;
}
ch = wch;
}
++gargv;
return (ch);
}