printf: Allow multibyte characters for '<char> form, avoid negative codes.

Examples:
  LC_ALL=en_US.UTF-8 printf '%d\n' $(printf \'\\303\\244)
  LC_ALL=en_US.ISO8859-1 printf '%d\n' $(printf \'\\344)
Both of these should print 228.

As in some other shells, incomplete or invalid multibyte characters yield the
value of the first byte without a warning.

Note that there is no general way to go back from the character code to the
character.
This commit is contained in:
Jilles Tjoelker 2011-05-28 11:37:47 +00:00
parent 802e09ac9e
commit 98102dabd3
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=222418
5 changed files with 24 additions and 7 deletions

View File

@ -0,0 +1 @@
228

View File

@ -0,0 +1 @@
228

View File

@ -2,11 +2,13 @@
REGRESSION_START($1)
echo '1..9'
echo '1..11'
REGRESSION_TEST(`b', `printf "abc%b%b" "def\n" "\cghi"')
REGRESSION_TEST(`d', `printf "%d,%5d,%.5d,%0*d,%.*d\n" 123 123 123 5 123 5 123')
REGRESSION_TEST(`f', `printf "%f,%-8.3f,%f,%f\n" +42.25 -42.25 inf nan')
REGRESSION_TEST(`l1', `LC_ALL=en_US.ISO8859-1 printf "%d\n" $(printf \"\\344)')
REGRESSION_TEST(`l2', `LC_ALL=en_US.UTF-8 printf "%d\n" $(printf \"\\303\\244)')
REGRESSION_TEST(`m1', `printf "%c%%%d\0\045\n" abc \"abc')
REGRESSION_TEST(`m2', `printf "abc\n\cdef"')
REGRESSION_TEST(`m3', `printf "%%%s\n" abc def ghi jkl')

View File

@ -31,7 +31,7 @@
.\" @(#)printf.1 8.1 (Berkeley) 6/6/93
.\" $FreeBSD$
.\"
.Dd April 25, 2011
.Dd May 28, 2011
.Dt PRINTF 1
.Os
.Sh NAME
@ -68,8 +68,7 @@ otherwise it is evaluated as a C constant, with the following extensions:
A leading plus or minus sign is allowed.
.It
If the leading character is a single or double quote, the value is the
.Tn ASCII
code of the next character.
character code of the next character.
.El
.Pp
The format string is reused as often as necessary to satisfy the

View File

@ -58,6 +58,7 @@ static const char rcsid[] =
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>
#ifdef SHELL
#define main printfcmd
@ -537,10 +538,23 @@ static int
asciicode(void)
{
	/*
	 * Return the numeric value of the string at *gargv and advance gargv
	 * to the next entry.
	 *
	 * If the string begins with a single or double quote, the value is
	 * the character code of the (possibly multibyte) character that
	 * follows the quote, as decoded by mbrtowc().  Otherwise the value
	 * is the first byte of the string.  Bytes are read as unsigned char
	 * so that bytes with the high bit set do not yield negative codes.
	 */
	int ch;
	wchar_t wch;
	mbstate_t mbs;

	ch = (unsigned char)**gargv;
	if (ch == '\'' || ch == '"') {
		/* Start decoding from the initial conversion state. */
		memset(&mbs, 0, sizeof(mbs));
		switch (mbrtowc(&wch, *gargv + 1, MB_LEN_MAX, &mbs)) {
		case (size_t)-2:
		case (size_t)-1:
			/*
			 * Incomplete or invalid multibyte sequence: fall back
			 * to the value of the first byte, without a warning.
			 */
			wch = (unsigned char)gargv[0][1];
			break;
		case 0:
			/* The character after the quote is the null character. */
			wch = 0;
			break;
		default:
			/* mbrtowc() stored the decoded character in wch. */
			break;
		}
		ch = wch;
	}
	++gargv;
	return (ch);
}