When deciding which characters (of a filename, for instance)

are safe to print, try to take into account the current locale.
This iterates over output strings using mbtowc() to identify
multi-byte sequences.  If iswprint() claims the corresponding
wide character is printable, the original bytes are passed
through.  Otherwise, we expand characters into C-style
\-escape sequences.

Submitted by:	Michihiro NAKAJIMA
MFC after:	30 days
This commit is contained in:
Tim Kientzle 2008-11-29 20:06:53 +00:00
parent 0d9d2de11a
commit 5af3583731
2 changed files with 121 additions and 51 deletions

View File

@ -101,6 +101,7 @@
#define HAVE_UNISTD_H 1
#define HAVE_UNSIGNED_LONG_LONG
#define HAVE_VPRINTF 1
/* <wctype.h> is available, so sources may include it for iswprint(). */
#define HAVE_WCTYPE_H 1
#define HAVE_ZLIB_H 1
#undef MAJOR_IN_MKDEV
#define STDC_HEADERS 1

View File

@ -46,92 +46,161 @@ __FBSDID("$FreeBSD$");
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_WCTYPE_H
#include <wctype.h>
#else
/* If we don't have wctype, we need to hack up some version of iswprint(). */
#define iswprint isprint
#endif
#include "bsdtar.h"
/* Forward declarations of helpers that are private to this file. */
static void bsdtar_vwarnc(struct bsdtar *, int code,
const char *fmt, va_list ap);
/* Writes one byte, \-escaped if necessary, at buff[offset]; returns bytes added. */
static size_t bsdtar_expand_char(char *, size_t, char);
static const char *strip_components(const char *path, int elements);
/* TODO: Hack up a version of mbtowc for platforms with no wide
* character support at all. I think the following might suffice,
* but it needs careful testing.
* #if !HAVE_MBTOWC
* #define mbtowc(wcp, p, n) ((*wcp = *p), 1)
* #endif
*/
/*
 * Print a string, taking care with any non-printable characters.
 *
 * The arguments are formatted exactly as for fprintf().  The formatted
 * text is then walked one multi-byte character at a time: characters
 * that iswprint() reports as printable in the current locale are copied
 * through unchanged, everything else (and every backslash) is expanded
 * into a C-style \-escape by bsdtar_expand_char().
 *
 * Note that we use a stack-allocated buffer to receive the formatted
 * string if we can.  This is partly performance (avoiding a call to
 * malloc()), partly out of expedience (we have to call vsnprintf()
 * before malloc() anyway to find out how big a buffer we need; we may
 * as well point that first call at a small local buffer in case it
 * works), but mostly for safety (so we can use this to print messages
 * about out-of-memory conditions).  If the heap allocation fails, the
 * truncated text already sitting in the stack buffer is printed.
 *
 * NOTE(review): a negative return from vsnprintf() (formatting error)
 * is not handled specially here.
 */
void
safe_fprintf(FILE *f, const char *fmt, ...)
{
	char fmtbuff_stack[256]; /* Place to format the printf() string. */
	char outbuff[256]; /* Buffer for outgoing characters. */
	char *fmtbuff_heap; /* If fmtbuff_stack is too small, we use malloc */
	char *fmtbuff;  /* Pointer to fmtbuff_stack or fmtbuff_heap. */
	int fmtbuff_length;
	int length; /* Bytes of formatted text not yet consumed. */
	va_list ap;
	const char *p;
	unsigned i;
	wchar_t wc;
	char try_wc;

	/* Use a stack-allocated buffer if we can, for speed and safety. */
	fmtbuff_heap = NULL;
	fmtbuff_length = sizeof(fmtbuff_stack);
	fmtbuff = fmtbuff_stack;

	/* Try formatting into the stack buffer. */
	va_start(ap, fmt);
	length = vsnprintf(fmtbuff, fmtbuff_length, fmt, ap);
	va_end(ap);

	/* If the result was too large, allocate a buffer on the heap. */
	if (length >= fmtbuff_length) {
		fmtbuff_length = length+1;
		fmtbuff_heap = malloc(fmtbuff_length);

		/* Reformat the result into the heap buffer if we can. */
		if (fmtbuff_heap != NULL) {
			fmtbuff = fmtbuff_heap;
			va_start(ap, fmt);
			length = vsnprintf(fmtbuff, fmtbuff_length, fmt, ap);
			va_end(ap);
		} else {
			/* Leave fmtbuff pointing to the truncated
			 * string in fmtbuff_stack. */
			length = sizeof(fmtbuff_stack) - 1;
		}
	}

	/* Note: mbrtowc() has a cleaner API, but mbtowc() seems a bit
	 * more portable, so we use that here instead. */
	(void)mbtowc(NULL, NULL, 0); /* Reset the shift state. */

	/* Write data, expanding unprintable characters. */
	p = fmtbuff;
	i = 0;
	try_wc = 1;
	while (*p != '\0') {
		int n;

		/* Convert to wide char, test if the wide
		 * char is printable in the current locale. */
		if (try_wc && (n = mbtowc(&wc, p, length)) != -1) {
			length -= n;
			if (iswprint(wc) && wc != L'\\') {
				/* Printable, copy the bytes through. */
				while (n-- > 0)
					outbuff[i++] = *p++;
			} else {
				/* Not printable, format the bytes. */
				while (n-- > 0)
					i += bsdtar_expand_char(
					    outbuff, i, *p++);
			}
		} else {
			/* After any conversion failure, don't bother
			 * trying to convert the rest. */
			i += bsdtar_expand_char(outbuff, i, *p++);
			try_wc = 0;
		}

		/* If our output buffer is full, dump it and keep going.
		 * The 20-byte margin leaves room for the next (possibly
		 * escaped) character plus the terminating NUL. */
		if (i > (sizeof(outbuff) - 20)) {
			outbuff[i++] = '\0';
			fprintf(f, "%s", outbuff);
			i = 0;
		}
	}
	outbuff[i++] = '\0';
	fprintf(f, "%s", outbuff);

	/* Release the heap-based formatting buffer, if any
	 * (free(NULL) is a no-op). */
	free(fmtbuff_heap);
}
/*
 * Render a single byte into printable ASCII characters.
 *
 * The rendering of 'c' is stored starting at buff[offset]:
 *   - a printable byte other than backslash is copied as-is (1 byte);
 *   - backslash and the common control characters become a two-byte
 *     escape such as "\\" or "\n";
 *   - any other byte becomes a backslash followed by three octal
 *     digits (4 bytes).
 *
 * Returns the number of bytes added.  The octal form is produced with
 * sprintf(), which also stores a terminating NUL after the digits, so
 * the caller must have at least 5 bytes available at buff + offset.
 */
static size_t
bsdtar_expand_char(char *buff, size_t offset, char c)
{
	/*
	 * The <ctype.h> functions are only defined for values representable
	 * as unsigned char (or EOF); plain char may be negative for bytes
	 * >= 0x80, so convert before classifying or formatting.
	 */
	const unsigned char uc = (unsigned char)c;
	size_t i = offset;

	if (isprint(uc) && uc != '\\')
		buff[i++] = c;
	else {
		buff[i++] = '\\';
		switch (c) {
		case '\a': buff[i++] = 'a'; break;
		case '\b': buff[i++] = 'b'; break;
		case '\f': buff[i++] = 'f'; break;
		case '\n': buff[i++] = 'n'; break;
#if '\r' != '\n'
		/* On some platforms, \n and \r are the same. */
		case '\r': buff[i++] = 'r'; break;
#endif
		case '\t': buff[i++] = 't'; break;
		case '\v': buff[i++] = 'v'; break;
		case '\\': buff[i++] = '\\'; break;
		default:
			/* %o expects an unsigned int argument. */
			sprintf(buff + i, "%03o", (unsigned int)uc);
			i += 3;
		}
	}

	return (i - offset);
}
static void