Simplify some of the wide-character handling, inspired

in part by OpenBSD's not-quite-standard-compliant
standard libraries.  (No loss of functionality,
just minor recoding to not rely on certain "standard"
facilities that weren't actually needed.)
This commit is contained in:
Tim Kientzle 2006-05-01 01:02:19 +00:00
parent 673ec5a652
commit d3b6573b00
3 changed files with 40 additions and 62 deletions

View File

@ -9,7 +9,7 @@ LDADD= -lbz2 -lz
# Major: Bumped ONLY when API/ABI breakage happens.
# Minor: Bumped when significant new features are added (see SHLIB_MAJOR)
# Revision: Bumped on any notable change
VERSION= 1.2.51
VERSION= 1.2.53
ARCHIVE_API_MAJOR!= echo ${VERSION} | sed -e 's/\..*//'
ARCHIVE_API_MINOR!= echo ${VERSION} | sed -e 's/[0-9]*\.//' | sed -e 's/\..*//'

View File

@ -1453,16 +1453,16 @@ set_xattrs(struct archive *a, int fd, struct archive_entry *entry)
name, value, size, 0);
}
if (e == -1) {
if (err == ENOTSUP) {
if (errno == ENOTSUP) {
if (!warning_done) {
warning_done = 1;
archive_set_error(a, err,
archive_set_error(a, errno,
"Cannot restore extended "
"attributes on this file "
"system");
}
} else
archive_set_error(a, err,
archive_set_error(a, errno,
"Failed to set extended attribute");
ret = ARCHIVE_WARN;
}

View File

@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
#ifdef HAVE_WCHAR_H
#include <wchar.h>
#else
/* Good enough for equality testing, which is all we need. */
static int wcscmp(const wchar_t *s1, const wchar_t *s2)
{
int diff = *s1 - *s2;
@ -53,6 +54,14 @@ static int wcscmp(const wchar_t *s1, const wchar_t *s2)
diff = (int)*++s1 - (int)*++s2;
return diff;
}
/* Good enough for equality testing, which is all we need. */
static int wcsncmp(const wchar_t *s1, const wchar_t *s2, size_t n)
{
int diff = *s1 - *s2;
while (*s1 && diff == 0 && n-- > 0)
diff = (int)*++s1 - (int)*++s2;
return diff;
}
static size_t wcslen(const wchar_t *s)
{
const wchar_t *p = s;
@ -1644,12 +1653,6 @@ utf8_decode(wchar_t *dest, const char *src, size_t length)
n = UTF8_mbrtowc(dest, src, length);
if (n == 0)
break;
if (n > 8) {
/* Invalid byte encountered; try to keep going. */
*dest = L'?';
n = 1;
err = 1;
}
dest++;
src += n;
length -= n;
@ -1659,68 +1662,52 @@ utf8_decode(wchar_t *dest, const char *src, size_t length)
}
/*
* Copied from FreeBSD libc/locale.
* Copied and simplified from FreeBSD libc/locale.
*/
static size_t
UTF8_mbrtowc(wchar_t *pwc, const char *s, size_t n)
{
int ch, i, len, mask;
unsigned long lbound, wch;
unsigned long wch;
if (s == NULL)
/* Reset to initial shift state (no-op) */
if (s == NULL || n == 0 || pwc == NULL)
return (0);
if (n == 0)
/* Incomplete multibyte sequence */
return ((size_t)-2);
/*
* Determine the number of octets that make up this character from
* the first octet, and a mask that extracts the interesting bits of
* the first octet.
*
* We also specify a lower bound for the character code to detect
* redundant, non-"shortest form" encodings. For example, the
* sequence C0 80 is _not_ a legal representation of the null
* character. This enforces a 1-to-1 mapping between character
* codes and their multibyte representations.
*/
ch = (unsigned char)*s;
if ((ch & 0x80) == 0) {
mask = 0x7f;
len = 1;
lbound = 0;
} else if ((ch & 0xe0) == 0xc0) {
mask = 0x1f;
len = 2;
lbound = 0x80;
} else if ((ch & 0xf0) == 0xe0) {
mask = 0x0f;
len = 3;
lbound = 0x800;
} else if ((ch & 0xf8) == 0xf0) {
mask = 0x07;
len = 4;
lbound = 0x10000;
} else if ((ch & 0xfc) == 0xf8) {
mask = 0x03;
len = 5;
lbound = 0x200000;
} else if ((ch & 0xfc) == 0xfc) {
} else if ((ch & 0xfe) == 0xfc) {
mask = 0x01;
len = 6;
lbound = 0x4000000;
} else {
/*
* Malformed input; input is not UTF-8.
*/
errno = EILSEQ;
return ((size_t)-1);
/* Invalid first byte; convert to '?' */
*pwc = '?';
return (1);
}
if (n < (size_t)len)
/* Incomplete multibyte sequence */
return ((size_t)-2);
if (n < (size_t)len) {
/* Invalid first byte; convert to '?' */
*pwc = '?';
return (1);
}
/*
* Decode the octet sequence representing the character in chunks
@ -1730,36 +1717,27 @@ UTF8_mbrtowc(wchar_t *pwc, const char *s, size_t n)
i = len;
while (--i != 0) {
if ((*s & 0xc0) != 0x80) {
/*
* Malformed input; bad characters in the middle
* of a character.
*/
errno = EILSEQ;
return ((size_t)-1);
/* Invalid intermediate byte; consume one byte and
* emit '?' */
*pwc = '?';
return (1);
}
wch <<= 6;
wch |= *s++ & 0x3f;
}
if (wch < lbound) {
/*
* Malformed input; redundant encoding.
*/
errno = EILSEQ;
return ((size_t)-1);
}
if (pwc != NULL) {
/* Assign the value to the output; out-of-range values
* just get truncated. */
*pwc = (wchar_t)wch;
/* Assign the value to the output; out-of-range values
* just get truncated. */
*pwc = (wchar_t)wch;
#ifdef WCHAR_MAX
/*
* If platform has WCHAR_MAX, we can do something
* more sensible with out-of-range values.
*/
if (wch >= WCHAR_MAX)
*pwc = '?';
/*
* If platform has WCHAR_MAX, we can do something
* more sensible with out-of-range values.
*/
if (wch >= WCHAR_MAX)
*pwc = '?';
#endif
}
/* Return number of bytes input consumed: 0 for end-of-string. */
return (wch == L'\0' ? 0 : len);
}