In pax interchange format, use UTF-8 when writing

link names, user names, or group names that contain
non-ASCII characters.

In particular, this corrects an inconsistency reported
by Ed Maste when archiving symlinks with odd characters:
long symlinks would get preserved, short ones would
be changed.
This commit is contained in:
Tim Kientzle 2005-10-12 03:26:09 +00:00
parent 85c13a8375
commit 52a88d3b57
3 changed files with 70 additions and 21 deletions

View File

@ -203,6 +203,8 @@ aes_copy(struct aes *dest, struct aes *src)
static const char *
aes_get_mbs(struct aes *aes)
{
if (aes->aes_mbs == NULL && aes->aes_wcs == NULL)
return NULL;
if (aes->aes_mbs == NULL && aes->aes_wcs != NULL) {
/*
* XXX Need to estimate the number of byte in the
@ -224,6 +226,8 @@ aes_get_mbs(struct aes *aes)
static const wchar_t *
aes_get_wcs(struct aes *aes)
{
if (aes->aes_wcs == NULL && aes->aes_mbs == NULL)
return NULL;
if (aes->aes_wcs == NULL && aes->aes_mbs != NULL) {
/*
* No single byte will be more than one wide character,
@ -457,12 +461,24 @@ archive_entry_gname(struct archive_entry *entry)
return (aes_get_mbs(&entry->ae_gname));
}
/*
 * Wide-character accessor for the entry's group name.
 *
 * Hands back whatever aes_get_wcs() yields for the ae_gname field.
 */
const wchar_t *
archive_entry_gname_w(struct archive_entry *entry)
{
	const wchar_t *gname_w;

	gname_w = aes_get_wcs(&entry->ae_gname);
	return (gname_w);
}
/*
 * Multibyte accessor for the entry's hardlink name.
 *
 * Hands back whatever aes_get_mbs() yields for the ae_hardlink field.
 */
const char *
archive_entry_hardlink(struct archive_entry *entry)
{
	const char *link_mbs;

	link_mbs = aes_get_mbs(&entry->ae_hardlink);
	return (link_mbs);
}
/*
 * Wide-character accessor for the entry's hardlink name.
 *
 * Hands back whatever aes_get_wcs() yields for the ae_hardlink field.
 */
const wchar_t *
archive_entry_hardlink_w(struct archive_entry *entry)
{
	const wchar_t *link_wcs;

	link_wcs = aes_get_wcs(&entry->ae_hardlink);
	return (link_wcs);
}
ino_t
archive_entry_ino(struct archive_entry *entry)
{
@ -536,6 +552,12 @@ archive_entry_symlink(struct archive_entry *entry)
return (aes_get_mbs(&entry->ae_symlink));
}
/*
 * Wide-character accessor for the entry's symlink name.
 *
 * Hands back whatever aes_get_wcs() yields for the ae_symlink field.
 */
const wchar_t *
archive_entry_symlink_w(struct archive_entry *entry)
{
	const wchar_t *symlink_wcs;

	symlink_wcs = aes_get_wcs(&entry->ae_symlink);
	return (symlink_wcs);
}
uid_t
archive_entry_uid(struct archive_entry *entry)
{
@ -548,6 +570,12 @@ archive_entry_uname(struct archive_entry *entry)
return (aes_get_mbs(&entry->ae_uname));
}
/*
 * Wide-character accessor for the entry's user name.
 *
 * Hands back whatever aes_get_wcs() yields for the ae_uname field.
 */
const wchar_t *
archive_entry_uname_w(struct archive_entry *entry)
{
	const wchar_t *uname_wcs;

	uname_wcs = aes_get_wcs(&entry->ae_uname);
	return (uname_wcs);
}
/*
* Functions to set archive_entry properties.
*/

View File

@ -79,7 +79,9 @@ void archive_entry_fflags(struct archive_entry *,
const char *archive_entry_fflags_text(struct archive_entry *);
gid_t archive_entry_gid(struct archive_entry *);
/* Group name: multibyte form, then the wide-character (_w) form. */
const char *archive_entry_gname(struct archive_entry *);
const wchar_t *archive_entry_gname_w(struct archive_entry *);
/* Hardlink name: multibyte form, then the wide-character (_w) form. */
const char *archive_entry_hardlink(struct archive_entry *);
const wchar_t *archive_entry_hardlink_w(struct archive_entry *);
ino_t archive_entry_ino(struct archive_entry *);
mode_t archive_entry_mode(struct archive_entry *);
time_t archive_entry_mtime(struct archive_entry *);
@ -92,8 +94,10 @@ dev_t archive_entry_rdevminor(struct archive_entry *);
int64_t archive_entry_size(struct archive_entry *);
const struct stat *archive_entry_stat(struct archive_entry *);
/* Symlink name: multibyte form, then the wide-character (_w) form. */
const char *archive_entry_symlink(struct archive_entry *);
const wchar_t *archive_entry_symlink_w(struct archive_entry *);
uid_t archive_entry_uid(struct archive_entry *);
/* User name: multibyte form, then the wide-character (_w) form. */
const char *archive_entry_uname(struct archive_entry *);
const wchar_t *archive_entry_uname_w(struct archive_entry *);
/*
* Set fields in an archive_entry.

View File

@ -62,6 +62,7 @@ static int archive_write_pax_header(struct archive *,
static char *build_pax_attribute_name(char *dest, const char *src);
static char *build_ustar_entry_name(char *dest, const char *src, const char *insert);
static char *format_int(char *dest, int64_t);
/* Tests a wide-character string for non-ASCII characters. */
static int has_non_ASCII(const wchar_t *);
static int write_nulls(struct archive *, size_t);
/*
@ -315,7 +316,7 @@ archive_write_pax_header(struct archive *a,
struct archive_entry *entry_main;
const char *linkname, *p;
const char *hardlink;
const wchar_t *wp, *wp2;
const wchar_t *wp;
const char *suffix_start;
int need_extension, r, ret;
struct pax *pax;
@ -375,36 +376,42 @@ archive_write_pax_header(struct archive *a,
/* Find the largest suffix that fits in 'name' field. */
suffix_start = strchr(p + strlen(p) - 100 - 1, '/');
/* Find non-ASCII character, if any. */
wp2 = wp;
while (*wp2 != L'\0' && *wp2 < 128)
wp2++;
/*
* If name is too long, or has non-ASCII characters, add
* 'path' to pax extended attrs.
*/
if (suffix_start == NULL || suffix_start - p > 155 || *wp2 != L'\0') {
if (suffix_start == NULL || suffix_start - p > 155 || has_non_ASCII(wp)) {
add_pax_attr_w(&(pax->pax_header), "path", wp);
archive_entry_set_pathname(entry_main,
build_ustar_entry_name(ustar_entry_name, p, NULL));
need_extension = 1;
}
/* If link name is too long, add 'linkpath' to pax extended attrs. */
/* If link name is too long or has non-ASCII characters, add
* 'linkpath' to pax extended attrs. */
linkname = hardlink;
if (linkname == NULL)
linkname = archive_entry_symlink(entry_main);
if (linkname != NULL && strlen(linkname) > 100) {
add_pax_attr(&(pax->pax_header), "linkpath", linkname);
if (linkname != NULL) {
/* There is a link name, get the wide version as well. */
if (hardlink != NULL)
archive_entry_set_hardlink(entry_main,
"././@LongHardLink");
wp = archive_entry_hardlink_w(entry_main);
else
archive_entry_set_symlink(entry_main,
"././@LongSymLink");
need_extension = 1;
wp = archive_entry_symlink_w(entry_main);
/* If the link is long or has a non-ASCII character,
* store it as a pax extended attribute. */
if (strlen(linkname) > 100 || has_non_ASCII(wp)) {
add_pax_attr_w(&(pax->pax_header), "linkpath", wp);
if (hardlink != NULL)
archive_entry_set_hardlink(entry_main,
"././@LongHardLink");
else
archive_entry_set_symlink(entry_main,
"././@LongSymLink");
need_extension = 1;
}
}
/* If file size is too large, add 'size' to pax extended attrs. */
@ -419,11 +426,12 @@ archive_write_pax_header(struct archive *a,
need_extension = 1;
}
/* If group name is too large, add 'gname' to pax extended attrs. */
/* TODO: If gname has non-ASCII characters, use pax attribute. */
/* If group name is too large or has non-ASCII characters, add
* 'gname' to pax extended attrs. */
p = archive_entry_gname(entry_main);
if (p != NULL && strlen(p) > 31) {
add_pax_attr(&(pax->pax_header), "gname", p);
wp = archive_entry_gname_w(entry_main);
if (p != NULL && (strlen(p) > 31 || has_non_ASCII(wp))) {
add_pax_attr_w(&(pax->pax_header), "gname", wp);
archive_entry_set_gname(entry_main, NULL);
need_extension = 1;
}
@ -437,8 +445,9 @@ archive_write_pax_header(struct archive *a,
/* If user name is too large or has non-ASCII characters, add
 * 'uname' to pax extended attrs. */
p = archive_entry_uname(entry_main);
if (p != NULL && strlen(p) > 31) {
add_pax_attr(&(pax->pax_header), "uname", p);
wp = archive_entry_uname_w(entry_main);
if (p != NULL && (strlen(p) > 31 || has_non_ASCII(wp))) {
add_pax_attr_w(&(pax->pax_header), "uname", wp);
archive_entry_set_uname(entry_main, NULL);
need_extension = 1;
}
@ -1001,3 +1010,11 @@ archive_write_pax_data(struct archive *a, const void *buff, size_t s)
pax->entry_bytes_remaining -= s;
return (ret);
}
/*
 * Report whether a wide-character string contains anything other than
 * 7-bit ASCII.
 *
 * Returns 1 if any character before the terminating L'\0' lies outside
 * the range 0..127, and 0 otherwise.  A NULL pointer is treated like an
 * empty string and yields 0, so callers need not check for NULL first
 * and never go on to store a NULL string as an extended attribute.
 */
static int
has_non_ASCII(const wchar_t *wp)
{
	if (wp == NULL)
		return (0);
	for (; *wp != L'\0'; wp++) {
		/*
		 * wchar_t may be a signed type; compare through an
		 * unsigned type so that negative values count as
		 * non-ASCII instead of slipping past a "< 128" test.
		 */
		if ((unsigned long)*wp >= 128UL)
			return (1);
	}
	return (0);
}