In pax interchange format, use UTF-8 when writing
link names, user names, or group names that contain non-ASCII characters. In particular, this corrects an inconsistency reported by Ed Maste when archiving symlinks with odd characters: long symlinks were preserved, while short ones were changed.
This commit is contained in:
parent
85c13a8375
commit
52a88d3b57
@ -203,6 +203,8 @@ aes_copy(struct aes *dest, struct aes *src)
|
||||
static const char *
|
||||
aes_get_mbs(struct aes *aes)
|
||||
{
|
||||
if (aes->aes_mbs == NULL && aes->aes_wcs == NULL)
|
||||
return NULL;
|
||||
if (aes->aes_mbs == NULL && aes->aes_wcs != NULL) {
|
||||
/*
|
||||
* XXX Need to estimate the number of bytes in the
|
||||
@ -224,6 +226,8 @@ aes_get_mbs(struct aes *aes)
|
||||
static const wchar_t *
|
||||
aes_get_wcs(struct aes *aes)
|
||||
{
|
||||
if (aes->aes_wcs == NULL && aes->aes_mbs == NULL)
|
||||
return NULL;
|
||||
if (aes->aes_wcs == NULL && aes->aes_mbs != NULL) {
|
||||
/*
|
||||
* No single byte will be more than one wide character,
|
||||
@ -457,12 +461,24 @@ archive_entry_gname(struct archive_entry *entry)
|
||||
return (aes_get_mbs(&entry->ae_gname));
|
||||
}
|
||||
|
||||
const wchar_t *
|
||||
archive_entry_gname_w(struct archive_entry *entry)
|
||||
{
|
||||
return (aes_get_wcs(&entry->ae_gname));
|
||||
}
|
||||
|
||||
const char *
|
||||
archive_entry_hardlink(struct archive_entry *entry)
|
||||
{
|
||||
return (aes_get_mbs(&entry->ae_hardlink));
|
||||
}
|
||||
|
||||
const wchar_t *
|
||||
archive_entry_hardlink_w(struct archive_entry *entry)
|
||||
{
|
||||
return (aes_get_wcs(&entry->ae_hardlink));
|
||||
}
|
||||
|
||||
ino_t
|
||||
archive_entry_ino(struct archive_entry *entry)
|
||||
{
|
||||
@ -536,6 +552,12 @@ archive_entry_symlink(struct archive_entry *entry)
|
||||
return (aes_get_mbs(&entry->ae_symlink));
|
||||
}
|
||||
|
||||
const wchar_t *
|
||||
archive_entry_symlink_w(struct archive_entry *entry)
|
||||
{
|
||||
return (aes_get_wcs(&entry->ae_symlink));
|
||||
}
|
||||
|
||||
uid_t
|
||||
archive_entry_uid(struct archive_entry *entry)
|
||||
{
|
||||
@ -548,6 +570,12 @@ archive_entry_uname(struct archive_entry *entry)
|
||||
return (aes_get_mbs(&entry->ae_uname));
|
||||
}
|
||||
|
||||
const wchar_t *
|
||||
archive_entry_uname_w(struct archive_entry *entry)
|
||||
{
|
||||
return (aes_get_wcs(&entry->ae_uname));
|
||||
}
|
||||
|
||||
/*
|
||||
* Functions to set archive_entry properties.
|
||||
*/
|
||||
|
@ -79,7 +79,9 @@ void archive_entry_fflags(struct archive_entry *,
|
||||
const char *archive_entry_fflags_text(struct archive_entry *);
|
||||
gid_t archive_entry_gid(struct archive_entry *);
|
||||
const char *archive_entry_gname(struct archive_entry *);
|
||||
const wchar_t *archive_entry_gname_w(struct archive_entry *);
|
||||
const char *archive_entry_hardlink(struct archive_entry *);
|
||||
const wchar_t *archive_entry_hardlink_w(struct archive_entry *);
|
||||
ino_t archive_entry_ino(struct archive_entry *);
|
||||
mode_t archive_entry_mode(struct archive_entry *);
|
||||
time_t archive_entry_mtime(struct archive_entry *);
|
||||
@ -92,8 +94,10 @@ dev_t archive_entry_rdevminor(struct archive_entry *);
|
||||
int64_t archive_entry_size(struct archive_entry *);
|
||||
const struct stat *archive_entry_stat(struct archive_entry *);
|
||||
const char *archive_entry_symlink(struct archive_entry *);
|
||||
const wchar_t *archive_entry_symlink_w(struct archive_entry *);
|
||||
uid_t archive_entry_uid(struct archive_entry *);
|
||||
const char *archive_entry_uname(struct archive_entry *);
|
||||
const wchar_t *archive_entry_uname_w(struct archive_entry *);
|
||||
|
||||
/*
|
||||
* Set fields in an archive_entry.
|
||||
|
@ -62,6 +62,7 @@ static int archive_write_pax_header(struct archive *,
|
||||
static char *build_pax_attribute_name(char *dest, const char *src);
|
||||
static char *build_ustar_entry_name(char *dest, const char *src, const char *insert);
|
||||
static char *format_int(char *dest, int64_t);
|
||||
static int has_non_ASCII(const wchar_t *);
|
||||
static int write_nulls(struct archive *, size_t);
|
||||
|
||||
/*
|
||||
@ -315,7 +316,7 @@ archive_write_pax_header(struct archive *a,
|
||||
struct archive_entry *entry_main;
|
||||
const char *linkname, *p;
|
||||
const char *hardlink;
|
||||
const wchar_t *wp, *wp2;
|
||||
const wchar_t *wp;
|
||||
const char *suffix_start;
|
||||
int need_extension, r, ret;
|
||||
struct pax *pax;
|
||||
@ -375,36 +376,42 @@ archive_write_pax_header(struct archive *a,
|
||||
/* Find the largest suffix that fits in 'name' field. */
|
||||
suffix_start = strchr(p + strlen(p) - 100 - 1, '/');
|
||||
|
||||
/* Find non-ASCII character, if any. */
|
||||
wp2 = wp;
|
||||
while (*wp2 != L'\0' && *wp2 < 128)
|
||||
wp2++;
|
||||
|
||||
/*
|
||||
* If name is too long, or has non-ASCII characters, add
|
||||
* 'path' to pax extended attrs.
|
||||
*/
|
||||
if (suffix_start == NULL || suffix_start - p > 155 || *wp2 != L'\0') {
|
||||
if (suffix_start == NULL || suffix_start - p > 155 || has_non_ASCII(wp)) {
|
||||
add_pax_attr_w(&(pax->pax_header), "path", wp);
|
||||
archive_entry_set_pathname(entry_main,
|
||||
build_ustar_entry_name(ustar_entry_name, p, NULL));
|
||||
need_extension = 1;
|
||||
}
|
||||
|
||||
/* If link name is too long, add 'linkpath' to pax extended attrs. */
|
||||
/* If link name is too long or has non-ASCII characters, add
|
||||
* 'linkpath' to pax extended attrs. */
|
||||
linkname = hardlink;
|
||||
if (linkname == NULL)
|
||||
linkname = archive_entry_symlink(entry_main);
|
||||
|
||||
if (linkname != NULL && strlen(linkname) > 100) {
|
||||
add_pax_attr(&(pax->pax_header), "linkpath", linkname);
|
||||
if (linkname != NULL) {
|
||||
/* There is a link name, get the wide version as well. */
|
||||
if (hardlink != NULL)
|
||||
archive_entry_set_hardlink(entry_main,
|
||||
"././@LongHardLink");
|
||||
wp = archive_entry_hardlink_w(entry_main);
|
||||
else
|
||||
archive_entry_set_symlink(entry_main,
|
||||
"././@LongSymLink");
|
||||
need_extension = 1;
|
||||
wp = archive_entry_symlink_w(entry_main);
|
||||
|
||||
/* If the link is long or has a non-ASCII character,
|
||||
* store it as a pax extended attribute. */
|
||||
if (strlen(linkname) > 100 || has_non_ASCII(wp)) {
|
||||
add_pax_attr_w(&(pax->pax_header), "linkpath", wp);
|
||||
if (hardlink != NULL)
|
||||
archive_entry_set_hardlink(entry_main,
|
||||
"././@LongHardLink");
|
||||
else
|
||||
archive_entry_set_symlink(entry_main,
|
||||
"././@LongSymLink");
|
||||
need_extension = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* If file size is too large, add 'size' to pax extended attrs. */
|
||||
@ -419,11 +426,12 @@ archive_write_pax_header(struct archive *a,
|
||||
need_extension = 1;
|
||||
}
|
||||
|
||||
/* If group name is too large, add 'gname' to pax extended attrs. */
|
||||
/* TODO: If gname has non-ASCII characters, use pax attribute. */
|
||||
/* If group name is too large or has non-ASCII characters, add
|
||||
* 'gname' to pax extended attrs. */
|
||||
p = archive_entry_gname(entry_main);
|
||||
if (p != NULL && strlen(p) > 31) {
|
||||
add_pax_attr(&(pax->pax_header), "gname", p);
|
||||
wp = archive_entry_gname_w(entry_main);
|
||||
if (p != NULL && (strlen(p) > 31 || has_non_ASCII(wp))) {
|
||||
add_pax_attr_w(&(pax->pax_header), "gname", wp);
|
||||
archive_entry_set_gname(entry_main, NULL);
|
||||
need_extension = 1;
|
||||
}
|
||||
@ -437,8 +445,9 @@ archive_write_pax_header(struct archive *a,
|
||||
/* If user name is too large or has non-ASCII characters, add
|
||||
 * 'uname' to pax extended attrs. */
|
||||
p = archive_entry_uname(entry_main);
|
||||
if (p != NULL && strlen(p) > 31) {
|
||||
add_pax_attr(&(pax->pax_header), "uname", p);
|
||||
wp = archive_entry_uname_w(entry_main);
|
||||
if (p != NULL && (strlen(p) > 31 || has_non_ASCII(wp))) {
|
||||
add_pax_attr_w(&(pax->pax_header), "uname", wp);
|
||||
archive_entry_set_uname(entry_main, NULL);
|
||||
need_extension = 1;
|
||||
}
|
||||
@ -1001,3 +1010,11 @@ archive_write_pax_data(struct archive *a, const void *buff, size_t s)
|
||||
pax->entry_bytes_remaining -= s;
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/*
 * Report whether a wide string contains any character outside the
 * 7-bit ASCII range (0..127).
 *
 * wp: NUL-terminated wide string to scan; may be NULL.
 *
 * Returns 1 if any character before the terminating L'\0' lies outside
 * 0..127, and 0 otherwise.  A NULL pointer is treated like an empty
 * string, so callers that pass the result of a wide-string accessor do
 * not crash when no wide form is available.
 */
static int
has_non_ASCII(const wchar_t *wp)
{
	if (wp == NULL)
		return (0);

	for (; *wp != L'\0'; wp++) {
		/*
		 * wchar_t may be a signed type; converting to unsigned
		 * makes negative values compare as "not ASCII" instead
		 * of slipping under the 128 limit.
		 */
		if ((unsigned long)*wp > 127UL)
			return (1);
	}
	return (0);
}
|
||||
|
Loading…
Reference in New Issue
Block a user