Conventionally, tar archives have always included a trailing '/'
for directory names.  bsdtar used to add it, but that behavior was
recently lost, so libarchive now adds it instead.
The one odd consequence of doing this in libarchive: adding a directory
to a tar archive and then reading it back can yield a different name
(the name gains a trailing '/').

Add a test case to exercise some boundary conditions with
tar filenames and ensure that trailing slashes are added to
dir names only as necessary.

Thanks to: Oliver Lehmann for bringing this regression to my attention.
This commit is contained in:
Tim Kientzle 2007-04-14 08:20:31 +00:00
parent 76aa565c72
commit 015f35775b
5 changed files with 261 additions and 74 deletions

View File

@ -9,7 +9,7 @@ LDADD= -lbz2 -lz
# Major: Bumped ONLY when API/ABI breakage happens (see SHLIB_MAJOR)
# Minor: Bumped when significant new features are added
# Revision: Bumped on any notable change
VERSION= 2.0.28
VERSION= 2.0.29
ARCHIVE_API_MAJOR!= echo ${VERSION} | sed -e 's/[^0-9]/./g' -e 's/\..*//'
ARCHIVE_API_MINOR!= echo ${VERSION} | sed -e 's/[^0-9]/./g' -e 's/[0-9]*\.//' -e 's/\..*//'

View File

@ -67,8 +67,8 @@ static void add_pax_attr_int(struct archive_string *,
static void add_pax_attr_time(struct archive_string *,
const char *key, int64_t sec,
unsigned long nanos);
static void add_pax_attr_w(struct archive_string *,
const char *key, const wchar_t *wvalue);
static void add_pax_attr_w(struct archive_string *, const char *,
const wchar_t *, const wchar_t *);
static ssize_t archive_write_pax_data(struct archive_write *,
const void *, size_t);
static int archive_write_pax_finish(struct archive_write *);
@ -205,30 +205,42 @@ add_pax_attr_int(struct archive_string *as, const char *key, int64_t value)
add_pax_attr(as, key, format_int(tmp + sizeof(tmp) - 1, value));
}
/*
* UTF-8 encode the concatenation of two strings.
*
* This interface eliminates the need to do some string
* manipulations at higher layers.
*/
static char *
utf8_encode(const wchar_t *wval)
utf8_encode(const wchar_t *wval1, const wchar_t *wval2)
{
int utf8len;
const wchar_t *wp;
const wchar_t *wp, **wpp;
unsigned long wc;
char *utf8_value, *p;
const wchar_t *vals[2];
vals[0] = wval1;
vals[1] = wval2;
utf8len = 0;
for (wp = wval; *wp != L'\0'; ) {
wc = *wp++;
if (wc <= 0x7f)
utf8len++;
else if (wc <= 0x7ff)
utf8len += 2;
else if (wc <= 0xffff)
utf8len += 3;
else if (wc <= 0x1fffff)
utf8len += 4;
else if (wc <= 0x3ffffff)
utf8len += 5;
else if (wc <= 0x7fffffff)
utf8len += 6;
/* Ignore larger values; UTF-8 can't encode them. */
for (wpp = vals; wpp < vals + 2 && *wpp; wpp++) {
for (wp = *wpp; *wp != L'\0'; ) {
wc = *wp++;
if (wc <= 0x7f)
utf8len++;
else if (wc <= 0x7ff)
utf8len += 2;
else if (wc <= 0xffff)
utf8len += 3;
else if (wc <= 0x1fffff)
utf8len += 4;
else if (wc <= 0x3ffffff)
utf8len += 5;
else if (wc <= 0x7fffffff)
utf8len += 6;
/* Ignore larger values; UTF-8 can't encode them. */
}
}
utf8_value = (char *)malloc(utf8len + 1);
@ -237,42 +249,45 @@ utf8_encode(const wchar_t *wval)
return (NULL);
}
for (wp = wval, p = utf8_value; *wp != L'\0'; ) {
wc = *wp++;
if (wc <= 0x7f) {
*p++ = (char)wc;
} else if (wc <= 0x7ff) {
p[0] = 0xc0 | ((wc >> 6) & 0x1f);
p[1] = 0x80 | (wc & 0x3f);
p += 2;
} else if (wc <= 0xffff) {
p[0] = 0xe0 | ((wc >> 12) & 0x0f);
p[1] = 0x80 | ((wc >> 6) & 0x3f);
p[2] = 0x80 | (wc & 0x3f);
p += 3;
} else if (wc <= 0x1fffff) {
p[0] = 0xf0 | ((wc >> 18) & 0x07);
p[1] = 0x80 | ((wc >> 12) & 0x3f);
p[2] = 0x80 | ((wc >> 6) & 0x3f);
p[3] = 0x80 | (wc & 0x3f);
p += 4;
} else if (wc <= 0x3ffffff) {
p[0] = 0xf8 | ((wc >> 24) & 0x03);
p[1] = 0x80 | ((wc >> 18) & 0x3f);
p[2] = 0x80 | ((wc >> 12) & 0x3f);
p[3] = 0x80 | ((wc >> 6) & 0x3f);
p[4] = 0x80 | (wc & 0x3f);
p += 5;
} else if (wc <= 0x7fffffff) {
p[0] = 0xfc | ((wc >> 30) & 0x01);
p[1] = 0x80 | ((wc >> 24) & 0x3f);
p[1] = 0x80 | ((wc >> 18) & 0x3f);
p[2] = 0x80 | ((wc >> 12) & 0x3f);
p[3] = 0x80 | ((wc >> 6) & 0x3f);
p[4] = 0x80 | (wc & 0x3f);
p += 6;
p = utf8_value;
for (wpp = vals; wpp < vals + 2 && *wpp; wpp++) {
for (wp = *wpp; *wp != L'\0'; ) {
wc = *wp++;
if (wc <= 0x7f) {
*p++ = (char)wc;
} else if (wc <= 0x7ff) {
p[0] = 0xc0 | ((wc >> 6) & 0x1f);
p[1] = 0x80 | (wc & 0x3f);
p += 2;
} else if (wc <= 0xffff) {
p[0] = 0xe0 | ((wc >> 12) & 0x0f);
p[1] = 0x80 | ((wc >> 6) & 0x3f);
p[2] = 0x80 | (wc & 0x3f);
p += 3;
} else if (wc <= 0x1fffff) {
p[0] = 0xf0 | ((wc >> 18) & 0x07);
p[1] = 0x80 | ((wc >> 12) & 0x3f);
p[2] = 0x80 | ((wc >> 6) & 0x3f);
p[3] = 0x80 | (wc & 0x3f);
p += 4;
} else if (wc <= 0x3ffffff) {
p[0] = 0xf8 | ((wc >> 24) & 0x03);
p[1] = 0x80 | ((wc >> 18) & 0x3f);
p[2] = 0x80 | ((wc >> 12) & 0x3f);
p[3] = 0x80 | ((wc >> 6) & 0x3f);
p[4] = 0x80 | (wc & 0x3f);
p += 5;
} else if (wc <= 0x7fffffff) {
p[0] = 0xfc | ((wc >> 30) & 0x01);
p[1] = 0x80 | ((wc >> 24) & 0x3f);
p[1] = 0x80 | ((wc >> 18) & 0x3f);
p[2] = 0x80 | ((wc >> 12) & 0x3f);
p[3] = 0x80 | ((wc >> 6) & 0x3f);
p[4] = 0x80 | (wc & 0x3f);
p += 6;
}
/* Ignore larger values; UTF-8 can't encode them. */
}
/* Ignore larger values; UTF-8 can't encode them. */
}
*p = '\0';
@ -280,9 +295,10 @@ utf8_encode(const wchar_t *wval)
}
static void
add_pax_attr_w(struct archive_string *as, const char *key, const wchar_t *wval)
add_pax_attr_w(struct archive_string *as, const char *key,
const wchar_t *wval1, const wchar_t *wval2)
{
char *utf8_value = utf8_encode(wval);
char *utf8_value = utf8_encode(wval1, wval2);
if (utf8_value == NULL)
return;
add_pax_attr(as, key, utf8_value);
@ -367,7 +383,7 @@ archive_write_pax_header_xattrs(struct pax *pax, struct archive_entry *entry)
free(url_encoded_name); /* Done with this. */
}
if (wcs_name != NULL) {
encoded_name = utf8_encode(wcs_name);
encoded_name = utf8_encode(wcs_name, NULL);
free(wcs_name); /* Done with wchar_t name. */
}
@ -401,6 +417,7 @@ archive_write_pax_header(struct archive_write *a,
const wchar_t *wp;
const char *suffix_start;
int need_extension, r, ret;
int need_slash = 0;
struct pax *pax;
const struct stat *st_main, *st_original;
@ -451,18 +468,23 @@ archive_write_pax_header(struct archive_write *a,
*/
wp = archive_entry_pathname_w(entry_main);
p = archive_entry_pathname(entry_main);
if (strlen(p) <= 100) /* Short enough for just 'name' field */
if (S_ISDIR(st_original->st_mode))
if (p[strlen(p) - 1] != '/')
need_slash = 1;
/* Short enough for just 'name' field */
if (strlen(p) + need_slash <= 100)
suffix_start = p; /* Record a zero-length prefix */
else
/* Find the largest suffix that fits in 'name' field. */
suffix_start = strchr(p + strlen(p) - 100 - 1, '/');
suffix_start = strchr(p + strlen(p) + need_slash - 100 - 1, '/');
/*
* If name is too long, or has non-ASCII characters, add
* 'path' to pax extended attrs.
*/
if (suffix_start == NULL || suffix_start - p > 155 || has_non_ASCII(wp)) {
add_pax_attr_w(&(pax->pax_header), "path", wp);
add_pax_attr_w(&(pax->pax_header), "path", wp,
need_slash ? L"/" : NULL);
archive_entry_set_pathname(entry_main,
build_ustar_entry_name(ustar_entry_name, p, strlen(p), NULL));
need_extension = 1;
@ -484,7 +506,7 @@ archive_write_pax_header(struct archive_write *a,
/* If the link is long or has a non-ASCII character,
* store it as a pax extended attribute. */
if (strlen(linkname) > 100 || has_non_ASCII(wp)) {
add_pax_attr_w(&(pax->pax_header), "linkpath", wp);
add_pax_attr_w(&(pax->pax_header), "linkpath", wp, NULL);
if (hardlink != NULL)
archive_entry_set_hardlink(entry_main,
"././@LongHardLink");
@ -512,7 +534,7 @@ archive_write_pax_header(struct archive_write *a,
p = archive_entry_gname(entry_main);
wp = archive_entry_gname_w(entry_main);
if (p != NULL && (strlen(p) > 31 || has_non_ASCII(wp))) {
add_pax_attr_w(&(pax->pax_header), "gname", wp);
add_pax_attr_w(&(pax->pax_header), "gname", wp, NULL);
archive_entry_set_gname(entry_main, NULL);
need_extension = 1;
}
@ -528,7 +550,7 @@ archive_write_pax_header(struct archive_write *a,
p = archive_entry_uname(entry_main);
wp = archive_entry_uname_w(entry_main);
if (p != NULL && (strlen(p) > 31 || has_non_ASCII(wp))) {
add_pax_attr_w(&(pax->pax_header), "uname", wp);
add_pax_attr_w(&(pax->pax_header), "uname", wp, NULL);
archive_entry_set_uname(entry_main, NULL);
need_extension = 1;
}
@ -655,13 +677,13 @@ archive_write_pax_header(struct archive_write *a,
ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID);
if (wp != NULL && *wp != L'\0')
add_pax_attr_w(&(pax->pax_header),
"SCHILY.acl.access", wp);
"SCHILY.acl.access", wp, NULL);
wp = archive_entry_acl_text_w(entry_original,
ARCHIVE_ENTRY_ACL_TYPE_DEFAULT |
ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID);
if (wp != NULL && *wp != L'\0')
add_pax_attr_w(&(pax->pax_header),
"SCHILY.acl.default", wp);
"SCHILY.acl.default", wp, NULL);
/* Include star-compatible metadata info. */
/* Note: "SCHILY.dev{major,minor}" are NOT the

View File

@ -243,7 +243,7 @@ __archive_write_format_header_ustar(struct archive_write *a, char h[512],
{
unsigned int checksum;
int i, ret;
size_t copy_length;
size_t copy_length, ps, extra_slash;
const char *p, *pp;
const struct stat *st;
int mytartype;
@ -256,6 +256,7 @@ __archive_write_format_header_ustar(struct archive_write *a, char h[512],
* elements.
*/
memcpy(h, &template_header, 512);
st = archive_entry_stat(entry);
/*
* Because the block is already null-filled, and strings
@ -264,11 +265,18 @@ __archive_write_format_header_ustar(struct archive_write *a, char h[512],
*/
pp = archive_entry_pathname(entry);
if (strlen(pp) <= USTAR_name_size)
memcpy(h + USTAR_name_offset, pp, strlen(pp));
else {
ps = strlen(pp);
if (S_ISDIR(st->st_mode) && pp[ps - 1] != '/')
extra_slash = 1;
else
extra_slash = 0;
if (ps + extra_slash <= USTAR_name_size) {
memcpy(h + USTAR_name_offset, pp, ps);
if (extra_slash)
h[USTAR_name_offset + ps] = '/';
} else {
/* Store in two pieces, splitting at a '/'. */
p = strchr(pp + strlen(pp) - USTAR_name_size - 1, '/');
p = strchr(pp + ps + extra_slash - USTAR_name_size - 1, '/');
/*
* If there is no path separator, or the prefix or
* remaining name are too large, return an error.
@ -284,7 +292,9 @@ __archive_write_format_header_ustar(struct archive_write *a, char h[512],
} else {
/* Copy prefix and remainder to appropriate places */
memcpy(h + USTAR_prefix_offset, pp, p - pp);
memcpy(h + USTAR_name_offset, p + 1, pp + strlen(pp) - p - 1);
memcpy(h + USTAR_name_offset, p + 1, pp + ps - p - 1);
if (extra_slash)
h[USTAR_name_offset + pp + ps - p - 1] = '/';
}
}
@ -328,8 +338,6 @@ __archive_write_format_header_ustar(struct archive_write *a, char h[512],
memcpy(h + USTAR_gname_offset, p, copy_length);
}
st = archive_entry_stat(entry);
if (format_number(st->st_mode & 07777, h + USTAR_mode_offset, USTAR_mode_size, USTAR_mode_max_size, strict)) {
archive_set_error(&a->archive, ERANGE, "Numeric mode too large");
ret = ARCHIVE_WARN;

View File

@ -28,6 +28,7 @@ TESTS= \
test_read_large.c \
test_read_position.c \
test_read_truncated.c \
test_tar_filenames.c \
test_write_disk.c \
test_write_disk_perms.c \
test_write_disk_secure.c \

View File

@ -0,0 +1,156 @@
/*-
* Copyright (c) 2003-2007 Tim Kientzle
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "test.h"
__FBSDID("$FreeBSD$");
/*
 * Exercise various lengths of filenames in tar archives,
 * especially around the magic sizes where ustar breaks
 * filenames into prefix/suffix.
 *
 * Builds the path "<dlen 'a' characters>/<flen 'b' characters>" and
 * writes it to an in-memory pax-restricted archive three times: once
 * as a regular file, once as a directory without a trailing '/', and
 * once as a directory with a trailing '/'.  The archive is then read
 * back and the three names and modes are checked; both directory
 * entries are expected to come back with the trailing '/'.
 *
 * dlen: number of characters in the leading directory component.
 * flen: number of characters in the final path component.
 */
static void
test_filename(int dlen, int flen)
{
	char buff[8192];
	char filename[400];
	char dirname[400];
	struct archive_entry *ae;
	struct archive *a;
	size_t used;
	int i;

	/*
	 * dirname must hold dlen + 1 + flen characters, plus the '/'
	 * appended below, plus the terminating NUL.
	 */
	if (dlen < 0 || flen < 0 ||
	    (size_t)dlen + (size_t)flen + 3 > sizeof(dirname)) {
		failure("Pathname %d/%d does not fit the test buffers",
		    dlen, flen);
		assert(0);
		return;
	}

	/* Build "aaa.../bbb...". */
	for (i = 0; i < dlen; i++)
		filename[i] = 'a';
	filename[i++] = '/';
	for (; i < dlen + flen + 1; i++)
		filename[i] = 'b';
	filename[i++] = '\0';

	strcpy(dirname, filename);

	/* Create a new archive in memory. */
	assert((a = archive_write_new()) != NULL);
	assertA(0 == archive_write_set_format_pax_restricted(a));
	assertA(0 == archive_write_set_compression_none(a));
	assertA(0 == archive_write_set_bytes_per_block(a,0));
	assertA(0 == archive_write_open_memory(a, buff, sizeof(buff), &used));

	/*
	 * NOTE(review): none of the entries created with
	 * archive_entry_new() below is released in this function;
	 * presumably each needs archive_entry_free() once its header
	 * has been written -- confirm ownership against the
	 * archive_entry API.
	 */

	/*
	 * Write a file to it.
	 */
	assert((ae = archive_entry_new()) != NULL);
	archive_entry_copy_pathname(ae, filename);
	archive_entry_set_mode(ae, S_IFREG | 0755);
	failure("Pathname %d/%d", dlen, flen);
	assertA(0 == archive_write_header(a, ae));

	/*
	 * Write a dir to it (without trailing '/').
	 */
	assert((ae = archive_entry_new()) != NULL);
	archive_entry_copy_pathname(ae, dirname);
	archive_entry_set_mode(ae, S_IFDIR | 0755);
	failure("Dirname %d/%d", dlen, flen);
	assertA(0 == archive_write_header(a, ae));

	/* Tar adds a '/' to directory names. */
	strcat(dirname, "/");

	/*
	 * Write a dir to it (with trailing '/').
	 */
	assert((ae = archive_entry_new()) != NULL);
	archive_entry_copy_pathname(ae, dirname);
	archive_entry_set_mode(ae, S_IFDIR | 0755);
	failure("Dirname %d/%d", dlen, flen);
	assertA(0 == archive_write_header(a, ae));

	/* Close out the archive. */
	assertA(0 == archive_write_close(a));
#if ARCHIVE_API_VERSION > 1
	assertA(0 == archive_write_finish(a));
#else
	archive_write_finish(a);
#endif

	/*
	 * Now, read the data back.
	 */
	assert((a = archive_read_new()) != NULL);
	assertA(0 == archive_read_support_format_all(a));
	assertA(0 == archive_read_support_compression_all(a));
	assertA(0 == archive_read_open_memory(a, buff, used));

	/* Read the file and check the filename. */
	assertA(0 == archive_read_next_header(a, &ae));
	failure("Pathname %d/%d: %s", dlen, flen, archive_entry_pathname(ae));
	assert(0 == strcmp(filename, archive_entry_pathname(ae)));
	assert((S_IFREG | 0755) == archive_entry_mode(ae));

	/*
	 * Read the two dirs and check the names.
	 *
	 * Both dirs should read back with the same name, since
	 * tar should add a trailing '/' to any dir that doesn't
	 * already have one.
	 */
	assertA(0 == archive_read_next_header(a, &ae));
	failure("Pathname %d/%d: %s", dlen, flen, archive_entry_pathname(ae));
	assert(0 == strcmp(dirname, archive_entry_pathname(ae)));
	assert((S_IFDIR | 0755) == archive_entry_mode(ae));

	assertA(0 == archive_read_next_header(a, &ae));
	failure("Pathname %d/%d: %s", dlen, flen, archive_entry_pathname(ae));
	assert(0 == strcmp(dirname, archive_entry_pathname(ae)));
	assert((S_IFDIR | 0755) == archive_entry_mode(ae));

	/* Verify the end of the archive. */
	assert(1 == archive_read_next_header(a, &ae));
	assert(0 == archive_read_close(a));
#if ARCHIVE_API_VERSION > 1
	assert(0 == archive_read_finish(a));
#else
	archive_read_finish(a);
#endif
}
/*
 * Test entry point: run test_filename() over two grids of
 * directory-length / file-length combinations.
 */
DEFINE_TEST(test_tar_filenames)
{
	/* Half-open [lo, hi) length ranges, swept in the order listed. */
	static const struct {
		int dir_lo, dir_hi;
		int file_lo, file_hi;
	} sweeps[] = {
		{ 40, 60, 40, 60 },
		{ 140, 160, 90, 110 },
	};
	unsigned int s;
	int dirlen, filelen;

	/* Repeat the check for every dir/file length pair in each grid. */
	for (s = 0; s < sizeof(sweeps) / sizeof(sweeps[0]); s++) {
		for (dirlen = sweeps[s].dir_lo;
		    dirlen < sweeps[s].dir_hi; dirlen++) {
			for (filelen = sweeps[s].file_lo;
			    filelen < sweeps[s].file_hi; filelen++) {
				test_filename(dirlen, filelen);
			}
		}
	}
}