Issues with hardlinks in newc-format files prompted me to

write a new test to exercise the hardlink strategies used
by different archive formats (tar, old cpio, new cpio).
This uncovered two problems, both fixed by this commit:

1) Enforce file size when writing files to disk.

2) When restoring hardlink entries, if they have data associated, go
   ahead and open the file so we can write the data.

In particular, this fixes bsdtar/bsdcpio extraction of new cpio
formats where the "original" is empty and the subsequent "hardlink"
entry actually carries the data.  It also provides correct behavior
for old cpio archives where hardlinked entries have their bodies
stored multiple times in the archive; the last body should always be
the one that ends up in the final file.  The new pax format also
permits (but does not require) hardlinks to carry file data; again,
the last contents should always win.

Note that with any of these, a size of zero on a hardlink simply means
that the hardlink carries no data; it does not mean that the file has
zero size.  A non-zero size on a hardlink does provide the file size.

Thanks to: John Baldwin, for reminding me about this long-standing bug
    and sending me a simple example archive that prompted this test case
This commit is contained in:
Tim Kientzle 2008-01-18 05:05:58 +00:00
parent 23b11f239a
commit 2adbd7ee43
3 changed files with 198 additions and 6 deletions

View File

@ -171,6 +171,8 @@ struct archive_write_disk {
int fd;
/* Current offset for writing data to the file. */
off_t offset;
/* Maximum size of file. */
off_t filesize;
/* Dir we were in before this restore; only for deep paths. */
int restore_pwd;
/* Mode we should use for this entry; affected by _PERM and umask. */
@ -302,6 +304,7 @@ _archive_write_header(struct archive *_a, struct archive_entry *entry)
a->offset = 0;
a->uid = a->user_uid;
a->mode = archive_entry_mode(a->entry);
a->filesize = archive_entry_size(a->entry);
archive_strcpy(&(a->_name_data), archive_entry_pathname(a->entry));
a->name = a->_name_data.s;
archive_clear_error(&a->archive);
@ -425,8 +428,10 @@ _archive_write_header(struct archive *_a, struct archive_entry *entry)
* If it's not open, tell our client not to try writing.
* In particular, dirs, links, etc, don't get written to.
*/
if (a->fd < 0)
if (a->fd < 0) {
archive_entry_set_size(entry, 0);
a->filesize = 0;
}
done:
/* Restore the user's umask before returning. */
umask(a->user_umask);
@ -451,6 +456,7 @@ _archive_write_data_block(struct archive *_a,
{
struct archive_write_disk *a = (struct archive_write_disk *)_a;
ssize_t bytes_written = 0;
int r = ARCHIVE_OK;
__archive_check_magic(&a->archive, ARCHIVE_WRITE_DISK_MAGIC,
ARCHIVE_STATE_DATA, "archive_write_disk_block");
@ -470,7 +476,13 @@ _archive_write_data_block(struct archive *_a,
}
/* Write the data. */
while (size > 0) {
while (size > 0 && a->offset < a->filesize) {
if (a->offset + size > a->filesize) {
size = a->filesize - a->offset;
archive_set_error(&a->archive, errno,
"Write request too large");
r = ARCHIVE_WARN;
}
bytes_written = write(a->fd, buff, size);
if (bytes_written < 0) {
archive_set_error(&a->archive, errno, "Write failed");
@ -479,13 +491,14 @@ _archive_write_data_block(struct archive *_a,
size -= bytes_written;
a->offset += bytes_written;
}
return (ARCHIVE_OK);
return (r);
}
static ssize_t
_archive_write_data(struct archive *_a, const void *buff, size_t size)
{
struct archive_write_disk *a = (struct archive_write_disk *)_a;
off_t offset;
int r;
__archive_check_magic(&a->archive, ARCHIVE_WRITE_DISK_MAGIC,
@ -493,10 +506,11 @@ _archive_write_data(struct archive *_a, const void *buff, size_t size)
if (a->fd < 0)
return (ARCHIVE_OK);
offset = a->offset;
r = _archive_write_data_block(_a, buff, size, a->offset);
if (r < ARCHIVE_OK)
return (r);
return (size);
return (a->offset - offset);
}
static int
@ -829,8 +843,20 @@ create_filesystem_object(struct archive_write_disk *a)
/* We identify hard/symlinks according to the link names. */
/* Since link(2) and symlink(2) don't handle modes, we're done here. */
linkname = archive_entry_hardlink(a->entry);
if (linkname != NULL)
return link(linkname, a->name) ? errno : 0;
if (linkname != NULL) {
r = link(linkname, a->name) ? errno : 0;
/*
* New cpio and pax formats allow hardlink entries
* to carry data, so we may have to open the file
* for hardlink entries.
*/
if (r == 0 && a->filesize > 0) {
a->fd = open(a->name, O_WRONLY | O_TRUNC);
if (a->fd < 0)
r = errno;
}
return (r);
}
linkname = archive_entry_symlink(a->entry);
if (linkname != NULL)
return symlink(linkname, a->name) ? errno : 0;

View File

@ -58,6 +58,7 @@ TESTS= \
test_tar_large.c \
test_write_compress_program.c \
test_write_disk.c \
test_write_disk_hardlink.c \
test_write_disk_perms.c \
test_write_disk_secure.c \
test_write_format_ar.c \

View File

@ -0,0 +1,165 @@
/*-
* Copyright (c) 2003-2007 Tim Kientzle
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "test.h"
__FBSDID("$FreeBSD$");
#define UMASK 022
/*
* Exercise hardlink recreation.
*/
DEFINE_TEST(test_write_disk_hardlink)
{
#if ARCHIVE_VERSION_STAMP < 1009000
skipping("archive_write_disk_hardlink tests");
#else
static const char data[]="abcdefghijklmnopqrstuvwxyz";
struct archive *ad;
struct archive_entry *ae;
struct stat st, st2;
/* Force the umask to something predictable. */
umask(UMASK);
/* Write entries to disk. */
assert((ad = archive_write_disk_new()) != NULL);
/*
* First, use a tar-like approach; a regular file, then
* a separate "hardlink" entry.
*/
/* Regular file. */
assert((ae = archive_entry_new()) != NULL);
archive_entry_copy_pathname(ae, "link1a");
archive_entry_set_mode(ae, S_IFREG | 0755);
archive_entry_set_size(ae, sizeof(data));
assertEqualIntA(ad, 0, archive_write_header(ad, ae));
assertEqualInt(sizeof(data), archive_write_data(ad, data, sizeof(data)));
assertEqualIntA(ad, 0, archive_write_finish_entry(ad));
archive_entry_free(ae);
/* Link. */
assert((ae = archive_entry_new()) != NULL);
archive_entry_copy_pathname(ae, "link1b");
archive_entry_set_mode(ae, S_IFREG | 0755);
archive_entry_set_size(ae, 0);
archive_entry_copy_hardlink(ae, "link1a");
assertEqualIntA(ad, 0, archive_write_header(ad, ae));
assertEqualInt(0, archive_write_data(ad, data, sizeof(data)));
assertEqualIntA(ad, 0, archive_write_finish_entry(ad));
archive_entry_free(ae);
/*
* Second, try an old-cpio-like approach; a regular file, then
* another identical one (which has been marked hardlink).
*/
/* Regular file. */
assert((ae = archive_entry_new()) != NULL);
archive_entry_copy_pathname(ae, "link2a");
archive_entry_set_mode(ae, S_IFREG | 0755);
archive_entry_set_size(ae, sizeof(data));
assertEqualIntA(ad, 0, archive_write_header(ad, ae));
assertEqualInt(sizeof(data), archive_write_data(ad, data, sizeof(data)));
assertEqualIntA(ad, 0, archive_write_finish_entry(ad));
archive_entry_free(ae);
/* Link. */
assert((ae = archive_entry_new()) != NULL);
archive_entry_copy_pathname(ae, "link2b");
archive_entry_set_mode(ae, S_IFREG | 0755);
archive_entry_set_size(ae, sizeof(data));
archive_entry_copy_hardlink(ae, "link2a");
assertEqualIntA(ad, 0, archive_write_header(ad, ae));
assertEqualInt(sizeof(data), archive_write_data(ad, data, sizeof(data)));
assertEqualIntA(ad, 0, archive_write_finish_entry(ad));
archive_entry_free(ae);
/*
* Finally, try a new-cpio-like approach, where the initial
* regular file is empty and the hardlink has the data.
*/
/* Regular file. */
assert((ae = archive_entry_new()) != NULL);
archive_entry_copy_pathname(ae, "link3a");
archive_entry_set_mode(ae, S_IFREG | 0755);
archive_entry_set_size(ae, 0);
assertEqualIntA(ad, 0, archive_write_header(ad, ae));
assertEqualInt(0, archive_write_data(ad, data, sizeof(data)));
assertEqualIntA(ad, 0, archive_write_finish_entry(ad));
archive_entry_free(ae);
/* Link. */
assert((ae = archive_entry_new()) != NULL);
archive_entry_copy_pathname(ae, "link3b");
archive_entry_set_mode(ae, S_IFREG | 0755);
archive_entry_set_size(ae, sizeof(data));
archive_entry_copy_hardlink(ae, "link3a");
assertEqualIntA(ad, 0, archive_write_header(ad, ae));
assertEqualInt(sizeof(data), archive_write_data(ad, data, sizeof(data)));
assertEqualIntA(ad, 0, archive_write_finish_entry(ad));
archive_entry_free(ae);
assertEqualInt(0, archive_write_finish(ad));
/* Test the entries on disk. */
assert(0 == stat("link1a", &st));
assertEqualInt(st.st_mode, (S_IFREG | 0755) & ~UMASK);
assertEqualInt(st.st_size, sizeof(data));
assertEqualInt(st.st_nlink, 2);
assert(0 == stat("link1b", &st2));
assertEqualInt(st2.st_mode, (S_IFREG | 0755) & ~UMASK);
assertEqualInt(st2.st_size, sizeof(data));
assertEqualInt(st2.st_nlink, 2);
assertEqualInt(st.st_ino, st2.st_ino);
assertEqualInt(st.st_dev, st2.st_dev);
assert(0 == stat("link2a", &st));
assertEqualInt(st.st_mode, (S_IFREG | 0755) & ~UMASK);
assertEqualInt(st.st_size, sizeof(data));
assertEqualInt(st.st_nlink, 2);
assert(0 == stat("link2b", &st2));
assertEqualInt(st2.st_mode, (S_IFREG | 0755) & ~UMASK);
assertEqualInt(st2.st_size, sizeof(data));
assertEqualInt(st2.st_nlink, 2);
assertEqualInt(st.st_ino, st2.st_ino);
assertEqualInt(st.st_dev, st2.st_dev);
assert(0 == stat("link3a", &st));
assertEqualInt(st.st_mode, (S_IFREG | 0755) & ~UMASK);
assertEqualInt(st.st_size, sizeof(data));
assertEqualInt(st.st_nlink, 2);
assert(0 == stat("link3b", &st2));
assertEqualInt(st2.st_mode, (S_IFREG | 0755) & ~UMASK);
assertEqualInt(st2.st_size, sizeof(data));
assertEqualInt(st2.st_nlink, 2);
assertEqualInt(st.st_ino, st2.st_ino);
assertEqualInt(st.st_dev, st2.st_dev);
#endif
}