Zip write support for libarchive.

The initial implementation was developed by Anselm Strauss as part of
Google Summer of Code 2008, then completed by Joerg Sonnenberger.
This commit is contained in:
Tim Kientzle 2009-12-29 06:15:32 +00:00
parent 84b1d75646
commit b1657eb07c
5 changed files with 671 additions and 0 deletions

View File

@ -87,6 +87,7 @@ SRCS= archive_check_magic.c \
archive_write_set_format_pax.c \
archive_write_set_format_shar.c \
archive_write_set_format_ustar.c \
archive_write_set_format_zip.c \
filter_fork.c
# Man pages to be installed.

View File

@ -538,6 +538,7 @@ __LA_DECL int archive_write_set_format_pax_restricted(struct archive *);
__LA_DECL int archive_write_set_format_shar(struct archive *);
__LA_DECL int archive_write_set_format_shar_dump(struct archive *);
__LA_DECL int archive_write_set_format_ustar(struct archive *);
__LA_DECL int archive_write_set_format_zip(struct archive *);
__LA_DECL int archive_write_open(struct archive *, void *,
archive_open_callback *, archive_write_callback *,
archive_close_callback *);

View File

@ -53,6 +53,7 @@ struct { int code; int (*setter)(struct archive *); } codes[] =
{ ARCHIVE_FORMAT_TAR_PAX_RESTRICTED,
archive_write_set_format_pax_restricted },
{ ARCHIVE_FORMAT_TAR_USTAR, archive_write_set_format_ustar },
{ ARCHIVE_FORMAT_ZIP, archive_write_set_format_zip },
{ 0, NULL }
};

View File

@ -57,6 +57,7 @@ struct { const char *name; int (*setter)(struct archive *); } names[] =
{ "shar", archive_write_set_format_shar },
{ "shardump", archive_write_set_format_shar_dump },
{ "ustar", archive_write_set_format_ustar },
{ "zip", archive_write_set_format_zip },
{ NULL, NULL }
};

View File

@ -0,0 +1,667 @@
/*-
* Copyright (c) 2008 Anselm Strauss
* Copyright (c) 2009 Joerg Sonnenberger
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Development supported by Google Summer of Code 2008.
*/
/*
* The current implementation is very limited:
*
* - No encryption support.
* - No ZIP64 support.
* - No support for splitting and spanning.
* - Only supports regular file and folder entries.
*
* Note that generally data in ZIP files is little-endian encoded,
* with some exceptions.
*
* TODO: Since Libarchive is generally 64bit oriented, but this implementation
* does not yet support sizes exceeding 32bit, it is highly fragile for
* big archives. This should change when ZIP64 is finally implemented, otherwise
* some serious checking has to be done.
*
*/
#include "archive_platform.h"
__FBSDID("$FreeBSD$");
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#include <stdio.h>
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_ZLIB_H
#include <zlib.h>
#endif
#include "archive.h"
#include "archive_endian.h"
#include "archive_entry.h"
#include "archive_private.h"
#include "archive_write_private.h"
#ifndef HAVE_ZLIB_H
#include "archive_crc32.h"
#endif
#define ZIP_SIGNATURE_LOCAL_FILE_HEADER 0x04034b50
#define ZIP_SIGNATURE_DATA_DESCRIPTOR 0x08074b50
#define ZIP_SIGNATURE_FILE_HEADER 0x02014b50
#define ZIP_SIGNATURE_CENTRAL_DIRECTORY_END 0x06054b50
#define ZIP_SIGNATURE_EXTRA_TIMESTAMP 0x5455
#define ZIP_SIGNATURE_EXTRA_UNIX 0x7855
#define ZIP_VERSION_EXTRACT 0x0014 /* ZIP version 2.0 is needed. */
#define ZIP_VERSION_BY 0x0314 /* Made by UNIX, using ZIP version 2.0. */
#define ZIP_FLAGS 0x08 /* Flagging bit 3 (count from 0) for using data descriptor. */
enum compression {
COMPRESSION_STORE = 0
#ifdef HAVE_ZLIB_H
,
COMPRESSION_DEFLATE = 8
#endif
};
static ssize_t archive_write_zip_data(struct archive_write *, const void *buff, size_t s);
static int archive_write_zip_finish(struct archive_write *);
static int archive_write_zip_destroy(struct archive_write *);
static int archive_write_zip_finish_entry(struct archive_write *);
static int archive_write_zip_header(struct archive_write *, struct archive_entry *);
static unsigned int dos_time(const time_t);
static size_t path_length(struct archive_entry *);
static int write_path(struct archive_entry *, struct archive_write *);
struct zip_local_file_header {
char signature[4];
char version[2];
char flags[2];
char compression[2];
char timedate[4];
char crc32[4];
char compressed_size[4];
char uncompressed_size[4];
char filename_length[2];
char extra_length[2];
};
struct zip_file_header {
char signature[4];
char version_by[2];
char version_extract[2];
char flags[2];
char compression[2];
char timedate[4];
char crc32[4];
char compressed_size[4];
char uncompressed_size[4];
char filename_length[2];
char extra_length[2];
char comment_length[2];
char disk_number[2];
char attributes_internal[2];
char attributes_external[4];
char offset[4];
};
struct zip_data_descriptor {
char signature[4]; /* Not mandatory, but recommended by specification. */
char crc32[4];
char compressed_size[4];
char uncompressed_size[4];
};
struct zip_extra_data_local {
char time_id[2];
char time_size[2];
char time_flag[1];
char mtime[4];
char atime[4];
char ctime[4];
char unix_id[2];
char unix_size[2];
char unix_uid[2];
char unix_gid[2];
};
struct zip_extra_data_central {
char time_id[2];
char time_size[2];
char time_flag[1];
char mtime[4];
char unix_id[2];
char unix_size[2];
};
struct zip_file_header_link {
struct zip_file_header_link *next;
struct archive_entry *entry;
off_t offset;
unsigned long crc32;
off_t compressed_size;
enum compression compression;
};
struct zip {
struct zip_data_descriptor data_descriptor;
struct zip_file_header_link *central_directory;
struct zip_file_header_link *central_directory_end;
int64_t offset;
int64_t written_bytes;
int64_t remaining_data_bytes;
enum compression compression;
#ifdef HAVE_ZLIB_H
z_stream stream;
size_t len_buf;
unsigned char *buf;
#endif
};
struct zip_central_directory_end {
char signature[4];
char disk[2];
char start_disk[2];
char entries_disk[2];
char entries[2];
char size[4];
char offset[4];
char comment_length[2];
};
static int
archive_write_zip_options(struct archive_write *a, const char *key,
const char *value)
{
struct zip *zip = a->format_data;
if (strcmp(key, "compression") == 0) {
if (strcmp(value, "deflate") == 0) {
#ifdef HAVE_ZLIB_H
zip->compression = COMPRESSION_DEFLATE;
#else
archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
"deflate compression not supported");
return ARCHIVE_WARN;
#endif
} else if (strcmp(value, "store") == 0)
zip->compression = COMPRESSION_STORE;
else
return (ARCHIVE_WARN);
return (ARCHIVE_OK);
}
return (ARCHIVE_WARN);
}
int
archive_write_set_format_zip(struct archive *_a)
{
struct archive_write *a = (struct archive_write *)_a;
struct zip *zip;
/* If another format was already registered, unregister it. */
if (a->format_destroy != NULL)
(a->format_destroy)(a);
zip = (struct zip *) malloc(sizeof(*zip));
if (zip == NULL) {
archive_set_error(&a->archive, ENOMEM, "Can't allocate zip data");
return (ARCHIVE_FATAL);
}
zip->central_directory = NULL;
zip->central_directory_end = NULL;
zip->offset = 0;
zip->written_bytes = 0;
zip->remaining_data_bytes = 0;
#ifdef HAVE_ZLIB_H
zip->compression = COMPRESSION_DEFLATE;
zip->len_buf = 65536;
zip->buf = malloc(zip->len_buf);
if (zip->buf == NULL) {
archive_set_error(&a->archive, ENOMEM, "Can't allocate compression buffer");
return (ARCHIVE_FATAL);
}
#else
zip->compression = COMPRESSION_STORE;
#endif
a->format_data = zip;
a->pad_uncompressed = 0; /* Actually not needed for now, since no compression support yet. */
a->format_name = "zip";
a->format_options = archive_write_zip_options;
a->format_write_header = archive_write_zip_header;
a->format_write_data = archive_write_zip_data;
a->format_finish_entry = archive_write_zip_finish_entry;
a->format_finish = archive_write_zip_finish;
a->format_destroy = archive_write_zip_destroy;
a->archive.archive_format = ARCHIVE_FORMAT_ZIP;
a->archive.archive_format_name = "ZIP";
archive_le32enc(&zip->data_descriptor.signature,
ZIP_SIGNATURE_DATA_DESCRIPTOR);
return (ARCHIVE_OK);
}
static int
archive_write_zip_header(struct archive_write *a, struct archive_entry *entry)
{
struct zip *zip;
struct zip_local_file_header h;
struct zip_extra_data_local e;
struct zip_data_descriptor *d;
struct zip_file_header_link *l;
int ret;
int64_t size;
mode_t type;
/* Entries other than a regular file or a folder are skipped. */
type = archive_entry_filetype(entry);
if ((type != AE_IFREG) & (type != AE_IFDIR)) {
archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Filetype not supported");
return ARCHIVE_FAILED;
};
/* Directory entries should have a size of 0. */
if (type == AE_IFDIR)
archive_entry_set_size(entry, 0);
zip = a->format_data;
d = &zip->data_descriptor;
size = archive_entry_size(entry);
zip->remaining_data_bytes = size;
/* Append archive entry to the central directory data. */
l = (struct zip_file_header_link *) malloc(sizeof(*l));
if (l == NULL) {
archive_set_error(&a->archive, ENOMEM, "Can't allocate zip header data");
return (ARCHIVE_FATAL);
}
l->entry = archive_entry_clone(entry);
/* Initialize the CRC variable and potentially the local crc32(). */
l->crc32 = crc32(0, NULL, 0);
l->compression = zip->compression;
l->compressed_size = 0;
l->next = NULL;
if (zip->central_directory == NULL) {
zip->central_directory = l;
} else {
zip->central_directory_end->next = l;
}
zip->central_directory_end = l;
/* Store the offset of this header for later use in central directory. */
l->offset = zip->written_bytes;
memset(&h, 0, sizeof(h));
archive_le32enc(&h.signature, ZIP_SIGNATURE_LOCAL_FILE_HEADER);
archive_le16enc(&h.version, ZIP_VERSION_EXTRACT);
archive_le16enc(&h.flags, ZIP_FLAGS);
archive_le16enc(&h.compression, zip->compression);
archive_le32enc(&h.timedate, dos_time(archive_entry_mtime(entry)));
archive_le16enc(&h.filename_length, (uint16_t)path_length(entry));
switch (zip->compression) {
case COMPRESSION_STORE:
/* Setting compressed and uncompressed sizes even when specification says
* to set to zero when using data descriptors. Otherwise the end of the
* data for an entry is rather difficult to find. */
archive_le32enc(&h.compressed_size, size);
archive_le32enc(&h.uncompressed_size, size);
break;
#ifdef HAVE_ZLIB_H
case COMPRESSION_DEFLATE:
archive_le32enc(&h.uncompressed_size, size);
zip->stream.zalloc = Z_NULL;
zip->stream.zfree = Z_NULL;
zip->stream.opaque = Z_NULL;
zip->stream.next_out = zip->buf;
zip->stream.avail_out = zip->len_buf;
if (deflateInit2(&zip->stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
-15, 8, Z_DEFAULT_STRATEGY) != Z_OK) {
archive_set_error(&a->archive, ENOMEM, "Can't init deflate compressor");
return (ARCHIVE_FATAL);
}
break;
#endif
}
/* Formatting extra data. */
archive_le16enc(&h.extra_length, sizeof(e));
archive_le16enc(&e.time_id, ZIP_SIGNATURE_EXTRA_TIMESTAMP);
archive_le16enc(&e.time_size, sizeof(e.time_flag) +
sizeof(e.mtime) + sizeof(e.atime) + sizeof(e.ctime));
e.time_flag[0] = 0x07;
archive_le32enc(&e.mtime, archive_entry_mtime(entry));
archive_le32enc(&e.atime, archive_entry_atime(entry));
archive_le32enc(&e.ctime, archive_entry_ctime(entry));
archive_le16enc(&e.unix_id, ZIP_SIGNATURE_EXTRA_UNIX);
archive_le16enc(&e.unix_size, sizeof(e.unix_uid) + sizeof(e.unix_gid));
archive_le16enc(&e.unix_uid, archive_entry_uid(entry));
archive_le16enc(&e.unix_gid, archive_entry_gid(entry));
archive_le32enc(&d->uncompressed_size, size);
ret = (a->compressor.write)(a, &h, sizeof(h));
if (ret != ARCHIVE_OK)
return (ARCHIVE_FATAL);
zip->written_bytes += sizeof(h);
ret = write_path(entry, a);
if (ret <= ARCHIVE_OK)
return (ARCHIVE_FATAL);
zip->written_bytes += ret;
ret = (a->compressor.write)(a, &e, sizeof(e));
if (ret != ARCHIVE_OK)
return (ARCHIVE_FATAL);
zip->written_bytes += sizeof(e);
return (ARCHIVE_OK);
}
static ssize_t
archive_write_zip_data(struct archive_write *a, const void *buff, size_t s)
{
int ret;
struct zip *zip = a->format_data;
struct zip_file_header_link *l = zip->central_directory_end;
if ((int64_t)s > zip->remaining_data_bytes)
s = (size_t)zip->remaining_data_bytes;
if (s == 0) return 0;
switch (zip->compression) {
case COMPRESSION_STORE:
ret = (a->compressor.write)(a, buff, s);
if (ret != ARCHIVE_OK) return (ret);
zip->written_bytes += s;
zip->remaining_data_bytes -= s;
l->compressed_size += s;
l->crc32 = crc32(l->crc32, buff, s);
return (s);
#if HAVE_ZLIB_H
case COMPRESSION_DEFLATE:
zip->stream.next_in = (unsigned char*)(uintptr_t)buff;
zip->stream.avail_in = s;
do {
ret = deflate(&zip->stream, Z_NO_FLUSH);
if (ret == Z_STREAM_ERROR)
return (ARCHIVE_FATAL);
if (zip->stream.avail_out == 0) {
ret = (a->compressor.write)(a, zip->buf, zip->len_buf);
if (ret != ARCHIVE_OK)
return (ret);
l->compressed_size += zip->len_buf;
zip->written_bytes += zip->len_buf;
zip->stream.next_out = zip->buf;
zip->stream.avail_out = zip->len_buf;
}
} while (zip->stream.avail_in != 0);
zip->remaining_data_bytes -= s;
/* If we have it, use zlib's fast crc32() */
l->crc32 = crc32(l->crc32, buff, s);
return (s);
#endif
default:
archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
"Invalid ZIP compression type");
return ARCHIVE_FATAL;
}
}
static int
archive_write_zip_finish_entry(struct archive_write *a)
{
/* Write the data descripter after file data has been written. */
int ret;
struct zip *zip = a->format_data;
struct zip_data_descriptor *d = &zip->data_descriptor;
struct zip_file_header_link *l = zip->central_directory_end;
#if HAVE_ZLIB_H
size_t reminder;
#endif
switch(zip->compression) {
case COMPRESSION_STORE:
break;
#if HAVE_ZLIB_H
case COMPRESSION_DEFLATE:
for (;;) {
ret = deflate(&zip->stream, Z_FINISH);
if (ret == Z_STREAM_ERROR)
return (ARCHIVE_FATAL);
reminder = zip->len_buf - zip->stream.avail_out;
ret = (a->compressor.write)(a, zip->buf, reminder);
if (ret != ARCHIVE_OK)
return (ret);
l->compressed_size += reminder;
zip->written_bytes += reminder;
zip->stream.next_out = zip->buf;
if (zip->stream.avail_out != 0)
break;
zip->stream.avail_out = zip->len_buf;
}
deflateEnd(&zip->stream);
break;
#endif
}
archive_le32enc(&d->crc32, l->crc32);
archive_le32enc(&d->compressed_size, l->compressed_size);
ret = (a->compressor.write)(a, d, sizeof(*d));
if (ret != ARCHIVE_OK)
return (ARCHIVE_FATAL);
zip->written_bytes += sizeof(*d);
return (ARCHIVE_OK);
}
static int
archive_write_zip_finish(struct archive_write *a)
{
struct zip *zip;
struct zip_file_header_link *l;
struct zip_file_header h;
struct zip_central_directory_end end;
struct zip_extra_data_central e;
off_t offset_start, offset_end;
int entries;
int ret;
zip = a->format_data;
l = zip->central_directory;
/*
* Formatting central directory file header fields that are fixed for all entries.
* Fields not used (and therefor 0) are:
*
* - comment_length
* - disk_number
* - attributes_internal
*/
memset(&h, 0, sizeof(h));
archive_le32enc(&h.signature, ZIP_SIGNATURE_FILE_HEADER);
archive_le16enc(&h.version_by, ZIP_VERSION_BY);
archive_le16enc(&h.version_extract, ZIP_VERSION_EXTRACT);
archive_le16enc(&h.flags, ZIP_FLAGS);
entries = 0;
offset_start = zip->written_bytes;
/* Formatting individual header fields per entry and
* writing each entry. */
while (l != NULL) {
archive_le16enc(&h.compression, l->compression);
archive_le32enc(&h.timedate, dos_time(archive_entry_mtime(l->entry)));
archive_le32enc(&h.crc32, l->crc32);
archive_le32enc(&h.compressed_size, l->compressed_size);
archive_le32enc(&h.uncompressed_size, archive_entry_size(l->entry));
archive_le16enc(&h.filename_length, (uint16_t)path_length(l->entry));
archive_le16enc(&h.extra_length, sizeof(e));
archive_le16enc(&h.attributes_external[2], archive_entry_mode(l->entry));
archive_le32enc(&h.offset, l->offset);
/* Formatting extra data. */
archive_le16enc(&e.time_id, ZIP_SIGNATURE_EXTRA_TIMESTAMP);
archive_le16enc(&e.time_size, sizeof(e.mtime) + sizeof(e.time_flag));
e.time_flag[0] = 0x07;
archive_le32enc(&e.mtime, archive_entry_mtime(l->entry));
archive_le16enc(&e.unix_id, ZIP_SIGNATURE_EXTRA_UNIX);
archive_le16enc(&e.unix_size, 0x0000);
ret = (a->compressor.write)(a, &h, sizeof(h));
if (ret != ARCHIVE_OK)
return (ARCHIVE_FATAL);
zip->written_bytes += sizeof(h);
ret = write_path(l->entry, a);
if (ret <= ARCHIVE_OK)
return (ARCHIVE_FATAL);
zip->written_bytes += ret;
ret = (a->compressor.write)(a, &e, sizeof(e));
if (ret != ARCHIVE_OK)
return (ARCHIVE_FATAL);
zip->written_bytes += sizeof(e);
l = l->next;
entries++;
}
offset_end = zip->written_bytes;
/* Formatting end of central directory. */
memset(&end, 0, sizeof(end));
archive_le32enc(&end.signature, ZIP_SIGNATURE_CENTRAL_DIRECTORY_END);
archive_le16enc(&end.entries_disk, entries);
archive_le16enc(&end.entries, entries);
archive_le32enc(&end.size, offset_end - offset_start);
archive_le32enc(&end.offset, offset_start);
/* Writing end of central directory. */
ret = (a->compressor.write)(a, &end, sizeof(end));
if (ret != ARCHIVE_OK)
return (ARCHIVE_FATAL);
zip->written_bytes += sizeof(end);
return (ARCHIVE_OK);
}
static int
archive_write_zip_destroy(struct archive_write *a)
{
struct zip *zip;
struct zip_file_header_link *l;
zip = a->format_data;
while (zip->central_directory != NULL) {
l = zip->central_directory;
zip->central_directory = l->next;
archive_entry_free(l->entry);
free(l);
}
#ifdef HAVE_ZLIB_H
free(zip->buf);
#endif
free(zip);
a->format_data = NULL;
return (ARCHIVE_OK);
}
/* Convert into MSDOS-style date/time. */
static unsigned int
dos_time(const time_t unix_time)
{
struct tm *t;
unsigned int dt;
/* This will not preserve time when creating/extracting the archive
* on two systems with different time zones. */
t = localtime(&unix_time);
dt = 0;
dt += ((t->tm_year - 80) & 0x7f) << 9;
dt += ((t->tm_mon + 1) & 0x0f) << 5;
dt += (t->tm_mday & 0x1f);
dt <<= 16;
dt += (t->tm_hour & 0x1f) << 11;
dt += (t->tm_min & 0x3f) << 5;
dt += (t->tm_sec & 0x3e) >> 1; /* Only counting every 2 seconds. */
return dt;
}
static size_t
path_length(struct archive_entry *entry)
{
mode_t type;
const char *path;
type = archive_entry_filetype(entry);
path = archive_entry_pathname(entry);
if ((type == AE_IFDIR) & (path[strlen(path) - 1] != '/')) {
return strlen(path) + 1;
} else {
return strlen(path);
}
}
static int
write_path(struct archive_entry *entry, struct archive_write *archive)
{
int ret;
const char *path;
mode_t type;
size_t written_bytes;
path = archive_entry_pathname(entry);
type = archive_entry_filetype(entry);
written_bytes = 0;
ret = (archive->compressor.write)(archive, path, strlen(path));
if (ret != ARCHIVE_OK)
return (ARCHIVE_FATAL);
written_bytes += strlen(path);
/* Folders are recognized by a traling slash. */
if ((type == AE_IFDIR) & (path[strlen(path) - 1] != '/')) {
ret = (archive->compressor.write)(archive, "/", 1);
if (ret != ARCHIVE_OK)
return (ARCHIVE_FATAL);
written_bytes += 1;
}
return ((int)written_bytes);
}