MFC performance improvements when skipping entry bodies.
In particular, this speeds up listing contents or extracting single files from uncompressed archives read from slow, seekable media (e.g., slow disk drives). Thanks to Benjamin Lutz for doing the heavy lifting, and to Chris Spiegel for a critical bugfix.
This commit is contained in:
parent
eeec0f0910
commit
b06f9338d2
@ -100,14 +100,18 @@ struct archive_entry;
|
||||
/* #define ARCHIVE_ERRNO_MISC */
|
||||
|
||||
/*
|
||||
* Callbacks are invoked to automatically read/write/open/close the archive.
|
||||
* You can provide your own for complex tasks (like breaking archives
|
||||
* across multiple tapes) or use standard ones built into the library.
|
||||
* Callbacks are invoked to automatically read/skip/write/open/close the
|
||||
* archive. You can provide your own for complex tasks (like breaking
|
||||
* archives across multiple tapes) or use standard ones built into the
|
||||
* library.
|
||||
*/
|
||||
|
||||
/* Returns pointer and size of next block of data from archive. */
|
||||
typedef ssize_t archive_read_callback(struct archive *, void *_client_data,
|
||||
const void **_buffer);
|
||||
/* Skips at most request bytes from the archive and returns the number of bytes actually skipped (possibly fewer than requested, or 0); a negative value indicates an error. */
|
||||
typedef ssize_t archive_skip_callback(struct archive *, void *_client_data,
|
||||
size_t request);
|
||||
/* Returns size actually written, zero on EOF, -1 on error. */
|
||||
typedef ssize_t archive_write_callback(struct archive *, void *_client_data,
|
||||
void *_buffer, size_t _length);
|
||||
@ -187,6 +191,9 @@ int archive_read_support_format_zip(struct archive *);
|
||||
int archive_read_open(struct archive *, void *_client_data,
|
||||
archive_open_callback *, archive_read_callback *,
|
||||
archive_close_callback *);
|
||||
int archive_read_open2(struct archive *, void *_client_data,
|
||||
archive_open_callback *, archive_read_callback *,
|
||||
archive_skip_callback *, archive_close_callback *);
|
||||
|
||||
/*
|
||||
* The archive_read_open_file function is a convenience function built
|
||||
|
@ -68,6 +68,7 @@ struct archive {
|
||||
/* Callbacks to open/read/skip/write/close archive stream. */
|
||||
archive_open_callback *client_opener;
|
||||
archive_read_callback *client_reader;
|
||||
archive_skip_callback *client_skipper;
|
||||
archive_write_callback *client_writer;
|
||||
archive_close_callback *client_closer;
|
||||
void *client_data;
|
||||
@ -132,6 +133,7 @@ struct archive {
|
||||
ssize_t (*compression_read_ahead)(struct archive *,
|
||||
const void **, size_t request);
|
||||
ssize_t (*compression_read_consume)(struct archive *, size_t);
|
||||
ssize_t (*compression_skip)(struct archive *, size_t);
|
||||
|
||||
/*
|
||||
* Format detection is mostly the same as compression
|
||||
|
@ -109,6 +109,19 @@ int
|
||||
archive_read_open(struct archive *a, void *client_data,
|
||||
archive_open_callback *client_opener, archive_read_callback *client_reader,
|
||||
archive_close_callback *client_closer)
|
||||
{
|
||||
/* Old archive_read_open() is just a thin shell around
|
||||
* archive_read_open2. */
|
||||
return archive_read_open2(a, client_data, client_opener,
|
||||
client_reader, NULL, client_closer);
|
||||
}
|
||||
|
||||
int
|
||||
archive_read_open2(struct archive *a, void *client_data,
|
||||
archive_open_callback *client_opener,
|
||||
archive_read_callback *client_reader,
|
||||
archive_skip_callback *client_skipper,
|
||||
archive_close_callback *client_closer)
|
||||
{
|
||||
const void *buffer;
|
||||
ssize_t bytes_read;
|
||||
@ -129,6 +142,7 @@ archive_read_open(struct archive *a, void *client_data,
|
||||
*/
|
||||
a->client_opener = NULL;
|
||||
a->client_reader = NULL;
|
||||
a->client_skipper = NULL;
|
||||
a->client_closer = NULL;
|
||||
a->client_data = NULL;
|
||||
|
||||
@ -167,6 +181,7 @@ archive_read_open(struct archive *a, void *client_data,
|
||||
/* Now that the client callbacks have worked, remember them. */
|
||||
a->client_opener = client_opener; /* Do we need to remember this? */
|
||||
a->client_reader = client_reader;
|
||||
a->client_skipper = client_skipper;
|
||||
a->client_closer = client_closer;
|
||||
a->client_data = client_data;
|
||||
|
||||
|
@ -45,6 +45,7 @@ struct read_fd_data {
|
||||
static int file_close(struct archive *, void *);
|
||||
static int file_open(struct archive *, void *);
|
||||
static ssize_t file_read(struct archive *, void *, const void **buff);
|
||||
static ssize_t file_skip(struct archive *, void *, size_t request);
|
||||
|
||||
int
|
||||
archive_read_open_fd(struct archive *a, int fd, size_t block_size)
|
||||
@ -64,7 +65,7 @@ archive_read_open_fd(struct archive *a, int fd, size_t block_size)
|
||||
return (ARCHIVE_FATAL);
|
||||
}
|
||||
mine->fd = fd;
|
||||
return (archive_read_open(a, mine, file_open, file_read, file_close));
|
||||
return (archive_read_open2(a, mine, file_open, file_read, file_skip, file_close));
|
||||
}
|
||||
|
||||
static int
|
||||
@ -87,10 +88,51 @@ static ssize_t
|
||||
file_read(struct archive *a, void *client_data, const void **buff)
|
||||
{
|
||||
struct read_fd_data *mine = client_data;
|
||||
ssize_t bytes_read;
|
||||
|
||||
(void)a; /* UNUSED */
|
||||
*buff = mine->buffer;
|
||||
return (read(mine->fd, mine->buffer, mine->block_size));
|
||||
bytes_read = read(mine->fd, mine->buffer, mine->block_size);
|
||||
if (bytes_read < 0) {
|
||||
archive_set_error(a, errno, "Error reading fd %d", mine->fd);
|
||||
}
|
||||
return (bytes_read);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
file_skip(struct archive *a, void *client_data, size_t request)
|
||||
{
|
||||
struct read_fd_data *mine = client_data;
|
||||
off_t old_offset, new_offset;
|
||||
|
||||
/* Round request down to the nearest multiple of block_size. */
|
||||
request = (request / mine->block_size) * mine->block_size;
|
||||
/*
|
||||
* Hurray for lazy evaluation: if the first lseek fails, the second
|
||||
* one will not be executed.
|
||||
*/
|
||||
if (((old_offset = lseek(mine->fd, 0, SEEK_CUR)) < 0) ||
|
||||
((new_offset = lseek(mine->fd, request, SEEK_CUR)) < 0))
|
||||
{
|
||||
if (errno == ESPIPE)
|
||||
{
|
||||
/*
|
||||
* Failure to lseek() can be caused by the file
|
||||
* descriptor pointing to a pipe, socket or FIFO.
|
||||
* Return 0 here, so the compression layer will use
|
||||
* read()s instead to advance the file descriptor.
|
||||
* It's slower of course, but works as well.
|
||||
*/
|
||||
return (0);
|
||||
}
|
||||
/*
|
||||
* There's been an error other than ESPIPE. This is most
|
||||
* likely caused by a programmer error (too large request)
|
||||
* or a corrupted archive file.
|
||||
*/
|
||||
archive_set_error(a, errno, "Error seeking");
|
||||
return (-1);
|
||||
}
|
||||
return (new_offset - old_offset);
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -48,6 +48,7 @@ struct read_file_data {
|
||||
static int file_close(struct archive *, void *);
|
||||
static int file_open(struct archive *, void *);
|
||||
static ssize_t file_read(struct archive *, void *, const void **buff);
|
||||
static ssize_t file_skip(struct archive *, void *, size_t request);
|
||||
|
||||
int
|
||||
archive_read_open_file(struct archive *a, const char *filename,
|
||||
@ -73,7 +74,7 @@ archive_read_open_file(struct archive *a, const char *filename,
|
||||
mine->block_size = block_size;
|
||||
mine->buffer = NULL;
|
||||
mine->fd = -1;
|
||||
return (archive_read_open(a, mine, file_open, file_read, file_close));
|
||||
return (archive_read_open2(a, mine, file_open, file_read, file_skip, file_close));
|
||||
}
|
||||
|
||||
static int
|
||||
@ -119,7 +120,6 @@ file_read(struct archive *a, void *client_data, const void **buff)
|
||||
struct read_file_data *mine = client_data;
|
||||
ssize_t bytes_read;
|
||||
|
||||
(void)a; /* UNUSED */
|
||||
*buff = mine->buffer;
|
||||
bytes_read = read(mine->fd, mine->buffer, mine->block_size);
|
||||
if (bytes_read < 0) {
|
||||
@ -132,6 +132,51 @@ file_read(struct archive *a, void *client_data, const void **buff)
|
||||
return (bytes_read);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
file_skip(struct archive *a, void *client_data, size_t request)
|
||||
{
|
||||
struct read_file_data *mine = client_data;
|
||||
off_t old_offset, new_offset;
|
||||
|
||||
/* Round request down to the nearest multiple of block_size. */
|
||||
request = (request / mine->block_size) * mine->block_size;
|
||||
/*
|
||||
* Hurray for lazy evaluation: if the first lseek fails, the second
|
||||
* one will not be executed.
|
||||
*/
|
||||
if (((old_offset = lseek(mine->fd, 0, SEEK_CUR)) < 0) ||
|
||||
((new_offset = lseek(mine->fd, request, SEEK_CUR)) < 0))
|
||||
{
|
||||
if (errno == ESPIPE)
|
||||
{
|
||||
/*
|
||||
* Failure to lseek() can be caused by the file
|
||||
* descriptor pointing to a pipe, socket or FIFO.
|
||||
* Return 0 here, so the compression layer will use
|
||||
* read()s instead to advance the file descriptor.
|
||||
* It's slower of course, but works as well.
|
||||
*/
|
||||
return (0);
|
||||
}
|
||||
/*
|
||||
* There's been an error other than ESPIPE. This is most
|
||||
* likely caused by a programmer error (too large request)
|
||||
* or a corrupted archive file.
|
||||
*/
|
||||
if (mine->filename[0] == '\0')
|
||||
/*
|
||||
* Should never get here, since lseek() on stdin ought
|
||||
* to return an ESPIPE error.
|
||||
*/
|
||||
archive_set_error(a, errno, "Error seeking in stdin");
|
||||
else
|
||||
archive_set_error(a, errno, "Error seeking in '%s'",
|
||||
mine->filename);
|
||||
return (-1);
|
||||
}
|
||||
return (new_offset - old_offset);
|
||||
}
|
||||
|
||||
static int
|
||||
file_close(struct archive *a, void *client_data)
|
||||
{
|
||||
|
@ -187,6 +187,7 @@ init(struct archive *a, const void *buff, size_t n)
|
||||
|
||||
a->compression_read_ahead = read_ahead;
|
||||
a->compression_read_consume = read_consume;
|
||||
a->compression_skip = NULL; /* not supported */
|
||||
a->compression_finish = finish;
|
||||
|
||||
/* Initialize compression library. */
|
||||
|
@ -190,6 +190,7 @@ init(struct archive *a, const void *buff, size_t n)
|
||||
|
||||
a->compression_read_ahead = read_ahead;
|
||||
a->compression_read_consume = read_consume;
|
||||
a->compression_skip = NULL; /* not supported */
|
||||
a->compression_finish = finish;
|
||||
|
||||
state = malloc(sizeof(*state));
|
||||
|
@ -191,6 +191,7 @@ init(struct archive *a, const void *buff, size_t n)
|
||||
|
||||
a->compression_read_ahead = read_ahead;
|
||||
a->compression_read_consume = read_consume;
|
||||
a->compression_skip = NULL; /* not supported */
|
||||
a->compression_finish = finish;
|
||||
|
||||
/*
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include "archive_platform.h"
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
@ -61,6 +62,8 @@ struct archive_decompress_none {
|
||||
*/
|
||||
#define BUFFER_SIZE 65536
|
||||
|
||||
/* Smaller of a and b. Arguments and the whole expansion are parenthesized so operator precedence in the caller's expressions cannot change the result; note that one argument is still evaluated twice, so avoid side effects. */
#define minimum(a, b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
static int archive_decompressor_none_bid(const void *, size_t);
|
||||
static int archive_decompressor_none_finish(struct archive *);
|
||||
static int archive_decompressor_none_init(struct archive *,
|
||||
@ -69,6 +72,7 @@ static ssize_t archive_decompressor_none_read_ahead(struct archive *,
|
||||
const void **, size_t);
|
||||
static ssize_t archive_decompressor_none_read_consume(struct archive *,
|
||||
size_t);
|
||||
static ssize_t archive_decompressor_none_skip(struct archive *, size_t);
|
||||
|
||||
int
|
||||
archive_read_support_compression_none(struct archive *a)
|
||||
@ -123,6 +127,7 @@ archive_decompressor_none_init(struct archive *a, const void *buff, size_t n)
|
||||
a->compression_data = state;
|
||||
a->compression_read_ahead = archive_decompressor_none_read_ahead;
|
||||
a->compression_read_consume = archive_decompressor_none_read_consume;
|
||||
a->compression_skip = archive_decompressor_none_skip;
|
||||
a->compression_finish = archive_decompressor_none_finish;
|
||||
|
||||
return (ARCHIVE_OK);
|
||||
@ -251,6 +256,74 @@ archive_decompressor_none_read_consume(struct archive *a, size_t request)
|
||||
return (request);
|
||||
}
|
||||
|
||||
/*
|
||||
* Skip at most request bytes. Skipped data is marked as consumed.
|
||||
*/
|
||||
static ssize_t
|
||||
archive_decompressor_none_skip(struct archive *a, size_t request)
|
||||
{
|
||||
struct archive_decompress_none *state;
|
||||
ssize_t bytes_skipped, total_bytes_skipped = 0;
|
||||
size_t min;
|
||||
|
||||
state = a->compression_data;
|
||||
if (state->fatal)
|
||||
return (-1);
|
||||
/*
|
||||
* If there is data in the buffers already, use that first.
|
||||
*/
|
||||
if (state->avail > 0) {
|
||||
min = minimum(request, state->avail);
|
||||
bytes_skipped = archive_decompressor_none_read_consume(a, min);
|
||||
request -= bytes_skipped;
|
||||
total_bytes_skipped += bytes_skipped;
|
||||
}
|
||||
if (state->client_avail > 0) {
|
||||
min = minimum(request, state->client_avail);
|
||||
bytes_skipped = archive_decompressor_none_read_consume(a, min);
|
||||
request -= bytes_skipped;
|
||||
total_bytes_skipped += bytes_skipped;
|
||||
}
|
||||
if (request == 0)
|
||||
return (total_bytes_skipped);
|
||||
/*
|
||||
* If no client_skipper is provided, just read the old way. It is very
|
||||
* likely that after skipping, the request has not yet been fully
|
||||
* satisfied (and is still > 0). In that case, read as well.
|
||||
*/
|
||||
if (a->client_skipper != NULL) {
|
||||
bytes_skipped = (a->client_skipper)(a, a->client_data,
|
||||
request);
|
||||
if (bytes_skipped < 0) { /* error */
|
||||
state->client_total = state->client_avail = 0;
|
||||
state->client_next = state->client_buff = NULL;
|
||||
state->fatal = 1;
|
||||
return (bytes_skipped);
|
||||
}
|
||||
total_bytes_skipped += bytes_skipped;
|
||||
a->file_position += bytes_skipped;
|
||||
request -= bytes_skipped;
|
||||
state->client_next = state->client_buff;
|
||||
a->raw_position += bytes_skipped;
|
||||
state->client_avail = state->client_total = 0;
|
||||
}
|
||||
while (request > 0) {
|
||||
const void* dummy_buffer;
|
||||
ssize_t bytes_read;
|
||||
bytes_read = archive_decompressor_none_read_ahead(a,
|
||||
&dummy_buffer, request);
|
||||
if (bytes_read < 0)
|
||||
return (bytes_read);
|
||||
assert(bytes_read >= 0); /* precondition for cast below */
|
||||
min = minimum((size_t)bytes_read, request);
|
||||
bytes_read = archive_decompressor_none_read_consume(a, min);
|
||||
total_bytes_skipped += bytes_read;
|
||||
request -= bytes_read;
|
||||
}
|
||||
assert(request == 0);
|
||||
return (total_bytes_skipped);
|
||||
}
|
||||
|
||||
static int
|
||||
archive_decompressor_none_finish(struct archive *a)
|
||||
{
|
||||
|
@ -193,6 +193,7 @@ static int archive_read_format_tar_bid(struct archive *);
|
||||
static int archive_read_format_tar_cleanup(struct archive *);
|
||||
static int archive_read_format_tar_read_data(struct archive *a,
|
||||
const void **buff, size_t *size, off_t *offset);
|
||||
static int archive_read_format_tar_skip(struct archive *a);
|
||||
static int archive_read_format_tar_read_header(struct archive *,
|
||||
struct archive_entry *);
|
||||
static int checksum(struct archive *, const void *);
|
||||
@ -260,7 +261,7 @@ archive_read_support_format_tar(struct archive *a)
|
||||
archive_read_format_tar_bid,
|
||||
archive_read_format_tar_read_header,
|
||||
archive_read_format_tar_read_data,
|
||||
NULL,
|
||||
archive_read_format_tar_skip,
|
||||
archive_read_format_tar_cleanup);
|
||||
|
||||
if (r != ARCHIVE_OK)
|
||||
@ -522,6 +523,50 @@ archive_read_format_tar_read_data(struct archive *a,
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
archive_read_format_tar_skip(struct archive *a)
|
||||
{
|
||||
ssize_t bytes_skipped;
|
||||
struct tar* tar;
|
||||
struct sparse_block *p;
|
||||
int r = ARCHIVE_OK;
|
||||
const void *b; /* dummy variables */
|
||||
size_t s;
|
||||
off_t o;
|
||||
|
||||
|
||||
tar = *(a->pformat_data);
|
||||
if (a->compression_skip == NULL) {
|
||||
while (r == ARCHIVE_OK)
|
||||
r = archive_read_format_tar_read_data(a, &b, &s, &o);
|
||||
return (r);
|
||||
}
|
||||
bytes_skipped = (a->compression_skip)(a, tar->entry_bytes_remaining);
|
||||
if (bytes_skipped < 0)
|
||||
return (ARCHIVE_FATAL);
|
||||
/* same code as above in _tar_read_data() */
|
||||
tar->entry_bytes_remaining -= bytes_skipped;
|
||||
while (tar->sparse_list != NULL &&
|
||||
tar->sparse_list->remaining == 0) {
|
||||
p = tar->sparse_list;
|
||||
tar->sparse_list = p->next;
|
||||
free(p);
|
||||
if (tar->sparse_list != NULL)
|
||||
tar->entry_offset = tar->sparse_list->offset;
|
||||
}
|
||||
if (tar->sparse_list != NULL) {
|
||||
if (tar->sparse_list->remaining < bytes_skipped)
|
||||
bytes_skipped = tar->sparse_list->remaining;
|
||||
tar->sparse_list->remaining -= bytes_skipped;
|
||||
}
|
||||
tar->entry_offset += bytes_skipped;
|
||||
tar->entry_bytes_remaining -= bytes_skipped;
|
||||
/* Reuse padding code above. */
|
||||
while (r == ARCHIVE_OK)
|
||||
r = archive_read_format_tar_read_data(a, &b, &s, &o);
|
||||
return (r);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function recursively interprets all of the headers associated
|
||||
* with a single entry.
|
||||
|
Loading…
x
Reference in New Issue
Block a user