freebsd-dev/lib/libarchive/archive_read_open_fd.c
Tim Kientzle 693285bc87 Use 'skip' when ignoring data in tar archives. This dramatically
increases performance when extracting a single entry from a large
uncompressed archive, especially on slow devices such as USB hard
drives.

Requires a number of changes:
   * New archive_read_open2() supports a 'skip' client function
   * Old archive_read_open() is implemented as a wrapper now, to
     continue supporting the old API/ABI.
   * _read_open_fd and _read_open_file sprout new 'skip' functions.
   * compression layer gets a new 'skip' operation.
   * compression_none passes skip requests through to client.
   * compression_{gzip,bzip2,compress} simply ignore skip requests.

Thanks to: Benjamin Lutz, who designed and implemented the whole thing.
   I'm just committing it.  ;-)

TODO: Need to update the documentation a little bit.
2006-07-30 00:29:01 +00:00

149 lines
4.3 KiB
C

/*-
* Copyright (c) 2003-2004 Tim Kientzle
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer
* in this position and unchanged.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "archive_platform.h"
__FBSDID("$FreeBSD$");
#include <sys/stat.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "archive.h"
#include "archive_private.h"
/*
 * Per-archive client state for reading from a file descriptor.
 * Allocated by archive_read_open_fd() and released by file_close().
 */
struct read_fd_data {
	/* Descriptor supplied by the caller of archive_read_open_fd(). */
	int	 fd;
	/* Size of 'buffer' in bytes; also the size of each read() request. */
	size_t	 block_size;
	/* Heap buffer of block_size bytes that file_read() fills. */
	void	*buffer;
};
/*
 * Client callbacks passed to archive_read_open2() by
 * archive_read_open_fd(); all are defined later in this file.
 */
static int	file_open(struct archive *a, void *client_data);
static ssize_t	file_read(struct archive *a, void *client_data,
		    const void **buff);
static ssize_t	file_skip(struct archive *a, void *client_data,
		    size_t request);
static int	file_close(struct archive *a, void *client_data);
/*
 * Open an archive for reading from an already-open file descriptor.
 *
 * a          - archive handle to attach the descriptor to.
 * fd         - descriptor to read from; the close callback in this file
 *              releases only the memory allocated here and does not
 *              close the descriptor.
 * block_size - size of the read buffer and of each read() request.
 *
 * Returns ARCHIVE_FATAL (with ENOMEM recorded on the archive) if the
 * client state or its buffer cannot be allocated; otherwise returns
 * whatever archive_read_open2() returns.
 */
int
archive_read_open_fd(struct archive *a, int fd, size_t block_size)
{
	struct read_fd_data *state;

	state = malloc(sizeof(*state));
	if (state == NULL)
		goto nomem;

	state->buffer = malloc(block_size);
	if (state->buffer == NULL)
		goto nomem;

	state->block_size = block_size;
	state->fd = fd;
	return (archive_read_open2(a, state,
	    file_open, file_read, file_skip, file_close));

nomem:
	/* Shared out-of-memory exit; free(NULL) is harmless. */
	archive_set_error(a, ENOMEM, "No memory");
	free(state);
	return (ARCHIVE_FATAL);
}
/*
 * Client open callback.
 *
 * The descriptor is already open, so this only stats it and copies its
 * device and inode numbers into the archive handle's skip_file_dev and
 * skip_file_ino fields.
 * NOTE(review): presumably these let other code recognise the archive
 * file itself; confirm against the users of those fields.
 *
 * Returns ARCHIVE_OK on success, or ARCHIVE_FATAL (with errno recorded
 * on the archive) if fstat() fails.
 */
static int
file_open(struct archive *a, void *client_data)
{
	struct read_fd_data *state = client_data;
	struct stat sb;

	if (fstat(state->fd, &sb) == 0) {
		a->skip_file_dev = sb.st_dev;
		a->skip_file_ino = sb.st_ino;
		return (ARCHIVE_OK);
	}

	archive_set_error(a, errno, "Can't stat fd %d", state->fd);
	return (ARCHIVE_FATAL);
}
/*
 * Client read callback.
 *
 * Reads up to block_size bytes from the descriptor into the private
 * buffer and points *buff at that buffer.
 *
 * Returns the value of read(): the number of bytes read, 0 at end of
 * input, or a negative value on failure (in which case errno is
 * recorded on the archive).
 */
static ssize_t
file_read(struct archive *a, void *client_data, const void **buff)
{
	struct read_fd_data *mine = client_data;
	ssize_t bytes_read;

	*buff = mine->buffer;

	/*
	 * A read() interrupted by a signal before any data arrived fails
	 * with EINTR.  That is not an I/O error, so retry instead of
	 * reporting a failure to the caller.
	 */
	do {
		bytes_read = read(mine->fd, mine->buffer, mine->block_size);
	} while (bytes_read < 0 && errno == EINTR);

	if (bytes_read < 0) {
		archive_set_error(a, errno, "Error reading fd %d", mine->fd);
	}
	return (bytes_read);
}
/*
 * Client skip callback: advance the descriptor with lseek() instead of
 * reading and discarding data.
 *
 * Only whole blocks are skipped; 'request' is rounded down to a multiple
 * of block_size before seeking.
 *
 * Returns the distance the file offset actually moved, 0 if nothing was
 * skipped (zero block size, a request smaller than one block, or a
 * descriptor that cannot seek), or -1 with errno recorded on the archive
 * for any other seek failure.
 */
static ssize_t
file_skip(struct archive *a, void *client_data, size_t request)
{
	struct read_fd_data *mine = client_data;
	off_t old_offset, new_offset;

	/*
	 * A zero block size would make the rounding below divide by zero.
	 * Report "nothing skipped" instead.
	 */
	if (mine->block_size == 0)
		return (0);

	/* Round the request down to a whole number of blocks. */
	request = (request / mine->block_size) * mine->block_size;

	/* Less than one block to skip: no need for any system call. */
	if (request == 0)
		return (0);

	/*
	 * || short-circuits: if the first lseek() fails, the second one
	 * is never attempted.
	 */
	if ((old_offset = lseek(mine->fd, 0, SEEK_CUR)) < 0 ||
	    (new_offset = lseek(mine->fd, (off_t)request, SEEK_CUR)) < 0) {
		if (errno == ESPIPE) {
			/*
			 * The descriptor refers to a pipe, socket or FIFO
			 * and cannot seek.  Return 0 so the compression
			 * layer advances with read()s instead; slower,
			 * but it works just as well.
			 */
			return (0);
		}
		/*
		 * Any other failure is most likely a programmer error
		 * (too large a request) or a corrupted archive file.
		 */
		archive_set_error(a, errno, "Error seeking");
		return (-1);
	}
	return (new_offset - old_offset);
}
/*
 * Client close callback.
 *
 * Frees the read buffer and the client state allocated by
 * archive_read_open_fd().  The file descriptor itself is not closed
 * here.  Always returns ARCHIVE_OK.
 */
static int
file_close(struct archive *a, void *client_data)
{
	struct read_fd_data *state = client_data;

	(void)a; /* UNUSED */

	/* free() accepts NULL, so no guard is needed on the buffer. */
	free(state->buffer);
	free(state);
	return (ARCHIVE_OK);
}