freebsd-nq/lib/libarchive/archive_read_support_format_mtree.c
Tim Kientzle 3010219939 Refactor the mtree code a bit to make the layering clearer: Each
"file" is described by multiple "lines" each possibly containing
multiple "keywords."  Incorporate some additions from Joerg Sonnenberger
to handle linked files and correctly deal with backing files on disk.
2008-03-15 07:10:24 +00:00

778 lines
19 KiB
C

/*-
* Copyright (c) 2003-2007 Tim Kientzle
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "archive_platform.h"
__FBSDID("$FreeBSD$");
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#include <stddef.h>
/* #include <stdint.h> */ /* See archive_platform.h */
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#include "archive.h"
#include "archive_entry.h"
#include "archive_private.h"
#include "archive_read_private.h"
#include "archive_string.h"
#ifndef O_BINARY
#define O_BINARY 0
#endif
struct mtree_entry {
struct mtree_entry *next;
char *name;
char *option_start;
char *option_end;
char full;
char used;
};
struct mtree {
struct archive_string line;
size_t buffsize;
char *buff;
off_t offset;
int fd;
int filetype;
int archive_format;
const char *archive_format_name;
struct mtree_entry *entries;
struct mtree_entry *this_entry;
struct archive_string current_dir;
struct archive_string contents_name;
off_t cur_size, cur_offset;
};
static int cleanup(struct archive_read *);
static int mtree_bid(struct archive_read *);
static int parse_file(struct archive_read *, struct archive_entry *,
struct mtree *, struct mtree_entry *);
static void parse_escapes(char *, struct mtree_entry *);
static int parse_line(struct archive_read *, struct archive_entry *,
struct mtree *, struct mtree_entry *);
static int parse_keyword(struct archive_read *, struct mtree *,
struct archive_entry *, char *, char *);
static int read_data(struct archive_read *a,
const void **buff, size_t *size, off_t *offset);
static ssize_t readline(struct archive_read *, struct mtree *, char **, ssize_t);
static int skip(struct archive_read *a);
static int read_header(struct archive_read *,
struct archive_entry *);
static int64_t mtree_atol10(char **);
static int64_t mtree_atol8(char **);
int
archive_read_support_format_mtree(struct archive *_a)
{
struct archive_read *a = (struct archive_read *)_a;
struct mtree *mtree;
int r;
mtree = (struct mtree *)malloc(sizeof(*mtree));
if (mtree == NULL) {
archive_set_error(&a->archive, ENOMEM,
"Can't allocate mtree data");
return (ARCHIVE_FATAL);
}
memset(mtree, 0, sizeof(*mtree));
mtree->fd = -1;
r = __archive_read_register_format(a, mtree,
mtree_bid, read_header, read_data, skip, cleanup);
if (r != ARCHIVE_OK)
free(mtree);
return (ARCHIVE_OK);
}
static int
cleanup(struct archive_read *a)
{
struct mtree *mtree;
struct mtree_entry *p, *q;
mtree = (struct mtree *)(a->format->data);
p = mtree->entries;
while (p != NULL) {
q = p->next;
free(p->name);
/*
* Note: option_start, option_end are pointers into
* the block that p->name points to. So we should
* not try to free them!
*/
free(p);
p = q;
}
archive_string_free(&mtree->line);
archive_string_free(&mtree->current_dir);
archive_string_free(&mtree->contents_name);
free(mtree->buff);
free(mtree);
(a->format->data) = NULL;
return (ARCHIVE_OK);
}
static int
mtree_bid(struct archive_read *a)
{
struct mtree *mtree;
ssize_t bytes_read;
const void *h;
const char *signature = "#mtree";
const char *p;
int bid;
mtree = (struct mtree *)(a->format->data);
/* Now let's look at the actual header and see if it matches. */
bytes_read = (a->decompressor->read_ahead)(a, &h, strlen(signature));
if (bytes_read <= 0)
return (bytes_read);
p = h;
bid = 0;
while (bytes_read > 0 && *signature != '\0') {
if (*p != *signature)
return (bid = 0);
bid += 8;
p++;
signature++;
bytes_read--;
}
return (bid);
}
/*
* The extended mtree format permits multiple lines specifying
* attributes for each file. Practically speaking, that means we have
* to read the entire mtree file into memory up front.
*/
static int
read_mtree(struct archive_read *a, struct mtree *mtree)
{
ssize_t len;
char *p;
struct mtree_entry *mentry;
struct mtree_entry *last_mentry = NULL;
mtree->archive_format = ARCHIVE_FORMAT_MTREE_V1;
mtree->archive_format_name = "mtree";
for (;;) {
len = readline(a, mtree, &p, 256);
if (len == 0) {
mtree->this_entry = mtree->entries;
return (ARCHIVE_OK);
}
if (len < 0)
return (len);
/* Leading whitespace is never significant, ignore it. */
while (*p == ' ' || *p == '\t') {
++p;
--len;
}
/* Skip content lines and blank lines. */
if (*p == '#')
continue;
if (*p == '\r' || *p == '\n' || *p == '\0')
continue;
mentry = malloc(sizeof(*mentry));
if (mentry == NULL) {
archive_set_error(&a->archive, ENOMEM,
"Can't allocate memory");
return (ARCHIVE_FATAL);
}
memset(mentry, 0, sizeof(*mentry));
/* Add this entry to list. */
if (last_mentry == NULL) {
last_mentry = mtree->entries = mentry;
} else {
last_mentry->next = mentry;
}
last_mentry = mentry;
/* Copy line over onto heap. */
mentry->name = malloc(len + 1);
if (mentry->name == NULL) {
free(mentry);
archive_set_error(&a->archive, ENOMEM,
"Can't allocate memory");
return (ARCHIVE_FATAL);
}
strcpy(mentry->name, p);
mentry->option_end = mentry->name + len;
/* Find end of name. */
p = mentry->name;
while (*p != ' ' && *p != '\n' && *p != '\0')
++p;
*p++ = '\0';
parse_escapes(mentry->name, mentry);
/* Find start of options and record it. */
while (p < mentry->option_end && (*p == ' ' || *p == '\t'))
++p;
mentry->option_start = p;
/* Null terminate each separate option. */
while (++p < mentry->option_end)
if (*p == ' ' || *p == '\t' || *p == '\n')
*p = '\0';
}
}
/*
* Read in the entire mtree file into memory on the first request.
* Then use the next unused file to satisfy each header request.
*/
static int
read_header(struct archive_read *a, struct archive_entry *entry)
{
struct mtree *mtree;
char *p;
int r;
mtree = (struct mtree *)(a->format->data);
if (mtree->fd >= 0) {
close(mtree->fd);
mtree->fd = -1;
}
if (mtree->entries == NULL) {
r = read_mtree(a, mtree);
if (r != ARCHIVE_OK)
return (r);
}
a->archive.archive_format = mtree->archive_format;
a->archive.archive_format_name = mtree->archive_format_name;
for (;;) {
if (mtree->this_entry == NULL)
return (ARCHIVE_EOF);
if (strcmp(mtree->this_entry->name, "..") == 0) {
mtree->this_entry->used = 1;
if (archive_strlen(&mtree->current_dir) > 0) {
/* Roll back current path. */
p = mtree->current_dir.s
+ mtree->current_dir.length - 1;
while (p >= mtree->current_dir.s && *p != '/')
--p;
if (p >= mtree->current_dir.s)
--p;
mtree->current_dir.length
= p - mtree->current_dir.s + 1;
}
}
if (!mtree->this_entry->used) {
r = parse_file(a, entry, mtree, mtree->this_entry);
return (r);
}
mtree->this_entry = mtree->this_entry->next;
}
}
/*
* A single file can have multiple lines contribute specifications.
* Parse as many lines as necessary, then pull additional information
* from a backing file on disk as necessary.
*/
static int
parse_file(struct archive_read *a, struct archive_entry *entry,
struct mtree *mtree, struct mtree_entry *mentry)
{
struct stat st;
struct mtree_entry *mp;
int r = ARCHIVE_OK, r1;
mentry->used = 1;
/* Initialize reasonable defaults. */
mtree->filetype = AE_IFREG;
archive_entry_set_size(entry, 0);
/* Parse options from this line. */
r = parse_line(a, entry, mtree, mentry);
if (mentry->full) {
archive_entry_copy_pathname(entry, mentry->name);
/*
* "Full" entries are allowed to have multiple lines
* and those lines aren't required to be adjacent. We
* don't support multiple lines for "relative" entries
* nor do we make any attempt to merge data from
* separate "relative" and "full" entries. (Merging
* "relative" and "full" entries would require dealing
* with pathname canonicalization, which is a very
* tricky subject.)
*/
for (mp = mentry->next; mp != NULL; mp = mp->next) {
if (mp->full && !mp->used
&& strcmp(mentry->name, mp->name) == 0) {
/* Later lines override earlier ones. */
mp->used = 1;
r1 = parse_line(a, entry, mtree, mp);
if (r1 < r)
r = r1;
}
}
} else {
/*
* Relative entries require us to construct
* the full path and possibly update the
* current directory.
*/
size_t n = archive_strlen(&mtree->current_dir);
if (n > 0)
archive_strcat(&mtree->current_dir, "/");
archive_strcat(&mtree->current_dir, mentry->name);
archive_entry_copy_pathname(entry, mtree->current_dir.s);
if (archive_entry_filetype(entry) != AE_IFDIR)
mtree->current_dir.length = n;
}
/*
* Try to open and stat the file to get the real size
* and other file info. It would be nice to avoid
* this here so that getting a listing of an mtree
* wouldn't require opening every referenced contents
* file. But then we wouldn't know the actual
* contents size, so I don't see a really viable way
* around this. (Also, we may want to someday pull
* other unspecified info from the contents file on
* disk.)
*/
mtree->fd = -1;
if (archive_strlen(&mtree->contents_name) > 0) {
mtree->fd = open(mtree->contents_name.s,
O_RDONLY | O_BINARY);
if (mtree->fd < 0) {
archive_set_error(&a->archive, errno,
"Can't open content=\"%s\"",
mtree->contents_name.s);
r = ARCHIVE_WARN;
}
} else if (archive_entry_filetype(entry) == AE_IFREG) {
mtree->fd = open(archive_entry_pathname(entry),
O_RDONLY | O_BINARY);
}
/*
* If there is a contents file on disk, use that size;
* otherwise leave it as-is (it might have been set from
* the mtree size= keyword).
*/
if (mtree->fd >= 0) {
if (fstat(mtree->fd, &st) != 0) {
archive_set_error(&a->archive, errno,
"could not stat %s",
archive_entry_pathname(entry));
r = ARCHIVE_WARN;
/* If we can't stat it, don't keep it open. */
close(mtree->fd);
mtree->fd = -1;
} else if ((st.st_mode & S_IFMT) != S_IFREG) {
archive_set_error(&a->archive, errno,
"%s is not a regular file",
archive_entry_pathname(entry));
r = ARCHIVE_WARN;
/* Don't hold a non-regular file open. */
close(mtree->fd);
mtree->fd = -1;
} else {
archive_entry_set_size(entry, st.st_size);
archive_entry_set_ino(entry, st.st_ino);
archive_entry_set_dev(entry, st.st_dev);
archive_entry_set_nlink(entry, st.st_nlink);
}
}
mtree->cur_size = archive_entry_size(entry);
mtree->offset = 0;
return r;
}
/*
* Each line contains a sequence of keywords.
*/
static int
parse_line(struct archive_read *a, struct archive_entry *entry,
struct mtree *mtree, struct mtree_entry *mp)
{
char *p, *q;
int r = ARCHIVE_OK, r1;
p = mp->option_start;
while (p < mp->option_end) {
q = p + strlen(p);
r1 = parse_keyword(a, mtree, entry, p, q);
if (r1 < r)
r = r1;
p = q + 1;
}
return (r);
}
/*
* Parse a single keyword and its value.
*/
static int
parse_keyword(struct archive_read *a, struct mtree *mtree,
struct archive_entry *entry, char *key, char *end)
{
char *val;
if (end == key)
return (ARCHIVE_OK);
if (*key == '\0')
return (ARCHIVE_OK);
val = strchr(key, '=');
if (val == NULL) {
archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
"Malformed attribute \"%s\" (%d)", key, key[0]);
return (ARCHIVE_WARN);
}
*val = '\0';
++val;
switch (key[0]) {
case 'c':
if (strcmp(key, "content") == 0
|| strcmp(key, "contents") == 0) {
parse_escapes(val, NULL);
archive_strcpy(&mtree->contents_name, val);
break;
}
case 'g':
if (strcmp(key, "gid") == 0) {
archive_entry_set_gid(entry, mtree_atol10(&val));
break;
}
if (strcmp(key, "gname") == 0) {
archive_entry_copy_gname(entry, val);
break;
}
case 'l':
if (strcmp(key, "link") == 0) {
archive_entry_set_link(entry, val);
break;
}
case 'm':
if (strcmp(key, "mode") == 0) {
if (val[0] == '0') {
archive_entry_set_perm(entry,
mtree_atol8(&val));
} else
archive_set_error(&a->archive,
ARCHIVE_ERRNO_FILE_FORMAT,
"Symbolic mode \"%s\" unsupported", val);
break;
}
case 's':
if (strcmp(key, "size") == 0) {
archive_entry_set_size(entry, mtree_atol10(&val));
break;
}
case 't':
if (strcmp(key, "type") == 0) {
switch (val[0]) {
case 'b':
if (strcmp(val, "block") == 0) {
mtree->filetype = AE_IFBLK;
break;
}
case 'c':
if (strcmp(val, "char") == 0) {
mtree->filetype = AE_IFCHR;
break;
}
case 'd':
if (strcmp(val, "dir") == 0) {
mtree->filetype = AE_IFDIR;
break;
}
case 'f':
if (strcmp(val, "fifo") == 0) {
mtree->filetype = AE_IFIFO;
break;
}
if (strcmp(val, "file") == 0) {
mtree->filetype = AE_IFREG;
break;
}
case 'l':
if (strcmp(val, "link") == 0) {
mtree->filetype = AE_IFLNK;
break;
}
default:
archive_set_error(&a->archive,
ARCHIVE_ERRNO_FILE_FORMAT,
"Unrecognized file type \"%s\"", val);
return (ARCHIVE_WARN);
}
archive_entry_set_filetype(entry, mtree->filetype);
break;
}
if (strcmp(key, "time") == 0) {
archive_entry_set_mtime(entry, mtree_atol10(&val), 0);
break;
}
case 'u':
if (strcmp(key, "uid") == 0) {
archive_entry_set_uid(entry, mtree_atol10(&val));
break;
}
if (strcmp(key, "uname") == 0) {
archive_entry_copy_uname(entry, val);
break;
}
default:
archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
"Unrecognized key %s=%s", key, val);
return (ARCHIVE_WARN);
}
return (ARCHIVE_OK);
}
static int
read_data(struct archive_read *a, const void **buff, size_t *size, off_t *offset)
{
size_t bytes_to_read;
ssize_t bytes_read;
struct mtree *mtree;
mtree = (struct mtree *)(a->format->data);
if (mtree->fd < 0) {
*buff = NULL;
*offset = 0;
*size = 0;
return (ARCHIVE_EOF);
}
if (mtree->buff == NULL) {
mtree->buffsize = 64 * 1024;
mtree->buff = malloc(mtree->buffsize);
if (mtree->buff == NULL) {
archive_set_error(&a->archive, ENOMEM,
"Can't allocate memory");
}
}
*buff = mtree->buff;
*offset = mtree->offset;
if (mtree->buffsize > mtree->cur_size - mtree->offset)
bytes_to_read = mtree->cur_size - mtree->offset;
else
bytes_to_read = mtree->buffsize;
bytes_read = read(mtree->fd, mtree->buff, bytes_to_read);
if (bytes_read < 0) {
archive_set_error(&a->archive, errno, "Can't read");
return (ARCHIVE_WARN);
}
if (bytes_read == 0) {
*size = 0;
return (ARCHIVE_EOF);
}
mtree->offset += bytes_read;
*size = (size_t)bytes_read;
return (ARCHIVE_OK);
}
/* Skip does nothing except possibly close the contents file. */
static int
skip(struct archive_read *a)
{
struct mtree *mtree;
mtree = (struct mtree *)(a->format->data);
if (mtree->fd >= 0) {
close(mtree->fd);
mtree->fd = -1;
}
return (ARCHIVE_OK);
}
/*
* Since parsing octal escapes always makes strings shorter,
* we can always do this conversion in-place.
*/
static void
parse_escapes(char *src, struct mtree_entry *mentry)
{
char *dest = src;
char c;
while (*src != '\0') {
c = *src++;
if (c == '/' && mentry != NULL)
mentry->full = 1;
if (c == '\\') {
if (src[0] >= '0' && src[0] <= '3'
&& src[1] >= '0' && src[1] <= '7'
&& src[2] >= '0' && src[2] <= '7') {
c = (src[0] - '0') << 6;
c |= (src[1] - '0') << 3;
c |= (src[2] - '0');
src += 3;
}
}
*dest++ = c;
}
*dest = '\0';
}
/*
* Note that this implementation does not (and should not!) obey
* locale settings; you cannot simply substitute strtol here, since
* it does obey locale.
*/
static int64_t
mtree_atol8(char **p)
{
int64_t l, limit, last_digit_limit;
int digit, base;
base = 8;
limit = INT64_MAX / base;
last_digit_limit = INT64_MAX % base;
l = 0;
digit = **p - '0';
while (digit >= 0 && digit < base) {
if (l>limit || (l == limit && digit > last_digit_limit)) {
l = INT64_MAX; /* Truncate on overflow. */
break;
}
l = (l * base) + digit;
digit = *++(*p) - '0';
}
return (l);
}
/*
* Note that this implementation does not (and should not!) obey
* locale settings; you cannot simply substitute strtol here, since
* it does obey locale.
*/
static int64_t
mtree_atol10(char **p)
{
int64_t l, limit, last_digit_limit;
int base, digit, sign;
base = 10;
limit = INT64_MAX / base;
last_digit_limit = INT64_MAX % base;
if (**p == '-') {
sign = -1;
++(*p);
} else
sign = 1;
l = 0;
digit = **p - '0';
while (digit >= 0 && digit < base) {
if (l > limit || (l == limit && digit > last_digit_limit)) {
l = UINT64_MAX; /* Truncate on overflow. */
break;
}
l = (l * base) + digit;
digit = *++(*p) - '0';
}
return (sign < 0) ? -l : l;
}
/*
* Returns length of line (including trailing newline)
* or negative on error. 'start' argument is updated to
* point to first character of line.
*/
static ssize_t
readline(struct archive_read *a, struct mtree *mtree, char **start, ssize_t limit)
{
ssize_t bytes_read;
ssize_t total_size = 0;
const void *t;
const char *s;
void *p;
/* Accumulate line in a line buffer. */
for (;;) {
/* Read some more. */
bytes_read = (a->decompressor->read_ahead)(a, &t, 1);
if (bytes_read == 0)
return (0);
if (bytes_read < 0)
return (ARCHIVE_FATAL);
s = t; /* Start of line? */
p = memchr(t, '\n', bytes_read);
/* If we found '\n', trim the read. */
if (p != NULL) {
bytes_read = 1 + ((const char *)p) - s;
}
if (total_size + bytes_read + 1 > limit) {
archive_set_error(&a->archive,
ARCHIVE_ERRNO_FILE_FORMAT,
"Line too long");
return (ARCHIVE_FATAL);
}
if (archive_string_ensure(&mtree->line,
total_size + bytes_read + 1) == NULL) {
archive_set_error(&a->archive, ENOMEM,
"Can't allocate working buffer");
return (ARCHIVE_FATAL);
}
memcpy(mtree->line.s + total_size, t, bytes_read);
(a->decompressor->consume)(a, bytes_read);
total_size += bytes_read;
/* Null terminate. */
mtree->line.s[total_size] = '\0';
/* If we found '\n', clean up and return. */
if (p != NULL) {
*start = mtree->line.s;
return (total_size);
}
}
}