freebsd-dev/lib/libarchive/archive_read_support_format_tp.c

/*-
 * Copyright (c) 2003-2005 Tim Kientzle
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "archive_platform.h"
__FBSDID("$FreeBSD$");

#include <sys/stat.h>

#include <errno.h>
/* #include <stdint.h> */ /* See archive_platform.h */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include "archive.h"
#include "archive_entry.h"
#include "archive_private.h"
#include "archive_string.h"

/*
 * 'tp' was the common archiving format for Fourth Edition through
 * Sixth Edition Unix.  It was replaced by 'tar' in Seventh Edition.
 * (First through Third Edition used the 'tap' archiver.)
 *
 * The format has a 512-byte boot block, followed by a table of
 * contents listing all of the files in the archive, followed by
 * the file data.  Like 'tar', it is block-oriented; file data is
 * padded to a whole number of blocks.
 *
 * There are three different variants with slightly different TOC
 * formats:
 *    Original tp:  64-byte TOC entries with 32-byte pathnames.
 *    Ian Johnson's AGSM 'itp':  64-byte TOC entries with 48-byte pathnames
 *    'dtp' ???: 128-byte TOC entries with 114-byte pathnames.
 *
 * All variants store similar metadata: 16-bit mode, 8-bit uid/gid,
 * 24-bit size, 32-bit timestamp.  (The later 'tar' format extended
 * these fields and added link support.  The earlier 'tap' format used
 * narrower 8-bit mode and 16-bit size.)
 */

/*
 * The support code here reads the entire TOC into memory
 * up front.  The following structure is used to store
 * a single TOC record in memory.
 */
struct file_info {
	unsigned int	 offset;  /* Byte offset of file data (tape block * 512). */
	unsigned int 	 size;	/* File size in bytes (24-bit field in the TOC). */
	time_t		 mtime;	/* Last-modified time (32-bit field in the TOC). */
	mode_t		 mode;	/* Mode word (16-bit field in the TOC). */
	uid_t		 uid;	/* Owner id (8-bit field in the TOC). */
	gid_t		 gid;	/* Group id (8-bit field in the TOC). */
	char		*name; /* Heap-allocated, null-terminated filename. */
};

/*
 * Format-specific data.
 */
struct tp {
	int	bid; /* Cached bid; negative until the bid has been computed. */

	/* TOC entries read from the archive but not yet returned. */
	struct file_info **pending_files;
	int	pending_files_allocated; /* Slots allocated in pending_files. */
	int	pending_files_used; /* Slots currently occupied. */

	uint64_t current_position; /* Bytes consumed from the archive so far. */
	int64_t entry_bytes_remaining; /* Unread data bytes of current entry. */
	int64_t entry_sparse_offset; /* Offset reported with each data block. */
	int	fake_inode; /* Synthesized inode number for st_ino. */
	int	fake_dev; /* Synthesized device number for st_dev. */

	/*
	 * Pointer to a function to parse the dir entry for
	 * the selected format.
	 */
	struct file_info *(*parse_file_info)(struct archive *, const void *);
	ssize_t  toc_size; /* Size in bytes of a single TOC record. */
	int toc_read; /* True if we've already read the TOC. */
};

/*
 * Forward declarations for the format callbacks and the static
 * helpers defined later in this file.
 */
static void	add_entry(struct tp *tp, struct file_info *file);
static int	archive_read_format_tp_bid(struct archive *);
static int	archive_read_format_tp_cleanup(struct archive *);
static int	archive_read_format_tp_read_data(struct archive *,
		    const void **, size_t *, off_t *);
static int	archive_read_format_tp_read_header(struct archive *,
		    struct archive_entry *);
static struct file_info *next_entry(struct tp *);
static int	next_entry_seek(struct archive *a, struct tp *tp,
		    struct file_info **pfile);
static struct file_info *parse_file_info_tp(struct archive *, const void *);
static struct file_info *parse_file_info_itp(struct archive *, const void *);
static void	release_file(struct tp *, struct file_info *);
static int	toi(const void *p, int n);

/*
 * Enable 'tp' support on an archive handle.
 *
 * Allocates the zeroed per-format state, marks the bid as not yet
 * computed, and registers the bid/read_header/read_data/cleanup
 * callbacks.  Returns ARCHIVE_FATAL if the state cannot be allocated,
 * otherwise whatever the registration call returned; the state is
 * freed here when registration does not succeed.
 */
int
archive_read_support_format_tp(struct archive *a)
{
	struct tp *state;
	int status;

	state = malloc(sizeof(*state));
	if (state == NULL) {
		archive_set_error(a, ENOMEM, "Can't allocate tp data");
		return (ARCHIVE_FATAL);
	}
	memset(state, 0, sizeof(*state));
	/* A negative bid means "not yet computed". */
	state->bid = -1;

	status = __archive_read_register_format(a, state,
	    archive_read_format_tp_bid,
	    archive_read_format_tp_read_header,
	    archive_read_format_tp_read_data,
	    NULL,
	    archive_read_format_tp_cleanup);

	/* On failure the state was not adopted, so release it here. */
	if (status != ARCHIVE_OK)
		free(state);
	return (status);
}

/*
 * Decide how likely it is that the input is a tp archive.
 *
 * Peeks at the start of the archive, skips the 512-byte boot block,
 * and walks the 64-byte TOC records that fit in the peeked data.  Each
 * record must begin with a printable-ASCII, null-terminated pathname of
 * at most 48 characters; names longer than 32 characters select the
 * 'itp' record parser.  The size/offset fields of the records are then
 * checked to describe non-overlapping regions.
 *
 * Returns the bid (roughly one point per pathname character checked),
 * or 0 if the data does not look like tp.  The result is cached in
 * tp->bid so later calls return immediately.
 */
static int
archive_read_format_tp_bid(struct archive *a)
{
	struct block_info { uint64_t offset; uint64_t size; } *blocks, t;
	struct tp *tp;
	ssize_t bytes_read;
	const void *h;
	const char *p, *toc;
	int bid, i, name_size, not_sorted, toc_count;

	tp = *(a->pformat_data);

	if (tp->bid >= 0)
		return (tp->bid);

	/* Read a large initial block and inspect it to see
	 * if it looks like a tp TOC. */
	bytes_read = (a->compression_read_ahead)(a, &h, 8192);
	if (bytes_read < 1024)
		return (tp->bid = 0);

	/* Skip the 512-byte boot block. */
	toc = (const char *)h + 512;
	bytes_read -= 512;

	/*
	 * Check that there is something that looks like a tp TOC
	 * entry located every 64 bytes.  The score is accumulated in a
	 * local so that the "not yet bid" sentinel in tp->bid does not
	 * leak into the result.
	 */
	tp->parse_file_info = parse_file_info_tp;
	tp->toc_size = 64;
	bid = 0;
	toc_count = 0;
	p = toc;
	while (bytes_read >= tp->toc_size && p[0] != '\0') {
		/* Null-terminated ASCII pathname starts at beginning
		 * of block and is no more than 32 characters long for
		 * tp format, 48 for 'itp' format. */
		const char *pn = p;
		/* Test the bound first so we never read past the record. */
		while (pn < p + 64 && *pn >= 0x20 && *pn <= 0x7e) {
			/* backslash is illegal in filenames */
			if (*pn == '\\')
				return (tp->bid = 0);
			pn++;
		}
		if (pn > p + 48) /* String longer than 48 chars? */
			return (tp->bid = 0);
		/* Must be Ian Johnson's AGSM extended version. */
		if (pn > p + 32)
			tp->parse_file_info = parse_file_info_itp;
		if (*pn != '\0') /* Has non-ASCII character. */
			return (tp->bid = 0);
		/* We've checked ~1 bit for each character. */
		bid += (int)(pn - p);

		/*
		 * TODO: sanity-test the mode field; the upper bits
		 * of the mode should have only one of a small number
		 * of valid file types.
		 */
		toc_count++;
		p += tp->toc_size;
		/* Track what is left so we stay inside the peeked data. */
		bytes_read -= tp->toc_size;
	}

	/* No TOC entries at all: nothing here identifies a tp archive. */
	if (toc_count == 0)
		return (tp->bid = 0);

	/*
	 * We now know how many TOC entries we have in memory.
	 * Read the offset/size values into memory, sort, and verify
	 * that they define non-overlapping blocks in the archive.
	 *
	 * In both record layouts the 3-byte size field starts 5 bytes
	 * after the name field and the 2-byte tape address 12 bytes
	 * after it; only the name length (32 vs. 48) differs.
	 */
	name_size = (tp->parse_file_info == parse_file_info_itp) ? 48 : 32;
	blocks = malloc(sizeof(*blocks) * (size_t)toc_count);
	if (blocks == NULL) {
		/* Can't verify; decline for now without caching the bid. */
		return (0);
	}
	p = toc;
	for (i = 0; i < toc_count; i++) {
		blocks[i].size = (uint64_t)toi(p + name_size + 5, 3);
		blocks[i].offset = (uint64_t)toi(p + name_size + 12, 2) * 512;
		p += tp->toc_size;
		/* TODO: If this is dtp, use different offsets and stride. */
	}

	/*
	 * Sort blocks by offset, just in case the entries aren't
	 * already in sorted order.  We expect them to be sorted, and a
	 * bubble sort finishes in a single pass over sorted data; the
	 * list is also short (it has to fit in the peeked data).
	 */
	do {
		not_sorted = 0;
		for (i = 0; i < toc_count - 1; i++) {
			if (blocks[i].offset > blocks[i + 1].offset) {
				t = blocks[i];
				blocks[i] = blocks[i + 1];
				blocks[i + 1] = t;
				not_sorted = 1;
			}
		}
	} while (not_sorted);

	/* Check that blocks don't overlap. */
	for (i = 0; i < toc_count - 1; i++) {
		if (blocks[i].offset + blocks[i].size
		    > blocks[i + 1].offset) {
			free(blocks);
			return (tp->bid = 0);
		}
	}

	free(blocks);
	return (tp->bid = bid);
}

/*
 * Produce the header for the next archive entry.
 *
 * On the first call the 512-byte boot block is consumed and every TOC
 * record up to the empty-name sentinel is parsed and queued.  Each call
 * then picks the queued entry with the lowest data offset, skips
 * forward to it, and fills in 'entry' from the TOC data.
 *
 * Returns ARCHIVE_OK, the non-OK status from next_entry_seek() (EOF
 * when no entries remain), or ARCHIVE_FATAL on a short read or an
 * unparseable TOC record.
 */
static int
archive_read_format_tp_read_header(struct archive *a,
    struct archive_entry *entry)
{
	struct stat sb;
	struct tp *tp;
	struct file_info *info;
	const void *raw;
	const char *rec;
	ssize_t got;
	int status;

	tp = *(a->pformat_data);

	/* The whole TOC is loaded before the first entry is returned. */
	if (!tp->toc_read) {
		/* Step over the boot block. */
		got = (a->compression_read_ahead)(a, &raw, 512);
		if (got < 512)
			return (ARCHIVE_FATAL);
		got = 512;
		tp->current_position += got;
		(a->compression_read_consume)(a, got);

		/* Parse TOC records until the sentinel shows up. */
		for (;;) {
			got = (a->compression_read_ahead)(a, &raw,
			    tp->toc_size);
			if (got < tp->toc_size)
				return (ARCHIVE_FATAL);
			got = tp->toc_size;
			tp->current_position += got;
			(a->compression_read_consume)(a, got);

			rec = (const char *)raw;
			info = (*tp->parse_file_info)(a, rec);
			if (info != NULL) {
				add_entry(tp, info);
			} else if (rec[0] != '\0') {
				/* Only the sentinel may fail to parse. */
				return (ARCHIVE_FATAL);
			}
			if (rec[0] == '\0')
				break;
		}

		tp->toc_read = 1;
	}

	/* Advance to the pending entry with the lowest offset. */
	status = next_entry_seek(a, tp, &info);
	if (status != ARCHIVE_OK)
		return (status);

	tp->entry_bytes_remaining = info->size;
	/* Data offsets handed to the client restart at zero. */
	tp->entry_sparse_offset = 0;

	/* Hand out a fresh fake inode; roll over into a new fake device. */
	tp->fake_inode++;
	if (tp->fake_inode > 0xfff0) {
		tp->fake_inode = 1;
		tp->fake_dev++;
	}

	/* Describe the entry through a stat structure. */
	memset(&sb, 0, sizeof(sb));
	sb.st_dev = tp->fake_dev;
	sb.st_ino = tp->fake_inode;
	sb.st_nlink = 1;
	sb.st_mode = info->mode;
	sb.st_uid = info->uid;
	sb.st_gid = info->gid;
	sb.st_size = tp->entry_bytes_remaining;
	/* The TOC stores a single timestamp; use it for all three. */
	sb.st_atime = info->mtime;
	sb.st_ctime = info->mtime;
	sb.st_mtime = info->mtime;
	archive_entry_copy_stat(entry, &sb);
	archive_entry_set_pathname(entry, info->name);

	release_file(tp, info);
	return (ARCHIVE_OK);
}

/*
 * Return the next chunk of data for the current entry.
 *
 * Once the entry's bytes are exhausted this reports ARCHIVE_EOF with
 * an empty buffer.  Otherwise it peeks at whatever input is available
 * (at least one byte), clips it to the bytes remaining in the entry,
 * hands that span to the caller through buff/size/offset, and consumes
 * it.  Returns ARCHIVE_FATAL when no input can be read; an archive
 * error is recorded here only for the "zero bytes available" case.
 */
static int
archive_read_format_tp_read_data(struct archive *a,
    const void **buff, size_t *size, off_t *offset)
{
	struct tp *tp = *(a->pformat_data);
	ssize_t avail;

	/* Nothing left in this entry. */
	if (tp->entry_bytes_remaining <= 0) {
		*buff = NULL;
		*size = 0;
		*offset = tp->entry_sparse_offset;
		return (ARCHIVE_EOF);
	}

	avail = (a->compression_read_ahead)(a, buff, 1);
	if (avail <= 0) {
		if (avail == 0)
			archive_set_error(a, ARCHIVE_ERRNO_MISC,
			    "Truncated input file");
		return (ARCHIVE_FATAL);
	}

	/* Never hand out bytes that belong to padding or the next file. */
	if (avail > tp->entry_bytes_remaining)
		avail = tp->entry_bytes_remaining;

	*size = avail;
	*offset = tp->entry_sparse_offset;

	tp->entry_sparse_offset += avail;
	tp->entry_bytes_remaining -= avail;
	tp->current_position += avail;
	(a->compression_read_consume)(a, avail);
	return (ARCHIVE_OK);
}

static int
archive_read_format_tp_cleanup(struct archive *a)
{
	struct tp *tp;
	struct file_info *file;

	tp = *(a->pformat_data);
	while ((file = next_entry(tp)) != NULL)
		release_file(tp, file);
	free(tp);
	*(a->pformat_data) = NULL;
	return (ARCHIVE_OK);
}

/*
 * This routine parses a single directory record.
 */
static struct file_info *
parse_file_info_tp(struct archive *a, const void *dir_p)
{
	struct file_info *file;
	const struct tpdir {
		char name[32];
		char mode[2];
		char uid[1];
		char gid[1];
		char unused[1];
		char size[3];
		char modtime[4];
		char tapeaddr[2];
		char unused2[16];
		char checksum[2];
	} *p = dir_p;

	(void)a; /* UNUSED */

	/* Create a new file entry and copy data from the dir record. */
	file = malloc(sizeof(*file));
	if (file == NULL) {
		archive_set_error(a, ENOMEM, "Can't allocate TOC record");
		return (NULL);
	}
	memset(file, 0, sizeof(*file));

	file->name = malloc(sizeof(p->name) + 1);
	if (file->name == NULL) {
		archive_set_error(a, ENOMEM, "Can't allocate TOC name");
		free(file);
		return (NULL);
	}
	memcpy(file->name, p->name, sizeof(p->name));
	file->name[sizeof(p->name)] = '\0';
	/* If name wasn't null-terminated, then it's not valid. */
	if (strlen(file->name) == sizeof(p->name) || strlen(file->name) == 0) {
		archive_set_error(a, ENOMEM, "Damaged tp archive; invalid TOC");
		free(file->name);
		free(file);
		return (NULL);
	}
	file->offset = toi(p->tapeaddr, sizeof(p->tapeaddr)) * 512;
	file->size = toi(p->size, sizeof(p->size));
	file->mtime = toi(p->modtime, sizeof(p->modtime));
	file->mode = toi(p->mode, sizeof(p->mode));
	file->uid = toi(p->uid, sizeof(p->uid));
	file->gid = toi(p->gid, sizeof(p->gid));
	return (file);
}


/*
 * Ian Johnson's extended tp for AGSM eliminated the 16 pad bytes and
 * extnded the name field, allowing for 48 byte names.
 */
static struct file_info *
parse_file_info_itp(struct archive *a, const void *dir_p)
{
	struct file_info *file;
	const struct itpdir {
		char name[48];
		char mode[2];
		char uid[1];
		char gid[1];
		char unused[1];
		char size[3];
		char modtime[4];
		char tapeaddr[2];
		char checksum[2];
	} *p = dir_p;

	(void)a; /* UNUSED */

	/* Create a new file entry and copy data from the dir record. */
	file = malloc(sizeof(*file));
	if (file == NULL) {
		archive_set_error(a, ENOMEM, "Can't allocate TOC record");
		return (NULL);
	}
	memset(file, 0, sizeof(*file));

	file->name = malloc(sizeof(p->name) + 1);
	if (file->name == NULL) {
		archive_set_error(a, ENOMEM, "Can't allocate TOC name");
		free(file);
		return (NULL);
	}
	memcpy(file->name, p->name, sizeof(p->name));
	file->name[sizeof(p->name)] = '\0';
	/* If name wasn't null-terminated, then it's not valid. */
	if (strlen(file->name) == sizeof(p->name) || strlen(file->name) == 0) {
		archive_set_error(a, ENOMEM, "Damaged tp archive; invalid TOC");
		free(file->name);
		free(file);
		return (NULL);
	}
	file->offset = toi(p->tapeaddr, sizeof(p->tapeaddr)) * 512;
	file->size = toi(p->size, sizeof(p->size));
	file->mtime = toi(p->modtime, sizeof(p->modtime));
	file->mode = toi(p->mode, sizeof(p->mode));
	file->uid = toi(p->uid, sizeof(p->uid));
	file->gid = toi(p->gid, sizeof(p->gid));
	return (file);
}

/*
 * Append a parsed TOC entry to the pending-files list, taking
 * ownership of 'file'.  The list starts at 1024 slots and doubles
 * whenever it is full.  Allocation failure is reported through
 * __archive_errx().
 */
static void
add_entry(struct tp *tp, struct file_info *file)
{
	/* Expand our pending files list as necessary. */
	if (tp->pending_files_used >= tp->pending_files_allocated) {
		struct file_info **new_pending_files;
		int new_size = tp->pending_files_allocated * 2;

		if (new_size < 1024)
			new_size = 1024;
		/*
		 * realloc() copes with the initial NULL list, which
		 * avoids handing a null source pointer to memcpy().
		 * The result goes into a temporary so the old list is
		 * not lost if the allocation fails.
		 */
		new_pending_files = realloc(tp->pending_files,
		    (size_t)new_size * sizeof(new_pending_files[0]));
		if (new_pending_files == NULL)
			__archive_errx(1, "Out of memory");
		tp->pending_files = new_pending_files;
		tp->pending_files_allocated = new_size;
	}

	tp->pending_files[tp->pending_files_used++] = file;
}

/*
 * Free a TOC entry together with its name string.
 * 'tp' is accepted for symmetry with the other helpers but not used.
 */
static void
release_file(struct tp *tp, struct file_info *file)
{
	(void)tp; /* UNUSED */

	/* free() ignores a null pointer, so no guard is needed. */
	free(file->name);
	free(file);
}

/*
 * Take the pending entry with the lowest data offset and skip input
 * forward until the archive position reaches that offset.
 *
 * On ARCHIVE_OK, *pfile receives the entry and the caller owns it.
 * On ARCHIVE_EOF (no entries left) and on ARCHIVE_FATAL (input could
 * not be read while skipping) *pfile is NULL; in the FATAL case the
 * entry has already been released here.
 *
 * An entry whose offset lies at or before the current position is
 * returned as-is without any skipping.
 */
static int
next_entry_seek(struct archive *a, struct tp *tp,
    struct file_info **pfile)
{
	struct file_info *file;
	uint64_t offset;

	*pfile = NULL;
	file = next_entry(tp);
	if (file == NULL)
		return (ARCHIVE_EOF);
	offset = file->offset;

	/* Seek forward to the start of the entry. */
	while (tp->current_position < offset) {
		ssize_t step = offset - tp->current_position;
		ssize_t bytes_read;
		const void *buff;

		/* Skip at most one 512-byte block per iteration. */
		if (step > 512)
			step = 512;
		bytes_read = (a->compression_read_ahead)(a, &buff, step);
		if (bytes_read <= 0) {
			/* *pfile stays NULL: never expose freed memory. */
			release_file(tp, file);
			return (ARCHIVE_FATAL);
		}
		if (bytes_read > step)
			bytes_read = step;
		tp->current_position += bytes_read;
		(a->compression_read_consume)(a, bytes_read);
	}

	/* We found body of file; hand it to the caller. */
	*pfile = file;
	return (ARCHIVE_OK);
}

static struct file_info *
next_entry(struct tp *tp)
{
	int least_index;
	uint64_t least_offset;
	int i;
	struct file_info *r;

	if (tp->pending_files_used < 1)
		return (NULL);

	/* Assume the first file in the list is the earliest on disk. */
	least_index = 0;
	least_offset = tp->pending_files[0]->offset;

	/* Now, try to find an earlier one. */
	for(i = 0; i < tp->pending_files_used; i++) {
		uint64_t offset = tp->pending_files[i]->offset;
		if (least_offset > offset) {
			least_index = i;
			least_offset = offset;
		}
	}
	r = tp->pending_files[least_index];
	tp->pending_files[least_index]
	    = tp->pending_files[--tp->pending_files_used];
	return (r);
}

/*
 * 'tp' format was developed for PDP-11, so it uses the screwy PDP-11
 * byte order, which is big-endian words, little-endian bytes within a
 * word.  In particular, the 32-bit value 0x44332211 gets stored as
 * four bytes: 0x33 0x44 0x11 0x22
 *
 * Decode the n-byte unsigned field at p:
 *   n == 1: a single byte.
 *   n == 2: one little-endian 16-bit word.
 *   n == 3: a high-order byte followed by a little-endian 16-bit word.
 *   n == 4: two little-endian 16-bit words, high-order word first.
 * Any other n yields 0.
 *
 * The 4-byte value is assembled in unsigned arithmetic so that a set
 * top bit cannot cause signed overflow; it is converted to int only
 * at the end.
 */
static int
toi(const void *p, int n)
{
	const unsigned char *v = (const unsigned char *)p;
	unsigned int value;

	switch (n) {
	case 1:
		value = v[0];
		break;
	case 2:
		value = (unsigned int)v[0] | ((unsigned int)v[1] << 8);
		break;
	case 3:
		value = ((unsigned int)v[0] << 16)
		    | (unsigned int)v[1] | ((unsigned int)v[2] << 8);
		break;
	case 4:
		value = ((unsigned int)v[1] << 24) | ((unsigned int)v[0] << 16)
		    | ((unsigned int)v[3] << 8) | (unsigned int)v[2];
		break;
	default:
		value = 0;
		break;
	}
	return ((int)value);
}