Add support for "tp" format. tp was the standard system

archiver for Fourth Edition through Sixth Edition Unix; it was
replaced by tar in Seventh Edition.  (First Edition through
Third Edition used "tap.")

Unfortunately, tp was not so very standard; there were a
few different variants.  The code here attempts to support
what I believe were the most common variants.

tp support is not yet enabled by archive_read_support_format_all(),
as I'm not yet entirely comfortable with the detection
heuristics.  People interested in experimenting can
add archive_read_support_format_tp() just after any calls
to archive_read_support_format_all() in bsdtar to see how
well this works.

TODO: tp format is roughly similar in structure to dump/restore
   archive formats used by many systems.  It should be possible
   to generalize this code to handle many dump/restore variants.
   Format detection heuristics are going to be rough, though.

Thanks to: Warren Toomey, whose very basic tp extraction programs
   and documentation made this possible.
This commit is contained in:
Tim Kientzle 2006-01-17 03:40:42 +00:00
parent afb9481259
commit bbf3318c61
4 changed files with 631 additions and 1 deletions

View File

@ -25,7 +25,7 @@ ARCHIVE_API_MAJOR= 1
# Note: Do NOT reset this to zero after bumping ARCHIVE_API_MAJOR!
ARCHIVE_API_MINOR= 2
# Bumped often. ;-)
ARCHIVE_API_REVISION= 36
ARCHIVE_API_REVISION= 37
# Full libarchive version combines the above three numbers.
VERSION= ${ARCHIVE_API_MAJOR}.${ARCHIVE_API_MINOR}.${ARCHIVE_API_REVISION}
@ -68,6 +68,7 @@ BASE_SRCS= archive_check_magic.c \
archive_read_support_format_cpio.c \
archive_read_support_format_iso9660.c \
archive_read_support_format_tar.c \
archive_read_support_format_tp.c \
archive_read_support_format_zip.c \
archive_string.c \
archive_string_sprintf.c \

View File

@ -180,6 +180,7 @@ int archive_read_support_format_cpio(struct archive *);
int archive_read_support_format_gnutar(struct archive *);
int archive_read_support_format_iso9660(struct archive *);
int archive_read_support_format_tar(struct archive *);
int archive_read_support_format_tp(struct archive *);
int archive_read_support_format_zip(struct archive *);

View File

@ -0,0 +1,618 @@
/*-
* Copyright (c) 2003-2005 Tim Kientzle
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer
* in this position and unchanged.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "archive_platform.h"
__FBSDID("$FreeBSD$");
#include <sys/stat.h>
#include <errno.h>
/* #include <stdint.h> */ /* See archive_platform.h */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include "archive.h"
#include "archive_entry.h"
#include "archive_private.h"
#include "archive_string.h"
/*
* 'tp' was the common archiving format for Fourth Edition through
* Sixth Edition Unix. It was replaced by 'tar' in Seventh Edition.
* (First through Third Edition used the 'tap' archiver.)
*
* The format has a 512-byte boot block, followed by a table of
* contents listing all of the files in the archive, followed by
* the file data. Like 'tar', it is block-oriented; file data is
* padded to a whole number of blocks.
*
* There are three different variants with slightly different TOC
* formats:
* Original tp: 64-byte TOC entries with 32-byte pathnames.
* Ian Johnson's AGSM 'itp': 64-byte TOC entries with 48-byte pathnames
* 'dtp' ???: 128-byte TOC entries with 114-byte pathnames.
*
* All variants store similar metadata: 16-bit mode, 8-bit uid/gid,
* 24-bit size, 32-bit timestamp. (The later 'tar' format extended
* these fields and added link support. The earlier 'tap' format used
* narrower 8-bit mode and 16-bit size.)
*/
/*
* The support code here reads the entire TOC into memory
* up front. The following structure is used to store
* a single TOC record in memory.
*/
/*
 * In-memory copy of one table-of-contents record.  Instances are
 * created by the parse_file_info_*() routines and destroyed by
 * release_file().
 */
struct file_info {
unsigned int offset; /* Byte offset in archive (tape address * 512). */
unsigned int size; /* File size in bytes. */
time_t mtime; /* File last modified time. */
mode_t mode; /* Mode word as stored in the TOC record. */
uid_t uid; /* Owner id as stored in the TOC record. */
gid_t gid; /* Group id as stored in the TOC record. */
char *name; /* Null-terminated filename (heap-allocated). */
};
/*
* Format-specific data.
*/
struct tp {
int bid; /* Cached bid; -1 until the bidder has run. */
struct file_info **pending_files; /* TOC entries not yet returned. */
int pending_files_allocated; /* Capacity of pending_files[]. */
int pending_files_used; /* Number of live slots in pending_files[]. */
uint64_t current_position; /* Bytes consumed from the archive so far. */
int64_t entry_bytes_remaining; /* Unread bytes of the current entry. */
int64_t entry_sparse_offset; /* Offset reported for the next data block. */
int fake_inode; /* Counter used to synthesize st_ino values. */
int fake_dev; /* Bumped when fake_inode wraps; used as st_dev. */
/*
* Pointer to a function to parse the dir entry for
* the selected format.
*/
struct file_info *(*parse_file_info)(struct archive *, const void *);
ssize_t toc_size; /* Size in bytes of one TOC record. */
int toc_read; /* True if we've already read the TOC. */
};
/*
 * Forward declarations for the file-local helpers and the callbacks
 * handed to __archive_read_register_format().
 */
static void add_entry(struct tp *tp, struct file_info *file);
static int archive_read_format_tp_bid(struct archive *);
static int archive_read_format_tp_cleanup(struct archive *);
static int archive_read_format_tp_read_data(struct archive *,
const void **, size_t *, off_t *);
static int archive_read_format_tp_read_header(struct archive *,
struct archive_entry *);
static struct file_info *next_entry(struct tp *);
static int next_entry_seek(struct archive *a, struct tp *tp,
struct file_info **pfile);
static struct file_info *parse_file_info_tp(struct archive *, const void *);
static struct file_info *parse_file_info_itp(struct archive *, const void *);
static void release_file(struct tp *, struct file_info *);
/* Decode an n-byte PDP-11 ordered integer (n = 1..4). */
static int toi(const void *p, int n);
/*
 * Enable tp support on a read handle: allocate the zeroed per-format
 * state, mark it as "not yet bid", and register the tp callbacks.
 *
 * Returns ARCHIVE_OK on success, ARCHIVE_FATAL if the state cannot be
 * allocated, or whatever __archive_read_register_format() reports.
 */
int
archive_read_support_format_tp(struct archive *a)
{
	struct tp *state;
	int status;

	state = malloc(sizeof(*state));
	if (state == NULL) {
		archive_set_error(a, ENOMEM, "Can't allocate tp data");
		return (ARCHIVE_FATAL);
	}
	memset(state, 0, sizeof(*state));
	state->bid = -1; /* We haven't yet bid. */

	status = __archive_read_register_format(a,
	    state,
	    archive_read_format_tp_bid,
	    archive_read_format_tp_read_header,
	    archive_read_format_tp_read_data,
	    NULL,
	    archive_read_format_tp_cleanup);
	if (status == ARCHIVE_OK)
		return (ARCHIVE_OK);

	/* Registration failed, so the state is still ours to release. */
	free(state);
	return (status);
}
/*
 * Bid on the archive by inspecting the start of the stream.
 *
 * After the 512-byte boot block we expect a run of 64-byte TOC records,
 * each starting with a printable, null-terminated pathname (at most 32
 * characters for tp, 48 for itp), terminated by a record whose first
 * byte is zero.  Each accepted pathname character adds one to the bid.
 * The size/offset fields of the records seen are then checked to make
 * sure they describe non-overlapping regions.
 *
 * As a side effect this selects tp->parse_file_info and tp->toc_size.
 * The result is cached in tp->bid, so repeated calls are cheap.
 *
 * Returns the bid (0 means "not a tp archive").
 */
static int
archive_read_format_tp_bid(struct archive *a)
{
	struct block_info { uint64_t offset; uint64_t size; } *blocks, t;
	struct tp *tp;
	ssize_t bytes_read;
	const void *h;
	const char *p;
	int bid, toc_count, i, not_sorted;
	int size_off, addr_off;

	tp = *(a->pformat_data);
	if (tp->bid >= 0)
		return (tp->bid);

	/* Read a large initial block and inspect it to see
	 * if it looks like a tp TOC. */
	bytes_read = (a->compression_read_ahead)(a, &h, 8192);
	if (bytes_read < 1024)
		return (tp->bid = 0);
	p = (const char *)h;

	/* Skip the 512-byte boot block. */
	bytes_read -= 512;
	p += 512;

	/*
	 * Check that there is something that looks like a tp TOC
	 * entry located every 64 bytes.  The score is accumulated in
	 * a local so that the -1 "not yet bid" marker in tp->bid does
	 * not leak into the result.
	 */
	tp->parse_file_info = parse_file_info_tp;
	tp->toc_size = 64;
	toc_count = 0;
	bid = 0;
	while (bytes_read > 64 && p[0] != '\0') {
		/* Null-terminated ASCII pathname starts at beginning
		 * of block and is no more than 32 characters long for
		 * tp format, 48 for 'itp' format. */
		const char *pn = p;

		/* Bounds test comes first so we never look past the record. */
		while (pn < p + 64 && *pn >= 0x20 && *pn <= 0x7e) {
			/* backslash is illegal in filenames */
			if (*pn == '\\')
				return (tp->bid = 0);
			pn++;
		}
		if (pn > p + 48) /* String longer than 48 chars? */
			return (tp->bid = 0);
		/* Must be Ian Johnson's AGSM extended version. */
		if (pn > p + 32)
			tp->parse_file_info = parse_file_info_itp;
		if (*pn != '\0') /* Has non-ASCII character. */
			return (tp->bid = 0);
		/* We've checked ~1 bit for each character. */
		bid += (int)(pn - p);
		/*
		 * TODO: sanity-test the mode field; the upper bits
		 * of the mode should have only one of a small number
		 * of valid file types.
		 */
		toc_count++;
		p += tp->toc_size;
		/* Track what is left so the scan stays inside the buffer. */
		bytes_read -= tp->toc_size;
	}

	/* An empty TOC gives us nothing to recognize. */
	if (toc_count == 0)
		return (tp->bid = 0);

	/*
	 * We now know how many TOC entries we have in memory.
	 * Read the offset/size values into memory, sort, and verify
	 * that they define non-overlapping blocks in the archive.
	 */
	blocks = calloc((size_t)toc_count, sizeof(*blocks));
	if (blocks == NULL)
		return (0); /* Can't verify; decline without caching. */

	/* Field positions follow struct tpdir / struct itpdir. */
	if (tp->parse_file_info == parse_file_info_itp) {
		size_off = 53;
		addr_off = 60;
	} else {
		size_off = 37;
		addr_off = 44;
	}
	p = (const char *)h;
	p += 512;
	for (i = 0; i < toc_count; i++) {
		blocks[i].size = toi(p + size_off, 3);
		blocks[i].offset = toi(p + addr_off, 2) * 512;
		p += 64;
		/* TODO: If this is dtp, use different offsets and stride. */
	}

	/*
	 * Sort blocks by offset, just in case the entries aren't
	 * already in sorted order.  We expect them to be sorted, and
	 * a bubble sort finishes in a single O(n) pass in that case,
	 * which is all we need for the handful of records seen here.
	 */
	do {
		not_sorted = 0;
		for (i = 0; i < toc_count - 1; i++) {
			if (blocks[i].offset > blocks[i + 1].offset) {
				t = blocks[i];
				blocks[i] = blocks[i + 1];
				blocks[i + 1] = t;
				not_sorted = 1;
			}
		}
	} while (not_sorted);

	/* Check that blocks don't overlap. */
	for (i = 0; i < toc_count - 1; i++) {
		if (blocks[i].offset + blocks[i].size
		    > blocks[i + 1].offset) {
			bid = 0;
			break;
		}
	}

	free(blocks);
	return (tp->bid = bid);
}
/*
 * Return the header for the next entry.
 *
 * On the first call the boot block is skipped and the whole TOC is
 * read into tp->pending_files; the TOC ends at the first record whose
 * first byte is zero.  Every call then positions the stream at the
 * pending entry with the lowest archive offset and fills in 'entry'
 * from its TOC record.  Inode and device numbers are synthesized
 * because the format does not store them.
 *
 * Returns ARCHIVE_OK, ARCHIVE_EOF when no entries remain, or
 * ARCHIVE_FATAL on truncated input or a damaged TOC record.
 */
static int
archive_read_format_tp_read_header(struct archive *a,
    struct archive_entry *entry)
{
	struct stat st;
	struct tp *tp;
	struct file_info *file;
	const char *p;
	ssize_t bytes_read;
	int r, at_sentinel;

	tp = *(a->pformat_data);

	/* Read the entire TOC first. */
	if (!tp->toc_read) {
		/* Skip the initial block. */
		bytes_read = (a->compression_read_ahead)(a,
		    (const void **)&p, 512);
		if (bytes_read < 512)
			return (ARCHIVE_FATAL);
		bytes_read = 512;
		tp->current_position += bytes_read;
		(a->compression_read_consume)(a, bytes_read);

		/* Consume TOC entries. */
		do {
			bytes_read = (a->compression_read_ahead)(a,
			    (const void **)&p, tp->toc_size);
			if (bytes_read < tp->toc_size)
				return (ARCHIVE_FATAL);
			bytes_read = tp->toc_size;

			/*
			 * Look at the record while it is still inside
			 * the read-ahead window; don't rely on the
			 * buffer staying intact after it is consumed.
			 */
			at_sentinel = (p[0] == '\0');
			file = (*tp->parse_file_info)(a, p);
			/* NULL is okay if this is the sentinel. */
			if (file == NULL && !at_sentinel)
				return (ARCHIVE_FATAL);

			tp->current_position += bytes_read;
			(a->compression_read_consume)(a, bytes_read);
			if (file != NULL)
				add_entry(tp, file);
		} while (!at_sentinel);
		tp->toc_read = 1;
	}

	/* Get the next entry that appears after the current offset. */
	r = next_entry_seek(a, tp, &file);
	if (r != ARCHIVE_OK)
		return (r);

	tp->entry_bytes_remaining = file->size;
	tp->entry_sparse_offset = 0; /* Offset for sparse-file-aware clients */

	/* Set up the entry structure with information about this entry. */
	memset(&st, 0, sizeof(st));
	st.st_mode = file->mode;
	st.st_uid = file->uid;
	st.st_gid = file->gid;
	st.st_nlink = 1;
	/* Hand out a fresh inode number; roll over to a new device. */
	if (++tp->fake_inode > 0xfff0) {
		tp->fake_inode = 1;
		tp->fake_dev++;
	}
	st.st_ino = tp->fake_inode;
	st.st_dev = tp->fake_dev;
	/* Only one timestamp is stored; use it for all three. */
	st.st_mtime = file->mtime;
	st.st_ctime = file->mtime;
	st.st_atime = file->mtime;
	st.st_size = tp->entry_bytes_remaining;
	archive_entry_copy_stat(entry, &st);
	archive_entry_set_pathname(entry, file->name);

	release_file(tp, file);
	return (ARCHIVE_OK);
}
/*
 * Hand the client the next chunk of the current entry's body.
 *
 * Whatever the read-ahead layer has available is returned, clipped to
 * the number of bytes left in the entry.  Once the entry is exhausted,
 * ARCHIVE_EOF is returned with an empty buffer.  A failed or empty
 * read yields ARCHIVE_FATAL; the empty case also records a
 * "Truncated input file" error.
 */
static int
archive_read_format_tp_read_data(struct archive *a,
    const void **buff, size_t *size, off_t *offset)
{
	struct tp *tp = *(a->pformat_data);
	ssize_t avail;

	/* Nothing left in this entry. */
	if (tp->entry_bytes_remaining <= 0) {
		*buff = NULL;
		*size = 0;
		*offset = tp->entry_sparse_offset;
		return (ARCHIVE_EOF);
	}

	avail = (a->compression_read_ahead)(a, buff, 1);
	if (avail <= 0) {
		if (avail == 0)
			archive_set_error(a, ARCHIVE_ERRNO_MISC,
			    "Truncated input file");
		return (ARCHIVE_FATAL);
	}

	/* Never hand out bytes that belong to padding or the next file. */
	if (avail > tp->entry_bytes_remaining)
		avail = tp->entry_bytes_remaining;

	*size = avail;
	*offset = tp->entry_sparse_offset;

	tp->entry_sparse_offset += avail;
	tp->entry_bytes_remaining -= avail;
	tp->current_position += avail;
	(a->compression_read_consume)(a, avail);
	return (ARCHIVE_OK);
}
static int
archive_read_format_tp_cleanup(struct archive *a)
{
struct tp *tp;
struct file_info *file;
tp = *(a->pformat_data);
while ((file = next_entry(tp)) != NULL)
release_file(tp, file);
free(tp);
*(a->pformat_data) = NULL;
return (ARCHIVE_OK);
}
/*
* This routine parses a single directory record.
*/
static struct file_info *
parse_file_info_tp(struct archive *a, const void *dir_p)
{
struct file_info *file;
const struct tpdir {
char name[32];
char mode[2];
char uid[1];
char gid[1];
char unused[1];
char size[3];
char modtime[4];
char tapeaddr[2];
char unused2[16];
char checksum[2];
} *p = dir_p;
(void)a; /* UNUSED */
/* Create a new file entry and copy data from the dir record. */
file = malloc(sizeof(*file));
if (file == NULL) {
archive_set_error(a, ENOMEM, "Can't allocate TOC record");
return (NULL);
}
memset(file, 0, sizeof(*file));
file->name = malloc(sizeof(p->name) + 1);
if (file->name == NULL) {
archive_set_error(a, ENOMEM, "Can't allocate TOC name");
free(file);
return (NULL);
}
memcpy(file->name, p->name, sizeof(p->name));
file->name[sizeof(p->name)] = '\0';
/* If name wasn't null-terminated, then it's not valid. */
if (strlen(file->name) == sizeof(p->name) || strlen(file->name) == 0) {
archive_set_error(a, ENOMEM, "Damaged tp archive; invalid TOC");
free(file->name);
free(file);
return (NULL);
}
file->offset = toi(p->tapeaddr, sizeof(p->tapeaddr)) * 512;
file->size = toi(p->size, sizeof(p->size));
file->mtime = toi(p->modtime, sizeof(p->modtime));
file->mode = toi(p->mode, sizeof(p->mode));
file->uid = toi(p->uid, sizeof(p->uid));
file->gid = toi(p->gid, sizeof(p->gid));
return (file);
}
/*
* Ian Johnson's extended tp for AGSM eliminated the 16 pad bytes and
* extended the name field, allowing for 48-byte names.
*/
static struct file_info *
parse_file_info_itp(struct archive *a, const void *dir_p)
{
struct file_info *file;
const struct itpdir {
char name[48];
char mode[2];
char uid[1];
char gid[1];
char unused[1];
char size[3];
char modtime[4];
char tapeaddr[2];
char checksum[2];
} *p = dir_p;
(void)a; /* UNUSED */
/* Create a new file entry and copy data from the dir record. */
file = malloc(sizeof(*file));
if (file == NULL) {
archive_set_error(a, ENOMEM, "Can't allocate TOC record");
return (NULL);
}
memset(file, 0, sizeof(*file));
file->name = malloc(sizeof(p->name) + 1);
if (file->name == NULL) {
archive_set_error(a, ENOMEM, "Can't allocate TOC name");
free(file);
return (NULL);
}
memcpy(file->name, p->name, sizeof(p->name));
file->name[sizeof(p->name)] = '\0';
/* If name wasn't null-terminated, then it's not valid. */
if (strlen(file->name) == sizeof(p->name) || strlen(file->name) == 0) {
archive_set_error(a, ENOMEM, "Damaged tp archive; invalid TOC");
free(file->name);
free(file);
return (NULL);
}
file->offset = toi(p->tapeaddr, sizeof(p->tapeaddr)) * 512;
file->size = toi(p->size, sizeof(p->size));
file->mtime = toi(p->modtime, sizeof(p->modtime));
file->mode = toi(p->mode, sizeof(p->mode));
file->uid = toi(p->uid, sizeof(p->uid));
file->gid = toi(p->gid, sizeof(p->gid));
return (file);
}
/*
 * Append a TOC record to the pending-files list, growing the list
 * (doubling, with a floor of 1024 slots) when it is full.  Running
 * out of memory is reported through __archive_errx().
 */
static void
add_entry(struct tp *tp, struct file_info *file)
{
	/* Expand our pending files list as necessary. */
	if (tp->pending_files_used >= tp->pending_files_allocated) {
		struct file_info **new_pending_files;
		int new_size = tp->pending_files_allocated * 2;

		if (new_size < 1024)
			new_size = 1024;
		/*
		 * realloc() copes with the initial NULL list, so no
		 * copy from a null pointer is ever attempted.
		 */
		new_pending_files = realloc(tp->pending_files,
		    (size_t)new_size * sizeof(new_pending_files[0]));
		if (new_pending_files == NULL)
			__archive_errx(1, "Out of memory");
		tp->pending_files = new_pending_files;
		tp->pending_files_allocated = new_size;
	}
	tp->pending_files[tp->pending_files_used++] = file;
}
/*
 * Free a TOC record together with its name string.  The tp state is
 * accepted for symmetry with the other helpers but is not consulted.
 */
static void
release_file(struct tp *tp, struct file_info *file)
{
	(void)tp; /* UNUSED */

	/* free(NULL) is a no-op, so the name needs no guard. */
	free(file->name);
	free(file);
}
/*
 * Take the pending entry with the lowest archive offset and read
 * forward (at most 512 bytes per step) until the stream is positioned
 * at the start of its data.
 *
 * On ARCHIVE_OK, *pfile holds the entry and the caller owns it.
 * On ARCHIVE_EOF (no entries left) or ARCHIVE_FATAL (read failed
 * while skipping; the entry has been released), *pfile is NULL.
 *
 * NOTE(review): an entry whose offset lies behind the current
 * position is returned as-is, without repositioning; confirm that
 * this is what callers expect for such records.
 */
static int
next_entry_seek(struct archive *a, struct tp *tp,
    struct file_info **pfile)
{
	struct file_info *file;
	uint64_t offset;

	*pfile = NULL;
	file = next_entry(tp);
	if (file == NULL)
		return (ARCHIVE_EOF);

	offset = file->offset;

	/* Seek forward to the start of the entry. */
	while (tp->current_position < offset) {
		uint64_t gap = offset - tp->current_position;
		ssize_t step = (gap > 512) ? 512 : (ssize_t)gap;
		ssize_t bytes_read;
		const void *buff;

		bytes_read = (a->compression_read_ahead)(a, &buff, step);
		if (bytes_read <= 0) {
			/* *pfile stays NULL; never expose freed memory. */
			release_file(tp, file);
			return (ARCHIVE_FATAL);
		}
		if (bytes_read > step)
			bytes_read = step;
		tp->current_position += bytes_read;
		(a->compression_read_consume)(a, bytes_read);
	}

	/* We found body of file; handle it now. */
	*pfile = file;
	return (ARCHIVE_OK);
}
/*
 * Remove and return the pending entry with the smallest archive
 * offset, or NULL if the list is empty.  When several entries share
 * the smallest offset, the one stored first in the list wins.  The
 * vacated slot is refilled with the last element, so list order is
 * not preserved.
 */
static struct file_info *
next_entry(struct tp *tp)
{
	struct file_info *earliest;
	uint64_t best_offset;
	int best, k;

	if (tp->pending_files_used < 1)
		return (NULL);

	/* Start with slot 0 and look for anything strictly earlier. */
	best = 0;
	best_offset = tp->pending_files[0]->offset;
	for (k = 1; k < tp->pending_files_used; k++) {
		uint64_t candidate = tp->pending_files[k]->offset;

		if (candidate < best_offset) {
			best_offset = candidate;
			best = k;
		}
	}

	/* Unlink the winner by moving the tail element into its slot. */
	earliest = tp->pending_files[best];
	tp->pending_files_used--;
	tp->pending_files[best] = tp->pending_files[tp->pending_files_used];
	return (earliest);
}
/*
* 'tp' format was developed for PDP-11, so it uses the screwy PDP-11
* byte order, which is big-endian words, little-endian bytes within a
* word. In particular, the 32-bit value 0x44332211 gets stored as
* four bytes: 0x33 0x44 0x11 0x22
*/
/*
 * Decode an n-byte integer (n = 1..4) stored in PDP-11 order.
 *
 *   1 byte : the byte itself
 *   2 bytes: little-endian 16-bit word
 *   3 bytes: high byte first, then a little-endian 16-bit word
 *   4 bytes: high word first, each word little-endian
 *
 * Any other n yields 0.  The value is assembled in unsigned
 * arithmetic so that a set high bit in a 32-bit field cannot
 * trigger signed overflow; it is converted to int only on return.
 */
static int
toi(const void *p, int n)
{
	const unsigned char *v = (const unsigned char *)p;
	uint32_t r;

	switch (n) {
	case 1:
		r = v[0];
		break;
	case 2:
		r = (uint32_t)v[0] | ((uint32_t)v[1] << 8);
		break;
	case 3:
		r = ((uint32_t)v[0] << 16)
		    | (uint32_t)v[1] | ((uint32_t)v[2] << 8);
		break;
	case 4:
		r = ((uint32_t)v[1] << 24) | ((uint32_t)v[0] << 16)
		    | ((uint32_t)v[3] << 8) | (uint32_t)v[2];
		break;
	default:
		return (0);
	}
	return ((int)r);
}

View File

@ -235,6 +235,16 @@ compressed with the
.Dq deflate
algorithm.
Older zip compression algorithms are not supported.
.Ss Tp Formats
The libarchive library has experimental support for tp format,
which was used in Fourth Edition through Sixth Edition Unix.
(It was supplanted by tar in Seventh Edition Unix.)
There were several distinct variants of this format; libarchive
supports the original tp format and the itp variant.
Currently, tp format support is not enabled by
.Fn archive_read_support_format_all ;
it must be explicitly enabled by calling
.Fn archive_read_support_format_tp .
.Sh SEE ALSO
.Xr cpio 1 ,
.Xr mkisofs 1 ,