Many fixes:

* Disabled shared-library building, as some API breakage is
  still likely.  (I didn't realize it was turned on by default.)  If
  you have an existing /usr/lib/libarchive.so.2, I recommend deleting it.
* Pax interchange format now correctly stores and reads UTF8
  for extended attributes.  In particular, pax format can portably
  handle arbitrarily long pathnames containing arbitrary characters.
* Library compiles cleanly at -O2, -O3, and WARNS=6 on all
  FreeBSD-CURRENT platforms.
* Minor portability improvements inspired by Juergen Lock
  and Greg Lewis.  (Less reliance on stdint.h, isolating of
  various portability-challenged constructs.)
* archive_entry transparently converts multi-byte <-> wide character
  strings, allowing clients and format handlers to deal with either
  one, as appropriate.
* Support for reading 'L' and 'K' entries in standard tar archives
  for star compatibility.
* Recognize (but don't yet handle) ACL entries from Solaris tar.
* Pushed format-specific data for format readers down into
  format-specific storage and out of library-global storage.  This
  should make it easier to maintain individual formats without mucking
  with the core library management.
* Documentation updates to track the above changes.
* Updates to tar.5 to correct a few mistakes and add some additional
  information about GNU tar and Solaris tar formats.

Notes:
* The basic 'tar' reader is getting more general; there's not much
  point in keeping the 'gnutar' reader separate.  Merging the two
  would lose a bunch of duplicate code.
* The libc ACL support is looking increasingly inadequate for my needs
  here.  I might need to assemble some fairly significant code for
  parsing and building ACLs. <sigh>
This commit is contained in:
kientzle 2004-03-19 22:37:06 +00:00
parent fb8b67e065
commit eb220de95b
23 changed files with 1175 additions and 643 deletions

View File

@ -1,8 +1,14 @@
# Makefile for libarchive.
#
# $FreeBSD$
#
LIB= archive
SHLIB_MAJOR= 1
# I'm not yet ready for a shared version of this library, as
# there are still a couple of API changes in the works.
NOSHLIBS= 1
SRCS= archive_check_magic.c \
archive_entry.c \
archive_read.c \
@ -114,8 +120,8 @@ DEBUG_FLAGS+= -DDEBUG -g
CFLAGS+= -DHAVE_DMALLOC -I/usr/local/include
LDFLAGS+= -L/usr/local/lib -ldmalloc
.endif
CFLAGS+= -O3
# Should be WARNS=6, except that zlib.h is borked. <sigh>
WARNS?= 3
WARNS?= 6
.include <bsd.lib.mk>

View File

@ -1,8 +1,14 @@
# Makefile for libarchive.
#
# $FreeBSD$
#
LIB= archive
SHLIB_MAJOR= 1
# I'm not yet ready for a shared version of this library, as
# there are still a couple of API changes in the works.
NOSHLIBS= 1
SRCS= archive_check_magic.c \
archive_entry.c \
archive_read.c \
@ -114,8 +120,8 @@ DEBUG_FLAGS+= -DDEBUG -g
CFLAGS+= -DHAVE_DMALLOC -I/usr/local/include
LDFLAGS+= -L/usr/local/lib -ldmalloc
.endif
CFLAGS+= -O3
# Should be WARNS=6, except that zlib.h is borked. <sigh>
WARNS?= 3
WARNS?= 6
.include <bsd.lib.mk>

View File

@ -29,9 +29,8 @@
#ifndef ARCHIVE_H_INCLUDED
#define ARCHIVE_H_INCLUDED
#include <stdarg.h>
#include <stdint.h>
#include <unistd.h>
#include <inttypes.h> /* For int64_t */
#include <unistd.h> /* For ssize_t and size_t */
#define ARCHIVE_BYTES_PER_RECORD 512
#define ARCHIVE_DEFAULT_BYTES_PER_BLOCK 10240

View File

@ -29,9 +29,8 @@
#ifndef ARCHIVE_H_INCLUDED
#define ARCHIVE_H_INCLUDED
#include <stdarg.h>
#include <stdint.h>
#include <unistd.h>
#include <inttypes.h> /* For int64_t */
#include <unistd.h> /* For ssize_t and size_t */
#define ARCHIVE_BYTES_PER_RECORD 512
#define ARCHIVE_DEFAULT_BYTES_PER_BLOCK 10240

View File

@ -30,13 +30,20 @@
.Sh NAME
.Nm archive_entry_clear
.Nm archive_entry_clone
.Nm archive_entry_copy_gname_w
.Nm archive_entry_copy_hardlink_w
.Nm archive_entry_copy_pathname_w
.Nm archive_entry_copy_stat
.Nm archive_entry_dup
.Nm archive_entry_copy_symlink_w
.Nm archive_entry_copy_uname_w
.Nm archive_entry_free
.Nm archive_entry_gname
.Nm archive_entry_gname_w
.Nm archive_entry_hardlink
.Nm archive_entry_hardlink_w
.Nm archive_entry_new
.Nm archive_entry_pathname
.Nm archive_entry_pathname_w
.Nm archive_entry_set_devmajor
.Nm archive_entry_set_devminor
.Nm archive_entry_set_gid
@ -51,8 +58,10 @@
.Nm archive_entry_size
.Nm archive_entry_stat
.Nm archive_entry_symlink
.Nm archive_entry_symlink_w
.Nm archive_entry_tartype
.Nm archive_entry_uname
.Nm archive_entry_uname_w
.Nd functions for manipulating archive entry descriptions
.Sh SYNOPSIS
.In archive_entry.h
@ -61,19 +70,33 @@
.Ft struct archive_entry *
.Fn archive_entry_clone "struct archive_entry *"
.Ft void
.Fn archive_entry_copy_gname_w "struct archive_entry *" "const wchar_t *"
.Ft void
.Fn archive_entry_copy_hardlink_w "struct archive_entry *" "const wchar_t *"
.Ft void
.Fn archive_entry_copy_pathname_w "struct archive_entry *" "const wchar_t *"
.Ft void
.Fn archive_entry_copy_stat "struct archive_entry *" "struct stat *"
.Ft struct archive_entry *
.Fn archive_entry_dup "struct archive_entry *"
.Ft void
.Fn archive_entry_copy_symlink_w "struct archive_entry *" "const wchar_t *"
.Ft void
.Fn archive_entry_copy_uname_w "struct archive_entry *" "const wchar_t *"
.Ft void
.Fn archive_entry_free "struct archive_entry *"
.Ft const char *
.Fn archive_entry_gname "struct archive_entry *"
.Ft const wchar_t *
.Fn archive_entry_gname_w "struct archive_entry *"
.Ft const char *
.Fn archive_entry_hardlink "struct archive_entry *"
.Ft const wchar_t *
.Fn archive_entry_hardlink_w "struct archive_entry *"
.Ft struct archive_entry *
.Fn archive_entry_new "void"
.Ft const char *
.Fn archive_entry_pathname "struct archive_entry *"
.Ft const wchar_t *
.Fn archive_entry_pathname_w "struct archive_entry *"
.Ft void
.Fn archive_entry_set_devmajor "struct archive_entry *" "dev_t"
.Ft void
@ -102,24 +125,64 @@
.Fn archive_entry_stat "struct archive_entry *"
.Ft const char *
.Fn archive_entry_symlink "struct archive_entry *"
.Ft const wchar_t *
.Fn archive_entry_symlink_w "struct archive_entry *"
.Ft int
.Fn archive_entry_tartype "struct archive_entry *"
.Ft const char *
.Fn archive_entry_uname "struct archive_entry *"
.Ft const wchar_t *
.Fn archive_entry_uname_w "struct archive_entry *"
.Sh DESCRIPTION
These functions create and manipulate data objects that
represent entries within an archive.
You can think of a
.Tn struct archive_entry
as a
.Tn struct stat
on steroids: it includes everything from
as a heavy-duty version of
.Tn struct stat :
it includes everything from
.Tn struct stat
plus associated pathname, textual group and user names, etc.
These objects are used by
.Xr libarchive 3
to represent the metadata associated with a particular
entry in an archive.
.Pp
Most of the functions here set or read entries
in an object. Such functions have one of the
following forms:
.Bl -tag -compact -width indent
.It Fn archive_entry_set_XXXX
Stores the provided data in the object.
In particular, for strings, the pointer is stored,
not the referenced string.
.It Fn archive_entry_copy_XXXX
As above, except that the referenced data is copied
into the object.
.It Fn archive_entry_XXXX
Returns the specified data.
In the case of strings, a const-qualified pointer to
the string is returned.
.El
The string data can be accessed as wide character strings
(which are suffixed with
.Cm _w )
or normal
.Va char
strings.
Note that these are different representations of the same
data.
For example, if you store a narrow string and read the corresponding
wide string, the object will transparently convert formats
using the current locale.
Similarly, if you store a wide string and then store a
narrow string for the same data, the previously-set wide string will
be discarded in favor of the new data.
.Pp
The remaining functions allocate, destroy, clear, and copy
.Va archive_entry
objects.
These functions are described below:
.Bl -tag -compact -width indent
.It Fn archive_entry_clear
Erases the object, resetting all internal fields to the
@ -128,76 +191,20 @@ This is provided to allow you to quickly recycle objects
without thrashing the heap.
.It Fn archive_entry_clone
A deep copy operation; all text fields are duplicated.
.It Fn archive_entry_copy_stat
Copies the contents of the provided
.Tn struct stat
into the
.Tn struct archive_entry
object.
.It Fn archive_entry_dup
A shallow copy; text fields are not duplicated.
.It Fn archive_entry_free
Releases the
.Tn struct archive_entry
object.
.It Fn archive_entry_gname
Returns a pointer to the textual group name.
.It Fn archive_entry_hardlink
If this function returns non-NULL, then this object represents
a hardlink to another filesystem object.
The contents contain the pathname of the object.
.It Fn archive_entry_new
Allocate and return a blank
.Tn struct archive_entry
object.
.It Fn archive_entry_pathname
Returns a pointer to the pathname.
.It Fn archive_entry_set_devmajor
Sets the device major number (only valid for objects representing
block and character devices).
.It Fn archive_entry_set_devminor
Sets the device minor number (only valid for objects representing
block and character devices).
.It Fn archive_entry_set_gid
Sets the group ID for the object.
.It Fn archive_entry_set_gname
Sets a pointer to the textual group name.
Note that the name itself is not copied.
.It Fn archive_entry_set_hardlink
Sets the hardlink property; see
.Fn archive_entry_hardlink
above.
.It Fn archive_entry_set_mode
Sets the file mode.
.It Fn archive_entry_set_pathname
Sets a pointer to the pathname.
Note that the pathname text is not copied.
.It Fn archive_entry_set_symlink
Sets a pointer to the contents of a symbolic link.
Note that the pathname text is not copied.
.It Fn archive_entry_set_tartype
Sets the value to be used in a tar-format header
for this entry.
Client code should generally not set this; if it
is left unset, the library will automatically determine
an appropriate value.
.It Fn archive_entry_set_uid
Set the user ID for the object.
.It Fn archive_entry_set_uname
Sets a pointer to the textual user name.
Note that the name itself is not copied.
.It Fn archive_entry_size
Returns the size of the object on disk in bytes.
.It Fn archive_entry_stat
Returns a pointer to a populated
.Tn struct stat .
.It Fn archive_entry_symlink
Returns a pointer to the symlink contents.
.It Fn archive_entry_tartype
Returns the value used in a tar-format header.
Not generally useful to clients.
.It Fn archive_entry_uname
Returns a pointer to the textual user name.
.El
.\" .Sh EXAMPLE
.\" .Sh RETURN VALUES
@ -215,4 +222,4 @@ The
.Nm libarchive
library was written by
.An Tim Kientzle Aq kientzle@acm.org .
.Sh BUGS
.\" .Sh BUGS

View File

@ -32,11 +32,34 @@ __FBSDID("$FreeBSD$");
#ifdef HAVE_DMALLOC
#include <dmalloc.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include "archive_entry.h"
/*
* Handle wide character (i.e., Unicode) and non-wide character
* strings transparently.
*
*/
/*
 * One string attribute held in either or both encodings.  The
 * non-"_alloc" members are what readers see; the "_alloc" members
 * record heap storage owned by this object so it can be freed later.
 */
struct aes {
const char *aes_mbs; /* Multi-byte form, or NULL; may be caller-owned. */
char *aes_mbs_alloc; /* Owned heap buffer backing aes_mbs, or NULL. */
const wchar_t *aes_wcs; /* Wide-character form, or NULL; may be caller-owned. */
wchar_t *aes_wcs_alloc; /* Owned heap buffer backing aes_wcs, or NULL. */
};
/* Free owned buffers and zero the object. */
void aes_clean(struct aes *);
/* Copy src into dest, duplicating any buffers that src owns. */
void aes_copy(struct aes *dest, struct aes *src);
/* Return the multi-byte form, converting from the wide form if needed. */
const char * aes_get_mbs(struct aes *);
/* Return the wide form, converting from the multi-byte form if needed. */
const wchar_t * aes_get_wcs(struct aes *);
/* Store the pointer only; the string itself is not copied. */
void aes_set_mbs(struct aes *, const char *mbs);
/* Store the pointer only; the string itself is not copied. */
void aes_set_wcs(struct aes *, const wchar_t *wcs);
/* Store a freshly allocated copy of the wide string. */
void aes_copy_wcs(struct aes *, const wchar_t *wcs);
/*
* Description of an archive entry.
*
@ -70,24 +93,144 @@ struct archive_entry {
int ae_tartype;
/*
* Note: If you add any more string fields, update
* archive_entry_clone accordingly.
* Use aes here so that we get transparent mbs<->wcs conversions.
*/
const char *ae_acl; /* ACL text */
const char *ae_acl_default; /* default ACL */
const char *ae_fflags; /* Text fflags per fflagstostr(3) */
const char *ae_gname; /* Name of owning group */
const char *ae_hardlink; /* Name of target for hardlink */
const char *ae_pathname; /* Name of entry */
const char *ae_symlink; /* symlink contents */
const char *ae_uname; /* Name of owner */
char buff[1]; /* MUST BE AT END OF STRUCT!!! */
struct aes ae_acl; /* ACL text */
struct aes ae_acl_default; /* default ACL */
struct aes ae_fflags; /* Text fflags per fflagstostr(3) */
struct aes ae_gname; /* Name of owning group */
struct aes ae_hardlink; /* Name of target for hardlink */
struct aes ae_pathname; /* Name of entry */
struct aes ae_symlink; /* symlink contents */
struct aes ae_uname; /* Name of owner */
};
/*
 * Release both owned buffers (if any) and return the object to the
 * all-zero "no string set" state.  Pointers to caller-owned strings
 * are simply forgotten, never freed.
 */
void
aes_clean(struct aes *aes)
{
	/* free(NULL) is a no-op, so the buffers need no individual guards. */
	free(aes->aes_mbs_alloc);
	free(aes->aes_wcs_alloc);

	/* Clears the _alloc pointers along with the visible ones. */
	memset(aes, 0, sizeof(*aes));
}
/*
 * Copy 'src' into 'dest'.
 *
 * Pointers to caller-owned strings are copied as-is (both objects then
 * reference the same external string); buffers that 'src' owns are
 * duplicated so that each object can be cleaned independently.
 *
 * Whatever 'dest' held before is overwritten without being freed, so
 * 'dest' must be uninitialized or already clean.
 *
 * There is no way to report failure: if a duplicate cannot be
 * allocated, the corresponding form is left NULL in 'dest'.
 */
void
aes_copy(struct aes *dest, struct aes *src)
{
	/* Shallow copy first; owned buffers are replaced below. */
	*dest = *src;

	if (src->aes_mbs_alloc != NULL) {
		dest->aes_mbs_alloc = strdup(src->aes_mbs_alloc);
		dest->aes_mbs = dest->aes_mbs_alloc;
	}

	if (src->aes_wcs_alloc != NULL) {
		size_t wcs_bytes;

		wcs_bytes = (wcslen(src->aes_wcs_alloc) + 1) * sizeof(wchar_t);
		dest->aes_wcs_alloc = malloc(wcs_bytes);
		dest->aes_wcs = dest->aes_wcs_alloc;
		/* Never hand a failed allocation to wcscpy(). */
		if (dest->aes_wcs_alloc != NULL)
			wcscpy(dest->aes_wcs_alloc, src->aes_wcs_alloc);
	}
}
/*
 * Return the multi-byte form of the string, or NULL if no string is
 * set.  If only the wide form is present it is converted using the
 * current locale and the result is cached in a buffer owned by the
 * object.
 *
 * Returns NULL (and caches nothing) if memory cannot be allocated or
 * the wide string contains a character that cannot be represented in
 * the current locale.
 */
const char *
aes_get_mbs(struct aes *aes)
{
	if (aes->aes_mbs == NULL && aes->aes_wcs != NULL) {
		size_t wcs_length = wcslen(aes->aes_wcs);
		size_t mbs_size;
		char *mbs;

		/*
		 * Each wide character converts to at most MB_CUR_MAX
		 * bytes, so this bound is exact rather than a guess.
		 * Refuse sizes that would overflow size_t.
		 */
		if (wcs_length > ((size_t)-1 - 1) / MB_CUR_MAX)
			return (NULL);
		mbs_size = wcs_length * MB_CUR_MAX + 1;

		mbs = malloc(mbs_size);
		if (mbs == NULL)
			return (NULL);

		/* On failure the buffer contents are unspecified; drop it. */
		if (wcstombs(mbs, aes->aes_wcs, mbs_size) == (size_t)-1) {
			free(mbs);
			return (NULL);
		}
		mbs[mbs_size - 1] = '\0';

		aes->aes_mbs_alloc = mbs;
		aes->aes_mbs = mbs;
	}
	return (aes->aes_mbs);
}
/*
 * Return the wide-character form of the string, or NULL if no string
 * is set.  If only the multi-byte form is present it is converted
 * using the current locale and the result is cached in a buffer owned
 * by the object.
 *
 * Returns NULL (and caches nothing) if memory cannot be allocated or
 * the multi-byte string is not valid in the current locale.
 */
const wchar_t *
aes_get_wcs(struct aes *aes)
{
	if (aes->aes_wcs == NULL && aes->aes_mbs != NULL) {
		/*
		 * No single byte will be more than one wide character,
		 * so this length estimate will always be big enough.
		 */
		size_t wcs_length = strlen(aes->aes_mbs);
		wchar_t *wcs;

		/* calloc() checks the count * size multiplication for us. */
		wcs = calloc(wcs_length + 1, sizeof(wchar_t));
		if (wcs == NULL)
			return (NULL);

		/* On failure the buffer contents are unspecified; drop it. */
		if (mbstowcs(wcs, aes->aes_mbs, wcs_length) == (size_t)-1) {
			free(wcs);
			return (NULL);
		}
		wcs[wcs_length] = L'\0';

		aes->aes_wcs_alloc = wcs;
		aes->aes_wcs = wcs;
	}
	return (aes->aes_wcs);
}
/*
 * Make 'mbs' the value of this string.  Only the pointer is stored, so
 * the caller must keep the string alive for as long as it is in use.
 * Any previously cached or copied data in either encoding is discarded.
 */
void
aes_set_mbs(struct aes *aes, const char *mbs)
{
	/* Drop owned storage first; free(NULL) is harmless. */
	free(aes->aes_mbs_alloc);
	free(aes->aes_wcs_alloc);
	aes->aes_mbs_alloc = NULL;
	aes->aes_wcs_alloc = NULL;

	/* The wide form is now stale; it is regenerated on demand. */
	aes->aes_wcs = NULL;
	aes->aes_mbs = mbs;
}
/*
 * Make 'wcs' the value of this string.  Only the pointer is stored, so
 * the caller must keep the string alive for as long as it is in use.
 * Any previously cached or copied data in either encoding is discarded.
 */
void
aes_set_wcs(struct aes *aes, const wchar_t *wcs)
{
	/* Drop owned storage first; free(NULL) is harmless. */
	free(aes->aes_mbs_alloc);
	free(aes->aes_wcs_alloc);
	aes->aes_mbs_alloc = NULL;
	aes->aes_wcs_alloc = NULL;

	/* The multi-byte form is now stale; it is regenerated on demand. */
	aes->aes_mbs = NULL;
	aes->aes_wcs = wcs;
}
/*
 * Make a private copy of 'wcs' the value of this string.  The caller's
 * buffer may be reused or freed as soon as this returns.  Any previous
 * value in either encoding is discarded.
 *
 * If 'wcs' is NULL, or the copy cannot be allocated, the string is
 * left unset (both forms NULL).
 */
void
aes_copy_wcs(struct aes *aes, const wchar_t *wcs)
{
	wchar_t *copy;

	/* Discard whatever was stored before; free(NULL) is harmless. */
	free(aes->aes_mbs_alloc);
	free(aes->aes_wcs_alloc);
	aes->aes_mbs_alloc = NULL;
	aes->aes_wcs_alloc = NULL;
	aes->aes_mbs = NULL;
	aes->aes_wcs = NULL;

	/* wcslen(NULL) is undefined; treat NULL as "clear the value". */
	if (wcs == NULL)
		return;

	copy = malloc((wcslen(wcs) + 1) * sizeof(wchar_t));
	if (copy == NULL)
		return;
	wcscpy(copy, wcs);

	aes->aes_wcs_alloc = copy;
	aes->aes_wcs = copy;
}
struct archive_entry *
archive_entry_clear(struct archive_entry *entry)
{
aes_clean(&entry->ae_acl);
aes_clean(&entry->ae_acl_default);
aes_clean(&entry->ae_fflags);
aes_clean(&entry->ae_gname);
aes_clean(&entry->ae_hardlink);
aes_clean(&entry->ae_pathname);
aes_clean(&entry->ae_symlink);
aes_clean(&entry->ae_uname);
memset(entry, 0, sizeof(*entry));
entry->ae_tartype = -1;
return entry;
@ -95,99 +238,30 @@ archive_entry_clear(struct archive_entry *entry)
struct archive_entry *
archive_entry_clone(struct archive_entry *entry)
{
int size;
struct archive_entry *entry2;
char *p;
size = sizeof(*entry2);
if (entry->ae_acl)
size += strlen(entry->ae_acl) + 1;
if (entry->ae_acl_default)
size += strlen(entry->ae_acl_default) + 1;
if (entry->ae_fflags)
size += strlen(entry->ae_fflags) + 1;
if (entry->ae_gname)
size += strlen(entry->ae_gname) + 1;
if (entry->ae_hardlink)
size += strlen(entry->ae_hardlink) + 1;
if (entry->ae_pathname)
size += strlen(entry->ae_pathname) + 1;
if (entry->ae_symlink)
size += strlen(entry->ae_symlink) + 1;
if (entry->ae_uname)
size += strlen(entry->ae_uname) + 1;
entry2 = malloc(size);
*entry2 = *entry;
/* Copy all of the strings from the original. */
p = entry2->buff;
if (entry->ae_acl) {
entry2->ae_acl = p;
strcpy(p, entry->ae_acl);
p += strlen(p) + 1;
}
if (entry->ae_acl_default) {
entry2->ae_acl_default = p;
strcpy(p, entry->ae_acl_default);
p += strlen(p) + 1;
}
if (entry->ae_fflags) {
entry2->ae_fflags = p;
strcpy(p, entry->ae_fflags);
p += strlen(p) + 1;
}
if (entry->ae_gname) {
entry2->ae_gname = p;
strcpy(p, entry->ae_gname);
p += strlen(p) + 1;
}
if (entry->ae_hardlink) {
entry2->ae_hardlink = p;
strcpy(p, entry->ae_hardlink);
p += strlen(p) + 1;
}
if (entry->ae_pathname) {
entry2->ae_pathname = p;
strcpy(p, entry->ae_pathname);
p += strlen(p) + 1;
}
if (entry->ae_symlink) {
entry2->ae_symlink = p;
strcpy(p, entry->ae_symlink);
p += strlen(p) + 1;
}
if (entry->ae_uname) {
entry2->ae_uname = p;
strcpy(p, entry->ae_uname);
p += strlen(p) + 1;
}
return (entry2);
}
struct archive_entry *
archive_entry_dup(struct archive_entry *entry)
{
struct archive_entry *entry2;
/* Allocate new structure and copy over all of the fields. */
entry2 = malloc(sizeof(*entry2));
*entry2 = *entry;
entry2->ae_stat = entry->ae_stat;
entry2->ae_tartype = entry->ae_tartype;
aes_copy(&entry2->ae_acl ,&entry->ae_acl);
aes_copy(&entry2->ae_acl_default ,&entry->ae_acl_default);
aes_copy(&entry2->ae_fflags ,&entry->ae_fflags);
aes_copy(&entry2->ae_gname ,&entry->ae_gname);
aes_copy(&entry2->ae_hardlink ,&entry->ae_hardlink);
aes_copy(&entry2->ae_pathname, &entry->ae_pathname);
aes_copy(&entry2->ae_symlink ,&entry->ae_symlink);
aes_copy(&entry2->ae_uname ,&entry->ae_uname);
return (entry2);
}
/*
 * Destroy an entry: archive_entry_clear() is called first, then the
 * entry structure itself is freed.
 */
void
archive_entry_free(struct archive_entry *entry)
{
archive_entry_clear(entry);
free(entry);
}
@ -199,11 +273,11 @@ archive_entry_new(void)
entry = malloc(sizeof(*entry));
if(entry == NULL)
return (NULL);
archive_entry_clear(entry);
memset(entry, 0, sizeof(*entry));
entry->ae_tartype = -1;
return (entry);
}
/*
* Functions for reading fields from an archive_entry.
*/
@ -211,14 +285,14 @@ archive_entry_new(void)
const char *
archive_entry_acl(struct archive_entry *entry)
{
return (entry->ae_acl);
return (aes_get_mbs(&entry->ae_acl));
}
const char *
archive_entry_acl_default(struct archive_entry *entry)
{
return (entry->ae_acl_default);
return (aes_get_mbs(&entry->ae_acl_default));
}
dev_t
@ -237,19 +311,19 @@ archive_entry_devminor(struct archive_entry *entry)
const char *
archive_entry_fflags(struct archive_entry *entry)
{
return (entry->ae_fflags);
return (aes_get_mbs(&entry->ae_fflags));
}
const char *
archive_entry_gname(struct archive_entry *entry)
{
return (entry->ae_gname);
return (aes_get_mbs(&entry->ae_gname));
}
const char *
archive_entry_hardlink(struct archive_entry *entry)
{
return (entry->ae_hardlink);
return (aes_get_mbs(&entry->ae_hardlink));
}
mode_t
@ -261,7 +335,13 @@ archive_entry_mode(struct archive_entry *entry)
const char *
archive_entry_pathname(struct archive_entry *entry)
{
return (entry->ae_pathname);
return (aes_get_mbs(&entry->ae_pathname));
}
/*
 * Wide-character counterpart of archive_entry_pathname(); the value is
 * obtained through aes_get_wcs() on the entry's pathname field.
 */
const wchar_t *
archive_entry_pathname_w(struct archive_entry *entry)
{
return (aes_get_wcs(&entry->ae_pathname));
}
int64_t
@ -279,7 +359,7 @@ archive_entry_stat(struct archive_entry *entry)
const char *
archive_entry_symlink(struct archive_entry *entry)
{
return (entry->ae_symlink);
return (aes_get_mbs(&entry->ae_symlink));
}
int
@ -291,7 +371,7 @@ archive_entry_tartype(struct archive_entry *entry)
const char *
archive_entry_uname(struct archive_entry *entry)
{
return (entry->ae_uname);
return (aes_get_mbs(&entry->ae_uname));
}
/*
@ -311,14 +391,25 @@ archive_entry_copy_stat(struct archive_entry *entry, const struct stat *st)
void
archive_entry_set_acl(struct archive_entry *entry, const char *acl)
{
entry->ae_acl = acl;
aes_set_mbs(&entry->ae_acl, acl);
}
/* Store a copy of the wide-character ACL text in the entry. */
void
archive_entry_copy_acl_w(struct archive_entry *entry, const wchar_t *acl)
{
aes_copy_wcs(&entry->ae_acl, acl);
}
void
archive_entry_set_acl_default(struct archive_entry *entry, const char *acl)
{
entry->ae_acl_default = acl;
aes_set_mbs(&entry->ae_acl_default, acl);
}
/* Store a copy of the wide-character default ACL text in the entry. */
void
archive_entry_copy_acl_default_w(struct archive_entry *entry, const wchar_t *acl)
{
aes_copy_wcs(&entry->ae_acl_default, acl);
}
void
@ -342,7 +433,13 @@ archive_entry_set_devminor(struct archive_entry *entry, dev_t m)
void
archive_entry_set_fflags(struct archive_entry *entry, const char *flags)
{
entry->ae_fflags = flags;
aes_set_mbs(&entry->ae_fflags, flags);
}
/* Store a copy of the wide-character file-flags text in the entry. */
void
archive_entry_copy_fflags_w(struct archive_entry *entry, const wchar_t *flags)
{
aes_copy_wcs(&entry->ae_fflags, flags);
}
void
@ -354,13 +451,25 @@ archive_entry_set_gid(struct archive_entry *entry, gid_t g)
void
archive_entry_set_gname(struct archive_entry *entry, const char *name)
{
entry->ae_gname = name;
aes_set_mbs(&entry->ae_gname, name);
}
/* Store a copy of the wide-character group name in the entry. */
void
archive_entry_copy_gname_w(struct archive_entry *entry, const wchar_t *name)
{
aes_copy_wcs(&entry->ae_gname, name);
}
void
archive_entry_set_hardlink(struct archive_entry *entry, const char *target)
{
entry->ae_hardlink = target;
aes_set_mbs(&entry->ae_hardlink, target);
}
/* Store a copy of the wide-character hardlink target in the entry. */
void
archive_entry_copy_hardlink_w(struct archive_entry *entry, const wchar_t *target)
{
aes_copy_wcs(&entry->ae_hardlink, target);
}
void
@ -372,7 +481,13 @@ archive_entry_set_mode(struct archive_entry *entry, mode_t m)
void
archive_entry_set_pathname(struct archive_entry *entry, const char *name)
{
entry->ae_pathname = name;
aes_set_mbs(&entry->ae_pathname, name);
}
/* Store a copy of the wide-character pathname in the entry. */
void
archive_entry_copy_pathname_w(struct archive_entry *entry, const wchar_t *name)
{
aes_copy_wcs(&entry->ae_pathname, name);
}
void
@ -382,9 +497,15 @@ archive_entry_set_size(struct archive_entry *entry, int64_t s)
}
void
archive_entry_set_symlink(struct archive_entry *entry, const char *link)
archive_entry_set_symlink(struct archive_entry *entry, const char *linkname)
{
entry->ae_symlink = link;
aes_set_mbs(&entry->ae_symlink, linkname);
}
/* Store a copy of the wide-character symlink contents in the entry. */
void
archive_entry_copy_symlink_w(struct archive_entry *entry, const wchar_t *linkname)
{
aes_copy_wcs(&entry->ae_symlink, linkname);
}
void
@ -402,6 +523,26 @@ archive_entry_set_uid(struct archive_entry *entry, uid_t u)
void
archive_entry_set_uname(struct archive_entry *entry, const char *name)
{
entry->ae_uname = name;
aes_set_mbs(&entry->ae_uname, name);
}
/* Store a copy of the wide-character user name in the entry. */
void
archive_entry_copy_uname_w(struct archive_entry *entry, const wchar_t *name)
{
aes_copy_wcs(&entry->ae_uname, name);
}
#if TEST
/*
 * Stand-alone smoke test for the aes helpers: store a narrow string
 * and print its wide-character conversion.
 *
 * NOTE(review): there is no setlocale() call, so the conversion runs
 * in the default "C" locale; the non-ASCII prefix of the test string
 * may not convert as intended -- confirm the desired locale.
 */
int
main(int argc, char **argv)
{
	struct aes aes;
	const wchar_t *wide;

	(void)argc;
	(void)argv;

	memset(&aes, 0, sizeof(aes));
	aes_clean(&aes);
	aes_set_mbs(&aes, "ÈÈÈabc");

	/* wprintf() takes a wide format; %ls is the standard wide-string conversion. */
	wprintf(L"%ls\n", L"abcdef");
	wide = aes_get_wcs(&aes);
	if (wide != NULL)
		wprintf(L"%ls\n", wide);

	/* Release the converted buffer cached by aes_get_wcs(). */
	aes_clean(&aes);
	return (0);
}
#endif

View File

@ -29,9 +29,8 @@
#ifndef ARCHIVE_ENTRY_H_INCLUDED
#define ARCHIVE_ENTRY_H_INCLUDED
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <wchar.h>
/*
* Description of an archive entry.
@ -60,8 +59,6 @@ struct archive_entry;
struct archive_entry *archive_entry_clear(struct archive_entry *);
/* The 'clone' function does a deep copy; all of the strings are copied too. */
struct archive_entry *archive_entry_clone(struct archive_entry *);
/* The 'dup' function does a shallow copy; referenced strings aren't copied. */
struct archive_entry *archive_entry_dup(struct archive_entry *);
void archive_entry_free(struct archive_entry *);
struct archive_entry *archive_entry_new(void);
@ -78,6 +75,7 @@ const char *archive_entry_gname(struct archive_entry *);
const char *archive_entry_hardlink(struct archive_entry *);
mode_t archive_entry_mode(struct archive_entry *);
const char *archive_entry_pathname(struct archive_entry *);
const wchar_t *archive_entry_pathname_w(struct archive_entry *);
int64_t archive_entry_size(struct archive_entry *);
const struct stat *archive_entry_stat(struct archive_entry *);
const char *archive_entry_symlink(struct archive_entry *);
@ -93,19 +91,27 @@ const char *archive_entry_uname(struct archive_entry *);
void archive_entry_copy_stat(struct archive_entry *, const struct stat *);
void archive_entry_set_acl(struct archive_entry *, const char *);
void archive_entry_copy_acl_w(struct archive_entry *, const wchar_t *);
void archive_entry_set_acl_default(struct archive_entry *, const char *);
void archive_entry_copy_acl_default_w(struct archive_entry *, const wchar_t *);
void archive_entry_set_fflags(struct archive_entry *, const char *);
void archive_entry_copy_fflags_w(struct archive_entry *, const wchar_t *);
void archive_entry_set_devmajor(struct archive_entry *, dev_t);
void archive_entry_set_devminor(struct archive_entry *, dev_t);
void archive_entry_set_gid(struct archive_entry *, gid_t);
void archive_entry_set_gname(struct archive_entry *, const char *);
void archive_entry_copy_gname_w(struct archive_entry *, const wchar_t *);
void archive_entry_set_hardlink(struct archive_entry *, const char *);
void archive_entry_copy_hardlink_w(struct archive_entry *, const wchar_t *);
void archive_entry_set_mode(struct archive_entry *, mode_t);
void archive_entry_set_pathname(struct archive_entry *, const char *);
void archive_entry_copy_pathname_w(struct archive_entry *, const wchar_t *);
void archive_entry_set_size(struct archive_entry *, int64_t);
void archive_entry_set_symlink(struct archive_entry *, const char *);
void archive_entry_copy_symlink_w(struct archive_entry *, const wchar_t *);
void archive_entry_set_tartype(struct archive_entry *, char);
void archive_entry_set_uid(struct archive_entry *, uid_t);
void archive_entry_set_uname(struct archive_entry *, const char *);
void archive_entry_copy_uname_w(struct archive_entry *, const wchar_t *);
#endif /* !ARCHIVE_ENTRY_H_INCLUDED */

View File

@ -38,14 +38,38 @@
/* FreeBSD-specific definitions. */
#ifdef __FreeBSD__
#include <sys/cdefs.h> /* For __FBSDID */
/*
* Note that SUSv3 says that inttypes.h includes stdint.h.
* Since inttypes.h predates stdint.h, it's safest to always
* use inttypes.h instead of stdint.h.
*/
#include <inttypes.h> /* For int64_t, etc. */
#define HAVE_POSIX_ACL 1
#define HAVE_CHFLAGS 1
#define HAVE_LUTIMES 1
#define HAVE_LCHMOD 1
#define ARCHIVE_ERRNO_FILE_FORMAT EFTYPE
#define ARCHIVE_ERRNO_PROGRAMMER EDOOFUS
#define ARCHIVE_ERRNO_PROGRAMMER EINVAL
#define ARCHIVE_ERRNO_MISC (-1)
/*
* Older versions of inttypes.h don't have INT64_MAX, etc. Since
* SUSv3 requires them to be macros when they are defined, we can
* easily test for and define them here if necessary.
*/
#ifndef INT64_MAX
/* XXX Is this really necessary? XXX */
#ifdef __i386__
#define INT64_MAX 0x7fffffffffffffffLL
#define UINT64_MAX 0xffffffffffffffffULL
#else /* __alpha__ */
#define INT64_MAX 0x7fffffffffffffffL
#define UINT64_MAX 0xffffffffffffffffUL
#endif
#endif /* ! INT64_MAX */
#endif /* __FreeBSD__ */
/* No non-FreeBSD platform will have __FBSDID, so just define it here. */
#ifndef __FreeBSD__
@ -54,6 +78,7 @@
/* Linux */
#ifdef LINUX
#include <inttypes.h>
#define ARCHIVE_ERRNO_FILE_FORMAT EILSEQ
#define ARCHIVE_ERRNO_PROGRAMMER EINVAL
#define ARCHIVE_ERRNO_MISC (-1)

View File

@ -29,8 +29,6 @@
#ifndef ARCHIVE_PRIVATE_H_INCLUDED
#define ARCHIVE_PRIVATE_H_INCLUDED
#include <stdint.h>
#include "archive.h"
#include "archive_string.h"
@ -56,17 +54,6 @@ struct archive {
struct archive_entry *entry;
/*
* Space to store per-entry strings. Most header strings are
* copied here from the format-specific header, in order to
* gaurantee null-termination. Maybe these should go into
* per-format storage?
*/
struct archive_string entry_name;
struct archive_string entry_linkname;
struct archive_string entry_uname;
struct archive_string entry_gname;
/* Utility: Pointer to a block of nulls. */
const char *nulls;
size_t null_length;
@ -76,8 +63,8 @@ struct archive {
* will be able to register its own read_data routine and these
* will move into the per-format data for the formats that use them.
*/
uint64_t entry_bytes_remaining;
uint64_t entry_padding; /* Skip this much after entry data. */
off_t entry_bytes_remaining;
off_t entry_padding; /* Skip this much after entry data. */
uid_t user_uid; /* UID of current user. */
@ -108,25 +95,10 @@ struct archive {
int pad_uncompressed;
int pad_uncompressed_byte; /* TODO: Support this. */
/*
* PAX extended header data. When reading,
* name/linkname/uname/gname fields may point into here. This
* should be moved into per-format data storage.
*/
struct archive_string pax_header;
/*
* GNU header fields. These should be moved into format-specific
* storage.
*/
struct archive_string gnu_name;
struct archive_string gnu_linkname;
int gnu_header_recursion_depth;
/* Position in UNCOMPRESSED data stream. */
intmax_t file_position;
off_t file_position;
/* File offset of beginning of most recently-read header. */
intmax_t header_position;
off_t header_position;
/*
* Detection functions for decompression: bid functions are
@ -192,8 +164,8 @@ struct archive {
* multiple format readers active at one time, so we need to
* allow for multiple format readers to have their data
* available. The pformat_data slot here is the solution: on
* read, it's set up in the bid phase and is gauranteed to
* always point to a void* variable that the format can use.
* read, it is guaranteed to always point to a void* variable
* that the format can use.
*/
void **pformat_data; /* Pointer to current format_data. */
void *format_data; /* Used by writers. */
@ -255,4 +227,6 @@ int __archive_read_register_compression(struct archive *a,
int (*bid)(const void *, size_t),
int (*init)(struct archive *, const void *, size_t));
#define err_combine(a,b) ((a) < (b) ? (a) : (b))
#endif

View File

@ -331,8 +331,15 @@ archive_read_data(struct archive *a, void *buff, size_t s)
ssize_t bytes_read;
archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA);
if (s > a->entry_bytes_remaining)
s = a->entry_bytes_remaining;
/*
* off_t is generally at least as wide as size_t, so widen for
* comparison and narrow for the assignment. Otherwise, on
* platforms with 32-bit size_t and 64-bit off_t, we won't be
* able to correctly read archives with entries larger than
* 4gig.
*/
if ((off_t)s > a->entry_bytes_remaining)
s = (size_t)a->entry_bytes_remaining;
if (s > 0) {
bytes_read = (a->compression_read_ahead)(a, &data, 1);
if (bytes_read < 0) {
@ -424,20 +431,6 @@ archive_read_finish(struct archive *a)
*/
/* Casting a pointer to int allows us to remove 'const.' */
free((void *)(uintptr_t)(const void *)a->nulls);
if (a->entry_name.s != NULL)
free(a->entry_name.s);
if (a->entry_linkname.s != NULL)
free(a->entry_linkname.s);
if (a->entry_uname.s != NULL)
free(a->entry_uname.s);
if (a->entry_gname.s != NULL)
free(a->entry_gname.s);
if (a->pax_header.s != NULL)
free(a->pax_header.s);
if (a->gnu_name.s != NULL)
free(a->gnu_name.s);
if (a->gnu_linkname.s != NULL)
free(a->gnu_linkname.s);
if (a->extract_mkdirpath.s != NULL)
free(a->extract_mkdirpath.s);
if (a->entry)

View File

@ -52,7 +52,7 @@ archive_read_data_into_fd(struct archive *a, int fd)
a->entry_bytes_remaining);
if (bytes_read < 0)
return (-1);
if ((size_t)bytes_read > a->entry_bytes_remaining)
if (bytes_read > a->entry_bytes_remaining)
bytes_read = (ssize_t)a->entry_bytes_remaining;
bytes_written = write(fd, buff, bytes_read);

View File

@ -30,13 +30,14 @@ __FBSDID("$FreeBSD$");
#ifdef HAVE_DMALLOC
#include <dmalloc.h>
#endif
#include <err.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <zlib.h>
#include <err.h> /* zlib.h is borked, so must precede err.h */
#include "archive.h"
#include "archive_private.h"

View File

@ -34,7 +34,7 @@ __FBSDID("$FreeBSD$");
#endif
#include <err.h>
#include <errno.h>
#include <stdint.h>
/* #include <stdint.h> */ /* See archive_platform.h */
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
@ -70,6 +70,8 @@ struct links_entry {
struct cpio {
int magic;
struct links_entry *links_head;
struct archive_string entry_name;
struct archive_string entry_linkname;
};
static int64_t atol8(const char *, unsigned);
@ -177,18 +179,19 @@ archive_read_format_cpio_read_header(struct archive *a,
if (bytes < namelength)
return (ARCHIVE_FATAL);
(a->compression_read_consume)(a, namelength);
archive_strncpy(&a->entry_name, h, namelength);
archive_entry_set_pathname(entry, a->entry_name.s);
archive_strncpy(&cpio->entry_name, h, namelength);
archive_entry_set_pathname(entry, cpio->entry_name.s);
/* If this is a symlink, read the link contents. */
if (S_ISLNK(st.st_mode)) {
bytes = (a->compression_read_ahead)(a, &h,
a->entry_bytes_remaining);
if (bytes < a->entry_bytes_remaining)
if ((off_t)bytes < a->entry_bytes_remaining)
return (ARCHIVE_FATAL);
(a->compression_read_consume)(a, a->entry_bytes_remaining);
archive_strncpy(&a->entry_linkname, h, a->entry_bytes_remaining);
archive_entry_set_symlink(entry, a->entry_linkname.s);
archive_strncpy(&cpio->entry_linkname, h,
a->entry_bytes_remaining);
archive_entry_set_symlink(entry, cpio->entry_linkname.s);
a->entry_bytes_remaining = 0;
}

View File

@ -33,7 +33,8 @@ __FBSDID("$FreeBSD$");
#endif
#include <err.h>
#include <errno.h>
#include <stdint.h>
/* #include <stdint.h> */ /* See archive_platform.h */
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
@ -76,10 +77,21 @@ struct archive_entry_header_gnutar {
*/
};
/*
 * State private to the GNU tar reader.  Allocated (zero-filled) when
 * the format is registered and released by the format's cleanup hook.
 */
struct gnutar {
/* Null-terminated copy of the header's 'name' field. */
struct archive_string entry_name;
/* Null-terminated copy of the header's 'linkname' field. */
struct archive_string entry_linkname;
/* Null-terminated copy of the header's 'uname' field. */
struct archive_string entry_uname;
/* Null-terminated copy of the header's 'gname' field. */
struct archive_string entry_gname;
/*
 * Body of a 'L' (long filename) entry.  Kept apart from entry_name
 * because parsing the following header overwrites entry_name.
 */
struct archive_string gnu_name;
/*
 * Body of a 'K' (long linkname) entry.  Kept apart from
 * entry_linkname for the same reason.
 */
struct archive_string gnu_linkname;
/* Nesting depth of read_header calls for one entry; more than 32 is treated as an error. */
int gnu_header_recursion_depth;
};
static int archive_block_is_null(const unsigned char *p);
static int archive_header_gnu(struct archive *, struct archive_entry *,
const void *);
static int archive_read_format_gnutar_bid(struct archive *a);
static int archive_read_format_gnutar_cleanup(struct archive *);
static int archive_read_format_gnutar_read_header(struct archive *a,
struct archive_entry *);
static int checksum(struct archive *a, const void *h);
@ -93,11 +105,40 @@ static int64_t tar_atol256(const char *, unsigned);
int
archive_read_support_format_gnutar(struct archive *a)
{
struct gnutar *gnutar;
gnutar = malloc(sizeof(*gnutar));
memset(gnutar, 0, sizeof(*gnutar));
return (__archive_read_register_format(a,
NULL,
gnutar,
archive_read_format_gnutar_bid,
archive_read_format_gnutar_read_header,
NULL));
archive_read_format_gnutar_cleanup));
}
/*
 * Cleanup hook for the GNU tar reader: releases every string buffer
 * held in the format-private state, then the state itself, and clears
 * the slot that pointed at it.  Always returns ARCHIVE_OK.
 */
static int
archive_read_format_gnutar_cleanup(struct archive *a)
{
	struct gnutar *state = *(a->pformat_data);

	/* free(NULL) does nothing, so buffers that were never used are fine. */
	free(state->entry_name.s);
	free(state->entry_linkname.s);
	free(state->entry_uname.s);
	free(state->entry_gname.s);
	free(state->gnu_name.s);
	free(state->gnu_linkname.s);

	free(state);
	*(a->pformat_data) = NULL;
	return (ARCHIVE_OK);
}
static int
@ -154,7 +195,9 @@ archive_read_format_gnutar_read_header(struct archive *a,
const void *h;
ssize_t bytes;
int oldstate;
struct gnutar *gnutar;
gnutar = *(a->pformat_data);
a->archive_format = ARCHIVE_FORMAT_TAR_GNUTAR;
a->archive_format_name = "GNU tar";
@ -188,14 +231,14 @@ archive_read_format_gnutar_read_header(struct archive *a,
}
/* This function gets called recursively for long name headers, etc. */
if (++a->gnu_header_recursion_depth > 32)
if (++gnutar->gnu_header_recursion_depth > 32)
errx(EINVAL,
"*** Too many special headers for one entry; giving up. "
"(%s:%s@%d)\n",
__FUNCTION__, __FILE__, __LINE__);
archive_header_gnu(a, entry, h);
a->gnu_header_recursion_depth--;
gnutar->gnu_header_recursion_depth--;
return (0);
}
@ -267,10 +310,13 @@ archive_header_gnu(struct archive *a, struct archive_entry *entry,
{
struct stat st;
const struct archive_entry_header_gnutar *header;
struct gnutar *gnutar;
char tartype;
unsigned oldstate;
/* Clear out entry structure */
memset(&st, 0, sizeof(st));
gnutar = *(a->pformat_data);
/*
* GNU header is like POSIX, except 'prefix' is
@ -280,12 +326,13 @@ archive_header_gnu(struct archive *a, struct archive_entry *entry,
/* Copy filename over (to ensure null termination). */
header = h;
archive_strncpy(&(a->entry_name), header->name, sizeof(header->name));
archive_entry_set_pathname(entry, a->entry_name.s);
archive_strncpy(&(gnutar->entry_name), header->name,
sizeof(header->name));
archive_entry_set_pathname(entry, gnutar->entry_name.s);
/* Copy linkname over */
if (header->linkname[0])
archive_strncpy(&(a->entry_linkname), header->linkname,
archive_strncpy(&(gnutar->entry_linkname), header->linkname,
sizeof(header->linkname));
/* Parse out the numeric fields (all are octal) */
@ -301,13 +348,13 @@ archive_header_gnu(struct archive *a, struct archive_entry *entry,
st.st_mode &= ~S_IFMT;
/* Fields common to ustar and GNU */
archive_strncpy(&(a->entry_uname),
archive_strncpy(&(gnutar->entry_uname),
header->uname, sizeof(header->uname));
archive_entry_set_uname(entry, a->entry_uname.s);
archive_entry_set_uname(entry, gnutar->entry_uname.s);
archive_strncpy(&(a->entry_gname),
archive_strncpy(&(gnutar->entry_gname),
header->gname, sizeof(header->gname));
archive_entry_set_gname(entry, a->entry_gname.s);
archive_entry_set_gname(entry, gnutar->entry_gname.s);
/* Parse out device numbers only for char and block specials */
if (header->typeflag[0] == '3' || header->typeflag[0] == '4')
@ -329,7 +376,7 @@ archive_header_gnu(struct archive *a, struct archive_entry *entry,
/* Interpret entry type */
switch (tartype) {
case '1': /* Hard link */
archive_entry_set_hardlink(entry, a->entry_linkname.s);
archive_entry_set_hardlink(entry, gnutar->entry_linkname.s);
/*
* Note: Technically, tar does not store the file type
* for a "hard link" entry, only the fact that it is a
@ -341,7 +388,7 @@ archive_header_gnu(struct archive *a, struct archive_entry *entry,
case '2': /* Symlink */
st.st_mode |= S_IFLNK;
st.st_size = 0;
archive_entry_set_symlink(entry, a->entry_linkname.s);
archive_entry_set_symlink(entry, gnutar->entry_linkname.s);
archive_entry_copy_stat(entry, &st);
break;
case '3': /* Character device */
@ -376,32 +423,43 @@ archive_header_gnu(struct archive *a, struct archive_entry *entry,
break;
case 'K': /* GNU long linkname */
/* Entry body is full name of link for next header. */
archive_string_ensure(&(a->gnu_linkname), st.st_size+1);
archive_read_data_into_buffer(a, a->gnu_linkname.s,
archive_string_ensure(&(gnutar->gnu_linkname), st.st_size+1);
/* Temporarily fudge internal state for read_data call. */
oldstate = a->state;
a->state = ARCHIVE_STATE_DATA;
archive_read_data_into_buffer(a, gnutar->gnu_linkname.s,
st.st_size);
a->gnu_linkname.s[st.st_size] = 0; /* Null term name! */
a->state = oldstate;
gnutar->gnu_linkname.s[st.st_size] = 0; /* Null term name! */
/*
* This next call will usually overwrite
* a->entry_linkname, which is why we _must_ have a
* separate gnu_linkname field.
* gnutar->entry_linkname, which is why we _must_ have
* a separate gnu_linkname field.
*/
archive_read_format_gnutar_read_header(a, entry);
if (archive_entry_tartype(entry) == '1')
archive_entry_set_hardlink(entry, a->gnu_linkname.s);
archive_entry_set_hardlink(entry, gnutar->gnu_linkname.s);
else if (archive_entry_tartype(entry) == '2')
archive_entry_set_symlink(entry, a->gnu_linkname.s);
archive_entry_set_symlink(entry, gnutar->gnu_linkname.s);
/* TODO: else { ... } */
break;
case 'L': /* GNU long filename */
/* Entry body is full pathname for next header. */
archive_string_ensure(&(a->gnu_name), st.st_size+1);
archive_read_data_into_buffer(a, a->gnu_name.s,
archive_string_ensure(&(gnutar->gnu_name), st.st_size+1);
/* Temporarily fudge internal state for read_data call. */
oldstate = a->state;
a->state = ARCHIVE_STATE_DATA;
archive_read_data_into_buffer(a, gnutar->gnu_name.s,
st.st_size);
a->gnu_name.s[st.st_size] = 0; /* Null terminate name! */
/* This next call will typically overwrite a->entry_name, which
* is why we _must_ have a separate gnu_name field */
a->state = oldstate;
gnutar->gnu_name.s[st.st_size] = 0; /* Null terminate name! */
/*
* This next call will typically overwrite
* gnutar->entry_name, which is why we _must_ have a
* separate gnu_name field.
*/
archive_read_format_gnutar_read_header(a, entry);
archive_entry_set_pathname(entry, a->gnu_name.s);
archive_entry_set_pathname(entry, gnutar->gnu_name.s);
break;
case 'M': /* GNU Multi-volume (remainder of file from last archive) */
/*

View File

@ -32,7 +32,7 @@ __FBSDID("$FreeBSD$");
#include <dmalloc.h>
#endif
#include <errno.h>
#include <stdint.h>
/* #include <stdint.h> */ /* See archive_platform.h */
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
@ -63,39 +63,101 @@ struct archive_entry_header_ustar {
char prefix[155];
};
/*
 * Data specific to this format.
 *
 * One of these is allocated (zero-filled) when the tar reader is
 * registered; the reader's cleanup hook releases it.
 */
struct tar {
/* Body of a Solaris 'A' (ACL) entry; read but not yet interpreted. */
struct archive_string acl_text;
/* Null-terminated pathname assembled from the header's 'prefix' and 'name' fields. */
struct archive_string entry_name;
/* Null-terminated copy of the header's 'linkname' field (emptied when the field is blank). */
struct archive_string entry_linkname;
/* Null-terminated copy of the header's 'uname' field. */
struct archive_string entry_uname;
/* Null-terminated copy of the header's 'gname' field. */
struct archive_string entry_gname;
/* Body of a 'K' (long linkname) entry; overrides the link target of the entry that follows. */
struct archive_string longlink;
/* Body of a 'L' (long filename) entry; overrides the pathname of the entry that follows. */
struct archive_string longname;
/* Body of a pax 'x'/'X' extended header, parsed line by line after the following header. */
struct archive_string pax_header;
/* Body of a pax 'g' global header; stored but not otherwise used in the code visible here. */
struct archive_string pax_global;
/* Scratch buffer holding one pax attribute line decoded from UTF-8 to wide characters. */
wchar_t *pax_entry;
/* Capacity of pax_entry, counted in wchar_t elements (not bytes). */
size_t pax_entry_length;
/* Nesting depth of tar_read_header() for one entry; more than 32 is reported as an error. */
int header_recursion_depth;
};
static size_t UTF8_mbrtowc(wchar_t * __restrict pwc,
const char * __restrict s, size_t n,
mbstate_t * __restrict ps __unused);
static int archive_block_is_null(const unsigned char *p);
static int archive_header_common(struct archive *, struct archive_entry *,
struct stat *, const void *);
static int archive_header_old_tar(struct archive *,
static int header_Solaris_ACL(struct archive *, struct tar *,
struct archive_entry *, struct stat *, const void *);
static int archive_header_pax_extensions(struct archive *,
static int header_common(struct archive *, struct tar *,
struct archive_entry *, struct stat *, const void *);
static int archive_header_pax_global(struct archive *,
static int header_old_tar(struct archive *, struct tar *,
struct archive_entry *, struct stat *, const void *);
static int header_pax_extensions(struct archive *, struct tar *,
struct archive_entry *, struct stat *, const void *);
static int header_pax_global(struct archive *, struct tar *,
struct archive_entry *, struct stat *, const void *h);
static int header_longlink(struct archive *, struct tar *,
struct archive_entry *, struct stat *, const void *h);
static int header_longname(struct archive *, struct tar *,
struct archive_entry *, struct stat *, const void *h);
static int header_ustar(struct archive *, struct tar *,
struct archive_entry *, struct stat *, const void *h);
static int archive_header_ustar(struct archive *, struct archive_entry *,
struct stat *, const void *h);
static int archive_read_format_tar_bid(struct archive *);
static int archive_read_format_tar_cleanup(struct archive *);
static int archive_read_format_tar_read_header(struct archive *,
struct archive_entry *);
static int checksum(struct archive *, const void *);
static int pax_attribute(struct archive *, struct archive_entry *,
struct stat *, char *key, char *value);
static int pax_header(struct archive *, struct archive_entry *,
struct stat *, char *attr, uint64_t length);
static void pax_time(const char *, struct timespec *t);
static int pax_attribute(struct archive_entry *, struct stat *,
wchar_t *key, wchar_t *value);
static int pax_header(struct archive *, struct tar *,
struct archive_entry *, struct stat *, char *attr);
static void pax_time(const wchar_t *, struct timespec *t);
static int read_body_to_string(struct archive *, struct archive_string *,
const void *h);
static int64_t tar_atol(const char *, unsigned);
static int64_t tar_atol10(const char *, unsigned);
static int64_t tar_atol10(const wchar_t *, unsigned);
static int64_t tar_atol256(const char *, unsigned);
static int64_t tar_atol8(const char *, unsigned);
static int tar_read_header(struct archive *, struct tar *,
struct archive_entry *, struct stat *);
static int utf8_decode(wchar_t *, const char *, size_t length);
int
archive_read_support_format_tar(struct archive *a)
{
struct tar *tar;
tar = malloc(sizeof(*tar));
memset(tar, 0, sizeof(*tar));
return (__archive_read_register_format(a,
NULL,
tar,
archive_read_format_tar_bid,
archive_read_format_tar_read_header,
NULL));
archive_read_format_tar_cleanup));
}
/*
 * Cleanup hook for the tar reader.
 *
 * Releases every buffer owned by the format-private 'struct tar',
 * then the structure itself, and clears the slot that pointed at it.
 * Always returns ARCHIVE_OK.
 *
 * All heap-backed members must be listed here: the archive_string
 * buffers (including acl_text, longlink and longname, which are filled
 * by the 'A', 'K' and 'L' header handlers) and the wide-character
 * pax_entry scratch buffer grown by pax_header().
 */
static int
archive_read_format_tar_cleanup(struct archive *a)
{
	struct tar *tar;

	tar = *(a->pformat_data);

	/* free(NULL) is a no-op, so members that were never used need no guard. */
	free(tar->acl_text.s);
	free(tar->entry_name.s);
	free(tar->entry_linkname.s);
	free(tar->entry_uname.s);
	free(tar->entry_gname.s);
	free(tar->longlink.s);
	free(tar->longname.s);
	free(tar->pax_header.s);
	free(tar->pax_global.s);
	free(tar->pax_entry);

	free(tar);
	*(a->pformat_data) = NULL;
	return (ARCHIVE_OK);
}
@ -179,18 +241,37 @@ archive_read_format_tar_bid(struct archive *a)
return (bid);
}
/*
 * Entry point called by archive_read_header() for the tar format.
 * It fetches the format-private state, starts from a zeroed
 * 'struct stat', and hands the real work to tar_read_header(),
 * whose status it returns unchanged.
 */
static int
archive_read_format_tar_read_header(struct archive *a,
    struct archive_entry *entry)
{
	struct tar *state = *(a->pformat_data);
	struct stat sb;

	memset(&sb, 0, sizeof(sb));
	return (tar_read_header(a, state, entry, &sb));
}
/*
* This function recursively interprets all of the headers associated
* with a single entry.
*/
static int
tar_read_header(struct archive *a, struct tar *tar,
struct archive_entry *entry, struct stat *st)
{
ssize_t bytes;
int err;
const void *h;
const struct archive_entry_header_ustar *header;
memset(&st, 0, sizeof(st));
/* Read 512-byte header record */
bytes = (a->compression_read_ahead)(a, &h, 512);
if (bytes < 512) {
@ -208,8 +289,8 @@ archive_read_format_tar_read_header(struct archive *a,
/*
* Note: If the checksum fails and we return ARCHIVE_RETRY,
* then the client is likely to just retry. This is a very crude way
* to search for the next valid header!
* then the client is likely to just retry. This is a very
* crude way to search for the next valid header!
*
* TODO: Improve this by implementing a real header scan.
*/
@ -218,39 +299,58 @@ archive_read_format_tar_read_header(struct archive *a,
return (ARCHIVE_RETRY); /* Retryable: Invalid header */
}
if (++tar->header_recursion_depth > 32) {
archive_set_error(a, EINVAL, "Too many special headers");
return (ARCHIVE_WARN);
}
/* Determine the format variant. */
header = h;
if (memcmp(header->magic, "ustar", 5) != 0)
err = archive_header_old_tar(a, entry, &st, h); /* non-POSIX */
else switch(header->typeflag[0]) {
case 'g':
if (memcmp(header->magic, "ustar", 5) != 0) {
a->archive_format = ARCHIVE_FORMAT_TAR;
a->archive_format_name = "tar (non-POSIX)";
err = header_old_tar(a, tar, entry, st, h);
} else switch(header->typeflag[0]) {
case 'A': /* Solaris tar ACL */
a->archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
a->archive_format_name = "Solaris tar";
err = header_Solaris_ACL(a, tar, entry, st, h);
break;
case 'g': /* POSIX-standard 'g' header. */
a->archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
a->archive_format_name = "POSIX pax interchange format";
err = archive_header_pax_global(a, entry, &st, h);
err = header_pax_global(a, tar, entry, st, h);
break;
case 'x':
a->archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
a->archive_format_name = "POSIX pax interchange format";
err = archive_header_pax_extensions(a, entry, &st, h);
case 'K': /* Long link name (non-POSIX, but fairly common). */
err = header_longlink(a, tar, entry, st, h);
break;
case 'X':
case 'L': /* Long filename (non-POSIX, but fairly common). */
err = header_longname(a, tar, entry, st, h);
break;
case 'X': /* Used by SUN tar; same as 'x'. */
a->archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
a->archive_format_name =
"POSIX pax interchange format (Sun variant)";
err = archive_header_pax_extensions(a, entry, &st, h);
err = header_pax_extensions(a, tar, entry, st, h);
break;
case 'x': /* POSIX-standard 'x' header. */
a->archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
a->archive_format_name = "POSIX pax interchange format";
err = header_pax_extensions(a, tar, entry, st, h);
break;
default:
if (a->archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
if (a->archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE
&& a->archive_format != ARCHIVE_FORMAT_TAR_USTAR) {
a->archive_format = ARCHIVE_FORMAT_TAR_USTAR;
a->archive_format_name = "POSIX ustar";
a->archive_format_name = "POSIX ustar format";
}
err = archive_header_ustar(a, entry, &st, h);
err = header_ustar(a, tar, entry, st, h);
}
archive_entry_copy_stat(entry, &st);
archive_entry_copy_stat(entry, st);
--tar->header_recursion_depth;
return (err);
}
/*
* Return true if block checksum is correct.
*/
@ -298,7 +398,6 @@ checksum(struct archive *a, const void *h)
return (0);
}
/*
* Return true if this block contains only nulls.
*/
@ -313,29 +412,121 @@ archive_block_is_null(const unsigned char *p)
return (1);
}
/*
 * Handle an 'A' (Solaris ACL) header: store the entry body in
 * tar->acl_text, then parse the header that follows it.  The ACL
 * text itself is not interpreted yet; a notice is written to file
 * descriptor 2 instead.  Returns the combined status of both steps.
 */
static int
header_Solaris_ACL(struct archive *a, struct tar *tar,
    struct archive_entry *entry, struct stat *st, const void *h)
{
	static const char notice[] =
	    "\nXXX Solaris ACL entries recognized but not yet handled!!\n";
	int acl_status, entry_status;

	acl_status = read_body_to_string(a, &(tar->acl_text), h);
	entry_status = tar_read_header(a, tar, entry, st);

	/* XXX DO SOMETHING WITH THE ACL!!! XXX */
	write(2, notice, sizeof(notice) - 1);

	return (err_combine(acl_status, entry_status));
}
/*
 * Handle a 'K' (long linkname) header: the entry body is the full
 * link target for the entry described by the next header.  That next
 * header is parsed first; if both steps succeeded, the stored target
 * replaces the hardlink or symlink target, depending on the entry's
 * tar type.  Returns the combined status of both steps.
 */
static int
header_longlink(struct archive *a, struct tar *tar,
    struct archive_entry *entry, struct stat *st, const void *h)
{
	int body_status, entry_status;

	body_status = read_body_to_string(a, &(tar->longlink), h);
	entry_status = tar_read_header(a, tar, entry, st);

	/* Only override the target when everything parsed cleanly. */
	if (body_status != ARCHIVE_OK || entry_status != ARCHIVE_OK)
		return (err_combine(body_status, entry_status));

	if (archive_entry_tartype(entry) == '1')
		archive_entry_set_hardlink(entry, tar->longlink.s);
	else if (archive_entry_tartype(entry) == '2')
		archive_entry_set_symlink(entry, tar->longlink.s);

	return (err_combine(body_status, entry_status));
}
/*
 * Handle an 'L' (long filename) header: the entry body is the full
 * pathname for the entry described by the next header.  Returns the
 * combined status of reading the body and parsing that next header.
 */
static int
header_longname(struct archive *a, struct tar *tar,
    struct archive_entry *entry, struct stat *st, const void *h)
{
	int body_status, entry_status;

	body_status = read_body_to_string(a, &(tar->longname), h);

	/* Parse the "real" header; it fills in a (short) pathname. */
	entry_status = tar_read_header(a, tar, entry, st);

	/* Replace that pathname only when both steps succeeded. */
	if (body_status != ARCHIVE_OK || entry_status != ARCHIVE_OK)
		return (err_combine(body_status, entry_status));

	archive_entry_set_pathname(entry, tar->longname.s);
	return (err_combine(body_status, entry_status));
}
/*
* Read body of an archive entry into an archive_string object.
*/
static int
read_body_to_string(struct archive *a, struct archive_string *as, const void *h)
{
const struct archive_entry_header_ustar *header;
off_t size;
unsigned oldstate;
int err, err2;
header = h;
size = tar_atol(header->size, sizeof(header->size));
/* Temporarily fudge internal state for read_data call. */
oldstate = a->state;
a->state = ARCHIVE_STATE_DATA;
/* Read the body into the string. */
a->entry_bytes_remaining = size;
a->entry_padding = 0x1ff & -size;
archive_string_ensure(as, size+1);
err = archive_read_data_into_buffer(a, as->s, size);
as->s[size] = 0; /* Null terminate name! */
err2 = archive_read_data_skip(a); /* Resync for next header. */
/* Restore the state. */
a->state = oldstate;
return (err_combine(err, err2));
}
/*
* Parse out common header elements.
*
* This would be the same as archive_header_old_tar, except that the
* This would be the same as header_old_tar, except that the
* filename is handled slightly differently for old and POSIX
* entries (POSIX entries support a 'prefix'). This factoring
* allows archive_header_old_tar and archive_header_ustar
* allows header_old_tar and header_ustar
* to handle filenames differently, while still putting most of the
* common parsing into one place.
*/
static int
archive_header_common(struct archive *a, struct archive_entry *entry,
header_common(struct archive *a, struct tar *tar, struct archive_entry *entry,
struct stat *st, const void *h)
{
const struct archive_entry_header_ustar *header;
char tartype;
(void)a; /* UNUSED */
header = h;
if (header->linkname[0])
archive_strncpy(&(a->entry_linkname), header->linkname,
archive_strncpy(&(tar->entry_linkname), header->linkname,
sizeof(header->linkname));
else
archive_string_empty(&(a->entry_linkname));
archive_string_empty(&(tar->entry_linkname));
/* Parse out the numeric fields (all are octal) */
st->st_mode = tar_atol(header->mode, sizeof(header->mode));
@ -351,7 +542,7 @@ archive_header_common(struct archive *a, struct archive_entry *entry,
switch (tartype) {
case '1': /* Hard link */
archive_entry_set_hardlink(entry, a->entry_linkname.s);
archive_entry_set_hardlink(entry, tar->entry_linkname.s);
/*
* The following may seem odd, but: Technically, tar
* does not store the file type for a "hard link"
@ -367,7 +558,7 @@ archive_header_common(struct archive *a, struct archive_entry *entry,
case '2': /* Symlink */
st->st_mode |= S_IFLNK;
st->st_size = 0;
archive_entry_set_symlink(entry, a->entry_linkname.s);
archive_entry_set_symlink(entry, tar->entry_linkname.s);
break;
case '3': /* Character device */
st->st_mode |= S_IFCHR;
@ -397,24 +588,21 @@ archive_header_common(struct archive *a, struct archive_entry *entry,
}
/*
* Parse out header elements for "old-style" tar archives
* Parse out header elements for "old-style" tar archives.
*/
static int
archive_header_old_tar(struct archive *a, struct archive_entry *entry,
header_old_tar(struct archive *a, struct tar *tar, struct archive_entry *entry,
struct stat *st, const void *h)
{
const struct archive_entry_header_ustar *header;
a->archive_format = ARCHIVE_FORMAT_TAR;
a->archive_format_name = "tar (non-POSIX)";
/* Copy filename over (to ensure null termination). */
header = h;
archive_strncpy(&(a->entry_name), header->name, sizeof(header->name));
archive_entry_set_pathname(entry, a->entry_name.s);
archive_strncpy(&(tar->entry_name), header->name, sizeof(header->name));
archive_entry_set_pathname(entry, tar->entry_name.s);
/* Grab rest of common fields */
archive_header_common(a, entry, st, h);
header_common(a, tar, entry, st, h);
/*
* TODO: Decide whether the following special handling
@ -423,7 +611,7 @@ archive_header_old_tar(struct archive *a, struct archive_entry *entry,
/* "Regular" entry with trailing '/' is really directory. */
if (S_ISREG(st->st_mode) &&
'/' == a->entry_name.s[strlen(a->entry_name.s) - 1]) {
'/' == tar->entry_name.s[strlen(tar->entry_name.s) - 1]) {
st->st_mode &= ~S_IFMT;
st->st_mode |= S_IFDIR;
archive_entry_set_tartype(entry, '5');
@ -434,92 +622,29 @@ archive_header_old_tar(struct archive *a, struct archive_entry *entry,
return (0);
}
/*
* Parse a file header for a pax extended archive entry.
*/
static int
archive_header_pax_global(struct archive *a, struct archive_entry *entry,
struct stat *st, const void *h)
header_pax_global(struct archive *a, struct tar *tar,
struct archive_entry *entry, struct stat *st, const void *h)
{
uint64_t extension_size;
size_t bytes;
int err;
char *global;
const struct archive_entry_header_ustar *header;
int err, err2;
header = h;
extension_size = tar_atol(header->size, sizeof(header->size));
a->entry_bytes_remaining = extension_size;
a->entry_padding = 0x1ff & (-a->entry_bytes_remaining);
global = malloc(extension_size + 1);
archive_read_data_into_buffer(a, global, extension_size);
global[extension_size] = 0;
/*
* TODO: Store the global default options somewhere for future use.
* For now, just free the buffer and keep going.
*/
free(global);
/* Skip the padding. */
archive_read_data_skip(a);
/* Read the next header. */
bytes = (a->compression_read_ahead)(a, &h, 512);
if (bytes < 512) {
/* TODO: Set error values. */
return (-1);
}
(a->compression_read_consume)(a, 512);
header = h;
switch(header->typeflag[0]) {
case 'x':
case 'X':
err = archive_header_pax_extensions(a, entry, st, h);
break;
default:
err = archive_header_ustar(a, entry, st, h);
}
return (err);
err = read_body_to_string(a, &(tar->pax_global), h);
err2 = tar_read_header(a, tar, entry, st);
return (err_combine(err, err2));
}
static int
archive_header_pax_extensions(struct archive *a,
header_pax_extensions(struct archive *a, struct tar *tar,
struct archive_entry *entry, struct stat *st, const void *h)
{
uint64_t extension_size;
size_t bytes;
int err;
const struct archive_entry_header_ustar *header;
int oldstate;
read_body_to_string(a, &(tar->pax_header), h);
int err, err2;
header = h;
extension_size = tar_atol(header->size, sizeof(header->size));
a->entry_bytes_remaining = extension_size;
a->entry_padding = 0x1ff & (-a->entry_bytes_remaining);
archive_string_ensure(&(a->pax_header), extension_size + 1);
oldstate = a->state;
a->state = ARCHIVE_STATE_DATA;
archive_read_data_into_buffer(a, a->pax_header.s, extension_size);
a->pax_header.s[extension_size] = 0;
archive_read_data_skip(a); /* Skip any padding. */
a->state = oldstate;
/* Read the next header. */
bytes = (a->compression_read_ahead)(a, &h, 512);
if (bytes < 512) {
/* TODO: Set error values */
return (-1);
}
(a->compression_read_consume)(a, 512);
/* Must be a regular POSIX ustar entry. */
err = archive_header_ustar(a, entry, st, h);
/* Parse the next header. */
err = tar_read_header(a, tar, entry, st);
/*
* TODO: Parse global/default options into 'entry' struct here
@ -531,7 +656,8 @@ archive_header_pax_extensions(struct archive *a,
* and then skip any fields in the standard header that were
* defined in the pax header.
*/
pax_header(a, entry, st, a->pax_header.s, extension_size);
err2 = pax_header(a, tar, entry, st, tar->pax_header.s);
err = err_combine(err, err2);
a->entry_bytes_remaining = st->st_size;
a->entry_padding = 0x1ff & (-a->entry_bytes_remaining);
return (err);
@ -543,7 +669,7 @@ archive_header_pax_extensions(struct archive *a,
* handles "pax" or "extended ustar" entries.
*/
static int
archive_header_ustar(struct archive *a, struct archive_entry *entry,
header_ustar(struct archive *a, struct tar *tar, struct archive_entry *entry,
struct stat *st, const void *h)
{
const struct archive_entry_header_ustar *header;
@ -552,28 +678,28 @@ archive_header_ustar(struct archive *a, struct archive_entry *entry,
/* Copy name into an internal buffer to ensure null-termination. */
if (header->prefix[0]) {
archive_strncpy(&(a->entry_name), header->prefix,
archive_strncpy(&(tar->entry_name), header->prefix,
sizeof(header->prefix));
archive_strappend_char(&(a->entry_name), '/');
archive_strncat(&(a->entry_name), header->name,
archive_strappend_char(&(tar->entry_name), '/');
archive_strncat(&(tar->entry_name), header->name,
sizeof(header->name));
} else
archive_strncpy(&(a->entry_name), header->name,
archive_strncpy(&(tar->entry_name), header->name,
sizeof(header->name));
archive_entry_set_pathname(entry, a->entry_name.s);
archive_entry_set_pathname(entry, tar->entry_name.s);
/* Handle rest of common fields. */
archive_header_common(a, entry, st, h);
header_common(a, tar, entry, st, h);
/* Handle POSIX ustar fields. */
archive_strncpy(&(a->entry_uname), header->uname,
archive_strncpy(&(tar->entry_uname), header->uname,
sizeof(header->uname));
archive_entry_set_uname(entry, a->entry_uname.s);
archive_entry_set_uname(entry, tar->entry_uname.s);
archive_strncpy(&(a->entry_gname), header->gname,
archive_strncpy(&(tar->entry_gname), header->gname,
sizeof(header->gname));
archive_entry_set_gname(entry, a->entry_gname.s);
archive_entry_set_gname(entry, tar->entry_gname.s);
/* Parse out device numbers only for char and block specials. */
if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
@ -595,13 +721,16 @@ archive_header_ustar(struct archive *a, struct archive_entry *entry,
* Returns non-zero if there's an error in the data.
*/
static int
pax_header(struct archive *a, struct archive_entry *entry, struct stat *st,
char *attr, uint64_t attr_length)
pax_header(struct archive *a, struct tar *tar, struct archive_entry *entry,
struct stat *st, char *attr)
{
uint64_t l;
uint64_t line_length;
char *line, *key, *p, *value;
size_t attr_length, l, line_length;
char *line, *p;
wchar_t *key, *wp, *value;
int err, err2;
attr_length = strlen(attr);
err = ARCHIVE_OK;
while (attr_length > 0) {
/* Parse decimal length field at start of line. */
line_length = 0;
@ -626,36 +755,50 @@ pax_header(struct archive *a, struct archive_entry *entry, struct stat *st,
if (line_length > attr_length)
return (0);
/* Ensure pax_entry buffer is big enough. */
if (tar->pax_entry_length <= line_length) {
if (tar->pax_entry_length <= 0)
tar->pax_entry_length = 256;
while (tar->pax_entry_length <= line_length + 1)
tar->pax_entry_length *= 2;
/* XXX Error handling here */
tar->pax_entry = realloc(tar->pax_entry,
tar->pax_entry_length * sizeof(wchar_t));
}
/* Decode UTF-8 to wchar_t, null-terminate result. */
if (utf8_decode(tar->pax_entry, p,
line_length - (p - attr) - 1)) {
archive_set_error(a, ARCHIVE_ERRNO_MISC,
"Invalid UTF8 character in pax extended attribute");
err = err_combine(err, ARCHIVE_WARN);
}
/* Null-terminate 'key' value. */
/* XXX TODO: 'key' is officially UTF-8; should
* decode UTF-8 key to wchar here, then do
* all wchar matching below. XXX */
key = p;
p = strchr(key, '=');
if (p == NULL)
return (0);
if (p > line + line_length)
return (-1);
*p = 0;
if (strlen(key) < 1)
key = tar->pax_entry;
if (key[0] == L'=')
return (-1);
wp = wcschr(key, L'=');
if (wp == NULL) {
archive_set_error(a, ARCHIVE_ERRNO_MISC,
"Invalid pax extended attributes");
return (ARCHIVE_WARN);
}
*wp = 0;
/* Null-terminate 'value' portion. */
/* XXX need to decode UTF-8 value, make everything
* else wchar-clean. */
/* XXX should use pointer/length so that NULLs can
* appear within the value portion. <sigh> */
value = p + 1;
line[line_length - 1] = 0;
/* Identify null-terminated 'value' portion. */
value = wp + 1;
if (pax_attribute(a, entry, st, key, value))
return (-1);
/* Identify this attribute and set it in the entry. */
err2 = pax_attribute(entry, st, key, value);
err = err_combine(err, err2);
/* Skip to next line */
attr += line_length;
attr_length -= line_length;
}
return (0);
return (err);
}
@ -674,12 +817,9 @@ pax_header(struct archive *a, struct archive_entry *entry, struct stat *st,
* any of them look useful.
*/
static int
pax_attribute(struct archive *a, struct archive_entry *entry, struct stat *st,
char *key, char *value)
pax_attribute(struct archive_entry *entry, struct stat *st,
wchar_t *key, wchar_t *value)
{
(void)a; /* UNUSED */
switch (key[0]) {
case 'L':
/* Our extensions */
@ -691,71 +831,71 @@ pax_attribute(struct archive *a, struct archive_entry *entry, struct stat *st,
break;
case 'S':
/* We support some keys used by the "star" archiver */
if (strcmp(key, "SCHILY.acl.access")==0)
archive_entry_set_acl(entry, value);
else if (strcmp(key, "SCHILY.acl.default")==0)
archive_entry_set_acl_default(entry, value);
else if (strcmp(key, "SCHILY.devmajor")==0)
st->st_rdev = makedev(tar_atol10(value, strlen(value)),
if (wcscmp(key, L"SCHILY.acl.access")==0)
archive_entry_copy_acl_w(entry, value);
else if (wcscmp(key, L"SCHILY.acl.default")==0)
archive_entry_copy_acl_default_w(entry, value);
else if (wcscmp(key, L"SCHILY.devmajor")==0)
st->st_rdev = makedev(tar_atol10(value, wcslen(value)),
minor(st->st_dev));
else if (strcmp(key, "SCHILY.devminor")==0)
else if (wcscmp(key, L"SCHILY.devminor")==0)
st->st_rdev = makedev(major(st->st_dev),
tar_atol10(value, strlen(value)));
else if (strcmp(key, "SCHILY.fflags")==0)
archive_entry_set_fflags(entry, value);
else if (strcmp(key, "SCHILY.nlink")==0)
st->st_nlink = tar_atol10(value, strlen(value));
tar_atol10(value, wcslen(value)));
else if (wcscmp(key, L"SCHILY.fflags")==0)
archive_entry_copy_fflags_w(entry, value);
else if (wcscmp(key, L"SCHILY.nlink")==0)
st->st_nlink = tar_atol10(value, wcslen(value));
break;
case 'a':
if (strcmp(key, "atime")==0)
if (wcscmp(key, L"atime")==0)
pax_time(value, &(st->st_atimespec));
break;
case 'c':
if (strcmp(key, "ctime")==0)
if (wcscmp(key, L"ctime")==0)
pax_time(value, &(st->st_ctimespec));
else if (strcmp(key, "charset")==0) {
else if (wcscmp(key, L"charset")==0) {
/* TODO: Publish charset information in entry. */
} else if (strcmp(key, "comment")==0) {
} else if (wcscmp(key, L"comment")==0) {
/* TODO: Publish comment in entry. */
}
break;
case 'g':
if (strcmp(key, "gid")==0)
st->st_gid = tar_atol10(value, strlen(value));
else if (strcmp(key, "gname")==0)
archive_entry_set_gname(entry, value);
if (wcscmp(key, L"gid")==0)
st->st_gid = tar_atol10(value, wcslen(value));
else if (wcscmp(key, L"gname")==0)
archive_entry_copy_gname_w(entry, value);
break;
case 'l':
/* pax interchange doesn't distinguish hardlink vs. symlink. */
if (strcmp(key, "linkpath")==0) {
if (wcscmp(key, L"linkpath")==0) {
if (archive_entry_hardlink(entry))
archive_entry_set_hardlink(entry, value);
archive_entry_copy_hardlink_w(entry, value);
else
archive_entry_set_symlink(entry, value);
archive_entry_copy_symlink_w(entry, value);
}
break;
case 'm':
if (strcmp(key, "mtime")==0)
if (wcscmp(key, L"mtime")==0)
pax_time(value, &(st->st_mtimespec));
break;
case 'p':
if (strcmp(key, "path")==0)
archive_entry_set_pathname(entry, value);
if (wcscmp(key, L"path")==0)
archive_entry_copy_pathname_w(entry, value);
break;
case 'r':
/* POSIX has reserved 'realtime.*' */
break;
case 's':
/* POSIX has reserved 'security.*' */
/* Someday: if (strcmp(key, "security.acl")==0) { ... } */
if (strcmp(key, "size")==0)
st->st_size = tar_atol10(value, strlen(value));
/* Someday: if (wcscmp(key, L"security.acl")==0) { ... } */
if (wcscmp(key, L"size")==0)
st->st_size = tar_atol10(value, wcslen(value));
break;
case 'u':
if (strcmp(key, "uid")==0)
st->st_uid = tar_atol10(value, strlen(value));
else if (strcmp(key, "uname")==0)
archive_entry_set_uname(entry, value);
if (wcscmp(key, L"uid")==0)
st->st_uid = tar_atol10(value, wcslen(value));
else if (wcscmp(key, L"uname")==0)
archive_entry_copy_uname_w(entry, value);
break;
}
return (0);
@ -767,7 +907,7 @@ pax_attribute(struct archive *a, struct archive_entry *entry, struct stat *st,
* parse a decimal time value, which may include a fractional portion
*/
static void
pax_time(const char *p, struct timespec *t)
pax_time(const wchar_t *p, struct timespec *t)
{
char digit;
int64_t s;
@ -880,7 +1020,7 @@ tar_atol8(const char *p, unsigned char_cnt)
* it does obey locale.
*/
static int64_t
tar_atol10(const char *p, unsigned char_cnt)
tar_atol10(const wchar_t *p, unsigned char_cnt)
{
int64_t l;
int digit, sign;
@ -936,3 +1076,123 @@ tar_atol256(const char *p, unsigned char_cnt)
}
return (l);
}
/*
 * Convert up to 'length' bytes of UTF-8 text at 'src' into wide
 * characters stored at 'dest'.  The output is always terminated with
 * L'\0'; since every input byte yields at most one wide character,
 * at most length + 1 wide characters are written.
 *
 * Conversion stops early if a NUL character is decoded.  Any byte that
 * cannot be decoded is replaced by L'?' and skipped so that the rest of
 * the string can still be recovered.
 *
 * Returns 0 if all input was valid, 1 if any replacement was made.
 */
static int
utf8_decode(wchar_t *dest, const char *src, size_t length)
{
	size_t consumed;
	int bad_input = 0;

	for (; length > 0; src += consumed, length -= consumed, dest++) {
		consumed = UTF8_mbrtowc(dest, src, length, NULL);
		if (consumed == 0)
			break;	/* Decoded a NUL: end of string. */
		if (consumed > 8) {
			/*
			 * (size_t)-1 or (size_t)-2: the decoder rejected
			 * this byte.  Substitute a placeholder, step over
			 * one byte, and keep going.
			 */
			*dest = L'?';
			consumed = 1;
			bad_input = 1;
		}
	}
	*dest = L'\0';
	return (bad_input);
}
/*
 * Decode a single UTF-8 encoded character.
 * Derived from FreeBSD libc/locale.
 *
 * pwc	where to store the decoded character; may be NULL.
 * s	start of the encoded sequence; NULL is treated as a request to
 *	reset the (non-existent) shift state.
 * n	number of bytes available at 's'.
 * ps	conversion state; ignored, UTF-8 decoding here is stateless.
 *
 * Returns the number of bytes consumed, 0 if the decoded character is
 * L'\0' (or 's' is NULL), (size_t)-2 if the 'n' available bytes do not
 * hold a complete sequence, or (size_t)-1 with errno set to EILSEQ if
 * the input is not valid UTF-8.
 */
static size_t
UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
    mbstate_t * __restrict ps)
{
	int ch, i, len, mask;
	wchar_t lbound, wch;

	(void)ps; /* UNUSED */

	if (s == NULL)
		/* Reset to initial shift state (no-op) */
		return (0);
	if (n == 0)
		/* Incomplete multibyte sequence */
		return ((size_t)-2);

	/*
	 * Determine the number of octets that make up this character from
	 * the first octet, and a mask that extracts the interesting bits of
	 * the first octet.
	 *
	 * We also specify a lower bound for the character code to detect
	 * redundant, non-"shortest form" encodings. For example, the
	 * sequence C0 80 is _not_ a legal representation of the null
	 * character. This enforces a 1-to-1 mapping between character
	 * codes and their multibyte representations.
	 */
	ch = (unsigned char)*s;
	if ((ch & 0x80) == 0) {
		mask = 0x7f;
		len = 1;
		lbound = 0;
	} else if ((ch & 0xe0) == 0xc0) {
		mask = 0x1f;
		len = 2;
		lbound = 0x80;
	} else if ((ch & 0xf0) == 0xe0) {
		mask = 0x0f;
		len = 3;
		lbound = 0x800;
	} else if ((ch & 0xf8) == 0xf0) {
		mask = 0x07;
		len = 4;
		lbound = 0x10000;
	} else if ((ch & 0xfc) == 0xf8) {
		mask = 0x03;
		len = 5;
		lbound = 0x200000;
	} else if ((ch & 0xfe) == 0xfc) {
		/*
		 * The test must cover seven bits (1111110x); testing only
		 * six would also accept 0xFE and 0xFF, which are never
		 * valid lead octets.
		 */
		mask = 0x01;
		len = 6;
		lbound = 0x4000000;
	} else {
		/*
		 * Malformed input; input is not UTF-8.
		 */
		errno = EILSEQ;
		return ((size_t)-1);
	}

	if (n < (size_t)len)
		/* Incomplete multibyte sequence */
		return ((size_t)-2);

	/*
	 * Decode the octet sequence representing the character in chunks
	 * of 6 bits, most significant first.
	 */
	wch = (unsigned char)*s++ & mask;
	i = len;
	while (--i != 0) {
		if ((*s & 0xc0) != 0x80) {
			/*
			 * Malformed input; bad characters in the middle
			 * of a character.
			 */
			errno = EILSEQ;
			return ((size_t)-1);
		}
		wch <<= 6;
		wch |= *s++ & 0x3f;
	}
	if (wch < lbound) {
		/*
		 * Malformed input; redundant encoding.
		 */
		errno = EILSEQ;
		return ((size_t)-1);
	}
	if (pwc != NULL)
		*pwc = wch;
	return (wch == L'\0' ? 0 : len);
}

View File

@ -97,50 +97,3 @@ __archive_strappend_char(struct archive_string *as, char c)
{
return (__archive_string_append(as, &c, 1));
}
#if 0
/*
 * Append Unicode character 'c' to the string using UTF8 encoding.
 *
 * The character is encoded into one to six bytes according to its
 * magnitude and the encoded bytes are handed to
 * __archive_string_append(), whose result is returned.
 */
struct archive_string *
__archive_strappend_char_UTF8(struct archive_string *as, int c)
{
	char buff[6];

	if (c <= 0x7f) {
		buff[0] = c;
		return (__archive_string_append(as, buff, 1));
	} else if (c <= 0x7ff) {
		buff[0] = 0xc0 | (c >> 6);
		buff[1] = 0x80 | (c & 0x3f);
		return (__archive_string_append(as, buff, 2));
	} else if (c <= 0xffff) {
		buff[0] = 0xe0 | (c >> 12);
		buff[1] = 0x80 | ((c >> 6) & 0x3f);
		buff[2] = 0x80 | (c & 0x3f);
		return (__archive_string_append(as, buff, 3));
	} else if (c <= 0x1fffff) {
		buff[0] = 0xf0 | (c >> 18);
		buff[1] = 0x80 | ((c >> 12) & 0x3f);
		buff[2] = 0x80 | ((c >> 6) & 0x3f);
		buff[3] = 0x80 | (c & 0x3f);
		return (__archive_string_append(as, buff, 4));
	} else if (c <= 0x3ffffff) {
		buff[0] = 0xf8 | (c >> 24);
		buff[1] = 0x80 | ((c >> 18) & 0x3f);
		buff[2] = 0x80 | ((c >> 12) & 0x3f);
		buff[3] = 0x80 | ((c >> 6) & 0x3f);
		buff[4] = 0x80 | (c & 0x3f);
		return (__archive_string_append(as, buff, 5));
	} else if (c <= 0x7fffffff) {
		/*
		 * Six-byte form: one lead byte plus five continuation
		 * bytes, each in its own slot buff[1]..buff[5].
		 */
		buff[0] = 0xfc | (c >> 30);
		buff[1] = 0x80 | ((c >> 24) & 0x3f);
		buff[2] = 0x80 | ((c >> 18) & 0x3f);
		buff[3] = 0x80 | ((c >> 12) & 0x3f);
		buff[4] = 0x80 | ((c >> 6) & 0x3f);
		buff[5] = 0x80 | (c & 0x3f);
		return (__archive_string_append(as, buff, 6));
	} else {
		/* TODO: Handle this error?? */
		return (as);
	}
}
#endif

View File

@ -50,14 +50,15 @@ struct archive_string {
size_t buffer_length; /* Length of malloc-ed storage */
};
#define EMPTY_ARCHIVE_STRING {0,0,0}
/* Initialize an archive_string object on the stack or elsewhere. */
#define archive_string_init(a) \
do { (a)->s = NULL; (a)->length = 0; (a)->buffer_length = 0; } while(0)
/* Append a C char to an archive_string, resizing as necessary. */
struct archive_string *
__archive_strappend_char(struct archive_string *, char);
#define archive_strappend_char __archive_strappend_char
/* Append a char to an archive_string using UTF8. */
struct archive_string *
__archive_strappend_char_UTF8(struct archive_string *, int);
@ -86,7 +87,7 @@ __archive_strncat(struct archive_string *, const char *, size_t);
/* Copy a C string to an archive_string with limit, resizing as necessary. */
#define archive_strncpy(as,p,l) \
((as)->length=0,archive_strncat((as), (p), (l)))
((as)->length=0, archive_strncat((as), (p), (l)))
/* Return length of string. */
#define archive_strlen(a) ((a)->length)

View File

@ -35,7 +35,6 @@ __FBSDID("$FreeBSD$");
* needlessly bloating statically-linked clients.
*/
#include <sys/errno.h>
#include <sys/wait.h>
#ifdef HAVE_DMALLOC
#include <dmalloc.h>
@ -169,18 +168,6 @@ archive_write_finish(struct archive *a)
/* Release various dynamic buffers. */
free((void *)(uintptr_t)(const void *)a->nulls);
if (a->entry_name.s != NULL)
free(a->entry_name.s);
if (a->entry_linkname.s != NULL)
free(a->entry_linkname.s);
if (a->entry_uname.s != NULL)
free(a->entry_uname.s);
if (a->entry_gname.s != NULL)
free(a->entry_gname.s);
if (a->gnu_name.s != NULL)
free(a->gnu_name.s);
if (a->gnu_linkname.s != NULL)
free(a->gnu_linkname.s);
if (a->extract_mkdirpath.s != NULL)
free(a->extract_mkdirpath.s);
free(a);

View File

@ -41,7 +41,7 @@ __FBSDID("$FreeBSD$");
#include "archive_private.h"
struct write_file_data {
intmax_t offset;
off_t offset;
int fd;
char filename[1];
};

View File

@ -41,7 +41,7 @@ __FBSDID("$FreeBSD$");
#include "archive_private.h"
struct write_file_data {
intmax_t offset;
off_t offset;
int fd;
char filename[1];
};

View File

@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$");
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>
#include "archive.h"
#include "archive_entry.h"
@ -54,6 +55,8 @@ static void add_pax_attr_int(struct archive_string *,
static void add_pax_attr_time(struct archive_string *,
const char *key, int64_t sec,
unsigned long nanos);
static void add_pax_attr_w(struct archive_string *,
const char *key, const wchar_t *wvalue);
static int archive_write_pax_data(struct archive *,
const void *, size_t);
static int archive_write_pax_finish(struct archive *);
@ -183,6 +186,73 @@ add_pax_attr_int(struct archive_string *as, const char *key, int64_t value)
add_pax_attr(as, key, format_int(tmp + sizeof(tmp) - 1, value));
}
/*
 * Add a key/value attribute to the pax header where the value is a
 * wide-character string.  The value is converted to UTF-8 in a
 * temporary buffer, which is then passed to add_pax_attr() and freed.
 *
 * If the temporary buffer cannot be allocated the attribute is not
 * added; this function returns void and so cannot report the failure.
 */
static void
add_pax_attr_w(struct archive_string *as, const char *key, const wchar_t *wval)
{
	int	utf8len;
	const wchar_t *wp;
	wchar_t wc;
	char *utf8_value, *p;

	/*
	 * First pass: compute an upper bound on the number of bytes
	 * the UTF-8 encoding will need.
	 */
	utf8len = 0;
	for (wp = wval; *wp != L'\0'; ) {
		wc = *wp++;
		if (wc <= 0x7f)
			utf8len++;
		else if (wc <= 0x7ff)
			utf8len += 2;
		else if (wc <= 0xffff)
			utf8len += 3;
		else if (wc <= 0x1fffff)
			utf8len += 4;
		else if (wc <= 0x3ffffff)
			utf8len += 5;
		else
			utf8len += 6;
	}

	utf8_value = malloc(utf8len + 1);
	if (utf8_value == NULL)
		return;

	/* Second pass: encode each wide character. */
	for (wp = wval, p = utf8_value; *wp != L'\0'; ) {
		wc = *wp++;
		if (wc <= 0x7f) {
			*p++ = (char)wc;
		} else if (wc <= 0x7ff) {
			p[0] = 0xc0 | ((wc >> 6) & 0x1f);
			p[1] = 0x80 | (wc & 0x3f);
			p += 2;
		} else if (wc <= 0xffff) {
			p[0] = 0xe0 | ((wc >> 12) & 0x0f);
			p[1] = 0x80 | ((wc >> 6) & 0x3f);
			p[2] = 0x80 | (wc & 0x3f);
			p += 3;
		} else if (wc <= 0x1fffff) {
			p[0] = 0xf0 | ((wc >> 18) & 0x07);
			p[1] = 0x80 | ((wc >> 12) & 0x3f);
			p[2] = 0x80 | ((wc >> 6) & 0x3f);
			p[3] = 0x80 | (wc & 0x3f);
			p += 4;
		} else if (wc <= 0x3ffffff) {
			p[0] = 0xf8 | ((wc >> 24) & 0x03);
			p[1] = 0x80 | ((wc >> 18) & 0x3f);
			p[2] = 0x80 | ((wc >> 12) & 0x3f);
			p[3] = 0x80 | ((wc >> 6) & 0x3f);
			p[4] = 0x80 | (wc & 0x3f);
			p += 5;
		} else if (wc <= 0x7fffffff) {
			/*
			 * Six-byte form: one lead byte plus five
			 * continuation bytes in p[1]..p[5].
			 */
			p[0] = 0xfc | ((wc >> 30) & 0x01);
			p[1] = 0x80 | ((wc >> 24) & 0x3f);
			p[2] = 0x80 | ((wc >> 18) & 0x3f);
			p[3] = 0x80 | ((wc >> 12) & 0x3f);
			p[4] = 0x80 | ((wc >> 6) & 0x3f);
			p[5] = 0x80 | (wc & 0x3f);
			p += 6;
		}
	}
	/* add_pax_attr() takes a C string, so terminate the buffer. */
	*p = '\0';
	add_pax_attr(as, key, utf8_value);
	free(utf8_value);
}
/*
* Add a key/value attribute to the pax header. This function handles
* the length field and various other syntactic requirements.
@ -243,16 +313,18 @@ archive_write_pax_header(struct archive *a,
struct archive_entry *entry_original)
{
struct archive_entry *entry_main;
const char *linkname, *name_start, *p;
const char *linkname, *p;
const wchar_t *wp, *wp2, *wname_start;
int need_extension, oldstate, r, ret;
struct pax *pax;
const struct stat *st_main, *st_original;
struct archive_string pax_entry_name = EMPTY_ARCHIVE_STRING;
struct archive_string pax_entry_name;
char paxbuff[512];
char ustarbuff[512];
char ustar_entry_name[256];
archive_string_init(&pax_entry_name);
need_extension = 0;
pax = a->format_data;
pax->written = 1;
@ -281,7 +353,7 @@ archive_write_pax_header(struct archive *a,
}
/* Copy entry so we can modify it as needed. */
entry_main = archive_entry_dup(entry_original);
entry_main = archive_entry_clone(entry_original);
archive_string_empty(&(pax->pax_header)); /* Blank our work area. */
st_main = archive_entry_stat(entry_main);
@ -291,16 +363,26 @@ archive_write_pax_header(struct archive *a,
* 'prefix' fields. Here, I pick out the longest possible
* suffix, then test whether the remaining prefix is too long.
*/
wp = archive_entry_pathname_w(entry_main);
p = archive_entry_pathname(entry_main);
if (strlen(p) <= 100) /* Short enough for just 'name' field */
name_start = p; /* Record a zero-length prefix */
if (wcslen(wp) <= 100) /* Short enough for just 'name' field */
wname_start = wp; /* Record a zero-length prefix */
else
/* Find the largest suffix that fits in 'name' field. */
name_start = strchr(p + strlen(p) - 100 - 1, '/');
wname_start = wcschr(wp + wcslen(wp) - 100 - 1, '/');
/* If name is too long, add 'path' to pax extended attrs. */
if (name_start == NULL || name_start - p > 155) {
add_pax_attr(&(pax->pax_header), "path", p);
/* Find non-ASCII character, if any. */
wp2 = wp;
while (*wp2 != L'\0' && *wp2 < 128)
wp2++;
/*
* If name is too long, or has non-ASCII characters, add
* 'path' to pax extended attrs.
*/
if (wname_start == NULL || wname_start - wp > 155 ||
*wp2 != L'\0') {
add_pax_attr_w(&(pax->pax_header), "path", wp);
archive_entry_set_pathname(entry_main,
build_ustar_entry_name(ustar_entry_name, p));
need_extension = 1;

View File

@ -80,10 +80,10 @@ POSIX
.Dq pax interchange format
archives,
.It
POSIX octet-oriented cpio archives.
POSIX octet-oriented cpio archives,
.It
two different variants of shar archives.
.El
The default write format is the pax interchange
format.
Pax interchange format is an extension of the tar archive format that
eliminates essentially all of the limitations of historic tar formats
in a standard fashion that is supported
@ -91,6 +91,9 @@ by POSIX-compliant
.Xr pax 1
implementations on many systems as well as several newer implementations of
.Xr tar 1 .
Note that the default write format will suppress the pax extended
attributes for most entries; explicitly requesting pax format will
enable those attributes for all entries.
.Pp
The read and write APIs are accessed through the
.Fn archive_read_XXX
@ -238,10 +241,18 @@ variants have eliminated most restrictions on the length of textual fields.
Clients should not assume that filenames, link names, user names, or
group names are limited in length.
In particular, pax interchange format can easily accommodate pathnames
that exceed
in arbitrary character sets that exceed
.Va PATH_MAX .
.Sh RETURN VALUES
Most functions return zero on success, non-zero on error.
The return value indicates the general severity of the error, ranging
from
.Cm ARCHIVE_WARN ,
which indicates a minor problem that should probably be reported
to the user, to
.Cm ARCHIVE_FATAL ,
which indicates a serious problem that will prevent any further
operations on this archive.
On error, the
.Fn archive_errno
function can be used to retrieve a numeric error code (see
@ -257,21 +268,6 @@ return pointers to an allocated and initialized
.Tn struct archive
object.
.Pp
.Fn archive_read_next_header
returns a pointer to an
.Tn struct archive_entry
structure or
.Dv NULL .
If
.Dv NULL
is returned, the value from
.Fn archive_errno
will be zero if the end of the archive was reached,
-1 if there was a recoverable error reading the archive,
or positive if there was a non-recoverable error reading the archive.
If there was a recoverable error, the client should retry the
operation.
.Pp
.Fn archive_read_data
and
.Fn archive_write_data
@ -284,6 +280,9 @@ and
functions can be used to obtain more information.
.Sh ENVIRONMENT
The library currently obeys no environment variables.
There are character set conversions within the
.Xr archive_entry 3
functions that are impacted by the currently-selected locale.
.Sh SEE ALSO
.Xr tar 1 ,
.Xr archive_entry 3 ,
@ -304,8 +303,8 @@ library was written by
.An Tim Kientzle Aq kientzle@acm.org .
.Sh BUGS
Some archive formats support information that is not supported by
.Tn struct archive_entry
and cannot therefore be archived or restored using this library.
.Tn struct archive_entry .
Such information cannot be fully archived or restored using this library.
This includes, for example, comments, character sets, sparse
file information, or the arbitrary key/value pairs that can appear in
pax interchange format archives.
@ -317,9 +316,7 @@ is supported by all formats.
For example, cpio formats do not support nanosecond timestamps;
old tar formats do not support large device numbers.
.Pp
The library does not have write support for pre-POSIX tar archives.
The library cannot write pre-POSIX tar archives.
The support for GNU tar format is incomplete.
.Pp
The library should obey the current locale and convert
UTF8 filenames stored by pax interchange format to and from the
currently-active character coding.
Support for ACLs is still evolving and subject to change.

View File

@ -78,13 +78,13 @@ The header record for an old-style
archive consists of the following:
.Bd -literal -offset indent
struct tarfile_header_old {
char name[100];
char mode[8];
char uid[8];
char gid[8];
char size[12];
char mtime[12];
char checksum[8];
char name[100];
char mode[8];
char uid[8];
char gid[8];
char size[12];
char mtime[12];
char checksum[8];
};
.Ed
The remaining bytes in the header record are filled with nulls.
@ -157,15 +157,15 @@ and
fields were added:
.Bd -literal -offset indent
struct tarfile_entry_common {
char name[100];
char mode[8];
char uid[8];
char gid[8];
char size[12];
char mtime[12];
char checksum[8];
char linktype[1];
char linkname[100];
char name[100];
char mode[8];
char uid[8];
char gid[8];
char size[12];
char mtime[12];
char checksum[8];
char linktype[1];
char linkname[100];
};
.Ed
.Pp
@ -196,22 +196,22 @@ It extends the format above
with new fields:
.Bd -literal -offset indent
struct tarfile_entry_posix {
char name[100];
char mode[8];
char uid[8];
char gid[8];
char size[12];
char mtime[12];
char checksum[8];
char typeflag[1];
char linkname[100];
char magic[6];
char version[2];
char uname[32];
char gname[32];
char devmajor[8];
char devminor[8];
char prefix[155];
char name[100];
char mode[8];
char uid[8];
char gid[8];
char size[12];
char mtime[12];
char checksum[8];
char typeflag[1];
char linkname[100];
char magic[6];
char version[2];
char uname[32];
char gname[32];
char devmajor[8];
char devminor[8];
char prefix[155];
};
.Ed
.Bl -tag -width indent
@ -426,32 +426,32 @@ more lenient POSIX-compliant readers can successfully extract most
GNU tar archives.
.Bd -literal -offset indent
struct tarfile_entry_gnu {
char name[100];
char mode[8];
char uid[8];
char gid[8];
char size[12];
char mtime[12];
char checksum[8];
char typeflag[1];
char linkname[100];
char magic[6];
char version[2];
char uname[32];
char gname[32];
char devmajor[8];
char devminor[8];
char atime[12];
char ctime[12];
char offset[12];
char longnames[4];
char unused[1];
struct {
char offset[12];
char numbytes[12];
} sparse[4];
char isextended[1];
char realsize[12];
char name[100];
char mode[8];
char uid[8];
char gid[8];
char size[12];
char mtime[12];
char checksum[8];
char typeflag[1];
char linkname[100];
char magic[6];
char version[2];
char uname[32];
char gname[32];
char devmajor[8];
char devminor[8];
char atime[12];
char ctime[12];
char offset[12];
char longnames[4];
char unused[1];
struct {
char offset[12];
char numbytes[12];
} sparse[4];
char isextended[1];
char realsize[12];
};
.Ed
.Bl -tag -width indent
@ -569,8 +569,42 @@ additional
records.
Each such record contains XXX more details needed XXX
.It Va realsize
A binary representation of the size, with a much larger range
A binary representation of the file's complete size, with a much larger range
than the POSIX file size.
In particular, with
.Cm M
type files, the current entry is only a portion of the file.
In that case, the POSIX size field will indicate the size of this
entry; the
.Va realsize
field will indicate the total size of the file.
.El
.Ss Solaris Tar
XXX More Details Needed XXX
.Pp
Solaris tar supports an
.Dq extended
format that is fundamentally similar to pax interchange format,
with the following differences:
.Bl -bullet -compact -width indent
.It
Extended attributes are stored in an entry whose type is
.Cm X ,
not
.Cm x ,
as used by pax interchange format.
The detailed format of this entry appears to be the same
as detailed above for the
.Cm x
entry.
.It
An additional
.Cm A
entry is used to store an ACL for the following regular entry.
The body of this entry contains a seven-digit octal number
(whose value is 01000000 plus the number of ACL entries)
followed by a zero byte, followed by the
textual ACL description.
.El
.Ss Other Extensions
One common extension, utilized by GNU tar, star, and other newer