diff --git a/lib/libarchive/Makefile b/lib/libarchive/Makefile index 8e13dcb6b6eb..f1ff70e5b92e 100644 --- a/lib/libarchive/Makefile +++ b/lib/libarchive/Makefile @@ -1,8 +1,14 @@ # Makefile for libarchive. # # $FreeBSD$ -# + LIB= archive +SHLIB_MAJOR= 1 + +# I'm not yet ready for a shared version of this library, as +# there are still a couple of API changes still in the works. +NOSHLIBS= 1 + SRCS= archive_check_magic.c \ archive_entry.c \ archive_read.c \ @@ -114,8 +120,8 @@ DEBUG_FLAGS+= -DDEBUG -g CFLAGS+= -DHAVE_DMALLOC -I/usr/local/include LDFLAGS+= -L/usr/local/lib -ldmalloc .endif +CFLAGS+= -O3 -# Should be WARNS=6, except that zlib.h is borked. -WARNS?= 3 +WARNS?= 6 .include diff --git a/lib/libarchive/Makefile.freebsd b/lib/libarchive/Makefile.freebsd index 8e13dcb6b6eb..f1ff70e5b92e 100644 --- a/lib/libarchive/Makefile.freebsd +++ b/lib/libarchive/Makefile.freebsd @@ -1,8 +1,14 @@ # Makefile for libarchive. # # $FreeBSD$ -# + LIB= archive +SHLIB_MAJOR= 1 + +# I'm not yet ready for a shared version of this library, as +# there are still a couple of API changes still in the works. +NOSHLIBS= 1 + SRCS= archive_check_magic.c \ archive_entry.c \ archive_read.c \ @@ -114,8 +120,8 @@ DEBUG_FLAGS+= -DDEBUG -g CFLAGS+= -DHAVE_DMALLOC -I/usr/local/include LDFLAGS+= -L/usr/local/lib -ldmalloc .endif +CFLAGS+= -O3 -# Should be WARNS=6, except that zlib.h is borked. 
-WARNS?= 3 +WARNS?= 6 .include diff --git a/lib/libarchive/archive.h b/lib/libarchive/archive.h index f0028b0c9dab..18a8bdb5e5fc 100644 --- a/lib/libarchive/archive.h +++ b/lib/libarchive/archive.h @@ -29,9 +29,8 @@ #ifndef ARCHIVE_H_INCLUDED #define ARCHIVE_H_INCLUDED -#include -#include -#include +#include /* For int64_t */ +#include /* For ssize_t and size_t */ #define ARCHIVE_BYTES_PER_RECORD 512 #define ARCHIVE_DEFAULT_BYTES_PER_BLOCK 10240 diff --git a/lib/libarchive/archive.h.in b/lib/libarchive/archive.h.in index f0028b0c9dab..18a8bdb5e5fc 100644 --- a/lib/libarchive/archive.h.in +++ b/lib/libarchive/archive.h.in @@ -29,9 +29,8 @@ #ifndef ARCHIVE_H_INCLUDED #define ARCHIVE_H_INCLUDED -#include -#include -#include +#include /* For int64_t */ +#include /* For ssize_t and size_t */ #define ARCHIVE_BYTES_PER_RECORD 512 #define ARCHIVE_DEFAULT_BYTES_PER_BLOCK 10240 diff --git a/lib/libarchive/archive_entry.3 b/lib/libarchive/archive_entry.3 index c247b237272d..0a978543d193 100644 --- a/lib/libarchive/archive_entry.3 +++ b/lib/libarchive/archive_entry.3 @@ -30,13 +30,20 @@ .Sh NAME .Nm archive_entry_clear .Nm archive_entry_clone +.Nm archive_entry_copy_gname_w +.Nm archive_entry_copy_hardlink_w +.Nm archive_entry_copy_pathname_w .Nm archive_entry_copy_stat -.Nm archive_entry_dup +.Nm archive_entry_copy_symlink_w +.Nm archive_entry_copy_uname_w .Nm archive_entry_free .Nm archive_entry_gname +.Nm archive_entry_gname_w .Nm archive_entry_hardlink +.Nm archive_entry_hardlink_w .Nm archive_entry_new .Nm archive_entry_pathname +.Nm archive_entry_pathname_w .Nm archive_entry_set_devmajor .Nm archive_entry_set_devminor .Nm archive_entry_set_gid @@ -51,8 +58,10 @@ .Nm archive_entry_size .Nm archive_entry_stat .Nm archive_entry_symlink +.Nm archive_entry_symlink_w .Nm archive_entry_tartype .Nm archive_entry_uname +.Nm archive_entry_uname_w .Nd functions for manipulating archive entry descriptions .Sh SYNOPSIS .In archive_entry.h @@ -61,19 +70,33 @@ .Ft struct archive_entry 
* .Fn archive_entry_clone "struct archive_entry *" .Ft void +.Fn archive_entry_copy_gname_w "struct archive_entry *" "const wchar_t *" +.Ft void +.Fn archive_entry_copy_hardlink_w "struct archive_entry *" "const wchar_t *" +.Ft void +.Fn archive_entry_copy_pathname_w "struct archive_entry *" "const wchar_t *" +.Ft void .Fn archive_entry_copy_stat "struct archive_entry *" "struct stat *" -.Ft struct archive_entry * -.Fn archive_entry_dup "struct archive_entry *" +.Ft void +.Fn archive_entry_copy_symlink_w "struct archive_entry *" "const wchar_t *" +.Ft void +.Fn archive_entry_copy_uname_w "struct archive_entry *" "const wchar_t *" .Ft void .Fn archive_entry_free "struct archive_entry *" .Ft const char * .Fn archive_entry_gname "struct archive_entry *" +.Ft const wchar_t * +.Fn archive_entry_gname_w "struct archive_entry *" .Ft const char * .Fn archive_entry_hardlink "struct archive_entry *" +.Ft const wchar_t * +.Fn archive_entry_hardlink_w "struct archive_entry *" .Ft struct archive_entry * .Fn archive_entry_new "void" .Ft const char * .Fn archive_entry_pathname "struct archive_entry *" +.Ft const wchar_t * +.Fn archive_entry_pathname_w "struct archive_entry *" .Ft void .Fn archive_entry_set_devmajor "struct archive_entry *" "dev_t" .Ft void @@ -102,24 +125,64 @@ .Fn archive_entry_stat "struct archive_entry *" .Ft const char * .Fn archive_entry_symlink "struct archive_entry *" +.Ft const wchar_t * +.Fn archive_entry_symlink_w "struct archive_entry *" .Ft int .Fn archive_entry_tartype "struct archive_entry *" .Ft const char * .Fn archive_entry_uname "struct archive_entry *" +.Ft const wchar_t * +.Fn archive_entry_uname_w "struct archive_entry *" .Sh DESCRIPTION These functions create and manipulate data objects that represent entries within an archive. 
You can think of a .Tn struct archive_entry -as a -.Tn struct stat -on steroids: it includes everything from +as a heavy-duty version of +.Tn struct stat : +it includes everything from .Tn struct stat plus associated pathname, textual group and user names, etc. These objects are used by .Xr libarchive 3 to represent the metadata associated with a particular entry in an archive. +.Pp +Most of the functions here set or read entries +in an object. Such functions have one of the +following forms: +.Bl -tag -compact -width indent +.It Fn archive_entry_set_XXXX +Stores the provided data in the object. +In particular, for strings, the pointer is stored, +not the referenced string. +.It Fn archive_entry_copy_XXXX +As above, except that the referenced data is copied +into the object. +.It Fn archive_entry_XXXX +Returns the specified data. +In the case of strings, a const-qualified pointer to +the string is returned. +.El +The string data can be accessed as wide character strings +(which are suffixed with +.Cm _w ) +or normal +.Va char +strings. +Note that these are different representations of the same +data: +For example, if you store a narrow string and read the corresponding +wide string, the object will transparently convert formats +using the current locale. +Similarly, if you store a wide string and then store a +narrow string for the same data, the previously-set wide string will +be discarded in favor of the new data. +.Pp +The remaining functions allocate, destroy, clear, and copy +.Va archive_entry +objects. +These functions are described below: .Bl -tag -compact -width indent .It Fn archive_entry_clear Erases the object, resetting all internal fields to the @@ -128,76 +191,20 @@ This is provided to allow you to quickly recycle objects without thrashing the heap. .It Fn archive_entry_clone A deep copy operation; all text fields are duplicated. 
-.It Fn archive_entry_copy_stat -Copies the contents of the provided -.Tn struct stat -into the -.Tn struct archive_entry -object. -.It Fn archive_entry_dup -A shallow copy; text fields are not duplicated. .It Fn archive_entry_free Releases the .Tn struct archive_entry object. -.It Fn archive_entry_gname -Returns a pointer to the textual group name. -.It Fn archive_entry_hardlink -If this function returns non-NULL, then this object represents -a hardlink to another filesystem object. -The contents contain the pathname of the object. .It Fn archive_entry_new Allocate and return a blank .Tn struct archive_entry object. -.It Fn archive_entry_pathname -Returns a pointer to the pathname. -.It Fn archive_entry_set_devmajor -Sets the device major number (only valid for objects representing -block and character devices). -.It Fn archive_entry_set_devminor -Sets the device minor number (only valid for objects representing -block and character devices). -.It Fn archive_entry_set_gid -Sets the group ID for the object. -.It Fn archive_entry_set_gname -Sets a pointer to the textual group name. -Note that the name itself is not copied. -.It Fn archive_entry_set_hardlink -Sets the hardlink property; see -.Fn archive_entry_hardlink -above. -.It Fn archive_entry_set_mode -Sets the file mode. -.It Fn archive_entry_set_pathname -Sets a pointer to the pathname. -Note that the pathname text is not copied. -.It Fn archive_entry_set_symlink -Sets a pointer to the contents of a symbolic link. -Note that the pathname text is not copied. .It Fn archive_entry_set_tartype Sets the value to be used in a tar-format header for this entry. Client code should generally not set this; if it is left unset, the library will automatically determine an appropriate value. -.It Fn archive_entry_set_uid -Set the user ID for the object. -.It Fn archive_entry_set_uname -Sets a pointer to the textual user name. -Note that the name itself is not copied. 
-.It Fn archive_entry_size -Returns the size of the object on disk in bytes. -.It Fn archive_entry_stat -Returns a pointer to a populated -.Tn struct stat . -.It Fn archive_entry_symlink -Returns a pointer to the symlink contents. -.It Fn archive_entry_tartype -Returns the value used in a tar-format header. -Not generally useful to clients. -.It Fn archive_entry_uname -Returns a pointer to the textual user name. .El .\" .Sh EXAMPLE .\" .Sh RETURN VALUES @@ -215,4 +222,4 @@ The .Nm libarchive library was written by .An Tim Kientzle Aq kientzle@acm.org . -.Sh BUGS +.\" .Sh BUGS diff --git a/lib/libarchive/archive_entry.c b/lib/libarchive/archive_entry.c index 268afe615e39..b9a86c1c090c 100644 --- a/lib/libarchive/archive_entry.c +++ b/lib/libarchive/archive_entry.c @@ -32,11 +32,34 @@ __FBSDID("$FreeBSD$"); #ifdef HAVE_DMALLOC #include #endif +#include #include #include +#include #include "archive_entry.h" +/* + * Handle wide character (i.e., Unicode) and non-wide character + * strings transparently. + * + */ + +struct aes { + const char *aes_mbs; + char *aes_mbs_alloc; + const wchar_t *aes_wcs; + wchar_t *aes_wcs_alloc; +}; + +void aes_clean(struct aes *); +void aes_copy(struct aes *dest, struct aes *src); +const char * aes_get_mbs(struct aes *); +const wchar_t * aes_get_wcs(struct aes *); +void aes_set_mbs(struct aes *, const char *mbs); +void aes_set_wcs(struct aes *, const wchar_t *wcs); +void aes_copy_wcs(struct aes *, const wchar_t *wcs); + /* * Description of an archive entry. * @@ -70,24 +93,144 @@ struct archive_entry { int ae_tartype; /* - * Note: If you add any more string fields, update - * archive_entry_clone accordingly. + * Use aes here so that we get transparent mbs<->wcs conversions. 
*/ - const char *ae_acl; /* ACL text */ - const char *ae_acl_default; /* default ACL */ - const char *ae_fflags; /* Text fflags per fflagstostr(3) */ - const char *ae_gname; /* Name of owning group */ - const char *ae_hardlink; /* Name of target for hardlink */ - const char *ae_pathname; /* Name of entry */ - const char *ae_symlink; /* symlink contents */ - const char *ae_uname; /* Name of owner */ - - char buff[1]; /* MUST BE AT END OF STRUCT!!! */ + struct aes ae_acl; /* ACL text */ + struct aes ae_acl_default; /* default ACL */ + struct aes ae_fflags; /* Text fflags per fflagstostr(3) */ + struct aes ae_gname; /* Name of owning group */ + struct aes ae_hardlink; /* Name of target for hardlink */ + struct aes ae_pathname; /* Name of entry */ + struct aes ae_symlink; /* symlink contents */ + struct aes ae_uname; /* Name of owner */ }; +void +aes_clean(struct aes *aes) +{ + if (aes->aes_mbs_alloc) { + free(aes->aes_mbs_alloc); + aes->aes_mbs_alloc = NULL; + } + if (aes->aes_wcs_alloc) { + free(aes->aes_wcs_alloc); + aes->aes_wcs_alloc = NULL; + } + memset(aes, 0, sizeof(*aes)); +} + +void +aes_copy(struct aes *dest, struct aes *src) +{ + *dest = *src; + if (src->aes_mbs_alloc != NULL) { + dest->aes_mbs_alloc = strdup(src->aes_mbs_alloc); + dest->aes_mbs = dest->aes_mbs_alloc; + } + + if (src->aes_wcs_alloc != NULL) { + dest->aes_wcs_alloc = malloc((wcslen(src->aes_wcs_alloc) + 1) + * sizeof(wchar_t)); + dest->aes_wcs = dest->aes_wcs_alloc; + wcscpy(dest->aes_wcs_alloc, src->aes_wcs); + } +} + +const char * +aes_get_mbs(struct aes *aes) +{ + if (aes->aes_mbs == NULL && aes->aes_wcs != NULL) { + /* + * XXX Need to estimate the number of byte in the + * multi-byte form. Assume that, on average, wcs + * chars encode to no more than 3 bytes. There must + * be a better way... 
XXX + */ + int mbs_length = wcslen(aes->aes_wcs) * 3 + 64; + aes->aes_mbs_alloc = malloc(mbs_length); + aes->aes_mbs = aes->aes_mbs_alloc; + wcstombs(aes->aes_mbs_alloc, aes->aes_wcs, mbs_length - 1); + aes->aes_mbs_alloc[mbs_length - 1] = 0; + } + return (aes->aes_mbs); +} + +const wchar_t * +aes_get_wcs(struct aes *aes) +{ + if (aes->aes_wcs == NULL && aes->aes_mbs != NULL) { + /* + * No single byte will be more than one wide character, + * so this length estimate will always be big enough. + */ + int wcs_length = strlen(aes->aes_mbs); + aes->aes_wcs_alloc + = malloc((wcs_length + 1) * sizeof(wchar_t)); + aes->aes_wcs = aes->aes_wcs_alloc; + mbstowcs(aes->aes_wcs_alloc, aes->aes_mbs, wcs_length); + aes->aes_wcs_alloc[wcs_length] = 0; + } + return (aes->aes_wcs); +} + +void +aes_set_mbs(struct aes *aes, const char *mbs) +{ + if (aes->aes_mbs_alloc) { + free(aes->aes_mbs_alloc); + aes->aes_mbs_alloc = NULL; + } + if (aes->aes_wcs_alloc) { + free(aes->aes_wcs_alloc); + aes->aes_wcs_alloc = NULL; + } + aes->aes_mbs = mbs; + aes->aes_wcs = NULL; +} + +void +aes_set_wcs(struct aes *aes, const wchar_t *wcs) +{ + if (aes->aes_mbs_alloc) { + free(aes->aes_mbs_alloc); + aes->aes_mbs_alloc = NULL; + } + if (aes->aes_wcs_alloc) { + free(aes->aes_wcs_alloc); + aes->aes_wcs_alloc = NULL; + } + aes->aes_mbs = NULL; + aes->aes_wcs = wcs; +} + +void +aes_copy_wcs(struct aes *aes, const wchar_t *wcs) +{ + if (aes->aes_mbs_alloc) { + free(aes->aes_mbs_alloc); + aes->aes_mbs_alloc = NULL; + } + if (aes->aes_wcs_alloc) { + free(aes->aes_wcs_alloc); + aes->aes_wcs_alloc = NULL; + } + aes->aes_mbs = NULL; + aes->aes_wcs_alloc = malloc((wcslen(wcs) + 1) * sizeof(wchar_t)); + wcscpy(aes->aes_wcs_alloc, wcs); + aes->aes_wcs = aes->aes_wcs_alloc; +} + struct archive_entry * archive_entry_clear(struct archive_entry *entry) { + aes_clean(&entry->ae_acl); + aes_clean(&entry->ae_acl_default); + aes_clean(&entry->ae_fflags); + aes_clean(&entry->ae_gname); + aes_clean(&entry->ae_hardlink); + 
aes_clean(&entry->ae_pathname); + aes_clean(&entry->ae_symlink); + aes_clean(&entry->ae_uname); memset(entry, 0, sizeof(*entry)); entry->ae_tartype = -1; return entry; @@ -95,99 +238,30 @@ archive_entry_clear(struct archive_entry *entry) struct archive_entry * archive_entry_clone(struct archive_entry *entry) -{ - int size; - struct archive_entry *entry2; - char *p; - - size = sizeof(*entry2); - if (entry->ae_acl) - size += strlen(entry->ae_acl) + 1; - if (entry->ae_acl_default) - size += strlen(entry->ae_acl_default) + 1; - if (entry->ae_fflags) - size += strlen(entry->ae_fflags) + 1; - if (entry->ae_gname) - size += strlen(entry->ae_gname) + 1; - if (entry->ae_hardlink) - size += strlen(entry->ae_hardlink) + 1; - if (entry->ae_pathname) - size += strlen(entry->ae_pathname) + 1; - if (entry->ae_symlink) - size += strlen(entry->ae_symlink) + 1; - if (entry->ae_uname) - size += strlen(entry->ae_uname) + 1; - - entry2 = malloc(size); - *entry2 = *entry; - - /* Copy all of the strings from the original. 
*/ - p = entry2->buff; - - if (entry->ae_acl) { - entry2->ae_acl = p; - strcpy(p, entry->ae_acl); - p += strlen(p) + 1; - } - - if (entry->ae_acl_default) { - entry2->ae_acl_default = p; - strcpy(p, entry->ae_acl_default); - p += strlen(p) + 1; - } - - if (entry->ae_fflags) { - entry2->ae_fflags = p; - strcpy(p, entry->ae_fflags); - p += strlen(p) + 1; - } - - if (entry->ae_gname) { - entry2->ae_gname = p; - strcpy(p, entry->ae_gname); - p += strlen(p) + 1; - } - - if (entry->ae_hardlink) { - entry2->ae_hardlink = p; - strcpy(p, entry->ae_hardlink); - p += strlen(p) + 1; - } - - if (entry->ae_pathname) { - entry2->ae_pathname = p; - strcpy(p, entry->ae_pathname); - p += strlen(p) + 1; - } - - if (entry->ae_symlink) { - entry2->ae_symlink = p; - strcpy(p, entry->ae_symlink); - p += strlen(p) + 1; - } - - if (entry->ae_uname) { - entry2->ae_uname = p; - strcpy(p, entry->ae_uname); - p += strlen(p) + 1; - } - - return (entry2); -} - -struct archive_entry * -archive_entry_dup(struct archive_entry *entry) { struct archive_entry *entry2; + /* Allocate new structure and copy over all of the fields. 
*/ entry2 = malloc(sizeof(*entry2)); - *entry2 = *entry; + entry2->ae_stat = entry->ae_stat; + entry2->ae_tartype = entry->ae_tartype; + + aes_copy(&entry2->ae_acl ,&entry->ae_acl); + aes_copy(&entry2->ae_acl_default ,&entry->ae_acl_default); + aes_copy(&entry2->ae_fflags ,&entry->ae_fflags); + aes_copy(&entry2->ae_gname ,&entry->ae_gname); + aes_copy(&entry2->ae_hardlink ,&entry->ae_hardlink); + aes_copy(&entry2->ae_pathname, &entry->ae_pathname); + aes_copy(&entry2->ae_symlink ,&entry->ae_symlink); + aes_copy(&entry2->ae_uname ,&entry->ae_uname); + return (entry2); } void archive_entry_free(struct archive_entry *entry) { + archive_entry_clear(entry); free(entry); } @@ -199,11 +273,11 @@ archive_entry_new(void) entry = malloc(sizeof(*entry)); if(entry == NULL) return (NULL); - archive_entry_clear(entry); + memset(entry, 0, sizeof(*entry)); + entry->ae_tartype = -1; return (entry); } - /* * Functions for reading fields from an archive_entry. */ @@ -211,14 +285,14 @@ archive_entry_new(void) const char * archive_entry_acl(struct archive_entry *entry) { - return (entry->ae_acl); + return (aes_get_mbs(&entry->ae_acl)); } const char * archive_entry_acl_default(struct archive_entry *entry) { - return (entry->ae_acl_default); + return (aes_get_mbs(&entry->ae_acl_default)); } dev_t @@ -237,19 +311,19 @@ archive_entry_devminor(struct archive_entry *entry) const char * archive_entry_fflags(struct archive_entry *entry) { - return (entry->ae_fflags); + return (aes_get_mbs(&entry->ae_fflags)); } const char * archive_entry_gname(struct archive_entry *entry) { - return (entry->ae_gname); + return (aes_get_mbs(&entry->ae_gname)); } const char * archive_entry_hardlink(struct archive_entry *entry) { - return (entry->ae_hardlink); + return (aes_get_mbs(&entry->ae_hardlink)); } mode_t @@ -261,7 +335,13 @@ archive_entry_mode(struct archive_entry *entry) const char * archive_entry_pathname(struct archive_entry *entry) { - return (entry->ae_pathname); + return 
(aes_get_mbs(&entry->ae_pathname)); +} + +const wchar_t * +archive_entry_pathname_w(struct archive_entry *entry) +{ + return (aes_get_wcs(&entry->ae_pathname)); } int64_t @@ -279,7 +359,7 @@ archive_entry_stat(struct archive_entry *entry) const char * archive_entry_symlink(struct archive_entry *entry) { - return (entry->ae_symlink); + return (aes_get_mbs(&entry->ae_symlink)); } int @@ -291,7 +371,7 @@ archive_entry_tartype(struct archive_entry *entry) const char * archive_entry_uname(struct archive_entry *entry) { - return (entry->ae_uname); + return (aes_get_mbs(&entry->ae_uname)); } /* @@ -311,14 +391,25 @@ archive_entry_copy_stat(struct archive_entry *entry, const struct stat *st) void archive_entry_set_acl(struct archive_entry *entry, const char *acl) { - entry->ae_acl = acl; + aes_set_mbs(&entry->ae_acl, acl); } +void +archive_entry_copy_acl_w(struct archive_entry *entry, const wchar_t *acl) +{ + aes_copy_wcs(&entry->ae_acl, acl); +} void archive_entry_set_acl_default(struct archive_entry *entry, const char *acl) { - entry->ae_acl_default = acl; + aes_set_mbs(&entry->ae_acl_default, acl); +} + +void +archive_entry_copy_acl_default_w(struct archive_entry *entry, const wchar_t *acl) +{ + aes_copy_wcs(&entry->ae_acl_default, acl); } void @@ -342,7 +433,13 @@ archive_entry_set_devminor(struct archive_entry *entry, dev_t m) void archive_entry_set_fflags(struct archive_entry *entry, const char *flags) { - entry->ae_fflags = flags; + aes_set_mbs(&entry->ae_fflags, flags); +} + +void +archive_entry_copy_fflags_w(struct archive_entry *entry, const wchar_t *flags) +{ + aes_copy_wcs(&entry->ae_fflags, flags); } void @@ -354,13 +451,25 @@ archive_entry_set_gid(struct archive_entry *entry, gid_t g) void archive_entry_set_gname(struct archive_entry *entry, const char *name) { - entry->ae_gname = name; + aes_set_mbs(&entry->ae_gname, name); +} + +void +archive_entry_copy_gname_w(struct archive_entry *entry, const wchar_t *name) +{ + aes_copy_wcs(&entry->ae_gname, name); } 
void archive_entry_set_hardlink(struct archive_entry *entry, const char *target) { - entry->ae_hardlink = target; + aes_set_mbs(&entry->ae_hardlink, target); +} + +void +archive_entry_copy_hardlink_w(struct archive_entry *entry, const wchar_t *target) +{ + aes_copy_wcs(&entry->ae_hardlink, target); } void @@ -372,7 +481,13 @@ archive_entry_set_mode(struct archive_entry *entry, mode_t m) void archive_entry_set_pathname(struct archive_entry *entry, const char *name) { - entry->ae_pathname = name; + aes_set_mbs(&entry->ae_pathname, name); +} + +void +archive_entry_copy_pathname_w(struct archive_entry *entry, const wchar_t *name) +{ + aes_copy_wcs(&entry->ae_pathname, name); } void @@ -382,9 +497,15 @@ archive_entry_set_size(struct archive_entry *entry, int64_t s) } void -archive_entry_set_symlink(struct archive_entry *entry, const char *link) +archive_entry_set_symlink(struct archive_entry *entry, const char *linkname) { - entry->ae_symlink = link; + aes_set_mbs(&entry->ae_symlink, linkname); +} + +void +archive_entry_copy_symlink_w(struct archive_entry *entry, const wchar_t *linkname) +{ + aes_copy_wcs(&entry->ae_symlink, linkname); } void @@ -402,6 +523,26 @@ archive_entry_set_uid(struct archive_entry *entry, uid_t u) void archive_entry_set_uname(struct archive_entry *entry, const char *name) { - entry->ae_uname = name; + aes_set_mbs(&entry->ae_uname, name); } +void +archive_entry_copy_uname_w(struct archive_entry *entry, const wchar_t *name) +{ + aes_copy_wcs(&entry->ae_uname, name); +} + +#if TEST +int +main(int argc, char **argv) +{ + struct aes aes; + + memset(&aes, 0, sizeof(aes)); + aes_clean(&aes); + aes_set_mbs(&aes, "ÈÈÈabc"); + wprintf("%S\n", L"abcdef"); + wprintf("%S\n",aes_get_wcs(&aes)); + return (0); +} +#endif diff --git a/lib/libarchive/archive_entry.h b/lib/libarchive/archive_entry.h index e210fa73faed..db0241822bdf 100644 --- a/lib/libarchive/archive_entry.h +++ b/lib/libarchive/archive_entry.h @@ -29,9 +29,8 @@ #ifndef ARCHIVE_ENTRY_H_INCLUDED 
#define ARCHIVE_ENTRY_H_INCLUDED - -#include -#include +#include +#include /* * Description of an archive entry. @@ -60,8 +59,6 @@ struct archive_entry; struct archive_entry *archive_entry_clear(struct archive_entry *); /* The 'clone' function does a deep copy; all of the strings are copied too. */ struct archive_entry *archive_entry_clone(struct archive_entry *); -/* The 'dup' function does a shallow copy; referenced strings aren't copied. */ -struct archive_entry *archive_entry_dup(struct archive_entry *); void archive_entry_free(struct archive_entry *); struct archive_entry *archive_entry_new(void); @@ -78,6 +75,7 @@ const char *archive_entry_gname(struct archive_entry *); const char *archive_entry_hardlink(struct archive_entry *); mode_t archive_entry_mode(struct archive_entry *); const char *archive_entry_pathname(struct archive_entry *); +const wchar_t *archive_entry_pathname_w(struct archive_entry *); int64_t archive_entry_size(struct archive_entry *); const struct stat *archive_entry_stat(struct archive_entry *); const char *archive_entry_symlink(struct archive_entry *); @@ -93,19 +91,27 @@ const char *archive_entry_uname(struct archive_entry *); void archive_entry_copy_stat(struct archive_entry *, const struct stat *); void archive_entry_set_acl(struct archive_entry *, const char *); +void archive_entry_copy_acl_w(struct archive_entry *, const wchar_t *); void archive_entry_set_acl_default(struct archive_entry *, const char *); +void archive_entry_copy_acl_default_w(struct archive_entry *, const wchar_t *); void archive_entry_set_fflags(struct archive_entry *, const char *); +void archive_entry_copy_fflags_w(struct archive_entry *, const wchar_t *); void archive_entry_set_devmajor(struct archive_entry *, dev_t); void archive_entry_set_devminor(struct archive_entry *, dev_t); void archive_entry_set_gid(struct archive_entry *, gid_t); void archive_entry_set_gname(struct archive_entry *, const char *); +void archive_entry_copy_gname_w(struct archive_entry *, 
const wchar_t *); void archive_entry_set_hardlink(struct archive_entry *, const char *); +void archive_entry_copy_hardlink_w(struct archive_entry *, const wchar_t *); void archive_entry_set_mode(struct archive_entry *, mode_t); void archive_entry_set_pathname(struct archive_entry *, const char *); +void archive_entry_copy_pathname_w(struct archive_entry *, const wchar_t *); void archive_entry_set_size(struct archive_entry *, int64_t); void archive_entry_set_symlink(struct archive_entry *, const char *); +void archive_entry_copy_symlink_w(struct archive_entry *, const wchar_t *); void archive_entry_set_tartype(struct archive_entry *, char); void archive_entry_set_uid(struct archive_entry *, uid_t); void archive_entry_set_uname(struct archive_entry *, const char *); +void archive_entry_copy_uname_w(struct archive_entry *, const wchar_t *); #endif /* !ARCHIVE_ENTRY_H_INCLUDED */ diff --git a/lib/libarchive/archive_platform.h b/lib/libarchive/archive_platform.h index fca65a116bff..ee6c08e00914 100644 --- a/lib/libarchive/archive_platform.h +++ b/lib/libarchive/archive_platform.h @@ -38,14 +38,38 @@ /* FreeBSD-specific definitions. */ #ifdef __FreeBSD__ #include /* For __FBSDID */ +/* + * Note that SUSv3 says that inttypes.h includes stdint.h. + * Since inttypes.h predates stdint.h, it's safest to always + * use inttypes.h instead of stdint.h. + */ +#include /* For int64_t, etc. */ + #define HAVE_POSIX_ACL 1 #define HAVE_CHFLAGS 1 #define HAVE_LUTIMES 1 #define HAVE_LCHMOD 1 #define ARCHIVE_ERRNO_FILE_FORMAT EFTYPE -#define ARCHIVE_ERRNO_PROGRAMMER EDOOFUS +#define ARCHIVE_ERRNO_PROGRAMMER EINVAL #define ARCHIVE_ERRNO_MISC (-1) + +/* + * Older versions of inttypes.h don't have INT64_MAX, etc. Since + * SUSv3 requires them to be macros when they are defined, we can + * easily test for and define them here if necessary. + */ +#ifndef INT64_MAX +/* XXX Is this really necessary? 
XXX */ +#ifdef __i386__ +#define INT64_MAX 0x7fffffffffffffffLL +#define UINT64_MAX 0xffffffffffffffffULL +#else /* __alpha__ */ +#define INT64_MAX 0x7fffffffffffffffL +#define UINT64_MAX 0xffffffffffffffffUL #endif +#endif /* ! INT64_MAX */ + +#endif /* __FreeBSD__ */ /* No non-FreeBSD platform will have __FBSDID, so just define it here. */ #ifndef __FreeBSD__ @@ -54,6 +78,7 @@ /* Linux */ #ifdef LINUX +#include #define ARCHIVE_ERRNO_FILE_FORMAT EILSEQ #define ARCHIVE_ERRNO_PROGRAMMER EINVAL #define ARCHIVE_ERRNO_MISC (-1) diff --git a/lib/libarchive/archive_private.h b/lib/libarchive/archive_private.h index a42f23902f01..24e52f41bf6a 100644 --- a/lib/libarchive/archive_private.h +++ b/lib/libarchive/archive_private.h @@ -29,8 +29,6 @@ #ifndef ARCHIVE_PRIVATE_H_INCLUDED #define ARCHIVE_PRIVATE_H_INCLUDED -#include - #include "archive.h" #include "archive_string.h" @@ -56,17 +54,6 @@ struct archive { struct archive_entry *entry; - /* - * Space to store per-entry strings. Most header strings are - * copied here from the format-specific header, in order to - * gaurantee null-termination. Maybe these should go into - * per-format storage? - */ - struct archive_string entry_name; - struct archive_string entry_linkname; - struct archive_string entry_uname; - struct archive_string entry_gname; - /* Utility: Pointer to a block of nulls. */ const char *nulls; size_t null_length; @@ -76,8 +63,8 @@ struct archive { * will be able to register it's own read_data routine and these * will move into the per-format data for the formats that use them. */ - uint64_t entry_bytes_remaining; - uint64_t entry_padding; /* Skip this much after entry data. */ + off_t entry_bytes_remaining; + off_t entry_padding; /* Skip this much after entry data. */ uid_t user_uid; /* UID of current user. */ @@ -108,25 +95,10 @@ struct archive { int pad_uncompressed; int pad_uncompressed_byte; /* TODO: Support this. */ - /* - * PAX extended header data. 
When reading, - * name/linkname/uname/gname fields may point into here. This - * should be moved into per-format data storage. - */ - struct archive_string pax_header; - - /* - * GNU header fields. These should be moved into format-specific - * storage. - */ - struct archive_string gnu_name; - struct archive_string gnu_linkname; - int gnu_header_recursion_depth; - /* Position in UNCOMPRESSED data stream. */ - intmax_t file_position; + off_t file_position; /* File offset of beginning of most recently-read header. */ - intmax_t header_position; + off_t header_position; /* * Detection functions for decompression: bid functions are @@ -192,8 +164,8 @@ struct archive { * multiple format readers active at one time, so we need to * allow for multiple format readers to have their data * available. The pformat_data slot here is the solution: on - * read, it's set up in the bid phase and is gauranteed to - * always point to a void* variable that the format can use. + * read, it is guaranteed to always point to a void* variable + * that the format can use. */ void **pformat_data; /* Pointer to current format_data. */ void *format_data; /* Used by writers. */ @@ -255,4 +227,6 @@ int __archive_read_register_compression(struct archive *a, int (*bid)(const void *, size_t), int (*init)(struct archive *, const void *, size_t)); +#define err_combine(a,b) ((a) < (b) ? (a) : (b)) + #endif diff --git a/lib/libarchive/archive_read.c b/lib/libarchive/archive_read.c index e51ea02df5fc..a6da35a60fd8 100644 --- a/lib/libarchive/archive_read.c +++ b/lib/libarchive/archive_read.c @@ -331,8 +331,15 @@ archive_read_data(struct archive *a, void *buff, size_t s) ssize_t bytes_read; archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA); - if (s > a->entry_bytes_remaining) - s = a->entry_bytes_remaining; + /* + * off_t is generally at least as wide as size_t, so widen for + * comparison and narrow for the assignment.
Otherwise, on + * platforms with 32-bit size_t and 64-bit off_t, we won't be + * able to correctly read archives with entries larger than + * 4gig. + */ + if ((off_t)s > a->entry_bytes_remaining) + s = (size_t)a->entry_bytes_remaining; if (s > 0) { bytes_read = (a->compression_read_ahead)(a, &data, 1); if (bytes_read < 0) { @@ -424,20 +431,6 @@ archive_read_finish(struct archive *a) */ /* Casting a pointer to int allows us to remove 'const.' */ free((void *)(uintptr_t)(const void *)a->nulls); - if (a->entry_name.s != NULL) - free(a->entry_name.s); - if (a->entry_linkname.s != NULL) - free(a->entry_linkname.s); - if (a->entry_uname.s != NULL) - free(a->entry_uname.s); - if (a->entry_gname.s != NULL) - free(a->entry_gname.s); - if (a->pax_header.s != NULL) - free(a->pax_header.s); - if (a->gnu_name.s != NULL) - free(a->gnu_name.s); - if (a->gnu_linkname.s != NULL) - free(a->gnu_linkname.s); if (a->extract_mkdirpath.s != NULL) free(a->extract_mkdirpath.s); if (a->entry) diff --git a/lib/libarchive/archive_read_data_into_fd.c b/lib/libarchive/archive_read_data_into_fd.c index b8d620a74b9d..617d09b5d3e2 100644 --- a/lib/libarchive/archive_read_data_into_fd.c +++ b/lib/libarchive/archive_read_data_into_fd.c @@ -52,7 +52,7 @@ archive_read_data_into_fd(struct archive *a, int fd) a->entry_bytes_remaining); if (bytes_read < 0) return (-1); - if ((size_t)bytes_read > a->entry_bytes_remaining) + if (bytes_read > a->entry_bytes_remaining) bytes_read = (ssize_t)a->entry_bytes_remaining; bytes_written = write(fd, buff, bytes_read); diff --git a/lib/libarchive/archive_read_support_compression_gzip.c b/lib/libarchive/archive_read_support_compression_gzip.c index c6d86cd01eeb..6961664495cb 100644 --- a/lib/libarchive/archive_read_support_compression_gzip.c +++ b/lib/libarchive/archive_read_support_compression_gzip.c @@ -30,13 +30,14 @@ __FBSDID("$FreeBSD$"); #ifdef HAVE_DMALLOC #include #endif -#include #include #include #include #include #include +#include /* zlib.h is borked, so 
must precede err.h */ + #include "archive.h" #include "archive_private.h" diff --git a/lib/libarchive/archive_read_support_format_cpio.c b/lib/libarchive/archive_read_support_format_cpio.c index de11aa55d8fa..c6b3daf556d3 100644 --- a/lib/libarchive/archive_read_support_format_cpio.c +++ b/lib/libarchive/archive_read_support_format_cpio.c @@ -34,7 +34,7 @@ __FBSDID("$FreeBSD$"); #endif #include #include -#include +/* #include */ /* See archive_platform.h */ #include #include #include @@ -70,6 +70,8 @@ struct links_entry { struct cpio { int magic; struct links_entry *links_head; + struct archive_string entry_name; + struct archive_string entry_linkname; }; static int64_t atol8(const char *, unsigned); @@ -177,18 +179,19 @@ archive_read_format_cpio_read_header(struct archive *a, if (bytes < namelength) return (ARCHIVE_FATAL); (a->compression_read_consume)(a, namelength); - archive_strncpy(&a->entry_name, h, namelength); - archive_entry_set_pathname(entry, a->entry_name.s); + archive_strncpy(&cpio->entry_name, h, namelength); + archive_entry_set_pathname(entry, cpio->entry_name.s); /* If this is a symlink, read the link contents. 
*/ if (S_ISLNK(st.st_mode)) { bytes = (a->compression_read_ahead)(a, &h, a->entry_bytes_remaining); - if (bytes < a->entry_bytes_remaining) + if ((off_t)bytes < a->entry_bytes_remaining) return (ARCHIVE_FATAL); (a->compression_read_consume)(a, a->entry_bytes_remaining); - archive_strncpy(&a->entry_linkname, h, a->entry_bytes_remaining); - archive_entry_set_symlink(entry, a->entry_linkname.s); + archive_strncpy(&cpio->entry_linkname, h, + a->entry_bytes_remaining); + archive_entry_set_symlink(entry, cpio->entry_linkname.s); a->entry_bytes_remaining = 0; } diff --git a/lib/libarchive/archive_read_support_format_gnutar.c b/lib/libarchive/archive_read_support_format_gnutar.c index 98fbef60156e..ce7043812296 100644 --- a/lib/libarchive/archive_read_support_format_gnutar.c +++ b/lib/libarchive/archive_read_support_format_gnutar.c @@ -33,7 +33,8 @@ __FBSDID("$FreeBSD$"); #endif #include #include -#include +/* #include */ /* See archive_platform.h */ +#include #include #include @@ -76,10 +77,21 @@ struct archive_entry_header_gnutar { */ }; +struct gnutar { + struct archive_string entry_name; + struct archive_string entry_linkname; + struct archive_string entry_uname; + struct archive_string entry_gname; + struct archive_string gnu_name; + struct archive_string gnu_linkname; + int gnu_header_recursion_depth; +}; + static int archive_block_is_null(const unsigned char *p); static int archive_header_gnu(struct archive *, struct archive_entry *, const void *); static int archive_read_format_gnutar_bid(struct archive *a); +static int archive_read_format_gnutar_cleanup(struct archive *); static int archive_read_format_gnutar_read_header(struct archive *a, struct archive_entry *); static int checksum(struct archive *a, const void *h); @@ -93,11 +105,40 @@ static int64_t tar_atol256(const char *, unsigned); int archive_read_support_format_gnutar(struct archive *a) { + struct gnutar *gnutar; + + gnutar = malloc(sizeof(*gnutar)); + memset(gnutar, 0, sizeof(*gnutar)); + return 
(__archive_read_register_format(a, - NULL, + gnutar, archive_read_format_gnutar_bid, archive_read_format_gnutar_read_header, - NULL)); + archive_read_format_gnutar_cleanup)); +} + +static int +archive_read_format_gnutar_cleanup(struct archive *a) +{ + struct gnutar *gnutar; + + gnutar = *(a->pformat_data); + if (gnutar->entry_name.s != NULL) + free(gnutar->entry_name.s); + if (gnutar->entry_linkname.s != NULL) + free(gnutar->entry_linkname.s); + if (gnutar->entry_uname.s != NULL) + free(gnutar->entry_uname.s); + if (gnutar->entry_gname.s != NULL) + free(gnutar->entry_gname.s); + if (gnutar->gnu_name.s != NULL) + free(gnutar->gnu_name.s); + if (gnutar->gnu_linkname.s != NULL) + free(gnutar->gnu_linkname.s); + + free(gnutar); + *(a->pformat_data) = NULL; + return (ARCHIVE_OK); } static int @@ -154,7 +195,9 @@ archive_read_format_gnutar_read_header(struct archive *a, const void *h; ssize_t bytes; int oldstate; + struct gnutar *gnutar; + gnutar = *(a->pformat_data); a->archive_format = ARCHIVE_FORMAT_TAR_GNUTAR; a->archive_format_name = "GNU tar"; @@ -188,14 +231,14 @@ archive_read_format_gnutar_read_header(struct archive *a, } /* This function gets called recursively for long name headers, etc. */ - if (++a->gnu_header_recursion_depth > 32) + if (++gnutar->gnu_header_recursion_depth > 32) errx(EINVAL, "*** Too many special headers for one entry; giving up. 
" "(%s:%s@%d)\n", __FUNCTION__, __FILE__, __LINE__); archive_header_gnu(a, entry, h); - a->gnu_header_recursion_depth--; + gnutar->gnu_header_recursion_depth--; return (0); } @@ -267,10 +310,13 @@ archive_header_gnu(struct archive *a, struct archive_entry *entry, { struct stat st; const struct archive_entry_header_gnutar *header; + struct gnutar *gnutar; char tartype; + unsigned oldstate; /* Clear out entry structure */ memset(&st, 0, sizeof(st)); + gnutar = *(a->pformat_data); /* * GNU header is like POSIX, except 'prefix' is @@ -280,12 +326,13 @@ archive_header_gnu(struct archive *a, struct archive_entry *entry, /* Copy filename over (to ensure null termination). */ header = h; - archive_strncpy(&(a->entry_name), header->name, sizeof(header->name)); - archive_entry_set_pathname(entry, a->entry_name.s); + archive_strncpy(&(gnutar->entry_name), header->name, + sizeof(header->name)); + archive_entry_set_pathname(entry, gnutar->entry_name.s); /* Copy linkname over */ if (header->linkname[0]) - archive_strncpy(&(a->entry_linkname), header->linkname, + archive_strncpy(&(gnutar->entry_linkname), header->linkname, sizeof(header->linkname)); /* Parse out the numeric fields (all are octal) */ @@ -301,13 +348,13 @@ archive_header_gnu(struct archive *a, struct archive_entry *entry, st.st_mode &= ~S_IFMT; /* Fields common to ustar and GNU */ - archive_strncpy(&(a->entry_uname), + archive_strncpy(&(gnutar->entry_uname), header->uname, sizeof(header->uname)); - archive_entry_set_uname(entry, a->entry_uname.s); + archive_entry_set_uname(entry, gnutar->entry_uname.s); - archive_strncpy(&(a->entry_gname), + archive_strncpy(&(gnutar->entry_gname), header->gname, sizeof(header->gname)); - archive_entry_set_gname(entry, a->entry_gname.s); + archive_entry_set_gname(entry, gnutar->entry_gname.s); /* Parse out device numbers only for char and block specials */ if (header->typeflag[0] == '3' || header->typeflag[0] == '4') @@ -329,7 +376,7 @@ archive_header_gnu(struct archive *a, struct 
archive_entry *entry, /* Interpret entry type */ switch (tartype) { case '1': /* Hard link */ - archive_entry_set_hardlink(entry, a->entry_linkname.s); + archive_entry_set_hardlink(entry, gnutar->entry_linkname.s); /* * Note: Technically, tar does not store the file type * for a "hard link" entry, only the fact that it is a @@ -341,7 +388,7 @@ archive_header_gnu(struct archive *a, struct archive_entry *entry, case '2': /* Symlink */ st.st_mode |= S_IFLNK; st.st_size = 0; - archive_entry_set_symlink(entry, a->entry_linkname.s); + archive_entry_set_symlink(entry, gnutar->entry_linkname.s); archive_entry_copy_stat(entry, &st); break; case '3': /* Character device */ @@ -376,32 +423,43 @@ archive_header_gnu(struct archive *a, struct archive_entry *entry, break; case 'K': /* GNU long linkname */ /* Entry body is full name of link for next header. */ - archive_string_ensure(&(a->gnu_linkname), st.st_size+1); - archive_read_data_into_buffer(a, a->gnu_linkname.s, + archive_string_ensure(&(gnutar->gnu_linkname), st.st_size+1); + /* Temporarily fudge internal state for read_data call. */ + oldstate = a->state; + a->state = ARCHIVE_STATE_DATA; + archive_read_data_into_buffer(a, gnutar->gnu_linkname.s, st.st_size); - a->gnu_linkname.s[st.st_size] = 0; /* Null term name! */ + a->state = oldstate; + gnutar->gnu_linkname.s[st.st_size] = 0; /* Null term name! */ /* * This next call will usually overwrite - * a->entry_linkname, which is why we _must_ have a - * separate gnu_linkname field. + * gnutar->entry_linkname, which is why we _must_ have + * a separate gnu_linkname field. */ archive_read_format_gnutar_read_header(a, entry); if (archive_entry_tartype(entry) == '1') - archive_entry_set_hardlink(entry, a->gnu_linkname.s); + archive_entry_set_hardlink(entry, gnutar->gnu_linkname.s); else if (archive_entry_tartype(entry) == '2') - archive_entry_set_symlink(entry, a->gnu_linkname.s); + archive_entry_set_symlink(entry, gnutar->gnu_linkname.s); /* TODO: else { ... 
} */ break; case 'L': /* GNU long filename */ /* Entry body is full pathname for next header. */ - archive_string_ensure(&(a->gnu_name), st.st_size+1); - archive_read_data_into_buffer(a, a->gnu_name.s, + archive_string_ensure(&(gnutar->gnu_name), st.st_size+1); + /* Temporarily fudge internal state for read_data call. */ + oldstate = a->state; + a->state = ARCHIVE_STATE_DATA; + archive_read_data_into_buffer(a, gnutar->gnu_name.s, st.st_size); - a->gnu_name.s[st.st_size] = 0; /* Null terminate name! */ - /* This next call will typically overwrite a->entry_name, which - * is why we _must_ have a separate gnu_name field */ + a->state = oldstate; + gnutar->gnu_name.s[st.st_size] = 0; /* Null terminate name! */ + /* + * This next call will typically overwrite + * gnutar->entry_name, which is why we _must_ have a + * separate gnu_name field. + */ archive_read_format_gnutar_read_header(a, entry); - archive_entry_set_pathname(entry, a->gnu_name.s); + archive_entry_set_pathname(entry, gnutar->gnu_name.s); break; case 'M': /* GNU Multi-volume (remainder of file from last archive) */ /* diff --git a/lib/libarchive/archive_read_support_format_tar.c b/lib/libarchive/archive_read_support_format_tar.c index 5bfd944c9d85..24c7f42e26f7 100644 --- a/lib/libarchive/archive_read_support_format_tar.c +++ b/lib/libarchive/archive_read_support_format_tar.c @@ -32,7 +32,7 @@ __FBSDID("$FreeBSD$"); #include #endif #include -#include +/* #include */ /* See archive_platform.h */ #include #include #include @@ -63,39 +63,101 @@ struct archive_entry_header_ustar { char prefix[155]; }; +/* + * Data specific to this format. 
+ */ +struct tar { + struct archive_string acl_text; + struct archive_string entry_name; + struct archive_string entry_linkname; + struct archive_string entry_uname; + struct archive_string entry_gname; + struct archive_string longlink; + struct archive_string longname; + struct archive_string pax_header; + struct archive_string pax_global; + wchar_t *pax_entry; + size_t pax_entry_length; + int header_recursion_depth; +}; + +static size_t UTF8_mbrtowc(wchar_t * __restrict pwc, + const char * __restrict s, size_t n, + mbstate_t * __restrict ps __unused); static int archive_block_is_null(const unsigned char *p); -static int archive_header_common(struct archive *, struct archive_entry *, - struct stat *, const void *); -static int archive_header_old_tar(struct archive *, +static int header_Solaris_ACL(struct archive *, struct tar *, struct archive_entry *, struct stat *, const void *); -static int archive_header_pax_extensions(struct archive *, +static int header_common(struct archive *, struct tar *, struct archive_entry *, struct stat *, const void *); -static int archive_header_pax_global(struct archive *, +static int header_old_tar(struct archive *, struct tar *, + struct archive_entry *, struct stat *, const void *); +static int header_pax_extensions(struct archive *, struct tar *, + struct archive_entry *, struct stat *, const void *); +static int header_pax_global(struct archive *, struct tar *, + struct archive_entry *, struct stat *, const void *h); +static int header_longlink(struct archive *, struct tar *, + struct archive_entry *, struct stat *, const void *h); +static int header_longname(struct archive *, struct tar *, + struct archive_entry *, struct stat *, const void *h); +static int header_ustar(struct archive *, struct tar *, struct archive_entry *, struct stat *, const void *h); -static int archive_header_ustar(struct archive *, struct archive_entry *, - struct stat *, const void *h); static int archive_read_format_tar_bid(struct archive *); +static 
int archive_read_format_tar_cleanup(struct archive *); static int archive_read_format_tar_read_header(struct archive *, struct archive_entry *); static int checksum(struct archive *, const void *); -static int pax_attribute(struct archive *, struct archive_entry *, - struct stat *, char *key, char *value); -static int pax_header(struct archive *, struct archive_entry *, - struct stat *, char *attr, uint64_t length); -static void pax_time(const char *, struct timespec *t); +static int pax_attribute(struct archive_entry *, struct stat *, + wchar_t *key, wchar_t *value); +static int pax_header(struct archive *, struct tar *, + struct archive_entry *, struct stat *, char *attr); +static void pax_time(const wchar_t *, struct timespec *t); +static int read_body_to_string(struct archive *, struct archive_string *, + const void *h); static int64_t tar_atol(const char *, unsigned); -static int64_t tar_atol10(const char *, unsigned); +static int64_t tar_atol10(const wchar_t *, unsigned); static int64_t tar_atol256(const char *, unsigned); static int64_t tar_atol8(const char *, unsigned); +static int tar_read_header(struct archive *, struct tar *, + struct archive_entry *, struct stat *); +static int utf8_decode(wchar_t *, const char *, size_t length); int archive_read_support_format_tar(struct archive *a) { + struct tar *tar; + + tar = malloc(sizeof(*tar)); + memset(tar, 0, sizeof(*tar)); + return (__archive_read_register_format(a, - NULL, + tar, archive_read_format_tar_bid, archive_read_format_tar_read_header, - NULL)); + archive_read_format_tar_cleanup)); +} + +static int +archive_read_format_tar_cleanup(struct archive *a) +{ + struct tar *tar; + + tar = *(a->pformat_data); + if (tar->entry_name.s != NULL) + free(tar->entry_name.s); + if (tar->entry_linkname.s != NULL) + free(tar->entry_linkname.s); + if (tar->entry_uname.s != NULL) + free(tar->entry_uname.s); + if (tar->entry_gname.s != NULL) + free(tar->entry_gname.s); + if (tar->pax_header.s != NULL) + 
free(tar->pax_header.s); + if (tar->pax_global.s != NULL) + free(tar->pax_global.s); + + free(tar); + *(a->pformat_data) = NULL; + return (ARCHIVE_OK); } @@ -179,18 +241,37 @@ archive_read_format_tar_bid(struct archive *a) return (bid); } +/* + * The function invoked by archive_read_header(). This + * just sets up a few things and then calls the internal + * tar_read_header() function below. + */ static int archive_read_format_tar_read_header(struct archive *a, struct archive_entry *entry) { struct stat st; + struct tar *tar; + + memset(&st, 0, sizeof(st)); + tar = *(a->pformat_data); + + return (tar_read_header(a, tar, entry, &st)); +} + +/* + * This function recursively interprets all of the headers associated + * with a single entry. + */ +static int +tar_read_header(struct archive *a, struct tar *tar, + struct archive_entry *entry, struct stat *st) +{ ssize_t bytes; int err; const void *h; const struct archive_entry_header_ustar *header; - memset(&st, 0, sizeof(st)); - /* Read 512-byte header record */ bytes = (a->compression_read_ahead)(a, &h, 512); if (bytes < 512) { @@ -208,8 +289,8 @@ archive_read_format_tar_read_header(struct archive *a, /* * Note: If the checksum fails and we return ARCHIVE_RETRY, - * then the client is likely to just retry. This is a very crude way - * to search for the next valid header! + * then the client is likely to just retry. This is a very + * crude way to search for the next valid header! * * TODO: Improve this by implementing a real header scan. */ @@ -218,39 +299,58 @@ archive_read_format_tar_read_header(struct archive *a, return (ARCHIVE_RETRY); /* Retryable: Invalid header */ } + if (++tar->header_recursion_depth > 32) { + archive_set_error(a, EINVAL, "Too many special headers"); + return (ARCHIVE_WARN); + } + /* Determine the format variant. 
*/ header = h; - if (memcmp(header->magic, "ustar", 5) != 0) - err = archive_header_old_tar(a, entry, &st, h); /* non-POSIX */ - else switch(header->typeflag[0]) { - case 'g': + if (memcmp(header->magic, "ustar", 5) != 0) { + a->archive_format = ARCHIVE_FORMAT_TAR; + a->archive_format_name = "tar (non-POSIX)"; + err = header_old_tar(a, tar, entry, st, h); + } else switch(header->typeflag[0]) { + case 'A': /* Solaris tar ACL */ + a->archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; + a->archive_format_name = "Solaris tar"; + err = header_Solaris_ACL(a, tar, entry, st, h); + break; + case 'g': /* POSIX-standard 'g' header. */ a->archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; a->archive_format_name = "POSIX pax interchange format"; - err = archive_header_pax_global(a, entry, &st, h); + err = header_pax_global(a, tar, entry, st, h); break; - case 'x': - a->archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; - a->archive_format_name = "POSIX pax interchange format"; - err = archive_header_pax_extensions(a, entry, &st, h); + case 'K': /* Long link name (non-POSIX, but fairly common). */ + err = header_longlink(a, tar, entry, st, h); break; - case 'X': + case 'L': /* Long filename (non-POSIX, but fairly common). */ + err = header_longname(a, tar, entry, st, h); + break; + case 'X': /* Used by SUN tar; same as 'x'. */ a->archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; a->archive_format_name = "POSIX pax interchange format (Sun variant)"; - err = archive_header_pax_extensions(a, entry, &st, h); + err = header_pax_extensions(a, tar, entry, st, h); + break; + case 'x': /* POSIX-standard 'x' header. 
*/ + a->archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; + a->archive_format_name = "POSIX pax interchange format"; + err = header_pax_extensions(a, tar, entry, st, h); break; default: - if (a->archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) { + if (a->archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE + && a->archive_format != ARCHIVE_FORMAT_TAR_USTAR) { a->archive_format = ARCHIVE_FORMAT_TAR_USTAR; - a->archive_format_name = "POSIX ustar"; + a->archive_format_name = "POSIX ustar format"; } - err = archive_header_ustar(a, entry, &st, h); + err = header_ustar(a, tar, entry, st, h); } - archive_entry_copy_stat(entry, &st); + archive_entry_copy_stat(entry, st); + --tar->header_recursion_depth; return (err); } - /* * Return true if block checksum is correct. */ @@ -298,7 +398,6 @@ checksum(struct archive *a, const void *h) return (0); } - /* * Return true if this block contains only nulls. */ @@ -313,29 +412,121 @@ archive_block_is_null(const unsigned char *p) return (1); } +/* + * Interpret 'A' Solaris ACL header + */ +static int +header_Solaris_ACL(struct archive *a, struct tar *tar, + struct archive_entry *entry, struct stat *st, const void *h) +{ + int err, err2; + + err = read_body_to_string(a, &(tar->acl_text), h); + err2 = tar_read_header(a, tar, entry, st); + + /* XXX DO SOMETHING WITH THE ACL!!! XXX */ + { + const char *msg = "\nXXX Solaris ACL entries recognized but not yet handled!!\n"; + write(2, msg, strlen(msg)); + } + + return (err_combine(err, err2)); +} + +/* + * Interpret 'K' long linkname header. 
+ */ +static int +header_longlink(struct archive *a, struct tar *tar, + struct archive_entry *entry, struct stat *st, const void *h) +{ + int err, err2; + + err = read_body_to_string(a, &(tar->longlink), h); + err2 = tar_read_header(a, tar, entry, st); + if (err == ARCHIVE_OK && err2 == ARCHIVE_OK) { + if (archive_entry_tartype(entry) == '1') + archive_entry_set_hardlink(entry, tar->longlink.s); + else if (archive_entry_tartype(entry) == '2') + archive_entry_set_symlink(entry, tar->longlink.s); + } + return (err_combine(err, err2)); +} + +/* + * Interpret 'L' long filename header. + */ +static int +header_longname(struct archive *a, struct tar *tar, + struct archive_entry *entry, struct stat *st, const void *h) +{ + int err, err2; + + err = read_body_to_string(a, &(tar->longname), h); + /* Read and parse "real" header, then override name. */ + err2 = tar_read_header(a, tar, entry, st); + if (err == ARCHIVE_OK && err2 == ARCHIVE_OK) + archive_entry_set_pathname(entry, tar->longname.s); + return (err_combine(err, err2)); +} + +/* + * Read body of an archive entry into an archive_string object. + */ +static int +read_body_to_string(struct archive *a, struct archive_string *as, const void *h) +{ + const struct archive_entry_header_ustar *header; + off_t size; + unsigned oldstate; + int err, err2; + + header = h; + size = tar_atol(header->size, sizeof(header->size)); + + /* Temporarily fudge internal state for read_data call. */ + oldstate = a->state; + a->state = ARCHIVE_STATE_DATA; + + /* Read the body into the string. */ + a->entry_bytes_remaining = size; + a->entry_padding = 0x1ff & -size; + archive_string_ensure(as, size+1); + err = archive_read_data_into_buffer(a, as->s, size); + as->s[size] = 0; /* Null terminate name! */ + err2 = archive_read_data_skip(a); /* Resync for next header. */ + + /* Restore the state. */ + a->state = oldstate; + + return (err_combine(err, err2)); +} + /* * Parse out common header elements. 
* - * This would be the same as archive_header_old_tar, except that the + * This would be the same as header_old_tar, except that the * filename is handled slightly differently for old and POSIX * entries (POSIX entries support a 'prefix'). This factoring - * allows archive_header_old_tar and archive_header_ustar + * allows header_old_tar and header_ustar * to handle filenames differently, while still putting most of the * common parsing into one place. */ static int -archive_header_common(struct archive *a, struct archive_entry *entry, +header_common(struct archive *a, struct tar *tar, struct archive_entry *entry, struct stat *st, const void *h) { const struct archive_entry_header_ustar *header; char tartype; + (void)a; /* UNUSED */ + header = h; if (header->linkname[0]) - archive_strncpy(&(a->entry_linkname), header->linkname, + archive_strncpy(&(tar->entry_linkname), header->linkname, sizeof(header->linkname)); else - archive_string_empty(&(a->entry_linkname)); + archive_string_empty(&(tar->entry_linkname)); /* Parse out the numeric fields (all are octal) */ st->st_mode = tar_atol(header->mode, sizeof(header->mode)); @@ -351,7 +542,7 @@ archive_header_common(struct archive *a, struct archive_entry *entry, switch (tartype) { case '1': /* Hard link */ - archive_entry_set_hardlink(entry, a->entry_linkname.s); + archive_entry_set_hardlink(entry, tar->entry_linkname.s); /* * The following may seem odd, but: Technically, tar * does not store the file type for a "hard link" @@ -367,7 +558,7 @@ archive_header_common(struct archive *a, struct archive_entry *entry, case '2': /* Symlink */ st->st_mode |= S_IFLNK; st->st_size = 0; - archive_entry_set_symlink(entry, a->entry_linkname.s); + archive_entry_set_symlink(entry, tar->entry_linkname.s); break; case '3': /* Character device */ st->st_mode |= S_IFCHR; @@ -397,24 +588,21 @@ archive_header_common(struct archive *a, struct archive_entry *entry, } /* - * Parse out header elements for "old-style" tar archives + * Parse out 
header elements for "old-style" tar archives. */ static int -archive_header_old_tar(struct archive *a, struct archive_entry *entry, +header_old_tar(struct archive *a, struct tar *tar, struct archive_entry *entry, struct stat *st, const void *h) { const struct archive_entry_header_ustar *header; - a->archive_format = ARCHIVE_FORMAT_TAR; - a->archive_format_name = "tar (non-POSIX)"; - /* Copy filename over (to ensure null termination). */ header = h; - archive_strncpy(&(a->entry_name), header->name, sizeof(header->name)); - archive_entry_set_pathname(entry, a->entry_name.s); + archive_strncpy(&(tar->entry_name), header->name, sizeof(header->name)); + archive_entry_set_pathname(entry, tar->entry_name.s); /* Grab rest of common fields */ - archive_header_common(a, entry, st, h); + header_common(a, tar, entry, st, h); /* * TODO: Decide whether the following special handling @@ -423,7 +611,7 @@ archive_header_old_tar(struct archive *a, struct archive_entry *entry, /* "Regular" entry with trailing '/' is really directory. */ if (S_ISREG(st->st_mode) && - '/' == a->entry_name.s[strlen(a->entry_name.s) - 1]) { + '/' == tar->entry_name.s[strlen(tar->entry_name.s) - 1]) { st->st_mode &= ~S_IFMT; st->st_mode |= S_IFDIR; archive_entry_set_tartype(entry, '5'); @@ -434,92 +622,29 @@ archive_header_old_tar(struct archive *a, struct archive_entry *entry, return (0); } - /* * Parse a file header for a pax extended archive entry. 
*/ static int -archive_header_pax_global(struct archive *a, struct archive_entry *entry, - struct stat *st, const void *h) +header_pax_global(struct archive *a, struct tar *tar, + struct archive_entry *entry, struct stat *st, const void *h) { - uint64_t extension_size; - size_t bytes; - int err; - char *global; - const struct archive_entry_header_ustar *header; + int err, err2; - header = h; - extension_size = tar_atol(header->size, sizeof(header->size)); - a->entry_bytes_remaining = extension_size; - a->entry_padding = 0x1ff & (-a->entry_bytes_remaining); - - global = malloc(extension_size + 1); - archive_read_data_into_buffer(a, global, extension_size); - global[extension_size] = 0; - - /* - * TODO: Store the global default options somewhere for future use. - * For now, just free the buffer and keep going. - */ - free(global); - - /* Skip the padding. */ - archive_read_data_skip(a); - - /* Read the next header. */ - bytes = (a->compression_read_ahead)(a, &h, 512); - if (bytes < 512) { - /* TODO: Set error values. 
*/ - return (-1); - } - (a->compression_read_consume)(a, 512); - - header = h; - switch(header->typeflag[0]) { - case 'x': - case 'X': - err = archive_header_pax_extensions(a, entry, st, h); - break; - default: - err = archive_header_ustar(a, entry, st, h); - } - - return (err); + err = read_body_to_string(a, &(tar->pax_global), h); + err2 = tar_read_header(a, tar, entry, st); + return (err_combine(err, err2)); } static int -archive_header_pax_extensions(struct archive *a, +header_pax_extensions(struct archive *a, struct tar *tar, struct archive_entry *entry, struct stat *st, const void *h) { - uint64_t extension_size; - size_t bytes; - int err; - const struct archive_entry_header_ustar *header; - int oldstate; + read_body_to_string(a, &(tar->pax_header), h); + int err, err2; - header = h; - extension_size = tar_atol(header->size, sizeof(header->size)); - a->entry_bytes_remaining = extension_size; - a->entry_padding = 0x1ff & (-a->entry_bytes_remaining); - - archive_string_ensure(&(a->pax_header), extension_size + 1); - oldstate = a->state; - a->state = ARCHIVE_STATE_DATA; - archive_read_data_into_buffer(a, a->pax_header.s, extension_size); - a->pax_header.s[extension_size] = 0; - archive_read_data_skip(a); /* Skip any padding. */ - a->state = oldstate; - - /* Read the next header. */ - bytes = (a->compression_read_ahead)(a, &h, 512); - if (bytes < 512) { - /* TODO: Set error values */ - return (-1); - } - (a->compression_read_consume)(a, 512); - - /* Must be a regular POSIX ustar entry. */ - err = archive_header_ustar(a, entry, st, h); + /* Parse the next header. */ + err = tar_read_header(a, tar, entry, st); /* * TODO: Parse global/default options into 'entry' struct here @@ -531,7 +656,8 @@ archive_header_pax_extensions(struct archive *a, * and then skip any fields in the standard header that were * defined in the pax header. 
*/ - pax_header(a, entry, st, a->pax_header.s, extension_size); + err2 = pax_header(a, tar, entry, st, tar->pax_header.s); + err = err_combine(err, err2); a->entry_bytes_remaining = st->st_size; a->entry_padding = 0x1ff & (-a->entry_bytes_remaining); return (err); @@ -543,7 +669,7 @@ archive_header_pax_extensions(struct archive *a, * handles "pax" or "extended ustar" entries. */ static int -archive_header_ustar(struct archive *a, struct archive_entry *entry, +header_ustar(struct archive *a, struct tar *tar, struct archive_entry *entry, struct stat *st, const void *h) { const struct archive_entry_header_ustar *header; @@ -552,28 +678,28 @@ archive_header_ustar(struct archive *a, struct archive_entry *entry, /* Copy name into an internal buffer to ensure null-termination. */ if (header->prefix[0]) { - archive_strncpy(&(a->entry_name), header->prefix, + archive_strncpy(&(tar->entry_name), header->prefix, sizeof(header->prefix)); - archive_strappend_char(&(a->entry_name), '/'); - archive_strncat(&(a->entry_name), header->name, + archive_strappend_char(&(tar->entry_name), '/'); + archive_strncat(&(tar->entry_name), header->name, sizeof(header->name)); } else - archive_strncpy(&(a->entry_name), header->name, + archive_strncpy(&(tar->entry_name), header->name, sizeof(header->name)); - archive_entry_set_pathname(entry, a->entry_name.s); + archive_entry_set_pathname(entry, tar->entry_name.s); /* Handle rest of common fields. */ - archive_header_common(a, entry, st, h); + header_common(a, tar, entry, st, h); /* Handle POSIX ustar fields. 
*/ - archive_strncpy(&(a->entry_uname), header->uname, + archive_strncpy(&(tar->entry_uname), header->uname, sizeof(header->uname)); - archive_entry_set_uname(entry, a->entry_uname.s); + archive_entry_set_uname(entry, tar->entry_uname.s); - archive_strncpy(&(a->entry_gname), header->gname, + archive_strncpy(&(tar->entry_gname), header->gname, sizeof(header->gname)); - archive_entry_set_gname(entry, a->entry_gname.s); + archive_entry_set_gname(entry, tar->entry_gname.s); /* Parse out device numbers only for char and block specials. */ if (header->typeflag[0] == '3' || header->typeflag[0] == '4') { @@ -595,13 +721,16 @@ archive_header_ustar(struct archive *a, struct archive_entry *entry, * Returns non-zero if there's an error in the data. */ static int -pax_header(struct archive *a, struct archive_entry *entry, struct stat *st, - char *attr, uint64_t attr_length) +pax_header(struct archive *a, struct tar *tar, struct archive_entry *entry, + struct stat *st, char *attr) { - uint64_t l; - uint64_t line_length; - char *line, *key, *p, *value; + size_t attr_length, l, line_length; + char *line, *p; + wchar_t *key, *wp, *value; + int err, err2; + attr_length = strlen(attr); + err = ARCHIVE_OK; while (attr_length > 0) { /* Parse decimal length field at start of line. */ line_length = 0; @@ -626,36 +755,50 @@ pax_header(struct archive *a, struct archive_entry *entry, struct stat *st, if (line_length > attr_length) return (0); + /* Ensure pax_entry buffer is big enough. */ + if (tar->pax_entry_length <= line_length) { + if (tar->pax_entry_length <= 0) + tar->pax_entry_length = 256; + while (tar->pax_entry_length <= line_length + 1) + tar->pax_entry_length *= 2; + + /* XXX Error handling here */ + tar->pax_entry = realloc(tar->pax_entry, + tar->pax_entry_length * sizeof(wchar_t)); + } + + /* Decode UTF-8 to wchar_t, null-terminate result. 
*/ + if (utf8_decode(tar->pax_entry, p, + line_length - (p - attr) - 1)) { + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Invalid UTF8 character in pax extended attribute"); + err = err_combine(err, ARCHIVE_WARN); + } + /* Null-terminate 'key' value. */ - /* XXX TODO: 'key' is officially UTF-8; should - * decode UTF-8 key to wchar here, then do - * all wchar matching below. XXX */ - key = p; - p = strchr(key, '='); - if (p == NULL) - return (0); - if (p > line + line_length) - return (-1); - *p = 0; - if (strlen(key) < 1) + key = tar->pax_entry; + if (key[0] == L'=') return (-1); + wp = wcschr(key, L'='); + if (wp == NULL) { + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Invalid pax extended attributes"); + return (ARCHIVE_WARN); + } + *wp = 0; - /* Null-terminate 'value' portion. */ - /* XXX need to decode UTF-8 value, make everything - * else wchar-clean. */ - /* XXX should use pointer/length so that NULLs can - * appear within the value portion. */ - value = p + 1; - line[line_length - 1] = 0; + /* Identify null-terminated 'value' portion. */ + value = wp + 1; - if (pax_attribute(a, entry, st, key, value)) - return (-1); + /* Identify this attribute and set it in the entry. */ + err2 = pax_attribute(entry, st, key, value); + err = err_combine(err, err2); /* Skip to next line */ attr += line_length; attr_length -= line_length; } - return (0); + return (err); } @@ -674,12 +817,9 @@ pax_header(struct archive *a, struct archive_entry *entry, struct stat *st, * any of them look useful. 
*/ static int -pax_attribute(struct archive *a, struct archive_entry *entry, struct stat *st, - char *key, char *value) +pax_attribute(struct archive_entry *entry, struct stat *st, + wchar_t *key, wchar_t *value) { - - (void)a; /* UNUSED */ - switch (key[0]) { case 'L': /* Our extensions */ @@ -691,71 +831,71 @@ pax_attribute(struct archive *a, struct archive_entry *entry, struct stat *st, break; case 'S': /* We support some keys used by the "star" archiver */ - if (strcmp(key, "SCHILY.acl.access")==0) - archive_entry_set_acl(entry, value); - else if (strcmp(key, "SCHILY.acl.default")==0) - archive_entry_set_acl_default(entry, value); - else if (strcmp(key, "SCHILY.devmajor")==0) - st->st_rdev = makedev(tar_atol10(value, strlen(value)), + if (wcscmp(key, L"SCHILY.acl.access")==0) + archive_entry_copy_acl_w(entry, value); + else if (wcscmp(key, L"SCHILY.acl.default")==0) + archive_entry_copy_acl_default_w(entry, value); + else if (wcscmp(key, L"SCHILY.devmajor")==0) + st->st_rdev = makedev(tar_atol10(value, wcslen(value)), minor(st->st_dev)); - else if (strcmp(key, "SCHILY.devminor")==0) + else if (wcscmp(key, L"SCHILY.devminor")==0) st->st_rdev = makedev(major(st->st_dev), - tar_atol10(value, strlen(value))); - else if (strcmp(key, "SCHILY.fflags")==0) - archive_entry_set_fflags(entry, value); - else if (strcmp(key, "SCHILY.nlink")==0) - st->st_nlink = tar_atol10(value, strlen(value)); + tar_atol10(value, wcslen(value))); + else if (wcscmp(key, L"SCHILY.fflags")==0) + archive_entry_copy_fflags_w(entry, value); + else if (wcscmp(key, L"SCHILY.nlink")==0) + st->st_nlink = tar_atol10(value, wcslen(value)); break; case 'a': - if (strcmp(key, "atime")==0) + if (wcscmp(key, L"atime")==0) pax_time(value, &(st->st_atimespec)); break; case 'c': - if (strcmp(key, "ctime")==0) + if (wcscmp(key, L"ctime")==0) pax_time(value, &(st->st_ctimespec)); - else if (strcmp(key, "charset")==0) { + else if (wcscmp(key, L"charset")==0) { /* TODO: Publish charset information in entry. 
*/ - } else if (strcmp(key, "comment")==0) { + } else if (wcscmp(key, L"comment")==0) { /* TODO: Publish comment in entry. */ } break; case 'g': - if (strcmp(key, "gid")==0) - st->st_gid = tar_atol10(value, strlen(value)); - else if (strcmp(key, "gname")==0) - archive_entry_set_gname(entry, value); + if (wcscmp(key, L"gid")==0) + st->st_gid = tar_atol10(value, wcslen(value)); + else if (wcscmp(key, L"gname")==0) + archive_entry_copy_gname_w(entry, value); break; case 'l': /* pax interchange doesn't distinguish hardlink vs. symlink. */ - if (strcmp(key, "linkpath")==0) { + if (wcscmp(key, L"linkpath")==0) { if (archive_entry_hardlink(entry)) - archive_entry_set_hardlink(entry, value); + archive_entry_copy_hardlink_w(entry, value); else - archive_entry_set_symlink(entry, value); + archive_entry_copy_symlink_w(entry, value); } break; case 'm': - if (strcmp(key, "mtime")==0) + if (wcscmp(key, L"mtime")==0) pax_time(value, &(st->st_mtimespec)); break; case 'p': - if (strcmp(key, "path")==0) - archive_entry_set_pathname(entry, value); + if (wcscmp(key, L"path")==0) + archive_entry_copy_pathname_w(entry, value); break; case 'r': /* POSIX has reserved 'realtime.*' */ break; case 's': /* POSIX has reserved 'security.*' */ - /* Someday: if (strcmp(key, "security.acl")==0) { ... } */ - if (strcmp(key, "size")==0) - st->st_size = tar_atol10(value, strlen(value)); + /* Someday: if (wcscmp(key, L"security.acl")==0) { ... 
} */ + if (wcscmp(key, L"size")==0) + st->st_size = tar_atol10(value, wcslen(value)); break; case 'u': - if (strcmp(key, "uid")==0) - st->st_uid = tar_atol10(value, strlen(value)); - else if (strcmp(key, "uname")==0) - archive_entry_set_uname(entry, value); + if (wcscmp(key, L"uid")==0) + st->st_uid = tar_atol10(value, wcslen(value)); + else if (wcscmp(key, L"uname")==0) + archive_entry_copy_uname_w(entry, value); break; } return (0); @@ -767,7 +907,7 @@ pax_attribute(struct archive *a, struct archive_entry *entry, struct stat *st, * parse a decimal time value, which may include a fractional portion */ static void -pax_time(const char *p, struct timespec *t) +pax_time(const wchar_t *p, struct timespec *t) { char digit; int64_t s; @@ -880,7 +1020,7 @@ tar_atol8(const char *p, unsigned char_cnt) * it does obey locale. */ static int64_t -tar_atol10(const char *p, unsigned char_cnt) +tar_atol10(const wchar_t *p, unsigned char_cnt) { int64_t l; int digit, sign; @@ -936,3 +1076,123 @@ tar_atol256(const char *p, unsigned char_cnt) } return (l); } + +static int +utf8_decode(wchar_t *dest, const char *src, size_t length) +{ + size_t n; + int err; + + err = 0; + while(length > 0) { + n = UTF8_mbrtowc(dest, src, length, NULL); + if (n == 0) + break; + if (n > 8) { + /* Invalid byte encountered; try to keep going. */ + *dest = L'?'; + n = 1; + err = 1; + } + dest++; + src += n; + length -= n; + } + *dest++ = L'\0'; + return (err); +} + +/* + * Copied from FreeBSD libc/locale. + */ +static size_t +UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, + mbstate_t * __restrict ps __unused) +{ + int ch, i, len, mask; + wchar_t lbound, wch; + + if (s == NULL) + /* Reset to initial shift state (no-op) */ + return (0); + if (n == 0) + /* Incomplete multibyte sequence */ + return ((size_t)-2); + + /* + * Determine the number of octets that make up this character from + * the first octet, and a mask that extracts the interesting bits of + * the first octet. 
+ * + * We also specify a lower bound for the character code to detect + * redundant, non-"shortest form" encodings. For example, the + * sequence C0 80 is _not_ a legal representation of the null + * character. This enforces a 1-to-1 mapping between character + * codes and their multibyte representations. + */ + ch = (unsigned char)*s; + if ((ch & 0x80) == 0) { + mask = 0x7f; + len = 1; + lbound = 0; + } else if ((ch & 0xe0) == 0xc0) { + mask = 0x1f; + len = 2; + lbound = 0x80; + } else if ((ch & 0xf0) == 0xe0) { + mask = 0x0f; + len = 3; + lbound = 0x800; + } else if ((ch & 0xf8) == 0xf0) { + mask = 0x07; + len = 4; + lbound = 0x10000; + } else if ((ch & 0xfc) == 0xf8) { + mask = 0x03; + len = 5; + lbound = 0x200000; + } else if ((ch & 0xfc) == 0xfc) { + mask = 0x01; + len = 6; + lbound = 0x4000000; + } else { + /* + * Malformed input; input is not UTF-8. + */ + errno = EILSEQ; + return ((size_t)-1); + } + + if (n < (size_t)len) + /* Incomplete multibyte sequence */ + return ((size_t)-2); + + /* + * Decode the octet sequence representing the character in chunks + * of 6 bits, most significant first. + */ + wch = (unsigned char)*s++ & mask; + i = len; + while (--i != 0) { + if ((*s & 0xc0) != 0x80) { + /* + * Malformed input; bad characters in the middle + * of a character. + */ + errno = EILSEQ; + return ((size_t)-1); + } + wch <<= 6; + wch |= *s++ & 0x3f; + } + if (wch < lbound) { + /* + * Malformed input; redundant encoding. + */ + errno = EILSEQ; + return ((size_t)-1); + } + if (pwc != NULL) + *pwc = wch; + return (wch == L'\0' ? 
0 : len); +} diff --git a/lib/libarchive/archive_string.c b/lib/libarchive/archive_string.c index b100fb9c7ee8..b7a59ec305c2 100644 --- a/lib/libarchive/archive_string.c +++ b/lib/libarchive/archive_string.c @@ -97,50 +97,3 @@ __archive_strappend_char(struct archive_string *as, char c) { return (__archive_string_append(as, &c, 1)); } - -#if 0 -/* Append Unicode character to string using UTF8 encoding */ -struct archive_string * -__archive_strappend_char_UTF8(struct archive_string *as, int c) -{ - char buff[6]; - - if (c <= 0x7f) { - buff[0] = c; - return (__archive_string_append(as, buff, 1)); - } else if (c <= 0x7ff) { - buff[0] = 0xc0 | (c >> 6); - buff[1] = 0x80 | (c & 0x3f); - return (__archive_string_append(as, buff, 2)); - } else if (c <= 0xffff) { - buff[0] = 0xe0 | (c >> 12); - buff[1] = 0x80 | ((c >> 6) & 0x3f); - buff[2] = 0x80 | (c & 0x3f); - return (__archive_string_append(as, buff, 3)); - } else if (c <= 0x1fffff) { - buff[0] = 0xf0 | (c >> 18); - buff[1] = 0x80 | ((c >> 12) & 0x3f); - buff[2] = 0x80 | ((c >> 6) & 0x3f); - buff[3] = 0x80 | (c & 0x3f); - return (__archive_string_append(as, buff, 4)); - } else if (c <= 0x3ffffff) { - buff[0] = 0xf8 | (c >> 24); - buff[1] = 0x80 | ((c >> 18) & 0x3f); - buff[2] = 0x80 | ((c >> 12) & 0x3f); - buff[3] = 0x80 | ((c >> 6) & 0x3f); - buff[4] = 0x80 | (c & 0x3f); - return (__archive_string_append(as, buff, 5)); - } else if (c <= 0x7fffffff) { - buff[0] = 0xfc | (c >> 30); - buff[1] = 0x80 | ((c >> 24) & 0x3f); - buff[1] = 0x80 | ((c >> 18) & 0x3f); - buff[2] = 0x80 | ((c >> 12) & 0x3f); - buff[3] = 0x80 | ((c >> 6) & 0x3f); - buff[4] = 0x80 | (c & 0x3f); - return (__archive_string_append(as, buff, 6)); - } else { - /* TODO: Handle this error?? 
*/ - return (as); - } -} -#endif diff --git a/lib/libarchive/archive_string.h b/lib/libarchive/archive_string.h index f6076a469c5c..6cbe72229301 100644 --- a/lib/libarchive/archive_string.h +++ b/lib/libarchive/archive_string.h @@ -50,14 +50,15 @@ struct archive_string { size_t buffer_length; /* Length of malloc-ed storage */ }; -#define EMPTY_ARCHIVE_STRING {0,0,0} +/* Initialize an archive_string object on the stack or elsewhere. */ +#define archive_string_init(a) \ + do { (a)->s = NULL; (a)->length = 0; (a)->buffer_length = 0; } while(0) /* Append a C char to an archive_string, resizing as necessary. */ struct archive_string * __archive_strappend_char(struct archive_string *, char); #define archive_strappend_char __archive_strappend_char - /* Append a char to an archive_string using UTF8. */ struct archive_string * __archive_strappend_char_UTF8(struct archive_string *, int); @@ -86,7 +87,7 @@ __archive_strncat(struct archive_string *, const char *, size_t); /* Copy a C string to an archive_string with limit, resizing as necessary. */ #define archive_strncpy(as,p,l) \ - ((as)->length=0,archive_strncat((as), (p), (l))) + ((as)->length=0, archive_strncat((as), (p), (l))) /* Return length of string. */ #define archive_strlen(a) ((a)->length) diff --git a/lib/libarchive/archive_write.c b/lib/libarchive/archive_write.c index 050d7e23dffb..eac741395b5e 100644 --- a/lib/libarchive/archive_write.c +++ b/lib/libarchive/archive_write.c @@ -35,7 +35,6 @@ __FBSDID("$FreeBSD$"); * needlessly bloating statically-linked clients. */ -#include #include #ifdef HAVE_DMALLOC #include @@ -169,18 +168,6 @@ archive_write_finish(struct archive *a) /* Release various dynamic buffers. 
*/ free((void *)(uintptr_t)(const void *)a->nulls); - if (a->entry_name.s != NULL) - free(a->entry_name.s); - if (a->entry_linkname.s != NULL) - free(a->entry_linkname.s); - if (a->entry_uname.s != NULL) - free(a->entry_uname.s); - if (a->entry_gname.s != NULL) - free(a->entry_gname.s); - if (a->gnu_name.s != NULL) - free(a->gnu_name.s); - if (a->gnu_linkname.s != NULL) - free(a->gnu_linkname.s); if (a->extract_mkdirpath.s != NULL) free(a->extract_mkdirpath.s); free(a); diff --git a/lib/libarchive/archive_write_open_file.c b/lib/libarchive/archive_write_open_file.c index cae6cd60766a..92fa9d8a9ce3 100644 --- a/lib/libarchive/archive_write_open_file.c +++ b/lib/libarchive/archive_write_open_file.c @@ -41,7 +41,7 @@ __FBSDID("$FreeBSD$"); #include "archive_private.h" struct write_file_data { - intmax_t offset; + off_t offset; int fd; char filename[1]; }; diff --git a/lib/libarchive/archive_write_open_filename.c b/lib/libarchive/archive_write_open_filename.c index cae6cd60766a..92fa9d8a9ce3 100644 --- a/lib/libarchive/archive_write_open_filename.c +++ b/lib/libarchive/archive_write_open_filename.c @@ -41,7 +41,7 @@ __FBSDID("$FreeBSD$"); #include "archive_private.h" struct write_file_data { - intmax_t offset; + off_t offset; int fd; char filename[1]; }; diff --git a/lib/libarchive/archive_write_set_format_pax.c b/lib/libarchive/archive_write_set_format_pax.c index 8c5342ecd658..441c190895a7 100644 --- a/lib/libarchive/archive_write_set_format_pax.c +++ b/lib/libarchive/archive_write_set_format_pax.c @@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include "archive.h" #include "archive_entry.h" @@ -54,6 +55,8 @@ static void add_pax_attr_int(struct archive_string *, static void add_pax_attr_time(struct archive_string *, const char *key, int64_t sec, unsigned long nanos); +static void add_pax_attr_w(struct archive_string *, + const char *key, const wchar_t *wvalue); static int archive_write_pax_data(struct archive *, const void *, size_t); 
static int archive_write_pax_finish(struct archive *); @@ -183,6 +186,73 @@ add_pax_attr_int(struct archive_string *as, const char *key, int64_t value) add_pax_attr(as, key, format_int(tmp + sizeof(tmp) - 1, value)); } +static void +add_pax_attr_w(struct archive_string *as, const char *key, const wchar_t *wval) +{ + int utf8len; + const wchar_t *wp; + wchar_t wc; + char *utf8_value, *p; + + utf8len = 0; + for (wp = wval; *wp != L'\0'; ) { + wc = *wp++; + if (wc <= 0x7f) + utf8len++; + else if (wc <= 0x7ff) + utf8len += 2; + else if (wc <= 0xffff) + utf8len += 3; + else if (wc <= 0x1fffff) + utf8len += 4; + else if (wc <= 0x3ffffff) + utf8len += 5; + else + utf8len += 6; + } + + utf8_value = malloc(utf8len + 1); + for (wp = wval, p = utf8_value; *wp != L'\0'; ) { + wc = *wp++; + if (wc <= 0x7f) { + *p++ = (char)wc; + } else if (wc <= 0x7ff) { + p[0] = 0xc0 | ((wc >> 6) & 0x1f); + p[1] = 0x80 | (wc & 0x3f); + p += 2; + } else if (wc <= 0xffff) { + p[0] = 0xe0 | ((wc >> 12) & 0x0f); + p[1] = 0x80 | ((wc >> 6) & 0x3f); + p[2] = 0x80 | (wc & 0x3f); + p += 3; + } else if (wc <= 0x1fffff) { + p[0] = 0xf0 | ((wc >> 18) & 0x07); + p[1] = 0x80 | ((wc >> 12) & 0x3f); + p[2] = 0x80 | ((wc >> 6) & 0x3f); + p[3] = 0x80 | (wc & 0x3f); + p += 4; + } else if (wc <= 0x3ffffff) { + p[0] = 0xf8 | ((wc >> 24) & 0x03); + p[1] = 0x80 | ((wc >> 18) & 0x3f); + p[2] = 0x80 | ((wc >> 12) & 0x3f); + p[3] = 0x80 | ((wc >> 6) & 0x3f); + p[4] = 0x80 | (wc & 0x3f); + p += 5; + } else if (wc <= 0x7fffffff) { + p[0] = 0xfc | ((wc >> 30) & 0x01); + p[1] = 0x80 | ((wc >> 24) & 0x3f); + p[1] = 0x80 | ((wc >> 18) & 0x3f); + p[2] = 0x80 | ((wc >> 12) & 0x3f); + p[3] = 0x80 | ((wc >> 6) & 0x3f); + p[4] = 0x80 | (wc & 0x3f); + p += 6; + } + } + + add_pax_attr(as, key, utf8_value); + free(utf8_value); +} + /* * Add a key/value attribute to the pax header. This function handles * the length field and various other syntactic requirements. 
@@ -243,16 +313,18 @@ archive_write_pax_header(struct archive *a, struct archive_entry *entry_original) { struct archive_entry *entry_main; - const char *linkname, *name_start, *p; + const char *linkname, *p; + const wchar_t *wp, *wp2, *wname_start; int need_extension, oldstate, r, ret; struct pax *pax; const struct stat *st_main, *st_original; - struct archive_string pax_entry_name = EMPTY_ARCHIVE_STRING; + struct archive_string pax_entry_name; char paxbuff[512]; char ustarbuff[512]; char ustar_entry_name[256]; + archive_string_init(&pax_entry_name); need_extension = 0; pax = a->format_data; pax->written = 1; @@ -281,7 +353,7 @@ archive_write_pax_header(struct archive *a, } /* Copy entry so we can modify it as needed. */ - entry_main = archive_entry_dup(entry_original); + entry_main = archive_entry_clone(entry_original); archive_string_empty(&(pax->pax_header)); /* Blank our work area. */ st_main = archive_entry_stat(entry_main); @@ -291,16 +363,26 @@ archive_write_pax_header(struct archive *a, * 'prefix' fields. Here, I pick out the longest possible * suffix, then test whether the remaining prefix is too long. */ + wp = archive_entry_pathname_w(entry_main); p = archive_entry_pathname(entry_main); - if (strlen(p) <= 100) /* Short enough for just 'name' field */ - name_start = p; /* Record a zero-length prefix */ + if (wcslen(wp) <= 100) /* Short enough for just 'name' field */ + wname_start = wp; /* Record a zero-length prefix */ else /* Find the largest suffix that fits in 'name' field. */ - name_start = strchr(p + strlen(p) - 100 - 1, '/'); + wname_start = wcschr(wp + wcslen(wp) - 100 - 1, '/'); - /* If name is too long, add 'path' to pax extended attrs. */ - if (name_start == NULL || name_start - p > 155) { - add_pax_attr(&(pax->pax_header), "path", p); + /* Find non-ASCII character, if any. */ + wp2 = wp; + while (*wp2 != L'\0' && *wp2 < 128) + wp2++; + + /* + * If name is too long, or has non-ASCII characters, add + * 'path' to pax extended attrs. 
+ */ + if (wname_start == NULL || wname_start - wp > 155 || + *wp2 != L'\0') { + add_pax_attr_w(&(pax->pax_header), "path", wp); archive_entry_set_pathname(entry_main, build_ustar_entry_name(ustar_entry_name, p)); need_extension = 1; diff --git a/lib/libarchive/libarchive.3 b/lib/libarchive/libarchive.3 index f6fdb5e44f7c..550bf4d8a7f7 100644 --- a/lib/libarchive/libarchive.3 +++ b/lib/libarchive/libarchive.3 @@ -80,10 +80,10 @@ POSIX .Dq pax interchange format archives, .It -POSIX octet-oriented cpio archives. +POSIX octet-oriented cpio archives, +.It +two different variants of shar archives. .El -The default write format is the pax interchange -format. Pax interchange format is an extension of the tar archive format that eliminates essentially all of the limitations of historic tar formats in a standard fashion that is supported @@ -91,6 +91,9 @@ by POSIX-compliant .Xr pax 1 implementations on many systems as well as several newer implementations of .Xr tar 1 . +Note that the default write format will suppress the pax extended +attributes for most entries; explicitly requesting pax format will +enable those attributes for all entries. .Pp The read and write APIs are accessed through the .Fn archive_read_XXX @@ -238,10 +241,18 @@ variants have eliminated most restrictions on the length of textual fields. Clients should not assume that filenames, link names, user names, or group names are limited in length. In particular, pax interchange format can easily accomodate pathnames -that exceed +in arbitrary character sets that exceed .Va PATH_MAX . .Sh RETURN VALUES Most functions return zero on success, non-zero on error. +The return value indicates the general severity of the error, ranging +from +.Cm ARCHIVE_WARN , +which indicates a minor problem that should probably be reported +to the user, to +.Cm ARCHIVE_FATAL , +which indicates a serious problem that will prevent any further +operations on this archive.
On error, the .Fn archive_errno function can be used to retrieve a numeric error code (see @@ -257,21 +268,6 @@ return pointers to an allocated and initialized .Tn struct archive object. .Pp -.Fn archive_read_next_header -returns a pointer to an -.Tn struct archive_entry -structure or -.Dv NULL . -If -.Dv NULL -is returned, the value from -.Fn archive_errno -will be zero if the end of the archive was reached, --1 if there was a recoverable error reading the archive, -or positive if there was a non-recoverable error reading the archive. -If there was a recoverable error, the client should retry the -operation. -.Pp .Fn archive_read_data and .Fn archive_write_data @@ -284,6 +280,9 @@ and functions can be used to obtain more information. .Sh ENVIRONMENT The library currently obeys no environment variables. +There are character set conversions within the +.Xr archive_entry 3 +functions that are impacted by the currently-selected locale. .Sh SEE ALSO .Xr tar 1 , .Xr archive_entry 3 , @@ -304,8 +303,8 @@ library was written by .An Tim Kientzle Aq kientzle@acm.org . .Sh BUGS Some archive formats support information that is not supported by -.Tn struct archive_entry -and cannot therefore be archived or restored using this library. +.Tn struct archive_entry . +Such information cannot be fully archived or restored using this library. This includes, for example, comments, character sets, sparse file information, or the arbitrary key/value pairs that can appear in pax interchange format archives. @@ -317,9 +316,7 @@ is supported by all formats. For example, cpio formats do not support nanosecond timestamps; old tar formats do not support large device numbers. .Pp -The library does not have write support for pre-POSIX tar archives. +The library cannot write pre-POSIX tar archives. The support for GNU tar format is incomplete.
.Pp -The library should obey the current locale and convert -UTF8 filenames stored by pax interchange format to and from the -currently-active character coding. +Support for ACLs is still evolving and subject to change. \ No newline at end of file diff --git a/lib/libarchive/tar.5 b/lib/libarchive/tar.5 index 5149454ae198..dbd1ed0cd914 100644 --- a/lib/libarchive/tar.5 +++ b/lib/libarchive/tar.5 @@ -78,13 +78,13 @@ The header record for an old-style archive consists of the following: .Bd -literal -offset indent struct tarfile_header_old { - char name[100]; - char mode[8]; - char uid[8]; - char gid[8]; - char size[12]; - char mtime[12]; - char checksum[8]; +char name[100]; +char mode[8]; +char uid[8]; +char gid[8]; +char size[12]; +char mtime[12]; +char checksum[8]; }; .Ed The remaining bytes in the header record are filled with nulls. @@ -157,15 +157,15 @@ and fields were added: .Bd -literal -offset indent struct tarfile_entry_common { - char name[100]; - char mode[8]; - char uid[8]; - char gid[8]; - char size[12]; - char mtime[12]; - char checksum[8]; - char linktype[1]; - char linkname[100]; +char name[100]; +char mode[8]; +char uid[8]; +char gid[8]; +char size[12]; +char mtime[12]; +char checksum[8]; +char linktype[1]; +char linkname[100]; }; .Ed .Pp @@ -196,22 +196,22 @@ It extends the format above with new fields: .Bd -literal -offset indent struct tarfile_entry_posix { - char name[100]; - char mode[8]; - char uid[8]; - char gid[8]; - char size[12]; - char mtime[12]; - char checksum[8]; - char typeflag[1]; - char linkname[100]; - char magic[6]; - char version[2]; - char uname[32]; - char gname[32]; - char devmajor[8]; - char devminor[8]; - char prefix[155]; +char name[100]; +char mode[8]; +char uid[8]; +char gid[8]; +char size[12]; +char mtime[12]; +char checksum[8]; +char typeflag[1]; +char linkname[100]; +char magic[6]; +char version[2]; +char uname[32]; +char gname[32]; +char devmajor[8]; +char devminor[8]; +char prefix[155]; }; .Ed .Bl -tag -width indent 
@@ -426,32 +426,32 @@ more lenient POSIX-compliant readers can successfully extract most GNU tar archives. .Bd -literal -offset indent struct tarfile_entry_gnu { - char name[100]; - char mode[8]; - char uid[8]; - char gid[8]; - char size[12]; - char mtime[12]; - char checksum[8]; - char typeflag[1]; - char linkname[100]; - char magic[6]; - char version[2]; - char uname[32]; - char gname[32]; - char devmajor[8]; - char devminor[8]; - char atime[12]; - char ctime[12]; - char offset[12]; - char longnames[4]; - char unused[1]; - struct { - char offset[12]; - char numbytes[12]; - } sparse[4]; - char isextended[1]; - char realsize[12]; +char name[100]; +char mode[8]; +char uid[8]; +char gid[8]; +char size[12]; +char mtime[12]; +char checksum[8]; +char typeflag[1]; +char linkname[100]; +char magic[6]; +char version[2]; +char uname[32]; +char gname[32]; +char devmajor[8]; +char devminor[8]; +char atime[12]; +char ctime[12]; +char offset[12]; +char longnames[4]; +char unused[1]; +struct { +char offset[12]; +char numbytes[12]; +} sparse[4]; +char isextended[1]; +char realsize[12]; }; .Ed .Bl -tag -width indent @@ -569,8 +569,42 @@ additional records. Each such record contains XXX more details needed XXX .It Va realsize -A binary representation of the size, with a much larger range +A binary representation of the file's complete size, with a much larger range than the POSIX file size. +In particular, with +.Cm M +type files, the current entry is only a portion of the file. +In that case, the POSIX size field will indicate the size of this +entry; the +.Va realsize +field will indicate the total size of the file. +.El +.Ss Solaris Tar +XXX More Details Needed XXX +.Pp +Solaris tar supports an +.Dq extended +format that is fundamentally similar to pax interchange format, +with the following differences: +.Bl -bullet -compact -width indent +.It +Extended attributes are stored in an entry whose type is +.Cm X , +not +.Cm x , +as used by pax interchange format.
+The detailed format of this entry appears to be the same +as detailed above for the +.Cm x +entry. +.It +An additional +.Cm A +entry is used to store an ACL for the following regular entry. +The body of this entry contains a seven-digit octal number +(whose value is 01000000 plus the number of ACL entries) +followed by a zero byte, followed by the +textual ACL description. .El .Ss Other Extensions One common extension, utilized by GNU tar, star, and other newer