diff --git a/lib/libarchive/Makefile b/lib/libarchive/Makefile index 38c9067f4b70..23272d7f2834 100644 --- a/lib/libarchive/Makefile +++ b/lib/libarchive/Makefile @@ -9,7 +9,7 @@ LDADD= -lbz2 -lz # Major: Bumped ONLY when API/ABI breakage happens. # Minor: Bumped when significant new features are added (see SHLIB_MAJOR) # Revision: Bumped on any notable change -VERSION= 1.2.41 +VERSION= 1.2.51 ARCHIVE_API_MAJOR!= echo ${VERSION} | sed -e 's/\..*//' ARCHIVE_API_MINOR!= echo ${VERSION} | sed -e 's/[0-9]*\.//' | sed -e 's/\..*//' diff --git a/lib/libarchive/archive.h.in b/lib/libarchive/archive.h.in index 666917c9055f..62c4d52af6a6 100644 --- a/lib/libarchive/archive.h.in +++ b/lib/libarchive/archive.h.in @@ -253,6 +253,7 @@ int archive_read_data_into_fd(struct archive *, int fd); #define ARCHIVE_EXTRACT_UNLINK (16) /* Default: don't unlink existing files */ #define ARCHIVE_EXTRACT_ACL (32) /* Default: don't restore ACLs */ #define ARCHIVE_EXTRACT_FFLAGS (64) /* Default: don't restore fflags */ +#define ARCHIVE_EXTRACT_XATTR (128) /* Default: don't restore xattrs */ int archive_read_extract(struct archive *, struct archive_entry *, int flags); diff --git a/lib/libarchive/archive_entry.c b/lib/libarchive/archive_entry.c index de7238d9dca6..c67c64bd85fe 100644 --- a/lib/libarchive/archive_entry.c +++ b/lib/libarchive/archive_entry.c @@ -59,13 +59,13 @@ static size_t wcslen(const wchar_t *s) static wchar_t * wcscpy(wchar_t *s1, const wchar_t *s2) { wchar_t *dest = s1; - while((*s1 = *s2) != L'\0') + while ((*s1 = *s2) != L'\0') ++s1, ++s2; return dest; } -#define wmemcpy(a,b,i) (wchar_t *)memcpy((a),(b),(i)*sizeof(wchar_t)) +#define wmemcpy(a,b,i) (wchar_t *)memcpy((a), (b), (i) * sizeof(wchar_t)) /* Good enough for simple equality testing, but not for sorting. 
*/ -#define wmemcmp(a,b,i) memcmp((a),(b),(i)*sizeof(wchar_t)) +#define wmemcmp(a,b,i) memcmp((a), (b), (i) * sizeof(wchar_t)) #endif #include "archive.h" @@ -97,6 +97,14 @@ struct ae_acl { struct aes name; /* uname/gname */ }; +struct ae_xattr { + struct ae_xattr *next; + + char *name; + void *value; + size_t size; +}; + static void aes_clean(struct aes *); static void aes_copy(struct aes *dest, struct aes *src); static const char * aes_get_mbs(struct aes *); @@ -170,6 +178,9 @@ struct archive_entry { struct ae_acl *acl_p; int acl_state; /* See acl_next for details. */ wchar_t *acl_text_w; + + struct ae_xattr *xattr_head; + struct ae_xattr *xattr_p; }; static void @@ -332,6 +343,7 @@ archive_entry_clear(struct archive_entry *entry) aes_clean(&entry->ae_symlink); aes_clean(&entry->ae_uname); archive_entry_acl_clear(entry); + archive_entry_xattr_clear(entry); memset(entry, 0, sizeof(*entry)); return entry; } @@ -358,6 +370,7 @@ archive_entry_clone(struct archive_entry *entry) aes_copy(&entry2->ae_uname, &entry->ae_uname); /* XXX TODO: Copy ACL data over as well. XXX */ + /* XXX TODO: Copy xattr data over as well. 
XXX */ return (entry2); } @@ -1054,7 +1067,7 @@ archive_entry_acl_text_w(struct archive_entry *entry, int flags) length ++; /* colon */ length += 3; /* rwx */ length += 1; /* colon */ - length += max(sizeof(uid_t),sizeof(gid_t)) * 3 + 1; + length += max(sizeof(uid_t), sizeof(gid_t)) * 3 + 1; length ++; /* newline */ } ap = ap->next; @@ -1345,6 +1358,98 @@ fail: return (ARCHIVE_WARN); } +/* + * extended attribute handling + */ + +void +archive_entry_xattr_clear(struct archive_entry *entry) +{ + struct ae_xattr *xp; + + while (entry->xattr_head != NULL) { + xp = entry->xattr_head->next; + free(entry->xattr_head->name); + free(entry->xattr_head->value); + free(entry->xattr_head); + entry->xattr_head = xp; + } + + entry->xattr_head = NULL; +} + +void +archive_entry_xattr_add_entry(struct archive_entry *entry, + const char *name, const void *value, size_t size) +{ + struct ae_xattr *xp; + + for (xp = entry->xattr_head; xp != NULL; xp = xp->next) + ; + + if ((xp = malloc(sizeof(struct ae_xattr))) == NULL) + /* XXX Error XXX */ + return; + + xp->name = strdup(name); + if ((xp -> value = malloc(size)) != NULL) { + memcpy(xp -> value, value, size); + xp -> size = size; + } else + xp -> size = 0; + + xp->next = entry->xattr_head; + entry->xattr_head = xp; +} + + +/* + * returns number of the extended attribute entries + */ +int +archive_entry_xattr_count(struct archive_entry *entry) +{ + struct ae_xattr *xp; + int count = 0; + + for (xp = entry->xattr_head; xp != NULL; xp = xp->next) + count++; + + return count; +} + +int +archive_entry_xattr_reset(struct archive_entry * entry) +{ + entry->xattr_p = entry->xattr_head; + + return archive_entry_xattr_count(entry); +} + +int +archive_entry_xattr_next(struct archive_entry * entry, + const char **name, const void **value, size_t *size) +{ + if (entry->xattr_p) { + *name = entry->xattr_p->name; + *value = entry->xattr_p->value; + *size = entry->xattr_p->size; + + entry->xattr_p = entry->xattr_p->next; + + return (ARCHIVE_OK); + } 
+ else { /* No current entry: clear every output parameter. */ + *name = NULL; + *value = NULL; + *size = (size_t)0; + return (ARCHIVE_WARN); + } +} + +/* + * end of xattr handling + */ + /* * Match "[:whitespace:]*(.*)[:whitespace:]*[:,\n]". *wp is updated * to point to just after the separator. *start points to the first diff --git a/lib/libarchive/archive_entry.h b/lib/libarchive/archive_entry.h index a35f0dfae58b..3c3f73dfbc8d 100644 --- a/lib/libarchive/archive_entry.h +++ b/lib/libarchive/archive_entry.h @@ -229,4 +229,23 @@ int __archive_entry_acl_parse_w(struct archive_entry *, } #endif +/* + * extended attributes + */ + +void archive_entry_xattr_clear(struct archive_entry *); +void archive_entry_xattr_add_entry(struct archive_entry *, + const char *name, const void *value, size_t size); + +/* + * To retrieve the xattr list, first "reset", then repeatedly ask for the + * "next" entry. + */ + +int archive_entry_xattr_count(struct archive_entry *); +int archive_entry_xattr_reset(struct archive_entry *); +int archive_entry_xattr_next(struct archive_entry *, + const char **name, const void **value, size_t *); + + #endif /* !ARCHIVE_ENTRY_H_INCLUDED */ diff --git a/lib/libarchive/archive_read_extract.c b/lib/libarchive/archive_read_extract.c index cde42b86db2b..f7127f9f72bf 100644 --- a/lib/libarchive/archive_read_extract.c +++ b/lib/libarchive/archive_read_extract.c @@ -31,6 +31,9 @@ __FBSDID("$FreeBSD$"); #ifdef HAVE_SYS_ACL_H #include #endif +#ifdef HAVE_ATTR_XATTR_H +#include +#endif #ifdef HAVE_SYS_IOCTL_H #include #endif @@ -134,6 +137,7 @@ static int set_acl(struct archive *, int fd, struct archive_entry *, acl_type_t, int archive_entry_acl_type, const char *tn); #endif static int set_acls(struct archive *, int fd, struct archive_entry *); +static int set_xattrs(struct archive *, int fd, struct archive_entry *); static int set_fflags(struct archive *, int fd, const char *name, mode_t, unsigned long fflags_set, unsigned long fflags_clear); static int set_ownership(struct archive *, int fd, struct archive_entry
*, @@ -1086,6 +1090,12 @@ set_perm(struct archive *a, int fd, struct archive_entry *entry, return (r); } + if (flags & ARCHIVE_EXTRACT_XATTR) { + r = set_xattrs(a, fd, entry); + if (r != ARCHIVE_OK) + return (r); + } + /* * Make 'critical_flags' hold all file flags that can't be * immediately restored. For example, on BSD systems, @@ -1201,7 +1211,7 @@ set_fflags(struct archive *a, int fd, const char *name, mode_t mode, return (ARCHIVE_WARN); } -#elif defined(__linux) +#elif defined(__linux) && defined(EXT2_IOC_GETFLAGS) && defined(EXT2_IOC_SETFLAGS) /* * Linux has flags too, but uses ioctl() to access them instead of @@ -1214,8 +1224,8 @@ set_fflags(struct archive *a, int fd, const char *name, mode_t mode, struct extract *extract; int ret; int myfd = fd; - int err; unsigned long newflags, oldflags; + unsigned long sf_mask = 0; extract = a->extract; if (set == 0 && clear == 0) @@ -1231,10 +1241,18 @@ set_fflags(struct archive *a, int fd, const char *name, mode_t mode, return (ARCHIVE_OK); /* - * Linux has no define for the flags that are only settable - * by the root user... + * Linux has no define for the flags that are only settable by + * the root user. This code may seem a little complex, but + * there seem to be some Linux systems that lack these + * defines. (?) The code below degrades reasonably gracefully + * if sf_mask is incomplete. */ -#define SF_MASK (EXT2_IMMUTABLE_FL|EXT2_APPEND_FL) +#ifdef EXT2_IMMUTABLE_FL + sf_mask |= EXT2_IMMUTABLE_FL; +#endif +#ifdef EXT2_APPEND_FL + sf_mask |= EXT2_APPEND_FL; +#endif /* * XXX As above, this would be way simpler if we didn't have * to read the current flags from disk. XXX @@ -1250,8 +1268,8 @@ set_fflags(struct archive *a, int fd, const char *name, mode_t mode, } /* If we couldn't set all the flags, try again with a subset. 
*/ if (ioctl(myfd, EXT2_IOC_GETFLAGS, &oldflags) >= 0) { - newflags &= ~SF_MASK; - oldflags &= SF_MASK; + newflags &= ~sf_mask; + oldflags &= sf_mask; newflags |= oldflags; if (ioctl(myfd, EXT2_IOC_SETFLAGS, &newflags) >= 0) goto cleanup; @@ -1389,11 +1407,12 @@ set_acl(struct archive *a, int fd, struct archive_entry *entry, if (fd >= 0 && acl_type == ACL_TYPE_ACCESS && acl_set_fd(fd, acl) == 0) ret = ARCHIVE_OK; else -#endif +#else #if HAVE_ACL_SET_FD_NP if (fd >= 0 && acl_set_fd_np(fd, acl, acl_type) == 0) ret = ARCHIVE_OK; else +#endif #endif if (acl_set_file(name, acl_type, acl) != 0) { archive_set_error(a, errno, "Failed to set %s acl", typename); @@ -1404,9 +1423,85 @@ set_acl(struct archive *a, int fd, struct archive_entry *entry, } #endif +#if HAVE_LSETXATTR /* - * The following routines do some basic caching of uname/gname lookups. - * All such lookups go through these routines, including ACL conversions. + * Restore extended attributes - Linux implementation + */ +static int +set_xattrs(struct archive *a, int fd, struct archive_entry *entry) +{ + static int warning_done = 0; + int ret = ARCHIVE_OK; + int i = archive_entry_xattr_reset(entry); + + while (i--) { + const char *name; + const void *value; + size_t size; + archive_entry_xattr_next(entry, &name, &value, &size); + if (name != NULL && + strncmp(name, "xfsroot.", 8) != 0 && + strncmp(name, "system.", 7) != 0) { + int e; +#if HAVE_FSETXATTR + if (fd >= 0) + e = fsetxattr(fd, name, value, size, 0); + else +#endif + { + e = lsetxattr(archive_entry_pathname(entry), + name, value, size, 0); + } + if (e == -1) { /* errno still holds the code set by the failed setxattr call */ + if (errno == ENOTSUP) { + if (!warning_done) { + warning_done = 1; + archive_set_error(a, errno, + "Cannot restore extended " + "attributes on this file " + "system"); + } + } else + archive_set_error(a, errno, + "Failed to set extended attribute"); + ret = ARCHIVE_WARN; + } + } else { + archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, + "Invalid extended attribute encountered"); + ret = ARCHIVE_WARN;
+ } + } + return (ret); +} +#else +/* + * Restore extended attributes - stub implementation for unsupported systems + */ +static int +set_xattrs(struct archive *a, int fd, struct archive_entry *entry) +{ + static int warning_done = 0; + (void)a; /* UNUSED */ + (void)fd; /* UNUSED */ + (void)entry; /* UNUSED */ + if (!warning_done) { + warning_done = 1; + archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, + "Cannot restore extended attributes on this system"); + return (ARCHIVE_WARN); + } + /* Warning was already emitted; suppress further warnings. */ + return (ARCHIVE_OK); +} +#endif + +/* + * The following routines do some basic caching of uname/gname + * lookups. All such lookups go through these routines, including ACL + * conversions. Even a small cache here provides an enormous speedup, + * especially on systems using NIS, LDAP, or a similar networked + * directory system. * * TODO: Provide an API for clients to override these routines. */ @@ -1485,17 +1580,17 @@ lookup_uid(struct archive *a, const char *uname, uid_t uid) static unsigned int hash(const char *p) { - /* A 32-bit version of Peter Weinberger's (PJW) hash algorithm, - as used by ELF for hashing function names. */ - unsigned g,h = 0; - while(*p != '\0') { - h = ( h << 4 ) + *p++; - if (( g = h & 0xF0000000 )) { - h ^= g >> 24; - h &= 0x0FFFFFFF; - } - } - return h; + /* A 32-bit version of Peter Weinberger's (PJW) hash algorithm, + as used by ELF for hashing function names. 
*/ + unsigned g, h = 0; + while (*p != '\0') { + h = ( h << 4 ) + *p++; + if (( g = h & 0xF0000000 )) { + h ^= g >> 24; + h &= 0x0FFFFFFF; + } + } + return h; } void diff --git a/lib/libarchive/archive_read_support_format_cpio.c b/lib/libarchive/archive_read_support_format_cpio.c index 946a74ee55d6..75b041761067 100644 --- a/lib/libarchive/archive_read_support_format_cpio.c +++ b/lib/libarchive/archive_read_support_format_cpio.c @@ -260,7 +260,7 @@ archive_read_format_cpio_read_header(struct archive *a, } /* Compare name to "TRAILER!!!" to test for end-of-archive. */ - if (namelength == 11 && strcmp(h,"TRAILER!!!")==0) { + if (namelength == 11 && strcmp(h, "TRAILER!!!") == 0) { /* TODO: Store file location of start of block. */ archive_set_error(a, 0, NULL); return (ARCHIVE_EOF); diff --git a/lib/libarchive/archive_read_support_format_iso9660.c b/lib/libarchive/archive_read_support_format_iso9660.c index 510d7f08253c..065bf9bc211f 100644 --- a/lib/libarchive/archive_read_support_format_iso9660.c +++ b/lib/libarchive/archive_read_support_format_iso9660.c @@ -364,7 +364,7 @@ archive_read_format_iso9660_read_header(struct archive *a, /* If this is a directory, read in all of the entries right now. */ if (S_ISDIR(st.st_mode)) { - while(iso9660->entry_bytes_remaining > 0) { + while (iso9660->entry_bytes_remaining > 0) { const void *block; const unsigned char *p; ssize_t step = iso9660->logical_block_size; @@ -918,7 +918,7 @@ next_entry(struct iso9660 *iso9660) + iso9660->pending_files[0]->size; /* Now, try to find an earlier one. */ - for(i = 0; i < iso9660->pending_files_used; i++) { + for (i = 0; i < iso9660->pending_files_used; i++) { /* Use the position of the file *end* as our comparison. 
*/ uint64_t end_offset = iso9660->pending_files[i]->offset + iso9660->pending_files[i]->size; diff --git a/lib/libarchive/archive_read_support_format_tar.c b/lib/libarchive/archive_read_support_format_tar.c index 2ab868437acc..6f1045ad418d 100644 --- a/lib/libarchive/archive_read_support_format_tar.c +++ b/lib/libarchive/archive_read_support_format_tar.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2003-2004 Tim Kientzle + * Copyright (c) 2003-2006 Tim Kientzle * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -49,7 +49,7 @@ __FBSDID("$FreeBSD$"); static int wcscmp(const wchar_t *s1, const wchar_t *s2) { int diff = *s1 - *s2; - while(*s1 && diff == 0) + while (*s1 && diff == 0) diff = (int)*++s1 - (int)*++s2; return diff; } @@ -155,6 +155,7 @@ struct tar { static size_t UTF8_mbrtowc(wchar_t *pwc, const char *s, size_t n); static int archive_block_is_null(const unsigned char *p); +static char *base64_decode(const wchar_t *, size_t, size_t *); static int gnu_read_sparse_data(struct archive *, struct tar *, const struct archive_entry_header_gnutar *header); static void gnu_parse_sparse_data(struct archive *, struct tar *, @@ -199,7 +200,10 @@ static int64_t tar_atol256(const char *, unsigned); static int64_t tar_atol8(const char *, unsigned); static int tar_read_header(struct archive *, struct tar *, struct archive_entry *, struct stat *); +static int tohex(int c); +static char *url_decode(const char *); static int utf8_decode(wchar_t *, const char *, size_t length); +static char *wide_to_narrow(const wchar_t *wval); /* * ANSI C99 defines constants for these, but not everyone supports @@ -1154,7 +1158,42 @@ pax_header(struct archive *a, struct tar *tar, struct archive_entry *entry, return (err); } +static int +pax_attribute_xattr(struct archive_entry *entry, + wchar_t *name, wchar_t *value) +{ + char *name_decoded, *name_narrow; + void *value_decoded; + size_t value_len; + if (wcslen(name) < 18 || (wcsncmp(name, 
L"LIBARCHIVE.xattr.", 17)) != 0) + return 3; + + name += 17; + + /* URL-decode name */ + name_narrow = wide_to_narrow(name); + if (name_narrow == NULL) + return 2; + name_decoded = url_decode(name_narrow); + free(name_narrow); + if (name_decoded == NULL) + return 2; + + /* Base-64 decode value */ + value_decoded = base64_decode(value, wcslen(value), &value_len); + if (value_decoded == NULL) { + free(name_decoded); + return 1; + } + + archive_entry_xattr_add_entry(entry, name_decoded, + value_decoded, value_len); + + free(name_decoded); + free(value_decoded); + return 0; +} /* * Parse a single key=value attribute. key/value pointers are @@ -1184,6 +1223,8 @@ pax_attribute(struct archive_entry *entry, struct stat *st, if (strcmp(key, "LIBARCHIVE.xxxxxxx")==0) archive_entry_set_xxxxxx(entry, value); */ + if (wcsncmp(key, L"LIBARCHIVE.xattr.", 17)==0) + pax_attribute_xattr(entry, key, value); break; case 'S': /* We support some keys used by the "star" archiver */ @@ -1599,7 +1640,7 @@ utf8_decode(wchar_t *dest, const char *src, size_t length) int err; err = 0; - while(length > 0) { + while (length > 0) { n = UTF8_mbrtowc(dest, src, length); if (n == 0) break; @@ -1721,3 +1762,159 @@ UTF8_mbrtowc(wchar_t *pwc, const char *s, size_t n) } return (wch == L'\0' ? 0 : len); } + + +/* + * base64_decode - Base64 decode + * + * This accepts most variations of base-64 encoding, including: + * * with or without line breaks + * * with or without the final group padded with '=' or '_' characters + * (The most economical Base-64 variant does not pad the last group and + * omits line breaks; RFC1341 used for MIME requires both.) + */ +static char * +base64_decode(const wchar_t *src, size_t len, size_t *out_len) +{ + static const unsigned char digits[64] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + static unsigned char decode_table[128]; + char *out, *d; + + /* If the decode table is not yet initialized, prepare it. 
*/ + if (decode_table[digits[1]] != 1) { + size_t i; + memset(decode_table, 0xff, sizeof(decode_table)); + for (i = 0; i < sizeof(digits); i++) + decode_table[digits[i]] = i; + } + + /* Allocate enough space to hold the entire output. */ + /* Note that we may not use all of this... */ + out = malloc((len * 3 + 3) / 4); + if (out == NULL) { + *out_len = 0; + return (NULL); + } + d = out; + + while (len > 0) { + /* Collect the next group of (up to) four characters. */ + int v = 0; + int group_size = 0; + while (group_size < 4 && len > 0) { + /* '=' or '_' padding indicates final group. */ + if (*src == '=' || *src == '_') { + len = 0; + break; + } + /* Skip illegal characters (including line breaks) */ + if (*src > 127 || *src < 32 + || decode_table[*src] == 0xff) { + len--; + src++; + continue; + } + v <<= 6; + v |= decode_table[*src++]; + len --; + group_size++; + } + /* Align a short group properly. */ + v <<= 6 * (4 - group_size); + /* Unpack the group we just collected. */ + switch (group_size) { + case 4: d[2] = v & 0xff; + /* FALLTHROUGH */ + case 3: d[1] = (v >> 8) & 0xff; + /* FALLTHROUGH */ + case 2: d[0] = (v >> 16) & 0xff; + break; + case 1: /* this is invalid! */ + break; + } + d += group_size * 3 / 4; + } + + *out_len = d - out; + return (out); +} + +/* + * This is a little tricky because the C99 standard wcstombs() + * function returns the number of bytes that were converted, + * not the number that should be converted. As a result, + * we can never accurately size the output buffer (without + * doing a tedious output size calculation in advance). + * This approach (try a conversion, then try again if it fails) + * will almost always succeed on the first try, and is thus + * much faster, at the cost of sometimes requiring multiple + * passes while we expand the buffer. + */ +static char * +wide_to_narrow(const wchar_t *wval) +{ + int converted_length; + /* Guess an output buffer size and try the conversion. 
*/ + int alloc_length = wcslen(wval) * 3; + char *mbs_val = malloc(alloc_length + 1); + if (mbs_val == NULL) + return (NULL); + converted_length = wcstombs(mbs_val, wval, alloc_length); + + /* If we exhausted the buffer, resize and try again. */ + while (converted_length >= alloc_length) { + free(mbs_val); + alloc_length *= 2; + mbs_val = malloc(alloc_length + 1); + if (mbs_val == NULL) + return (NULL); + converted_length = wcstombs(mbs_val, wval, alloc_length); + } + + /* Ensure a trailing null and return the final string. */ + mbs_val[alloc_length] = '\0'; + return (mbs_val); +} + +static char * +url_decode(const char *in) +{ + char *out, *d; + const char *s; + + out = malloc(strlen(in) + 1); + if (out == NULL) + return (NULL); + for (s = in, d = out; *s != '\0'; ) { + if (*s == '%') { + /* Try to convert % escape; s[2] is read only when s[1] is a hex digit, so a '%' at the very end never reads past the NUL. */ + int digit1 = tohex(s[1]); + int digit2 = (digit1 >= 0) ? tohex(s[2]) : -1; + if (digit1 >= 0 && digit2 >= 0) { + /* Looks good, consume three chars */ + s += 3; + /* Convert output */ + *d++ = ((digit1 << 4) | digit2); + continue; + } + /* Else fall through and treat '%' as normal char */ + } + *d++ = *s++; + } + *d = '\0'; + return (out); +} + +static int +tohex(int c) +{ + if (c >= '0' && c <= '9') + return (c - '0'); + else if (c >= 'A' && c <= 'F') + return (c - 'A' + 10); + else if (c >= 'a' && c <= 'f') + return (c - 'a' + 10); + else + return (-1); +} diff --git a/lib/libarchive/archive_write_set_format_pax.c b/lib/libarchive/archive_write_set_format_pax.c index 4e9857bf0eb3..c256cb255280 100644 --- a/lib/libarchive/archive_write_set_format_pax.c +++ b/lib/libarchive/archive_write_set_format_pax.c @@ -66,11 +66,13 @@ static int archive_write_pax_finish(struct archive *); static int archive_write_pax_finish_entry(struct archive *); static int archive_write_pax_header(struct archive *, struct archive_entry *); +static char *base64_encode(const char *src, size_t len); static char *build_pax_attribute_name(char *dest, const char *src); static char
*build_ustar_entry_name(char *dest, const char *src, size_t src_length, const char *insert); static char *format_int(char *dest, int64_t); static int has_non_ASCII(const wchar_t *); +static char *url_encode(const char *in); static int write_nulls(struct archive *, size_t); /* @@ -142,7 +144,7 @@ add_pax_attr_time(struct archive_string *as, const char *key, t = tmp + sizeof(tmp) - 1; /* Skip trailing zeros in the fractional part. */ - for(digit = 0, i = 10; i > 0 && digit == 0; i--) { + for (digit = 0, i = 10; i > 0 && digit == 0; i--) { digit = nanos % 10; nanos /= 10; } @@ -190,10 +192,10 @@ add_pax_attr_int(struct archive_string *as, const char *key, int64_t value) add_pax_attr(as, key, format_int(tmp + sizeof(tmp) - 1, value)); } -static void -add_pax_attr_w(struct archive_string *as, const char *key, const wchar_t *wval) +static char * +utf8_encode(const wchar_t *wval) { - int utf8len; + int utf8len; const wchar_t *wp; unsigned long wc; char *utf8_value, *p; @@ -217,8 +219,10 @@ add_pax_attr_w(struct archive_string *as, const char *key, const wchar_t *wval) } utf8_value = malloc(utf8len + 1); - if (utf8_value == NULL) + if (utf8_value == NULL) { __archive_errx(1, "Not enough memory for attributes"); + return (NULL); + } for (wp = wval, p = utf8_value; *wp != L'\0'; ) { wc = *wp++; @@ -258,6 +262,16 @@ add_pax_attr_w(struct archive_string *as, const char *key, const wchar_t *wval) /* Ignore larger values; UTF-8 can't encode them. 
*/ } *p = '\0'; + + return (utf8_value); +} + +static void +add_pax_attr_w(struct archive_string *as, const char *key, const wchar_t *wval) +{ + char *utf8_value = utf8_encode(wval); + if (utf8_value == NULL) + return; add_pax_attr(as, key, utf8_value); free(utf8_value); } @@ -311,6 +325,53 @@ add_pax_attr(struct archive_string *as, const char *key, const char *value) archive_strappend_char(as, '\n'); } +static void +archive_write_pax_header_xattrs(struct pax *pax, struct archive_entry *entry) +{ + struct archive_string s; + int i = archive_entry_xattr_reset(entry); + + while (i--) { + const char *name; + const void *value; + char *encoded_value; + char *url_encoded_name = NULL, *encoded_name = NULL; + wchar_t *wcs_name = NULL; + size_t size; + + archive_entry_xattr_next(entry, &name, &value, &size); + /* Name is URL-encoded, then converted to wchar_t, + * then UTF-8 encoded. */ + url_encoded_name = url_encode(name); + if (url_encoded_name != NULL) { + /* Convert narrow-character to wide-character. */ + int wcs_length = strlen(url_encoded_name); + wcs_name = malloc((wcs_length + 1) * sizeof(wchar_t)); + if (wcs_name == NULL) + __archive_errx(1, "No memory for xattr conversion"); + mbstowcs(wcs_name, url_encoded_name, wcs_length); + wcs_name[wcs_length] = 0; + free(url_encoded_name); /* Done with this. */ + } + if (wcs_name != NULL) { + encoded_name = utf8_encode(wcs_name); + free(wcs_name); /* Done with wchar_t name. */ + } + + encoded_value = base64_encode(value, size); + + if (encoded_name != NULL && encoded_value != NULL) { + archive_string_init(&s); + archive_strcpy(&s, "LIBARCHIVE.xattr."); + archive_strcat(&s, encoded_name); + add_pax_attr(&(pax->pax_header), s.s, encoded_value); + archive_string_free(&s); + } + free(encoded_name); + free(encoded_value); + } +} + /* * TODO: Consider adding 'comment' and 'charset' fields to * archive_entry so that clients can specify them. 
Also, consider @@ -538,6 +599,10 @@ archive_write_pax_header(struct archive *a, ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) > 0) need_extension = 1; + /* If there are extended attributes, we need an extension */ + if (!need_extension && archive_entry_xattr_count(entry_original) > 0) + need_extension = 1; + /* * The following items are handled differently in "pax * restricted" format. In particular, in "pax restricted" @@ -595,6 +660,9 @@ archive_write_pax_header(struct archive *a, st_main->st_ino); add_pax_attr_int(&(pax->pax_header), "SCHILY.nlink", st_main->st_nlink); + + /* Store extended attributes */ + archive_write_pax_header_xattrs(pax, entry_original); } /* Only regular files have data. */ @@ -1026,3 +1094,94 @@ has_non_ASCII(const wchar_t *wp) wp++; return (*wp != L'\0'); } + +/* + * Used by extended attribute support; encodes the name + * so that there will be no '=' characters in the result. + */ +static char * +url_encode(const char *in) +{ + const char *s; + char *d; + int out_len = 0; + char *out; + + for (s = in; *s != '\0'; s++) { + if (*s < 33 || *s > 126 || *s == '%' || *s == '=') + out_len += 3; + else + out_len++; + } + + out = (char *)malloc(out_len + 1); + if (out == NULL) + return (NULL); + + for (s = in, d = out; *s != '\0'; s++) { + /* encode any non-printable ASCII character or '%' or '=' */ + if (*s < 33 || *s > 126 || *s == '%' || *s == '=') { + /* URL encoding is '%' followed by two hex digits */ + *d++ = '%'; + *d++ = "0123456789ABCDEF"[0x0f & (*s >> 4)]; + *d++ = "0123456789ABCDEF"[0x0f & *s]; + } else { + *d++ = *s; + } + } + *d = '\0'; + return (out); +} + +/* + * Encode a sequence of bytes into a C string using base-64 encoding. + * + * Returns a null-terminated C string allocated with malloc(); caller + * is responsible for freeing the result. 
+ */ +static char * +base64_encode(const char *s, size_t len) +{ + static const char digits[64] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + int v; + char *d, *out; + + /* 3 bytes becomes 4 chars, but round up and allow for trailing NUL */ + out = malloc((len * 4 + 2) / 3 + 1); + if (out == NULL) + return (NULL); + d = out; + + /* Convert each group of 3 bytes into 4 characters; bytes are widened via unsigned char so a negative char is never left-shifted. */ + while (len >= 3) { + v = (((int)(unsigned char)s[0] << 16) & 0xff0000) + | (((int)(unsigned char)s[1] << 8) & 0xff00) + | (((int)(unsigned char)s[2]) & 0x00ff); + s += 3; + len -= 3; + *d++ = digits[(v >> 18) & 0x3f]; + *d++ = digits[(v >> 12) & 0x3f]; + *d++ = digits[(v >> 6) & 0x3f]; + *d++ = digits[(v) & 0x3f]; + } + /* Handle final group of 1 byte (2 chars) or 2 bytes (3 chars). */ + switch (len) { + case 0: break; + case 1: + v = (((int)(unsigned char)s[0] << 16) & 0xff0000); + *d++ = digits[(v >> 18) & 0x3f]; + *d++ = digits[(v >> 12) & 0x3f]; + break; + case 2: + v = (((int)(unsigned char)s[0] << 16) & 0xff0000) + | (((int)(unsigned char)s[1] << 8) & 0xff00); + *d++ = digits[(v >> 18) & 0x3f]; + *d++ = digits[(v >> 12) & 0x3f]; + *d++ = digits[(v >> 6) & 0x3f]; + break; + } + /* Add trailing NUL character so output is a valid C string. */ + *d++ = '\0'; + return (out); +} diff --git a/lib/libarchive/tar.5 b/lib/libarchive/tar.5 index d46e1b54abef..242a3d0b2752 100644 --- a/lib/libarchive/tar.5 +++ b/lib/libarchive/tar.5 @@ -399,6 +399,21 @@ Schilling's .Cm SCHILY.* extensions can store all of the data from .Va struct stat . +.It Cm LIBARCHIVE.xattr. Ns Ar namespace Ns . Ns Ar key +Libarchive stores POSIX.1e-style extended attributes using +keys of this form. The +.Ar key +value is URL-encoded: +All bytes outside the printable ASCII range 33-126 (control characters, space, and non-ASCII bytes) and the two special characters +.Dq = +and +.Dq % +are encoded as +.Dq % +followed by two uppercase hexadecimal digits. +The value of this key is the extended attribute value +encoded in base 64. +The writer uses the standard base-64 alphabet (A-Z, a-z, 0-9, plus sign, slash) and emits neither line breaks nor trailing padding. .It Cm VENDOR.* XXX document other vendor-specific extensions XXX .El