/*-
 * Copyright (c) 2003-2004 Tim Kientzle
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "archive_platform.h"
__FBSDID("$FreeBSD$");

#include <sys/stat.h>
#ifdef HAVE_DMALLOC
#include <dmalloc.h>
#endif
#include <err.h>
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "archive.h"
#include "archive_entry.h"
#include "archive_private.h"

/*
 * Structure of GNU tar header
 */
struct archive_entry_header_gnutar {
	char	name[100];
	char	mode[8];
	char	uid[8];
	char	gid[8];
	char	size[12];
	char	mtime[12];
	char	checksum[8];
	char	typeflag[1];
	char	linkname[100];
	char	magic[8];  /* "ustar  \0" (note blank/blank/null at end) */
	char	uname[32];
	char	gname[32];
	char	devmajor[8];
	char	devminor[8];
	char	atime[12];
	char	ctime[12];
	char	offset[12];
	char	longnames[4];
	char	unused[1];
	struct {
		char	offset[12];
		char	numbytes[12];
	} sparse[4];
	char	isextended[1];
	char	realsize[12];
	/*
	 * GNU doesn't use POSIX 'prefix' field; they use the 'L' (longname)
	 * entry instead.
	 */
};

static int	archive_block_is_null(const unsigned char *p);
static int	archive_header_gnu(struct archive *, struct archive_entry *,
		    const void *);
static int	archive_read_format_gnutar_bid(struct archive *a);
static int	archive_read_format_gnutar_read_header(struct archive *a,
		    struct archive_entry *);
static int	checksum(struct archive *a, const void *h);
static int64_t	tar_atol(const char *, unsigned);
static int64_t	tar_atol8(const char *, unsigned);
static int64_t	tar_atol256(const char *, unsigned);

/*
 * The ONLY publicly visible function in this file.
 *
 * Registers the GNU tar bidder and header reader with the archive
 * handle and returns whatever __archive_read_register_format() returns.
 */
int
archive_read_support_format_gnutar(struct archive *a)
{
	return (__archive_read_register_format(a, NULL,
	    archive_read_format_gnutar_bid,
	    archive_read_format_gnutar_read_header,
	    NULL));
}

/*
 * Bid on the next 512-byte block.
 *
 * Returns 0 if the block is not a GNU tar header (or another non-tar
 * format is already in use), -1 if a full 512-byte block could not be
 * read ahead, and otherwise a positive score: 64 for a valid checksum,
 * 64 for the GNU magic, plus 10 if the previous header was GNU tar too.
 */
static int
archive_read_format_gnutar_bid(struct archive *a)
{
	int bid;
	ssize_t bytes_read;
	const void *h;
	const struct archive_entry_header_gnutar *header;

	/*
	 * If we're already reading a non-tar file, don't
	 * bother to bid.
	 */
	if (a->archive_format != 0 &&
	    (a->archive_format & ARCHIVE_FORMAT_BASE_MASK) !=
	    ARCHIVE_FORMAT_TAR)
		return (0);

	bid = 0;

	/* If last header was my preferred format, bid a bit more. */
	if (a->archive_format == ARCHIVE_FORMAT_TAR_GNUTAR)
		bid += 10;

	/*
	 * The result is kept in a signed type so that a negative
	 * (error) return is caught by the short-read test below
	 * instead of wrapping around to a huge unsigned count.
	 */
	bytes_read = (a->compression_read_ahead)(a, &h, 512);
	if (bytes_read < 512)
		return (-1);

	/*
	 * TODO: if checksum or header fail, scan ahead for
	 * next valid header.
	 */

	/* Checksum field is eight 8-bit values: 64 bits of validation. */
	if (!checksum(a, h))
		return (0);
	bid += 64;

	header = (const struct archive_entry_header_gnutar *)h;

	/* This distinguishes GNU tar formats from POSIX formats */
	if (memcmp(header->magic, "ustar  \0", 8) != 0)
		return (0);
	bid += 64;

	return (bid);
}

/*
 * Read the next header block and fill in 'entry'.
 *
 * Returns ARCHIVE_EOF on an all-null block, ARCHIVE_FATAL on a short
 * read, a bad checksum, or too many chained special headers, and
 * otherwise the result of archive_header_gnu() (0 on success).
 */
static int
archive_read_format_gnutar_read_header(struct archive *a,
    struct archive_entry *entry)
{
	const void *h;
	ssize_t bytes;
	int oldstate;
	int r;

	a->archive_format = ARCHIVE_FORMAT_TAR_GNUTAR;
	a->archive_format_name = "GNU tar";

	/* Skip remains of previous entry. */
	oldstate = a->state;
	a->state = ARCHIVE_STATE_DATA;
	archive_read_data_skip(a);
	a->state = oldstate;

	/* Read 512-byte header record */
	bytes = (a->compression_read_ahead)(a, &h, 512);
	if (bytes < 512)
		return (ARCHIVE_FATAL);
	(a->compression_read_consume)(a, 512);

	/*
	 * If this is a block of nulls, return ARCHIVE_EOF (no more
	 * entries).  Note the initial (*h)==0 test short-circuits the
	 * function call in the most common case.
	 */
	if (((*(const char *)h) == 0) && archive_block_is_null(h)) {
		/* TODO: Store file location of start of block in public area */
		archive_set_error(a, 0, NULL);
		return (ARCHIVE_EOF);
	}

	/* TODO: add support for scanning for next valid header */
	if (!checksum(a, h)) {
		archive_set_error(a, EINVAL, "Damaged GNU tar archive");
		return (ARCHIVE_FATAL); /* Not a valid header. */
	}

	/*
	 * This function gets called recursively for long name headers,
	 * etc.  A malformed archive must not be able to terminate the
	 * calling program, so an over-deep chain is reported as a fatal
	 * archive error rather than by exiting.
	 */
	if (++a->gnu_header_recursion_depth > 32) {
		a->gnu_header_recursion_depth--;
		archive_set_error(a, EINVAL,
		    "Too many special headers for one entry");
		return (ARCHIVE_FATAL);
	}

	/* Hand back the parser's status so EOF/FATAL reach the caller. */
	r = archive_header_gnu(a, entry, h);
	a->gnu_header_recursion_depth--;
	return (r);
}

/*
 * Return true if block checksum is correct.
 *
 * The eight checksum bytes themselves (offsets 148..155) are counted
 * as blanks.  Both the unsigned and the signed byte sums are accepted.
 */
static int
checksum(struct archive *a, const void *h)
{
	const unsigned char *bytes;
	const struct archive_entry_header_gnutar *header;
	int i, sum, signed_sum, unsigned_sum;

	(void)a; /* UNUSED */
	bytes = h;
	header = h;

	/* Test checksum: POSIX specifies UNSIGNED for this calculation. */
	sum = tar_atol(header->checksum, sizeof(header->checksum));
	unsigned_sum = 0;
	for (i = 0; i < 148; i++)
		unsigned_sum += (unsigned char)bytes[i];
	for (; i < 156; i++)
		unsigned_sum += 32;
	for (; i < 512; i++)
		unsigned_sum += (unsigned char)bytes[i];
	if (sum == unsigned_sum)
		return (1);

	/*
	 * Repeat test with SIGNED bytes, just in case this archive
	 * was created by an old BSD, Solaris, or HP-UX tar with a broken
	 * checksum calculation.
	 */
	signed_sum = 0;
	for (i = 0; i < 148; i++)
		signed_sum += (signed char)bytes[i];
	for (; i < 156; i++)
		signed_sum += 32;
	for (; i < 512; i++)
		signed_sum += (signed char)bytes[i];
	if (sum == signed_sum)
		return (1);

	return (0);
}

/*
 * Return true if this block contains only nulls.
 *
 * Examines ARCHIVE_BYTES_PER_RECORD bytes starting at p.
 */
static int
archive_block_is_null(const unsigned char *p)
{
	unsigned i;

	for (i = 0; i < ARCHIVE_BYTES_PER_RECORD / sizeof(*p); i++) {
		if (*p++)
			return (0);
	}
	return (1);
}

/*
 * Parse GNU tar header
 *
 * Fills 'entry' from the 512-byte header block 'h'.  For the GNU
 * long-name ('L'), long-link ('K') and volume ('V') records, the
 * following header is read recursively and its status is returned.
 * Returns 0 on success.
 */
static int
archive_header_gnu(struct archive *a, struct archive_entry *entry,
    const void *h)
{
	struct stat st;
	const struct archive_entry_header_gnutar *header;
	char tartype;
	int r;

	/* Clear out entry structure */
	memset(&st, 0, sizeof(st));

	/*
	 * GNU header is like POSIX, except 'prefix' is
	 * replaced with some other fields. This also means the
	 * filename is stored as in old-style archives.
	 */

	/* Copy filename over (to ensure null termination). */
	header = h;
	archive_strncpy(&(a->entry_name), header->name,
	    sizeof(header->name));
	archive_entry_set_pathname(entry, a->entry_name.s);

	/* Copy linkname over */
	if (header->linkname[0])
		archive_strncpy(&(a->entry_linkname), header->linkname,
		    sizeof(header->linkname));

	/* Parse out the numeric fields (all are octal) */
	st.st_mode = tar_atol(header->mode, sizeof(header->mode));
	st.st_uid = tar_atol(header->uid, sizeof(header->uid));
	st.st_gid = tar_atol(header->gid, sizeof(header->gid));
	st.st_size = tar_atol(header->size, sizeof(header->size));
	st.st_mtime = tar_atol(header->mtime, sizeof(header->mtime));

	/* Handle the tar type flag appropriately. */
	tartype = header->typeflag[0];
	archive_entry_set_tartype(entry, tartype);
	st.st_mode &= ~S_IFMT;

	/* Fields common to ustar and GNU */
	archive_strncpy(&(a->entry_uname),
	    header->uname, sizeof(header->uname));
	archive_entry_set_uname(entry, a->entry_uname.s);

	archive_strncpy(&(a->entry_gname),
	    header->gname, sizeof(header->gname));
	archive_entry_set_gname(entry, a->entry_gname.s);

	/* Parse out device numbers only for char and block specials */
	if (header->typeflag[0] == '3' || header->typeflag[0] == '4')
		st.st_rdev = makedev(
		    tar_atol(header->devmajor, sizeof(header->devmajor)),
		    tar_atol(header->devminor, sizeof(header->devminor)));
	else
		st.st_rdev = 0;

	/* Grab additional GNU fields. */
	/* TODO: FILL THIS IN!!! */
	st.st_atime = tar_atol(header->atime, sizeof(header->atime));
	/* ctime has its own header field; do not reuse atime here. */
	st.st_ctime = tar_atol(header->ctime, sizeof(header->ctime));

	/* Set internal counter for locating next header */
	a->entry_bytes_remaining = st.st_size;
	a->entry_padding = 0x1ff & (-a->entry_bytes_remaining);

	/* Interpret entry type */
	switch (tartype) {
	case '1': /* Hard link */
		archive_entry_set_hardlink(entry, a->entry_linkname.s);
		/*
		 * Note: Technically, tar does not store the file type
		 * for a "hard link" entry, only the fact that it is a
		 * hard link.  So, I leave the file type in st_mode
		 * zero here.
		 */
		archive_entry_copy_stat(entry, &st);
		break;
	case '2': /* Symlink */
		st.st_mode |= S_IFLNK;
		st.st_size = 0;
		archive_entry_set_symlink(entry, a->entry_linkname.s);
		archive_entry_copy_stat(entry, &st);
		break;
	case '3': /* Character device */
		st.st_mode |= S_IFCHR;
		st.st_size = 0;
		archive_entry_copy_stat(entry, &st);
		break;
	case '4': /* Block device */
		st.st_mode |= S_IFBLK;
		st.st_size = 0;
		archive_entry_copy_stat(entry, &st);
		break;
	case '5': /* POSIX Dir */
		st.st_mode |= S_IFDIR;
		st.st_size = 0;
		archive_entry_copy_stat(entry, &st);
		break;
	case '6': /* FIFO device */
		st.st_mode |= S_IFIFO;
		st.st_size = 0;
		archive_entry_copy_stat(entry, &st);
		break;
	case 'D': /* GNU incremental directory type */
		/*
		 * No special handling is actually required here.
		 * It might be nice someday to preprocess the file list and
		 * provide it to the client, though.
		 */
		st.st_mode &= ~S_IFMT;
		st.st_mode |= S_IFDIR;
		archive_entry_copy_stat(entry, &st);
		break;
	case 'K': /* GNU long linkname */
		/*
		 * The size comes straight from the archive; a negative
		 * value would index before the start of the buffer.
		 */
		if (st.st_size < 0) {
			archive_set_error(a, EINVAL,
			    "Damaged GNU tar archive");
			return (ARCHIVE_FATAL);
		}
		/* Entry body is full name of link for next header. */
		archive_string_ensure(&(a->gnu_linkname), st.st_size + 1);
		archive_read_data_into_buffer(a, a->gnu_linkname.s,
		    st.st_size);
		a->gnu_linkname.s[st.st_size] = 0; /* Null term name! */
		/*
		 * This next call will usually overwrite
		 * a->entry_linkname, which is why we _must_ have a
		 * separate gnu_linkname field.
		 */
		r = archive_read_format_gnutar_read_header(a, entry);
		if (r != 0)
			return (r);
		if (archive_entry_tartype(entry) == '1')
			archive_entry_set_hardlink(entry, a->gnu_linkname.s);
		else if (archive_entry_tartype(entry) == '2')
			archive_entry_set_symlink(entry, a->gnu_linkname.s);
		/* TODO: else { ... } */
		break;
	case 'L': /* GNU long filename */
		/* Reject a negative size for the same reason as 'K'. */
		if (st.st_size < 0) {
			archive_set_error(a, EINVAL,
			    "Damaged GNU tar archive");
			return (ARCHIVE_FATAL);
		}
		/* Entry body is full pathname for next header. */
		archive_string_ensure(&(a->gnu_name), st.st_size + 1);
		archive_read_data_into_buffer(a, a->gnu_name.s,
		    st.st_size);
		a->gnu_name.s[st.st_size] = 0; /* Null terminate name! */
		/*
		 * This next call will typically overwrite a->entry_name,
		 * which is why we _must_ have a separate gnu_name field.
		 */
		r = archive_read_format_gnutar_read_header(a, entry);
		if (r != 0)
			return (r);
		archive_entry_set_pathname(entry, a->gnu_name.s);
		break;
	case 'M': /* GNU Multi-volume (remainder of file from last archive) */
		/*
		 * As far as I can tell, this is just like a regular file
		 * entry, except that the contents should be _appended_ to
		 * the indicated file at the indicated offset.  This may
		 * require some API work to fully support.
		 */
		break;
	case 'N': /* Old GNU long filename; this will never be supported */
		/*
		 * Essentially, body of this entry is a script for
		 * renaming previously-extracted entries.  Ugh.
		 */
		break;
	case 'S': /* GNU Sparse files: These are really ugly, and unlikely
		   * to be supported anytime soon. */
		break;
	case 'V': /* GNU volume header */
		/* Just skip it */
		return (archive_read_format_gnutar_read_header(a, entry));
	default: /* Regular file and non-standard types */
		/*
		 * Per POSIX: non-recognized types should always be
		 * treated as regular files.  Of course, GNU
		 * extensions aren't compatible with this dictum.
		 */
		st.st_mode |= S_IFREG;
		archive_entry_copy_stat(entry, &st);
		break;
	}

	return (0);
}

/*
 * Convert text->integer.
 *
 * Traditional tar formats (including POSIX) specify base-8 for
 * all of the standard numeric fields.  GNU tar supports base-256
 * as well in many of the numeric fields.  There is also an old
 * and short-lived base-64 format, but I doubt I'll ever see
 * an archive that uses it.  (According to the changelog for GNU tar,
 * that format was only implemented for a couple of weeks!)
 *
 * A set high bit in the first byte selects the base-256 parser.
 */
static int64_t
tar_atol(const char *p, unsigned char_cnt)
{
	if (*p & 0x80)
		return (tar_atol256(p, char_cnt));
	return (tar_atol8(p, char_cnt));
}

/*
 * Parse an octal field of at most char_cnt bytes.
 *
 * Leading blanks/tabs and an optional '-' are accepted; parsing stops
 * at the first non-octal character or at the end of the field,
 * whichever comes first.  Values that would overflow are clamped to
 * INT64_MAX (negated if a '-' was seen).
 *
 * Note that this implementation does not (and should not!) obey
 * locale settings; you cannot simply substitute strtol here, since
 * it does obey locale.
 */
static int64_t
tar_atol8(const char *p, unsigned char_cnt)
{
	int64_t l;
	int digit, sign;
	static const int64_t limit = INT64_MAX / 8;
	static const int base = 8;
	static const char last_digit_limit = INT64_MAX % 8;

	/*
	 * Padding and sign count against the field width; otherwise a
	 * space-padded field would absorb digits from its neighbour.
	 */
	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}
	sign = 1;
	if (char_cnt > 0 && *p == '-') {
		sign = -1;
		p++;
		char_cnt--;
	}

	l = 0;
	while (char_cnt-- > 0) {
		digit = *p++ - '0';
		if (digit < 0 || digit >= base)
			break;
		if (l > limit || (l == limit && digit > last_digit_limit)) {
			l = INT64_MAX; /* Truncate on overflow */
			break;
		}
		l = (l * base) + digit;
	}
	return (sign < 0) ? -l : l;
}

/*
 * Parse a base-256 integer.
 *
 * Reads exactly char_cnt bytes, most significant first, and clamps to
 * INT64_MAX on overflow.
 *
 * TODO: This overflows very quickly for negative values; fix this.
 */
static int64_t
tar_atol256(const char *p, unsigned char_cnt)
{
	const unsigned char *bytes;
	int64_t l;
	int digit;
	unsigned i;
	const int64_t limit = INT64_MAX / 256;

	bytes = (const unsigned char *)p;
	l = 0;
	for (i = 0; i < char_cnt; i++) {
		/* Ignore high bit of first byte (that's the base-256 flag). */
		digit = (i == 0) ? (bytes[i] & 0x7f) : bytes[i];
		if (l > limit) {
			l = INT64_MAX; /* Truncate on overflow */
			break;
		}
		l = (l << 8) + digit;
	}
	return (l);
}