From 7584fa8aa43ebc5cd637eee3ececf8029678dcb7 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Sun, 31 Aug 2008 07:10:40 +0000 Subject: [PATCH] Joerg Sonnenberger's improvements to the shar writer. Significant performance improvements, better quoting of file names, etc. --- .../archive_write_set_format_shar.c | 445 ++++++++++-------- 1 file changed, 255 insertions(+), 190 deletions(-) diff --git a/lib/libarchive/archive_write_set_format_shar.c b/lib/libarchive/archive_write_set_format_shar.c index a7f2d8f468bf..f3664a856be7 100644 --- a/lib/libarchive/archive_write_set_format_shar.c +++ b/lib/libarchive/archive_write_set_format_shar.c @@ -1,5 +1,6 @@ /*- * Copyright (c) 2003-2007 Tim Kientzle + * Copyright (c) 2008 Joerg Sonnenberger * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,7 +30,6 @@ __FBSDID("$FreeBSD$"); #ifdef HAVE_ERRNO_H #include #endif -#include #include #ifdef HAVE_STDLIB_H #include @@ -49,13 +49,14 @@ struct shar { struct archive_entry *entry; int has_data; char *last_dir; - char outbuff[1024]; - size_t outbytes; + + /* Line buffer for uuencoded dump format */ + char outbuff[45]; size_t outpos; - int uuavail; - char uubuffer[3]; + int wrote_header; struct archive_string work; + struct archive_string quoted_name; }; static int archive_write_shar_finish(struct archive_write *); @@ -67,23 +68,33 @@ static ssize_t archive_write_shar_data_sed(struct archive_write *, static ssize_t archive_write_shar_data_uuencode(struct archive_write *, const void * buff, size_t); static int archive_write_shar_finish_entry(struct archive_write *); -static int shar_printf(struct archive_write *, const char *fmt, ...); -static void uuencode_group(struct shar *); -static int -shar_printf(struct archive_write *a, const char *fmt, ...) +/* + * Copy the given string to the buffer, quoting all shell meta characters + * found. + */ +static void +shar_quote(struct archive_string *buf, const char *str, int in_shell) { - struct shar *shar; - va_list ap; - int ret; + static const char meta[] = "\n \t'`\";&<>()|*?{}[]\\$!#^~"; + size_t len; - shar = (struct shar *)a->format_data; - va_start(ap, fmt); - archive_string_empty(&(shar->work)); - archive_string_vsprintf(&(shar->work), fmt, ap); - ret = ((a->compressor.write)(a, shar->work.s, strlen(shar->work.s))); - va_end(ap); - return (ret); + while (*str != '\0') { + if ((len = strcspn(str, meta)) != 0) { + archive_strncat(buf, str, len); + str += len; + } else if (*str == '\n') { + if (in_shell) + archive_strcat(buf, "\"\n\""); + else + archive_strcat(buf, "\\n"); + ++str; + } else { + archive_strappend_char(buf, '\\'); + archive_strappend_char(buf, *str); + ++str; + } + } } /* @@ -105,6 +116,8 @@ archive_write_set_format_shar(struct archive *_a) return (ARCHIVE_FATAL); } memset(shar, 0, sizeof(*shar)); + archive_string_init(&shar->work); + archive_string_init(&shar->quoted_name); a->format_data = shar; a->pad_uncompressed = 0; @@ -146,16 +159,11 @@ archive_write_shar_header(struct archive_write *a, struct archive_entry *entry) const char *name; char *p, *pp; struct shar *shar; - int ret; shar = (struct shar *)a->format_data; if (!shar->wrote_header) { - ret = shar_printf(a, "#!/bin/sh\n"); - if (ret != ARCHIVE_OK) - return (ret); - ret = shar_printf(a, "# This is a shell archive\n"); - if (ret != ARCHIVE_OK) - return (ret); + archive_strcat(&shar->work, "#!/bin/sh\n"); + archive_strcat(&shar->work, "# This is a shell archive\n"); shar->wrote_header = 1; } @@ -192,10 +200,11 @@ archive_write_shar_header(struct archive_write *a, struct archive_entry *entry) } } + archive_string_empty(&shar->quoted_name); + shar_quote(&shar->quoted_name, name, 1); + /* Stock preparation for all file types. */ - ret = shar_printf(a, "echo x %s\n", name); - if (ret != ARCHIVE_OK) - return (ret); + archive_string_sprintf(&shar->work, "echo x %s\n", shar->quoted_name.s); if (archive_entry_filetype(entry) != AE_IFDIR) { /* Try to create the dir. */ @@ -210,12 +219,10 @@ archive_write_shar_header(struct archive_write *a, struct archive_entry *entry) /* Don't try to "mkdir ." */ free(p); } else if (shar->last_dir == NULL) { - ret = shar_printf(a, - "mkdir -p %s > /dev/null 2>&1\n", p); - if (ret != ARCHIVE_OK) { - free(p); - return (ret); - } + archive_strcat(&shar->work, "mkdir -p "); + shar_quote(&shar->work, p, 1); + archive_strcat(&shar->work, + " > /dev/null 2>&1\n"); shar->last_dir = p; } else if (strcmp(p, shar->last_dir) == 0) { /* We've already created this exact dir. */ @@ -225,13 +232,10 @@ archive_write_shar_header(struct archive_write *a, struct archive_entry *entry) /* We've already created a subdir. */ free(p); } else { - ret = shar_printf(a, - "mkdir -p %s > /dev/null 2>&1\n", p); - if (ret != ARCHIVE_OK) { - free(p); - return (ret); - } - free(shar->last_dir); + archive_strcat(&shar->work, "mkdir -p "); + shar_quote(&shar->work, p, 1); + archive_strcat(&shar->work, + " > /dev/null 2>&1\n"); shar->last_dir = p; } } else { @@ -242,51 +246,47 @@ archive_write_shar_header(struct archive_write *a, struct archive_entry *entry) /* Handle file-type specific issues. */ shar->has_data = 0; if ((linkname = archive_entry_hardlink(entry)) != NULL) { - ret = shar_printf(a, "ln -f %s %s\n", linkname, name); - if (ret != ARCHIVE_OK) - return (ret); + archive_strcat(&shar->work, "ln -f "); + shar_quote(&shar->work, linkname, 1); + archive_string_sprintf(&shar->work, " %s\n", + shar->quoted_name.s); } else if ((linkname = archive_entry_symlink(entry)) != NULL) { - ret = shar_printf(a, "ln -fs %s %s\n", linkname, name); - if (ret != ARCHIVE_OK) - return (ret); + archive_strcat(&shar->work, "ln -fs "); + shar_quote(&shar->work, linkname, 1); + archive_string_sprintf(&shar->work, " %s\n", + shar->quoted_name.s); } else { switch(archive_entry_filetype(entry)) { case AE_IFREG: if (archive_entry_size(entry) == 0) { /* More portable than "touch." */ - ret = shar_printf(a, "test -e \"%s\" || :> \"%s\"\n", name, name); - if (ret != ARCHIVE_OK) - return (ret); + archive_string_sprintf(&shar->work, + "test -e \"%s\" || :> \"%s\"\n", + shar->quoted_name.s, shar->quoted_name.s); } else { if (shar->dump) { - ret = shar_printf(a, - "uudecode -o %s << 'SHAR_END'\n", - name); - if (ret != ARCHIVE_OK) - return (ret); - ret = shar_printf(a, "begin %o %s\n", - archive_entry_mode(entry) & 0777, - name); - if (ret != ARCHIVE_OK) - return (ret); + archive_string_sprintf(&shar->work, + "uudecode -p > %s << 'SHAR_END'\n", + shar->quoted_name.s); + archive_string_sprintf(&shar->work, + "begin %o ", + archive_entry_mode(entry) & 0777); + shar_quote(&shar->work, name, 0); + archive_strcat(&shar->work, "\n"); } else { - ret = shar_printf(a, + archive_string_sprintf(&shar->work, "sed 's/^X//' > %s << 'SHAR_END'\n", - name); - if (ret != ARCHIVE_OK) - return (ret); + shar->quoted_name.s); } shar->has_data = 1; shar->end_of_line = 1; shar->outpos = 0; - shar->outbytes = 0; } break; case AE_IFDIR: - ret = shar_printf(a, "mkdir -p %s > /dev/null 2>&1\n", - name); - if (ret != ARCHIVE_OK) - return (ret); + archive_string_sprintf(&shar->work, + "mkdir -p %s > /dev/null 2>&1\n", + shar->quoted_name.s); /* Record that we just created this directory. */ if (shar->last_dir != NULL) free(shar->last_dir); @@ -302,23 +302,20 @@ archive_write_shar_header(struct archive_write *a, struct archive_entry *entry) */ break; case AE_IFIFO: - ret = shar_printf(a, "mkfifo %s\n", name); - if (ret != ARCHIVE_OK) - return (ret); + archive_string_sprintf(&shar->work, + "mkfifo %s\n", shar->quoted_name.s); break; case AE_IFCHR: - ret = shar_printf(a, "mknod %s c %d %d\n", name, + archive_string_sprintf(&shar->work, + "mknod %s c %d %d\n", shar->quoted_name.s, archive_entry_rdevmajor(entry), archive_entry_rdevminor(entry)); - if (ret != ARCHIVE_OK) - return (ret); break; case AE_IFBLK: - ret = shar_printf(a, "mknod %s b %d %d\n", name, + archive_string_sprintf(&shar->work, + "mknod %s b %d %d\n", shar->quoted_name.s, archive_entry_rdevmajor(entry), archive_entry_rdevminor(entry)); - if (ret != ARCHIVE_OK) - return (ret); break; default: return (ARCHIVE_WARN); @@ -328,69 +325,119 @@ archive_write_shar_header(struct archive_write *a, struct archive_entry *entry) return (ARCHIVE_OK); } -/* XXX TODO: This could be more efficient XXX */ static ssize_t archive_write_shar_data_sed(struct archive_write *a, const void *buff, size_t n) { + static const size_t ensured = 65533; struct shar *shar; const char *src; + char *buf, *buf_end; int ret; size_t written = n; shar = (struct shar *)a->format_data; - if (!shar->has_data) + if (!shar->has_data || n == 0) return (0); src = (const char *)buff; - ret = ARCHIVE_OK; - shar->outpos = 0; - while (n-- > 0) { - if (shar->end_of_line) { - shar->outbuff[shar->outpos++] = 'X'; - shar->end_of_line = 0; - } - if (*src == '\n') - shar->end_of_line = 1; - shar->outbuff[shar->outpos++] = *src++; - if (shar->outpos > sizeof(shar->outbuff) - 2) { - ret = (a->compressor.write)(a, shar->outbuff, - shar->outpos); + /* + * ensure is the number of bytes in buffer before expanding the + * current character. Each operation writes the current character + * and optionally the start-of-new-line marker. This can happen + * twice before entering the loop, so make sure three additional + * bytes can be written. + */ + if (archive_string_ensure(&shar->work, ensured + 3) == NULL) + __archive_errx(1, "Out of memory"); + + if (shar->work.length > ensured) { + ret = (*a->compressor.write)(a, shar->work.s, + shar->work.length); + if (ret != ARCHIVE_OK) + return (ARCHIVE_FATAL); + archive_string_empty(&shar->work); + } + buf = shar->work.s + shar->work.length; + buf_end = shar->work.s + ensured; + + if (shar->end_of_line) { + *buf++ = 'X'; + shar->end_of_line = 0; + } + + while (n-- != 0) { + if ((*buf++ = *src++) == '\n') { + if (n == 0) + shar->end_of_line = 1; + else + *buf++ = 'X'; + } + + if (buf >= buf_end) { + shar->work.length = buf - shar->work.s; + ret = (*a->compressor.write)(a, shar->work.s, + shar->work.length); if (ret != ARCHIVE_OK) - return (ret); - shar->outpos = 0; + return (ARCHIVE_FATAL); + archive_string_empty(&shar->work); + buf = shar->work.s; } } - if (shar->outpos > 0) - ret = (a->compressor.write)(a, shar->outbuff, shar->outpos); - if (ret != ARCHIVE_OK) - return (ret); + shar->work.length = buf - shar->work.s; + return (written); } #define UUENC(c) (((c)!=0) ? ((c) & 077) + ' ': '`') -/* XXX This could be a lot more efficient. XXX */ static void -uuencode_group(struct shar *shar) +uuencode_group(const char _in[3], char out[4]) { - int t; + const unsigned char *in = (const unsigned char *)_in; + int t; - t = 0; - if (shar->uuavail > 0) - t = 0xff0000 & (shar->uubuffer[0] << 16); - if (shar->uuavail > 1) - t |= 0x00ff00 & (shar->uubuffer[1] << 8); - if (shar->uuavail > 2) - t |= 0x0000ff & (shar->uubuffer[2]); - shar->outbuff[shar->outpos++] = UUENC( 0x3f & (t>>18) ); - shar->outbuff[shar->outpos++] = UUENC( 0x3f & (t>>12) ); - shar->outbuff[shar->outpos++] = UUENC( 0x3f & (t>>6) ); - shar->outbuff[shar->outpos++] = UUENC( 0x3f & (t) ); - shar->uuavail = 0; - shar->outbytes += shar->uuavail; - shar->outbuff[shar->outpos] = 0; + t = (in[0] << 16) | (in[1] << 8) | in[2]; + out[0] = UUENC( 0x3f & (t >> 18) ); + out[1] = UUENC( 0x3f & (t >> 12) ); + out[2] = UUENC( 0x3f & (t >> 6) ); + out[3] = UUENC( 0x3f & t ); +} + +static void +uuencode_line(struct shar *shar, const char *inbuf, size_t len) +{ + char tmp_buf[3], *buf; + size_t alloc_len; + + /* len <= 45 -> expanded to 60 + len byte + new line */ + alloc_len = shar->work.length + 62; + if (archive_string_ensure(&shar->work, alloc_len) == NULL) + __archive_errx(1, "Out of memory"); + + buf = shar->work.s + shar->work.length; + *buf++ = UUENC(len); + while (len >= 3) { + uuencode_group(inbuf, buf); + len -= 3; + inbuf += 3; + buf += 4; + } + if (len != 0) { + tmp_buf[0] = inbuf[0]; + if (len == 1) + tmp_buf[1] = '\0'; + else + tmp_buf[1] = inbuf[1]; + tmp_buf[2] = '\0'; + uuencode_group(inbuf, buf); + buf += 4; + } + *buf++ = '\n'; + if ((buf - shar->work.s) > (ptrdiff_t)(shar->work.length + 62)) + __archive_errx(1, "Buffer overflow"); + shar->work.length = buf - shar->work.s; } static ssize_t @@ -406,21 +453,39 @@ archive_write_shar_data_uuencode(struct archive_write *a, const void *buff, if (!shar->has_data) return (ARCHIVE_OK); src = (const char *)buff; - n = length; - while (n-- > 0) { - if (shar->uuavail == 3) - uuencode_group(shar); - if (shar->outpos >= 60) { - ret = shar_printf(a, "%c%s\n", UUENC(shar->outbytes), - shar->outbuff); - if (ret != ARCHIVE_OK) - return (ret); - shar->outpos = 0; - shar->outbytes = 0; - } - shar->uubuffer[shar->uuavail++] = *src++; - shar->outbytes++; + if (shar->outpos != 0) { + n = 45 - shar->outpos; + if (n > length) + n = length; + memcpy(shar->outbuff + shar->outpos, src, n); + if (shar->outpos + n < 45) { + shar->outpos += n; + return length; + } + uuencode_line(shar, shar->outbuff, 45); + src += n; + n = length - n; + } else { + n = length; + } + + while (n >= 45) { + uuencode_line(shar, src, 45); + src += 45; + n -= 45; + + if (shar->work.length < 65536) + continue; + ret = (*a->compressor.write)(a, shar->work.s, + shar->work.length); + if (ret != ARCHIVE_OK) + return (ARCHIVE_FATAL); + archive_string_empty(&shar->work); + } + if (n != 0) { + memcpy(shar->outbuff, src, n); + shar->outpos = n; } return (length); } @@ -439,54 +504,43 @@ archive_write_shar_finish_entry(struct archive_write *a) if (shar->dump) { /* Finish uuencoded data. */ if (shar->has_data) { - if (shar->uuavail > 0) - uuencode_group(shar); - if (shar->outpos > 0) { - ret = shar_printf(a, "%c%s\n", - UUENC(shar->outbytes), shar->outbuff); - if (ret != ARCHIVE_OK) - return (ret); - shar->outpos = 0; - shar->uuavail = 0; - shar->outbytes = 0; - } - ret = shar_printf(a, "%c\n", UUENC(0)); - if (ret != ARCHIVE_OK) - return (ret); - ret = shar_printf(a, "end\n", UUENC(0)); - if (ret != ARCHIVE_OK) - return (ret); - ret = shar_printf(a, "SHAR_END\n"); - if (ret != ARCHIVE_OK) - return (ret); + if (shar->outpos > 0) + uuencode_line(shar, shar->outbuff, + shar->outpos); + archive_strcat(&shar->work, "`\nend\n"); + archive_strcat(&shar->work, "SHAR_END\n"); } /* Restore file mode, owner, flags. */ /* * TODO: Don't immediately restore mode for * directories; defer that to end of script. */ - ret = shar_printf(a, "chmod %o %s\n", - archive_entry_mode(shar->entry) & 07777, - archive_entry_pathname(shar->entry)); - if (ret != ARCHIVE_OK) - return (ret); + archive_string_sprintf(&shar->work, "chmod %o ", + archive_entry_mode(shar->entry) & 07777); + shar_quote(&shar->work, archive_entry_pathname(shar->entry), 1); + archive_strcat(&shar->work, "\n"); u = archive_entry_uname(shar->entry); g = archive_entry_gname(shar->entry); if (u != NULL || g != NULL) { - ret = shar_printf(a, "chown %s%s%s %s\n", - (u != NULL) ? u : "", - (g != NULL) ? ":" : "", (g != NULL) ? g : "", - archive_entry_pathname(shar->entry)); - if (ret != ARCHIVE_OK) - return (ret); + archive_strcat(&shar->work, "chown "); + if (u != NULL) + shar_quote(&shar->work, u, 1); + if (g != NULL) { + archive_strcat(&shar->work, ":"); + shar_quote(&shar->work, g, 1); + } + shar_quote(&shar->work, + archive_entry_pathname(shar->entry), 1); + archive_strcat(&shar->work, "\n"); } if ((p = archive_entry_fflags_text(shar->entry)) != NULL) { - ret = shar_printf(a, "chflags %s %s\n", p, - archive_entry_pathname(shar->entry)); - if (ret != ARCHIVE_OK) - return (ret); + archive_string_sprintf(&shar->work, "chflags %s ", + p, archive_entry_pathname(shar->entry)); + shar_quote(&shar->work, + archive_entry_pathname(shar->entry), 1); + archive_strcat(&shar->work, "\n"); } /* TODO: restore ACLs */ @@ -494,20 +548,24 @@ archive_write_shar_finish_entry(struct archive_write *a) } else { if (shar->has_data) { /* Finish sed-encoded data: ensure last line ends. */ - if (!shar->end_of_line) { - ret = shar_printf(a, "\n"); - if (ret != ARCHIVE_OK) - return (ret); - } - ret = shar_printf(a, "SHAR_END\n"); - if (ret != ARCHIVE_OK) - return (ret); + if (!shar->end_of_line) + archive_strappend_char(&shar->work, '\n'); + archive_strcat(&shar->work, "SHAR_END\n"); } } archive_entry_free(shar->entry); shar->entry = NULL; - return (0); + + if (shar->work.length < 65536) + return (ARCHIVE_OK); + + ret = (*a->compressor.write)(a, shar->work.s, shar->work.length); + if (ret != ARCHIVE_OK) + return (ARCHIVE_FATAL); + archive_string_empty(&shar->work); + + return (ARCHIVE_OK); } static int @@ -529,17 +587,22 @@ archive_write_shar_finish(struct archive_write *a) * shar format, then sets another format (which would invoke * shar_finish to free the format-specific data). */ - if (shar->wrote_header) { - ret = shar_printf(a, "exit\n"); - if (ret != ARCHIVE_OK) - return (ret); - /* Shar output is never padded. */ - archive_write_set_bytes_in_last_block(&a->archive, 1); - /* - * TODO: shar should also suppress padding of - * uncompressed data within gzip/bzip2 streams. - */ - } + if (shar->wrote_header == 0) + return (ARCHIVE_OK); + + archive_strcat(&shar->work, "exit\n"); + + ret = (*a->compressor.write)(a, shar->work.s, shar->work.length); + if (ret != ARCHIVE_OK) + return (ARCHIVE_FATAL); + + /* Shar output is never padded. */ + archive_write_set_bytes_in_last_block(&a->archive, 1); + /* + * TODO: shar should also suppress padding of + * uncompressed data within gzip/bzip2 streams. + */ + return (ARCHIVE_OK); } @@ -549,11 +612,13 @@ archive_write_shar_destroy(struct archive_write *a) struct shar *shar; shar = (struct shar *)a->format_data; - if (shar->entry != NULL) - archive_entry_free(shar->entry); - if (shar->last_dir != NULL) - free(shar->last_dir); + if (shar == NULL) + return (ARCHIVE_OK); + + archive_entry_free(shar->entry); + free(shar->last_dir); archive_string_free(&(shar->work)); + archive_string_free(&(shar->quoted_name)); free(shar); a->format_data = NULL; return (ARCHIVE_OK);