From 604e65334e196a0977c08443be2c2eb85747162f Mon Sep 17 00:00:00 2001 From: cem Date: Tue, 13 Aug 2019 23:32:56 +0000 Subject: [PATCH] geom_uzip(4), mkuzip(8): Add Zstd image mode The Zstd format bumps the CLOOP major number to 4 to avoid incompatibility with older systems. Support in geom_uzip(4) is conditional on the ZSTDIO kernel option, which is enabled in amd64 GENERIC, but not all in-tree configurations. mkuzip(8) was modified slightly to always initialize the nblocks + 1'th offset in the CLOOP file format. Previously, it was only initialized in the case where the final compressed block happened to be unaligned w.r.t. DEV_BSIZE. The "Fake" last+1 block change in r298619 means that the final compressed block's 'blen' was never correct unless the compressed uzip image happened to be BSIZE-aligned. This happened in about 1 out of every 512 cases. The zlib and lzma decompressors are probably tolerant of extra trash following the frame they were told to decode, but Zstd complains that the input size is incorrect. Correspondingly, geom_uzip(4) was modified slightly to avoid trashing the nblocks + 1'th offset when it is known to be initialized to a good value. This corrects the calculated final real cluster compressed length to match that printed by mkuzip(8). mkuzip(8) was refactored somewhat to reduce code duplication and increase ease of adding other compression formats. * Input block size validation was pulled out of individual compression init routines into main(). * Init routines now validate a user-provided compression level or select an algorithm-specific default, if none was provided. * A new interface for calculating the maximal compressed size of an incompressible input block was added for each driver. The generic code uses it to validate against MAXPHYS as well as to allocate compression result buffers in the generic code. * Algorithm selection is now driven by a table lookup, to increase ease of adding other formats in the future. 
mkuzip(8) gained the ability to explicitly specify a compression level with '-C'. The prior defaults -- 9 for zlib and 6 for lzma -- are maintained. The new zstd default is 9, to match zlib. Rather than select lzma or zlib with '-L' or its absence, respectively, a new argument '-A algorithm' is provided to select 'zlib', 'lzma', or 'zstd'. '-L' is considered deprecated, but will probably never be removed. All of the new features were documented in mkuzip.8; the page was also cleaned up slightly. Relnotes: yes --- share/man/man4/geom_uzip.4 | 6 +- sys/conf/NOTES | 4 +- sys/conf/files | 2 + sys/geom/uzip/g_uzip.c | 79 ++++++++++--- sys/geom/uzip/g_uzip_cloop.h | 6 + sys/geom/uzip/g_uzip_zstd.c | 157 ++++++++++++++++++++++++++ sys/geom/uzip/g_uzip_zstd.h | 30 +++++ sys/modules/geom/geom_uzip/Makefile | 11 +- usr.bin/mkuzip/Makefile | 6 +- usr.bin/mkuzip/mkuz_cfg.h | 2 + usr.bin/mkuzip/mkuz_cloop.h | 2 + usr.bin/mkuzip/mkuz_conveyor.c | 7 +- usr.bin/mkuzip/mkuz_format.h | 7 +- usr.bin/mkuzip/mkuz_lzma.c | 57 ++++------ usr.bin/mkuzip/mkuz_lzma.h | 5 +- usr.bin/mkuzip/mkuz_zlib.c | 53 +++++---- usr.bin/mkuzip/mkuz_zlib.h | 5 +- usr.bin/mkuzip/mkuz_zstd.c | 95 ++++++++++++++++ usr.bin/mkuzip/mkuz_zstd.h | 38 +++++++ usr.bin/mkuzip/mkuzip.8 | 166 ++++++++++++++++++++-------- usr.bin/mkuzip/mkuzip.c | 94 +++++++++++++--- usr.bin/mkuzip/mkuzip.h | 3 + 22 files changed, 684 insertions(+), 151 deletions(-) create mode 100644 sys/geom/uzip/g_uzip_zstd.c create mode 100644 sys/geom/uzip/g_uzip_zstd.h create mode 100644 usr.bin/mkuzip/mkuz_zstd.c create mode 100644 usr.bin/mkuzip/mkuz_zstd.h diff --git a/share/man/man4/geom_uzip.4 b/share/man/man4/geom_uzip.4 index d729acb44e02..359940bd2cf1 100644 --- a/share/man/man4/geom_uzip.4 +++ b/share/man/man4/geom_uzip.4 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd February 26, 2019 +.Dd August 13, 2019 .Dt GEOM_UZIP 4 .Os .Sh NAME @@ -37,6 +37,7 @@ place the following line in your kernel configuration file: .Bd -ragged -offset indent .Cd 
"device xz" +.Cd "options zstd" .Cd "options GEOM_UZIP" .Ed .Pp @@ -163,6 +164,9 @@ Log operations involving compressed cluster number. .Xr md 4 , .Xr geom 8 , .Xr mkuzip 8 +.Sh HISTORY +Zstd support was added in +.Fx 13.0 . .Sh AUTHORS .An -nosplit The diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 3c4b6b99c4cb..0d58cd3a57f3 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -2853,8 +2853,8 @@ options IMAGACT_BINMISC # This enables support for compressed core dumps. options GZIO -# zstd I/O stream support -# This enables support for Zstd compressed core dumps. +# zstd support +# This enables support for Zstd compressed core dumps and GEOM_UZIP images. options ZSTDIO # BHND(4) drivers diff --git a/sys/conf/files b/sys/conf/files index dae1d90f336a..da67fbdf0c46 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -3627,6 +3627,8 @@ geom/uzip/g_uzip.c optional geom_uzip geom/uzip/g_uzip_lzma.c optional geom_uzip geom/uzip/g_uzip_wrkthr.c optional geom_uzip geom/uzip/g_uzip_zlib.c optional geom_uzip +geom/uzip/g_uzip_zstd.c optional geom_uzip zstdio \ + compile-with "${NORMAL_C} -I$S/contrib/zstd/lib/freebsd" geom/vinum/geom_vinum.c optional geom_vinum geom/vinum/geom_vinum_create.c optional geom_vinum geom/vinum/geom_vinum_drive.c optional geom_vinum diff --git a/sys/geom/uzip/g_uzip.c b/sys/geom/uzip/g_uzip.c index 00a4781771aa..8339abb74a82 100644 --- a/sys/geom/uzip/g_uzip.c +++ b/sys/geom/uzip/g_uzip.c @@ -31,6 +31,9 @@ #include __FBSDID("$FreeBSD$"); +#include "opt_geom.h" +#include "opt_zstdio.h" + #include #include #include @@ -51,10 +54,11 @@ __FBSDID("$FreeBSD$"); #include #include #include +#ifdef ZSTDIO +#include +#endif #include -#include "opt_geom.h" - MALLOC_DEFINE(M_GEOM_UZIP, "geom_uzip", "GEOM UZIP data structures"); FEATURE(geom_uzip, "GEOM read-only compressed disks support"); @@ -594,7 +598,7 @@ g_uzip_parse_toc(struct g_uzip_softc *sc, struct g_provider *pp, * block whose offset is larger than ours and assume * it's going to be the next one. 
*/ - for (j = i + 1; j < sc->nblocks; j++) { + for (j = i + 1; j < sc->nblocks + 1; j++) { if (sc->toc[j].offset > max_offset) { break; } @@ -664,8 +668,10 @@ g_uzip_taste(struct g_class *mp, struct g_provider *pp, int flags) struct g_uzip_softc *sc; enum { G_UZIP = 1, - G_ULZMA + G_ULZMA, + G_ZSTD, } type; + char cloop_version; g_trace(G_T_TOPOLOGY, "%s(%s,%s)", __func__, mp->name, pp->name); g_topology_assert(); @@ -712,11 +718,12 @@ g_uzip_taste(struct g_class *mp, struct g_provider *pp, int flags) goto e3; } + cloop_version = header->magic[CLOOP_OFS_VERSN]; switch (header->magic[CLOOP_OFS_COMPR]) { case CLOOP_COMP_LZMA: case CLOOP_COMP_LZMA_DDP: type = G_ULZMA; - if (header->magic[CLOOP_OFS_VERSN] < CLOOP_MINVER_LZMA) { + if (cloop_version < CLOOP_MINVER_LZMA) { DPRINTF(GUZ_DBG_ERR, ("%s: image version too old\n", gp->name)); goto e3; @@ -727,7 +734,7 @@ g_uzip_taste(struct g_class *mp, struct g_provider *pp, int flags) case CLOOP_COMP_LIBZ: case CLOOP_COMP_LIBZ_DDP: type = G_UZIP; - if (header->magic[CLOOP_OFS_VERSN] < CLOOP_MINVER_ZLIB) { + if (cloop_version < CLOOP_MINVER_ZLIB) { DPRINTF(GUZ_DBG_ERR, ("%s: image version too old\n", gp->name)); goto e3; @@ -735,6 +742,24 @@ g_uzip_taste(struct g_class *mp, struct g_provider *pp, int flags) DPRINTF(GUZ_DBG_INFO, ("%s: GEOM_UZIP_ZLIB image found\n", gp->name)); break; + case CLOOP_COMP_ZSTD: + case CLOOP_COMP_ZSTD_DDP: + if (cloop_version < CLOOP_MINVER_ZSTD) { + DPRINTF(GUZ_DBG_ERR, ("%s: image version too old\n", + gp->name)); + goto e3; + } +#ifdef ZSTDIO + DPRINTF(GUZ_DBG_INFO, ("%s: GEOM_UZIP_ZSTD image found.\n", + gp->name)); + type = G_ZSTD; +#else + DPRINTF(GUZ_DBG_ERR, ("%s: GEOM_UZIP_ZSTD image found, but " + "this kernel was configured with Zstd disabled.\n", + gp->name)); + goto e3; +#endif + break; default: DPRINTF(GUZ_DBG_ERR, ("%s: unsupported image type\n", gp->name)); @@ -774,6 +799,13 @@ g_uzip_taste(struct g_class *mp, struct g_provider *pp, int flags) } DPRINTF(GUZ_DBG_INFO, ("%s: %u 
offsets in the first sector\n", gp->name, offsets_read)); + + /* + * The following invalidates the "header" pointer into the first + * block's "buf." + */ + header = NULL; + for (blk = 1; offsets_read < total_offsets; blk++) { uint32_t nread; @@ -805,20 +837,41 @@ g_uzip_taste(struct g_class *mp, struct g_provider *pp, int flags) goto e5; } - if (type == G_UZIP) { + switch (type) { + case G_UZIP: sc->dcp = g_uzip_zlib_ctor(sc->blksz); - } else { + break; + case G_ULZMA: sc->dcp = g_uzip_lzma_ctor(sc->blksz); - } - if (sc->dcp == NULL) { + break; +#ifdef ZSTDIO + case G_ZSTD: + sc->dcp = g_uzip_zstd_ctor(sc->blksz); + break; +#endif + default: goto e5; } /* - * "Fake" last+1 block, to make it easier for the TOC parser to - * iterate without making the last element a special case. + * The last+1 block was not always initialized by earlier versions of + * mkuzip(8). However, *if* it is initialized, the difference between + * its offset and the prior block's offset represents the length of the + * final real compressed block, and this is significant to the + * decompressor. 
*/ - sc->toc[sc->nblocks].offset = pp->mediasize; + if (cloop_version >= CLOOP_MINVER_RELIABLE_LASTBLKSZ && + sc->toc[sc->nblocks].offset != 0) { + if (sc->toc[sc->nblocks].offset > pp->mediasize) { + DPRINTF(GUZ_DBG_ERR, + ("%s: bogus n+1 offset %ju > mediasize %ju\n", + gp->name, (uintmax_t)sc->toc[sc->nblocks].offset, + (uintmax_t)pp->mediasize)); + goto e6; + } + } else { + sc->toc[sc->nblocks].offset = pp->mediasize; + } /* Massage TOC (table of contents), make sure it is sound */ if (g_uzip_parse_toc(sc, pp, gp) != 0) { DPRINTF(GUZ_DBG_ERR, ("%s: TOC error\n", gp->name)); diff --git a/sys/geom/uzip/g_uzip_cloop.h b/sys/geom/uzip/g_uzip_cloop.h index 219a7b67d6ec..f6b1174400d9 100644 --- a/sys/geom/uzip/g_uzip_cloop.h +++ b/sys/geom/uzip/g_uzip_cloop.h @@ -39,14 +39,20 @@ #define CLOOP_MAJVER_2 '2' #define CLOOP_MAJVER_3 '3' +#define CLOOP_MAJVER_4 '4' #define CLOOP_COMP_LIBZ 'V' #define CLOOP_COMP_LIBZ_DDP 'v' #define CLOOP_COMP_LZMA 'L' #define CLOOP_COMP_LZMA_DDP 'l' +#define CLOOP_COMP_ZSTD 'Z' +#define CLOOP_COMP_ZSTD_DDP 'z' #define CLOOP_MINVER_LZMA CLOOP_MAJVER_3 #define CLOOP_MINVER_ZLIB CLOOP_MAJVER_2 +#define CLOOP_MINVER_ZSTD CLOOP_MAJVER_4 + +#define CLOOP_MINVER_RELIABLE_LASTBLKSZ CLOOP_MAJVER_4 struct cloop_header { char magic[CLOOP_MAGIC_LEN]; /* cloop magic */ diff --git a/sys/geom/uzip/g_uzip_zstd.c b/sys/geom/uzip/g_uzip_zstd.c new file mode 100644 index 000000000000..27b246242186 --- /dev/null +++ b/sys/geom/uzip/g_uzip_zstd.c @@ -0,0 +1,157 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Conrad Meyer + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* + * We don't actually need any static-link ABI, just want to use "experimental" + * custom malloc/free APIs. 
+ */ +#define ZSTD_STATIC_LINKING_ONLY +#include + +FEATURE(geom_uzip_zstd, "g_uzip Zstd support"); + +struct g_uzip_zstd { + struct g_uzip_dapi guz_pub; + uint32_t guz_blksz; + ZSTD_DCtx *guz_dctx; +}; + +#ifndef container_of +#define container_of(ptr, type, member) \ +({ \ + const __typeof(((type *)0)->member) *__p = (ptr); \ + (type *)((uintptr_t)__p - offsetof(type, member)); \ +}) +#endif +#define to_zstd_softc(zpp) container_of(zpp, struct g_uzip_zstd, guz_pub) + +static int +guz_zstd_decompress(struct g_uzip_dapi *zpp, const char *gp_name, void *input, + size_t ilen, void *outputbuf) +{ + struct g_uzip_zstd *sc; + size_t rc; + + sc = to_zstd_softc(zpp); + rc = ZSTD_decompressDCtx(sc->guz_dctx, outputbuf, sc->guz_blksz, input, + ilen); + if (ZSTD_isError(rc)) { + printf("%s: UZIP(zstd) decompress failed: %s\n", gp_name, + ZSTD_getErrorName(rc)); + return (EIO); + } + KASSERT(rc == sc->guz_blksz, ("%s: Expected %u bytes, got %zu", + __func__, sc->guz_blksz, rc)); + return (0); +} + +static void +guz_zstd_free(struct g_uzip_dapi *zpp) +{ + struct g_uzip_zstd *sc; + size_t rc; + + sc = to_zstd_softc(zpp); + rc = ZSTD_freeDCtx(sc->guz_dctx); + if (ZSTD_isError(rc)) + printf("%s: UZIP(zstd) free failed: %s\n", __func__, + ZSTD_getErrorName(rc)); + + free(sc, M_GEOM_UZIP); +} + +static int +guz_zstd_rewind(struct g_uzip_dapi *zpp, const char *gp_name) +{ + struct g_uzip_zstd *sc; + size_t rc; + + sc = to_zstd_softc(zpp); + rc = ZSTD_DCtx_reset(sc->guz_dctx, ZSTD_reset_session_and_parameters); + if (ZSTD_isError(rc)) { + printf("%s: UZIP(zstd) rewind failed: %s\n", gp_name, + ZSTD_getErrorName(rc)); + return (EIO); + } + return (0); +} + +static void * +zstd_alloc(void *opaque, size_t size) +{ + return (malloc(size, opaque, M_WAITOK)); +} + +static void +zstd_free(void *opaque, void *address) +{ + free(address, opaque); +} + +static const ZSTD_customMem zstd_guz_alloc = { + .customAlloc = zstd_alloc, + .customFree = zstd_free, + .opaque = M_GEOM_UZIP, +}; + +struct 
g_uzip_dapi * +g_uzip_zstd_ctor(uint32_t blksz) +{ + struct g_uzip_zstd *sc; + + sc = malloc(sizeof(*sc), M_GEOM_UZIP, M_WAITOK | M_ZERO); + + sc->guz_dctx = ZSTD_createDCtx_advanced(zstd_guz_alloc); + if (sc->guz_dctx == NULL) { + printf("%s: ZSTD_createDCtx_advanced failed\n", __func__); + free(sc, M_GEOM_UZIP); + return (NULL); + } + + sc->guz_blksz = blksz; + sc->guz_pub.max_blen = ZSTD_compressBound(blksz); + sc->guz_pub.decompress = guz_zstd_decompress; + sc->guz_pub.free = guz_zstd_free; + sc->guz_pub.rewind = guz_zstd_rewind; + sc->guz_pub.pvt = NULL; + + return (&sc->guz_pub); +} diff --git a/sys/geom/uzip/g_uzip_zstd.h b/sys/geom/uzip/g_uzip_zstd.h new file mode 100644 index 000000000000..adb76882f76b --- /dev/null +++ b/sys/geom/uzip/g_uzip_zstd.h @@ -0,0 +1,30 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Conrad Meyer + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +struct g_uzip_dapi *g_uzip_zstd_ctor(uint32_t); diff --git a/sys/modules/geom/geom_uzip/Makefile b/sys/modules/geom/geom_uzip/Makefile index 3aa7ea96cd6d..dfd4e57abea0 100644 --- a/sys/modules/geom/geom_uzip/Makefile +++ b/sys/modules/geom/geom_uzip/Makefile @@ -6,12 +6,17 @@ KMOD= geom_uzip SRCS= g_uzip.c g_uzip_zlib.c g_uzip_lzma.c g_uzip_wrkthr.c SRCS+= g_uzip.h g_uzip_dapi.h g_uzip_lzma.h g_uzip_zlib.h g_uzip_softc.h \ g_uzip_wrkthr.h -#CFLAGS= -g -DINVARIANT_SUPPORT -DINVARIANTS -.PATH: ${SRCTOP}/sys/net +# This works for buildkernel, but will disable zstd in geom_uzip out of tree. 
+.if ${KERN_OPTS:MZSTDIO} != "" +SRCS+= g_uzip_zstd.c g_uzip_zstd.h +CFLAGS.g_uzip_zstd.c+= -I${SRCTOP}/sys/contrib/zstd/lib/freebsd +.endif + +#CFLAGS= -g -DINVARIANT_SUPPORT -DINVARIANTS CFLAGS.g_uzip_lzma.c+= -I${SRCTOP}/sys/contrib/xz-embedded/freebsd \ -I${SRCTOP}/sys/contrib/xz-embedded/linux/lib/xz/ -SRCS+= opt_geom.h +SRCS+= opt_geom.h opt_zstdio.h .include diff --git a/usr.bin/mkuzip/Makefile b/usr.bin/mkuzip/Makefile index ed31b78f2d31..c60fd7f5ca54 100644 --- a/usr.bin/mkuzip/Makefile +++ b/usr.bin/mkuzip/Makefile @@ -3,10 +3,12 @@ PROG= mkuzip MAN= mkuzip.8 SRCS= mkuzip.c mkuz_blockcache.c mkuz_lzma.c mkuz_zlib.c mkuz_conveyor.c \ - mkuz_blk.c mkuz_fqueue.c mkuz_time.c mkuz_insize.c + mkuz_blk.c mkuz_fqueue.c mkuz_time.c mkuz_insize.c mkuz_zstd.c + +CFLAGS+= -I${SRCTOP}/sys/contrib/zstd/lib #CFLAGS+= -DMKUZ_DEBUG -LIBADD= z md lzma pthread +LIBADD= lzma md pthread z zstd .include diff --git a/usr.bin/mkuzip/mkuz_cfg.h b/usr.bin/mkuzip/mkuz_cfg.h index fc88ef29198f..a27d98a6ff60 100644 --- a/usr.bin/mkuzip/mkuz_cfg.h +++ b/usr.bin/mkuzip/mkuz_cfg.h @@ -39,4 +39,6 @@ struct mkuz_cfg { const char *iname; off_t isize; const struct mkuz_format *handler; + size_t cbound_blksz; + int comp_level; }; diff --git a/usr.bin/mkuzip/mkuz_cloop.h b/usr.bin/mkuzip/mkuz_cloop.h index 4ed7c5026391..fabf80a53b12 100644 --- a/usr.bin/mkuzip/mkuz_cloop.h +++ b/usr.bin/mkuzip/mkuz_cloop.h @@ -39,9 +39,11 @@ #define CLOOP_MAJVER_2 '2' #define CLOOP_MAJVER_3 '3' +#define CLOOP_MAJVER_4 '4' #define CLOOP_COMP_LIBZ 'V' #define CLOOP_COMP_LZMA 'L' +#define CLOOP_COMP_ZSTD 'Z' struct cloop_header { char magic[CLOOP_MAGIC_LEN]; /* cloop magic */ diff --git a/usr.bin/mkuzip/mkuz_conveyor.c b/usr.bin/mkuzip/mkuz_conveyor.c index 856d445cce50..2cfae1c0775c 100644 --- a/usr.bin/mkuzip/mkuz_conveyor.c +++ b/usr.bin/mkuzip/mkuz_conveyor.c @@ -42,8 +42,8 @@ __FBSDID("$FreeBSD$"); #include "mkuz_conveyor.h" #include "mkuz_cfg.h" #include "mkuzip.h" -#include "mkuz_format.h" #include 
"mkuz_blk.h" +#include "mkuz_format.h" #include "mkuz_fqueue.h" #include "mkuz_blk_chain.h" @@ -67,7 +67,7 @@ cworker(void *p) cfp = cwp->cfp; cvp = cwp->cvp; free(cwp); - c_ctx = cfp->handler->f_init(cfp->blksz); + c_ctx = cfp->handler->f_init(&cfp->comp_level); for (;;) { iblk = mkuz_fqueue_deq(cvp->wrk_queue); if (iblk == MKUZ_BLK_EOF) { @@ -80,7 +80,8 @@ cworker(void *p) /* All zeroes block */ oblk = mkuz_blk_ctor(0); } else { - oblk = cfp->handler->f_compress(c_ctx, iblk); + oblk = mkuz_blk_ctor(cfp->cbound_blksz); + cfp->handler->f_compress(c_ctx, iblk, oblk); if (cfp->en_dedup != 0) { compute_digest(oblk); } diff --git a/usr.bin/mkuzip/mkuz_format.h b/usr.bin/mkuzip/mkuz_format.h index 817c0121ed85..ddee771eab87 100644 --- a/usr.bin/mkuzip/mkuz_format.h +++ b/usr.bin/mkuzip/mkuz_format.h @@ -26,12 +26,15 @@ * $FreeBSD$ */ -DEFINE_RAW_METHOD(f_init, void *, uint32_t); -DEFINE_RAW_METHOD(f_compress, struct mkuz_blk *, void *, const struct mkuz_blk *); +DEFINE_RAW_METHOD(f_compress_bound, size_t, size_t); +DEFINE_RAW_METHOD(f_init, void *, int *); +DEFINE_RAW_METHOD(f_compress, void, void *, const struct mkuz_blk *, struct mkuz_blk *); struct mkuz_format { + const char *option; const char *magic; const char *default_sufx; + f_compress_bound_t f_compress_bound; f_init_t f_init; f_compress_t f_compress; }; diff --git a/usr.bin/mkuzip/mkuz_lzma.c b/usr.bin/mkuzip/mkuz_lzma.c index 8810d2ef0c56..bab2820f7c38 100644 --- a/usr.bin/mkuzip/mkuz_lzma.c +++ b/usr.bin/mkuzip/mkuz_lzma.c @@ -35,61 +35,55 @@ __FBSDID("$FreeBSD$"); #include #include "mkuzip.h" -#include "mkuz_lzma.h" #include "mkuz_blk.h" - -#define USED_BLOCKSIZE DEV_BSIZE +#include "mkuz_lzma.h" struct mkuz_lzma { lzma_filter filters[2]; lzma_options_lzma opt_lzma; lzma_stream strm; - uint32_t blksz; }; -static const lzma_stream lzma_stream_init = LZMA_STREAM_INIT; +size_t +mkuz_lzma_cbound(size_t blksz) +{ + return (lzma_stream_buffer_bound(blksz)); +} void * -mkuz_lzma_init(uint32_t blksz) 
+mkuz_lzma_init(int *comp_level) { struct mkuz_lzma *ulp; - if (blksz % USED_BLOCKSIZE != 0) { - errx(1, "cluster size should be multiple of %d", - USED_BLOCKSIZE); + if (*comp_level == USE_DEFAULT_LEVEL) + *comp_level = LZMA_PRESET_DEFAULT; + if (*comp_level < 0 || *comp_level > 9) + errx(1, "provided compression level %d is invalid", + *comp_level); /* Not reached */ - } - if (blksz > MAXPHYS) { - errx(1, "cluster size is too large"); - /* Not reached */ - } + ulp = mkuz_safe_zmalloc(sizeof(struct mkuz_lzma)); /* Init lzma encoder */ - ulp->strm = lzma_stream_init; - if (lzma_lzma_preset(&ulp->opt_lzma, LZMA_PRESET_DEFAULT)) + ulp->strm = (lzma_stream)LZMA_STREAM_INIT; + if (lzma_lzma_preset(&ulp->opt_lzma, *comp_level)) errx(1, "Error loading LZMA preset"); ulp->filters[0].id = LZMA_FILTER_LZMA2; ulp->filters[0].options = &ulp->opt_lzma; ulp->filters[1].id = LZMA_VLI_UNKNOWN; - ulp->blksz = blksz; - return (void *)ulp; } -struct mkuz_blk * -mkuz_lzma_compress(void *p, const struct mkuz_blk *iblk) +void +mkuz_lzma_compress(void *p, const struct mkuz_blk *iblk, struct mkuz_blk *oblk) { lzma_ret ret; - struct mkuz_blk *rval; struct mkuz_lzma *ulp; ulp = (struct mkuz_lzma *)p; - rval = mkuz_blk_ctor(ulp->blksz * 2); - ret = lzma_stream_encoder(&ulp->strm, ulp->filters, LZMA_CHECK_CRC32); if (ret != LZMA_OK) { if (ret == LZMA_MEMLIMIT_ERROR) @@ -99,23 +93,20 @@ mkuz_lzma_compress(void *p, const struct mkuz_blk *iblk) } ulp->strm.next_in = iblk->data; - ulp->strm.avail_in = ulp->blksz; - ulp->strm.next_out = rval->data; - ulp->strm.avail_out = rval->alen; + ulp->strm.avail_in = iblk->info.len; + ulp->strm.next_out = oblk->data; + ulp->strm.avail_out = oblk->alen; ret = lzma_code(&ulp->strm, LZMA_FINISH); - if (ret != LZMA_STREAM_END) { - /* Error */ + if (ret != LZMA_STREAM_END) errx(1, "lzma_code FINISH failed, code=%d, pos(in=%zd, " - "out=%zd)", ret, (ulp->blksz - ulp->strm.avail_in), - (ulp->blksz * 2 - ulp->strm.avail_out)); - } + "out=%zd)", ret, (iblk->info.len 
- ulp->strm.avail_in), + (oblk->alen - ulp->strm.avail_out)); #if 0 lzma_end(&ulp->strm); #endif - rval->info.len = rval->alen - ulp->strm.avail_out; - return (rval); + oblk->info.len = oblk->alen - ulp->strm.avail_out; } diff --git a/usr.bin/mkuzip/mkuz_lzma.h b/usr.bin/mkuzip/mkuz_lzma.h index bba45425343c..920acc67164d 100644 --- a/usr.bin/mkuzip/mkuz_lzma.h +++ b/usr.bin/mkuzip/mkuz_lzma.h @@ -38,5 +38,6 @@ "exit $?\n" #define DEFAULT_SUFX_LZMA ".ulzma" -void *mkuz_lzma_init(uint32_t); -struct mkuz_blk *mkuz_lzma_compress(void *, const struct mkuz_blk *); +size_t mkuz_lzma_cbound(size_t); +void *mkuz_lzma_init(int *); +void mkuz_lzma_compress(void *, const struct mkuz_blk *, struct mkuz_blk *); diff --git a/usr.bin/mkuzip/mkuz_zlib.c b/usr.bin/mkuzip/mkuz_zlib.c index 4b191f945cda..fa2519ffb60d 100644 --- a/usr.bin/mkuzip/mkuz_zlib.c +++ b/usr.bin/mkuzip/mkuz_zlib.c @@ -34,54 +34,51 @@ __FBSDID("$FreeBSD$"); #include #include "mkuzip.h" -#include "mkuz_zlib.h" #include "mkuz_blk.h" +#include "mkuz_zlib.h" struct mkuz_zlib { - uLongf oblen; - uint32_t blksz; + int comp_level; }; +size_t +mkuz_zlib_cbound(size_t blksz) +{ + return (compressBound(blksz)); +} + void * -mkuz_zlib_init(uint32_t blksz) +mkuz_zlib_init(int *comp_level) { struct mkuz_zlib *zp; - if (blksz % DEV_BSIZE != 0) { - errx(1, "cluster size should be multiple of %d", - DEV_BSIZE); + if (*comp_level == USE_DEFAULT_LEVEL) + *comp_level = Z_BEST_COMPRESSION; + if (*comp_level < Z_BEST_SPEED || *comp_level > Z_BEST_COMPRESSION) + errx(1, "provided compression level %d is invalid", + *comp_level); /* Not reached */ - } - if (compressBound(blksz) > MAXPHYS) { - errx(1, "cluster size is too large"); - /* Not reached */ - } - zp = mkuz_safe_zmalloc(sizeof(struct mkuz_zlib)); - zp->oblen = compressBound(blksz); - zp->blksz = blksz; - return (void *)zp; + zp = mkuz_safe_zmalloc(sizeof(struct mkuz_zlib)); + zp->comp_level = *comp_level; + + return (zp); } -struct mkuz_blk * -mkuz_zlib_compress(void *p, 
const struct mkuz_blk *iblk) +void +mkuz_zlib_compress(void *p, const struct mkuz_blk *iblk, struct mkuz_blk *oblk) { uLongf destlen_z; - struct mkuz_blk *rval; struct mkuz_zlib *zp; zp = (struct mkuz_zlib *)p; - rval = mkuz_blk_ctor(zp->oblen); - - destlen_z = rval->alen; - if (compress2(rval->data, &destlen_z, iblk->data, zp->blksz, - Z_BEST_COMPRESSION) != Z_OK) { - errx(1, "can't compress data: compress2() " - "failed"); + destlen_z = oblk->alen; + if (compress2(oblk->data, &destlen_z, iblk->data, iblk->info.len, + zp->comp_level) != Z_OK) { + errx(1, "can't compress data: compress2() failed"); /* Not reached */ } - rval->info.len = (uint32_t)destlen_z; - return (rval); + oblk->info.len = (uint32_t)destlen_z; } diff --git a/usr.bin/mkuzip/mkuz_zlib.h b/usr.bin/mkuzip/mkuz_zlib.h index 55e57a610b4b..ad653b935f22 100644 --- a/usr.bin/mkuzip/mkuz_zlib.h +++ b/usr.bin/mkuzip/mkuz_zlib.h @@ -32,5 +32,6 @@ "(kldstat -qm g_uzip||kldload geom_uzip)>&-&&" \ "mount_cd9660 /dev/`mdconfig -af $0`.uzip $1\nexit $?\n" -void *mkuz_zlib_init(uint32_t); -struct mkuz_blk *mkuz_zlib_compress(void *, const struct mkuz_blk *); +size_t mkuz_zlib_cbound(size_t); +void *mkuz_zlib_init(int *); +void mkuz_zlib_compress(void *, const struct mkuz_blk *, struct mkuz_blk *); diff --git a/usr.bin/mkuzip/mkuz_zstd.c b/usr.bin/mkuzip/mkuz_zstd.c new file mode 100644 index 000000000000..d59cb47917d2 --- /dev/null +++ b/usr.bin/mkuzip/mkuz_zstd.c @@ -0,0 +1,95 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Conrad Meyer + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +#include + +#include "mkuzip.h" +#include "mkuz_blk.h" +#include "mkuz_zstd.h" + +size_t +mkuz_zstd_cbound(size_t blksz) +{ + return (ZSTD_compressBound(blksz)); +} + +void * +mkuz_zstd_init(int *comp_level) +{ + ZSTD_CCtx *cctx; + size_t rc; + + /* Default chosen for near-parity with mkuzip zlib default. 
*/ + if (*comp_level == USE_DEFAULT_LEVEL) + *comp_level = 9; + if (*comp_level < ZSTD_minCLevel() || *comp_level == 0 || + *comp_level > ZSTD_maxCLevel()) + errx(1, "provided compression level %d is invalid", + *comp_level); + + cctx = ZSTD_createCCtx(); + if (cctx == NULL) + errx(1, "could not allocate Zstd context"); + + rc = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, + *comp_level); + if (ZSTD_isError(rc)) + errx(1, "Could not set zstd compression level %d: %s", + *comp_level, ZSTD_getErrorName(rc)); + + rc = ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1); + if (ZSTD_isError(rc)) + errx(1, "Could not enable zstd checksum: %s", + ZSTD_getErrorName(rc)); + + return (cctx); +} + +void +mkuz_zstd_compress(void *p, const struct mkuz_blk *iblk, struct mkuz_blk *oblk) +{ + ZSTD_CCtx *cctx; + size_t rc; + + cctx = p; + + rc = ZSTD_compress2(cctx, oblk->data, oblk->alen, iblk->data, + iblk->info.len); + if (ZSTD_isError(rc)) + errx(1, "could not compress data: ZSTD_compress2: %s", + ZSTD_getErrorName(rc)); + + oblk->info.len = rc; +} diff --git a/usr.bin/mkuzip/mkuz_zstd.h b/usr.bin/mkuzip/mkuz_zstd.h new file mode 100644 index 000000000000..874e2d82812c --- /dev/null +++ b/usr.bin/mkuzip/mkuz_zstd.h @@ -0,0 +1,38 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Conrad Meyer + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#define DEFAULT_SUFX_ZSTD ".uzst" + +#define CLOOP_MAGIC_ZSTD "#!/bin/sh\n#Z4.0 Format\n" \ + "(kldstat -qm g_uzip||kldload geom_uzip)>&-&&" \ + "mount_cd9660 /dev/`mdconfig -af $0`.uzip $1\nexit $?\n" + +size_t mkuz_zstd_cbound(size_t); +void *mkuz_zstd_init(int *); +void mkuz_zstd_compress(void *, const struct mkuz_blk *, struct mkuz_blk *); diff --git a/usr.bin/mkuzip/mkuzip.8 b/usr.bin/mkuzip/mkuzip.8 index 8a54aee7ba57..9bf4a0c3f893 100644 --- a/usr.bin/mkuzip/mkuzip.8 +++ b/usr.bin/mkuzip/mkuzip.8 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd February 19, 2019 +.Dd August 9, 2019 .Dt MKUZIP 8 .Os .Sh NAME @@ -35,7 +35,9 @@ class .Sh SYNOPSIS .Nm -.Op Fl dLSsvZ +.Op Fl dSsvZ +.Op Fl A Ar compression_algorithm +.Op Fl C Ar compression_level .Op Fl j Ar compression_jobs .Op Fl o Ar outfile .Op Fl s Ar cluster_size @@ -57,17 +59,82 @@ works in two phases: .It An .Ar infile -image is split into clusters; each cluster is compressed using -.Xr zlib 3 -or -.Xr lzma 3 . +image is split into clusters; each cluster is compressed. .It -The resulting set of compressed clusters along with headers that allow -locating each individual cluster is written to the output file. 
+The resulting set of compressed clusters is written to the output file. +In addition, a +.Dq table of contents +header is written which allows for efficient seeking. .El .Pp The options are: .Bl -tag -width indent +.It Fl A Op Ar lzma | Ar zlib | Ar zstd +Select a specific compression algorithm. +If this option is not provided, the default is +.Ar zlib . +.Pp +The +.Ar lzma +algorithm provides noticeably better compression levels than zlib on the same +data set. +It has vastly slower compression speed and moderately slower decompression +speed. +.Pp +The +.Ar zstd +algorithm provides better compression levels than zlib on the same data set. +It also has faster compression and decompression speed than zlib. +In the very high compression +.Dq level +settings, it does not offer quite as high a compression ratio as +.Ar lzma . +However, its decompression speed does not suffer at high compression +.Dq levels . +.It Fl C Ar compression_level +Select the integer compression level used to parameterize the chosen +compression algorithm. +.Pp +For any given algorithm, a lesser number selects a faster compression mode. +A greater number selects a slower compression mode. +Typically, for the same algorithm, a greater +.Ar compression_level +provides a better final compression ratio. +.Pp +For +.Ar lzma , +the range of valid compression levels is +.Va 0-9 . +The +.Nm +default for lzma is +.Va 6 . +.Pp +For +.Ar zlib , +the range of valid compression levels is +.Va 1-9 . +The +.Nm +default for zlib is +.Va 9 . +.Pp +For +.Ar zstd , +the range of valid compression levels is currently +.Va 1-19 . +The +.Nm +default for zstd is +.Va 9 . +.It Fl d +Enable de-duplication. +When the option is enabled +.Nm +detects identical blocks in the input and replaces each subsequent occurrence +of such a block with a pointer to the very first one in the output. +Setting this option results in a moderate decrease of compressed image size, +typically around 3-5% of the final size of the compressed image.
.It Fl j Ar compression_jobs Specify the number of compression jobs that .Nm @@ -77,24 +144,9 @@ to the value of .Va hw.ncpu .Xr sysctl 8 variable. -.It Fl d -Enable de-duplication. -When the option is enabled the -.Nm -detects identical blocks in the input and replaces each subsequent occurence -of such block with pointer to the very first one in the output. -Setting this option results is moderate decrease of compressed image size, -typically around 3-5% of a final size of the compressed image. -.It Fl L -Use -.Xr lzma 3 -compression algorithm instead of the default -.Xr zlib 3 . -The -.Xr lzma 3 -provides noticeable better compression levels on the same data set -at the expense of much slower compression speed (10-20x) and somewhat slower -decompression (2-3x). +.It Op Fl L +Legacy flag that indicates the same thing as +.Dq Fl A Ar lzma . .It Fl o Ar outfile Name of the output file .Ar outfile . @@ -119,33 +171,44 @@ should be a multiple of 512 bytes. .It Fl v Display verbose messages. .It Fl Z -Disable zero-blocks detection and elimination. -When this option is set, the +Disable zero-block detection and elimination. +When this option is set, .Nm -would compress empty blocks (i.e. clusters that consist of only zero bytes) -just as it would any other block. -When the option is not set, the +compresses blocks of zero bytes just as it would any other block. +When the option is not set, .Nm -detects such blocks and skips them from the output. +detects and compresses zero blocks in a space-efficient way. Setting .Fl Z -results is slight increase of compressed image size, typically less than 0.1% -of a final size of the compressed image. +increases compressed image sizes slightly, typically less than 0.1%. .El -.Sh NOTES -The compression ratio largely depends on the cluster size used. -.\" The following two sentences are unclear: how can gzip(1) be -.\" used in a comparable fashion, and wouldn't a gzip-compressed -.\" image suffer from larger cluster sizes as well? 
-For large cluster sizes (16K and higher), typical compression ratios +.Sh IMPLEMENTATION NOTES +The compression ratio largely depends on the compression algorithm, level, and +cluster size used. +For large cluster sizes (16 kB and higher), typical overall image compression +ratios with +.Xr zlib 3 are only 1-2% less than those achieved with -.Xr gzip 1 . -However, it should be kept in mind that larger cluster -sizes lead to higher overhead in the +.Xr gzip 1 +over the entire image. +However, it should be kept in mind that larger cluster sizes lead to higher +overhead in the .Xr geom_uzip 4 class, as the class has to decompress the whole cluster even if only a few bytes from that cluster have to be read. .Pp +Additionally, the threshold at 16-32 kB where a larger cluster size does not +benefit overall compression ratio is an artifact of the +.Xr zlib 3 +algorithm in particular. +.Ar Lzma +and +.Ar Zstd +will continue to provide better compression ratios as cluster sizes are increased, at high enough compression levels. +The same tradeoff continues to apply: reads in +.Xr geom_uzip 4 +become more expensive the greater the cluster size. +.Pp The .Nm utility @@ -169,12 +232,27 @@ specific feature and while it does not require any changes to on-disk compressed image format, however it did require some matching changes to the .Xr geom_uzip 4 to handle resulting images correctly. +.Pp +To make use of +.Ar zstd +.Nm +images, the kernel must be configured with +.Cd ZSTDIO . +It is enabled by default in many +.Cd GENERIC +kernels provided as binary distributions by +.Fx . +The status on any particular system can be verified by checking +.Xr sysctl 8 +.Dv kern.features.geom_uzip_zstd +for +.Dq 1 .
.Sh EXIT STATUS .Ex -std .Sh SEE ALSO .Xr gzip 1 , .Xr xz 1 , -.Xr lzma 3 , +.Xr zstd 1 , .Xr zlib 3 , .Xr geom 4 , .Xr geom_uzip 4 , diff --git a/usr.bin/mkuzip/mkuzip.c b/usr.bin/mkuzip/mkuzip.c index be0a9b23acb4..a2763e06440c 100644 --- a/usr.bin/mkuzip/mkuzip.c +++ b/usr.bin/mkuzip/mkuzip.c @@ -51,8 +51,9 @@ __FBSDID("$FreeBSD$"); #include "mkuzip.h" #include "mkuz_cloop.h" #include "mkuz_blockcache.h" -#include "mkuz_zlib.h" #include "mkuz_lzma.h" +#include "mkuz_zlib.h" +#include "mkuz_zstd.h" #include "mkuz_blk.h" #include "mkuz_cfg.h" #include "mkuz_conveyor.h" @@ -63,18 +64,38 @@ __FBSDID("$FreeBSD$"); #define DEFAULT_CLSTSIZE 16384 -static struct mkuz_format uzip_fmt = { - .magic = CLOOP_MAGIC_ZLIB, - .default_sufx = DEFAULT_SUFX_ZLIB, - .f_init = &mkuz_zlib_init, - .f_compress = &mkuz_zlib_compress +enum UZ_ALGORITHM { + UZ_ZLIB = 0, + UZ_LZMA, + UZ_ZSTD, + UZ_INVALID }; -static struct mkuz_format ulzma_fmt = { - .magic = CLOOP_MAGIC_LZMA, - .default_sufx = DEFAULT_SUFX_LZMA, - .f_init = &mkuz_lzma_init, - .f_compress = &mkuz_lzma_compress +static const struct mkuz_format uzip_fmts[] = { + [UZ_ZLIB] = { + .option = "zlib", + .magic = CLOOP_MAGIC_ZLIB, + .default_sufx = DEFAULT_SUFX_ZLIB, + .f_compress_bound = mkuz_zlib_cbound, + .f_init = mkuz_zlib_init, + .f_compress = mkuz_zlib_compress, + }, + [UZ_LZMA] = { + .option = "lzma", + .magic = CLOOP_MAGIC_LZMA, + .default_sufx = DEFAULT_SUFX_LZMA, + .f_compress_bound = mkuz_lzma_cbound, + .f_init = mkuz_lzma_init, + .f_compress = mkuz_lzma_compress, + }, + [UZ_ZSTD] = { + .option = "zstd", + .magic = CLOOP_MAGIC_ZSTD, + .default_sufx = DEFAULT_SUFX_ZSTD, + .f_compress_bound = mkuz_zstd_cbound, + .f_init = mkuz_zstd_init, + .f_compress = mkuz_zstd_compress, + }, }; static struct mkuz_blk *readblock(int, u_int32_t); @@ -111,6 +132,8 @@ int main(int argc, char **argv) struct mkuz_blk_info *chit; size_t ncpusz, ncpu, magiclen; double st, et; + enum UZ_ALGORITHM comp_alg; + int comp_level; st = getdtime(); @@ 
-129,12 +152,27 @@ int main(int argc, char **argv) cfs.en_dedup = 0; summary.en = 0; summary.f = stderr; - cfs.handler = &uzip_fmt; + comp_alg = UZ_ZLIB; + comp_level = USE_DEFAULT_LEVEL; cfs.nworkers = ncpu; struct mkuz_blk *iblk, *oblk; - while((opt = getopt(argc, argv, "o:s:vZdLSj:")) != -1) { + while((opt = getopt(argc, argv, "A:C:o:s:vZdLSj:")) != -1) { switch(opt) { + case 'A': + for (tmp = UZ_ZLIB; tmp < UZ_INVALID; tmp++) { + if (strcmp(uzip_fmts[tmp].option, optarg) == 0) + break; + } + if (tmp == UZ_INVALID) + errx(1, "invalid algorithm specified: %s", + optarg); + /* Not reached */ + comp_alg = tmp; + break; + case 'C': + comp_level = atoi(optarg); + break; case 'o': oname = optarg; break; @@ -162,7 +200,7 @@ int main(int argc, char **argv) break; case 'L': - cfs.handler = &ulzma_fmt; + comp_alg = UZ_LZMA; break; case 'S': @@ -193,16 +231,32 @@ int main(int argc, char **argv) /* Not reached */ } + cfs.handler = &uzip_fmts[comp_alg]; + magiclen = strlcpy(hdr.magic, cfs.handler->magic, sizeof(hdr.magic)); assert(magiclen < sizeof(hdr.magic)); if (cfs.en_dedup != 0) { - hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3; + /* + * Dedupe requires a version 3 format. Don't downgrade newer + * formats. 
+ */ + if (hdr.magic[CLOOP_OFS_VERSN] == CLOOP_MAJVER_2) + hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3; hdr.magic[CLOOP_OFS_COMPR] = tolower(hdr.magic[CLOOP_OFS_COMPR]); } - c_ctx = cfs.handler->f_init(cfs.blksz); + if (cfs.blksz % DEV_BSIZE != 0) + errx(1, "cluster size should be multiple of %d", DEV_BSIZE); + + cfs.cbound_blksz = cfs.handler->f_compress_bound(cfs.blksz); + if (cfs.cbound_blksz > MAXPHYS) + errx(1, "maximal compressed cluster size %zu greater than MAXPHYS %zu", + cfs.cbound_blksz, (size_t)MAXPHYS); + + c_ctx = cfs.handler->f_init(&comp_level); + cfs.comp_level = comp_level; cfs.iname = argv[0]; if (oname == NULL) { @@ -239,6 +293,14 @@ int main(int argc, char **argv) } toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc)); + /* + * Initialize last+1 entry with non-heap trash. If final padding is + * added later, it may or may not be overwritten with an offset + * representing the length of the final compressed block. If not, + * initialize to a defined value. + */ + toc[hdr.nblocks] = 0; + cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); if (cfs.fdw < 0) { diff --git a/usr.bin/mkuzip/mkuzip.h b/usr.bin/mkuzip/mkuzip.h index f41507c86964..b4bec58525ee 100644 --- a/usr.bin/mkuzip/mkuzip.h +++ b/usr.bin/mkuzip/mkuzip.h @@ -28,6 +28,9 @@ #define DEFINE_RAW_METHOD(func, rval, args...) typedef rval (*func##_t)(args) +/* Use an algorithm-specific default level if no explicit level is selected. */ +#define USE_DEFAULT_LEVEL INT_MIN + void *mkuz_safe_malloc(size_t); void *mkuz_safe_zmalloc(size_t); int mkuz_memvcmp(const void *, unsigned char, size_t);