geom_uzip(4), mkuzip(8): Add Zstd image mode

The Zstd format bumps the CLOOP major number to 4 to avoid incompatibility
with older systems.  Support in geom_uzip(4) is conditional on the ZSTDIO
kernel option, which is enabled in amd64 GENERIC, but not all in-tree
configurations.

mkuzip(8) was modified slightly to always initialize the nblocks + 1'th
offset in the CLOOP file format.  Previously, it was only initialized in the
case where the final compressed block happened to be unaligned w.r.t.
DEV_BSIZE.  The "Fake" last+1 block change in r298619 means that the final
compressed block's 'blen' was never correct unless the compressed uzip image
happened to be BSIZE-aligned.  This happened in about 1 out of every 512
cases.  The zlib and lzma decompressors are probably tolerant of extra trash
following the frame they were told to decode, but Zstd complains that the
input size is incorrect.

Correspondingly, geom_uzip(4) was modified slightly to avoid trashing the
nblocks + 1'th offset when it is known to be initialized to a good value.
This corrects the calculated final real cluster compressed length to match
that printed by mkuzip(8).

mkuzip(8) was refactored somewhat to reduce code duplication and increase
ease of adding other compression formats.

  * Input block size validation was pulled out of individual compression
    init routines into main().

  * Init routines now validate a user-provided compression level or select
    an algorithm-specific default, if none was provided.

  * A new interface for calculating the maximal compressed size of an
    incompressible input block was added for each driver.  The generic code
    uses it to validate against MAXPHYS as well as to allocate compression
    result buffers in the generic code.

  * Algorithm selection is now driven by a table lookup, to increase ease of
    adding other formats in the future.

mkuzip(8) gained the ability to explicitly specify a compression level with
'-C'.  The prior defaults -- 9 for zlib and 6 for lzma -- are maintained.
The new zstd default is 9, to match zlib.

Rather than select lzma or zlib with '-L' or its absence, respectively, a
new argument '-A <algorithm>' is provided to select 'zlib', 'lzma', or
'zstd'.  '-L' is considered deprecated, but will probably never be removed.

All of the new features were documented in mkuzip.8; the page was also
cleaned up slightly.

Relnotes:	yes
This commit is contained in:
cem 2019-08-13 23:32:56 +00:00
parent 07370ae4ee
commit 604e65334e
22 changed files with 684 additions and 151 deletions

View File

@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd February 26, 2019
.Dd August 13, 2019
.Dt GEOM_UZIP 4
.Os
.Sh NAME
@ -37,6 +37,7 @@ place the following line in your
kernel configuration file:
.Bd -ragged -offset indent
.Cd "device xz"
.Cd "options ZSTDIO"
.Cd "options GEOM_UZIP"
.Ed
.Pp
@ -163,6 +164,9 @@ Log operations involving compressed cluster number.
.Xr md 4 ,
.Xr geom 8 ,
.Xr mkuzip 8
.Sh HISTORY
Zstd support was added in
.Fx 13.0 .
.Sh AUTHORS
.An -nosplit
The

View File

@ -2853,8 +2853,8 @@ options IMAGACT_BINMISC
# This enables support for compressed core dumps.
options GZIO
# zstd I/O stream support
# This enables support for Zstd compressed core dumps.
# zstd support
# This enables support for Zstd compressed core dumps and GEOM_UZIP images.
options ZSTDIO
# BHND(4) drivers

View File

@ -3627,6 +3627,8 @@ geom/uzip/g_uzip.c optional geom_uzip
geom/uzip/g_uzip_lzma.c optional geom_uzip
geom/uzip/g_uzip_wrkthr.c optional geom_uzip
geom/uzip/g_uzip_zlib.c optional geom_uzip
geom/uzip/g_uzip_zstd.c optional geom_uzip zstdio \
compile-with "${NORMAL_C} -I$S/contrib/zstd/lib/freebsd"
geom/vinum/geom_vinum.c optional geom_vinum
geom/vinum/geom_vinum_create.c optional geom_vinum
geom/vinum/geom_vinum_drive.c optional geom_vinum

View File

@ -31,6 +31,9 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_geom.h"
#include "opt_zstdio.h"
#include <sys/param.h>
#include <sys/bio.h>
#include <sys/endian.h>
@ -51,10 +54,11 @@ __FBSDID("$FreeBSD$");
#include <geom/uzip/g_uzip_dapi.h>
#include <geom/uzip/g_uzip_zlib.h>
#include <geom/uzip/g_uzip_lzma.h>
#ifdef ZSTDIO
#include <geom/uzip/g_uzip_zstd.h>
#endif
#include <geom/uzip/g_uzip_wrkthr.h>
#include "opt_geom.h"
MALLOC_DEFINE(M_GEOM_UZIP, "geom_uzip", "GEOM UZIP data structures");
FEATURE(geom_uzip, "GEOM read-only compressed disks support");
@ -594,7 +598,7 @@ g_uzip_parse_toc(struct g_uzip_softc *sc, struct g_provider *pp,
* block whose offset is larger than ours and assume
* it's going to be the next one.
*/
for (j = i + 1; j < sc->nblocks; j++) {
for (j = i + 1; j < sc->nblocks + 1; j++) {
if (sc->toc[j].offset > max_offset) {
break;
}
@ -664,8 +668,10 @@ g_uzip_taste(struct g_class *mp, struct g_provider *pp, int flags)
struct g_uzip_softc *sc;
enum {
G_UZIP = 1,
G_ULZMA
G_ULZMA,
G_ZSTD,
} type;
char cloop_version;
g_trace(G_T_TOPOLOGY, "%s(%s,%s)", __func__, mp->name, pp->name);
g_topology_assert();
@ -712,11 +718,12 @@ g_uzip_taste(struct g_class *mp, struct g_provider *pp, int flags)
goto e3;
}
cloop_version = header->magic[CLOOP_OFS_VERSN];
switch (header->magic[CLOOP_OFS_COMPR]) {
case CLOOP_COMP_LZMA:
case CLOOP_COMP_LZMA_DDP:
type = G_ULZMA;
if (header->magic[CLOOP_OFS_VERSN] < CLOOP_MINVER_LZMA) {
if (cloop_version < CLOOP_MINVER_LZMA) {
DPRINTF(GUZ_DBG_ERR, ("%s: image version too old\n",
gp->name));
goto e3;
@ -727,7 +734,7 @@ g_uzip_taste(struct g_class *mp, struct g_provider *pp, int flags)
case CLOOP_COMP_LIBZ:
case CLOOP_COMP_LIBZ_DDP:
type = G_UZIP;
if (header->magic[CLOOP_OFS_VERSN] < CLOOP_MINVER_ZLIB) {
if (cloop_version < CLOOP_MINVER_ZLIB) {
DPRINTF(GUZ_DBG_ERR, ("%s: image version too old\n",
gp->name));
goto e3;
@ -735,6 +742,24 @@ g_uzip_taste(struct g_class *mp, struct g_provider *pp, int flags)
DPRINTF(GUZ_DBG_INFO, ("%s: GEOM_UZIP_ZLIB image found\n",
gp->name));
break;
case CLOOP_COMP_ZSTD:
case CLOOP_COMP_ZSTD_DDP:
if (cloop_version < CLOOP_MINVER_ZSTD) {
DPRINTF(GUZ_DBG_ERR, ("%s: image version too old\n",
gp->name));
goto e3;
}
#ifdef ZSTDIO
DPRINTF(GUZ_DBG_INFO, ("%s: GEOM_UZIP_ZSTD image found.\n",
gp->name));
type = G_ZSTD;
#else
DPRINTF(GUZ_DBG_ERR, ("%s: GEOM_UZIP_ZSTD image found, but "
"this kernel was configured with Zstd disabled.\n",
gp->name));
goto e3;
#endif
break;
default:
DPRINTF(GUZ_DBG_ERR, ("%s: unsupported image type\n",
gp->name));
@ -774,6 +799,13 @@ g_uzip_taste(struct g_class *mp, struct g_provider *pp, int flags)
}
DPRINTF(GUZ_DBG_INFO, ("%s: %u offsets in the first sector\n",
gp->name, offsets_read));
/*
* The following invalidates the "header" pointer into the first
* block's "buf."
*/
header = NULL;
for (blk = 1; offsets_read < total_offsets; blk++) {
uint32_t nread;
@ -805,20 +837,41 @@ g_uzip_taste(struct g_class *mp, struct g_provider *pp, int flags)
goto e5;
}
if (type == G_UZIP) {
switch (type) {
case G_UZIP:
sc->dcp = g_uzip_zlib_ctor(sc->blksz);
} else {
break;
case G_ULZMA:
sc->dcp = g_uzip_lzma_ctor(sc->blksz);
}
if (sc->dcp == NULL) {
break;
#ifdef ZSTDIO
case G_ZSTD:
sc->dcp = g_uzip_zstd_ctor(sc->blksz);
break;
#endif
default:
goto e5;
}
/*
* "Fake" last+1 block, to make it easier for the TOC parser to
* iterate without making the last element a special case.
* The last+1 block was not always initialized by earlier versions of
* mkuzip(8). However, *if* it is initialized, the difference between
* its offset and the prior block's offset represents the length of the
* final real compressed block, and this is significant to the
* decompressor.
*/
if (cloop_version >= CLOOP_MINVER_RELIABLE_LASTBLKSZ &&
sc->toc[sc->nblocks].offset != 0) {
if (sc->toc[sc->nblocks].offset > pp->mediasize) {
DPRINTF(GUZ_DBG_ERR,
("%s: bogus n+1 offset %ju > mediasize %ju\n",
gp->name, (uintmax_t)sc->toc[sc->nblocks].offset,
(uintmax_t)pp->mediasize));
goto e6;
}
} else {
sc->toc[sc->nblocks].offset = pp->mediasize;
}
/* Massage TOC (table of contents), make sure it is sound */
if (g_uzip_parse_toc(sc, pp, gp) != 0) {
DPRINTF(GUZ_DBG_ERR, ("%s: TOC error\n", gp->name));

View File

@ -39,14 +39,20 @@
#define CLOOP_MAJVER_2 '2'
#define CLOOP_MAJVER_3 '3'
#define CLOOP_MAJVER_4 '4'
#define CLOOP_COMP_LIBZ 'V'
#define CLOOP_COMP_LIBZ_DDP 'v'
#define CLOOP_COMP_LZMA 'L'
#define CLOOP_COMP_LZMA_DDP 'l'
#define CLOOP_COMP_ZSTD 'Z'
#define CLOOP_COMP_ZSTD_DDP 'z'
#define CLOOP_MINVER_LZMA CLOOP_MAJVER_3
#define CLOOP_MINVER_ZLIB CLOOP_MAJVER_2
#define CLOOP_MINVER_ZSTD CLOOP_MAJVER_4
#define CLOOP_MINVER_RELIABLE_LASTBLKSZ CLOOP_MAJVER_4
struct cloop_header {
char magic[CLOOP_MAGIC_LEN]; /* cloop magic */

157
sys/geom/uzip/g_uzip_zstd.c Normal file
View File

@ -0,0 +1,157 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2019 Conrad Meyer <cem@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <geom/uzip/g_uzip.h>
#include <geom/uzip/g_uzip_dapi.h>
#include <geom/uzip/g_uzip_zstd.h>
/*
* We don't actually need any static-link ABI, just want to use "experimental"
* custom malloc/free APIs.
*/
#define ZSTD_STATIC_LINKING_ONLY
#include <contrib/zstd/lib/zstd.h>
FEATURE(geom_uzip_zstd, "g_uzip Zstd support");
/*
 * Private state of the Zstd decompressor.  Callers only ever see the
 * embedded guz_pub; the methods in this file recover the enclosing
 * structure from it with to_zstd_softc().
 */
struct g_uzip_zstd {
/* Public decompressor interface handed back by g_uzip_zstd_ctor(). */
struct g_uzip_dapi guz_pub;
/* Block size given to the constructor; output capacity for decompress. */
uint32_t guz_blksz;
/* Zstd decompression context, created in the constructor. */
ZSTD_DCtx *guz_dctx;
};
/*
 * container_of(ptr, type, member): given a pointer to 'member' inside an
 * object of type 'type', compute a pointer to the enclosing object by
 * subtracting the member's offset.  The typed temporary __p makes the
 * compiler check that 'ptr' really has the member's type.  Only defined
 * here if no other header has provided it already.
 */
#ifndef container_of
#define container_of(ptr, type, member) \
({ \
const __typeof(((type *)0)->member) *__p = (ptr); \
(type *)((uintptr_t)__p - offsetof(type, member)); \
})
#endif
/* Map a public struct g_uzip_dapi pointer back to its struct g_uzip_zstd. */
#define to_zstd_softc(zpp) container_of(zpp, struct g_uzip_zstd, guz_pub)
/*
 * Decompress one compressed cluster.
 *
 * zpp:       public decompressor handle embedded in a struct g_uzip_zstd.
 * gp_name:   geom name; used only as a prefix for diagnostics.
 * input:     start of the compressed Zstd frame.
 * ilen:      length of the compressed frame in bytes.
 * outputbuf: destination buffer; guz_blksz bytes are offered to Zstd as
 *            the output capacity.
 *
 * Returns 0 if Zstd decoded the frame into exactly guz_blksz bytes, and
 * EIO otherwise (after logging the reason).
 */
static int
guz_zstd_decompress(struct g_uzip_dapi *zpp, const char *gp_name, void *input,
    size_t ilen, void *outputbuf)
{
	struct g_uzip_zstd *sc;
	size_t rc;

	sc = to_zstd_softc(zpp);
	rc = ZSTD_decompressDCtx(sc->guz_dctx, outputbuf, sc->guz_blksz, input,
	    ilen);
	if (ZSTD_isError(rc)) {
		printf("%s: UZIP(zstd) decompress failed: %s\n", gp_name,
		    ZSTD_getErrorName(rc));
		return (EIO);
	}

	/*
	 * The frame is read from the image, so its decoded size cannot be
	 * trusted.  A frame that decodes to fewer bytes than a full block
	 * would leave the tail of outputbuf unwritten; report that as an
	 * I/O error at run time instead of relying on an assertion that is
	 * compiled out of non-INVARIANTS kernels.
	 */
	if (rc != sc->guz_blksz) {
		printf("%s: UZIP(zstd) decompress failed: "
		    "expected %u bytes, got %zu\n", gp_name, sc->guz_blksz,
		    rc);
		return (EIO);
	}
	return (0);
}
/*
 * Destroy a decompressor created by g_uzip_zstd_ctor(): release the Zstd
 * context (logging, but otherwise ignoring, a failure to do so) and then
 * free the enclosing structure.
 */
static void
guz_zstd_free(struct g_uzip_dapi *zpp)
{
	struct g_uzip_zstd *zsc = to_zstd_softc(zpp);
	size_t status = ZSTD_freeDCtx(zsc->guz_dctx);

	if (ZSTD_isError(status)) {
		printf("%s: UZIP(zstd) free failed: %s\n", __func__,
		    ZSTD_getErrorName(status));
	}

	free(zsc, M_GEOM_UZIP);
}
/*
 * Reset the Zstd context (session and parameters) so it can be used for
 * the next frame.  Returns 0 on success; logs and returns EIO if Zstd
 * reports an error.  gp_name is used only to prefix the diagnostic.
 */
static int
guz_zstd_rewind(struct g_uzip_dapi *zpp, const char *gp_name)
{
	struct g_uzip_zstd *zsc = to_zstd_softc(zpp);
	size_t status;

	status = ZSTD_DCtx_reset(zsc->guz_dctx,
	    ZSTD_reset_session_and_parameters);
	if (!ZSTD_isError(status))
		return (0);

	printf("%s: UZIP(zstd) rewind failed: %s\n", gp_name,
	    ZSTD_getErrorName(status));
	return (EIO);
}
/*
 * Allocation hook handed to Zstd through zstd_guz_alloc.  'opaque' is the
 * malloc type to charge the allocation to; the request is made with
 * M_WAITOK.
 */
static void *
zstd_alloc(void *opaque, size_t size)
{
	void *mem;

	mem = malloc(size, opaque, M_WAITOK);
	return (mem);
}
/*
 * Release hook handed to Zstd through zstd_guz_alloc.  Frees 'address'
 * using 'opaque' as the malloc type, mirroring zstd_alloc().
 */
static void
zstd_free(void *opaque, void *address)
{
free(address, opaque);
}
/*
 * Custom memory hooks passed to ZSTD_createDCtx_advanced().  The opaque
 * cookie is M_GEOM_UZIP, which zstd_alloc()/zstd_free() use as the
 * malloc type for every allocation Zstd makes on our behalf.
 */
static const ZSTD_customMem zstd_guz_alloc = {
.customAlloc = zstd_alloc,
.customFree = zstd_free,
.opaque = M_GEOM_UZIP,
};
struct g_uzip_dapi *
g_uzip_zstd_ctor(uint32_t blksz)
{
struct g_uzip_zstd *sc;
sc = malloc(sizeof(*sc), M_GEOM_UZIP, M_WAITOK | M_ZERO);
sc->guz_dctx = ZSTD_createDCtx_advanced(zstd_guz_alloc);
if (sc->guz_dctx == NULL) {
printf("%s: ZSTD_createDCtx_advanced failed\n", __func__);
free(sc, M_GEOM_UZIP);
return (NULL);
}
sc->guz_blksz = blksz;
sc->guz_pub.max_blen = ZSTD_compressBound(blksz);
sc->guz_pub.decompress = guz_zstd_decompress;
sc->guz_pub.free = guz_zstd_free;
sc->guz_pub.rewind = guz_zstd_rewind;
sc->guz_pub.pvt = NULL;
return (&sc->guz_pub);
}

View File

@ -0,0 +1,30 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2019 Conrad Meyer <cem@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
/*
 * Create a Zstd decompressor for the block size passed as the argument.
 * Returns the decompressor's public interface, or NULL on failure.
 */
struct g_uzip_dapi *g_uzip_zstd_ctor(uint32_t);

View File

@ -6,12 +6,17 @@ KMOD= geom_uzip
SRCS= g_uzip.c g_uzip_zlib.c g_uzip_lzma.c g_uzip_wrkthr.c
SRCS+= g_uzip.h g_uzip_dapi.h g_uzip_lzma.h g_uzip_zlib.h g_uzip_softc.h \
g_uzip_wrkthr.h
#CFLAGS= -g -DINVARIANT_SUPPORT -DINVARIANTS
.PATH: ${SRCTOP}/sys/net
# This works for buildkernel, but will disable zstd in geom_uzip out of tree.
.if ${KERN_OPTS:MZSTDIO} != ""
SRCS+= g_uzip_zstd.c g_uzip_zstd.h
CFLAGS.g_uzip_zstd.c+= -I${SRCTOP}/sys/contrib/zstd/lib/freebsd
.endif
#CFLAGS= -g -DINVARIANT_SUPPORT -DINVARIANTS
CFLAGS.g_uzip_lzma.c+= -I${SRCTOP}/sys/contrib/xz-embedded/freebsd \
-I${SRCTOP}/sys/contrib/xz-embedded/linux/lib/xz/
SRCS+= opt_geom.h
SRCS+= opt_geom.h opt_zstdio.h
.include <bsd.kmod.mk>

View File

@ -3,10 +3,12 @@
PROG= mkuzip
MAN= mkuzip.8
SRCS= mkuzip.c mkuz_blockcache.c mkuz_lzma.c mkuz_zlib.c mkuz_conveyor.c \
mkuz_blk.c mkuz_fqueue.c mkuz_time.c mkuz_insize.c
mkuz_blk.c mkuz_fqueue.c mkuz_time.c mkuz_insize.c mkuz_zstd.c
CFLAGS+= -I${SRCTOP}/sys/contrib/zstd/lib
#CFLAGS+= -DMKUZ_DEBUG
LIBADD= z md lzma pthread
LIBADD= lzma md pthread z zstd
.include <bsd.prog.mk>

View File

@ -39,4 +39,6 @@ struct mkuz_cfg {
const char *iname;
off_t isize;
const struct mkuz_format *handler;
size_t cbound_blksz;
int comp_level;
};

View File

@ -39,9 +39,11 @@
#define CLOOP_MAJVER_2 '2'
#define CLOOP_MAJVER_3 '3'
#define CLOOP_MAJVER_4 '4'
#define CLOOP_COMP_LIBZ 'V'
#define CLOOP_COMP_LZMA 'L'
#define CLOOP_COMP_ZSTD 'Z'
struct cloop_header {
char magic[CLOOP_MAGIC_LEN]; /* cloop magic */

View File

@ -42,8 +42,8 @@ __FBSDID("$FreeBSD$");
#include "mkuz_conveyor.h"
#include "mkuz_cfg.h"
#include "mkuzip.h"
#include "mkuz_format.h"
#include "mkuz_blk.h"
#include "mkuz_format.h"
#include "mkuz_fqueue.h"
#include "mkuz_blk_chain.h"
@ -67,7 +67,7 @@ cworker(void *p)
cfp = cwp->cfp;
cvp = cwp->cvp;
free(cwp);
c_ctx = cfp->handler->f_init(cfp->blksz);
c_ctx = cfp->handler->f_init(&cfp->comp_level);
for (;;) {
iblk = mkuz_fqueue_deq(cvp->wrk_queue);
if (iblk == MKUZ_BLK_EOF) {
@ -80,7 +80,8 @@ cworker(void *p)
/* All zeroes block */
oblk = mkuz_blk_ctor(0);
} else {
oblk = cfp->handler->f_compress(c_ctx, iblk);
oblk = mkuz_blk_ctor(cfp->cbound_blksz);
cfp->handler->f_compress(c_ctx, iblk, oblk);
if (cfp->en_dedup != 0) {
compute_digest(oblk);
}

View File

@ -26,12 +26,15 @@
* $FreeBSD$
*/
DEFINE_RAW_METHOD(f_init, void *, uint32_t);
DEFINE_RAW_METHOD(f_compress, struct mkuz_blk *, void *, const struct mkuz_blk *);
DEFINE_RAW_METHOD(f_compress_bound, size_t, size_t);
DEFINE_RAW_METHOD(f_init, void *, int *);
DEFINE_RAW_METHOD(f_compress, void, void *, const struct mkuz_blk *, struct mkuz_blk *);
struct mkuz_format {
const char *option;
const char *magic;
const char *default_sufx;
f_compress_bound_t f_compress_bound;
f_init_t f_init;
f_compress_t f_compress;
};

View File

@ -35,61 +35,55 @@ __FBSDID("$FreeBSD$");
#include <lzma.h>
#include "mkuzip.h"
#include "mkuz_lzma.h"
#include "mkuz_blk.h"
#define USED_BLOCKSIZE DEV_BSIZE
#include "mkuz_lzma.h"
struct mkuz_lzma {
lzma_filter filters[2];
lzma_options_lzma opt_lzma;
lzma_stream strm;
uint32_t blksz;
};
static const lzma_stream lzma_stream_init = LZMA_STREAM_INIT;
size_t
mkuz_lzma_cbound(size_t blksz)
{
return (lzma_stream_buffer_bound(blksz));
}
void *
mkuz_lzma_init(uint32_t blksz)
mkuz_lzma_init(int *comp_level)
{
struct mkuz_lzma *ulp;
if (blksz % USED_BLOCKSIZE != 0) {
errx(1, "cluster size should be multiple of %d",
USED_BLOCKSIZE);
if (*comp_level == USE_DEFAULT_LEVEL)
*comp_level = LZMA_PRESET_DEFAULT;
if (*comp_level < 0 || *comp_level > 9)
errx(1, "provided compression level %d is invalid",
*comp_level);
/* Not reached */
}
if (blksz > MAXPHYS) {
errx(1, "cluster size is too large");
/* Not reached */
}
ulp = mkuz_safe_zmalloc(sizeof(struct mkuz_lzma));
/* Init lzma encoder */
ulp->strm = lzma_stream_init;
if (lzma_lzma_preset(&ulp->opt_lzma, LZMA_PRESET_DEFAULT))
ulp->strm = (lzma_stream)LZMA_STREAM_INIT;
if (lzma_lzma_preset(&ulp->opt_lzma, *comp_level))
errx(1, "Error loading LZMA preset");
ulp->filters[0].id = LZMA_FILTER_LZMA2;
ulp->filters[0].options = &ulp->opt_lzma;
ulp->filters[1].id = LZMA_VLI_UNKNOWN;
ulp->blksz = blksz;
return (void *)ulp;
}
struct mkuz_blk *
mkuz_lzma_compress(void *p, const struct mkuz_blk *iblk)
void
mkuz_lzma_compress(void *p, const struct mkuz_blk *iblk, struct mkuz_blk *oblk)
{
lzma_ret ret;
struct mkuz_blk *rval;
struct mkuz_lzma *ulp;
ulp = (struct mkuz_lzma *)p;
rval = mkuz_blk_ctor(ulp->blksz * 2);
ret = lzma_stream_encoder(&ulp->strm, ulp->filters, LZMA_CHECK_CRC32);
if (ret != LZMA_OK) {
if (ret == LZMA_MEMLIMIT_ERROR)
@ -99,23 +93,20 @@ mkuz_lzma_compress(void *p, const struct mkuz_blk *iblk)
}
ulp->strm.next_in = iblk->data;
ulp->strm.avail_in = ulp->blksz;
ulp->strm.next_out = rval->data;
ulp->strm.avail_out = rval->alen;
ulp->strm.avail_in = iblk->info.len;
ulp->strm.next_out = oblk->data;
ulp->strm.avail_out = oblk->alen;
ret = lzma_code(&ulp->strm, LZMA_FINISH);
if (ret != LZMA_STREAM_END) {
/* Error */
if (ret != LZMA_STREAM_END)
errx(1, "lzma_code FINISH failed, code=%d, pos(in=%zd, "
"out=%zd)", ret, (ulp->blksz - ulp->strm.avail_in),
(ulp->blksz * 2 - ulp->strm.avail_out));
}
"out=%zd)", ret, (iblk->info.len - ulp->strm.avail_in),
(oblk->alen - ulp->strm.avail_out));
#if 0
lzma_end(&ulp->strm);
#endif
rval->info.len = rval->alen - ulp->strm.avail_out;
return (rval);
oblk->info.len = oblk->alen - ulp->strm.avail_out;
}

View File

@ -38,5 +38,6 @@
"exit $?\n"
#define DEFAULT_SUFX_LZMA ".ulzma"
void *mkuz_lzma_init(uint32_t);
struct mkuz_blk *mkuz_lzma_compress(void *, const struct mkuz_blk *);
size_t mkuz_lzma_cbound(size_t);
void *mkuz_lzma_init(int *);
void mkuz_lzma_compress(void *, const struct mkuz_blk *, struct mkuz_blk *);

View File

@ -34,54 +34,51 @@ __FBSDID("$FreeBSD$");
#include <zlib.h>
#include "mkuzip.h"
#include "mkuz_zlib.h"
#include "mkuz_blk.h"
#include "mkuz_zlib.h"
struct mkuz_zlib {
uLongf oblen;
uint32_t blksz;
int comp_level;
};
size_t
mkuz_zlib_cbound(size_t blksz)
{
return (compressBound(blksz));
}
void *
mkuz_zlib_init(uint32_t blksz)
mkuz_zlib_init(int *comp_level)
{
struct mkuz_zlib *zp;
if (blksz % DEV_BSIZE != 0) {
errx(1, "cluster size should be multiple of %d",
DEV_BSIZE);
if (*comp_level == USE_DEFAULT_LEVEL)
*comp_level = Z_BEST_COMPRESSION;
if (*comp_level < Z_BEST_SPEED || *comp_level > Z_BEST_COMPRESSION)
errx(1, "provided compression level %d is invalid",
*comp_level);
/* Not reached */
}
if (compressBound(blksz) > MAXPHYS) {
errx(1, "cluster size is too large");
/* Not reached */
}
zp = mkuz_safe_zmalloc(sizeof(struct mkuz_zlib));
zp->oblen = compressBound(blksz);
zp->blksz = blksz;
zp->comp_level = *comp_level;
return (void *)zp;
return (zp);
}
struct mkuz_blk *
mkuz_zlib_compress(void *p, const struct mkuz_blk *iblk)
void
mkuz_zlib_compress(void *p, const struct mkuz_blk *iblk, struct mkuz_blk *oblk)
{
uLongf destlen_z;
struct mkuz_blk *rval;
struct mkuz_zlib *zp;
zp = (struct mkuz_zlib *)p;
rval = mkuz_blk_ctor(zp->oblen);
destlen_z = rval->alen;
if (compress2(rval->data, &destlen_z, iblk->data, zp->blksz,
Z_BEST_COMPRESSION) != Z_OK) {
errx(1, "can't compress data: compress2() "
"failed");
destlen_z = oblk->alen;
if (compress2(oblk->data, &destlen_z, iblk->data, iblk->info.len,
zp->comp_level) != Z_OK) {
errx(1, "can't compress data: compress2() failed");
/* Not reached */
}
rval->info.len = (uint32_t)destlen_z;
return (rval);
oblk->info.len = (uint32_t)destlen_z;
}

View File

@ -32,5 +32,6 @@
"(kldstat -qm g_uzip||kldload geom_uzip)>&-&&" \
"mount_cd9660 /dev/`mdconfig -af $0`.uzip $1\nexit $?\n"
void *mkuz_zlib_init(uint32_t);
struct mkuz_blk *mkuz_zlib_compress(void *, const struct mkuz_blk *);
size_t mkuz_zlib_cbound(size_t);
void *mkuz_zlib_init(int *);
void mkuz_zlib_compress(void *, const struct mkuz_blk *, struct mkuz_blk *);

View File

@ -0,0 +1,95 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2019 Conrad Meyer <cem@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <err.h>
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <zstd.h>
#include "mkuzip.h"
#include "mkuz_blk.h"
#include "mkuz_zstd.h"
/*
 * Report the largest compressed size Zstd may produce for an input block
 * of blksz bytes, as given by ZSTD_compressBound().
 */
size_t
mkuz_zstd_cbound(size_t blksz)
{
	size_t bound;

	bound = ZSTD_compressBound(blksz);
	return (bound);
}
/*
 * Create and configure a Zstd compression context.
 *
 * comp_level is in/out: USE_DEFAULT_LEVEL is replaced with 9, and the
 * resulting level is validated against the range Zstd reports (level 0
 * is rejected explicitly).  The context is configured with that level
 * and with frame checksums enabled.
 *
 * Returns the context as an opaque pointer.  Any failure terminates the
 * program via errx(3).
 */
void *
mkuz_zstd_init(int *comp_level)
{
	ZSTD_CCtx *zctx;
	size_t status;
	int level;

	/* Default chosen for near-parity with mkuzip zlib default. */
	if (*comp_level == USE_DEFAULT_LEVEL)
		*comp_level = 9;

	level = *comp_level;
	if (level == 0 || level < ZSTD_minCLevel() ||
	    level > ZSTD_maxCLevel()) {
		errx(1, "provided compression level %d is invalid",
		    *comp_level);
	}

	zctx = ZSTD_createCCtx();
	if (zctx == NULL) {
		errx(1, "could not allocate Zstd context");
	}

	status = ZSTD_CCtx_setParameter(zctx, ZSTD_c_compressionLevel,
	    *comp_level);
	if (ZSTD_isError(status)) {
		errx(1, "Could not set zstd compression level %d: %s",
		    *comp_level, ZSTD_getErrorName(status));
	}

	status = ZSTD_CCtx_setParameter(zctx, ZSTD_c_checksumFlag, 1);
	if (ZSTD_isError(status)) {
		errx(1, "Could not enable zstd checksum: %s",
		    ZSTD_getErrorName(status));
	}

	return (zctx);
}
/*
 * Compress one input block into a caller-provided output block.
 *
 * p:    context returned by mkuz_zstd_init().
 * iblk: input; info.len bytes starting at data are compressed.
 * oblk: output; up to alen bytes are written at data, and info.len is set
 *       to the number of bytes actually produced.
 *
 * A Zstd error terminates the program via errx(3).
 */
void
mkuz_zstd_compress(void *p, const struct mkuz_blk *iblk, struct mkuz_blk *oblk)
{
	ZSTD_CCtx *zctx = p;
	size_t produced;

	produced = ZSTD_compress2(zctx, oblk->data, oblk->alen, iblk->data,
	    iblk->info.len);
	if (ZSTD_isError(produced)) {
		errx(1, "could not compress data: ZSTD_compress2: %s",
		    ZSTD_getErrorName(produced));
	}

	oblk->info.len = produced;
}

View File

@ -0,0 +1,38 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2019 Conrad Meyer <cem@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
/* Default output file name suffix for Zstd-compressed images. */
#define DEFAULT_SUFX_ZSTD ".uzst"
/*
 * Image magic for Zstd images.  It is a small /bin/sh script that loads
 * geom_uzip if needed and mounts the image; its second line, "#Z4.0
 * Format", carries the 'Z' compression tag and the '4' major version.
 */
#define CLOOP_MAGIC_ZSTD "#!/bin/sh\n#Z4.0 Format\n" \
"(kldstat -qm g_uzip||kldload geom_uzip)>&-&&" \
"mount_cd9660 /dev/`mdconfig -af $0`.uzip $1\nexit $?\n"
/* Worst-case compressed size for an input block of the given size. */
size_t mkuz_zstd_cbound(size_t);
/* Create a compression context; the int is the in/out compression level. */
void *mkuz_zstd_init(int *);
/* Compress the const input block into the caller-supplied output block. */
void mkuz_zstd_compress(void *, const struct mkuz_blk *, struct mkuz_blk *);

View File

@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd February 19, 2019
.Dd August 9, 2019
.Dt MKUZIP 8
.Os
.Sh NAME
@ -35,7 +35,9 @@
class
.Sh SYNOPSIS
.Nm
.Op Fl dLSsvZ
.Op Fl dSsvZ
.Op Fl A Ar compression_algorithm
.Op Fl C Ar compression_level
.Op Fl j Ar compression_jobs
.Op Fl o Ar outfile
.Op Fl s Ar cluster_size
@ -57,17 +59,82 @@ works in two phases:
.It
An
.Ar infile
image is split into clusters; each cluster is compressed using
.Xr zlib 3
or
.Xr lzma 3 .
image is split into clusters; each cluster is compressed.
.It
The resulting set of compressed clusters along with headers that allow
locating each individual cluster is written to the output file.
The resulting set of compressed clusters is written to the output file.
In addition, a
.Dq table of contents
header is written which allows for efficient seeking.
.El
.Pp
The options are:
.Bl -tag -width indent
.It Fl A Op Ar lzma | Ar zlib | Ar zstd
Select a specific compression algorithm.
If this option is not provided, the default is
.Ar zlib .
.Pp
The
.Ar lzma
algorithm provides noticeably better compression levels than zlib on the same
data set.
It has vastly slower compression speed and moderately slower decompression
speed.
.Pp
The
.Ar zstd
algorithm provides better compression levels than zlib on the same data set.
It also has faster compression and decompression speed than zlib.
In the very high compression
.Dq level
settings, it does not offer quite as high a compression ratio as
.Ar lzma .
However, its decompression speed does not suffer at high compression
.Dq levels .
.It Fl C Ar compression_level
Select the integer compression level used to parameterize the chosen
compression algorithm.
.Pp
For any given algorithm, a lesser number selects a faster compression mode.
A greater number selects a slower compression mode.
Typically, for the same algorithm, a greater
.Ar compression_level
provides better final compression ratio.
.Pp
For
.Ar lzma ,
the range of valid compression levels is
.Va 0-9 .
The
.Nm
default for lzma is
.Va 6 .
.Pp
For
.Ar zlib ,
the range of valid compression levels is
.Va 1-9 .
The
.Nm
default for zlib is
.Va 9 .
.Pp
For
.Ar zstd ,
the range of valid compression levels is currently
.Va 1-19 .
The
.Nm
default for zstd is
.Va 9 .
.It Fl d
Enable de-duplication.
When the option is enabled
.Nm
detects identical blocks in the input and replaces each subsequent occurrence
of such a block with a pointer to the very first one in the output.
Setting this option results in a moderate decrease of compressed image size,
typically around 3-5% of a final size of the compressed image.
.It Fl j Ar compression_jobs
Specify the number of compression jobs that
.Nm
@ -77,24 +144,9 @@ to the value of
.Va hw.ncpu
.Xr sysctl 8
variable.
.It Fl d
Enable de-duplication.
When the option is enabled the
.Nm
detects identical blocks in the input and replaces each subsequent occurence
of such block with pointer to the very first one in the output.
Setting this option results is moderate decrease of compressed image size,
typically around 3-5% of a final size of the compressed image.
.It Fl L
Use
.Xr lzma 3
compression algorithm instead of the default
.Xr zlib 3 .
The
.Xr lzma 3
provides noticeable better compression levels on the same data set
at the expense of much slower compression speed (10-20x) and somewhat slower
decompression (2-3x).
.It Op Fl L
Legacy flag that indicates the same thing as
.Dq Fl A Ar lzma .
.It Fl o Ar outfile
Name of the output file
.Ar outfile .
@ -119,33 +171,44 @@ should be a multiple of 512 bytes.
.It Fl v
Display verbose messages.
.It Fl Z
Disable zero-blocks detection and elimination.
When this option is set, the
Disable zero-block detection and elimination.
When this option is set,
.Nm
would compress empty blocks (i.e. clusters that consist of only zero bytes)
just as it would any other block.
When the option is not set, the
compresses blocks of zero bytes just as it would any other block.
When the option is not set,
.Nm
detects such blocks and skips them from the output.
detects and compresses zero blocks in a space-efficient way.
Setting
.Fl Z
results is slight increase of compressed image size, typically less than 0.1%
of a final size of the compressed image.
increases compressed image sizes slightly, typically less than 0.1%.
.El
.Sh NOTES
The compression ratio largely depends on the cluster size used.
.\" The following two sentences are unclear: how can gzip(1) be
.\" used in a comparable fashion, and wouldn't a gzip-compressed
.\" image suffer from larger cluster sizes as well?
For large cluster sizes (16K and higher), typical compression ratios
.Sh IMPLEMENTATION NOTES
The compression ratio largely depends on the compression algorithm, level, and
cluster size used.
For large cluster sizes (16kB and higher), typical overall image compression
ratios with
.Xr zlib 3
are only 1-2% less than those achieved with
.Xr gzip 1 .
However, it should be kept in mind that larger cluster
sizes lead to higher overhead in the
.Xr gzip 1
over the entire image.
However, it should be kept in mind that larger cluster sizes lead to higher
overhead in the
.Xr geom_uzip 4
class, as the class has to decompress the whole cluster even if
only a few bytes from that cluster have to be read.
.Pp
Additionally, the threshold at 16-32 kB where a larger cluster size does not
benefit overall compression ratio is an artifact of the
.Xr zlib 3
algorithm in particular.
.Ar Lzma
and
.Ar Zstd
will continue to provide better compression ratios as cluster sizes
are increased, at high enough compression levels.
The same tradeoff continues to apply: reads in
.Xr geom_uzip 4
become more expensive the greater the cluster size.
.Pp
The
.Nm
utility
@ -169,12 +232,27 @@ specific feature and while it does not require any changes to on-disk
compressed image format, however it did require some matching changes to the
.Xr geom_uzip 4
to handle resulting images correctly.
.Pp
To make use of
.Ar zstd
.Nm
images, the kernel must be configured with
.Cd ZSTDIO .
It is enabled by default in many
.Cd GENERIC
kernels provided as binary distributions by
.Fx .
The status on any particular system can be verified by checking
.Xr sysctl 8
.Dv kern.features.geom_uzip_zstd
for
.Dq 1 .
.Sh EXIT STATUS
.Ex -std
.Sh SEE ALSO
.Xr gzip 1 ,
.Xr xz 1 ,
.Xr zstd 1 ,
.Xr lzma 3 ,
.Xr zlib 3 ,
.Xr geom 4 ,
.Xr geom_uzip 4 ,

View File

@ -51,8 +51,9 @@ __FBSDID("$FreeBSD$");
#include "mkuzip.h"
#include "mkuz_cloop.h"
#include "mkuz_blockcache.h"
#include "mkuz_zlib.h"
#include "mkuz_lzma.h"
#include "mkuz_zlib.h"
#include "mkuz_zstd.h"
#include "mkuz_blk.h"
#include "mkuz_cfg.h"
#include "mkuz_conveyor.h"
@ -63,18 +64,38 @@ __FBSDID("$FreeBSD$");
#define DEFAULT_CLSTSIZE 16384
static struct mkuz_format uzip_fmt = {
.magic = CLOOP_MAGIC_ZLIB,
.default_sufx = DEFAULT_SUFX_ZLIB,
.f_init = &mkuz_zlib_init,
.f_compress = &mkuz_zlib_compress
/*
 * Compression algorithms selectable with -A.  The values are used as
 * indices into uzip_fmts[]; UZ_INVALID terminates the list and has no
 * table entry.  UZ_ZLIB is the default when no algorithm is requested.
 */
enum UZ_ALGORITHM {
UZ_ZLIB = 0,
UZ_LZMA,
UZ_ZSTD,
UZ_INVALID
};
static struct mkuz_format ulzma_fmt = {
/*
 * Per-algorithm format descriptors, indexed by enum UZ_ALGORITHM.
 *
 *   option           - name compared against the -A argument
 *   magic            - CLOOP magic string copied into the image header
 *   default_sufx     - presumably the default output file suffix
 *                      (NOTE(review): use site not visible here; confirm)
 *   f_compress_bound - called with the cluster size; the result is checked
 *                      against MAXPHYS
 *   f_init           - called with a pointer to the requested compression
 *                      level; returns the compressor context
 *   f_compress       - compression routine (call site not visible here)
 */
static const struct mkuz_format uzip_fmts[] = {
[UZ_ZLIB] = {
.option = "zlib",
.magic = CLOOP_MAGIC_ZLIB,
.default_sufx = DEFAULT_SUFX_ZLIB,
.f_compress_bound = mkuz_zlib_cbound,
.f_init = mkuz_zlib_init,
.f_compress = mkuz_zlib_compress,
},
[UZ_LZMA] = {
.option = "lzma",
.magic = CLOOP_MAGIC_LZMA,
.default_sufx = DEFAULT_SUFX_LZMA,
.f_init = &mkuz_lzma_init,
.f_compress = &mkuz_lzma_compress
.f_compress_bound = mkuz_lzma_cbound,
.f_init = mkuz_lzma_init,
.f_compress = mkuz_lzma_compress,
},
[UZ_ZSTD] = {
.option = "zstd",
.magic = CLOOP_MAGIC_ZSTD,
.default_sufx = DEFAULT_SUFX_ZSTD,
.f_compress_bound = mkuz_zstd_cbound,
.f_init = mkuz_zstd_init,
.f_compress = mkuz_zstd_compress,
},
};
static struct mkuz_blk *readblock(int, u_int32_t);
@ -111,6 +132,8 @@ int main(int argc, char **argv)
struct mkuz_blk_info *chit;
size_t ncpusz, ncpu, magiclen;
double st, et;
enum UZ_ALGORITHM comp_alg;
int comp_level;
st = getdtime();
@ -129,12 +152,27 @@ int main(int argc, char **argv)
cfs.en_dedup = 0;
summary.en = 0;
summary.f = stderr;
cfs.handler = &uzip_fmt;
comp_alg = UZ_ZLIB;
comp_level = USE_DEFAULT_LEVEL;
cfs.nworkers = ncpu;
struct mkuz_blk *iblk, *oblk;
while((opt = getopt(argc, argv, "o:s:vZdLSj:")) != -1) {
while((opt = getopt(argc, argv, "A:C:o:s:vZdLSj:")) != -1) {
switch(opt) {
case 'A':
for (tmp = UZ_ZLIB; tmp < UZ_INVALID; tmp++) {
if (strcmp(uzip_fmts[tmp].option, optarg) == 0)
break;
}
if (tmp == UZ_INVALID)
errx(1, "invalid algorithm specified: %s",
optarg);
/* Not reached */
comp_alg = tmp;
break;
case 'C':
comp_level = atoi(optarg);
break;
case 'o':
oname = optarg;
break;
@ -162,7 +200,7 @@ int main(int argc, char **argv)
break;
case 'L':
cfs.handler = &ulzma_fmt;
comp_alg = UZ_LZMA;
break;
case 'S':
@ -193,16 +231,32 @@ int main(int argc, char **argv)
/* Not reached */
}
cfs.handler = &uzip_fmts[comp_alg];
magiclen = strlcpy(hdr.magic, cfs.handler->magic, sizeof(hdr.magic));
assert(magiclen < sizeof(hdr.magic));
if (cfs.en_dedup != 0) {
/*
* Dedupe requires a version 3 format. Don't downgrade newer
* formats.
*/
if (hdr.magic[CLOOP_OFS_VERSN] == CLOOP_MAJVER_2)
hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3;
hdr.magic[CLOOP_OFS_COMPR] =
tolower(hdr.magic[CLOOP_OFS_COMPR]);
}
c_ctx = cfs.handler->f_init(cfs.blksz);
if (cfs.blksz % DEV_BSIZE != 0)
errx(1, "cluster size should be multiple of %d", DEV_BSIZE);
cfs.cbound_blksz = cfs.handler->f_compress_bound(cfs.blksz);
if (cfs.cbound_blksz > MAXPHYS)
errx(1, "maximal compressed cluster size %zu greater than MAXPHYS %zu",
cfs.cbound_blksz, (size_t)MAXPHYS);
c_ctx = cfs.handler->f_init(&comp_level);
cfs.comp_level = comp_level;
cfs.iname = argv[0];
if (oname == NULL) {
@ -239,6 +293,14 @@ int main(int argc, char **argv)
}
toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc));
/*
* Give the last+1 entry a defined value rather than leaving
* uninitialized heap contents in it.  If final padding is added later,
* the entry may or may not be overwritten with an offset representing
* the length of the final compressed block; if it is not, it keeps the
* defined value assigned here.
*/
toc[hdr.nblocks] = 0;
cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT,
S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
if (cfs.fdw < 0) {

View File

@ -28,6 +28,9 @@
/*
 * Declare a function-pointer typedef named <func>_t that returns rval and
 * takes the given argument list.
 */
#define DEFINE_RAW_METHOD(func, rval, args...) typedef rval (*func##_t)(args)
/*
 * Use an algorithm-specific default level if no explicit level is selected.
 * NOTE(review): INT_MIN comes from <limits.h>; confirm every includer of
 * this header has it in scope.
 */
#define USE_DEFAULT_LEVEL INT_MIN
/*
 * Allocation helpers.  NOTE(review): presumably these do not return on
 * failure and the "z" variant zero-fills -- confirm against the definitions.
 */
void *mkuz_safe_malloc(size_t);
void *mkuz_safe_zmalloc(size_t);
/*
 * NOTE(review): looks like a comparison of a buffer against a single byte
 * value -- confirm the return convention against the definition.
 */
int mkuz_memvcmp(const void *, unsigned char, size_t);