geom_uzip(4), mkuzip(8): Add Zstd image mode

The Zstd format bumps the CLOOP major number to 4 to avoid incompatibility
with older systems.  Support in geom_uzip(4) is conditional on the ZSTDIO
kernel option, which is enabled in amd64 GENERIC, but not all in-tree
configurations.

mkuzip(8) was modified slightly to always initialize the nblocks + 1'th
offset in the CLOOP file format.  Previously, it was only initialized in the
case where the final compressed block happened to be unaligned w.r.t.
DEV_BSIZE.  The "Fake" last+1 block change in r298619 means that the final
compressed block's 'blen' was never correct unless the compressed uzip image
happened to be BSIZE-aligned.  This happened in about 1 out of every 512
cases.  The zlib and lzma decompressors are probably tolerant of extra trash
following the frame they were told to decode, but Zstd complains that the
input size is incorrect.

Correspondingly, geom_uzip(4) was modified slightly to avoid trashing the
nblocks + 1'th offset when it is known to be initialized to a good value.
This corrects the calculated final real cluster compressed length to match
that printed by mkuzip(8).

mkuzip(8) was refactored somewhat to reduce code duplication and increase
ease of adding other compression formats.

  * Input block size validation was pulled out of individual compression
    init routines into main().

  * Init routines now validate a user-provided compression level or select
    an algorithm-specific default, if none was provided.

  * A new interface for calculating the maximal compressed size of an
    incompressible input block was added for each driver.  The generic code
    uses it to validate against MAXPHYS as well as to allocate compression
    result buffers.

  * Algorithm selection is now driven by a table lookup, to increase ease of
    adding other formats in the future.

mkuzip(8) gained the ability to explicitly specify a compression level with
'-C'.  The prior defaults -- 9 for zlib and 6 for lzma -- are maintained.
The new zstd default is 9, to match zlib.

Rather than select lzma or zlib with '-L' or its absence, respectively, a
new argument '-A <algorithm>' is provided to select 'zlib', 'lzma', or
'zstd'.  '-L' is considered deprecated, but will probably never be removed.

All of the new features were documented in mkuzip.8; the page was also
cleaned up slightly.

Relnotes:	yes
This commit is contained in:
Conrad Meyer 2019-08-13 23:32:56 +00:00
parent 443b0ad786
commit eefd8f96fb
22 changed files with 684 additions and 151 deletions

View File

@ -25,7 +25,7 @@
.\" .\"
.\" $FreeBSD$ .\" $FreeBSD$
.\" .\"
.Dd February 26, 2019 .Dd August 13, 2019
.Dt GEOM_UZIP 4 .Dt GEOM_UZIP 4
.Os .Os
.Sh NAME .Sh NAME
@ -37,6 +37,7 @@ place the following line in your
kernel configuration file: kernel configuration file:
.Bd -ragged -offset indent .Bd -ragged -offset indent
.Cd "device xz" .Cd "device xz"
.Cd "options ZSTDIO"
.Cd "options GEOM_UZIP" .Cd "options GEOM_UZIP"
.Ed .Ed
.Pp .Pp
@ -163,6 +164,9 @@ Log operations involving compressed cluster number.
.Xr md 4 , .Xr md 4 ,
.Xr geom 8 , .Xr geom 8 ,
.Xr mkuzip 8 .Xr mkuzip 8
.Sh HISTORY
Zstd support was added in
.Fx 13.0 .
.Sh AUTHORS .Sh AUTHORS
.An -nosplit .An -nosplit
The The

View File

@ -2853,8 +2853,8 @@ options IMAGACT_BINMISC
# This enables support for compressed core dumps. # This enables support for compressed core dumps.
options GZIO options GZIO
# zstd I/O stream support # zstd support
# This enables support for Zstd compressed core dumps. # This enables support for Zstd compressed core dumps and GEOM_UZIP images.
options ZSTDIO options ZSTDIO
# BHND(4) drivers # BHND(4) drivers

View File

@ -3627,6 +3627,8 @@ geom/uzip/g_uzip.c optional geom_uzip
geom/uzip/g_uzip_lzma.c optional geom_uzip geom/uzip/g_uzip_lzma.c optional geom_uzip
geom/uzip/g_uzip_wrkthr.c optional geom_uzip geom/uzip/g_uzip_wrkthr.c optional geom_uzip
geom/uzip/g_uzip_zlib.c optional geom_uzip geom/uzip/g_uzip_zlib.c optional geom_uzip
geom/uzip/g_uzip_zstd.c optional geom_uzip zstdio \
compile-with "${NORMAL_C} -I$S/contrib/zstd/lib/freebsd"
geom/vinum/geom_vinum.c optional geom_vinum geom/vinum/geom_vinum.c optional geom_vinum
geom/vinum/geom_vinum_create.c optional geom_vinum geom/vinum/geom_vinum_create.c optional geom_vinum
geom/vinum/geom_vinum_drive.c optional geom_vinum geom/vinum/geom_vinum_drive.c optional geom_vinum

View File

@ -31,6 +31,9 @@
#include <sys/cdefs.h> #include <sys/cdefs.h>
__FBSDID("$FreeBSD$"); __FBSDID("$FreeBSD$");
#include "opt_geom.h"
#include "opt_zstdio.h"
#include <sys/param.h> #include <sys/param.h>
#include <sys/bio.h> #include <sys/bio.h>
#include <sys/endian.h> #include <sys/endian.h>
@ -51,10 +54,11 @@ __FBSDID("$FreeBSD$");
#include <geom/uzip/g_uzip_dapi.h> #include <geom/uzip/g_uzip_dapi.h>
#include <geom/uzip/g_uzip_zlib.h> #include <geom/uzip/g_uzip_zlib.h>
#include <geom/uzip/g_uzip_lzma.h> #include <geom/uzip/g_uzip_lzma.h>
#ifdef ZSTDIO
#include <geom/uzip/g_uzip_zstd.h>
#endif
#include <geom/uzip/g_uzip_wrkthr.h> #include <geom/uzip/g_uzip_wrkthr.h>
#include "opt_geom.h"
MALLOC_DEFINE(M_GEOM_UZIP, "geom_uzip", "GEOM UZIP data structures"); MALLOC_DEFINE(M_GEOM_UZIP, "geom_uzip", "GEOM UZIP data structures");
FEATURE(geom_uzip, "GEOM read-only compressed disks support"); FEATURE(geom_uzip, "GEOM read-only compressed disks support");
@ -594,7 +598,7 @@ g_uzip_parse_toc(struct g_uzip_softc *sc, struct g_provider *pp,
* block whose offset is larger than ours and assume * block whose offset is larger than ours and assume
* it's going to be the next one. * it's going to be the next one.
*/ */
for (j = i + 1; j < sc->nblocks; j++) { for (j = i + 1; j < sc->nblocks + 1; j++) {
if (sc->toc[j].offset > max_offset) { if (sc->toc[j].offset > max_offset) {
break; break;
} }
@ -664,8 +668,10 @@ g_uzip_taste(struct g_class *mp, struct g_provider *pp, int flags)
struct g_uzip_softc *sc; struct g_uzip_softc *sc;
enum { enum {
G_UZIP = 1, G_UZIP = 1,
G_ULZMA G_ULZMA,
G_ZSTD,
} type; } type;
char cloop_version;
g_trace(G_T_TOPOLOGY, "%s(%s,%s)", __func__, mp->name, pp->name); g_trace(G_T_TOPOLOGY, "%s(%s,%s)", __func__, mp->name, pp->name);
g_topology_assert(); g_topology_assert();
@ -712,11 +718,12 @@ g_uzip_taste(struct g_class *mp, struct g_provider *pp, int flags)
goto e3; goto e3;
} }
cloop_version = header->magic[CLOOP_OFS_VERSN];
switch (header->magic[CLOOP_OFS_COMPR]) { switch (header->magic[CLOOP_OFS_COMPR]) {
case CLOOP_COMP_LZMA: case CLOOP_COMP_LZMA:
case CLOOP_COMP_LZMA_DDP: case CLOOP_COMP_LZMA_DDP:
type = G_ULZMA; type = G_ULZMA;
if (header->magic[CLOOP_OFS_VERSN] < CLOOP_MINVER_LZMA) { if (cloop_version < CLOOP_MINVER_LZMA) {
DPRINTF(GUZ_DBG_ERR, ("%s: image version too old\n", DPRINTF(GUZ_DBG_ERR, ("%s: image version too old\n",
gp->name)); gp->name));
goto e3; goto e3;
@ -727,7 +734,7 @@ g_uzip_taste(struct g_class *mp, struct g_provider *pp, int flags)
case CLOOP_COMP_LIBZ: case CLOOP_COMP_LIBZ:
case CLOOP_COMP_LIBZ_DDP: case CLOOP_COMP_LIBZ_DDP:
type = G_UZIP; type = G_UZIP;
if (header->magic[CLOOP_OFS_VERSN] < CLOOP_MINVER_ZLIB) { if (cloop_version < CLOOP_MINVER_ZLIB) {
DPRINTF(GUZ_DBG_ERR, ("%s: image version too old\n", DPRINTF(GUZ_DBG_ERR, ("%s: image version too old\n",
gp->name)); gp->name));
goto e3; goto e3;
@ -735,6 +742,24 @@ g_uzip_taste(struct g_class *mp, struct g_provider *pp, int flags)
DPRINTF(GUZ_DBG_INFO, ("%s: GEOM_UZIP_ZLIB image found\n", DPRINTF(GUZ_DBG_INFO, ("%s: GEOM_UZIP_ZLIB image found\n",
gp->name)); gp->name));
break; break;
case CLOOP_COMP_ZSTD:
case CLOOP_COMP_ZSTD_DDP:
if (cloop_version < CLOOP_MINVER_ZSTD) {
DPRINTF(GUZ_DBG_ERR, ("%s: image version too old\n",
gp->name));
goto e3;
}
#ifdef ZSTDIO
DPRINTF(GUZ_DBG_INFO, ("%s: GEOM_UZIP_ZSTD image found.\n",
gp->name));
type = G_ZSTD;
#else
DPRINTF(GUZ_DBG_ERR, ("%s: GEOM_UZIP_ZSTD image found, but "
"this kernel was configured with Zstd disabled.\n",
gp->name));
goto e3;
#endif
break;
default: default:
DPRINTF(GUZ_DBG_ERR, ("%s: unsupported image type\n", DPRINTF(GUZ_DBG_ERR, ("%s: unsupported image type\n",
gp->name)); gp->name));
@ -774,6 +799,13 @@ g_uzip_taste(struct g_class *mp, struct g_provider *pp, int flags)
} }
DPRINTF(GUZ_DBG_INFO, ("%s: %u offsets in the first sector\n", DPRINTF(GUZ_DBG_INFO, ("%s: %u offsets in the first sector\n",
gp->name, offsets_read)); gp->name, offsets_read));
/*
* The following invalidates the "header" pointer into the first
* block's "buf."
*/
header = NULL;
for (blk = 1; offsets_read < total_offsets; blk++) { for (blk = 1; offsets_read < total_offsets; blk++) {
uint32_t nread; uint32_t nread;
@ -805,20 +837,41 @@ g_uzip_taste(struct g_class *mp, struct g_provider *pp, int flags)
goto e5; goto e5;
} }
if (type == G_UZIP) { switch (type) {
case G_UZIP:
sc->dcp = g_uzip_zlib_ctor(sc->blksz); sc->dcp = g_uzip_zlib_ctor(sc->blksz);
} else { break;
case G_ULZMA:
sc->dcp = g_uzip_lzma_ctor(sc->blksz); sc->dcp = g_uzip_lzma_ctor(sc->blksz);
} break;
if (sc->dcp == NULL) { #ifdef ZSTDIO
case G_ZSTD:
sc->dcp = g_uzip_zstd_ctor(sc->blksz);
break;
#endif
default:
goto e5; goto e5;
} }
/* /*
* "Fake" last+1 block, to make it easier for the TOC parser to * The last+1 block was not always initialized by earlier versions of
* iterate without making the last element a special case. * mkuzip(8). However, *if* it is initialized, the difference between
* its offset and the prior block's offset represents the length of the
* final real compressed block, and this is significant to the
* decompressor.
*/ */
sc->toc[sc->nblocks].offset = pp->mediasize; if (cloop_version >= CLOOP_MINVER_RELIABLE_LASTBLKSZ &&
sc->toc[sc->nblocks].offset != 0) {
if (sc->toc[sc->nblocks].offset > pp->mediasize) {
DPRINTF(GUZ_DBG_ERR,
("%s: bogus n+1 offset %ju > mediasize %ju\n",
gp->name, (uintmax_t)sc->toc[sc->nblocks].offset,
(uintmax_t)pp->mediasize));
goto e6;
}
} else {
sc->toc[sc->nblocks].offset = pp->mediasize;
}
/* Massage TOC (table of contents), make sure it is sound */ /* Massage TOC (table of contents), make sure it is sound */
if (g_uzip_parse_toc(sc, pp, gp) != 0) { if (g_uzip_parse_toc(sc, pp, gp) != 0) {
DPRINTF(GUZ_DBG_ERR, ("%s: TOC error\n", gp->name)); DPRINTF(GUZ_DBG_ERR, ("%s: TOC error\n", gp->name));

View File

@ -39,14 +39,20 @@
#define CLOOP_MAJVER_2 '2' #define CLOOP_MAJVER_2 '2'
#define CLOOP_MAJVER_3 '3' #define CLOOP_MAJVER_3 '3'
#define CLOOP_MAJVER_4 '4'
#define CLOOP_COMP_LIBZ 'V' #define CLOOP_COMP_LIBZ 'V'
#define CLOOP_COMP_LIBZ_DDP 'v' #define CLOOP_COMP_LIBZ_DDP 'v'
#define CLOOP_COMP_LZMA 'L' #define CLOOP_COMP_LZMA 'L'
#define CLOOP_COMP_LZMA_DDP 'l' #define CLOOP_COMP_LZMA_DDP 'l'
#define CLOOP_COMP_ZSTD 'Z'
#define CLOOP_COMP_ZSTD_DDP 'z'
#define CLOOP_MINVER_LZMA CLOOP_MAJVER_3 #define CLOOP_MINVER_LZMA CLOOP_MAJVER_3
#define CLOOP_MINVER_ZLIB CLOOP_MAJVER_2 #define CLOOP_MINVER_ZLIB CLOOP_MAJVER_2
#define CLOOP_MINVER_ZSTD CLOOP_MAJVER_4
#define CLOOP_MINVER_RELIABLE_LASTBLKSZ CLOOP_MAJVER_4
struct cloop_header { struct cloop_header {
char magic[CLOOP_MAGIC_LEN]; /* cloop magic */ char magic[CLOOP_MAGIC_LEN]; /* cloop magic */

157
sys/geom/uzip/g_uzip_zstd.c Normal file
View File

@ -0,0 +1,157 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2019 Conrad Meyer <cem@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <geom/uzip/g_uzip.h>
#include <geom/uzip/g_uzip_dapi.h>
#include <geom/uzip/g_uzip_zstd.h>
/*
* We don't actually need any static-link ABI, just want to use "experimental"
* custom malloc/free APIs.
*/
#define ZSTD_STATIC_LINKING_ONLY
#include <contrib/zstd/lib/zstd.h>
/* Advertise g_uzip's Zstd support under the feature name "geom_uzip_zstd". */
FEATURE(geom_uzip_zstd, "g_uzip Zstd support");
/*
 * Private state of one Zstd decompressor instance.  Callers only ever see
 * the embedded guz_pub; to_zstd_softc() maps it back to this structure.
 */
struct g_uzip_zstd {
/* Public decompressor interface returned by g_uzip_zstd_ctor(). */
struct g_uzip_dapi guz_pub;
/* Block size given to the constructor; output capacity for each decode. */
uint32_t guz_blksz;
/* Zstd decompression context, created in the ctor, freed in guz_zstd_free(). */
ZSTD_DCtx *guz_dctx;
};
/*
 * container_of(ptr, type, member): given a pointer to `member' embedded in a
 * structure of type `type', compute a pointer to the enclosing structure by
 * subtracting the member's offset.  The typed temporary __p makes the
 * compiler check that `ptr' is compatible with the member's type.  Only
 * defined here when no earlier header has already provided it.
 */
#ifndef container_of
#define container_of(ptr, type, member) \
({ \
const __typeof(((type *)0)->member) *__p = (ptr); \
(type *)((uintptr_t)__p - offsetof(type, member)); \
})
#endif
/* Recover the private struct g_uzip_zstd from its embedded guz_pub handle. */
#define to_zstd_softc(zpp) container_of(zpp, struct g_uzip_zstd, guz_pub)
/*
 * Decompress one Zstd frame into a cluster-sized output buffer.
 *
 * zpp       - public decompressor handle embedded in our private softc.
 * gp_name   - geom name; used only to prefix diagnostics.
 * input     - compressed frame, ilen bytes long.
 * outputbuf - destination; the decoder is given guz_blksz bytes of capacity.
 *
 * Returns 0 on success, or EIO if Zstd rejects the frame or the frame does
 * not expand to exactly guz_blksz bytes.
 */
static int
guz_zstd_decompress(struct g_uzip_dapi *zpp, const char *gp_name, void *input,
    size_t ilen, void *outputbuf)
{
	struct g_uzip_zstd *sc;
	size_t rc;

	sc = to_zstd_softc(zpp);
	rc = ZSTD_decompressDCtx(sc->guz_dctx, outputbuf, sc->guz_blksz, input,
	    ilen);
	if (ZSTD_isError(rc)) {
		printf("%s: UZIP(zstd) decompress failed: %s\n", gp_name,
		    ZSTD_getErrorName(rc));
		return (EIO);
	}

	/*
	 * The decompressed length is determined by the image contents, not by
	 * this code, so it must be validated at runtime rather than asserted:
	 * a well-formed frame of the wrong size would otherwise panic a kernel
	 * built with assertions enabled, and be accepted as a short cluster
	 * (with the tail of outputbuf left unwritten by this call) on one
	 * built without them.
	 */
	if (rc != sc->guz_blksz) {
		printf("%s: UZIP(zstd) decompress failed: expected %u bytes, "
		    "got %zu\n", gp_name, sc->guz_blksz, rc);
		return (EIO);
	}
	return (0);
}
/*
 * Destroy a Zstd decompressor instance: release the Zstd decompression
 * context, then the softc that embeds the public handle.  An error reported
 * by Zstd is logged, but the softc is freed regardless.
 */
static void
guz_zstd_free(struct g_uzip_dapi *zpp)
{
	struct g_uzip_zstd *zsc = to_zstd_softc(zpp);
	size_t zerr = ZSTD_freeDCtx(zsc->guz_dctx);

	if (ZSTD_isError(zerr)) {
		printf("%s: UZIP(zstd) free failed: %s\n", __func__,
		    ZSTD_getErrorName(zerr));
	}
	free(zsc, M_GEOM_UZIP);
}
/*
 * Reset the Zstd decompression context (session and parameters) so it can be
 * reused.  gp_name only prefixes the diagnostic.  Returns 0 on success or EIO
 * if Zstd reports an error.
 */
static int
guz_zstd_rewind(struct g_uzip_dapi *zpp, const char *gp_name)
{
	struct g_uzip_zstd *zsc = to_zstd_softc(zpp);
	size_t zerr;

	zerr = ZSTD_DCtx_reset(zsc->guz_dctx,
	    ZSTD_reset_session_and_parameters);
	if (!ZSTD_isError(zerr))
		return (0);

	printf("%s: UZIP(zstd) rewind failed: %s\n", gp_name,
	    ZSTD_getErrorName(zerr));
	return (EIO);
}
/*
 * Allocation hook for ZSTD_customMem.  `opaque' is used as the malloc type
 * for the request; the allocation is made with M_WAITOK.
 */
static void *
zstd_alloc(void *opaque, size_t size)
{
	void *mem;

	mem = malloc(size, opaque, M_WAITOK);
	return (mem);
}
/*
 * Release hook for ZSTD_customMem; the counterpart of zstd_alloc().  `opaque'
 * is used as the malloc type the memory is returned to.
 */
static void
zstd_free(void *opaque, void *address)
{
	void *mem = address;

	free(mem, opaque);
}
/*
 * Custom allocator descriptor handed to ZSTD_createDCtx_advanced(): Zstd's
 * allocations go through zstd_alloc()/zstd_free(), which receive
 * M_GEOM_UZIP as their opaque argument.
 */
static const ZSTD_customMem zstd_guz_alloc = {
	.opaque = M_GEOM_UZIP,
	.customAlloc = zstd_alloc,
	.customFree = zstd_free,
};
struct g_uzip_dapi *
g_uzip_zstd_ctor(uint32_t blksz)
{
struct g_uzip_zstd *sc;
sc = malloc(sizeof(*sc), M_GEOM_UZIP, M_WAITOK | M_ZERO);
sc->guz_dctx = ZSTD_createDCtx_advanced(zstd_guz_alloc);
if (sc->guz_dctx == NULL) {
printf("%s: ZSTD_createDCtx_advanced failed\n", __func__);
free(sc, M_GEOM_UZIP);
return (NULL);
}
sc->guz_blksz = blksz;
sc->guz_pub.max_blen = ZSTD_compressBound(blksz);
sc->guz_pub.decompress = guz_zstd_decompress;
sc->guz_pub.free = guz_zstd_free;
sc->guz_pub.rewind = guz_zstd_rewind;
sc->guz_pub.pvt = NULL;
return (&sc->guz_pub);
}

View File

@ -0,0 +1,30 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2019 Conrad Meyer <cem@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
struct g_uzip_dapi *g_uzip_zstd_ctor(uint32_t);

View File

@ -6,12 +6,17 @@ KMOD= geom_uzip
SRCS= g_uzip.c g_uzip_zlib.c g_uzip_lzma.c g_uzip_wrkthr.c SRCS= g_uzip.c g_uzip_zlib.c g_uzip_lzma.c g_uzip_wrkthr.c
SRCS+= g_uzip.h g_uzip_dapi.h g_uzip_lzma.h g_uzip_zlib.h g_uzip_softc.h \ SRCS+= g_uzip.h g_uzip_dapi.h g_uzip_lzma.h g_uzip_zlib.h g_uzip_softc.h \
g_uzip_wrkthr.h g_uzip_wrkthr.h
#CFLAGS= -g -DINVARIANT_SUPPORT -DINVARIANTS
.PATH: ${SRCTOP}/sys/net # This works for buildkernel, but will disable zstd in geom_uzip out of tree.
.if ${KERN_OPTS:MZSTDIO} != ""
SRCS+= g_uzip_zstd.c g_uzip_zstd.h
CFLAGS.g_uzip_zstd.c+= -I${SRCTOP}/sys/contrib/zstd/lib/freebsd
.endif
#CFLAGS= -g -DINVARIANT_SUPPORT -DINVARIANTS
CFLAGS.g_uzip_lzma.c+= -I${SRCTOP}/sys/contrib/xz-embedded/freebsd \ CFLAGS.g_uzip_lzma.c+= -I${SRCTOP}/sys/contrib/xz-embedded/freebsd \
-I${SRCTOP}/sys/contrib/xz-embedded/linux/lib/xz/ -I${SRCTOP}/sys/contrib/xz-embedded/linux/lib/xz/
SRCS+= opt_geom.h SRCS+= opt_geom.h opt_zstdio.h
.include <bsd.kmod.mk> .include <bsd.kmod.mk>

View File

@ -3,10 +3,12 @@
PROG= mkuzip PROG= mkuzip
MAN= mkuzip.8 MAN= mkuzip.8
SRCS= mkuzip.c mkuz_blockcache.c mkuz_lzma.c mkuz_zlib.c mkuz_conveyor.c \ SRCS= mkuzip.c mkuz_blockcache.c mkuz_lzma.c mkuz_zlib.c mkuz_conveyor.c \
mkuz_blk.c mkuz_fqueue.c mkuz_time.c mkuz_insize.c mkuz_blk.c mkuz_fqueue.c mkuz_time.c mkuz_insize.c mkuz_zstd.c
CFLAGS+= -I${SRCTOP}/sys/contrib/zstd/lib
#CFLAGS+= -DMKUZ_DEBUG #CFLAGS+= -DMKUZ_DEBUG
LIBADD= z md lzma pthread LIBADD= lzma md pthread z zstd
.include <bsd.prog.mk> .include <bsd.prog.mk>

View File

@ -39,4 +39,6 @@ struct mkuz_cfg {
const char *iname; const char *iname;
off_t isize; off_t isize;
const struct mkuz_format *handler; const struct mkuz_format *handler;
size_t cbound_blksz;
int comp_level;
}; };

View File

@ -39,9 +39,11 @@
#define CLOOP_MAJVER_2 '2' #define CLOOP_MAJVER_2 '2'
#define CLOOP_MAJVER_3 '3' #define CLOOP_MAJVER_3 '3'
#define CLOOP_MAJVER_4 '4'
#define CLOOP_COMP_LIBZ 'V' #define CLOOP_COMP_LIBZ 'V'
#define CLOOP_COMP_LZMA 'L' #define CLOOP_COMP_LZMA 'L'
#define CLOOP_COMP_ZSTD 'Z'
struct cloop_header { struct cloop_header {
char magic[CLOOP_MAGIC_LEN]; /* cloop magic */ char magic[CLOOP_MAGIC_LEN]; /* cloop magic */

View File

@ -42,8 +42,8 @@ __FBSDID("$FreeBSD$");
#include "mkuz_conveyor.h" #include "mkuz_conveyor.h"
#include "mkuz_cfg.h" #include "mkuz_cfg.h"
#include "mkuzip.h" #include "mkuzip.h"
#include "mkuz_format.h"
#include "mkuz_blk.h" #include "mkuz_blk.h"
#include "mkuz_format.h"
#include "mkuz_fqueue.h" #include "mkuz_fqueue.h"
#include "mkuz_blk_chain.h" #include "mkuz_blk_chain.h"
@ -67,7 +67,7 @@ cworker(void *p)
cfp = cwp->cfp; cfp = cwp->cfp;
cvp = cwp->cvp; cvp = cwp->cvp;
free(cwp); free(cwp);
c_ctx = cfp->handler->f_init(cfp->blksz); c_ctx = cfp->handler->f_init(&cfp->comp_level);
for (;;) { for (;;) {
iblk = mkuz_fqueue_deq(cvp->wrk_queue); iblk = mkuz_fqueue_deq(cvp->wrk_queue);
if (iblk == MKUZ_BLK_EOF) { if (iblk == MKUZ_BLK_EOF) {
@ -80,7 +80,8 @@ cworker(void *p)
/* All zeroes block */ /* All zeroes block */
oblk = mkuz_blk_ctor(0); oblk = mkuz_blk_ctor(0);
} else { } else {
oblk = cfp->handler->f_compress(c_ctx, iblk); oblk = mkuz_blk_ctor(cfp->cbound_blksz);
cfp->handler->f_compress(c_ctx, iblk, oblk);
if (cfp->en_dedup != 0) { if (cfp->en_dedup != 0) {
compute_digest(oblk); compute_digest(oblk);
} }

View File

@ -26,12 +26,15 @@
* $FreeBSD$ * $FreeBSD$
*/ */
DEFINE_RAW_METHOD(f_init, void *, uint32_t); DEFINE_RAW_METHOD(f_compress_bound, size_t, size_t);
DEFINE_RAW_METHOD(f_compress, struct mkuz_blk *, void *, const struct mkuz_blk *); DEFINE_RAW_METHOD(f_init, void *, int *);
DEFINE_RAW_METHOD(f_compress, void, void *, const struct mkuz_blk *, struct mkuz_blk *);
struct mkuz_format { struct mkuz_format {
const char *option;
const char *magic; const char *magic;
const char *default_sufx; const char *default_sufx;
f_compress_bound_t f_compress_bound;
f_init_t f_init; f_init_t f_init;
f_compress_t f_compress; f_compress_t f_compress;
}; };

View File

@ -35,61 +35,55 @@ __FBSDID("$FreeBSD$");
#include <lzma.h> #include <lzma.h>
#include "mkuzip.h" #include "mkuzip.h"
#include "mkuz_lzma.h"
#include "mkuz_blk.h" #include "mkuz_blk.h"
#include "mkuz_lzma.h"
#define USED_BLOCKSIZE DEV_BSIZE
struct mkuz_lzma { struct mkuz_lzma {
lzma_filter filters[2]; lzma_filter filters[2];
lzma_options_lzma opt_lzma; lzma_options_lzma opt_lzma;
lzma_stream strm; lzma_stream strm;
uint32_t blksz;
}; };
static const lzma_stream lzma_stream_init = LZMA_STREAM_INIT; size_t
mkuz_lzma_cbound(size_t blksz)
{
return (lzma_stream_buffer_bound(blksz));
}
void * void *
mkuz_lzma_init(uint32_t blksz) mkuz_lzma_init(int *comp_level)
{ {
struct mkuz_lzma *ulp; struct mkuz_lzma *ulp;
if (blksz % USED_BLOCKSIZE != 0) { if (*comp_level == USE_DEFAULT_LEVEL)
errx(1, "cluster size should be multiple of %d", *comp_level = LZMA_PRESET_DEFAULT;
USED_BLOCKSIZE); if (*comp_level < 0 || *comp_level > 9)
errx(1, "provided compression level %d is invalid",
*comp_level);
/* Not reached */ /* Not reached */
}
if (blksz > MAXPHYS) {
errx(1, "cluster size is too large");
/* Not reached */
}
ulp = mkuz_safe_zmalloc(sizeof(struct mkuz_lzma)); ulp = mkuz_safe_zmalloc(sizeof(struct mkuz_lzma));
/* Init lzma encoder */ /* Init lzma encoder */
ulp->strm = lzma_stream_init; ulp->strm = (lzma_stream)LZMA_STREAM_INIT;
if (lzma_lzma_preset(&ulp->opt_lzma, LZMA_PRESET_DEFAULT)) if (lzma_lzma_preset(&ulp->opt_lzma, *comp_level))
errx(1, "Error loading LZMA preset"); errx(1, "Error loading LZMA preset");
ulp->filters[0].id = LZMA_FILTER_LZMA2; ulp->filters[0].id = LZMA_FILTER_LZMA2;
ulp->filters[0].options = &ulp->opt_lzma; ulp->filters[0].options = &ulp->opt_lzma;
ulp->filters[1].id = LZMA_VLI_UNKNOWN; ulp->filters[1].id = LZMA_VLI_UNKNOWN;
ulp->blksz = blksz;
return (void *)ulp; return (void *)ulp;
} }
struct mkuz_blk * void
mkuz_lzma_compress(void *p, const struct mkuz_blk *iblk) mkuz_lzma_compress(void *p, const struct mkuz_blk *iblk, struct mkuz_blk *oblk)
{ {
lzma_ret ret; lzma_ret ret;
struct mkuz_blk *rval;
struct mkuz_lzma *ulp; struct mkuz_lzma *ulp;
ulp = (struct mkuz_lzma *)p; ulp = (struct mkuz_lzma *)p;
rval = mkuz_blk_ctor(ulp->blksz * 2);
ret = lzma_stream_encoder(&ulp->strm, ulp->filters, LZMA_CHECK_CRC32); ret = lzma_stream_encoder(&ulp->strm, ulp->filters, LZMA_CHECK_CRC32);
if (ret != LZMA_OK) { if (ret != LZMA_OK) {
if (ret == LZMA_MEMLIMIT_ERROR) if (ret == LZMA_MEMLIMIT_ERROR)
@ -99,23 +93,20 @@ mkuz_lzma_compress(void *p, const struct mkuz_blk *iblk)
} }
ulp->strm.next_in = iblk->data; ulp->strm.next_in = iblk->data;
ulp->strm.avail_in = ulp->blksz; ulp->strm.avail_in = iblk->info.len;
ulp->strm.next_out = rval->data; ulp->strm.next_out = oblk->data;
ulp->strm.avail_out = rval->alen; ulp->strm.avail_out = oblk->alen;
ret = lzma_code(&ulp->strm, LZMA_FINISH); ret = lzma_code(&ulp->strm, LZMA_FINISH);
if (ret != LZMA_STREAM_END) { if (ret != LZMA_STREAM_END)
/* Error */
errx(1, "lzma_code FINISH failed, code=%d, pos(in=%zd, " errx(1, "lzma_code FINISH failed, code=%d, pos(in=%zd, "
"out=%zd)", ret, (ulp->blksz - ulp->strm.avail_in), "out=%zd)", ret, (iblk->info.len - ulp->strm.avail_in),
(ulp->blksz * 2 - ulp->strm.avail_out)); (oblk->alen - ulp->strm.avail_out));
}
#if 0 #if 0
lzma_end(&ulp->strm); lzma_end(&ulp->strm);
#endif #endif
rval->info.len = rval->alen - ulp->strm.avail_out; oblk->info.len = oblk->alen - ulp->strm.avail_out;
return (rval);
} }

View File

@ -38,5 +38,6 @@
"exit $?\n" "exit $?\n"
#define DEFAULT_SUFX_LZMA ".ulzma" #define DEFAULT_SUFX_LZMA ".ulzma"
void *mkuz_lzma_init(uint32_t); size_t mkuz_lzma_cbound(size_t);
struct mkuz_blk *mkuz_lzma_compress(void *, const struct mkuz_blk *); void *mkuz_lzma_init(int *);
void mkuz_lzma_compress(void *, const struct mkuz_blk *, struct mkuz_blk *);

View File

@ -34,54 +34,51 @@ __FBSDID("$FreeBSD$");
#include <zlib.h> #include <zlib.h>
#include "mkuzip.h" #include "mkuzip.h"
#include "mkuz_zlib.h"
#include "mkuz_blk.h" #include "mkuz_blk.h"
#include "mkuz_zlib.h"
struct mkuz_zlib { struct mkuz_zlib {
uLongf oblen; int comp_level;
uint32_t blksz;
}; };
size_t
mkuz_zlib_cbound(size_t blksz)
{
return (compressBound(blksz));
}
void * void *
mkuz_zlib_init(uint32_t blksz) mkuz_zlib_init(int *comp_level)
{ {
struct mkuz_zlib *zp; struct mkuz_zlib *zp;
if (blksz % DEV_BSIZE != 0) { if (*comp_level == USE_DEFAULT_LEVEL)
errx(1, "cluster size should be multiple of %d", *comp_level = Z_BEST_COMPRESSION;
DEV_BSIZE); if (*comp_level < Z_BEST_SPEED || *comp_level > Z_BEST_COMPRESSION)
errx(1, "provided compression level %d is invalid",
*comp_level);
/* Not reached */ /* Not reached */
}
if (compressBound(blksz) > MAXPHYS) {
errx(1, "cluster size is too large");
/* Not reached */
}
zp = mkuz_safe_zmalloc(sizeof(struct mkuz_zlib));
zp->oblen = compressBound(blksz);
zp->blksz = blksz;
return (void *)zp; zp = mkuz_safe_zmalloc(sizeof(struct mkuz_zlib));
zp->comp_level = *comp_level;
return (zp);
} }
struct mkuz_blk * void
mkuz_zlib_compress(void *p, const struct mkuz_blk *iblk) mkuz_zlib_compress(void *p, const struct mkuz_blk *iblk, struct mkuz_blk *oblk)
{ {
uLongf destlen_z; uLongf destlen_z;
struct mkuz_blk *rval;
struct mkuz_zlib *zp; struct mkuz_zlib *zp;
zp = (struct mkuz_zlib *)p; zp = (struct mkuz_zlib *)p;
rval = mkuz_blk_ctor(zp->oblen); destlen_z = oblk->alen;
if (compress2(oblk->data, &destlen_z, iblk->data, iblk->info.len,
destlen_z = rval->alen; zp->comp_level) != Z_OK) {
if (compress2(rval->data, &destlen_z, iblk->data, zp->blksz, errx(1, "can't compress data: compress2() failed");
Z_BEST_COMPRESSION) != Z_OK) {
errx(1, "can't compress data: compress2() "
"failed");
/* Not reached */ /* Not reached */
} }
rval->info.len = (uint32_t)destlen_z; oblk->info.len = (uint32_t)destlen_z;
return (rval);
} }

View File

@ -32,5 +32,6 @@
"(kldstat -qm g_uzip||kldload geom_uzip)>&-&&" \ "(kldstat -qm g_uzip||kldload geom_uzip)>&-&&" \
"mount_cd9660 /dev/`mdconfig -af $0`.uzip $1\nexit $?\n" "mount_cd9660 /dev/`mdconfig -af $0`.uzip $1\nexit $?\n"
void *mkuz_zlib_init(uint32_t); size_t mkuz_zlib_cbound(size_t);
struct mkuz_blk *mkuz_zlib_compress(void *, const struct mkuz_blk *); void *mkuz_zlib_init(int *);
void mkuz_zlib_compress(void *, const struct mkuz_blk *, struct mkuz_blk *);

View File

@ -0,0 +1,95 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2019 Conrad Meyer <cem@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <err.h>
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <zstd.h>
#include "mkuzip.h"
#include "mkuz_blk.h"
#include "mkuz_zstd.h"
/*
 * Report Zstd's upper bound on the compressed size of a blksz-byte input,
 * as computed by ZSTD_compressBound().
 */
size_t
mkuz_zstd_cbound(size_t blksz)
{
	size_t worst_case;

	worst_case = ZSTD_compressBound(blksz);
	return (worst_case);
}
/*
 * Create and configure a Zstd compression context.
 *
 * comp_level is in/out: if it holds USE_DEFAULT_LEVEL on entry it is replaced
 * with the Zstd default (9) so the caller can see the level actually used.
 * Level 0 and levels outside [ZSTD_minCLevel(), ZSTD_maxCLevel()] are
 * rejected.  Frame checksums are enabled on the context.
 *
 * Returns the ZSTD_CCtx as an opaque pointer; exits via errx(3) on any
 * failure.
 */
void *
mkuz_zstd_init(int *comp_level)
{
	ZSTD_CCtx *zctx;
	size_t zerr;
	int level;

	/* Default chosen for near-parity with mkuzip zlib default. */
	if (*comp_level == USE_DEFAULT_LEVEL)
		*comp_level = 9;
	level = *comp_level;

	if (level < ZSTD_minCLevel() || level == 0 ||
	    level > ZSTD_maxCLevel()) {
		errx(1, "provided compression level %d is invalid", level);
	}

	zctx = ZSTD_createCCtx();
	if (zctx == NULL) {
		errx(1, "could not allocate Zstd context");
	}

	zerr = ZSTD_CCtx_setParameter(zctx, ZSTD_c_compressionLevel, level);
	if (ZSTD_isError(zerr)) {
		errx(1, "Could not set zstd compression level %d: %s",
		    level, ZSTD_getErrorName(zerr));
	}

	zerr = ZSTD_CCtx_setParameter(zctx, ZSTD_c_checksumFlag, 1);
	if (ZSTD_isError(zerr)) {
		errx(1, "Could not enable zstd checksum: %s",
		    ZSTD_getErrorName(zerr));
	}

	return (zctx);
}
/*
 * Compress one input block into a caller-provided output block.
 *
 * p    - the ZSTD_CCtx returned by mkuz_zstd_init().
 * iblk - source block; iblk->info.len bytes of iblk->data are compressed.
 * oblk - destination block with oblk->alen bytes of capacity; on return
 *        oblk->info.len holds the compressed length.
 *
 * Exits via errx(3) if Zstd reports an error.
 */
void
mkuz_zstd_compress(void *p, const struct mkuz_blk *iblk, struct mkuz_blk *oblk)
{
	ZSTD_CCtx *zctx = p;
	size_t clen;

	clen = ZSTD_compress2(zctx, oblk->data, oblk->alen, iblk->data,
	    iblk->info.len);
	if (ZSTD_isError(clen)) {
		errx(1, "could not compress data: ZSTD_compress2: %s",
		    ZSTD_getErrorName(clen));
	}
	oblk->info.len = clen;
}

View File

@ -0,0 +1,38 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2019 Conrad Meyer <cem@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#define DEFAULT_SUFX_ZSTD ".uzst"
#define CLOOP_MAGIC_ZSTD "#!/bin/sh\n#Z4.0 Format\n" \
"(kldstat -qm g_uzip||kldload geom_uzip)>&-&&" \
"mount_cd9660 /dev/`mdconfig -af $0`.uzip $1\nexit $?\n"
size_t mkuz_zstd_cbound(size_t);
void *mkuz_zstd_init(int *);
void mkuz_zstd_compress(void *, const struct mkuz_blk *, struct mkuz_blk *);

View File

@ -25,7 +25,7 @@
.\" .\"
.\" $FreeBSD$ .\" $FreeBSD$
.\" .\"
.Dd February 19, 2019 .Dd August 9, 2019
.Dt MKUZIP 8 .Dt MKUZIP 8
.Os .Os
.Sh NAME .Sh NAME
@ -35,7 +35,9 @@
class class
.Sh SYNOPSIS .Sh SYNOPSIS
.Nm .Nm
.Op Fl dLSsvZ .Op Fl dSsvZ
.Op Fl A Ar compression_algorithm
.Op Fl C Ar compression_level
.Op Fl j Ar compression_jobs .Op Fl j Ar compression_jobs
.Op Fl o Ar outfile .Op Fl o Ar outfile
.Op Fl s Ar cluster_size .Op Fl s Ar cluster_size
@ -57,17 +59,82 @@ works in two phases:
.It .It
An An
.Ar infile .Ar infile
image is split into clusters; each cluster is compressed using image is split into clusters; each cluster is compressed.
.Xr zlib 3
or
.Xr lzma 3 .
.It .It
The resulting set of compressed clusters along with headers that allow The resulting set of compressed clusters is written to the output file.
locating each individual cluster is written to the output file. In addition, a
.Dq table of contents
header is written which allows for efficient seeking.
.El .El
.Pp .Pp
The options are: The options are:
.Bl -tag -width indent .Bl -tag -width indent
.It Fl A Op Ar lzma | Ar zlib | Ar zstd
Select a specific compression algorithm.
If this option is not provided, the default is
.Ar zlib .
.Pp
The
.Ar lzma
algorithm provides noticeably better compression levels than zlib on the same
data set.
It has vastly slower compression speed and moderately slower decompression
speed.
.Pp
The
.Ar zstd
algorithm provides better compression levels than zlib on the same data set.
It also has faster compression and decompression speed than zlib.
In the very high compression
.Dq level
settings, it does not offer quite as high a compression ratio as
.Ar lzma .
However, its decompression speed does not suffer at high compression
.Dq levels .
.It Fl C Ar compression_level
Select the integer compression level used to parameterize the chosen
compression algorithm.
.Pp
For any given algorithm, a lesser number selects a faster compression mode.
A greater number selects a slower compression mode.
Typically, for the same algorithm, a greater
.Ar compression_level
provides a better final compression ratio.
.Pp
For
.Ar lzma ,
the range of valid compression levels is
.Va 0-9 .
The
.Nm
default for lzma is
.Va 6 .
.Pp
For
.Ar zlib ,
the range of valid compression levels is
.Va 1-9 .
The
.Nm
default for zlib is
.Va 9 .
.Pp
For
.Ar zstd ,
the range of valid compression levels is currently
.Va 1-19 .
The
.Nm
default for zstd is
.Va 9 .
.It Fl d
Enable de-duplication.
When the option is enabled
.Nm
detects identical blocks in the input and replaces each subsequent occurrence
of such a block with a pointer to the very first one in the output.
Setting this option results in a moderate decrease of compressed image size,
typically around 3-5% of the final size of the compressed image.
.It Fl j Ar compression_jobs .It Fl j Ar compression_jobs
Specify the number of compression jobs that Specify the number of compression jobs that
.Nm .Nm
@ -77,24 +144,9 @@ to the value of
.Va hw.ncpu .Va hw.ncpu
.Xr sysctl 8 .Xr sysctl 8
variable. variable.
.It Fl d .It Op Fl L
Enable de-duplication. Legacy flag that indicates the same thing as
When the option is enabled the .Dq Fl A Ar lzma .
.Nm
detects identical blocks in the input and replaces each subsequent occurence
of such block with pointer to the very first one in the output.
Setting this option results is moderate decrease of compressed image size,
typically around 3-5% of a final size of the compressed image.
.It Fl L
Use
.Xr lzma 3
compression algorithm instead of the default
.Xr zlib 3 .
The
.Xr lzma 3
provides noticeable better compression levels on the same data set
at the expense of much slower compression speed (10-20x) and somewhat slower
decompression (2-3x).
.It Fl o Ar outfile .It Fl o Ar outfile
Name of the output file Name of the output file
.Ar outfile . .Ar outfile .
@ -119,33 +171,44 @@ should be a multiple of 512 bytes.
.It Fl v .It Fl v
Display verbose messages. Display verbose messages.
.It Fl Z .It Fl Z
Disable zero-blocks detection and elimination. Disable zero-block detection and elimination.
When this option is set, the When this option is set,
.Nm .Nm
would compress empty blocks (i.e. clusters that consist of only zero bytes) compresses blocks of zero bytes just as it would any other block.
just as it would any other block. When the option is not set,
When the option is not set, the
.Nm .Nm
detects such blocks and skips them from the output. detects and compresses zero blocks in a space-efficient way.
Setting Setting
.Fl Z .Fl Z
results is slight increase of compressed image size, typically less than 0.1% increases compressed image sizes slightly, typically less than 0.1%.
of a final size of the compressed image.
.El .El
.Sh NOTES .Sh IMPLEMENTATION NOTES
The compression ratio largely depends on the cluster size used. The compression ratio largely depends on the compression algorithm, level, and
.\" The following two sentences are unclear: how can gzip(1) be cluster size used.
.\" used in a comparable fashion, and wouldn't a gzip-compressed For large cluster sizes (16kB and higher), typical overall image compression
.\" image suffer from larger cluster sizes as well? ratios with
For large cluster sizes (16K and higher), typical compression ratios .Xr zlib 3
are only 1-2% less than those achieved with are only 1-2% less than those achieved with
.Xr gzip 1 . .Xr gzip 1
However, it should be kept in mind that larger cluster over the entire image.
sizes lead to higher overhead in the However, it should be kept in mind that larger cluster sizes lead to higher
overhead in the
.Xr geom_uzip 4 .Xr geom_uzip 4
class, as the class has to decompress the whole cluster even if class, as the class has to decompress the whole cluster even if
only a few bytes from that cluster have to be read. only a few bytes from that cluster have to be read.
.Pp .Pp
Additionally, the threshold at 16-32 kB where a larger cluster size does not
benefit overall compression ratio is an artifact of the
.Xr zlib 3
algorithm in particular.
.Ar Lzma
and
.Ar Zstd
will continue to provide better compression ratios as cluster sizes
are increased, at high enough compression levels.
The same tradeoff continues to apply: reads in
.Xr geom_uzip 4
become more expensive the greater the cluster size.
.Pp
The The
.Nm .Nm
utility utility
@ -169,12 +232,27 @@ specific feature and while it does not require any changes to on-disk
compressed image format, however it did require some matching changes to the compressed image format, however it did require some matching changes to the
.Xr geom_uzip 4 .Xr geom_uzip 4
to handle resulting images correctly. to handle resulting images correctly.
.Pp
To make use of
.Ar zstd
.Nm
images, the kernel must be configured with
.Cd ZSTDIO .
It is enabled by default in many
.Cd GENERIC
kernels provided as binary distributions by
.Fx .
The status on any particular system can be verified by checking
.Xr sysctl 8
.Dv kern.features.geom_uzip_zstd
for
.Dq 1 .
.Sh EXIT STATUS .Sh EXIT STATUS
.Ex -std .Ex -std
.Sh SEE ALSO .Sh SEE ALSO
.Xr gzip 1 , .Xr gzip 1 ,
.Xr xz 1 , .Xr xz 1 ,
.Xr lzma 3 , .Xr zstd 1 ,
.Xr zlib 3 , .Xr zlib 3 ,
.Xr geom 4 , .Xr geom 4 ,
.Xr geom_uzip 4 , .Xr geom_uzip 4 ,

View File

@ -51,8 +51,9 @@ __FBSDID("$FreeBSD$");
#include "mkuzip.h" #include "mkuzip.h"
#include "mkuz_cloop.h" #include "mkuz_cloop.h"
#include "mkuz_blockcache.h" #include "mkuz_blockcache.h"
#include "mkuz_zlib.h"
#include "mkuz_lzma.h" #include "mkuz_lzma.h"
#include "mkuz_zlib.h"
#include "mkuz_zstd.h"
#include "mkuz_blk.h" #include "mkuz_blk.h"
#include "mkuz_cfg.h" #include "mkuz_cfg.h"
#include "mkuz_conveyor.h" #include "mkuz_conveyor.h"
@ -63,18 +64,38 @@ __FBSDID("$FreeBSD$");
#define DEFAULT_CLSTSIZE 16384 #define DEFAULT_CLSTSIZE 16384
static struct mkuz_format uzip_fmt = { enum UZ_ALGORITHM {
.magic = CLOOP_MAGIC_ZLIB, UZ_ZLIB = 0,
.default_sufx = DEFAULT_SUFX_ZLIB, UZ_LZMA,
.f_init = &mkuz_zlib_init, UZ_ZSTD,
.f_compress = &mkuz_zlib_compress UZ_INVALID
}; };
static struct mkuz_format ulzma_fmt = { static const struct mkuz_format uzip_fmts[] = {
.magic = CLOOP_MAGIC_LZMA, [UZ_ZLIB] = {
.default_sufx = DEFAULT_SUFX_LZMA, .option = "zlib",
.f_init = &mkuz_lzma_init, .magic = CLOOP_MAGIC_ZLIB,
.f_compress = &mkuz_lzma_compress .default_sufx = DEFAULT_SUFX_ZLIB,
.f_compress_bound = mkuz_zlib_cbound,
.f_init = mkuz_zlib_init,
.f_compress = mkuz_zlib_compress,
},
[UZ_LZMA] = {
.option = "lzma",
.magic = CLOOP_MAGIC_LZMA,
.default_sufx = DEFAULT_SUFX_LZMA,
.f_compress_bound = mkuz_lzma_cbound,
.f_init = mkuz_lzma_init,
.f_compress = mkuz_lzma_compress,
},
[UZ_ZSTD] = {
.option = "zstd",
.magic = CLOOP_MAGIC_ZSTD,
.default_sufx = DEFAULT_SUFX_ZSTD,
.f_compress_bound = mkuz_zstd_cbound,
.f_init = mkuz_zstd_init,
.f_compress = mkuz_zstd_compress,
},
}; };
static struct mkuz_blk *readblock(int, u_int32_t); static struct mkuz_blk *readblock(int, u_int32_t);
@ -111,6 +132,8 @@ int main(int argc, char **argv)
struct mkuz_blk_info *chit; struct mkuz_blk_info *chit;
size_t ncpusz, ncpu, magiclen; size_t ncpusz, ncpu, magiclen;
double st, et; double st, et;
enum UZ_ALGORITHM comp_alg;
int comp_level;
st = getdtime(); st = getdtime();
@ -129,12 +152,27 @@ int main(int argc, char **argv)
cfs.en_dedup = 0; cfs.en_dedup = 0;
summary.en = 0; summary.en = 0;
summary.f = stderr; summary.f = stderr;
cfs.handler = &uzip_fmt; comp_alg = UZ_ZLIB;
comp_level = USE_DEFAULT_LEVEL;
cfs.nworkers = ncpu; cfs.nworkers = ncpu;
struct mkuz_blk *iblk, *oblk; struct mkuz_blk *iblk, *oblk;
while((opt = getopt(argc, argv, "o:s:vZdLSj:")) != -1) { while((opt = getopt(argc, argv, "A:C:o:s:vZdLSj:")) != -1) {
switch(opt) { switch(opt) {
case 'A':
for (tmp = UZ_ZLIB; tmp < UZ_INVALID; tmp++) {
if (strcmp(uzip_fmts[tmp].option, optarg) == 0)
break;
}
if (tmp == UZ_INVALID)
errx(1, "invalid algorithm specified: %s",
optarg);
/* Not reached */
comp_alg = tmp;
break;
case 'C':
comp_level = atoi(optarg);
break;
case 'o': case 'o':
oname = optarg; oname = optarg;
break; break;
@ -162,7 +200,7 @@ int main(int argc, char **argv)
break; break;
case 'L': case 'L':
cfs.handler = &ulzma_fmt; comp_alg = UZ_LZMA;
break; break;
case 'S': case 'S':
@ -193,16 +231,32 @@ int main(int argc, char **argv)
/* Not reached */ /* Not reached */
} }
cfs.handler = &uzip_fmts[comp_alg];
magiclen = strlcpy(hdr.magic, cfs.handler->magic, sizeof(hdr.magic)); magiclen = strlcpy(hdr.magic, cfs.handler->magic, sizeof(hdr.magic));
assert(magiclen < sizeof(hdr.magic)); assert(magiclen < sizeof(hdr.magic));
if (cfs.en_dedup != 0) { if (cfs.en_dedup != 0) {
hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3; /*
* Dedupe requires a version 3 format. Don't downgrade newer
* formats.
*/
if (hdr.magic[CLOOP_OFS_VERSN] == CLOOP_MAJVER_2)
hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3;
hdr.magic[CLOOP_OFS_COMPR] = hdr.magic[CLOOP_OFS_COMPR] =
tolower(hdr.magic[CLOOP_OFS_COMPR]); tolower(hdr.magic[CLOOP_OFS_COMPR]);
} }
c_ctx = cfs.handler->f_init(cfs.blksz); if (cfs.blksz % DEV_BSIZE != 0)
errx(1, "cluster size should be multiple of %d", DEV_BSIZE);
cfs.cbound_blksz = cfs.handler->f_compress_bound(cfs.blksz);
if (cfs.cbound_blksz > MAXPHYS)
errx(1, "maximal compressed cluster size %zu greater than MAXPHYS %zu",
cfs.cbound_blksz, (size_t)MAXPHYS);
c_ctx = cfs.handler->f_init(&comp_level);
cfs.comp_level = comp_level;
cfs.iname = argv[0]; cfs.iname = argv[0];
if (oname == NULL) { if (oname == NULL) {
@ -239,6 +293,14 @@ int main(int argc, char **argv)
} }
toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc)); toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc));
/*
* Initialize the last+1 entry to a defined value instead of leaving
* heap trash in it. If final padding is added later, the entry may or
* may not be overwritten with an offset representing the length of the
* final compressed block; if it is not, it keeps this defined value.
*/
toc[hdr.nblocks] = 0;
cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT, cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT,
S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
if (cfs.fdw < 0) { if (cfs.fdw < 0) {

View File

@ -28,6 +28,9 @@
#define DEFINE_RAW_METHOD(func, rval, args...) typedef rval (*func##_t)(args) #define DEFINE_RAW_METHOD(func, rval, args...) typedef rval (*func##_t)(args)
/* Use an algorithm-specific default level if no explicit level is selected. */
#define USE_DEFAULT_LEVEL INT_MIN
void *mkuz_safe_malloc(size_t); void *mkuz_safe_malloc(size_t);
void *mkuz_safe_zmalloc(size_t); void *mkuz_safe_zmalloc(size_t);
int mkuz_memvcmp(const void *, unsigned char, size_t); int mkuz_memvcmp(const void *, unsigned char, size_t);