Update to Zstandard 1.4.2

The full release notes for 1.4.1 (which was skipped) and 1.4.2 can be found on GitHub:

  https://github.com/facebook/zstd/releases/tag/v1.4.1
  https://github.com/facebook/zstd/releases/tag/v1.4.2

These are mostly minor updates; per the release notes, 1.4.1 brings roughly 7%
faster decompression.

Relnotes:	yes
Conrad Meyer 2019-08-08 16:54:22 +00:00
commit 4d3f1eafc9
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=350760
75 changed files with 3349 additions and 2098 deletions


@ -11,6 +11,8 @@ SRCS= entropy_common.c \
fse_compress.c \
huf_compress.c \
zstd_compress.c \
zstd_compress_literals.c \
zstd_compress_sequences.c \
zstdmt_compress.c \
huf_decompress.c \
zstd_ddict.c \
@ -43,7 +45,20 @@ ZSTDDIR= ${SRCTOP}/sys/contrib/zstd
${ZSTDDIR}/lib/decompress ${ZSTDDIR}/lib/deprecated \
${ZSTDDIR}/lib/dictBuilder ${ZSTDDIR}/lib
.include <bsd.compiler.mk>
# https://github.com/facebook/zstd/commit/812e8f2a [zstd 1.4.1]
# "Note that [GCC] autovectorization still does not do a good job on the
# optimized version, so it's turned off via attribute and flag. I found
# that neither attribute nor command-line flag were entirely successful in
# turning off vectorization, which is why there were both."
.if ${COMPILER_TYPE} == "gcc"
CFLAGS.zstd_decompress_block.c+= -fno-tree-vectorize
.endif
# Workaround for LLVM bug 35023, https://bugs.llvm.org/show_bug.cgi?id=35023
# Fixed upstream in https://reviews.llvm.org/rL349935 ; unclear whether our
# LLVM has picked that fix up yet.
.if ${MACHINE_ARCH:Marm*} != ""
CFLAGS.zstd_compress.c+= -O0
.endif
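
For context, the attribute half of the "attribute and flag" workaround quoted above appears later in this diff as the DONT_VECTORIZE macro in compiler.h. A minimal standalone sketch of how that pattern is applied (copy_forward is a hypothetical example function, not zstd code):

#include <stddef.h>  /* size_t */

/* Same macro as in the compiler.h hunk later in this diff. */
#if !defined(__clang__) && defined(__GNUC__)
# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
#else
# define DONT_VECTORIZE
#endif

DONT_VECTORIZE
static void copy_forward(unsigned char* dst, const unsigned char* src, size_t n)
{
    size_t i;
    for (i = 0; i < n; i++)   /* GCC is asked not to auto-vectorize this loop */
        dst[i] = src[i];
}

The -fno-tree-vectorize flag above acts as a backstop for translation units where the attribute alone proved insufficient.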


@ -648,6 +648,8 @@ contrib/zstd/lib/common/entropy_common.c optional zstdio compile-with ${ZSTD_C}
contrib/zstd/lib/common/error_private.c optional zstdio compile-with ${ZSTD_C}
contrib/zstd/lib/common/xxhash.c optional zstdio compile-with ${ZSTD_C}
contrib/zstd/lib/compress/zstd_compress.c optional zstdio compile-with ${ZSTD_C}
contrib/zstd/lib/compress/zstd_compress_literals.c optional zstdio compile-with ${ZSTD_C}
contrib/zstd/lib/compress/zstd_compress_sequences.c optional zstdio compile-with ${ZSTD_C}
contrib/zstd/lib/compress/fse_compress.c optional zstdio compile-with ${ZSTD_C}
contrib/zstd/lib/compress/hist.c optional zstdio compile-with ${ZSTD_C}
contrib/zstd/lib/compress/huf_compress.c optional zstdio compile-with ${ZSTD_C}
@ -658,7 +660,9 @@ contrib/zstd/lib/compress/zstd_ldm.c optional zstdio compile-with ${ZSTD_C}
contrib/zstd/lib/compress/zstd_opt.c optional zstdio compile-with ${ZSTD_C}
contrib/zstd/lib/decompress/zstd_ddict.c optional zstdio compile-with ${ZSTD_C}
contrib/zstd/lib/decompress/zstd_decompress.c optional zstdio compile-with ${ZSTD_C}
contrib/zstd/lib/decompress/zstd_decompress_block.c optional zstdio compile-with ${ZSTD_C}
# See comment in sys/conf/kern.pre.mk
contrib/zstd/lib/decompress/zstd_decompress_block.c optional zstdio \
compile-with "${ZSTD_C} ${ZSTD_DECOMPRESS_BLOCK_FLAGS}"
contrib/zstd/lib/decompress/huf_decompress.c optional zstdio compile-with ${ZSTD_C}
# Blake 2
contrib/libb2/blake2b-ref.c optional crypto | ipsec | ipsec_support \


@ -175,6 +175,14 @@ NORMAL_FWO= ${LD} -b binary --no-warn-mismatch -d -warn-common -r \
# for ZSTD in the kernel (include zstd/lib/freebsd before other CFLAGS)
ZSTD_C= ${CC} -c -DZSTD_HEAPMODE=1 -I$S/contrib/zstd/lib/freebsd ${CFLAGS} -I$S/contrib/zstd/lib -I$S/contrib/zstd/lib/common ${WERROR} -Wno-inline -Wno-missing-prototypes ${PROF} -U__BMI__ ${.IMPSRC}
# https://github.com/facebook/zstd/commit/812e8f2a [zstd 1.4.1]
# "Note that [GCC] autovectorization still does not do a good job on the
# optimized version, so it's turned off via attribute and flag. I found
# that neither attribute nor command-line flag were entirely successful in
# turning off vectorization, which is why there were both."
.if ${COMPILER_TYPE} == "gcc"
ZSTD_DECOMPRESS_BLOCK_FLAGS= -fno-tree-vectorize
.endif
# Common for dtrace / zfs
CDDL_CFLAGS= -DFREEBSD_NAMECACHE -nostdinc -I$S/cddl/compat/opensolaris -I$S/cddl/contrib/opensolaris/uts/common -I$S -I$S/cddl/contrib/opensolaris/common ${CFLAGS} -Wno-unknown-pragmas -Wno-missing-prototypes -Wno-undef -Wno-strict-prototypes -Wno-cast-qual -Wno-parentheses -Wno-redundant-decls -Wno-missing-braces -Wno-uninitialized -Wno-unused -Wno-inline -Wno-switch -Wno-pointer-arith -Wno-unknown-pragmas


@ -1,3 +1,39 @@
v1.4.2
bug: Fix bug in zstd-0.5 decoder by @terrelln (#1696)
bug: Fix seekable decompression in-memory API by @iburinoc (#1695)
misc: Validate blocks are smaller than size limit by @vivekmg (#1685)
misc: Restructure source files by @ephiepark (#1679)
v1.4.1
bug: Fix data corruption in niche use cases by @terrelln (#1659)
bug: Fuzz legacy modes, fix uncovered bugs by @terrelln (#1593, #1594, #1595)
bug: Fix out of bounds read by @terrelln (#1590)
perf: Improve decode speed by ~7% @mgrice (#1668)
perf: Slightly improved compression ratio of level 3 and 4 (ZSTD_dfast) by @cyan4973 (#1681)
perf: Slightly faster compression speed when re-using a context by @cyan4973 (#1658)
perf: Improve compression ratio for small windowLog by @cyan4973 (#1624)
perf: Faster compression speed in high compression mode for repetitive data by @terrelln (#1635)
api: Add parameter to generate smaller dictionaries by @tyler-tran (#1656)
cli: Recognize symlinks when built in C99 mode by @felixhandte (#1640)
cli: Expose cpu load indicator for each file on -vv mode by @ephiepark (#1631)
cli: Restrict read permissions on destination files by @chungy (#1644)
cli: zstdgrep: handle -f flag by @felixhandte (#1618)
cli: zstdcat: follow symlinks by @vejnar (#1604)
doc: Remove extra size limit on compressed blocks by @felixhandte (#1689)
doc: Fix typo by @yk-tanigawa (#1633)
doc: Improve documentation on streaming buffer sizes by @cyan4973 (#1629)
build: CMake: support building with LZ4 @leeyoung624 (#1626)
build: CMake: install zstdless and zstdgrep by @leeyoung624 (#1647)
build: CMake: respect existing uninstall target by @j301scott (#1619)
build: Make: skip multithread tests when built without support by @michaelforney (#1620)
build: Make: Fix examples/ test target by @sjnam (#1603)
build: Meson: rename options out of deprecated namespace by @lzutao (#1665)
build: Meson: fix build by @lzutao (#1602)
build: Visual Studio: don't export symbols in static lib by @scharan (#1650)
build: Visual Studio: fix linking by @absotively (#1639)
build: Fix MinGW-W64 build by @myzhang1029 (#1600)
misc: Expand decodecorpus coverage by @ephiepark (#1664)
v1.4.0
perf: Improve level 1 compression speed in most scenarios by 6% by @gbtucker and @terrelln
api: Move the advanced API, including all functions in the staging section, to the stable section


@ -559,7 +559,7 @@ static int benchMem(slice_collection_t dstBlocks,
CONTROL(BMK_isSuccessful_runOutcome(outcome));
BMK_runTime_t const result = BMK_extract_runTime(outcome);
U64 const dTime_ns = result.nanoSecPerRun;
double const dTime_ns = result.nanoSecPerRun;
double const dTime_sec = (double)dTime_ns / 1000000000;
size_t const srcSize = result.sumOfReturn;
double const dSpeed_MBps = (double)srcSize / dTime_sec / (1 MB);


@ -1,4 +1,5 @@
seekable_compression
seekable_decompression
seekable_decompression_mem
parallel_processing
parallel_compression


@ -24,7 +24,8 @@ SEEKABLE_OBJS = ../zstdseek_compress.c ../zstdseek_decompress.c $(ZSTDLIB)
default: all
all: seekable_compression seekable_decompression parallel_processing
all: seekable_compression seekable_decompression seekable_decompression_mem \
parallel_processing
$(ZSTDLIB):
make -C $(ZSTDLIB_PATH) $(ZSTDLIB_NAME)
@ -35,6 +36,9 @@ seekable_compression : seekable_compression.c $(SEEKABLE_OBJS)
seekable_decompression : seekable_decompression.c $(SEEKABLE_OBJS)
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
seekable_decompression_mem : seekable_decompression_mem.c $(SEEKABLE_OBJS)
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
parallel_processing : parallel_processing.c $(SEEKABLE_OBJS)
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ -pthread
@ -44,5 +48,6 @@ parallel_compression : parallel_compression.c $(SEEKABLE_OBJS)
clean:
@rm -f core *.o tmp* result* *.zst \
seekable_compression seekable_decompression \
seekable_decompression_mem \
parallel_processing parallel_compression
@echo Cleaning completed


@ -0,0 +1,144 @@
/*
* Copyright (c) 2017-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#include <stdlib.h> // malloc, exit
#include <stdio.h> // fprintf, perror, feof
#include <string.h> // strerror
#include <errno.h> // errno
#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h> // presumes zstd library is installed
#include <zstd_errors.h>
#include "zstd_seekable.h"
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX_FILE_SIZE (8 * 1024 * 1024)
static void* malloc_orDie(size_t size)
{
void* const buff = malloc(size);
if (buff) return buff;
/* error */
perror("malloc");
exit(1);
}
static void* realloc_orDie(void* ptr, size_t size)
{
ptr = realloc(ptr, size);
if (ptr) return ptr;
/* error */
perror("realloc");
exit(1);
}
static FILE* fopen_orDie(const char *filename, const char *instruction)
{
FILE* const inFile = fopen(filename, instruction);
if (inFile) return inFile;
/* error */
perror(filename);
exit(3);
}
static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
{
size_t const readSize = fread(buffer, 1, sizeToRead, file);
if (readSize == sizeToRead) return readSize; /* good */
if (feof(file)) return readSize; /* good, reached end of file */
/* error */
perror("fread");
exit(4);
}
static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
{
size_t const writtenSize = fwrite(buffer, 1, sizeToWrite, file);
if (writtenSize == sizeToWrite) return sizeToWrite; /* good */
/* error */
perror("fwrite");
exit(5);
}
static size_t fclose_orDie(FILE* file)
{
if (!fclose(file)) return 0;
/* error */
perror("fclose");
exit(6);
}
static void fseek_orDie(FILE* file, long int offset, int origin) {
if (!fseek(file, offset, origin)) {
if (!fflush(file)) return;
}
/* error */
perror("fseek");
exit(7);
}
static void decompressFile_orDie(const char* fname, off_t startOffset, off_t endOffset)
{
FILE* const fin = fopen_orDie(fname, "rb");
FILE* const fout = stdout;
// Just for demo purposes, assume file is <= MAX_FILE_SIZE
void* const buffIn = malloc_orDie(MAX_FILE_SIZE);
size_t const inSize = fread_orDie(buffIn, MAX_FILE_SIZE, fin);
size_t const buffOutSize = ZSTD_DStreamOutSize(); /* Guarantee to successfully flush at least one complete compressed block in all circumstances. */
void* const buffOut = malloc_orDie(buffOutSize);
ZSTD_seekable* const seekable = ZSTD_seekable_create();
if (seekable==NULL) { fprintf(stderr, "ZSTD_seekable_create() error \n"); exit(10); }
size_t const initResult = ZSTD_seekable_initBuff(seekable, buffIn, inSize);
if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }
while (startOffset < endOffset) {
size_t const result = ZSTD_seekable_decompress(seekable, buffOut, MIN(endOffset - startOffset, buffOutSize), startOffset);
if (ZSTD_isError(result)) {
fprintf(stderr, "ZSTD_seekable_decompress() error : %s \n",
ZSTD_getErrorName(result));
exit(12);
}
fwrite_orDie(buffOut, result, fout);
startOffset += result;
}
ZSTD_seekable_free(seekable);
fclose_orDie(fin);
fclose_orDie(fout);
free(buffIn);
free(buffOut);
}
int main(int argc, const char** argv)
{
const char* const exeName = argv[0];
if (argc!=4) {
fprintf(stderr, "wrong arguments\n");
fprintf(stderr, "usage:\n");
fprintf(stderr, "%s FILE START END\n", exeName);
return 1;
}
{
const char* const inFilename = argv[1];
off_t const startOffset = atoll(argv[2]);
off_t const endOffset = atoll(argv[3]);
decompressFile_orDie(inFilename, startOffset, endOffset);
}
return 0;
}
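
A usage sketch (file name hypothetical): running ./seekable_decompression_mem archive.zst 1000 2000 loads up to MAX_FILE_SIZE (8 MB) of the .zst file into memory and writes decompressed bytes [1000, 2000) of the original content to stdout.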


@ -106,7 +106,7 @@ typedef struct {
static int ZSTD_seekable_read_buff(void* opaque, void* buffer, size_t n)
{
buffWrapper_t* buff = (buffWrapper_t*) opaque;
if (buff->size + n > buff->pos) return -1;
if (buff->pos + n > buff->size) return -1;
memcpy(buffer, (const BYTE*)buff->ptr + buff->pos, n);
buff->pos += n;
return 0;
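A hedged note on the one-line fix above, with illustrative values:
/* Old test: buff->size + n > buff->pos. With size=100, pos=0, n=8 this is
 * 108 > 0, so even a fully in-bounds read returned -1. The corrected test
 * (buff->pos + n > buff->size) rejects only reads past the end of buffer. */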
@ -124,7 +124,7 @@ static int ZSTD_seekable_seek_buff(void* opaque, long long offset, int origin)
newOffset = (unsigned long long)buff->pos + offset;
break;
case SEEK_END:
newOffset = (unsigned long long)buff->size - offset;
newOffset = (unsigned long long)buff->size + offset;
break;
default:
assert(0); /* not possible */
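And for the SEEK_END change above:
/* fseek()-style semantics: a SEEK_END offset is added to the end position
 * (callers pass a negative offset to step backwards), hence the corrected
 * buff->size + offset rather than the old subtraction. */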


@ -16,7 +16,7 @@ Distribution of this document is unlimited.
### Version
0.3.1 (25/10/18)
0.3.2 (17/07/19)
Introduction
@ -390,9 +390,7 @@ A block can contain any number of bytes (even zero), up to
- Window_Size
- 128 KB
A `Compressed_Block` has the extra restriction that `Block_Size` is always
strictly less than the decompressed size.
If this condition cannot be respected,
If this condition cannot be respected when generating a `Compressed_Block`,
the block must be sent uncompressed instead (`Raw_Block`).
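
A hedged sketch of the encoder-side consequence of this rule (helper name hypothetical, not zstd API):

/* If compression cannot make a block strictly smaller than its content,
 * the block must be emitted raw instead. */
typedef enum { block_raw, block_compressed } block_type_e;

static block_type_e choose_block_type(size_t cSize, size_t contentSize)
{
    return (cSize < contentSize) ? block_compressed : block_raw;
}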
@ -1655,6 +1653,7 @@ or at least provide a meaningful error code explaining for which reason it canno
Version changes
---------------
- 0.3.2 : remove additional block size restriction on compressed blocks
- 0.3.1 : minor clarification regarding offset history update rules
- 0.3.0 : minor edits to match RFC8478
- 0.2.9 : clarifications for huffman weights direct representation, by Ulrich Kunitz


@ -1,46 +1,36 @@
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
<title>zstd 1.4.0 Manual</title>
<title>zstd 1.4.2 Manual</title>
</head>
<body>
<h1>zstd 1.4.0 Manual</h1>
<h1>zstd 1.4.2 Manual</h1>
<hr>
<a name="Contents"></a><h2>Contents</h2>
<ol>
<li><a href="#Chapter1">Introduction</a></li>
<li><a href="#Chapter2">Version</a></li>
<li><a href="#Chapter3">Default constant</a></li>
<li><a href="#Chapter4">Constants</a></li>
<li><a href="#Chapter5">Simple API</a></li>
<li><a href="#Chapter6">Explicit context</a></li>
<li><a href="#Chapter7">Advanced compression API</a></li>
<li><a href="#Chapter8">Advanced decompression API</a></li>
<li><a href="#Chapter9">Streaming</a></li>
<li><a href="#Chapter10">Streaming compression - HowTo</a></li>
<li><a href="#Chapter11">This is a legacy streaming API, and can be replaced by ZSTD_CCtx_reset() and</a></li>
<li><a href="#Chapter12">Equivalent to:</a></li>
<li><a href="#Chapter13">Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).</a></li>
<li><a href="#Chapter14">Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush).</a></li>
<li><a href="#Chapter15">Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end).</a></li>
<li><a href="#Chapter16">Streaming decompression - HowTo</a></li>
<li><a href="#Chapter17">Simple dictionary API</a></li>
<li><a href="#Chapter18">Bulk processing dictionary API</a></li>
<li><a href="#Chapter19">Dictionary helper functions</a></li>
<li><a href="#Chapter20">Advanced dictionary and prefix API</a></li>
<li><a href="#Chapter21">ADVANCED AND EXPERIMENTAL FUNCTIONS</a></li>
<li><a href="#Chapter22">experimental API (static linking only)</a></li>
<li><a href="#Chapter23">Frame size functions</a></li>
<li><a href="#Chapter24">ZSTD_decompressBound() :</a></li>
<li><a href="#Chapter25">Memory management</a></li>
<li><a href="#Chapter26">Advanced compression functions</a></li>
<li><a href="#Chapter27">Advanced decompression functions</a></li>
<li><a href="#Chapter28">Advanced streaming functions</a></li>
<li><a href="#Chapter29">Buffer-less and synchronous inner streaming functions</a></li>
<li><a href="#Chapter30">Buffer-less streaming compression (synchronous mode)</a></li>
<li><a href="#Chapter31">Buffer-less streaming decompression (synchronous mode)</a></li>
<li><a href="#Chapter32">ZSTD_getFrameHeader() :</a></li>
<li><a href="#Chapter33">Block level API</a></li>
<li><a href="#Chapter3">Simple API</a></li>
<li><a href="#Chapter4">Explicit context</a></li>
<li><a href="#Chapter5">Advanced compression API</a></li>
<li><a href="#Chapter6">Advanced decompression API</a></li>
<li><a href="#Chapter7">Streaming</a></li>
<li><a href="#Chapter8">Streaming compression - HowTo</a></li>
<li><a href="#Chapter9">Streaming decompression - HowTo</a></li>
<li><a href="#Chapter10">Simple dictionary API</a></li>
<li><a href="#Chapter11">Bulk processing dictionary API</a></li>
<li><a href="#Chapter12">Dictionary helper functions</a></li>
<li><a href="#Chapter13">Advanced dictionary and prefix API</a></li>
<li><a href="#Chapter14">experimental API (static linking only)</a></li>
<li><a href="#Chapter15">Frame size functions</a></li>
<li><a href="#Chapter16">Memory management</a></li>
<li><a href="#Chapter17">Advanced compression functions</a></li>
<li><a href="#Chapter18">Advanced decompression functions</a></li>
<li><a href="#Chapter19">Advanced streaming functions</a></li>
<li><a href="#Chapter20">Buffer-less and synchronous inner streaming functions</a></li>
<li><a href="#Chapter21">Buffer-less streaming compression (synchronous mode)</a></li>
<li><a href="#Chapter22">Buffer-less streaming decompression (synchronous mode)</a></li>
<li><a href="#Chapter23">Block level API</a></li>
</ol>
<hr>
<a name="Chapter1"></a><h2>Introduction</h2><pre>
@ -78,11 +68,7 @@
<pre><b>unsigned ZSTD_versionNumber(void); </b>/**< to check runtime library version */<b>
</b></pre><BR>
<a name="Chapter3"></a><h2>Default constant</h2><pre></pre>
<a name="Chapter4"></a><h2>Constants</h2><pre></pre>
<a name="Chapter5"></a><h2>Simple API</h2><pre></pre>
<a name="Chapter3"></a><h2>Simple API</h2><pre></pre>
<pre><b>size_t ZSTD_compress( void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
@ -152,12 +138,17 @@ const char* ZSTD_getErrorName(size_t code); </b>/*!< provides readable strin
int ZSTD_minCLevel(void); </b>/*!< minimum negative compression level allowed */<b>
int ZSTD_maxCLevel(void); </b>/*!< maximum compression level available */<b>
</pre></b><BR>
<a name="Chapter6"></a><h2>Explicit context</h2><pre></pre>
<a name="Chapter4"></a><h2>Explicit context</h2><pre></pre>
<h3>Compression context</h3><pre> When compressing many times,
it is recommended to allocate a context just once, and re-use it for each successive compression operation.
it is recommended to allocate a context just once,
and re-use it for each successive compression operation.
This will make workload friendlier for system's memory.
Use one context per thread for parallel execution in multi-threaded environments.
Note : re-using context is just a speed / resource optimization.
It doesn't change the compression ratio, which remains identical.
Note 2 : In multi-threaded environments,
use one different context per thread for parallel execution.
</pre><b><pre>typedef struct ZSTD_CCtx_s ZSTD_CCtx;
ZSTD_CCtx* ZSTD_createCCtx(void);
size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx);
@ -189,7 +180,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
</p></pre><BR>
<a name="Chapter7"></a><h2>Advanced compression API</h2><pre></pre>
<a name="Chapter5"></a><h2>Advanced compression API</h2><pre></pre>
<pre><b>typedef enum { ZSTD_fast=1,
ZSTD_dfast=2,
@ -332,6 +323,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
* ZSTD_c_forceMaxWindow
* ZSTD_c_forceAttachDict
* ZSTD_c_literalCompressionMode
* ZSTD_c_targetCBlockSize
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly;
* also, the enums values themselves are unstable and can still change.
@ -341,6 +333,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
ZSTD_c_experimentalParam3=1000,
ZSTD_c_experimentalParam4=1001,
ZSTD_c_experimentalParam5=1002,
ZSTD_c_experimentalParam6=1003,
} ZSTD_cParameter;
</b></pre><BR>
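Since ZSTD_c_targetCBlockSize is new in this release and still experimental, it is only visible under ZSTD_STATIC_LINKING_ONLY. A hedged usage sketch (not from the manual):

#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>

/* Ask for compressed blocks of roughly 4 KB; there is no guarantee on the
 * resulting block sizes, and 0 would mean "no target". */
static size_t request_small_blocks(ZSTD_CCtx* cctx)
{
    return ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 4096);
}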
<pre><b>typedef struct {
@ -424,7 +417,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
</p></pre><BR>
<a name="Chapter8"></a><h2>Advanced decompression API</h2><pre></pre>
<a name="Chapter6"></a><h2>Advanced decompression API</h2><pre></pre>
<pre><b>typedef enum {
@ -472,7 +465,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
</p></pre><BR>
<a name="Chapter9"></a><h2>Streaming</h2><pre></pre>
<a name="Chapter7"></a><h2>Streaming</h2><pre></pre>
<pre><b>typedef struct ZSTD_inBuffer_s {
const void* src; </b>/**< start of input buffer */<b>
@ -486,7 +479,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
size_t pos; </b>/**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */<b>
} ZSTD_outBuffer;
</b></pre><BR>
<a name="Chapter10"></a><h2>Streaming compression - HowTo</h2><pre>
<a name="Chapter8"></a><h2>Streaming compression - HowTo</h2><pre>
A ZSTD_CStream object is required to track streaming operation.
Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
@ -592,31 +585,28 @@ size_t ZSTD_freeCStream(ZSTD_CStream* zcs);
<pre><b>size_t ZSTD_CStreamInSize(void); </b>/**< recommended size for input buffer */<b>
</b></pre><BR>
<pre><b>size_t ZSTD_CStreamOutSize(void); </b>/**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */<b>
<pre><b>size_t ZSTD_CStreamOutSize(void); </b>/**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */<b>
</b></pre><BR>
<a name="Chapter11"></a><h2>This is a legacy streaming API, and can be replaced by ZSTD_CCtx_reset() and</h2><pre> ZSTD_compressStream2(). It is redundent, but is still fully supported.
Advanced parameters and dictionary compression can only be used through the
new API.
<BR></pre>
<a name="Chapter12"></a><h2>Equivalent to:</h2><pre>
<pre><b>size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
</b>/*!<b>
* Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).
* NOTE: The return value is different. ZSTD_compressStream() returns a hint for
* the next read size (if non-zero and not an error). ZSTD_compressStream2()
* returns the minimum nb of bytes left to flush (if non-zero and not an error).
*/
size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
</b>/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */<b>
size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
</b>/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */<b>
size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
</b><p>
ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
<BR></pre>
</p></pre><BR>
<a name="Chapter13"></a><h2>Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).</h2><pre> NOTE: The return value is different. ZSTD_compressStream() returns a hint for
the next read size (if non-zero and not an error). ZSTD_compressStream2()
returns the number of bytes left to flush (if non-zero and not an error).
<BR></pre>
<a name="Chapter14"></a><h2>Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush).</h2><pre></pre>
<a name="Chapter15"></a><h2>Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end).</h2><pre></pre>
<a name="Chapter16"></a><h2>Streaming decompression - HowTo</h2><pre>
<a name="Chapter9"></a><h2>Streaming decompression - HowTo</h2><pre>
A ZSTD_DStream object is required to track streaming operations.
Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
ZSTD_DStream objects can be re-used multiple times.
@ -647,14 +637,12 @@ size_t ZSTD_freeCStream(ZSTD_CStream* zcs);
<h3>ZSTD_DStream management functions</h3><pre></pre><b><pre>ZSTD_DStream* ZSTD_createDStream(void);
size_t ZSTD_freeDStream(ZSTD_DStream* zds);
</pre></b><BR>
<h3>Streaming decompression functions</h3><pre></pre><b><pre>size_t ZSTD_initDStream(ZSTD_DStream* zds);
size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
</pre></b><BR>
<h3>Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR>
<pre><b>size_t ZSTD_DStreamInSize(void); </b>/*!< recommended size for input buffer */<b>
</b></pre><BR>
<pre><b>size_t ZSTD_DStreamOutSize(void); </b>/*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */<b>
</b></pre><BR>
<a name="Chapter17"></a><h2>Simple dictionary API</h2><pre></pre>
<a name="Chapter10"></a><h2>Simple dictionary API</h2><pre></pre>
<pre><b>size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
void* dst, size_t dstCapacity,
@ -680,7 +668,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
Note : When `dict == NULL || dictSize < 8` no dictionary is used.
</p></pre><BR>
<a name="Chapter18"></a><h2>Bulk processing dictionary API</h2><pre></pre>
<a name="Chapter11"></a><h2>Bulk processing dictionary API</h2><pre></pre>
<pre><b>ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
int compressionLevel);
@ -723,7 +711,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
Recommended when same dictionary is used multiple times.
</p></pre><BR>
<a name="Chapter19"></a><h2>Dictionary helper functions</h2><pre></pre>
<a name="Chapter12"></a><h2>Dictionary helper functions</h2><pre></pre>
<pre><b>unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
</b><p> Provides the dictID stored within dictionary.
@ -749,7 +737,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code.
</p></pre><BR>
<a name="Chapter20"></a><h2>Advanced dictionary and prefix API</h2><pre>
<a name="Chapter13"></a><h2>Advanced dictionary and prefix API</h2><pre>
This API allows dictionaries to be used with ZSTD_compress2(),
ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and
only reset with the context is reset with ZSTD_reset_parameters or
@ -867,15 +855,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
Note that object memory usage can evolve (increase or decrease) over time.
</p></pre><BR>
<a name="Chapter21"></a><h2>ADVANCED AND EXPERIMENTAL FUNCTIONS</h2><pre>
The definitions in the following section are considered experimental.
They are provided for advanced scenarios.
They should never be used with a dynamic library, as prototypes may change in the future.
Use them only in association with static linking.
<BR></pre>
<a name="Chapter22"></a><h2>experimental API (static linking only)</h2><pre>
<a name="Chapter14"></a><h2>experimental API (static linking only)</h2><pre>
The following symbols and constants
are not planned to join "stable API" status in the near future.
They can still change in future versions.
@ -973,7 +953,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
ZSTD_lcm_uncompressed = 2, </b>/**< Always emit uncompressed literals. */<b>
} ZSTD_literalCompressionMode_e;
</b></pre><BR>
<a name="Chapter23"></a><h2>Frame size functions</h2><pre></pre>
<a name="Chapter15"></a><h2>Frame size functions</h2><pre></pre>
<pre><b>unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
</b><p> `src` should point to the start of a series of ZSTD encoded and/or skippable frames
@ -998,7 +978,8 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
however it does mean that all frame data must be present and valid.
</p></pre><BR>
<a name="Chapter24"></a><h2>ZSTD_decompressBound() :</h2><pre> `src` should point to the start of a series of ZSTD encoded and/or skippable frames
<pre><b>unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize);
</b><p> `src` should point to the start of a series of ZSTD encoded and/or skippable frames
`srcSize` must be the _exact_ size of this series
(i.e. there should be a frame boundary at `src + srcSize`)
@return : - upper-bound for the decompressed size of all data in all successive frames
@ -1010,7 +991,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by:
upper-bound = # blocks * min(128 KB, Window_Size)
<BR></pre>
</p></pre><BR>
<pre><b>size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
</b><p> srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX.
@ -1018,7 +999,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
or an error code (if srcSize is too small)
</p></pre><BR>
<a name="Chapter25"></a><h2>Memory management</h2><pre></pre>
<a name="Chapter16"></a><h2>Memory management</h2><pre></pre>
<pre><b>size_t ZSTD_estimateCCtxSize(int compressionLevel);
size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
@ -1098,7 +1079,7 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; </b>/**< t
</p></pre><BR>
<a name="Chapter26"></a><h2>Advanced compression functions</h2><pre></pre>
<a name="Chapter17"></a><h2>Advanced compression functions</h2><pre></pre>
<pre><b>ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
</b><p> Create a digested dictionary for compression
@ -1243,7 +1224,7 @@ size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
</p></pre><BR>
<a name="Chapter27"></a><h2>Advanced decompression functions</h2><pre></pre>
<a name="Chapter18"></a><h2>Advanced decompression functions</h2><pre></pre>
<pre><b>unsigned ZSTD_isFrame(const void* buffer, size_t size);
</b><p> Tells if the content of `buffer` starts with a valid Frame Identifier.
@ -1305,7 +1286,7 @@ size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
</p></pre><BR>
<a name="Chapter28"></a><h2>Advanced streaming functions</h2><pre> Warning : most of these functions are now redundant with the Advanced API.
<a name="Chapter19"></a><h2>Advanced streaming functions</h2><pre> Warning : most of these functions are now redundant with the Advanced API.
Once Advanced API reaches "stable" status,
redundant functions will be deprecated, and then at some point removed.
<BR></pre>
@ -1407,18 +1388,41 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict*
</p></pre><BR>
<h3>Advanced Streaming decompression functions</h3><pre></pre><b><pre>size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); </b>/**< note: no dictionary will be used if dict == NULL or dictSize < 8 */<b>
size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); </b>/**< note : ddict is referenced, it must outlive decompression session */<b>
size_t ZSTD_resetDStream(ZSTD_DStream* zds); </b>/**< re-use decompression parameters from previous init; saves dictionary loading */<b>
<h3>Advanced Streaming decompression functions</h3><pre></pre><b><pre></b>/**<b>
* This function is deprecated, and is equivalent to:
*
* ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
* ZSTD_DCtx_loadDictionary(zds, dict, dictSize);
*
* note: no dictionary will be used if dict == NULL or dictSize < 8
*/
size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
</b>/**<b>
* This function is deprecated, and is equivalent to:
*
* ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
* ZSTD_DCtx_refDDict(zds, ddict);
*
* note : ddict is referenced, it must outlive decompression session
*/
size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);
</b>/**<b>
* This function is deprecated, and is equivalent to:
*
* ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
*
* re-use decompression parameters from previous init; saves dictionary loading
*/
size_t ZSTD_resetDStream(ZSTD_DStream* zds);
</pre></b><BR>
<a name="Chapter29"></a><h2>Buffer-less and synchronous inner streaming functions</h2><pre>
<a name="Chapter20"></a><h2>Buffer-less and synchronous inner streaming functions</h2><pre>
This is an advanced API, giving full control over buffer management, for users who need direct control over memory.
But it's also a complex one, with several restrictions, documented below.
Prefer normal streaming API for an easier experience.
<BR></pre>
<a name="Chapter30"></a><h2>Buffer-less streaming compression (synchronous mode)</h2><pre>
<a name="Chapter21"></a><h2>Buffer-less streaming compression (synchronous mode)</h2><pre>
A ZSTD_CCtx object is required to track streaming operations.
Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
ZSTD_CCtx object can be re-used multiple times within successive compression operations.
@ -1454,7 +1458,7 @@ size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); </b>/* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */<b>
size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); </b>/**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */<b>
</pre></b><BR>
<a name="Chapter31"></a><h2>Buffer-less streaming decompression (synchronous mode)</h2><pre>
<a name="Chapter22"></a><h2>Buffer-less streaming decompression (synchronous mode)</h2><pre>
A ZSTD_DCtx object is required to track streaming operations.
Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
A ZSTD_DCtx object can be re-used multiple times.
@ -1536,23 +1540,21 @@ typedef struct {
unsigned checksumFlag;
} ZSTD_frameHeader;
</pre></b><BR>
<a name="Chapter32"></a><h2>ZSTD_getFrameHeader() :</h2><pre> decode Frame Header, or requires larger `srcSize`.
<pre><b>size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); </b>/**< doesn't consume input */<b>
</b>/*! ZSTD_getFrameHeader_advanced() :<b>
* same as ZSTD_getFrameHeader(),
* with added capability to select a format (like ZSTD_f_zstd1_magicless) */
size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); </b>/**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */<b>
</b><p> decode Frame Header, or requires larger `srcSize`.
@return : 0, `zfhPtr` is correctly filled,
>0, `srcSize` is too small, value is wanted `srcSize` amount,
or an error code, which can be tested using ZSTD_isError()
<BR></pre>
<pre><b>size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); </b>/**< doesn't consume input */<b>
</b></pre><BR>
<pre><b>size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); </b>/**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */<b>
</b><p> same as ZSTD_getFrameHeader(),
with added capability to select a format (like ZSTD_f_zstd1_magicless)
</p></pre><BR>
<pre><b>typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
</b></pre><BR>
<a name="Chapter33"></a><h2>Block level API</h2><pre></pre>
<a name="Chapter23"></a><h2>Block level API</h2><pre></pre>
<pre><b></b><p> Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
User will have to take in charge required information to regenerate data, such as compressed and content sizes.


@ -77,7 +77,6 @@ test: all
@echo -- Edge cases detection
! ./streaming_decompression tmp # invalid input, must fail
! ./simple_decompression tmp # invalid input, must fail
! ./simple_decompression tmp.zst # unknown input size, must fail
touch tmpNull # create 0-size file
./simple_compression tmpNull
./simple_decompression tmpNull.zst # 0-size frame : must work


@ -17,6 +17,7 @@ LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT))
LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT))
LIBVER := $(shell echo $(LIBVER_SCRIPT))
VERSION?= $(LIBVER)
CCVER := $(shell $(CC) --version)
CPPFLAGS+= -I. -I./common -DXXH_NAMESPACE=ZSTD_
ifeq ($(OS),Windows_NT) # MinGW assumed
@ -45,6 +46,10 @@ ZDICT_FILES := $(sort $(wildcard dictBuilder/*.c))
ZDEPR_FILES := $(sort $(wildcard deprecated/*.c))
ZSTD_FILES := $(ZSTDCOMMON_FILES)
ifeq ($(findstring GCC,$(CCVER)),GCC)
decompress/zstd_decompress_block.o : CFLAGS+=-fno-tree-vectorize
endif
ZSTD_LEGACY_SUPPORT ?= 5
ZSTD_LIB_COMPRESSION ?= 1
ZSTD_LIB_DECOMPRESSION ?= 1


@ -127,6 +127,13 @@
} \
}
/* vectorization */
#if !defined(__clang__) && defined(__GNUC__)
# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
#else
# define DONT_VECTORIZE
#endif
/* disable warnings */
#ifdef _MSC_VER /* Visual Studio */
# include <intrin.h> /* For Visual 2005 */


@ -34,7 +34,6 @@
#endif
#include "xxhash.h" /* XXH_reset, update, digest */
#if defined (__cplusplus)
extern "C" {
#endif
@ -193,19 +192,72 @@ static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
* Shared functions to include for inlining
*********************************************/
static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); }
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
#define WILDCOPY_OVERLENGTH 8
#define VECLEN 16
typedef enum {
ZSTD_no_overlap,
ZSTD_overlap_src_before_dst,
/* ZSTD_overlap_dst_before_src, */
} ZSTD_overlap_e;
/*! ZSTD_wildcopy() :
* custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
#define WILDCOPY_OVERLENGTH 8
MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
{
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
const BYTE* ip = (const BYTE*)src;
BYTE* op = (BYTE*)dst;
BYTE* const oend = op + length;
do
COPY8(op, ip)
while (op < oend);
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) {
do
COPY8(op, ip)
while (op < oend);
}
else {
if ((length & 8) == 0)
COPY8(op, ip);
do {
COPY16(op, ip);
}
while (op < oend);
}
}
/*! ZSTD_wildcopy_16min() :
* same semantics as ZSTD_wildcopy() except guaranteed to be able to copy 16 bytes at the start */
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
{
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
const BYTE* ip = (const BYTE*)src;
BYTE* op = (BYTE*)dst;
BYTE* const oend = op + length;
assert(length >= 8);
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) {
do
COPY8(op, ip)
while (op < oend);
}
else {
if ((length & 8) == 0)
COPY8(op, ip);
do {
COPY16(op, ip);
}
while (op < oend);
}
}
MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platforms */

File diff suppressed because it is too large


@ -33,13 +33,13 @@ extern "C" {
***************************************/
#define kSearchStrength 8
#define HASH_READ_SIZE 8
#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 now means "unsorted".
#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
It's not a big deal though : candidate will just be sorted again.
Additionally, candidate position 1 will be lost.
But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy
Constant required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
/*-*************************************
@ -128,21 +128,20 @@ typedef struct {
BYTE const* base; /* All regular indexes relative to this position */
BYTE const* dictBase; /* extDict indexes relative to this position */
U32 dictLimit; /* below that point, need extDict */
U32 lowLimit; /* below that point, no more data */
U32 lowLimit; /* below that point, no more valid data */
} ZSTD_window_t;
typedef struct ZSTD_matchState_t ZSTD_matchState_t;
struct ZSTD_matchState_t {
ZSTD_window_t window; /* State for window round buffer management */
U32 loadedDictEnd; /* index of end of dictionary */
U32 loadedDictEnd; /* index of end of dictionary, within context's referential. When dict referential is copied into active context (i.e. not attached), effectively same value as dictSize, since referential starts from zero */
U32 nextToUpdate; /* index from which to continue table update */
U32 nextToUpdate3; /* index from which to continue table update */
U32 hashLog3; /* dispatch table : larger == faster, more memory */
U32* hashTable;
U32* hashTable3;
U32* chainTable;
optState_t opt; /* optimal parser state */
const ZSTD_matchState_t * dictMatchState;
const ZSTD_matchState_t* dictMatchState;
ZSTD_compressionParameters cParams;
};
@ -195,6 +194,9 @@ struct ZSTD_CCtx_params_s {
int compressionLevel;
int forceWindow; /* force back-references to respect limit of
* 1<<wLog, even for dictionary */
size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize.
* No target when targetCBlockSize == 0.
* There is no guarantee on compressed block size */
ZSTD_dictAttachPref_e attachDictPref;
ZSTD_literalCompressionMode_e literalCompressionMode;
@ -305,6 +307,30 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
}
/* ZSTD_cParam_withinBounds:
* @return 1 if value is within cParam bounds,
* 0 otherwise */
MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
{
ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
if (ZSTD_isError(bounds.error)) return 0;
if (value < bounds.lowerBound) return 0;
if (value > bounds.upperBound) return 0;
return 1;
}
/* ZSTD_minGain() :
* minimum compression required
* to generate a compress block or a compressed literals section.
* note : use same formula for both situations */
MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
{
U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
return (srcSize >> minlog) + 2;
}
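A quick worked instance of the formula above:
/* Worked example for ZSTD_minGain(): at a fast strategy (minlog == 6),
 * a 64 KB source requires (65536 >> 6) + 2 == 1026 bytes of gain, i.e.
 * the compressed form must be at least 1026 bytes smaller to be kept;
 * at ZSTD_btultra2 (strat == 9, so minlog == 8) only 258 bytes. */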
/*! ZSTD_storeSeq() :
* Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
* `offsetCode` : distance to match + 3 (values 1-3 are repCodes).
@ -324,7 +350,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
/* copy Literals */
assert(seqStorePtr->maxNbLit <= 128 KB);
assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
ZSTD_wildcopy(seqStorePtr->lit, literals, litLength, ZSTD_no_overlap);
seqStorePtr->lit += litLength;
/* literal Length */
@ -564,6 +590,9 @@ MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64
/*-*************************************
* Round buffer management
***************************************/
#if (ZSTD_WINDOWLOG_MAX_64 > 31)
# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
#endif
/* Max current allowed */
#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
/* Maximum chunk size before overflow correction needs to be called again */
@ -675,31 +704,49 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
* Updates lowLimit so that:
* (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
*
* This allows a simple check that index >= lowLimit to see if index is valid.
* This must be called before a block compression call, with srcEnd as the block
* source end.
* It ensures index is valid as long as index >= lowLimit.
* This must be called before a block compression call.
*
* If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
* This is because dictionaries are allowed to be referenced as long as the last
* byte of the dictionary is in the window, but once they are out of range,
* they cannot be referenced. If loadedDictEndPtr is NULL, we use
* loadedDictEnd == 0.
* loadedDictEnd is only defined if a dictionary is in use for current compression.
* As the name implies, loadedDictEnd represents the index at end of dictionary.
* The value lies within context's referential, it can be directly compared to blockEndIdx.
*
* In normal dict mode, the dict is between lowLimit and dictLimit. In
* dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary
* is below them. forceWindow and dictMatchState are therefore incompatible.
* If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
* If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
* This is because dictionaries are allowed to be referenced fully
* as long as the last byte of the dictionary is in the window.
* Once input has progressed beyond window size, dictionary cannot be referenced anymore.
*
* In normal dict mode, the dictionary lies between lowLimit and dictLimit.
* In dictMatchState mode, lowLimit and dictLimit are the same,
* and the dictionary is below them.
* forceWindow and dictMatchState are therefore incompatible.
*/
MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
void const* srcEnd,
U32 maxDist,
U32* loadedDictEndPtr,
const void* blockEnd,
U32 maxDist,
U32* loadedDictEndPtr,
const ZSTD_matchState_t** dictMatchStatePtr)
{
U32 const blockEndIdx = (U32)((BYTE const*)srcEnd - window->base);
U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u",
(unsigned)blockEndIdx, (unsigned)maxDist);
U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
/* - When there is no dictionary : loadedDictEnd == 0.
In which case, the test (blockEndIdx > maxDist) is merely to avoid
overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
- When there is a standard dictionary :
Index referential is copied from the dictionary,
which means it starts from 0.
In which case, loadedDictEnd == dictSize,
and it makes sense to compare `blockEndIdx > maxDist + dictSize`
since `blockEndIdx` also starts from zero.
- When there is an attached dictionary :
loadedDictEnd is expressed within the referential of the context,
so it can be directly compared against blockEndIdx.
*/
if (blockEndIdx > maxDist + loadedDictEnd) {
U32 const newLowLimit = blockEndIdx - maxDist;
if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
@ -708,10 +755,31 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
(unsigned)window->dictLimit, (unsigned)window->lowLimit);
window->dictLimit = window->lowLimit;
}
if (loadedDictEndPtr)
*loadedDictEndPtr = 0;
if (dictMatchStatePtr)
*dictMatchStatePtr = NULL;
/* On reaching window size, dictionaries are invalidated */
if (loadedDictEndPtr) *loadedDictEndPtr = 0;
if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
}
}
/* Similar to ZSTD_window_enforceMaxDist(),
* but only invalidates dictionary
* when input progresses beyond window size. */
MEM_STATIC void
ZSTD_checkDictValidity(ZSTD_window_t* window,
const void* blockEnd,
U32 maxDist,
U32* loadedDictEndPtr,
const ZSTD_matchState_t** dictMatchStatePtr)
{
U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
if (loadedDictEnd && (blockEndIdx > maxDist + loadedDictEnd)) {
/* On reaching window size, dictionaries are invalidated */
if (loadedDictEndPtr) *loadedDictEndPtr = 0;
if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
}
}


@ -0,0 +1,149 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/*-*************************************
* Dependencies
***************************************/
#include "zstd_compress_literals.h"
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
BYTE* const ostart = (BYTE* const)dst;
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall);
switch(flSize)
{
case 1: /* 2 - 1 - 5 */
ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
break;
case 2: /* 2 - 2 - 12 */
MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
break;
case 3: /* 2 - 2 - 20 */
MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
break;
default: /* not necessary : flSize is {1,2,3} */
assert(0);
}
memcpy(ostart + flSize, src, srcSize);
return srcSize + flSize;
}
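A worked instance of the header layout comments above (field widths in bits: literals-block type, size format, size), assuming set_basic == 0 as in zstd's symbolEncodingType_e:
/* flSize == 2 case (layout 2 - 2 - 12): a 100-byte raw run (31 < 100 <= 4095)
 * encodes header value set_basic + (1<<2) + (100<<4) == 4 + 1600 == 0x0644,
 * written little-endian as bytes 0x44 0x06, followed by the 100 literals. */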
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
BYTE* const ostart = (BYTE* const)dst;
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
(void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
switch(flSize)
{
case 1: /* 2 - 1 - 5 */
ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
break;
case 2: /* 2 - 2 - 12 */
MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
break;
case 3: /* 2 - 2 - 20 */
MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
break;
default: /* not necessary : flSize is {1,2,3} */
assert(0);
}
ostart[flSize] = *(const BYTE*)src;
return flSize+1;
}
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
ZSTD_hufCTables_t* nextHuf,
ZSTD_strategy strategy, int disableLiteralCompression,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
void* workspace, size_t wkspSize,
const int bmi2)
{
size_t const minGain = ZSTD_minGain(srcSize, strategy);
size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
BYTE* const ostart = (BYTE*)dst;
U32 singleStream = srcSize < 256;
symbolEncodingType_e hType = set_compressed;
size_t cLitSize;
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)",
disableLiteralCompression);
/* Prepare nextEntropy assuming reusing the existing table */
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
if (disableLiteralCompression)
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
/* small ? don't even attempt compression (speed opt) */
# define COMPRESS_LITERALS_SIZE_MIN 63
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}
RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
{ HUF_repeat repeat = prevHuf->repeatMode;
int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2)
: HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
if (repeat != HUF_repeat_none) {
/* reused the existing table */
hType = set_repeat;
}
}
if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}
if (cLitSize==1) {
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
}
if (hType == set_compressed) {
/* using a newly constructed table */
nextHuf->repeatMode = HUF_repeat_check;
}
/* Build header */
switch(lhSize)
{
case 3: /* 2 - 2 - 10 - 10 */
{ U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
MEM_writeLE24(ostart, lhc);
break;
}
case 4: /* 2 - 2 - 14 - 14 */
{ U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
MEM_writeLE32(ostart, lhc);
break;
}
case 5: /* 2 - 2 - 18 - 18 */
{ U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
MEM_writeLE32(ostart, lhc);
ostart[4] = (BYTE)(cLitSize >> 10);
break;
}
default: /* not possible : lhSize is {3,4,5} */
assert(0);
}
return lhSize+cLitSize;
}


@ -0,0 +1,29 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_COMPRESS_LITERALS_H
#define ZSTD_COMPRESS_LITERALS_H
#include "zstd_compress_internal.h" /* ZSTD_hufCTables_t, ZSTD_minGain() */
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
ZSTD_hufCTables_t* nextHuf,
ZSTD_strategy strategy, int disableLiteralCompression,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
void* workspace, size_t wkspSize,
const int bmi2);
#endif /* ZSTD_COMPRESS_LITERALS_H */


@ -0,0 +1,415 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/*-*************************************
* Dependencies
***************************************/
#include "zstd_compress_sequences.h"
/**
* -log2(x / 256) lookup table for x in [0, 256).
* If x == 0: Return 0
* Else: Return floor(-log2(x / 256) * 256)
*/
static unsigned const kInverseProbabilityLog256[256] = {
0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889,
874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734,
724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626,
618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542,
535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473,
468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415,
411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366,
362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322,
318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282,
279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247,
244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215,
212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185,
182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157,
155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132,
130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108,
106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85,
83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64,
62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44,
42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25,
23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7,
5, 4, 2, 1,
};
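/* Derivation check (a sketch, not part of the patch; the helper name is
 * hypothetical). Entries at exact powers of two follow directly from
 * floor(-log2(x/256) * 256): */
static void ZSTD_invProbTableExample(void)
{
    assert(kInverseProbabilityLog256[1]   == 2048);  /* -log2(1/256) = 8.0 -> 8*256 */
    assert(kInverseProbabilityLog256[64]  ==  512);  /* -log2(1/4)   = 2.0 -> 2*256 */
    assert(kInverseProbabilityLog256[128] ==  256);  /* -log2(1/2)   = 1.0 -> 1*256 */
}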
static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
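    /* An FSE_CTable begins with a two-U16 header written by FSE_buildCTable:
     * tableLog first, then maxSymbolValue; hence the +1 below. */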
void const* ptr = ctable;
U16 const* u16ptr = (U16 const*)ptr;
U32 const maxSymbolValue = MEM_read16(u16ptr + 1);
return maxSymbolValue;
}
/**
* Returns the cost in bytes of encoding the normalized count header.
* Returns an error if any of the helper functions return an error.
*/
static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
size_t const nbSeq, unsigned const FSELog)
{
BYTE wksp[FSE_NCOUNTBOUND];
S16 norm[MaxSeq + 1];
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max));
return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
}
/**
* Returns the cost in bits of encoding the distribution described by count
* using the entropy bound.
*/
static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
{
unsigned cost = 0;
unsigned s;
for (s = 0; s <= max; ++s) {
unsigned norm = (unsigned)((256 * count[s]) / total);
if (count[s] != 0 && norm == 0)
norm = 1;
assert(count[s] < total);
cost += count[s] * kInverseProbabilityLog256[norm];
}
return cost >> 8;
}
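/* Worked example (a sketch; the helper is hypothetical and not part of the
 * library): two symbols observed twice each across 4 total events. Each
 * normalizes to 128/256, and kInverseProbabilityLog256[128] == 256 (1.0 bit
 * in 8.8 fixed point), so the estimate is (4 * 256) >> 8 == 4 bits, i.e.
 * 1 bit per event for a uniform two-symbol distribution. */
static void ZSTD_entropyCostExample(void)
{
    unsigned const count[2] = { 2, 2 };
    assert(ZSTD_entropyCost(count, /* max */ 1, /* total */ 4) == 4);
}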
/**
* Returns the cost in bits of encoding the distribution in count using ctable.
* Returns an error if ctable cannot represent all the symbols in count.
*/
static size_t ZSTD_fseBitCost(
FSE_CTable const* ctable,
unsigned const* count,
unsigned const max)
{
unsigned const kAccuracyLog = 8;
size_t cost = 0;
unsigned s;
FSE_CState_t cstate;
FSE_initCState(&cstate, ctable);
RETURN_ERROR_IF(ZSTD_getFSEMaxSymbolValue(ctable) < max, GENERIC,
"Repeat FSE_CTable has maxSymbolValue %u < %u",
ZSTD_getFSEMaxSymbolValue(ctable), max);
for (s = 0; s <= max; ++s) {
unsigned const tableLog = cstate.stateLog;
unsigned const badCost = (tableLog + 1) << kAccuracyLog;
unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
if (count[s] == 0)
continue;
RETURN_ERROR_IF(bitCost >= badCost, GENERIC,
"Repeat FSE_CTable has Prob[%u] == 0", s);
cost += count[s] * bitCost;
}
return cost >> kAccuracyLog;
}
/**
* Returns the cost in bits of encoding the distribution in count using the
 * table described by norm. The max symbol supported by norm is assumed >= max.
* norm must be valid for every symbol with non-zero probability in count.
*/
static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
unsigned const* count, unsigned const max)
{
unsigned const shift = 8 - accuracyLog;
size_t cost = 0;
unsigned s;
assert(accuracyLog <= 8);
for (s = 0; s <= max; ++s) {
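        /* norm[s] == -1 flags a "low probability" symbol, charged as if its
         * normalized count were 1 */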
unsigned const normAcc = norm[s] != -1 ? norm[s] : 1;
unsigned const norm256 = normAcc << shift;
assert(norm256 > 0);
assert(norm256 < 256);
cost += count[s] * kInverseProbabilityLog256[norm256];
}
return cost >> 8;
}
symbolEncodingType_e
ZSTD_selectEncodingType(
FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
FSE_CTable const* prevCTable,
short const* defaultNorm, U32 defaultNormLog,
ZSTD_defaultPolicy_e const isDefaultAllowed,
ZSTD_strategy const strategy)
{
ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
if (mostFrequent == nbSeq) {
*repeatMode = FSE_repeat_none;
if (isDefaultAllowed && nbSeq <= 2) {
            /* Prefer set_basic over set_rle when there are 2 or fewer symbols,
* since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
* If basic encoding isn't possible, always choose RLE.
*/
DEBUGLOG(5, "Selected set_basic");
return set_basic;
}
DEBUGLOG(5, "Selected set_rle");
return set_rle;
}
if (strategy < ZSTD_lazy) {
if (isDefaultAllowed) {
size_t const staticFse_nbSeq_max = 1000;
size_t const mult = 10 - strategy;
size_t const baseLog = 3;
size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */
assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */
assert(mult <= 9 && mult >= 7);
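            /* e.g. for lengths, defaultNormLog == 6 and strategy == ZSTD_fast
             * give mult == 9, so dynamicFse_nbSeq_min == (64 * 9) >> 3 == 72 */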
if ( (*repeatMode == FSE_repeat_valid)
&& (nbSeq < staticFse_nbSeq_max) ) {
DEBUGLOG(5, "Selected set_repeat");
return set_repeat;
}
if ( (nbSeq < dynamicFse_nbSeq_min)
|| (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
DEBUGLOG(5, "Selected set_basic");
/* The format allows default tables to be repeated, but it isn't useful.
* When using simple heuristics to select encoding type, we don't want
* to confuse these tables with dictionaries. When running more careful
* analysis, we don't need to waste time checking both repeating tables
* and default tables.
*/
*repeatMode = FSE_repeat_none;
return set_basic;
}
}
} else {
size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);
if (isDefaultAllowed) {
assert(!ZSTD_isError(basicCost));
assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
}
assert(!ZSTD_isError(NCountCost));
assert(compressedCost < ERROR(maxCode));
DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
(unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
if (basicCost <= repeatCost && basicCost <= compressedCost) {
DEBUGLOG(5, "Selected set_basic");
assert(isDefaultAllowed);
*repeatMode = FSE_repeat_none;
return set_basic;
}
if (repeatCost <= compressedCost) {
DEBUGLOG(5, "Selected set_repeat");
assert(!ZSTD_isError(repeatCost));
return set_repeat;
}
assert(compressedCost < basicCost && compressedCost < repeatCost);
}
DEBUGLOG(5, "Selected set_compressed");
*repeatMode = FSE_repeat_check;
return set_compressed;
}
size_t
ZSTD_buildCTable(void* dst, size_t dstCapacity,
FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
unsigned* count, U32 max,
const BYTE* codeTable, size_t nbSeq,
const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
const FSE_CTable* prevCTable, size_t prevCTableSize,
void* workspace, size_t workspaceSize)
{
BYTE* op = (BYTE*)dst;
const BYTE* const oend = op + dstCapacity;
DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);
switch (type) {
case set_rle:
FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max));
RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall);
*op = codeTable[0];
return 1;
case set_repeat:
memcpy(nextCTable, prevCTable, prevCTableSize);
return 0;
case set_basic:
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */
return 0;
case set_compressed: {
S16 norm[MaxSeq + 1];
size_t nbSeq_1 = nbSeq;
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
if (count[codeTable[nbSeq-1]] > 1) {
count[codeTable[nbSeq-1]]--;
nbSeq_1--;
}
assert(nbSeq_1 > 1);
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max));
{ size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
FORWARD_IF_ERROR(NCountSize);
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize));
return NCountSize;
}
}
default: assert(0); RETURN_ERROR(GENERIC);
}
}
FORCE_INLINE_TEMPLATE size_t
ZSTD_encodeSequences_body(
void* dst, size_t dstCapacity,
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets)
{
BIT_CStream_t blockStream;
FSE_CState_t stateMatchLength;
FSE_CState_t stateOffsetBits;
FSE_CState_t stateLitLength;
RETURN_ERROR_IF(
ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
dstSize_tooSmall, "not enough space remaining");
DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)",
(int)(blockStream.endPtr - blockStream.startPtr),
(unsigned)dstCapacity);
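    /* Sequences are encoded back to front: FSE/ANS decoding emits symbols in
     * the reverse of encoding order, so starting from the last sequence lets
     * the decoder regenerate them first to last. */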
/* first symbols */
FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
if (MEM_32bits()) BIT_flushBits(&blockStream);
BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
if (MEM_32bits()) BIT_flushBits(&blockStream);
if (longOffsets) {
U32 const ofBits = ofCodeTable[nbSeq-1];
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
if (extraBits) {
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
BIT_flushBits(&blockStream);
}
BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
ofBits - extraBits);
} else {
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
}
BIT_flushBits(&blockStream);
{ size_t n;
for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */
BYTE const llCode = llCodeTable[n];
BYTE const ofCode = ofCodeTable[n];
BYTE const mlCode = mlCodeTable[n];
U32 const llBits = LL_bits[llCode];
U32 const ofBits = ofCode;
U32 const mlBits = ML_bits[mlCode];
DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
(unsigned)sequences[n].litLength,
(unsigned)sequences[n].matchLength + MINMATCH,
(unsigned)sequences[n].offset);
/* 32b*/ /* 64b*/
/* (7)*/ /* (7)*/
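                /* the column annotations track worst-case bits pending in the
                 * accumulator, which safely holds 57 bits on 64-bit targets
                 * (25 on 32-bit); extra flushes occur whenever the pending
                 * total could exceed that budget */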
FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
BIT_flushBits(&blockStream); /* (7)*/
BIT_addBits(&blockStream, sequences[n].litLength, llBits);
if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
if (longOffsets) {
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
if (extraBits) {
BIT_addBits(&blockStream, sequences[n].offset, extraBits);
BIT_flushBits(&blockStream); /* (7)*/
}
BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
ofBits - extraBits); /* 31 */
} else {
BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
}
BIT_flushBits(&blockStream); /* (7)*/
DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
} }
DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
FSE_flushCState(&blockStream, &stateMatchLength);
DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
FSE_flushCState(&blockStream, &stateOffsetBits);
DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
FSE_flushCState(&blockStream, &stateLitLength);
{ size_t const streamSize = BIT_closeCStream(&blockStream);
RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");
return streamSize;
}
}
static size_t
ZSTD_encodeSequences_default(
void* dst, size_t dstCapacity,
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets)
{
return ZSTD_encodeSequences_body(dst, dstCapacity,
CTable_MatchLength, mlCodeTable,
CTable_OffsetBits, ofCodeTable,
CTable_LitLength, llCodeTable,
sequences, nbSeq, longOffsets);
}
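/* When DYNAMIC_BMI2 is set, the same body is instantiated twice: the baseline
 * above and a variant compiled for BMI2-capable CPUs below, selected at run
 * time by ZSTD_encodeSequences() via its bmi2 flag. */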
#if DYNAMIC_BMI2
static TARGET_ATTRIBUTE("bmi2") size_t
ZSTD_encodeSequences_bmi2(
void* dst, size_t dstCapacity,
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets)
{
return ZSTD_encodeSequences_body(dst, dstCapacity,
CTable_MatchLength, mlCodeTable,
CTable_OffsetBits, ofCodeTable,
CTable_LitLength, llCodeTable,
sequences, nbSeq, longOffsets);
}
#endif
size_t ZSTD_encodeSequences(
void* dst, size_t dstCapacity,
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
{
DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
#if DYNAMIC_BMI2
if (bmi2) {
return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
CTable_MatchLength, mlCodeTable,
CTable_OffsetBits, ofCodeTable,
CTable_LitLength, llCodeTable,
sequences, nbSeq, longOffsets);
}
#endif
(void)bmi2;
return ZSTD_encodeSequences_default(dst, dstCapacity,
CTable_MatchLength, mlCodeTable,
CTable_OffsetBits, ofCodeTable,
CTable_LitLength, llCodeTable,
sequences, nbSeq, longOffsets);
}

View File

@ -0,0 +1,47 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_COMPRESS_SEQUENCES_H
#define ZSTD_COMPRESS_SEQUENCES_H
#include "fse.h" /* FSE_repeat, FSE_CTable */
#include "zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */
typedef enum {
ZSTD_defaultDisallowed = 0,
ZSTD_defaultAllowed = 1
} ZSTD_defaultPolicy_e;
symbolEncodingType_e
ZSTD_selectEncodingType(
FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
FSE_CTable const* prevCTable,
short const* defaultNorm, U32 defaultNormLog,
ZSTD_defaultPolicy_e const isDefaultAllowed,
ZSTD_strategy const strategy);
size_t
ZSTD_buildCTable(void* dst, size_t dstCapacity,
FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
unsigned* count, U32 max,
const BYTE* codeTable, size_t nbSeq,
const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
const FSE_CTable* prevCTable, size_t prevCTableSize,
void* workspace, size_t workspaceSize);
size_t ZSTD_encodeSequences(
void* dst, size_t dstCapacity,
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
#endif /* ZSTD_COMPRESS_SEQUENCES_H */

View File

@ -43,8 +43,7 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
/* Only load extra positions for ZSTD_dtlm_full */
if (dtlm == ZSTD_dtlm_fast)
break;
}
}
} }
}
@ -63,7 +62,10 @@ size_t ZSTD_compressBlock_doubleFast_generic(
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const U32 prefixLowestIndex = ms->window.dictLimit;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 lowestValid = ms->window.dictLimit;
const U32 maxDistance = 1U << cParams->windowLog;
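    /* clamp the window: match indices older than endIndex - maxDistance fall
     * outside the 1<<windowLog window and must no longer be referenced */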
const U32 prefixLowestIndex = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
const BYTE* const prefixLowest = base + prefixLowestIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
@ -95,8 +97,15 @@ size_t ZSTD_compressBlock_doubleFast_generic(
dictCParams->chainLog : hBitsS;
const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart);
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
/* if a dictionary is attached, it must be within window range */
if (dictMode == ZSTD_dictMatchState) {
assert(lowestValid + maxDistance >= endIndex);
}
/* init */
ip += (dictAndPrefixLength == 0);
if (dictMode == ZSTD_noDict) {
@ -138,7 +147,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
goto _match_stored;
}
@ -147,7 +156,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
goto _match_stored;
}
@ -170,8 +179,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
goto _match_found;
}
}
} }
if (matchIndexS > prefixLowestIndex) {
/* check prefix short match */
@ -186,16 +194,14 @@ size_t ZSTD_compressBlock_doubleFast_generic(
if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
goto _search_next_long;
}
}
} }
ip += ((ip-anchor) >> kSearchStrength) + 1;
continue;
_search_next_long:
{
size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
{ size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
U32 const matchIndexL3 = hashLong[hl3];
const BYTE* matchL3 = base + matchIndexL3;
@ -221,9 +227,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
goto _match_found;
}
}
}
} } }
/* if no long +1 match, explore the short match we found */
if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
@ -242,7 +246,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
_match_stored:
/* match found */
@ -250,11 +254,14 @@ size_t ZSTD_compressBlock_doubleFast_generic(
anchor = ip;
if (ip <= ilimit) {
/* Fill Table */
hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] =
hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] =
hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
/* Complementary insertion */
/* done after iLimit test, as candidates could be > iend-8 */
{ U32 const indexToInsert = current+2;
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
}
/* check immediate repcode */
if (dictMode == ZSTD_dictMatchState) {
@ -278,8 +285,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
continue;
}
break;
}
}
} }
if (dictMode == ZSTD_noDict) {
while ( (ip <= ilimit)
@ -294,14 +300,15 @@ size_t ZSTD_compressBlock_doubleFast_generic(
ip += rLength;
anchor = ip;
continue; /* faster when present ... (?) */
} } } }
} } }
} /* while (ip < ilimit) */
/* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved;
rep[1] = offset_2 ? offset_2 : offsetSaved;
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}
@ -360,10 +367,15 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* anchor = istart;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
const U32 prefixStartIndex = ms->window.dictLimit;
const BYTE* const base = ms->window.base;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 lowestValid = ms->window.lowLimit;
const U32 lowLimit = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
const U32 dictStartIndex = lowLimit;
const U32 dictLimit = ms->window.dictLimit;
const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
const BYTE* const prefixStart = base + prefixStartIndex;
const U32 dictStartIndex = ms->window.lowLimit;
const BYTE* const dictBase = ms->window.dictBase;
const BYTE* const dictStart = dictBase + dictStartIndex;
const BYTE* const dictEnd = dictBase + prefixStartIndex;
@ -371,6 +383,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);
/* if extDict is invalidated due to maxDistance, switch to "regular" variant */
if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
@ -396,7 +412,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
} else {
if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@ -407,7 +423,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@ -432,23 +448,27 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
}
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} else {
ip += ((ip-anchor) >> kSearchStrength) + 1;
continue;
} }
/* found a match : store it */
/* move to next sequence start */
ip += mLength;
anchor = ip;
if (ip <= ilimit) {
/* Fill Table */
hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;
hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2;
hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
/* Complementary insertion */
/* done after iLimit test, as candidates could be > iend-8 */
{ U32 const indexToInsert = current+2;
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
}
/* check immediate repcode */
while (ip <= ilimit) {
U32 const current2 = (U32)(ip-base);
@ -475,7 +495,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
rep[1] = offset_2;
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}

View File

@ -13,7 +13,8 @@
void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
const void* const end,
ZSTD_dictTableLoadMethod_e dtlm)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const hashTable = ms->hashTable;
@ -41,6 +42,7 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
} } } }
}
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_fast_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@ -58,7 +60,10 @@ size_t ZSTD_compressBlock_fast_generic(
const BYTE* ip0 = istart;
const BYTE* ip1;
const BYTE* anchor = istart;
const U32 prefixStartIndex = ms->window.dictLimit;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 validStartIndex = ms->window.dictLimit;
const U32 prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex;
const BYTE* const prefixStart = base + prefixStartIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
@ -165,7 +170,7 @@ size_t ZSTD_compressBlock_fast_generic(
rep[1] = offset_2 ? offset_2 : offsetSaved;
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}
@ -222,8 +227,15 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart);
const U32 dictHLog = dictCParams->hashLog;
/* otherwise, we would get index underflow when translating a dict index
* into a local index */
/* if a dictionary is still attached, it necessarily means that
     * it is within the window, so we simply assert it below. */
const U32 maxDistance = 1U << cParams->windowLog;
const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
assert(endIndex - prefixStartIndex <= maxDistance);
(void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
    /* ensure there will be no underflow
* when translating a dict index into a local index */
assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
/* init */
@ -251,7 +263,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
} else if ( (matchIndex <= prefixStartIndex) ) {
size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
U32 const dictMatchIndex = dictHashTable[dictHash];
@ -271,7 +283,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
} /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
}
} else if (MEM_read32(match) != MEM_read32(ip)) {
/* it's not a match, and we're not going to check the dictionary */
@ -286,7 +298,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
&& (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
}
/* match found */
@ -327,7 +339,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
rep[1] = offset_2 ? offset_2 : offsetSaved;
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}
size_t ZSTD_compressBlock_fast_dictMatchState(
@ -366,15 +378,24 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const U32 dictStartIndex = ms->window.lowLimit;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 validLow = ms->window.lowLimit;
const U32 lowLimit = (endIndex - validLow > maxDistance) ? endIndex - maxDistance : validLow;
const U32 dictStartIndex = lowLimit;
const BYTE* const dictStart = dictBase + dictStartIndex;
const U32 prefixStartIndex = ms->window.dictLimit;
const U32 dictLimit = ms->window.dictLimit;
const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;
const BYTE* const prefixStart = base + prefixStartIndex;
const BYTE* const dictEnd = dictBase + prefixStartIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
U32 offset_1=rep[0], offset_2=rep[1];
/* switch to "regular" variant if extDict is invalidated due to maxDistance */
if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
const size_t h = ZSTD_hashPtr(ip, hlog, mls);
@ -394,7 +415,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
} else {
if ( (matchIndex < dictStartIndex) ||
(MEM_read32(match) != MEM_read32(ip)) ) {
@ -410,7 +431,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
offset = current - matchIndex;
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} }
/* found a match : store it */
@ -445,7 +466,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
rep[1] = offset_2;
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}

View File

@ -83,7 +83,10 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
U32* largerPtr = smallerPtr + 1;
U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
U32 dummy32; /* to be nullified at the end */
U32 const windowLow = ms->window.lowLimit;
U32 const windowValid = ms->window.lowLimit;
U32 const maxDistance = 1U << cParams->windowLog;
U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
current, dictLimit, windowLow);
@ -239,7 +242,9 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
const BYTE* const base = ms->window.base;
U32 const current = (U32)(ip-base);
U32 const windowLow = ms->window.lowLimit;
U32 const maxDistance = 1U << cParams->windowLog;
U32 const windowValid = ms->window.lowLimit;
U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
U32* const bt = ms->chainTable;
U32 const btLog = cParams->chainLog - 1;
@ -490,8 +495,10 @@ size_t ZSTD_HcFindBestMatch_generic (
const U32 dictLimit = ms->window.dictLimit;
const BYTE* const prefixStart = base + dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const U32 lowLimit = ms->window.lowLimit;
const U32 current = (U32)(ip-base);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 lowValid = ms->window.lowLimit;
const U32 lowLimit = (current - lowValid > maxDistance) ? current - maxDistance : lowValid;
const U32 minChain = current > chainSize ? current - chainSize : 0;
U32 nbAttempts = 1U << cParams->searchLog;
size_t ml=4-1;
@ -653,7 +660,6 @@ size_t ZSTD_compressBlock_lazy_generic(
/* init */
ip += (dictAndPrefixLength == 0);
ms->nextToUpdate3 = ms->nextToUpdate;
if (dictMode == ZSTD_noDict) {
U32 const maxRep = (U32)(ip - prefixLowest);
if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
@ -933,7 +939,6 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
U32 offset_1 = rep[0], offset_2 = rep[1];
/* init */
ms->nextToUpdate3 = ms->nextToUpdate;
ip += (ip == prefixStart);
/* Match Loop */

View File

@ -447,7 +447,7 @@ size_t ZSTD_ldm_generateSequences(
if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
U32 const ldmHSize = 1U << params->hashLog;
U32 const correction = ZSTD_window_correctOverflow(
&ldmState->window, /* cycleLog */ 0, maxDist, src);
&ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
}
/* 2. We enforce the maximum offset allowed.

View File

@ -255,13 +255,13 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
 * to provide a cost which is directly comparable to a match ending at the same position */
static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel)
{
if (optPtr->priceType >= zop_predef) return WEIGHT(litLength, optLevel);
if (optPtr->priceType >= zop_predef) return (int)WEIGHT(litLength, optLevel);
/* dynamic statistics */
{ U32 const llCode = ZSTD_LLcode(litLength);
int const contribution = (LL_bits[llCode] * BITCOST_MULTIPLIER)
+ WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */
- WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
int const contribution = (int)(LL_bits[llCode] * BITCOST_MULTIPLIER)
+ (int)WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */
- (int)WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
#if 1
return contribution;
#else
@ -278,7 +278,7 @@ static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLe
const optState_t* const optPtr,
int optLevel)
{
int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
int const contribution = (int)ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
+ ZSTD_litLengthContribution(litLength, optPtr, optLevel);
return contribution;
}
@ -372,13 +372,15 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
/* Update hashTable3 up to ip (excluded)
Assumption : always within prefix (i.e. not within extDict) */
static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE* const ip)
static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
U32* nextToUpdate3,
const BYTE* const ip)
{
U32* const hashTable3 = ms->hashTable3;
U32 const hashLog3 = ms->hashLog3;
const BYTE* const base = ms->window.base;
U32 idx = ms->nextToUpdate3;
U32 const target = ms->nextToUpdate3 = (U32)(ip - base);
U32 idx = *nextToUpdate3;
U32 const target = (U32)(ip - base);
size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
assert(hashLog3 > 0);
@ -387,6 +389,7 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE*
idx++;
}
*nextToUpdate3 = target;
return hashTable3[hash3];
}
@ -503,9 +506,11 @@ static U32 ZSTD_insertBt1(
} }
*smallerPtr = *largerPtr = 0;
if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */
assert(matchEndIdx > current + 8);
return matchEndIdx - (current + 8);
{ U32 positions = 0;
if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
assert(matchEndIdx > current + 8);
return MAX(positions, matchEndIdx - (current + 8));
}
}
FORCE_INLINE_TEMPLATE
@ -520,8 +525,13 @@ void ZSTD_updateTree_internal(
DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
idx, target, dictMode);
while(idx < target)
idx += ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
while(idx < target) {
U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
assert(idx < (U32)(idx + forward));
idx += forward;
}
assert((size_t)(ip - base) <= (size_t)(U32)(-1));
assert((size_t)(iend - base) <= (size_t)(U32)(-1));
ms->nextToUpdate = target;
}
@ -531,16 +541,18 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
FORCE_INLINE_TEMPLATE
U32 ZSTD_insertBtAndGetAllMatches (
ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
ZSTD_matchState_t* ms,
U32* nextToUpdate3,
const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
U32 rep[ZSTD_REP_NUM],
const U32 rep[ZSTD_REP_NUM],
U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
ZSTD_match_t* matches,
const U32 lengthToBeat,
U32 const mls /* template */)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
U32 const maxDistance = 1U << cParams->windowLog;
const BYTE* const base = ms->window.base;
U32 const current = (U32)(ip-base);
U32 const hashLog = cParams->hashLog;
@ -556,8 +568,9 @@ U32 ZSTD_insertBtAndGetAllMatches (
U32 const dictLimit = ms->window.dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const prefixStart = base + dictLimit;
U32 const btLow = btMask >= current ? 0 : current - btMask;
U32 const windowLow = ms->window.lowLimit;
U32 const btLow = (btMask >= current) ? 0 : current - btMask;
U32 const windowValid = ms->window.lowLimit;
U32 const windowLow = ((current - windowValid) > maxDistance) ? current - maxDistance : windowValid;
U32 const matchLow = windowLow ? windowLow : 1;
U32* smallerPtr = bt + 2*(current&btMask);
U32* largerPtr = bt + 2*(current&btMask) + 1;
@ -627,7 +640,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
/* HC3 match finder */
if ((mls == 3) /*static*/ && (bestLength < mls)) {
U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip);
U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
if ((matchIndex3 >= matchLow)
& (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
size_t mlen;
@ -653,9 +666,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
(ip+mlen == iLimit) ) { /* best possible length */
ms->nextToUpdate = current+1; /* skip insertion */
return 1;
}
}
}
} } }
/* no dictMatchState lookup: dicts don't have a populated HC3 table */
}
@ -760,10 +771,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */
ZSTD_matchState_t* ms,
U32* nextToUpdate3,
const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
U32 rep[ZSTD_REP_NUM], U32 const ll0,
ZSTD_match_t* matches, U32 const lengthToBeat)
const U32 rep[ZSTD_REP_NUM],
U32 const ll0,
U32 const lengthToBeat)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32 const matchLengthSearch = cParams->minMatch;
@ -772,12 +786,12 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
switch(matchLengthSearch)
{
case 3 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 3);
case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
default :
case 4 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 4);
case 5 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 5);
case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
case 7 :
case 6 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 6);
case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
}
}
@ -853,6 +867,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
U32 nextToUpdate3 = ms->nextToUpdate;
ZSTD_optimal_t* const opt = optStatePtr->priceTable;
ZSTD_match_t* const matches = optStatePtr->matchTable;
@ -862,7 +877,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
(U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
assert(optLevel <= 2);
ms->nextToUpdate3 = ms->nextToUpdate;
ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
ip += (ip==prefixStart);
@ -873,7 +887,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
/* find first match */
{ U32 const litlen = (U32)(ip - anchor);
U32 const ll0 = !litlen;
U32 const nbMatches = ZSTD_BtGetAllMatches(ms, ip, iend, dictMode, rep, ll0, matches, minMatch);
U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
if (!nbMatches) { ip++; continue; }
/* initialize opt[0] */
@ -970,7 +984,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
U32 const previousPrice = opt[cur].price;
U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
U32 const nbMatches = ZSTD_BtGetAllMatches(ms, inr, iend, dictMode, opt[cur].rep, ll0, matches, minMatch);
U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
U32 matchNb;
if (!nbMatches) {
DEBUGLOG(7, "rPos:%u : no match found", cur);
@ -1094,7 +1108,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
} /* while (ip < ilimit) */
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}
@ -1158,7 +1172,6 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
ms->window.dictLimit += (U32)srcSize;
ms->window.lowLimit = ms->window.dictLimit;
ms->nextToUpdate = ms->window.dictLimit;
ms->nextToUpdate3 = ms->window.dictLimit;
/* re-inforce weight of collected statistics */
ZSTD_upscaleStats(&ms->opt);

View File

@ -1129,9 +1129,14 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed;
assert(flushed <= produced);
assert(jobPtr->consumed <= jobPtr->src.size);
toFlush = produced - flushed;
if (toFlush==0 && (jobPtr->consumed >= jobPtr->src.size)) {
/* doneJobID is not-fully-flushed, but toFlush==0 : doneJobID should be compressing some more data */
/* if toFlush==0, nothing is available to flush.
* However, jobID is expected to still be active:
* if jobID was already completed and fully flushed,
     * ZSTDMT_flushProduced() should have already moved on to the next job.
* Therefore, some input has not yet been consumed. */
if (toFlush==0) {
assert(jobPtr->consumed < jobPtr->src.size);
}
}
@ -1148,12 +1153,16 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
{
if (params.ldmParams.enableLdm)
unsigned jobLog;
if (params.ldmParams.enableLdm) {
/* In Long Range Mode, the windowLog is typically oversized,
 * in which case it's preferable to determine the jobSize
 * based on chainLog instead. */
return MAX(21, params.cParams.chainLog + 4);
return MAX(20, params.cParams.windowLog + 2);
jobLog = MAX(21, params.cParams.chainLog + 4);
} else {
jobLog = MAX(20, params.cParams.windowLog + 2);
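        /* e.g. windowLog == 24 gives jobLog == 26 (a 64 MB target job size)
         * before clamping to ZSTDMT_JOBLOG_MAX */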
}
return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX);
}
static int ZSTDMT_overlapLog_default(ZSTD_strategy strat)
@ -1197,7 +1206,7 @@ static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params)
ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
- overlapRLog;
}
assert(0 <= ovLog && ovLog <= 30);
assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX);
DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
return (ovLog==0) ? 0 : (size_t)1 << ovLog;
@ -1391,7 +1400,7 @@ size_t ZSTDMT_initCStream_internal(
FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) );
if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;
mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
if (mtctx->singleBlockingThread) {
@ -1432,6 +1441,8 @@ size_t ZSTDMT_initCStream_internal(
if (mtctx->targetSectionSize == 0) {
mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
}
assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);
if (params.rsyncable) {
/* Aim for the targetSectionSize as the average job size. */
U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);

View File

@ -50,6 +50,7 @@
#ifndef ZSTDMT_JOBSIZE_MIN
# define ZSTDMT_JOBSIZE_MIN (1 MB)
#endif
#define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30)
#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))

View File

@ -360,8 +360,11 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize)
sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
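    /* skippable frame layout: 4-byte magic, 4-byte little-endian size field
     * (sizeU32), then sizeU32 bytes of opaque user data; the full frame spans
     * ZSTD_SKIPPABLEHEADERSIZE + sizeU32 bytes */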
RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
frameParameter_unsupported);
return skippableHeaderSize + sizeU32;
{
size_t const skippableSize = skippableHeaderSize + sizeU32;
RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong);
return skippableSize;
}
}
/** ZSTD_findDecompressedSize() :
@ -378,11 +381,10 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
size_t const skippableSize = readSkippableFrameSize(src, srcSize);
if (ZSTD_isError(skippableSize))
return skippableSize;
if (srcSize < skippableSize) {
if (ZSTD_isError(skippableSize)) {
return ZSTD_CONTENTSIZE_ERROR;
}
assert(skippableSize <= srcSize);
src = (const BYTE *)src + skippableSize;
srcSize -= skippableSize;
@ -467,6 +469,8 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
&& (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
assert(ZSTD_isError(frameSizeInfo.compressedSize) ||
frameSizeInfo.compressedSize <= srcSize);
return frameSizeInfo;
} else {
const BYTE* ip = (const BYTE*)src;
@ -529,7 +533,6 @@ size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
return frameSizeInfo.compressedSize;
}
/** ZSTD_decompressBound() :
* compatible with legacy mode
 * `src` must point to the start of a ZSTD frame or a skippable frame
@ -546,6 +549,7 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
return ZSTD_CONTENTSIZE_ERROR;
assert(srcSize >= compressedSize);
src = (const BYTE*)src + compressedSize;
srcSize -= compressedSize;
bound += decompressedBound;
@ -738,9 +742,8 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
(unsigned)magicNumber, ZSTD_MAGICNUMBER);
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
size_t const skippableSize = readSkippableFrameSize(src, srcSize);
if (ZSTD_isError(skippableSize))
return skippableSize;
RETURN_ERROR_IF(srcSize < skippableSize, srcSize_wrong);
FORWARD_IF_ERROR(skippableSize);
assert(skippableSize <= srcSize);
src = (const BYTE *)src + skippableSize;
srcSize -= skippableSize;
@ -906,6 +909,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
{ blockProperties_t bp;
size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
if (ZSTD_isError(cBlockSize)) return cBlockSize;
RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum");
dctx->expected = cBlockSize;
dctx->bType = bp.blockType;
dctx->rleSize = bp.origSize;
@ -950,6 +954,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
RETURN_ERROR(corruption_detected);
}
if (ZSTD_isError(rSize)) return rSize;
RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum");
DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize);
dctx->decodedSize += rSize;
if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize);

View File

@ -505,7 +505,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
*nbSeqPtr = nbSeq;
/* FSE table descriptors */
RETURN_ERROR_IF(ip+4 > iend, srcSize_wrong); /* minimum possible size */
RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong); /* minimum possible size: 1 byte for symbol encoding types */
{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
@ -637,9 +637,10 @@ size_t ZSTD_execSequence(BYTE* op,
if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
/* copy Literals */
ZSTD_copy8(op, *litPtr);
if (sequence.litLength > 8)
ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
else
ZSTD_copy8(op, *litPtr);
op = oLitEnd;
*litPtr = iLitEnd; /* update for next sequence */
@ -686,13 +687,13 @@ size_t ZSTD_execSequence(BYTE* op,
if (oMatchEnd > oend-(16-MINMATCH)) {
if (op < oend_w) {
ZSTD_wildcopy(op, match, oend_w - op);
ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
match += oend_w - op;
op = oend_w;
}
while (op < oMatchEnd) *op++ = *match++;
} else {
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
}
return sequenceLength;
}
@ -717,9 +718,11 @@ size_t ZSTD_execSequenceLong(BYTE* op,
if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
/* copy Literals */
ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
if (sequence.litLength > 8)
ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
else
ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
op = oLitEnd;
*litPtr = iLitEnd; /* update for next sequence */
@ -766,13 +769,13 @@ size_t ZSTD_execSequenceLong(BYTE* op,
if (oMatchEnd > oend-(16-MINMATCH)) {
if (op < oend_w) {
ZSTD_wildcopy(op, match, oend_w - op);
ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
match += oend_w - op;
op = oend_w;
}
while (op < oMatchEnd) *op++ = *match++;
} else {
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
}
return sequenceLength;
}
@ -889,6 +892,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
}
FORCE_INLINE_TEMPLATE size_t
DONT_VECTORIZE
ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
@ -918,6 +922,11 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
ZSTD_STATIC_ASSERT(
BIT_DStream_unfinished < BIT_DStream_completed &&
BIT_DStream_endOfBuffer < BIT_DStream_completed &&
BIT_DStream_completed < BIT_DStream_overflow);
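    /* the reload loop below relies on this ordering: any status up to
     * BIT_DStream_completed leaves the stream consistent, while
     * BIT_DStream_overflow signals corruption */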
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
nbSeq--;
{ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
@ -930,6 +939,7 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
/* check if reached exact end */
DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
RETURN_ERROR_IF(nbSeq, corruption_detected);
RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected);
/* save reps for next block */
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
}
@ -1131,6 +1141,7 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
static TARGET_ATTRIBUTE("bmi2") size_t
DONT_VECTORIZE
ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,

View File

@ -526,10 +526,10 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
* Prepare a context for dictionary building.
 * The context is only dependent on the parameter `d` and can be used multiple
* times.
* Returns 1 on success or zero on error.
 * Returns 0 on success, or an error code on failure.
* The context must be destroyed with `COVER_ctx_destroy()`.
*/
static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
const size_t *samplesSizes, unsigned nbSamples,
unsigned d, double splitPoint) {
const BYTE *const samples = (const BYTE *)samplesBuffer;
@ -544,17 +544,17 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
(unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
return 0;
return ERROR(srcSize_wrong);
}
/* Check if there are at least 5 training samples */
if (nbTrainSamples < 5) {
DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
return 0;
return ERROR(srcSize_wrong);
}
/* Check if there's testing sample */
if (nbTestSamples < 1) {
DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
return 0;
return ERROR(srcSize_wrong);
}
/* Zero the context */
memset(ctx, 0, sizeof(*ctx));
@ -577,7 +577,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
COVER_ctx_destroy(ctx);
return 0;
return ERROR(memory_allocation);
}
ctx->freqs = NULL;
ctx->d = d;
@ -624,7 +624,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
(ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
ctx->freqs = ctx->suffix;
ctx->suffix = NULL;
return 1;
return 0;
}
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
@ -729,11 +729,11 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
/* Checks */
if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
DISPLAYLEVEL(1, "Cover parameters incorrect\n");
return ERROR(GENERIC);
return ERROR(parameter_outOfBound);
}
if (nbSamples == 0) {
DISPLAYLEVEL(1, "Cover must have at least one input file\n");
return ERROR(GENERIC);
return ERROR(srcSize_wrong);
}
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@ -741,15 +741,18 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
return ERROR(dstSize_tooSmall);
}
/* Initialize context and activeDmers */
if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
parameters.d, parameters.splitPoint)) {
return ERROR(GENERIC);
{
size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
parameters.d, parameters.splitPoint);
if (ZSTD_isError(initVal)) {
return initVal;
}
}
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel);
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
COVER_ctx_destroy(&ctx);
return ERROR(GENERIC);
return ERROR(memory_allocation);
}
DISPLAYLEVEL(2, "Building dictionary\n");
@ -810,7 +813,7 @@ size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
cctx, dst, dstCapacity, samples + offsets[i],
samplesSizes[i], cdict);
if (ZSTD_isError(size)) {
totalCompressedSize = ERROR(GENERIC);
totalCompressedSize = size;
goto _compressCleanup;
}
totalCompressedSize += size;
@ -886,9 +889,11 @@ void COVER_best_start(COVER_best_t *best) {
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
* If this dictionary is the best so far save it and its parameters.
*/
void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
ZDICT_cover_params_t parameters, void *dict,
size_t dictSize) {
void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
COVER_dictSelection_t selection) {
void* dict = selection.dictContent;
size_t compressedSize = selection.totalCompressedSize;
size_t dictSize = selection.dictSize;
if (!best) {
return;
}
@ -914,6 +919,9 @@ void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
}
}
/* Save the dictionary, parameters, and size */
if (!dict) {
return;
}
memcpy(best->dict, dict, dictSize);
best->dictSize = dictSize;
best->parameters = parameters;
@ -926,6 +934,111 @@ void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
}
}
COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
COVER_dictSelection_t selection = { NULL, 0, error };
return selection;
}
unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent);
}
void COVER_dictSelectionFree(COVER_dictSelection_t selection){
free(selection.dictContent);
}
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
size_t largestDict = 0;
size_t largestCompressed = 0;
BYTE* customDictContentEnd = customDictContent + dictContentSize;
BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize);
BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize);
double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
if (!largestDictbuffer || !candidateDictBuffer) {
free(largestDictbuffer);
free(candidateDictBuffer);
return COVER_dictSelectionError(dictContentSize);
}
/* Initial dictionary size and compressed size */
memcpy(largestDictbuffer, customDictContent, dictContentSize);
dictContentSize = ZDICT_finalizeDictionary(
largestDictbuffer, dictContentSize, customDictContent, dictContentSize,
samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
if (ZDICT_isError(dictContentSize)) {
free(largestDictbuffer);
free(candidateDictBuffer);
return COVER_dictSelectionError(dictContentSize);
}
totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
samplesBuffer, offsets,
nbCheckSamples, nbSamples,
largestDictbuffer, dictContentSize);
if (ZSTD_isError(totalCompressedSize)) {
free(largestDictbuffer);
free(candidateDictBuffer);
return COVER_dictSelectionError(totalCompressedSize);
}
if (params.shrinkDict == 0) {
COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
free(candidateDictBuffer);
return selection;
}
largestDict = dictContentSize;
largestCompressed = totalCompressedSize;
dictContentSize = ZDICT_DICTSIZE_MIN;
/* Largest dict is initially at least ZDICT_DICTSIZE_MIN */
while (dictContentSize < largestDict) {
memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
dictContentSize = ZDICT_finalizeDictionary(
candidateDictBuffer, dictContentSize, customDictContentEnd - dictContentSize, dictContentSize,
samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
if (ZDICT_isError(dictContentSize)) {
free(largestDictbuffer);
free(candidateDictBuffer);
return COVER_dictSelectionError(dictContentSize);
}
totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
samplesBuffer, offsets,
nbCheckSamples, nbSamples,
candidateDictBuffer, dictContentSize);
if (ZSTD_isError(totalCompressedSize)) {
free(largestDictbuffer);
free(candidateDictBuffer);
return COVER_dictSelectionError(totalCompressedSize);
}
if (totalCompressedSize <= largestCompressed * regressionTolerance) {
COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
free(largestDictbuffer);
return selection;
}
dictContentSize *= 2;
}
dictContentSize = largestDict;
totalCompressedSize = largestCompressed;
{
COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
free(candidateDictBuffer);
return selection;
}
}
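/* Editor's note (illustrative arithmetic): with params.shrinkDictMaxRegression = 10,
 * regressionTolerance = (10 / 100.0) + 1.00 = 1.10, so the loop above accepts the
 * first (smallest) candidate dictionary whose total compressed size is within 10%
 * of the size achieved with the largest dictionary. */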
/**
* Parameters for COVER_tryParameters().
*/
@ -951,6 +1064,7 @@ static void COVER_tryParameters(void *opaque) {
/* Allocate space for hash table, dict, and freqs */
COVER_map_t activeDmers;
BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
@ -966,29 +1080,21 @@ static void COVER_tryParameters(void *opaque) {
{
const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
dictBufferCapacity, parameters);
dictBufferCapacity = ZDICT_finalizeDictionary(
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples,
parameters.zParams);
if (ZDICT_isError(dictBufferCapacity)) {
DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
totalCompressedSize);
if (COVER_dictSelectionIsError(selection)) {
DISPLAYLEVEL(1, "Failed to select dictionary\n");
goto _cleanup;
}
}
/* Check total compressed size */
totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
ctx->samples, ctx->offsets,
ctx->nbTrainSamples, ctx->nbSamples,
dict, dictBufferCapacity);
_cleanup:
COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
dictBufferCapacity);
free(dict);
COVER_best_finish(data->best, parameters, selection);
free(data);
COVER_map_destroy(&activeDmers);
if (dict) {
free(dict);
}
COVER_dictSelectionFree(selection);
if (freqs) {
free(freqs);
}
@ -1010,6 +1116,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
const unsigned kIterations =
(1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
const unsigned shrinkDict = 0;
/* Local variables */
const int displayLevel = parameters->zParams.notificationLevel;
unsigned iteration = 1;
@ -1022,15 +1129,15 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
/* Checks */
if (splitPoint <= 0 || splitPoint > 1) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
return ERROR(GENERIC);
return ERROR(parameter_outOfBound);
}
if (kMinK < kMaxD || kMaxK < kMinK) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
return ERROR(GENERIC);
return ERROR(parameter_outOfBound);
}
if (nbSamples == 0) {
DISPLAYLEVEL(1, "Cover must have at least one input file\n");
return ERROR(GENERIC);
return ERROR(srcSize_wrong);
}
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@ -1054,11 +1161,14 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
/* Initialize the context for this value of d */
COVER_ctx_t ctx;
LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint)) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
COVER_best_destroy(&best);
POOL_free(pool);
return ERROR(GENERIC);
{
const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint);
if (ZSTD_isError(initVal)) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
COVER_best_destroy(&best);
POOL_free(pool);
return initVal;
}
}
if (!warned) {
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel);
@ -1075,7 +1185,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
COVER_best_destroy(&best);
COVER_ctx_destroy(&ctx);
POOL_free(pool);
return ERROR(GENERIC);
return ERROR(memory_allocation);
}
data->ctx = &ctx;
data->best = &best;
@ -1085,6 +1195,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
data->parameters.d = d;
data->parameters.splitPoint = splitPoint;
data->parameters.steps = kSteps;
data->parameters.shrinkDict = shrinkDict;
data->parameters.zParams.notificationLevel = g_displayLevel;
/* Check the parameters */
if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {

View File

@ -46,6 +46,15 @@ typedef struct {
U32 size;
} COVER_epoch_info_t;
/**
* Struct used for the dictionary selection function.
*/
typedef struct COVER_dictSelection {
BYTE* dictContent;
size_t dictSize;
size_t totalCompressedSize;
} COVER_dictSelection_t;
/**
* Computes the number of epochs and the size of each epoch.
* We will make sure that each epoch gets at least 10 * k bytes.
@ -107,6 +116,32 @@ void COVER_best_start(COVER_best_t *best);
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
* If this dictionary is the best so far save it and its parameters.
*/
void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
ZDICT_cover_params_t parameters, void *dict,
size_t dictSize);
void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
COVER_dictSelection_t selection);
/**
* Error function for COVER_selectDict function. Checks if the return
* value is an error.
*/
unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection);
/**
* Error function for COVER_selectDict function. Returns a struct where
* return.totalCompressedSize is a ZSTD error.
*/
COVER_dictSelection_t COVER_dictSelectionError(size_t error);
/**
* Always call after COVER_selectDict() is called to free up the memory used by
* the newly created dictionary.
*/
void COVER_dictSelectionFree(COVER_dictSelection_t selection);
/**
* Called to finalize the dictionary and select one based on whether or not
* the shrink-dict flag was enabled. If enabled, the dictionary used is the
* smallest dictionary within a specified regression of the compressed size
* from the largest dictionary.
*/
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
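A minimal caller sketch (editor's illustration; `dict`, `tail`, `ctx`, `params`, and
`totalCompressedSize` are placeholders mirroring how COVER_tryParameters() uses these
helpers elsewhere in this diff):

    COVER_dictSelection_t selection = COVER_selectDict(dict + tail,
        dictBufferCapacity - tail, ctx->samples, ctx->samplesSizes,
        nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples,
        params, ctx->offsets, totalCompressedSize);
    if (COVER_dictSelectionIsError(selection)) {
        return selection.totalCompressedSize;  /* holds a ZSTD error code */
    }
    /* ... consume selection.dictContent / selection.dictSize ... */
    COVER_dictSelectionFree(selection);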

View File

@ -287,10 +287,10 @@ FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)
* Prepare a context for dictionary building.
* The context is only dependent on the parameter `d` and can be used multiple
* times.
* Returns 1 on success or zero on error.
* Returns 0 on success or error code on error.
* The context must be destroyed with `FASTCOVER_ctx_destroy()`.
*/
static int
static size_t
FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
const void* samplesBuffer,
const size_t* samplesSizes, unsigned nbSamples,
@ -310,19 +310,19 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
(unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
return 0;
return ERROR(srcSize_wrong);
}
/* Check if there are at least 5 training samples */
if (nbTrainSamples < 5) {
DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples);
return 0;
return ERROR(srcSize_wrong);
}
/* Check if there's testing sample */
if (nbTestSamples < 1) {
DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples);
return 0;
return ERROR(srcSize_wrong);
}
/* Zero the context */
@ -347,7 +347,7 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
if (ctx->offsets == NULL) {
DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n");
FASTCOVER_ctx_destroy(ctx);
return 0;
return ERROR(memory_allocation);
}
/* Fill offsets from the samplesSizes */
@ -364,13 +364,13 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
if (ctx->freqs == NULL) {
DISPLAYLEVEL(1, "Failed to allocate frequency table \n");
FASTCOVER_ctx_destroy(ctx);
return 0;
return ERROR(memory_allocation);
}
DISPLAYLEVEL(2, "Computing frequencies\n");
FASTCOVER_computeFrequency(ctx->freqs, ctx);
return 1;
return 0;
}
@ -435,7 +435,6 @@ FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx,
return tail;
}
/**
* Parameters for FASTCOVER_tryParameters().
*/
@ -464,6 +463,7 @@ static void FASTCOVER_tryParameters(void *opaque)
U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16));
/* Allocate space for hash table, dict, and freqs */
BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
if (!segmentFreqs || !dict || !freqs) {
DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
@ -473,27 +473,24 @@ static void FASTCOVER_tryParameters(void *opaque)
memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32));
/* Build the dictionary */
{ const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity,
parameters, segmentFreqs);
parameters, segmentFreqs);
const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
dictBufferCapacity = ZDICT_finalizeDictionary(
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
ctx->samples, ctx->samplesSizes, nbFinalizeSamples, parameters.zParams);
if (ZDICT_isError(dictBufferCapacity)) {
DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
totalCompressedSize);
if (COVER_dictSelectionIsError(selection)) {
DISPLAYLEVEL(1, "Failed to select dictionary\n");
goto _cleanup;
}
}
/* Check total compressed size */
totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
ctx->samples, ctx->offsets,
ctx->nbTrainSamples, ctx->nbSamples,
dict, dictBufferCapacity);
_cleanup:
COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
dictBufferCapacity);
free(dict);
COVER_best_finish(data->best, parameters, selection);
free(data);
free(segmentFreqs);
free(dict);
COVER_dictSelectionFree(selection);
free(freqs);
}
@ -508,6 +505,7 @@ FASTCOVER_convertToCoverParams(ZDICT_fastCover_params_t fastCoverParams,
coverParams->nbThreads = fastCoverParams.nbThreads;
coverParams->splitPoint = fastCoverParams.splitPoint;
coverParams->zParams = fastCoverParams.zParams;
coverParams->shrinkDict = fastCoverParams.shrinkDict;
}
@ -524,6 +522,7 @@ FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams,
fastCoverParams->f = f;
fastCoverParams->accel = accel;
fastCoverParams->zParams = coverParams.zParams;
fastCoverParams->shrinkDict = coverParams.shrinkDict;
}
@ -550,11 +549,11 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f,
parameters.accel)) {
DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
return ERROR(GENERIC);
return ERROR(parameter_outOfBound);
}
if (nbSamples == 0) {
DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
return ERROR(GENERIC);
return ERROR(srcSize_wrong);
}
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@ -564,11 +563,14 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
/* Assign corresponding FASTCOVER_accel_t to accelParams*/
accelParams = FASTCOVER_defaultAccelParameters[parameters.accel];
/* Initialize context */
if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
{
size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
coverParams.d, parameters.splitPoint, parameters.f,
accelParams)) {
DISPLAYLEVEL(1, "Failed to initialize context\n");
return ERROR(GENERIC);
accelParams);
if (ZSTD_isError(initVal)) {
DISPLAYLEVEL(1, "Failed to initialize context\n");
return initVal;
}
}
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel);
/* Build the dictionary */
@ -616,6 +618,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
(1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f;
const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
const unsigned shrinkDict = 0;
/* Local variables */
const int displayLevel = parameters->zParams.notificationLevel;
unsigned iteration = 1;
@ -627,19 +630,19 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
/* Checks */
if (splitPoint <= 0 || splitPoint > 1) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");
return ERROR(GENERIC);
return ERROR(parameter_outOfBound);
}
if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n");
return ERROR(GENERIC);
return ERROR(parameter_outOfBound);
}
if (kMinK < kMaxD || kMaxK < kMinK) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n");
return ERROR(GENERIC);
return ERROR(parameter_outOfBound);
}
if (nbSamples == 0) {
LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n");
return ERROR(GENERIC);
return ERROR(srcSize_wrong);
}
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n",
@ -666,11 +669,14 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
/* Initialize the context for this value of d */
FASTCOVER_ctx_t ctx;
LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams)) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
COVER_best_destroy(&best);
POOL_free(pool);
return ERROR(GENERIC);
{
size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams);
if (ZSTD_isError(initVal)) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
COVER_best_destroy(&best);
POOL_free(pool);
return initVal;
}
}
if (!warned) {
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel);
@ -687,7 +693,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
COVER_best_destroy(&best);
FASTCOVER_ctx_destroy(&ctx);
POOL_free(pool);
return ERROR(GENERIC);
return ERROR(memory_allocation);
}
data->ctx = &ctx;
data->best = &best;
@ -697,6 +703,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
data->parameters.d = d;
data->parameters.splitPoint = splitPoint;
data->parameters.steps = kSteps;
data->parameters.shrinkDict = shrinkDict;
data->parameters.zParams.notificationLevel = g_displayLevel;
/* Check the parameters */
if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,

View File

@ -741,7 +741,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
/* analyze, build stats, starting with literals */
{ size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
if (HUF_isError(maxNbBits)) {
eSize = ERROR(GENERIC);
eSize = maxNbBits;
DISPLAYLEVEL(1, " HUF_buildCTable error \n");
goto _cleanup;
}
@ -764,7 +764,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax);
if (FSE_isError(errorCode)) {
eSize = ERROR(GENERIC);
eSize = errorCode;
DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
goto _cleanup;
}
@ -773,7 +773,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u];
errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
if (FSE_isError(errorCode)) {
eSize = ERROR(GENERIC);
eSize = errorCode;
DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
goto _cleanup;
}
@ -782,7 +782,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
if (FSE_isError(errorCode)) {
eSize = ERROR(GENERIC);
eSize = errorCode;
DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
goto _cleanup;
}
@ -791,7 +791,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
/* write result to buffer */
{ size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog);
if (HUF_isError(hhSize)) {
eSize = ERROR(GENERIC);
eSize = hhSize;
DISPLAYLEVEL(1, "HUF_writeCTable error \n");
goto _cleanup;
}
@ -802,7 +802,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
{ size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
if (FSE_isError(ohSize)) {
eSize = ERROR(GENERIC);
eSize = ohSize;
DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n");
goto _cleanup;
}
@ -813,7 +813,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
{ size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog);
if (FSE_isError(mhSize)) {
eSize = ERROR(GENERIC);
eSize = mhSize;
DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n");
goto _cleanup;
}
@ -824,7 +824,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
{ size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog);
if (FSE_isError(lhSize)) {
eSize = ERROR(GENERIC);
eSize = lhSize;
DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n");
goto _cleanup;
}
@ -834,7 +834,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
}
if (maxDstSize<12) {
eSize = ERROR(GENERIC);
eSize = ERROR(dstSize_tooSmall);
DISPLAYLEVEL(1, "not enough space to write RepOffsets \n");
goto _cleanup;
}

View File

@ -94,6 +94,8 @@ typedef struct {
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */
unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */
unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worst shrinkDictMaxRegression% worse than the max dict size dictionary. */
ZDICT_params_t zParams;
} ZDICT_cover_params_t;
@ -105,6 +107,9 @@ typedef struct {
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */
unsigned accel; /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default(1) */
unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */
unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worst shrinkDictMaxRegression% worse than the max dict size dictionary. */
ZDICT_params_t zParams;
} ZDICT_fastCover_params_t;
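A hedged configuration sketch for the new shrinkDict fields (editor's illustration,
shown on ZDICT_cover_params_t; ZDICT_fastCover_params_t carries the same two fields.
The dictionary/sample buffers are assumed to be set up by the caller):

    ZDICT_cover_params_t params;
    memset(&params, 0, sizeof(params));          /* zeroed fields use defaults */
    params.shrinkDict = 1;                       /* enable dictionary shrinking */
    params.shrinkDictMaxRegression = 5;          /* tolerate up to 5% worse compressed size */
    {   size_t const dictSize = ZDICT_optimizeTrainFromBuffer_cover(
            dictBuffer, dictBufferCapacity,
            samplesBuffer, samplesSizes, nbSamples, &params);
        if (ZDICT_isError(dictSize)) { /* handle error */ }
    }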

View File

@ -238,6 +238,10 @@ MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size
frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
break;
}
if (!ZSTD_isError(frameSizeInfo.compressedSize) && frameSizeInfo.compressedSize > srcSize) {
frameSizeInfo.compressedSize = ERROR(srcSize_wrong);
frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
}
return frameSizeInfo;
}

View File

@ -1073,99 +1073,102 @@ static size_t HUF_decompress_usingDTable( /* -3% slower when non static */
const void* cSrc, size_t cSrcSize,
const U16* DTable)
{
BYTE* const ostart = (BYTE*) dst;
BYTE* op = ostart;
BYTE* const omax = op + maxDstSize;
BYTE* const olimit = omax-15;
const void* ptr = DTable;
const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
const U32 dtLog = DTable[0];
size_t errorCode;
U32 reloadStatus;
/* Init */
const U16* jumpTable = (const U16*)cSrc;
const size_t length1 = FSE_readLE16(jumpTable);
const size_t length2 = FSE_readLE16(jumpTable+1);
const size_t length3 = FSE_readLE16(jumpTable+2);
const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency !!
const char* const start1 = (const char*)(cSrc) + 6;
const char* const start2 = start1 + length1;
const char* const start3 = start2 + length2;
const char* const start4 = start3 + length3;
FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
errorCode = FSE_initDStream(&bitD1, start1, length1);
if (FSE_isError(errorCode)) return errorCode;
errorCode = FSE_initDStream(&bitD2, start2, length2);
if (FSE_isError(errorCode)) return errorCode;
errorCode = FSE_initDStream(&bitD3, start3, length3);
if (FSE_isError(errorCode)) return errorCode;
errorCode = FSE_initDStream(&bitD4, start4, length4);
if (FSE_isError(errorCode)) return errorCode;
reloadStatus=FSE_reloadDStream(&bitD2);
/* 16 symbols per loop */
for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit); /* D2-3-4 are supposed to be synchronized and finish together */
op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
if (cSrcSize < 6) return (size_t)-FSE_ERROR_srcSize_wrong;
{
#define HUF_DECODE_SYMBOL_0(n, Dstream) \
op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
BYTE* const ostart = (BYTE*) dst;
BYTE* op = ostart;
BYTE* const omax = op + maxDstSize;
BYTE* const olimit = omax-15;
#define HUF_DECODE_SYMBOL_1(n, Dstream) \
op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
const void* ptr = DTable;
const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
const U32 dtLog = DTable[0];
size_t errorCode;
U32 reloadStatus;
#define HUF_DECODE_SYMBOL_2(n, Dstream) \
op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
if (FSE_32bits()) FSE_reloadDStream(&Dstream)
/* Init */
HUF_DECODE_SYMBOL_1( 0, bitD1);
HUF_DECODE_SYMBOL_1( 1, bitD2);
HUF_DECODE_SYMBOL_1( 2, bitD3);
HUF_DECODE_SYMBOL_1( 3, bitD4);
HUF_DECODE_SYMBOL_2( 4, bitD1);
HUF_DECODE_SYMBOL_2( 5, bitD2);
HUF_DECODE_SYMBOL_2( 6, bitD3);
HUF_DECODE_SYMBOL_2( 7, bitD4);
HUF_DECODE_SYMBOL_1( 8, bitD1);
HUF_DECODE_SYMBOL_1( 9, bitD2);
HUF_DECODE_SYMBOL_1(10, bitD3);
HUF_DECODE_SYMBOL_1(11, bitD4);
HUF_DECODE_SYMBOL_0(12, bitD1);
HUF_DECODE_SYMBOL_0(13, bitD2);
HUF_DECODE_SYMBOL_0(14, bitD3);
HUF_DECODE_SYMBOL_0(15, bitD4);
}
const U16* jumpTable = (const U16*)cSrc;
const size_t length1 = FSE_readLE16(jumpTable);
const size_t length2 = FSE_readLE16(jumpTable+1);
const size_t length3 = FSE_readLE16(jumpTable+2);
const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency !!
const char* const start1 = (const char*)(cSrc) + 6;
const char* const start2 = start1 + length1;
const char* const start3 = start2 + length2;
const char* const start4 = start3 + length3;
FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */
return (size_t)-FSE_ERROR_corruptionDetected;
if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
/* tail */
{
// bitTail = bitD1; // *much* slower : -20% !??!
FSE_DStream_t bitTail;
bitTail.ptr = bitD1.ptr;
bitTail.bitsConsumed = bitD1.bitsConsumed;
bitTail.bitContainer = bitD1.bitContainer; // required in case of FSE_DStream_endOfBuffer
bitTail.start = start1;
for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
errorCode = FSE_initDStream(&bitD1, start1, length1);
if (FSE_isError(errorCode)) return errorCode;
errorCode = FSE_initDStream(&bitD2, start2, length2);
if (FSE_isError(errorCode)) return errorCode;
errorCode = FSE_initDStream(&bitD3, start3, length3);
if (FSE_isError(errorCode)) return errorCode;
errorCode = FSE_initDStream(&bitD4, start4, length4);
if (FSE_isError(errorCode)) return errorCode;
reloadStatus=FSE_reloadDStream(&bitD2);
/* 16 symbols per loop */
for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit); /* D2-3-4 are supposed to be synchronized and finish together */
op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
{
HUF_DECODE_SYMBOL_0(0, bitTail);
#define HUF_DECODE_SYMBOL_0(n, Dstream) \
op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
#define HUF_DECODE_SYMBOL_1(n, Dstream) \
op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
#define HUF_DECODE_SYMBOL_2(n, Dstream) \
op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
if (FSE_32bits()) FSE_reloadDStream(&Dstream)
HUF_DECODE_SYMBOL_1( 0, bitD1);
HUF_DECODE_SYMBOL_1( 1, bitD2);
HUF_DECODE_SYMBOL_1( 2, bitD3);
HUF_DECODE_SYMBOL_1( 3, bitD4);
HUF_DECODE_SYMBOL_2( 4, bitD1);
HUF_DECODE_SYMBOL_2( 5, bitD2);
HUF_DECODE_SYMBOL_2( 6, bitD3);
HUF_DECODE_SYMBOL_2( 7, bitD4);
HUF_DECODE_SYMBOL_1( 8, bitD1);
HUF_DECODE_SYMBOL_1( 9, bitD2);
HUF_DECODE_SYMBOL_1(10, bitD3);
HUF_DECODE_SYMBOL_1(11, bitD4);
HUF_DECODE_SYMBOL_0(12, bitD1);
HUF_DECODE_SYMBOL_0(13, bitD2);
HUF_DECODE_SYMBOL_0(14, bitD3);
HUF_DECODE_SYMBOL_0(15, bitD4);
}
if (FSE_endOfDStream(&bitTail))
return op-ostart;
if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */
return (size_t)-FSE_ERROR_corruptionDetected;
/* tail */
{
// bitTail = bitD1; // *much* slower : -20% !??!
FSE_DStream_t bitTail;
bitTail.ptr = bitD1.ptr;
bitTail.bitsConsumed = bitD1.bitsConsumed;
bitTail.bitContainer = bitD1.bitContainer; // required in case of FSE_DStream_endOfBuffer
bitTail.start = start1;
for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
{
HUF_DECODE_SYMBOL_0(0, bitTail);
}
if (FSE_endOfDStream(&bitTail))
return op-ostart;
}
if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
return (size_t)-FSE_ERROR_corruptionDetected;
}
if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
return (size_t)-FSE_ERROR_corruptionDetected;
}
@ -1355,8 +1358,6 @@ static unsigned ZSTD_isLittleEndian(void)
static U16 ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }
static U32 ZSTD_read32(const void* p) { U32 r; memcpy(&r, p, sizeof(r)); return r; }
static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
@ -1381,16 +1382,9 @@ static U16 ZSTD_readLE16(const void* memPtr)
}
}
static U32 ZSTD_readLE32(const void* memPtr)
static U32 ZSTD_readLE24(const void* memPtr)
{
if (ZSTD_isLittleEndian())
return ZSTD_read32(memPtr);
else
{
const BYTE* p = (const BYTE*)memPtr;
return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
}
return ZSTD_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
}
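/* Editor's note (illustrative): for input bytes { 0x01, 0x02, 0x03 },
 * ZSTD_readLE16() yields 0x0201 and the third byte adds 0x03 << 16,
 * so ZSTD_readLE24() returns 0x030201. */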
static U32 ZSTD_readBE32(const void* memPtr)
@ -1704,13 +1698,13 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
seqState->prevOffset = seq->offset;
if (litLength == MaxLL)
{
U32 add = dumps<de ? *dumps++ : 0;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) litLength += add;
else
{
if (dumps<=(de-3))
{
litLength = ZSTD_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
litLength = ZSTD_readLE24(dumps);
dumps += 3;
}
}
@ -1732,13 +1726,13 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
if (matchLength == MaxML)
{
U32 add = dumps<de ? *dumps++ : 0;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) matchLength += add;
else
{
if (dumps<=(de-3))
{
matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
matchLength = ZSTD_readLE24(dumps);
dumps += 3;
}
}

View File

@ -217,6 +217,11 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
}
}
MEM_STATIC U32 MEM_readLE24(const void* memPtr)
{
return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
}
MEM_STATIC U32 MEM_readLE32(const void* memPtr)
{
if (MEM_isLittleEndian())
@ -3043,11 +3048,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
seqState->prevOffset = seq->offset;
if (litLength == MaxLL)
{
U32 add = *dumps++;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) litLength += add;
else
else if (dumps + 3 <= de)
{
litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
litLength = MEM_readLE24(dumps);
dumps += 3;
}
if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
@ -3073,11 +3078,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
if (matchLength == MaxML)
{
U32 add = *dumps++;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) matchLength += add;
else
else if (dumps + 3 <= de)
{
matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
matchLength = MEM_readLE24(dumps);
dumps += 3;
}
if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */

View File

@ -219,6 +219,11 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
}
}
MEM_STATIC U32 MEM_readLE24(const void* memPtr)
{
return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
}
MEM_STATIC U32 MEM_readLE32(const void* memPtr)
{
if (MEM_isLittleEndian())
@ -2684,11 +2689,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
seqState->prevOffset = seq->offset;
if (litLength == MaxLL)
{
U32 add = *dumps++;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) litLength += add;
else
else if (dumps + 3 <= de)
{
litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
litLength = MEM_readLE24(dumps);
dumps += 3;
}
if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
@ -2714,11 +2719,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
if (matchLength == MaxML)
{
U32 add = *dumps++;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) matchLength += add;
else
else if (dumps + 3 <= de)
{
matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
matchLength = MEM_readLE24(dumps);
dumps += 3;
}
if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */

View File

@ -189,6 +189,11 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
}
}
MEM_STATIC U32 MEM_readLE24(const void* memPtr)
{
return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
}
MEM_STATIC U32 MEM_readLE32(const void* memPtr)
{
if (MEM_isLittleEndian())
@ -2808,13 +2813,12 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
prevOffset = litLength ? seq->offset : seqState->prevOffset;
if (litLength == MaxLL) {
U32 add = *dumps++;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) litLength += add;
else {
litLength = dumps[0] + (dumps[1]<<8) + (dumps[2]<<16);
else if (dumps + 3 <= de) {
litLength = MEM_readLE24(dumps);
dumps += 3;
}
if (dumps > de) { litLength = MaxLL+255; } /* late correction, to avoid using uninitialized memory */
if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
}
@ -2837,13 +2841,12 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
/* MatchLength */
matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
if (matchLength == MaxML) {
U32 add = *dumps++;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) matchLength += add;
else {
matchLength = dumps[0] + (dumps[1]<<8) + (dumps[2]<<16);
else if (dumps + 3 <= de) {
matchLength = MEM_readLE24(dumps);
dumps += 3;
}
if (dumps > de) { matchLength = MaxML+255; } /* late correction, to avoid using uninitialized memory */
if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
}
matchLength += MINMATCH;

View File

@ -1998,91 +1998,92 @@ size_t HUFv05_decompress4X2_usingDTable(
const void* cSrc, size_t cSrcSize,
const U16* DTable)
{
const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
const void* const dtPtr = DTable;
const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr) +1;
const U32 dtLog = DTable[0];
size_t errorCode;
/* Init */
BITv05_DStream_t bitD1;
BITv05_DStream_t bitD2;
BITv05_DStream_t bitD3;
BITv05_DStream_t bitD4;
const size_t length1 = MEM_readLE16(istart);
const size_t length2 = MEM_readLE16(istart+2);
const size_t length3 = MEM_readLE16(istart+4);
size_t length4;
const BYTE* const istart1 = istart + 6; /* jumpTable */
const BYTE* const istart2 = istart1 + length1;
const BYTE* const istart3 = istart2 + length2;
const BYTE* const istart4 = istart3 + length3;
const size_t segmentSize = (dstSize+3) / 4;
BYTE* const opStart2 = ostart + segmentSize;
BYTE* const opStart3 = opStart2 + segmentSize;
BYTE* const opStart4 = opStart3 + segmentSize;
BYTE* op1 = ostart;
BYTE* op2 = opStart2;
BYTE* op3 = opStart3;
BYTE* op4 = opStart4;
U32 endSignal;
/* Check */
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
{
const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
const void* const dtPtr = DTable;
const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr) +1;
const U32 dtLog = DTable[0];
size_t errorCode;
length4 = cSrcSize - (length1 + length2 + length3 + 6);
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
errorCode = BITv05_initDStream(&bitD1, istart1, length1);
if (HUFv05_isError(errorCode)) return errorCode;
errorCode = BITv05_initDStream(&bitD2, istart2, length2);
if (HUFv05_isError(errorCode)) return errorCode;
errorCode = BITv05_initDStream(&bitD3, istart3, length3);
if (HUFv05_isError(errorCode)) return errorCode;
errorCode = BITv05_initDStream(&bitD4, istart4, length4);
if (HUFv05_isError(errorCode)) return errorCode;
/* Init */
BITv05_DStream_t bitD1;
BITv05_DStream_t bitD2;
BITv05_DStream_t bitD3;
BITv05_DStream_t bitD4;
const size_t length1 = MEM_readLE16(istart);
const size_t length2 = MEM_readLE16(istart+2);
const size_t length3 = MEM_readLE16(istart+4);
size_t length4;
const BYTE* const istart1 = istart + 6; /* jumpTable */
const BYTE* const istart2 = istart1 + length1;
const BYTE* const istart3 = istart2 + length2;
const BYTE* const istart4 = istart3 + length3;
const size_t segmentSize = (dstSize+3) / 4;
BYTE* const opStart2 = ostart + segmentSize;
BYTE* const opStart3 = opStart2 + segmentSize;
BYTE* const opStart4 = opStart3 + segmentSize;
BYTE* op1 = ostart;
BYTE* op2 = opStart2;
BYTE* op3 = opStart3;
BYTE* op4 = opStart4;
U32 endSignal;
/* 16-32 symbols per loop (4-8 symbols per stream) */
endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) {
HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
HUFv05_DECODE_SYMBOLX2_1(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_1(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_1(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_1(op4, &bitD4);
HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
HUFv05_DECODE_SYMBOLX2_0(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_0(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_0(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_0(op4, &bitD4);
length4 = cSrcSize - (length1 + length2 + length3 + 6);
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
errorCode = BITv05_initDStream(&bitD1, istart1, length1);
if (HUFv05_isError(errorCode)) return errorCode;
errorCode = BITv05_initDStream(&bitD2, istart2, length2);
if (HUFv05_isError(errorCode)) return errorCode;
errorCode = BITv05_initDStream(&bitD3, istart3, length3);
if (HUFv05_isError(errorCode)) return errorCode;
errorCode = BITv05_initDStream(&bitD4, istart4, length4);
if (HUFv05_isError(errorCode)) return errorCode;
/* 16-32 symbols per loop (4-8 symbols per stream) */
endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) {
HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
HUFv05_DECODE_SYMBOLX2_1(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_1(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_1(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_1(op4, &bitD4);
HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
HUFv05_DECODE_SYMBOLX2_0(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_0(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_0(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_0(op4, &bitD4);
endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
}
/* check corruption */
if (op1 > opStart2) return ERROR(corruption_detected);
if (op2 > opStart3) return ERROR(corruption_detected);
if (op3 > opStart4) return ERROR(corruption_detected);
/* note : op4 supposed already verified within main loop */
/* finish bitStreams one by one */
HUFv05_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
HUFv05_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
HUFv05_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
HUFv05_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
/* check */
endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4);
if (!endSignal) return ERROR(corruption_detected);
/* decoded size */
return dstSize;
}
/* check corruption */
if (op1 > opStart2) return ERROR(corruption_detected);
if (op2 > opStart3) return ERROR(corruption_detected);
if (op3 > opStart4) return ERROR(corruption_detected);
/* note : op4 supposed already verified within main loop */
/* finish bitStreams one by one */
HUFv05_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
HUFv05_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
HUFv05_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
HUFv05_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
/* check */
endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4);
if (!endSignal) return ERROR(corruption_detected);
/* decoded size */
return dstSize;
}
@ -3150,14 +3151,17 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
litLength = FSEv05_peakSymbol(&(seqState->stateLL));
prevOffset = litLength ? seq->offset : seqState->prevOffset;
if (litLength == MaxLL) {
U32 add = *dumps++;
const U32 add = *dumps++;
if (add < 255) litLength += add;
else {
litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no risk : dumps is always followed by seq tables > 1 byte */
if (litLength&1) litLength>>=1, dumps += 3;
else litLength = (U16)(litLength)>>1, dumps += 2;
else if (dumps + 2 <= de) {
litLength = MEM_readLE16(dumps);
dumps += 2;
if ((litLength & 1) && dumps < de) {
litLength += *dumps << 16;
dumps += 1;
}
litLength>>=1;
}
if (dumps > de) { litLength = MaxLL+255; } /* late correction, to avoid using uninitialized memory */
if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
}
@ -3184,14 +3188,17 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
/* MatchLength */
matchLength = FSEv05_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
if (matchLength == MaxML) {
U32 add = *dumps++;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) matchLength += add;
else {
matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
if (matchLength&1) matchLength>>=1, dumps += 3;
else matchLength = (U16)(matchLength)>>1, dumps += 2;
else if (dumps + 2 <= de) {
matchLength = MEM_readLE16(dumps);
dumps += 2;
if ((matchLength & 1) && dumps < de) {
matchLength += *dumps << 16;
dumps += 1;
}
matchLength >>= 1;
}
if (dumps > de) { matchLength = MaxML+255; } /* late correction, to avoid using uninitialized memory */
if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
}
matchLength += MINMATCH;
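/* Editor's note (illustrative, inferred from the decoding above): the dump
 * stream stores the extended length shifted left by one, with the low bit
 * flagging a third byte. Bytes 0x0A 0x00 give MEM_readLE16() = 0x000A
 * (even, 2-byte form), hence length 0x000A >> 1 = 5; bytes 0x0B 0x00 0x01
 * give 0x000B (odd) plus 0x01 << 16 = 0x1000B, hence length 0x1000B >> 1
 * = 0x8005. */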

View File

@ -3242,14 +3242,12 @@ static size_t ZSTDv06_decodeSeqHeaders(int* nbSeqPtr,
}
/* FSE table descriptors */
if (ip + 4 > iend) return ERROR(srcSize_wrong); /* min : header byte + all 3 are "raw", hence no header, but at least xxLog bits per type */
{ U32 const LLtype = *ip >> 6;
U32 const Offtype = (*ip >> 4) & 3;
U32 const MLtype = (*ip >> 2) & 3;
ip++;
/* check */
if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
/* Build DTables */
{ size_t const bhSize = ZSTDv06_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable);
if (ZSTDv06_isError(bhSize)) return ERROR(corruption_detected);
@ -3672,7 +3670,7 @@ void ZSTDv06_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cS
blockProperties_t blockProperties = { bt_compressed, 0 };
/* Frame Header */
{ size_t const frameHeaderSize = ZSTDv06_frameHeaderSize(src, ZSTDv06_frameHeaderSize_min);
{ size_t const frameHeaderSize = ZSTDv06_frameHeaderSize(src, srcSize);
if (ZSTDv06_isError(frameHeaderSize)) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, frameHeaderSize);
return;

View File

@ -3470,14 +3470,12 @@ static size_t ZSTDv07_decodeSeqHeaders(int* nbSeqPtr,
}
/* FSE table descriptors */
if (ip + 4 > iend) return ERROR(srcSize_wrong); /* min : header byte + all 3 are "raw", hence no header, but at least xxLog bits per type */
{ U32 const LLtype = *ip >> 6;
U32 const OFtype = (*ip >> 4) & 3;
U32 const MLtype = (*ip >> 2) & 3;
ip++;
/* check */
if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
/* Build DTables */
{ size_t const llhSize = ZSTDv07_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable);
if (ZSTDv07_isError(llhSize)) return ERROR(corruption_detected);
@ -3918,7 +3916,7 @@ void ZSTDv07_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cS
}
/* Frame Header */
{ size_t const frameHeaderSize = ZSTDv07_frameHeaderSize(src, ZSTDv07_frameHeaderSize_min);
{ size_t const frameHeaderSize = ZSTDv07_frameHeaderSize(src, srcSize);
if (ZSTDv07_isError(frameHeaderSize)) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, frameHeaderSize);
return;

View File

@ -71,7 +71,7 @@ extern "C" {
/*------ Version ------*/
#define ZSTD_VERSION_MAJOR 1
#define ZSTD_VERSION_MINOR 4
#define ZSTD_VERSION_RELEASE 0
#define ZSTD_VERSION_RELEASE 2
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */
@ -82,16 +82,16 @@ ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library v
#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */
/***************************************
* Default constant
***************************************/
/* *************************************
* Default constant
***************************************/
#ifndef ZSTD_CLEVEL_DEFAULT
# define ZSTD_CLEVEL_DEFAULT 3
#endif
/***************************************
* Constants
***************************************/
/* *************************************
* Constants
***************************************/
/* All magic numbers are supposed to be read/written to/from files/memory using little-endian convention */
#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */
@ -183,9 +183,14 @@ ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compres
***************************************/
/*= Compression context
* When compressing many times,
* it is recommended to allocate a context just once, and re-use it for each successive compression operation.
* it is recommended to allocate a context just once,
* and re-use it for each successive compression operation.
* This will make the workload friendlier for the system's memory.
* Use one context per thread for parallel execution in multi-threaded environments. */
* Note : re-using context is just a speed / resource optimization.
* It doesn't change the compression ratio, which remains identical.
* Note 2 : In multi-threaded environments,
* use one different context per thread for parallel execution.
*/
typedef struct ZSTD_CCtx_s ZSTD_CCtx;
ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx);
@ -380,6 +385,7 @@ typedef enum {
* ZSTD_c_forceMaxWindow
* ZSTD_c_forceAttachDict
* ZSTD_c_literalCompressionMode
* ZSTD_c_targetCBlockSize
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly;
* also, the enums values themselves are unstable and can still change.
@ -389,6 +395,7 @@ typedef enum {
ZSTD_c_experimentalParam3=1000,
ZSTD_c_experimentalParam4=1001,
ZSTD_c_experimentalParam5=1002,
ZSTD_c_experimentalParam6=1003,
} ZSTD_cParameter;
typedef struct {
@ -657,17 +664,33 @@ ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
ZSTD_inBuffer* input,
ZSTD_EndDirective endOp);
ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */
ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */
/*******************************************************************************
* This is a legacy streaming API, and can be replaced by ZSTD_CCtx_reset() and
* ZSTD_compressStream2(). It is redundant, but is still fully supported.
/* These buffer sizes are softly recommended.
* They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output.
* Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(),
* reducing the amount of memory shuffling and buffering, resulting in minor performance savings.
*
* However, note that these recommendations are from the perspective of a C caller program.
* If the streaming interface is invoked from some other language,
* especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo,
* a major performance rule is to reduce crossing such interface to an absolute minimum.
* It's not rare that more time ends up being spent crossing the interface than on compression itself.
* In such cases, prefer using large buffers, as large as practical,
* for both input and output, to reduce the number of round trips.
*/
ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */
ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */
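A rough sketch of the intended usage (editor's illustration; the read/write loop,
error handling, and `readLen` are placeholders):

    size_t const inSize  = ZSTD_CStreamInSize();   /* recommended input chunk size */
    size_t const outSize = ZSTD_CStreamOutSize();  /* recommended output chunk size */
    void*  const inBuf   = malloc(inSize);
    void*  const outBuf  = malloc(outSize);
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    /* per chunk: read readLen (<= inSize) bytes into inBuf; last chunk uses ZSTD_e_end */
    ZSTD_inBuffer  input  = { inBuf, readLen, 0 };
    ZSTD_outBuffer output = { outBuf, outSize, 0 };
    size_t const remaining = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_continue);
    /* write output.pos bytes from outBuf; check ZSTD_isError(remaining) */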
/* *****************************************************************************
* This following is a legacy streaming API.
* It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2().
* It is redundant, but remains fully supported.
* Advanced parameters and dictionary compression can only be used through the
* new API.
******************************************************************************/
/**
/*!
* Equivalent to:
*
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
@ -675,16 +698,16 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output
* ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
*/
ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
/**
/*!
* Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).
* NOTE: The return value is different. ZSTD_compressStream() returns a hint for
* the next read size (if non-zero and not an error). ZSTD_compressStream2()
* returns the number of bytes left to flush (if non-zero and not an error).
* returns the minimum nb of bytes left to flush (if non-zero and not an error).
*/
ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
/** Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
/** Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
@ -969,7 +992,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
#endif /* ZSTD_H_235446 */
/****************************************************************************************
/* **************************************************************************************
* ADVANCED AND EXPERIMENTAL FUNCTIONS
****************************************************************************************
* The definitions in the following section are considered experimental.
@ -1037,6 +1060,10 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
#define ZSTD_LDM_HASHRATELOG_MIN 0
#define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
/* Advanced parameter bounds */
#define ZSTD_TARGETCBLOCKSIZE_MIN 64
#define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX
/* internal */
#define ZSTD_HASHLOG3_MAX 17
@ -1162,7 +1189,7 @@ typedef enum {
* however it does mean that all frame data must be present and valid. */
ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
/** ZSTD_decompressBound() :
/*! ZSTD_decompressBound() :
* `src` should point to the start of a series of ZSTD encoded and/or skippable frames
* `srcSize` must be the _exact_ size of this series
* (i.e. there should be a frame boundary at `src + srcSize`)
@ -1409,6 +1436,11 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
*/
#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
/* Tries to fit compressed block size to be around targetCBlockSize.
* No target when targetCBlockSize == 0.
* There is no guarantee on compressed block size (default:0) */
#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
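A usage sketch (not from this commit): the parameter sits in the experimental
section, so ZSTD_STATIC_LINKING_ONLY is required; compress_with_target is a
hypothetical helper. The CLI option added later in this diff
(--target-compressed-block-size) performs the same wiring.

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    static size_t compress_with_target(void* dst, size_t dstCap,
                                       const void* src, size_t srcSize)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        size_t cSize;
        /* aim for ~16 KB compressed blocks; 0 (the default) means no target */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 16 * 1024);
        cSize = ZSTD_compress2(cctx, dst, dstCap, src, srcSize);
        ZSTD_freeCCtx(cctx);
        return cSize;  /* check with ZSTD_isError() */
    }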
/*! ZSTD_CCtx_getParameter() :
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
* and store it into int* value.
@ -1843,7 +1875,7 @@ typedef struct {
unsigned checksumFlag;
} ZSTD_frameHeader;
/** ZSTD_getFrameHeader() :
/*! ZSTD_getFrameHeader() :
* decode Frame Header, or requires larger `srcSize`.
* @return : 0, `zfhPtr` is correctly filled,
* >0, `srcSize` is too small, value is wanted `srcSize` amount,
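Not part of the commit, but the three-way return convention above is easiest
to read as code; a sketch (probe_header is a hypothetical wrapper, and
ZSTD_getFrameHeader() still requires ZSTD_STATIC_LINKING_ONLY in this release):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    /* >0 : need at least that many src bytes; 0 : header decoded; -1 : invalid */
    static long probe_header(ZSTD_frameHeader* zfh, const void* src, size_t srcSize)
    {
        size_t const ret = ZSTD_getFrameHeader(zfh, src, srcSize);
        if (ret == 0) return 0;                   /* zfh->frameContentSize etc. valid */
        if (!ZSTD_isError(ret)) return (long)ret; /* grow input to `ret` bytes, retry */
        return -1;                                /* not a zstd frame header */
    }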

View File

@ -33,4 +33,5 @@ afl
# Misc files
*.bat
!windres/generate_res.bat
dirTest*

View File

@ -37,7 +37,7 @@ There are however other Makefile targets that create different variations of CLI
`.gz` support is automatically enabled when `zlib` library is detected at build time.
It's possible to disable `.gz` support, by setting `HAVE_ZLIB=0`.
Example : `make zstd HAVE_ZLIB=0`
It's also possible to force compilation with zlib support, `using HAVE_ZLIB=1`.
It's also possible to force compilation with zlib support, using `HAVE_ZLIB=1`.
In which case, linking stage will fail if `zlib` library cannot be found.
This is useful to prevent silent feature disabling.
@ -45,7 +45,7 @@ There are however other Makefile targets that create different variations of CLI
This is ordered through commands `--format=xz` and `--format=lzma` respectively.
Alternatively, symlinks named `xz`, `unxz`, `lzma`, or `unlzma` will mimic intended behavior.
`.xz` and `.lzma` support is automatically enabled when `lzma` library is detected at build time.
It's possible to disable `.xz` and `.lzma` support, by setting `HAVE_LZMA=0` .
It's possible to disable `.xz` and `.lzma` support, by setting `HAVE_LZMA=0`.
Example : `make zstd HAVE_LZMA=0`
It's also possible to force compilation with lzma support, using `HAVE_LZMA=1`.
In which case, linking stage will fail if `lzma` library cannot be found.
@ -157,8 +157,8 @@ Advanced arguments :
Dictionary builder :
--train ## : create a dictionary from a training set of files
--train-cover[=k=#,d=#,steps=#,split=#] : use the cover algorithm with optional args
--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#] : use the fastcover algorithm with optional args
--train-cover[=k=#,d=#,steps=#,split=#,shrink[=#]] : use the cover algorithm with optional args
--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,shrink[=#],accel=#] : use the fastcover algorithm with optional args
--train-legacy[=s=#] : use the legacy algorithm with selectivity (default: 9)
-o file : `file` is dictionary name (default: dictionary)
--maxdict=# : limit dictionary to specified size (default: 112640)

View File

@ -15,7 +15,6 @@
***************************************/
#include <stdlib.h> /* malloc, free */
#include <string.h> /* memset */
#undef NDEBUG /* assert must not be disabled */
#include <assert.h> /* assert */
#include "timefn.h" /* UTIL_time_t, UTIL_getTime */
@ -54,6 +53,9 @@
return retValue; \
}
/* Abort execution if a condition is not met */
#define CONTROL(c) { if (!(c)) { DEBUGOUTPUT("error: %s \n", #c); abort(); } }
/* *************************************
* Benchmarking an arbitrary function
@ -68,13 +70,13 @@ int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)
* check outcome validity first, using BMK_isValid_runResult() */
BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
{
assert(outcome.error_tag_never_ever_use_directly == 0);
CONTROL(outcome.error_tag_never_ever_use_directly == 0);
return outcome.internal_never_ever_use_directly;
}
size_t BMK_extract_errorResult(BMK_runOutcome_t outcome)
{
assert(outcome.error_tag_never_ever_use_directly != 0);
CONTROL(outcome.error_tag_never_ever_use_directly != 0);
return outcome.error_result_never_ever_use_directly;
}

View File

@ -175,7 +175,7 @@ static void clearHandler(void)
#if !defined(BACKTRACE_ENABLE)
/* automatic detector : backtrace enabled by default on linux+glibc and osx */
# if (defined(__linux__) && defined(__GLIBC__)) \
# if (defined(__linux__) && (defined(__GLIBC__) && !defined(__UCLIBC__))) \
|| (defined(__APPLE__) && defined(__MACH__))
# define BACKTRACE_ENABLE 1
# else
@ -269,6 +269,13 @@ void FIO_addAbortHandler()
else
return -1;
}
static __int64 LONG_TELL(FILE* file) {
LARGE_INTEGER off, newOff;
off.QuadPart = 0;
newOff.QuadPart = 0;
SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, &newOff, FILE_CURRENT);
return newOff.QuadPart;
}
#else
# define LONG_SEEK fseek
# define LONG_TELL ftell
@ -297,6 +304,7 @@ struct FIO_prefs_s {
int ldmMinMatch;
int ldmBucketSizeLog;
int ldmHashRateLog;
size_t targetCBlockSize;
ZSTD_literalCompressionMode_e literalCompressionMode;
/* IO preferences */
@ -341,6 +349,7 @@ FIO_prefs_t* FIO_createPreferences(void)
ret->ldmMinMatch = 0;
ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET;
ret->targetCBlockSize = 0;
ret->literalCompressionMode = ZSTD_lcm_auto;
return ret;
}
@ -409,6 +418,10 @@ void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) {
prefs->rsyncable = rsyncable;
}
void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize) {
prefs->targetCBlockSize = targetCBlockSize;
}
void FIO_setLiteralCompressionMode(
FIO_prefs_t* const prefs,
ZSTD_literalCompressionMode_e mode) {
@ -557,8 +570,11 @@ static FILE* FIO_openDstFile(FIO_prefs_t* const prefs, const char* srcFileName,
} }
{ FILE* const f = fopen( dstFileName, "wb" );
if (f == NULL)
if (f == NULL) {
DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
} else {
chmod(dstFileName, 00600);
}
return f;
}
}
@ -649,6 +665,8 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, prefs->checksumFlag) );
/* compression level */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) );
/* max compressed block size */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetCBlockSize, (int)prefs->targetCBlockSize) );
/* long distance matching */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) );
@ -1158,6 +1176,8 @@ FIO_compressFilename_internal(FIO_prefs_t* const prefs,
const char* dstFileName, const char* srcFileName,
int compressionLevel)
{
UTIL_time_t const timeStart = UTIL_getTime();
clock_t const cpuStart = clock();
U64 readsize = 0;
U64 compressedfilesize = 0;
U64 const fileSize = UTIL_getFileSize(srcFileName);
@ -1210,6 +1230,15 @@ FIO_compressFilename_internal(FIO_prefs_t* const prefs,
(unsigned long long)readsize, (unsigned long long) compressedfilesize,
dstFileName);
/* Elapsed Time and CPU Load */
{ clock_t const cpuEnd = clock();
double const cpuLoad_s = (double)(cpuEnd - cpuStart) / CLOCKS_PER_SEC;
U64 const timeLength_ns = UTIL_clockSpanNano(timeStart);
double const timeLength_s = (double)timeLength_ns / 1000000000;
double const cpuLoad_pct = (cpuLoad_s / timeLength_s) * 100;
DISPLAYLEVEL(4, "%-20s : Completed in %.2f sec (cpu load : %.0f%%)\n",
srcFileName, timeLength_s, cpuLoad_pct);
}
return 0;
}
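The timing block added above follows a generic pattern worth restating
standalone (a sketch assuming POSIX clock_gettime): clock() accumulates CPU
time across all threads while the wall clock measures elapsed time, so the
reported load can exceed 100% under multithreaded compression.

    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
        clock_t const cpuStart = clock();   /* CPU time, summed over all threads */
        struct timespec w0, w1;
        clock_gettime(CLOCK_MONOTONIC, &w0);

        /* ... workload ... */

        clock_gettime(CLOCK_MONOTONIC, &w1);
        {   double const cpu_s  = (double)(clock() - cpuStart) / CLOCKS_PER_SEC;
            double const wall_s = (double)(w1.tv_sec - w0.tv_sec)
                                + (double)(w1.tv_nsec - w0.tv_nsec) / 1e9;
            printf("Completed in %.2f sec (cpu load : %.0f%%)\n",
                   wall_s, 100.0 * cpu_s / wall_s);
        }
        return 0;
    }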
@ -1332,15 +1361,12 @@ int FIO_compressFilename(FIO_prefs_t* const prefs,
const char* dictFileName, int compressionLevel,
ZSTD_compressionParameters comprParams)
{
clock_t const start = clock();
U64 const fileSize = UTIL_getFileSize(srcFileName);
U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize;
cRess_t const ress = FIO_createCResources(prefs, dictFileName, compressionLevel, srcSize, comprParams);
int const result = FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel);
double const seconds = (double)(clock() - start) / CLOCKS_PER_SEC;
DISPLAYLEVEL(4, "Completed in %.2f sec \n", seconds);
FIO_freeCResources(ress);
return result;

View File

@ -71,6 +71,7 @@ void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog);
void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag);
void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */
void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable);
void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize);
void FIO_setLiteralCompressionMode(
FIO_prefs_t* const prefs,
ZSTD_literalCompressionMode_e mode);

View File

@ -107,19 +107,11 @@ int UTIL_isSameFile(const char* file1, const char* file2)
U32 UTIL_isLink(const char* infilename)
{
/* macro guards, as defined in : https://linux.die.net/man/2/lstat */
#ifndef __STRICT_ANSI__
#if defined(_BSD_SOURCE) \
|| (defined(_XOPEN_SOURCE) && (_XOPEN_SOURCE >= 500)) \
|| (defined(_XOPEN_SOURCE) && defined(_XOPEN_SOURCE_EXTENDED)) \
|| (defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L)) \
|| (defined(__APPLE__) && defined(__MACH__)) \
|| defined(__OpenBSD__) \
|| defined(__FreeBSD__)
#if PLATFORM_POSIX_VERSION >= 200112L
int r;
stat_t statbuf;
r = lstat(infilename, &statbuf);
if (!r && S_ISLNK(statbuf.st_mode)) return 1;
#endif
#endif
(void)infilename;
return 0;
@ -253,6 +245,7 @@ int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char
if (!followLinks && UTIL_isLink(path)) {
UTIL_DISPLAYLEVEL(2, "Warning : %s is a symbolic link, ignoring\n", path);
free(path);
continue;
}

View File

@ -1,5 +1,5 @@
.
.TH "ZSTD" "1" "December 2018" "zstd 1.3.8" "User Commands"
.TH "ZSTD" "1" "July 2019" "zstd 1.4.2" "User Commands"
.
.SH "NAME"
\fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files
@ -187,6 +187,10 @@ verbose mode
suppress warnings, interactivity, and notifications\. specify twice to suppress errors too\.
.
.TP
\fB\-\-no\-progress\fR
do not display the progress bar, but keep all other messages\.
.
.TP
\fB\-C\fR, \fB\-\-[no\-]check\fR
add integrity check computed from uncompressed data (default: enabled)
.
@ -225,11 +229,11 @@ Split input files in blocks of size # (default: no split)
A dictionary ID is a locally unique ID that a decoder can use to verify it is using the right dictionary\. By default, zstd will create a 4\-bytes random number ID\. It\'s possible to give a precise number instead\. Short numbers have an advantage : an ID < 256 will only need 1 byte in the compressed frame header, and an ID < 65536 will only need 2 bytes\. This compares favorably to 4 bytes default\. However, it\'s up to the dictionary manager to not assign twice the same ID to 2 different dictionaries\.
.
.TP
\fB\-\-train\-cover[=k#,d=#,steps=#,split=#]\fR
Select parameters for the default dictionary builder algorithm named cover\. If \fId\fR is not specified, then it tries \fId\fR = 6 and \fId\fR = 8\. If \fIk\fR is not specified, then it tries \fIsteps\fR values in the range [50, 2000]\. If \fIsteps\fR is not specified, then the default value of 40 is used\. If \fIsplit\fR is not specified or split <= 0, then the default value of 100 is used\. Requires that \fId\fR <= \fIk\fR\.
\fB\-\-train\-cover[=k#,d=#,steps=#,split=#,shrink[=#]]\fR
Select parameters for the default dictionary builder algorithm named cover\. If \fId\fR is not specified, then it tries \fId\fR = 6 and \fId\fR = 8\. If \fIk\fR is not specified, then it tries \fIsteps\fR values in the range [50, 2000]\. If \fIsteps\fR is not specified, then the default value of 40 is used\. If \fIsplit\fR is not specified or split <= 0, then the default value of 100 is used\. Requires that \fId\fR <= \fIk\fR\. If \fIshrink\fR flag is not used, then the default value for \fIshrinkDict\fR of 0 is used\. If \fIshrink\fR is not specified, then the default value for \fIshrinkDictMaxRegression\fR of 1 is used\.
.
.IP
Selects segments of size \fIk\fR with highest score to put in the dictionary\. The score of a segment is computed by the sum of the frequencies of all the subsegments of size \fId\fR\. Generally \fId\fR should be in the range [6, 8], occasionally up to 16, but the algorithm will run faster with d <= \fI8\fR\. Good values for \fIk\fR vary widely based on the input data, but a safe range is [2 * \fId\fR, 2000]\. If \fIsplit\fR is 100, all input samples are used for both training and testing to find optimal \fId\fR and \fIk\fR to build dictionary\. Supports multithreading if \fBzstd\fR is compiled with threading support\.
Selects segments of size \fIk\fR with highest score to put in the dictionary\. The score of a segment is computed by the sum of the frequencies of all the subsegments of size \fId\fR\. Generally \fId\fR should be in the range [6, 8], occasionally up to 16, but the algorithm will run faster with d <= \fI8\fR\. Good values for \fIk\fR vary widely based on the input data, but a safe range is [2 * \fId\fR, 2000]\. If \fIsplit\fR is 100, all input samples are used for both training and testing to find optimal \fId\fR and \fIk\fR to build dictionary\. Supports multithreading if \fBzstd\fR is compiled with threading support\. Having \fIshrink\fR enabled takes a truncated dictionary of minimum size and doubles in size until compression ratio of the truncated dictionary is at most \fIshrinkDictMaxRegression%\fR worse than the compression ratio of the largest dictionary\.
.
.IP
Examples:
@ -249,6 +253,12 @@ Examples:
.IP
\fBzstd \-\-train\-cover=k=50,split=60 FILEs\fR
.
.IP
\fBzstd \-\-train\-cover=shrink FILEs\fR
.
.IP
\fBzstd \-\-train\-cover=shrink=2 FILEs\fR
.
.TP
\fB\-\-train\-fastcover[=k#,d=#,f=#,steps=#,split=#,accel=#]\fR
Same as cover but with extra parameters \fIf\fR and \fIaccel\fR and different default value of split If \fIsplit\fR is not specified, then it tries \fIsplit\fR = 75\. If \fIf\fR is not specified, then it tries \fIf\fR = 20\. Requires that 0 < \fIf\fR < 32\. If \fIaccel\fR is not specified, then it tries \fIaccel\fR = 1\. Requires that 0 < \fIaccel\fR <= 10\. Requires that \fId\fR = 6 or \fId\fR = 8\.
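For reference (a sketch, not part of this man page): the shrink flag maps to
the shrinkDict / shrinkDictMaxRegression fields of the experimental ZDICT
parameter structs, for both the cover and fastcover trainers. Assuming sample
buffers are already prepared (train_shrunk is a hypothetical helper):

    #define ZDICT_STATIC_LINKING_ONLY
    #include <string.h>
    #include "zdict.h"

    static size_t train_shrunk(void* dictBuf, size_t dictCap,
                               const void* samples, const size_t* sampleSizes,
                               unsigned nbSamples)
    {
        ZDICT_cover_params_t params;
        memset(&params, 0, sizeof(params));
        params.steps = 40;                    /* default parameter-search effort */
        params.shrinkDict = 1;                /* enable the shrinking pass */
        params.shrinkDictMaxRegression = 1;   /* tolerate <= 1% ratio regression */
        return ZDICT_optimizeTrainFromBuffer_cover(dictBuf, dictCap,
                                                   samples, sampleSizes, nbSamples,
                                                   &params);
    }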

View File

@ -244,13 +244,15 @@ Compression of small files similar to the sample set will be greatly improved.
This compares favorably to 4 bytes default.
However, it's up to the dictionary manager to not assign twice the same ID to
2 different dictionaries.
* `--train-cover[=k#,d=#,steps=#,split=#]`:
* `--train-cover[=k#,d=#,steps=#,split=#,shrink[=#]]`:
Select parameters for the default dictionary builder algorithm named cover.
If _d_ is not specified, then it tries _d_ = 6 and _d_ = 8.
If _k_ is not specified, then it tries _steps_ values in the range [50, 2000].
If _steps_ is not specified, then the default value of 40 is used.
If _split_ is not specified or split <= 0, then the default value of 100 is used.
Requires that _d_ <= _k_.
If _shrink_ flag is not used, then the default value for _shrinkDict_ of 0 is used.
If _shrink_ is not specified, then the default value for _shrinkDictMaxRegression_ of 1 is used.
Selects segments of size _k_ with highest score to put in the dictionary.
The score of a segment is computed by the sum of the frequencies of all the
@ -262,6 +264,9 @@ Compression of small files similar to the sample set will be greatly improved.
If _split_ is 100, all input samples are used for both training and testing
to find optimal _d_ and _k_ to build dictionary.
Supports multithreading if `zstd` is compiled with threading support.
Having _shrink_ enabled takes a truncated dictionary of minimum size and doubles
in size until compression ratio of the truncated dictionary is at most
_shrinkDictMaxRegression%_ worse than the compression ratio of the largest dictionary.
Examples:
@ -275,6 +280,10 @@ Compression of small files similar to the sample set will be greatly improved.
`zstd --train-cover=k=50,split=60 FILEs`
`zstd --train-cover=shrink FILEs`
`zstd --train-cover=shrink=2 FILEs`
* `--train-fastcover[=k#,d=#,f=#,steps=#,split=#,accel=#]`:
Same as cover but with extra parameters _f_ and _accel_ and different default value of split
If _split_ is not specified, then it tries _split_ = 75.

View File

@ -141,6 +141,7 @@ static int usage_advanced(const char* programName)
DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog);
DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1);
DISPLAY( "--adapt : dynamically adapt compression level to I/O conditions \n");
DISPLAY( "--target-compressed-block-size=# : make compressed block near targeted size \n");
#ifdef ZSTD_MULTITHREAD
DISPLAY( " -T# : spawns # compression threads (default: 1, 0==# cores) \n");
DISPLAY( " -B# : select size of each job (default: 0==automatic) \n");
@ -179,8 +180,8 @@ static int usage_advanced(const char* programName)
DISPLAY( "\n");
DISPLAY( "Dictionary builder : \n");
DISPLAY( "--train ## : create a dictionary from a training set of files \n");
DISPLAY( "--train-cover[=k=#,d=#,steps=#,split=#] : use the cover algorithm with optional args\n");
DISPLAY( "--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#] : use the fast cover algorithm with optional args\n");
DISPLAY( "--train-cover[=k=#,d=#,steps=#,split=#,shrink[=#]] : use the cover algorithm with optional args\n");
DISPLAY( "--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#,shrink[=#]] : use the fast cover algorithm with optional args\n");
DISPLAY( "--train-legacy[=s=#] : use the legacy algorithm with selectivity (default: %u)\n", g_defaultSelectivityLevel);
DISPLAY( " -o file : `file` is dictionary name (default: %s) \n", g_defaultDictName);
DISPLAY( "--maxdict=# : limit dictionary to specified size (default: %u) \n", g_defaultMaxDictSize);
@ -293,6 +294,8 @@ static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
#ifndef ZSTD_NODICT
static const unsigned kDefaultRegression = 1;
/**
* parseCoverParameters() :
* reads cover parameters from *stringPtr (e.g. "--train-cover=k=48,d=8,steps=32") into *params
@ -311,10 +314,23 @@ static unsigned parseCoverParameters(const char* stringPtr, ZDICT_cover_params_t
params->splitPoint = (double)splitPercentage / 100.0;
if (stringPtr[0]==',') { stringPtr++; continue; } else break;
}
if (longCommandWArg(&stringPtr, "shrink")) {
params->shrinkDictMaxRegression = kDefaultRegression;
params->shrinkDict = 1;
if (stringPtr[0]=='=') {
stringPtr++;
params->shrinkDictMaxRegression = readU32FromChar(&stringPtr);
}
if (stringPtr[0]==',') {
stringPtr++;
continue;
}
else break;
}
return 0;
}
if (stringPtr[0] != 0) return 0;
DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nsteps=%u\nsplit=%u\n", params->k, params->d, params->steps, (unsigned)(params->splitPoint * 100));
DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nsteps=%u\nsplit=%u\nshrink=%u\n", params->k, params->d, params->steps, (unsigned)(params->splitPoint * 100), params->shrinkDictMaxRegression);
return 1;
}
@ -338,10 +354,23 @@ static unsigned parseFastCoverParameters(const char* stringPtr, ZDICT_fastCover_
params->splitPoint = (double)splitPercentage / 100.0;
if (stringPtr[0]==',') { stringPtr++; continue; } else break;
}
if (longCommandWArg(&stringPtr, "shrink")) {
params->shrinkDictMaxRegression = kDefaultRegression;
params->shrinkDict = 1;
if (stringPtr[0]=='=') {
stringPtr++;
params->shrinkDictMaxRegression = readU32FromChar(&stringPtr);
}
if (stringPtr[0]==',') {
stringPtr++;
continue;
}
else break;
}
return 0;
}
if (stringPtr[0] != 0) return 0;
DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", params->k, params->d, params->f, params->steps, (unsigned)(params->splitPoint * 100), params->accel);
DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\nshrink=%u\n", params->k, params->d, params->f, params->steps, (unsigned)(params->splitPoint * 100), params->accel, params->shrinkDictMaxRegression);
return 1;
}
@ -367,6 +396,8 @@ static ZDICT_cover_params_t defaultCoverParams(void)
params.d = 8;
params.steps = 4;
params.splitPoint = 1.0;
params.shrinkDict = 0;
params.shrinkDictMaxRegression = kDefaultRegression;
return params;
}
@ -379,6 +410,8 @@ static ZDICT_fastCover_params_t defaultFastCoverParams(void)
params.steps = 4;
params.splitPoint = 0.75; /* different from default splitPoint of cover */
params.accel = DEFAULT_ACCEL;
params.shrinkDict = 0;
params.shrinkDictMaxRegression = kDefaultRegression;
return params;
}
#endif
@ -555,6 +588,7 @@ int main(int argCount, const char* argv[])
const char* suffix = ZSTD_EXTENSION;
unsigned maxDictSize = g_defaultMaxDictSize;
unsigned dictID = 0;
size_t targetCBlockSize = 0;
int dictCLevel = g_defaultDictCLevel;
unsigned dictSelect = g_defaultSelectivityLevel;
#ifdef UTIL_HAS_CREATEFILELIST
@ -588,11 +622,11 @@ int main(int argCount, const char* argv[])
/* preset behaviors */
if (exeNameMatch(programName, ZSTD_ZSTDMT)) nbWorkers=0, singleThread=0;
if (exeNameMatch(programName, ZSTD_UNZSTD)) operation=zom_decompress;
if (exeNameMatch(programName, ZSTD_CAT)) { operation=zom_decompress; forceStdout=1; FIO_overwriteMode(prefs); outFileName=stdoutmark; g_displayLevel=1; } /* supports multiple formats */
if (exeNameMatch(programName, ZSTD_ZCAT)) { operation=zom_decompress; forceStdout=1; FIO_overwriteMode(prefs); outFileName=stdoutmark; g_displayLevel=1; } /* behave like zcat, also supports multiple formats */
if (exeNameMatch(programName, ZSTD_CAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; outFileName=stdoutmark; g_displayLevel=1; } /* supports multiple formats */
if (exeNameMatch(programName, ZSTD_ZCAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; outFileName=stdoutmark; g_displayLevel=1; } /* behave like zcat, also supports multiple formats */
if (exeNameMatch(programName, ZSTD_GZ)) { suffix = GZ_EXTENSION; FIO_setCompressionType(prefs, FIO_gzipCompression); FIO_setRemoveSrcFile(prefs, 1); } /* behave like gzip */
if (exeNameMatch(programName, ZSTD_GUNZIP)) { operation=zom_decompress; FIO_setRemoveSrcFile(prefs, 1); } /* behave like gunzip, also supports multiple formats */
if (exeNameMatch(programName, ZSTD_GZCAT)) { operation=zom_decompress; forceStdout=1; FIO_overwriteMode(prefs); outFileName=stdoutmark; g_displayLevel=1; } /* behave like gzcat, also supports multiple formats */
if (exeNameMatch(programName, ZSTD_GZCAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; outFileName=stdoutmark; g_displayLevel=1; } /* behave like gzcat, also supports multiple formats */
if (exeNameMatch(programName, ZSTD_LZMA)) { suffix = LZMA_EXTENSION; FIO_setCompressionType(prefs, FIO_lzmaCompression); FIO_setRemoveSrcFile(prefs, 1); } /* behave like lzma */
if (exeNameMatch(programName, ZSTD_UNLZMA)) { operation=zom_decompress; FIO_setCompressionType(prefs, FIO_lzmaCompression); FIO_setRemoveSrcFile(prefs, 1); } /* behave like unlzma, also supports multiple formats */
if (exeNameMatch(programName, ZSTD_XZ)) { suffix = XZ_EXTENSION; FIO_setCompressionType(prefs, FIO_xzCompression); FIO_setRemoveSrcFile(prefs, 1); } /* behave like xz */
@ -711,6 +745,7 @@ int main(int argCount, const char* argv[])
if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; }
if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--long")) {
unsigned ldmWindowLog = 0;
ldmFlag = 1;
@ -1115,6 +1150,7 @@ int main(int argCount, const char* argv[])
FIO_setAdaptMin(prefs, adaptMin);
FIO_setAdaptMax(prefs, adaptMax);
FIO_setRsyncable(prefs, rsyncable);
FIO_setTargetCBlockSize(prefs, targetCBlockSize);
FIO_setLiteralCompressionMode(prefs, literalCompressionMode);
if (adaptMin > cLevel) cLevel = adaptMin;
if (adaptMax < cLevel) cLevel = adaptMax;
@ -1124,7 +1160,7 @@ int main(int argCount, const char* argv[])
else
operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams);
#else
(void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; /* not used when ZSTD_NOCOMPRESS set */
(void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; /* not used when ZSTD_NOCOMPRESS set */
DISPLAY("Compression not supported \n");
#endif
} else { /* decompression or test */

View File

@ -58,6 +58,9 @@ while [ "$#" -gt 0 ] && [ "${endofopts}" -eq 0 ]; do
shift 2
break
;;
-f)
pattern_found=2
;;
*)
;;
esac
@ -117,7 +120,11 @@ else
set -f
while [ "$#" -gt 0 ]; do
# shellcheck disable=SC2086
"${zcat}" -fq -- "$1" | "${grep}" --label="${1}" ${grep_args} -- "${pattern}" -
if [ $pattern_found -eq 2 ]; then
"${zcat}" -fq -- "$1" | "${grep}" --label="${1}" ${grep_args} -- -
else
"${zcat}" -fq -- "$1" | "${grep}" --label="${1}" ${grep_args} -- "${pattern}" -
fi
[ "$?" -ne 0 ] && EXIT_CODE=1
shift
done

View File

@ -1,5 +1,5 @@
.
.TH "ZSTDGREP" "1" "December 2018" "zstd 1.3.8" "User Commands"
.TH "ZSTDGREP" "1" "July 2019" "zstd 1.4.2" "User Commands"
.
.SH "NAME"
\fBzstdgrep\fR \- print lines matching a pattern in zstandard\-compressed files

View File

@ -1,5 +1,5 @@
.
.TH "ZSTDLESS" "1" "December 2018" "zstd 1.3.8" "User Commands"
.TH "ZSTDLESS" "1" "July 2019" "zstd 1.4.2" "User Commands"
.
.SH "NAME"
\fBzstdless\fR \- view zstandard\-compressed files

View File

@ -55,6 +55,7 @@ _*
tmp*
*.zst
*.gz
!gzip/hufts-segv.gz
result
out
*.zstd

View File

@ -215,6 +215,9 @@ roundTripCrash : $(ZSTD_OBJECTS) roundTripCrash.c
longmatch : $(ZSTD_OBJECTS) longmatch.c
$(CC) $(FLAGS) $^ -o $@$(EXT)
bigdict: $(ZSTDMT_OBJECTS) $(PRGDIR)/datagen.c bigdict.c
$(CC) $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT)
invalidDictionaries : $(ZSTD_OBJECTS) invalidDictionaries.c
$(CC) $(FLAGS) $^ -o $@$(EXT)
@ -247,7 +250,7 @@ clean:
$(MAKE) -C $(ZSTDDIR) clean
$(MAKE) -C $(PRGDIR) clean
@$(RM) -fR $(TESTARTEFACT)
@$(RM) -f core *.o tmp* result* *.gcda dictionary *.zst \
@$(RM) -f core *.o tmp* *.tmp result* *.gcda dictionary *.zst \
$(PRGDIR)/zstd$(EXT) $(PRGDIR)/zstd32$(EXT) \
fullbench$(EXT) fullbench32$(EXT) \
fullbench-lib$(EXT) fullbench-dll$(EXT) \
@ -256,7 +259,7 @@ clean:
zstreamtest$(EXT) zstreamtest32$(EXT) \
datagen$(EXT) paramgrill$(EXT) roundTripCrash$(EXT) longmatch$(EXT) \
symbols$(EXT) invalidDictionaries$(EXT) legacy$(EXT) poolTests$(EXT) \
decodecorpus$(EXT) checkTag$(EXT)
decodecorpus$(EXT) checkTag$(EXT) bigdict$(EXT)
@echo Cleaning completed
@ -359,6 +362,9 @@ test-zstdgrep: gzstd
-echo 'hello world' > test.txt && $(PRGDIR)/zstd test.txt
env ZCAT=/tmp/zstdcat $(PRGDIR)/zstdgrep hello test.txt.zst
env ZCAT=/tmp/zstdcat $(PRGDIR)/zstdgrep weird test.txt.zst && return 1 || return 0
-echo 'hello' > pattern.txt
env ZCAT=/tmp/zstdcat $(PRGDIR)/zstdgrep -f pattern.txt test.txt.zst
$(RM) test.txt test.txt.zst pattern.txt
test-fullbench: fullbench datagen
$(QEMU_SYS) ./fullbench -i1
@ -394,6 +400,9 @@ test-zstream32: zstreamtest32
test-longmatch: longmatch
$(QEMU_SYS) ./longmatch
test-bigdict: bigdict
$(QEMU_SYS) ./bigdict
test-invalidDictionaries: invalidDictionaries
$(QEMU_SYS) ./invalidDictionaries

View File

@ -0,0 +1,128 @@
/*
* Copyright (c) 2017-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include <assert.h>
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdint.h>
#include "datagen.h"
#include "mem.h"
#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"
static int
compress(ZSTD_CCtx* cctx, ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity,
void const* src, size_t srcSize,
void* roundtrip, ZSTD_EndDirective end)
{
ZSTD_inBuffer in = {src, srcSize, 0};
ZSTD_outBuffer out = {dst, dstCapacity, 0};
int ended = 0;
while (!ended && (in.pos < in.size || out.pos > 0)) {
size_t rc;
out.pos = 0;
rc = ZSTD_compressStream2(cctx, &out, &in, end);
if (ZSTD_isError(rc))
return 1;
if (end == ZSTD_e_end && rc == 0)
ended = 1;
{
ZSTD_inBuffer rtIn = {dst, out.pos, 0};
ZSTD_outBuffer rtOut = {roundtrip, srcSize, 0};
rc = 1;
while (rtIn.pos < rtIn.size || rtOut.pos > 0) {
rtOut.pos = 0;
rc = ZSTD_decompressStream(dctx, &rtOut, &rtIn);
if (ZSTD_isError(rc)) {
fprintf(stderr, "Decompression error: %s\n", ZSTD_getErrorName(rc));
return 1;
}
if (rc == 0)
break;
}
if (ended && rc != 0) {
fprintf(stderr, "Frame not finished!\n");
return 1;
}
}
}
return 0;
}
int main(int argc, const char** argv)
{
ZSTD_CCtx* cctx = ZSTD_createCCtx();
ZSTD_DCtx* dctx = ZSTD_createDCtx();
const size_t dataSize = (size_t)1 << 30;
const size_t outSize = ZSTD_compressBound(dataSize);
const size_t bufferSize = (size_t)1 << 31;
char* buffer = (char*)malloc(bufferSize);
void* out = malloc(outSize);
void* roundtrip = malloc(dataSize);
(void)argc;
(void)argv;
if (!buffer || !out || !roundtrip || !cctx || !dctx) {
fprintf(stderr, "Allocation failure\n");
return 1;
}
if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 31)))
return 1;
if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 1)))
return 1;
if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_overlapLog, 9)))
return 1;
if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1)))
return 1;
if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, ZSTD_btopt)))
return 1;
if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetLength, 7)))
return 1;
if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, 7)))
return 1;
if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_searchLog, 1)))
return 1;
if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, 10)))
return 1;
if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_chainLog, 10)))
return 1;
if (ZSTD_isError(ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 31)))
return 1;
RDG_genBuffer(buffer, bufferSize, 1.0, 0.0, 0xbeefcafe);
/* Compress 10 GB in ten 1 GB chunks (a final 1 GB chunk below ends the frame) */
{
int i;
for (i = 0; i < 10; ++i) {
fprintf(stderr, "Compressing 1 GB\n");
if (compress(cctx, dctx, out, outSize, buffer, dataSize, roundtrip, ZSTD_e_continue))
return 1;
}
}
fprintf(stderr, "Compressing 1 GB\n");
if (compress(cctx, dctx, out, outSize, buffer, dataSize, roundtrip, ZSTD_e_end))
return 1;
fprintf(stderr, "Success!\n");
free(roundtrip);
free(out);
free(buffer);
ZSTD_freeDCtx(dctx);
ZSTD_freeCCtx(cctx);
return 0;
}

View File

@ -840,16 +840,16 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
{ unsigned max = MaxLL;
size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */
assert(!HIST_isError(mostFrequent));
if (mostFrequent == nbSeq) {
/* do RLE if we have the chance */
*op++ = llCodeTable[0];
FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
LLtype = set_rle;
} else if (frame->stats.fseInit && !(RAND(seed) & 3) &&
if (frame->stats.fseInit && !(RAND(seed) & 3) &&
isSymbolSubset(llCodeTable, nbSeq,
frame->stats.litlengthSymbolSet, 35)) {
/* maybe do repeat mode if we're allowed to */
LLtype = set_repeat;
} else if (mostFrequent == nbSeq) {
/* do RLE if we have the chance */
*op++ = llCodeTable[0];
FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
LLtype = set_rle;
} else if (!(RAND(seed) & 3)) {
/* maybe use the default distribution */
FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
@ -872,14 +872,14 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
{ unsigned max = MaxOff;
size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */
assert(!HIST_isError(mostFrequent));
if (mostFrequent == nbSeq) {
*op++ = ofCodeTable[0];
FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
Offtype = set_rle;
} else if (frame->stats.fseInit && !(RAND(seed) & 3) &&
if (frame->stats.fseInit && !(RAND(seed) & 3) &&
isSymbolSubset(ofCodeTable, nbSeq,
frame->stats.offsetSymbolSet, 28)) {
Offtype = set_repeat;
} else if (mostFrequent == nbSeq) {
*op++ = ofCodeTable[0];
FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
Offtype = set_rle;
} else if (!(RAND(seed) & 3)) {
FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, DefaultMaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
Offtype = set_basic;
@ -900,14 +900,14 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
{ unsigned max = MaxML;
size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */
assert(!HIST_isError(mostFrequent));
if (mostFrequent == nbSeq) {
*op++ = *mlCodeTable;
FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
MLtype = set_rle;
} else if (frame->stats.fseInit && !(RAND(seed) & 3) &&
if (frame->stats.fseInit && !(RAND(seed) & 3) &&
isSymbolSubset(mlCodeTable, nbSeq,
frame->stats.matchlengthSymbolSet, 52)) {
MLtype = set_repeat;
} else if (mostFrequent == nbSeq) {
*op++ = *mlCodeTable;
FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
MLtype = set_rle;
} else if (!(RAND(seed) & 3)) {
/* sometimes do default distribution */
FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
@ -1050,8 +1050,8 @@ static void writeBlock(U32* seed, frame_t* frame, size_t contentSize,
op += contentSize;
blockType = 0;
blockSize = contentSize;
} else if (blockTypeDesc == 1) {
/* RLE */
} else if (blockTypeDesc == 1 && frame->header.contentSize > 0) {
/* RLE (Don't create RLE block if frame content is 0 since block size of 1 may exceed max block size)*/
BYTE const symbol = RAND(seed) & 0xff;
op[0] = symbol;

View File

@ -15,7 +15,7 @@
#include "util.h" /* Compiler options, UTIL_GetFileSize */
#include <stdlib.h> /* malloc */
#include <stdio.h> /* fprintf, fopen, ftello64 */
#include <assert.h> /* assert */
#include <assert.h>
#include "timefn.h" /* UTIL_clockSpanNano, UTIL_getTime */
#include "mem.h" /* U32 */
@ -31,8 +31,8 @@
#include "zstd.h" /* ZSTD_versionString */
#include "util.h" /* time functions */
#include "datagen.h"
#include "benchfn.h" /* CustomBench*/
#include "benchzstd.h" /* MB_UNIT */
#include "benchfn.h" /* CustomBench */
#include "benchzstd.h" /* MB_UNIT */
/*_************************************
@ -51,7 +51,7 @@
#define DEFAULT_CLEVEL 1
#define COMPRESSIBILITY_DEFAULT 0.50
static const size_t g_sampleSize = 10000000;
static const size_t kSampleSizeDefault = 10000000;
#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */
@ -61,12 +61,12 @@ static const size_t g_sampleSize = 10000000;
**************************************/
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
#define CONTROL(c) { if (!(c)) { abort(); } } /* like assert(), but cannot be disabled */
/*_************************************
* Benchmark Parameters
**************************************/
static unsigned g_nbIterations = NBLOOPS;
static double g_compressibility = COMPRESSIBILITY_DEFAULT;
/*_*******************************************************
@ -100,12 +100,12 @@ static ZSTD_CCtx* g_zcc = NULL;
static size_t
local_ZSTD_compress(const void* src, size_t srcSize,
void* dst, size_t dstSize,
void* buff2)
void* payload)
{
ZSTD_parameters p;
ZSTD_frameParameters f = { 1 /* contentSizeHeader*/, 0, 0 };
p.fParams = f;
p.cParams = *(ZSTD_compressionParameters*)buff2;
p.cParams = *(ZSTD_compressionParameters*)payload;
return ZSTD_compress_advanced (g_zcc, dst, dstSize, src, srcSize, NULL ,0, p);
//return ZSTD_compress(dst, dstSize, src, srcSize, cLevel);
}
@ -126,7 +126,7 @@ extern size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* ctx, const void* src, size_t s
static size_t local_ZSTD_decodeLiteralsBlock(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2)
{
(void)src; (void)srcSize; (void)dst; (void)dstSize;
return ZSTD_decodeLiteralsBlock((ZSTD_DCtx*)g_zdc, buff2, g_cSize);
return ZSTD_decodeLiteralsBlock(g_zdc, buff2, g_cSize);
}
static size_t local_ZSTD_decodeSeqHeaders(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2)
@ -141,14 +141,14 @@ static ZSTD_CStream* g_cstream= NULL;
static size_t
local_ZSTD_compressStream(const void* src, size_t srcSize,
void* dst, size_t dstCapacity,
void* buff2)
void* payload)
{
ZSTD_outBuffer buffOut;
ZSTD_inBuffer buffIn;
ZSTD_parameters p;
ZSTD_frameParameters f = {1 /* contentSizeHeader*/, 0, 0};
p.fParams = f;
p.cParams = *(ZSTD_compressionParameters*)buff2;
p.cParams = *(ZSTD_compressionParameters*)payload;
ZSTD_initCStream_advanced(g_cstream, NULL, 0, p, ZSTD_CONTENTSIZE_UNKNOWN);
buffOut.dst = dst;
buffOut.size = dstCapacity;
@ -161,23 +161,39 @@ local_ZSTD_compressStream(const void* src, size_t srcSize,
return buffOut.pos;
}
static size_t
local_ZSTD_compressStream_freshCCtx(const void* src, size_t srcSize,
void* dst, size_t dstCapacity,
void* payload)
{
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
size_t r;
assert(cctx != NULL);
r = local_ZSTD_compressStream(src, srcSize, dst, dstCapacity, payload);
ZSTD_freeCCtx(cctx);
return r;
}
static size_t
local_ZSTD_compress_generic_end(const void* src, size_t srcSize,
void* dst, size_t dstCapacity,
void* buff2)
void* payload)
{
(void)buff2;
(void)payload;
return ZSTD_compress2(g_cstream, dst, dstCapacity, src, srcSize);
}
static size_t
local_ZSTD_compress_generic_continue(const void* src, size_t srcSize,
void* dst, size_t dstCapacity,
void* buff2)
void* payload)
{
ZSTD_outBuffer buffOut;
ZSTD_inBuffer buffIn;
(void)buff2;
(void)payload;
buffOut.dst = dst;
buffOut.size = dstCapacity;
buffOut.pos = 0;
@ -192,9 +208,9 @@ local_ZSTD_compress_generic_continue(const void* src, size_t srcSize,
static size_t
local_ZSTD_compress_generic_T2_end(const void* src, size_t srcSize,
void* dst, size_t dstCapacity,
void* buff2)
void* payload)
{
(void)buff2;
(void)payload;
ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_nbWorkers, 2);
return ZSTD_compress2(g_cstream, dst, dstCapacity, src, srcSize);
}
@ -202,11 +218,11 @@ local_ZSTD_compress_generic_T2_end(const void* src, size_t srcSize,
static size_t
local_ZSTD_compress_generic_T2_continue(const void* src, size_t srcSize,
void* dst, size_t dstCapacity,
void* buff2)
void* payload)
{
ZSTD_outBuffer buffOut;
ZSTD_inBuffer buffIn;
(void)buff2;
(void)payload;
ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_nbWorkers, 2);
buffOut.dst = dst;
buffOut.size = dstCapacity;
@ -242,27 +258,28 @@ local_ZSTD_decompressStream(const void* src, size_t srcSize,
#ifndef ZSTD_DLL_IMPORT
static size_t local_ZSTD_compressContinue(const void* src, size_t srcSize,
void* dst, size_t dstCapacity,
void* buff2)
void* payload)
{
ZSTD_parameters p;
ZSTD_frameParameters f = { 1 /* contentSizeHeader*/, 0, 0 };
p.fParams = f;
p.cParams = *(ZSTD_compressionParameters*)buff2;
p.cParams = *(ZSTD_compressionParameters*)payload;
ZSTD_compressBegin_advanced(g_zcc, NULL, 0, p, srcSize);
return ZSTD_compressEnd(g_zcc, dst, dstCapacity, src, srcSize);
}
#define FIRST_BLOCK_SIZE 8
static size_t local_ZSTD_compressContinue_extDict(const void* src, size_t srcSize,
void* dst, size_t dstCapacity,
void* buff2)
static size_t
local_ZSTD_compressContinue_extDict(const void* src, size_t srcSize,
void* dst, size_t dstCapacity,
void* payload)
{
BYTE firstBlockBuf[FIRST_BLOCK_SIZE];
ZSTD_parameters p;
ZSTD_frameParameters f = { 1, 0, 0 };
ZSTD_frameParameters const f = { 1, 0, 0 };
p.fParams = f;
p.cParams = *(ZSTD_compressionParameters*)buff2;
p.cParams = *(ZSTD_compressionParameters*)payload;
ZSTD_compressBegin_advanced(g_zcc, NULL, 0, p, srcSize);
memcpy(firstBlockBuf, src, FIRST_BLOCK_SIZE);
@ -318,7 +335,7 @@ static int benchMem(unsigned benchNb,
size_t dstBuffSize = ZSTD_compressBound(srcSize);
BYTE* dstBuff;
void* dstBuff2;
void* buff2;
void* payload;
const char* benchName;
BMK_benchFn_t benchFunction;
int errorcode = 0;
@ -355,6 +372,9 @@ static int benchMem(unsigned benchNb,
case 42:
benchFunction = local_ZSTD_decompressStream; benchName = "decompressStream";
break;
case 43:
benchFunction = local_ZSTD_compressStream_freshCCtx; benchName = "compressStream_freshCCtx";
break;
case 51:
benchFunction = local_ZSTD_compress_generic_continue; benchName = "compress_generic, continue";
break;
@ -379,7 +399,7 @@ static int benchMem(unsigned benchNb,
free(dstBuff); free(dstBuff2);
return 12;
}
buff2 = dstBuff2;
payload = dstBuff2;
if (g_zcc==NULL) g_zcc = ZSTD_createCCtx();
if (g_zdc==NULL) g_zdc = ZSTD_createDCtx();
if (g_cstream==NULL) g_cstream = ZSTD_createCStream();
@ -412,62 +432,66 @@ static int benchMem(unsigned benchNb,
switch(benchNb)
{
case 1:
buff2 = &cparams;
payload = &cparams;
break;
case 2:
g_cSize = ZSTD_compress(buff2, dstBuffSize, src, srcSize, cLevel);
g_cSize = ZSTD_compress(dstBuff2, dstBuffSize, src, srcSize, cLevel);
break;
#ifndef ZSTD_DLL_IMPORT
case 11:
buff2 = &cparams;
payload = &cparams;
break;
case 12:
buff2 = &cparams;
payload = &cparams;
break;
case 13 :
g_cSize = ZSTD_compress(buff2, dstBuffSize, src, srcSize, cLevel);
g_cSize = ZSTD_compress(dstBuff2, dstBuffSize, src, srcSize, cLevel);
break;
case 31: /* ZSTD_decodeLiteralsBlock */
{ blockProperties_t bp;
ZSTD_frameHeader zfp;
size_t frameHeaderSize, skippedSize;
case 31: /* ZSTD_decodeLiteralsBlock : starts literals block in dstBuff2 */
{ size_t frameHeaderSize;
g_cSize = ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, cLevel);
frameHeaderSize = ZSTD_getFrameHeader(&zfp, dstBuff, ZSTD_FRAMEHEADERSIZE_MIN);
if (frameHeaderSize==0) frameHeaderSize = ZSTD_FRAMEHEADERSIZE_MIN;
ZSTD_getcBlockSize(dstBuff+frameHeaderSize, dstBuffSize, &bp); /* Get 1st block type */
if (bp.blockType != bt_compressed) {
DISPLAY("ZSTD_decodeLiteralsBlock : impossible to test on this sample (not compressible)\n");
goto _cleanOut;
frameHeaderSize = ZSTD_frameHeaderSize(dstBuff, ZSTD_FRAMEHEADERSIZE_PREFIX);
CONTROL(!ZSTD_isError(frameHeaderSize));
/* check block is compressible, hence contains a literals section */
{ blockProperties_t bp;
ZSTD_getcBlockSize(dstBuff+frameHeaderSize, dstBuffSize, &bp); /* Get 1st block type */
if (bp.blockType != bt_compressed) {
DISPLAY("ZSTD_decodeLiteralsBlock : impossible to test on this sample (not compressible)\n");
goto _cleanOut;
} }
{ size_t const skippedSize = frameHeaderSize + ZSTD_blockHeaderSize;
memcpy(dstBuff2, dstBuff+skippedSize, g_cSize-skippedSize);
}
skippedSize = frameHeaderSize + ZSTD_blockHeaderSize;
memcpy(buff2, dstBuff+skippedSize, g_cSize-skippedSize);
srcSize = srcSize > 128 KB ? 128 KB : srcSize; /* speed relative to block */
ZSTD_decompressBegin(g_zdc);
break;
}
case 32: /* ZSTD_decodeSeqHeaders */
{ blockProperties_t bp;
ZSTD_frameHeader zfp;
const BYTE* ip = dstBuff;
const BYTE* iend;
size_t frameHeaderSize, cBlockSize;
ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, cLevel); /* it would be better to use direct block compression here */
g_cSize = ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, cLevel);
frameHeaderSize = ZSTD_getFrameHeader(&zfp, dstBuff, ZSTD_FRAMEHEADERSIZE_MIN);
if (frameHeaderSize==0) frameHeaderSize = ZSTD_FRAMEHEADERSIZE_MIN;
ip += frameHeaderSize; /* Skip frame Header */
cBlockSize = ZSTD_getcBlockSize(ip, dstBuffSize, &bp); /* Get 1st block type */
if (bp.blockType != bt_compressed) {
DISPLAY("ZSTD_decodeSeqHeaders : impossible to test on this sample (not compressible)\n");
goto _cleanOut;
{ size_t const cSize = ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, cLevel);
CONTROL(cSize > ZSTD_FRAMEHEADERSIZE_PREFIX);
}
iend = ip + ZSTD_blockHeaderSize + cBlockSize; /* End of first block */
ip += ZSTD_blockHeaderSize; /* skip block header */
/* Skip frame Header */
{ size_t const frameHeaderSize = ZSTD_frameHeaderSize(dstBuff, ZSTD_FRAMEHEADERSIZE_PREFIX);
CONTROL(!ZSTD_isError(frameHeaderSize));
ip += frameHeaderSize;
}
/* Find end of block */
{ size_t const cBlockSize = ZSTD_getcBlockSize(ip, dstBuffSize, &bp); /* Get 1st block type */
if (bp.blockType != bt_compressed) {
DISPLAY("ZSTD_decodeSeqHeaders : impossible to test on this sample (not compressible)\n");
goto _cleanOut;
}
iend = ip + ZSTD_blockHeaderSize + cBlockSize; /* End of first block */
}
ip += ZSTD_blockHeaderSize; /* skip block header */
ZSTD_decompressBegin(g_zdc);
assert(iend > ip);
CONTROL(iend > ip);
ip += ZSTD_decodeLiteralsBlock(g_zdc, ip, (size_t)(iend-ip)); /* skip literal segment */
g_cSize = (size_t)(iend-ip);
memcpy(buff2, ip, g_cSize); /* copy rest of block (it starts by SeqHeader) */
memcpy(dstBuff2, ip, g_cSize); /* copy rest of block (it starts by SeqHeader) */
srcSize = srcSize > 128 KB ? 128 KB : srcSize; /* speed relative to block */
break;
}
@ -476,10 +500,13 @@ static int benchMem(unsigned benchNb,
goto _cleanOut;
#endif
case 41 :
buff2 = &cparams;
payload = &cparams;
break;
case 42 :
g_cSize = ZSTD_compress(buff2, dstBuffSize, src, srcSize, cLevel);
g_cSize = ZSTD_compress(payload, dstBuffSize, src, srcSize, cLevel);
break;
case 43 :
payload = &cparams;
break;
/* test functions */
@ -498,10 +525,10 @@ static int benchMem(unsigned benchNb,
BMK_runTime_t bestResult;
bestResult.sumOfReturn = 0;
bestResult.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000; /* hopefully large enough : must be larger than any potential measurement */
assert(tfs != NULL);
CONTROL(tfs != NULL);
bp.benchFn = benchFunction;
bp.benchPayload = buff2;
bp.benchPayload = payload;
bp.initFn = NULL;
bp.initPayload = NULL;
bp.errorFn = ZSTD_isError;
@ -549,21 +576,19 @@ static int benchMem(unsigned benchNb,
static int benchSample(U32 benchNb,
size_t benchedSize, double compressibility,
int cLevel, ZSTD_compressionParameters cparams)
{
size_t const benchedSize = g_sampleSize;
const char* const name = "Sample 10MiB";
/* Allocation */
void* const origBuff = malloc(benchedSize);
if (!origBuff) { DISPLAY("\nError: not enough memory!\n"); return 12; }
/* Fill buffer */
RDG_genBuffer(origBuff, benchedSize, g_compressibility, 0.0, 0);
RDG_genBuffer(origBuff, benchedSize, compressibility, 0.0, 0);
/* bench */
DISPLAY("\r%70s\r", "");
DISPLAY(" %s : \n", name);
DISPLAY(" Sample %u bytes : \n", (unsigned)benchedSize);
if (benchNb) {
benchMem(benchNb, origBuff, benchedSize, cLevel, cparams);
} else { /* 0 == run all tests */
@ -696,10 +721,11 @@ static int usage_advanced(const char* exename)
usage(exename);
DISPLAY( "\nAdvanced options :\n");
DISPLAY( " -b# : test only function # \n");
DISPLAY( " -i# : iteration loops [1-9](default : %i)\n", NBLOOPS);
DISPLAY( " -P# : sample compressibility (default : %.1f%%)\n", COMPRESSIBILITY_DEFAULT * 100);
DISPLAY( " -l# : benchmark functions at that compression level (default : %i)\n", DEFAULT_CLEVEL);
DISPLAY( " --zstd : custom parameter selection. Format same as zstdcli \n");
DISPLAY( " -P# : sample compressibility (default : %.1f%%)\n", COMPRESSIBILITY_DEFAULT * 100);
DISPLAY( " -B# : sample size (default : %u)\n", (unsigned)kSampleSizeDefault);
DISPLAY( " -i# : iteration loops [1-9](default : %i)\n", NBLOOPS);
return 0;
}
@ -718,13 +744,15 @@ int main(int argc, const char** argv)
U32 benchNb = 0, main_pause = 0;
int cLevel = DEFAULT_CLEVEL;
ZSTD_compressionParameters cparams = ZSTD_getCParams(cLevel, 0, 0);
size_t sampleSize = kSampleSizeDefault;
double compressibility = COMPRESSIBILITY_DEFAULT;
DISPLAY(WELCOME_MESSAGE);
if (argc<1) return badusage(exename);
for (argNb=1; argNb<argc; argNb++) {
const char* argument = argv[argNb];
assert(argument != NULL);
CONTROL(argument != NULL);
if (longCommandWArg(&argument, "--zstd=")) {
for ( ; ;) {
@ -767,21 +795,29 @@ int main(int argc, const char** argv)
benchNb = readU32FromChar(&argument);
break;
/* Modify Nb Iterations */
case 'i':
/* Select compression level to use */
case 'l':
argument++;
g_nbIterations = readU32FromChar(&argument);
cLevel = (int)readU32FromChar(&argument);
cparams = ZSTD_getCParams(cLevel, 0, 0);
break;
/* Select compressibility of synthetic sample */
case 'P':
argument++;
g_compressibility = (double)readU32FromChar(&argument) / 100.;
compressibility = (double)readU32FromChar(&argument) / 100.;
break;
case 'l':
/* Select size of synthetic sample */
case 'B':
argument++;
cLevel = (int)readU32FromChar(&argument);
cparams = ZSTD_getCParams(cLevel, 0, 0);
sampleSize = (size_t)readU32FromChar(&argument);
break;
/* Modify Nb Iterations */
case 'i':
argument++;
g_nbIterations = readU32FromChar(&argument);
break;
/* Unknown command */
@ -798,7 +834,7 @@ int main(int argc, const char** argv)
if (filenamesStart==0) /* no input file */
result = benchSample(benchNb, cLevel, cparams);
result = benchSample(benchNb, sampleSize, compressibility, cLevel, cparams);
else
result = benchFiles(benchNb, argv+filenamesStart, argc-filenamesStart, cLevel, cparams);

View File

@ -26,8 +26,8 @@ ZSTDDIR = ../../lib
PRGDIR = ../../programs
FUZZ_CPPFLAGS := -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
-I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR) \
-DZSTD_MULTITHREAD $(CPPFLAGS)
-I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(ZSTDDIR)/legacy \
-I$(PRGDIR) -DZSTD_MULTITHREAD -DZSTD_LEGACY_SUPPORT=1 $(CPPFLAGS)
FUZZ_EXTRA_FLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
-Wstrict-prototypes -Wundef \
@ -47,12 +47,14 @@ ZSTDCOMMON_SRC := $(ZSTDDIR)/common/*.c
ZSTDCOMP_SRC := $(ZSTDDIR)/compress/*.c
ZSTDDECOMP_SRC := $(ZSTDDIR)/decompress/*.c
ZSTDDICT_SRC := $(ZSTDDIR)/dictBuilder/*.c
ZSTDLEGACY_SRC := $(ZSTDDIR)/legacy/*.c
FUZZ_SRC := \
$(FUZZ_SRC) \
$(ZSTDDECOMP_SRC) \
$(ZSTDCOMMON_SRC) \
$(ZSTDCOMP_SRC) \
$(ZSTDDICT_SRC)
$(ZSTDDICT_SRC) \
$(ZSTDLEGACY_SRC)
FUZZ_OBJ := $(patsubst %.c,%.o, $(wildcard $(FUZZ_SRC)))
@ -69,7 +71,9 @@ FUZZ_TARGETS := \
stream_decompress \
block_decompress \
dictionary_round_trip \
dictionary_decompress
dictionary_decompress \
zstd_frame_info \
simple_compress
all: $(FUZZ_TARGETS)
@ -100,6 +104,12 @@ dictionary_round_trip: $(FUZZ_HEADERS) $(FUZZ_OBJ) dictionary_round_trip.o
dictionary_decompress: $(FUZZ_HEADERS) $(FUZZ_OBJ) dictionary_decompress.o
$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) dictionary_decompress.o $(LIB_FUZZING_ENGINE) -o $@
simple_compress: $(FUZZ_HEADERS) $(FUZZ_OBJ) simple_compress.o
$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) simple_compress.o $(LIB_FUZZING_ENGINE) -o $@
zstd_frame_info: $(FUZZ_HEADERS) $(FUZZ_OBJ) zstd_frame_info.o
$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) zstd_frame_info.o $(LIB_FUZZING_ENGINE) -o $@
libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c regression_driver.o
$(AR) $(FUZZ_ARFLAGS) $@ regression_driver.o
@ -122,6 +132,9 @@ corpora/%: corpora/%_seed_corpus.zip
.PHONY: corpora
corpora: $(patsubst %,corpora/%,$(FUZZ_TARGETS))
.PHONY: seedcorpora
seedcorpora: $(patsubst %,corpora/%_seed_corpus.zip,$(FUZZ_TARGETS))
regressiontest: corpora
CC="$(CC)" CXX="$(CXX)" CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(LDFLAGS)" $(PYTHON) ./fuzz.py build all
$(PYTHON) ./fuzz.py regression all
@ -130,7 +143,9 @@ clean:
@$(MAKE) -C $(ZSTDDIR) clean
@$(RM) *.a *.o
@$(RM) simple_round_trip stream_round_trip simple_decompress \
stream_decompress block_decompress block_round_trip
stream_decompress block_decompress block_round_trip \
simple_compress dictionary_round_trip dictionary_decompress \
zstd_frame_info
cleanall:
@$(RM) -r Fuzzer

View File

@ -1,2 +0,0 @@
[libfuzzer]
max_len = 8192

View File

@ -20,43 +20,42 @@
#include "zstd_helpers.h"
static ZSTD_DCtx *dctx = NULL;
static void* rBuf = NULL;
static size_t bufSize = 0;
int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
FUZZ_dict_t dict;
size_t neededBufSize;
uint32_t seed = FUZZ_seed(&src, &size);
neededBufSize = MAX(20 * size, (size_t)256 << 10);
FUZZ_dict_t dict;
ZSTD_DDict* ddict = NULL;
int i;
/* Allocate all buffers and contexts if not already allocated */
if (neededBufSize > bufSize) {
free(rBuf);
rBuf = malloc(neededBufSize);
bufSize = neededBufSize;
FUZZ_ASSERT(rBuf);
}
if (!dctx) {
dctx = ZSTD_createDCtx();
FUZZ_ASSERT(dctx);
}
dict = FUZZ_train(src, size, &seed);
if (FUZZ_rand32(&seed, 0, 1) == 0) {
ZSTD_decompress_usingDict(dctx,
rBuf, neededBufSize,
src, size,
dict.buff, dict.size);
ddict = ZSTD_createDDict(dict.buff, dict.size);
FUZZ_ASSERT(ddict);
} else {
FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced(
dctx, dict.buff, dict.size,
(ZSTD_dictLoadMethod_e)FUZZ_rand32(&seed, 0, 1),
(ZSTD_dictContentType_e)FUZZ_rand32(&seed, 0, 2)));
ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, src, size);
}
/* Run it 10 times over 10 output sizes. Reuse the context and dict. */
for (i = 0; i < 10; ++i) {
size_t const bufSize = FUZZ_rand32(&seed, 0, 2 * size);
void* rBuf = malloc(bufSize);
FUZZ_ASSERT(rBuf);
if (ddict) {
ZSTD_decompress_usingDDict(dctx, rBuf, bufSize, src, size, ddict);
} else {
ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size);
}
free(rBuf);
}
free(dict.buff);
ZSTD_freeDDict(ddict);
#ifndef STATEFUL_FUZZING
ZSTD_freeDCtx(dctx); dctx = NULL;
#endif

View File

@ -36,6 +36,8 @@ def abs_join(a, *p):
'block_decompress',
'dictionary_round_trip',
'dictionary_decompress',
'zstd_frame_info',
'simple_compress',
]
ALL_TARGETS = TARGETS + ['all']
FUZZ_RNG_SEED_SIZE = 4

View File

@ -0,0 +1,47 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
/**
* This fuzz target attempts to compress the fuzzed data with the simple
* compression function, using an output buffer that may be too small, to
* ensure that the compressor never crashes.
*/
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include "fuzz_helpers.h"
#include "zstd.h"
static ZSTD_CCtx *cctx = NULL;
int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
uint32_t seed = FUZZ_seed(&src, &size);
size_t const maxSize = ZSTD_compressBound(size);
int i;
if (!cctx) {
cctx = ZSTD_createCCtx();
FUZZ_ASSERT(cctx);
}
/* Run it 10 times over 10 output sizes. Reuse the context. */
for (i = 0; i < 10; ++i) {
int const level = (int)FUZZ_rand32(&seed, 0, 19 + 3) - 3; /* [-3, 19] */
size_t const bufSize = FUZZ_rand32(&seed, 0, maxSize);
void* rBuf = malloc(bufSize);
FUZZ_ASSERT(rBuf);
ZSTD_compressCCtx(cctx, rBuf, bufSize, src, size, level);
free(rBuf);
}
#ifndef STATEFUL_FUZZING
ZSTD_freeCCtx(cctx); cctx = NULL;
#endif
return 0;
}

View File

@ -19,28 +19,24 @@
#include "zstd.h"
static ZSTD_DCtx *dctx = NULL;
static void* rBuf = NULL;
static size_t bufSize = 0;
int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
size_t neededBufSize;
FUZZ_seed(&src, &size);
neededBufSize = MAX(20 * size, (size_t)256 << 10);
/* Allocate all buffers and contexts if not already allocated */
if (neededBufSize > bufSize) {
free(rBuf);
rBuf = malloc(neededBufSize);
bufSize = neededBufSize;
FUZZ_ASSERT(rBuf);
}
uint32_t seed = FUZZ_seed(&src, &size);
int i;
if (!dctx) {
dctx = ZSTD_createDCtx();
FUZZ_ASSERT(dctx);
}
ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, src, size);
/* Run it 10 times over 10 output sizes. Reuse the context. */
for (i = 0; i < 10; ++i) {
size_t const bufSize = FUZZ_rand32(&seed, 0, 2 * size);
void* rBuf = malloc(bufSize);
FUZZ_ASSERT(rBuf);
ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size);
free(rBuf);
}
#ifndef STATEFUL_FUZZING
ZSTD_freeDCtx(dctx); dctx = NULL;

View File

@ -0,0 +1,43 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
/**
* This fuzz target fuzzes all of the helper functions that consume compressed
* input.
*/
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include "fuzz_helpers.h"
#include "zstd_helpers.h"
int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
ZSTD_frameHeader zfh;
/* Consume the seed to be compatible with the corpora of other decompression
* fuzzers.
*/
FUZZ_seed(&src, &size);
/* You can fuzz any helper functions here that are fast, and take zstd
* compressed data as input. E.g. don't expect the input to be a dictionary,
* so don't fuzz ZSTD_getDictID_fromDict().
*/
ZSTD_getFrameContentSize(src, size);
ZSTD_getDecompressedSize(src, size);
ZSTD_findFrameCompressedSize(src, size);
ZSTD_getDictID_fromFrame(src, size);
ZSTD_findDecompressedSize(src, size);
ZSTD_decompressBound(src, size);
ZSTD_frameHeaderSize(src, size);
ZSTD_isFrame(src, size);
ZSTD_getFrameHeader(&zfh, src, size);
ZSTD_getFrameHeader_advanced(&zfh, src, size, ZSTD_f_zstd1);
return 0;
}

View File

@ -62,10 +62,12 @@ static U32 g_displayLevel = 2;
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
if (g_displayLevel>=4) fflush(stderr); } }
#define DISPLAYUPDATE(l, ...) \
if (g_displayLevel>=l) { \
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
if (g_displayLevel>=4) fflush(stderr); } \
}
/*-*******************************************************
@ -73,7 +75,7 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
*********************************************************/
#undef MIN
#undef MAX
/* Declaring the function is it isn't unused */
/* Declaring the function, to avoid -Wmissing-prototype */
void FUZ_bug976(void);
void FUZ_bug976(void)
{ /* these constants shall not depend on MIN() macro */
@ -247,7 +249,7 @@ static int FUZ_mallocTests_internal(unsigned seed, double compressibility, unsig
/* advanced MT streaming API test */
if (part <= 4)
{ unsigned nbThreads;
{ int nbThreads;
for (nbThreads=1; nbThreads<=4; nbThreads++) {
int compressionLevel;
for (compressionLevel=1; compressionLevel<=6; compressionLevel++) {
@ -261,7 +263,7 @@ static int FUZ_mallocTests_internal(unsigned seed, double compressibility, unsig
CHECK_Z( ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_continue) );
while ( ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end) ) {}
ZSTD_freeCCtx(cctx);
DISPLAYLEVEL(3, "compress_generic,-T%u,continue level %i : ",
DISPLAYLEVEL(3, "compress_generic,-T%i,continue level %i : ",
nbThreads, compressionLevel);
FUZ_displayMallocStats(malcount);
} } }
@ -768,13 +770,11 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : init CCtx for small level %u (should work again) : ", testNb++, 1);
{ size_t const r = ZSTD_compressBegin(staticCCtx, 1);
if (ZSTD_isError(r)) goto _output_error; }
CHECK( ZSTD_compressBegin(staticCCtx, 1) );
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : init CStream for small level %u : ", testNb++, 1);
{ size_t const r = ZSTD_initCStream(staticCCtx, 1);
if (ZSTD_isError(r)) goto _output_error; }
CHECK( ZSTD_initCStream(staticCCtx, 1) );
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : init CStream with dictionary (should fail) : ", testNb++);
@ -1059,7 +1059,7 @@ static int basicUnitTests(U32 seed, double compressibility)
/* Dictionary and dictBuilder tests */
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
size_t const dictBufferCapacity = 16 KB;
void* dictBuffer = malloc(dictBufferCapacity);
void* const dictBuffer = malloc(dictBufferCapacity);
size_t const totalSampleSize = 1 MB;
size_t const sampleUnitSize = 8 KB;
U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize);
@ -1104,6 +1104,22 @@ static int basicUnitTests(U32 seed, double compressibility)
}
DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)dictSize);
DISPLAYLEVEL(3, "test%3i : COVER dictBuilder with shrinkDict: ", testNb++);
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
{ ZDICT_cover_params_t coverParams;
memset(&coverParams, 0, sizeof(coverParams));
coverParams.steps = 8;
coverParams.nbThreads = 4;
coverParams.shrinkDict = 1;
coverParams.shrinkDictMaxRegression = 1;
dictSize = ZDICT_optimizeTrainFromBuffer_cover(
dictBuffer, dictBufferCapacity,
CNBuffer, samplesSizes, nbSamples/8, /* less samples for faster tests */
&coverParams);
if (ZDICT_isError(dictSize)) goto _output_error;
}
DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)dictSize);
DISPLAYLEVEL(3, "test%3i : Multithreaded FASTCOVER dictBuilder : ", testNb++);
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
{ ZDICT_fastCover_params_t fastCoverParams;
@ -1118,6 +1134,22 @@ static int basicUnitTests(U32 seed, double compressibility)
}
DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)dictSize);
DISPLAYLEVEL(3, "test%3i : FASTCOVER dictBuilder with shrinkDict: ", testNb++);
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
{ ZDICT_fastCover_params_t fastCoverParams;
memset(&fastCoverParams, 0, sizeof(fastCoverParams));
fastCoverParams.steps = 8;
fastCoverParams.nbThreads = 4;
fastCoverParams.shrinkDict = 1;
fastCoverParams.shrinkDictMaxRegression = 1;
dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(
dictBuffer, dictBufferCapacity,
CNBuffer, samplesSizes, nbSamples,
&fastCoverParams);
if (ZDICT_isError(dictSize)) goto _output_error;
}
DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)dictSize);
DISPLAYLEVEL(3, "test%3i : check dictID : ", testNb++);
dictID = ZDICT_getDictID(dictBuffer, dictSize);
if (dictID==0) goto _output_error;
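
The two new shrinkDict tests exercise the trainers' dictionary-shrinking
mode: as the parameter names suggest, the trainer also evaluates smaller
candidate dictionaries and keeps shrinking while the compression-ratio
regression stays within shrinkDictMaxRegression percent. A minimal sketch of
the same call on caller-owned buffers (dictBuf, dictCap, samples,
sampleSizes, and nbSamples are placeholders):

ZDICT_cover_params_t params;
memset(&params, 0, sizeof(params));
params.steps = 8;                    /* granularity of the parameter search */
params.nbThreads = 4;                /* parallelize the search */
params.shrinkDict = 1;               /* also try smaller dictionaries */
params.shrinkDictMaxRegression = 1;  /* tolerate <= 1% ratio regression */
{   size_t const dsize = ZDICT_optimizeTrainFromBuffer_cover(
            dictBuf, dictCap, samples, sampleSizes, nbSamples, &params);
    if (ZDICT_isError(dsize))
        fprintf(stderr, "training failed: %s\n", ZDICT_getErrorName(dsize));
}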
@ -1164,6 +1196,7 @@ static int basicUnitTests(U32 seed, double compressibility)
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize,
ZSTD_dlm_byRef, ZSTD_dct_auto,
cParams, ZSTD_defaultCMem);
assert(cdict != NULL);
DISPLAYLEVEL(3, "(size : %u) : ", (unsigned)ZSTD_sizeof_CDict(cdict));
cSize = ZSTD_compress_usingCDict(cctx, compressedBuffer, compressedBufferSize,
CNBuffer, CNBuffSize, cdict);
@ -1221,8 +1254,11 @@ static int basicUnitTests(U32 seed, double compressibility)
{ ZSTD_frameParameters const fParams = { 0 /* frameSize */, 1 /* checksum */, 1 /* noDictID*/ };
ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize);
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cParams, ZSTD_defaultCMem);
cSize = ZSTD_compress_usingCDict_advanced(cctx, compressedBuffer, compressedBufferSize,
CNBuffer, CNBuffSize, cdict, fParams);
assert(cdict != NULL);
cSize = ZSTD_compress_usingCDict_advanced(cctx,
compressedBuffer, compressedBufferSize,
CNBuffer, CNBuffSize,
cdict, fParams);
ZSTD_freeCDict(cdict);
if (ZSTD_isError(cSize)) goto _output_error;
}
@ -1235,7 +1271,8 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(3, "OK (unknown)\n");
DISPLAYLEVEL(3, "test%3i : frame built without dictID should be decompressible : ", testNb++);
{ ZSTD_DCtx* const dctx = ZSTD_createDCtx(); assert(dctx != NULL);
{ ZSTD_DCtx* const dctx = ZSTD_createDCtx();
assert(dctx != NULL);
CHECKPLUS(r, ZSTD_decompress_usingDict(dctx,
decodedBuffer, CNBuffSize,
compressedBuffer, cSize,
@ -2459,7 +2496,7 @@ static unsigned readU32FromChar(const char** stringPtr)
* If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
* @return 0 and doesn't modify *stringPtr otherwise.
*/
static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
static int longCommandWArg(const char** stringPtr, const char* longCommand)
{
size_t const comSize = strlen(longCommand);
int const result = !strncmp(*stringPtr, longCommand, comSize);
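
As the comment above describes, the helper both tests for the prefix and, on
a match, leaves *stringPtr pointing just past it, so the numeric argument can
be parsed immediately. A hypothetical call site (the "--memory=" flag is
illustrative, not taken from this diff):

/* Sketch: parse a "--memory=<n>" style option. */
if (longCommandWArg(&argument, "--memory=")) {
    unsigned const memLog = readU32FromChar(&argument);
    /* ... apply memLog ... */
}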
@ -2519,7 +2556,7 @@ int main(int argc, const char** argv)
case 'i':
argument++; maxDuration = 0;
nbTests = readU32FromChar(&argument);
nbTests = (int)readU32FromChar(&argument);
break;
case 'T':
@ -2539,12 +2576,12 @@ int main(int argc, const char** argv)
case 't':
argument++;
testNb = readU32FromChar(&argument);
testNb = (int)readU32FromChar(&argument);
break;
case 'P': /* compressibility % */
argument++;
proba = readU32FromChar(&argument);
proba = (int)readU32FromChar(&argument);
if (proba>100) proba = 100;
break;

View File

@ -609,8 +609,8 @@ compareResultLT(const BMK_benchResult_t result1, const BMK_benchResult_t result2
static constraint_t relaxTarget(constraint_t target) {
target.cMem = (U32)-1;
target.cSpeed *= ((double)g_strictness) / 100;
target.dSpeed *= ((double)g_strictness) / 100;
target.cSpeed = (target.cSpeed * g_strictness) / 100;
target.dSpeed = (target.dSpeed * g_strictness) / 100;
return target;
}
@ -1737,8 +1737,8 @@ static int allBench(BMK_benchResult_t* resultPtr,
/* optimistic assumption of benchres */
{ BMK_benchResult_t resultMax = benchres;
resultMax.cSpeed *= uncertaintyConstantC * VARIANCE;
resultMax.dSpeed *= uncertaintyConstantD * VARIANCE;
resultMax.cSpeed = (unsigned long long)(resultMax.cSpeed * uncertaintyConstantC * VARIANCE);
resultMax.dSpeed = (unsigned long long)(resultMax.dSpeed * uncertaintyConstantD * VARIANCE);
/* disregard infeasible results in feas mode */
/* disregard if resultMax < winner in infeas mode */
@ -2429,9 +2429,9 @@ optimizeForSize(const char* const * const fileNamesTable, const size_t nbFiles,
}
g_lvltarget = winner.result;
g_lvltarget.cSpeed *= ((double)g_strictness) / 100;
g_lvltarget.dSpeed *= ((double)g_strictness) / 100;
g_lvltarget.cSize /= ((double)g_strictness) / 100;
g_lvltarget.cSpeed = (g_lvltarget.cSpeed * g_strictness) / 100;
g_lvltarget.dSpeed = (g_lvltarget.dSpeed * g_strictness) / 100;
g_lvltarget.cSize = (g_lvltarget.cSize * 100) / g_strictness;
target.cSpeed = (U32)g_lvltarget.cSpeed;
target.dSpeed = (U32)g_lvltarget.dSpeed;
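
All three hunks in this file replace floating-point scaling with integer
scaling. Multiplying by the percentage before dividing by 100 keeps the
result exact up to truncation, at the price of needing headroom against
overflow in the intermediate product; a sketch of the pattern:

/* Sketch: scale a value by a percentage in integer arithmetic.
 * The caller must ensure v * pct cannot overflow the type. */
static unsigned long long scalePct(unsigned long long v, unsigned pct)
{
    return (v * pct) / 100;
}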

File diff suppressed because it is too large

View File

@ -90,6 +90,7 @@ static int testWait(size_t numThreads, size_t queueSize) {
typedef struct {
ZSTD_pthread_mutex_t mut;
int countdown;
int val;
int max;
ZSTD_pthread_cond_t cond;
@ -97,48 +98,56 @@ typedef struct {
static void waitLongFn(void *opaque) {
poolTest_t* const test = (poolTest_t*) opaque;
UTIL_sleepMilli(10);
ZSTD_pthread_mutex_lock(&test->mut);
test->val = test->val + 1;
if (test->val == test->max)
ZSTD_pthread_cond_signal(&test->cond);
test->val++;
if (test->val > test->max)
test->max = test->val;
ZSTD_pthread_mutex_unlock(&test->mut);
UTIL_sleepMilli(10);
ZSTD_pthread_mutex_lock(&test->mut);
test->val--;
test->countdown--;
if (test->countdown == 0)
ZSTD_pthread_cond_signal(&test->cond);
ZSTD_pthread_mutex_unlock(&test->mut);
}
static int testThreadReduction_internal(POOL_ctx* ctx, poolTest_t test)
{
int const nbWaits = 16;
UTIL_time_t startTime;
U64 time4threads, time2threads;
test.countdown = nbWaits;
test.val = 0;
test.max = nbWaits;
test.max = 0;
startTime = UTIL_getTime();
{ int i;
for (i=0; i<nbWaits; i++)
POOL_add(ctx, &waitLongFn, &test);
}
ZSTD_pthread_mutex_lock(&test.mut);
ZSTD_pthread_cond_wait(&test.cond, &test.mut);
ASSERT_EQ(test.val, nbWaits);
while (test.countdown > 0)
ZSTD_pthread_cond_wait(&test.cond, &test.mut);
ASSERT_EQ(test.val, 0);
ASSERT_EQ(test.max, 4);
ZSTD_pthread_mutex_unlock(&test.mut);
time4threads = UTIL_clockSpanNano(startTime);
ASSERT_EQ( POOL_resize(ctx, 2/*nbThreads*/) , 0 );
test.countdown = nbWaits;
test.val = 0;
startTime = UTIL_getTime();
test.max = 0;
{ int i;
for (i=0; i<nbWaits; i++)
POOL_add(ctx, &waitLongFn, &test);
}
ZSTD_pthread_mutex_lock(&test.mut);
ZSTD_pthread_cond_wait(&test.cond, &test.mut);
ASSERT_EQ(test.val, nbWaits);
while (test.countdown > 0)
ZSTD_pthread_cond_wait(&test.cond, &test.mut);
ASSERT_EQ(test.val, 0);
ASSERT_EQ(test.max, 2);
ZSTD_pthread_mutex_unlock(&test.mut);
time2threads = UTIL_clockSpanNano(startTime);
if (time4threads >= time2threads) return 1; /* check 4 threads were effectively faster than 2 */
return 0;
}
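
The reworked wait replaces a single ZSTD_pthread_cond_wait with a loop that
re-checks the predicate, the standard guard against spurious wakeups and
against the signal firing before the waiter has gone to sleep. A generic
sketch of the pattern, written against plain pthreads rather than zstd's
ZSTD_pthread wrappers:

#include <pthread.h>

/* Sketch: block until *countdown reaches zero, re-checking the
 * predicate under the mutex after every wakeup. */
static void waitForZero(pthread_mutex_t *mut, pthread_cond_t *cond,
                        const int *countdown)
{
    pthread_mutex_lock(mut);
    while (*countdown > 0)
        pthread_cond_wait(cond, mut);
    pthread_mutex_unlock(mut);
}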
@ -246,7 +255,7 @@ int main(int argc, const char **argv) {
printf("FAIL: thread reduction not effective \n");
return 1;
} else {
printf("SUCCESS: thread reduction effective (slower execution) \n");
printf("SUCCESS: thread reduction effective \n");
}
if (testAbruptEnding()) {

File diff suppressed because it is too large

View File

@ -1184,6 +1184,58 @@ static int basicUnitTests(U32 seed, double compressibility)
}
DISPLAYLEVEL(3, "OK \n");
/* Small Sequence Section bug */
DISPLAYLEVEL(3, "test%3i : decompress blocks with small sequences section : ", testNb++);
{ /* This test consists of 3 blocks. Each block has one sequence.
The sequence has literal length of 10, match length of 10 and offset of 10.
The sequence value and compression mode for the blocks are following:
The order of values is ll, ml, of.
- First block : (10, 7, 13) (rle, rle, rle)
- size of sequences section: 6 bytes (1 byte for nbSeq, 1 byte for encoding mode, 3 bytes for rle, 1 byte bitstream)
- Second block : (10, 7, 1) (repeat, repeat, rle)
- size of sequences section: 4 bytes (1 byte for nbSeq, 1 byte for encoding mode, 1 byte for rle, 1 byte bitstream)
- Third block : (10, 7, 1) (repeat, repeat, repeat)
- size of sequences section: 3 bytes (1 byte for nbSeq, 1 byte for encoding mode, 1 byte bitstream) */
unsigned char compressed[] = {
0x28, 0xb5, 0x2f, 0xfd, 0x24, 0x3c, 0x35, 0x01, 0x00, 0xf0, 0x85, 0x08,
0xc2, 0xc4, 0x70, 0xcf, 0xd7, 0xc0, 0x96, 0x7e, 0x4c, 0x6b, 0xa9, 0x8b,
0xbc, 0xc5, 0xb6, 0xd9, 0x7f, 0x4c, 0xf1, 0x05, 0xa6, 0x54, 0xef, 0xac,
0x69, 0x94, 0x89, 0x1c, 0x03, 0x44, 0x0a, 0x07, 0x00, 0xb4, 0x04, 0x80,
0x40, 0x0a, 0xa4
};
unsigned int compressedSize = 51;
unsigned char decompressed[] = {
0x85, 0x08, 0xc2, 0xc4, 0x70, 0xcf, 0xd7, 0xc0, 0x96, 0x7e, 0x85, 0x08,
0xc2, 0xc4, 0x70, 0xcf, 0xd7, 0xc0, 0x96, 0x7e, 0x4c, 0x6b, 0xa9, 0x8b,
0xbc, 0xc5, 0xb6, 0xd9, 0x7f, 0x4c, 0x4c, 0x6b, 0xa9, 0x8b, 0xbc, 0xc5,
0xb6, 0xd9, 0x7f, 0x4c, 0xf1, 0x05, 0xa6, 0x54, 0xef, 0xac, 0x69, 0x94,
0x89, 0x1c, 0xf1, 0x05, 0xa6, 0x54, 0xef, 0xac, 0x69, 0x94, 0x89, 0x1c
};
unsigned int decompressedSize = 60;
ZSTD_DStream* const zds = ZSTD_createDStream();
if (zds==NULL) goto _output_error;
CHECK_Z( ZSTD_initDStream(zds) );
inBuff.src = compressed;
inBuff.size = compressedSize;
inBuff.pos = 0;
outBuff.dst = decodedBuffer;
outBuff.size = CNBufferSize;
outBuff.pos = 0;
CHECK(ZSTD_decompressStream(zds, &outBuff, &inBuff) != 0,
"Decompress did not reach the end of frame");
CHECK(inBuff.pos != inBuff.size, "Decompress did not fully consume input");
CHECK(outBuff.pos != decompressedSize, "Decompressed size does not match");
CHECK(memcmp(outBuff.dst, decompressed, decompressedSize) != 0,
"Decompressed data does not match");
ZSTD_freeDStream(zds);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : dictionary + uncompressible block + reusing tables checks offset table validity: ", testNb++);
{ ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(
dictionary.start, dictionary.filled,

View File

@ -1,11 +1,14 @@
# object artifacts
*.o
# Default result files
_*
example.*
example
example_zstd.*
example_gz.*
fitblk.*
fitblk
fitblk_zstd.*
zwrapbench.*
zwrapbench
foo.gz
minigzip