From 902c8ce7dc1436627a242d98c30ce000ee4cd10a Mon Sep 17 00:00:00 2001
From: Baptiste Daroussin <bapt@FreeBSD.org>
Date: Tue, 22 Aug 2017 11:02:59 +0000
Subject: [PATCH] Import zstd 1.3.1

---
 CONTRIBUTING.md                           |   2 +-
 Makefile                                  |  16 +-
 NEWS                                      |  15 +
 PATENTS                                   |  33 --
 README.md                                 |   4 +-
 circle.yml                                |   6 +-
 doc/educational_decoder/harness.c         |  11 +-
 doc/educational_decoder/zstd_decompress.c | 425 +++++++--------
 doc/educational_decoder/zstd_decompress.h |  54 +-
 doc/zstd_compression_format.md            |  38 +-
 doc/zstd_manual.html                      | 104 ++--
 lib/common/bitstream.h                    |  81 ++-
 lib/common/error_private.c                |  18 +-
 lib/common/error_private.h                |   8 +-
 lib/common/fse.h                          |  16 +-
 lib/common/fse_decompress.c               |  25 +-
 lib/common/huf.h                          |  11 +-
 lib/common/mem.h                          |  12 +-
 lib/common/pool.c                         |  88 ++-
 lib/common/pool.h                         |  20 +-
 lib/common/threading.h                    |  11 +-
 lib/common/xxhash.c                       |  50 +-
 lib/common/zstd_common.c                  |   8 +-
 lib/common/zstd_errors.h                  |  62 ++-
 lib/common/zstd_internal.h                | 113 ++--
 lib/compress/fse_compress.c               |  26 +-
 lib/compress/huf_compress.c               |   7 +-
 lib/compress/zstd_compress.c              | 622 +++++++++++-----------
 lib/compress/zstd_opt.h                   | 234 ++++----
 lib/compress/zstdmt_compress.c            | 377 +++++++------
 lib/compress/zstdmt_compress.h            |  19 +-
 lib/decompress/huf_decompress.c           |  32 +-
 lib/decompress/zstd_decompress.c          | 210 ++++----
 lib/deprecated/zbuff.h                    |   8 +-
 lib/deprecated/zbuff_common.c             |   9 +-
 lib/deprecated/zbuff_compress.c           |   8 +-
 lib/deprecated/zbuff_decompress.c         |   8 +-
 lib/dictBuilder/cover.c                   |  16 +-
 lib/dictBuilder/zdict.c                   |  10 +-
 lib/dictBuilder/zdict.h                   |   8 +-
 lib/legacy/zstd_legacy.h                  |  12 +-
 lib/legacy/zstd_v01.c                     |   8 +-
 lib/legacy/zstd_v01.h                     |   8 +-
 lib/legacy/zstd_v02.c                     |   8 +-
 lib/legacy/zstd_v02.h                     |   8 +-
 lib/legacy/zstd_v03.c                     |   8 +-
 lib/legacy/zstd_v03.h                     |   8 +-
 lib/legacy/zstd_v04.c                     |  10 +-
 lib/legacy/zstd_v04.h                     |   8 +-
 lib/legacy/zstd_v05.c                     |  10 +-
 lib/legacy/zstd_v05.h                     |   8 +-
 lib/legacy/zstd_v06.c                     |  10 +-
 lib/legacy/zstd_v06.h                     |   8 +-
 lib/legacy/zstd_v07.c                     |   8 +-
 lib/legacy/zstd_v07.h                     |   8 +-
 lib/zstd.h                                |  30 +-
 programs/.gitignore                       |   4 +
 programs/Makefile                         |  39 +-
 programs/README.md                        |  45 +-
 programs/bench.c                          |   8 +-
 programs/bench.h                          |   8 +-
 programs/datagen.c                        |   8 +-
 programs/datagen.h                        |  10 +-
 programs/dibio.c                          |   8 +-
 programs/dibio.h                          |   8 +-
 programs/fileio.c                         | 467 ++++++++--------
 programs/fileio.h                         |   8 +-
 programs/platform.h                       |  12 +-
 programs/util.h                           |  12 +-
 programs/zstd.1                           |   6 +-
 programs/zstd.1.md                        |   5 +-
 programs/zstdcli.c                        | 106 ++--
 tests/Makefile                            |  28 +-
 tests/datagencli.c                        |   8 +-
 tests/decodecorpus.c                      |  10 +-
 tests/files/huffman-compressed-larger     | Bin 0 -> 143 bytes
 tests/fullbench.c                         |  15 +-
 tests/fuzz/Makefile                       | 108 ++++
 tests/fuzz/README.md                      |  34 ++
 tests/fuzz/fuzz.h                         |  52 ++
 tests/fuzz/fuzz_helpers.h                 |  70 +++
 tests/fuzz/regression_driver.c            |  69 +++
 tests/fuzz/simple_decompress.c            |  46 ++
 tests/fuzz/simple_round_trip.c            |  81 +++
 tests/fuzz/stream_decompress.c            |  85 +++
 tests/fuzz/stream_round_trip.c            | 153 ++++++
 tests/fuzzer.c                            | 305 +++++++++--
 tests/invalidDictionaries.c               |   9 +
 tests/legacy.c                            |  11 +-
 tests/longmatch.c                         |  10 +
 tests/namespaceTest.c                     |   8 +-
 tests/paramgrill.c                        |   8 +-
 tests/playTests.sh                        |  42 +-
 tests/{pool.c => poolTests.c}             |  41 +-
 tests/roundTripCrash.c                    |   8 +-
 tests/symbols.c                           |  10 +
 tests/zbufftest.c                         |   8 +-
 tests/zstreamtest.c                       |  34 +-
 zlibWrapper/examples/zwrapbench.c         |  10 +-
 zlibWrapper/gzcompatibility.h             |  16 +-
 zlibWrapper/gzlib.c                       |   2 +-
 zlibWrapper/gzread.c                      |   4 +-
 zlibWrapper/gzwrite.c                     |   4 +-
 zlibWrapper/zstd_zlibwrapper.c            |  10 +-
 zlibWrapper/zstd_zlibwrapper.h            |  18 +-
 105 files changed, 3076 insertions(+), 1980 deletions(-)
 delete mode 100644 PATENTS
 create mode 100644 tests/files/huffman-compressed-larger
 create mode 100644 tests/fuzz/Makefile
 create mode 100644 tests/fuzz/README.md
 create mode 100644 tests/fuzz/fuzz.h
 create mode 100644 tests/fuzz/fuzz_helpers.h
 create mode 100644 tests/fuzz/regression_driver.c
 create mode 100644 tests/fuzz/simple_decompress.c
 create mode 100644 tests/fuzz/simple_round_trip.c
 create mode 100644 tests/fuzz/stream_decompress.c
 create mode 100644 tests/fuzz/stream_round_trip.c
 rename tests/{pool.c => poolTests.c} (62%)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index edf5b7f47a1f..dd013f8084fa 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -39,4 +39,4 @@ outlined on that page and do not file a public issue.
 
 ## License
 By contributing to Zstandard, you agree that your contributions will be licensed
-under the [LICENSE](LICENSE) file in the root directory of this source tree.
+under both the [LICENSE](LICENSE) file and the [COPYING](COPYING) file in the root directory of this source tree.
diff --git a/Makefile b/Makefile
index ac3034c9b7bf..a72f99fcb9d2 100644
--- a/Makefile
+++ b/Makefile
@@ -74,12 +74,9 @@ zstdmt:
 zlibwrapper:
 	$(MAKE) -C $(ZWRAPDIR) test
 
-.PHONY: shortest
-shortest:
-	$(MAKE) -C $(TESTDIR) $@
-
-.PHONY: test
-test:
+.PHONY: test shortest
+test shortest:
+	$(MAKE) -C $(PRGDIR) allVariants
 	$(MAKE) -C $(TESTDIR) $@
 
 .PHONY: examples
@@ -146,6 +143,11 @@ gcc6build: clean
 	gcc-6 -v
 	CC=gcc-6 $(MAKE) all MOREFLAGS="-Werror"
 
+.PHONY: gcc7build
+gcc7build: clean
+	gcc-7 -v
+	CC=gcc-7 $(MAKE) all MOREFLAGS="-Werror"
+
 .PHONY: clangbuild
 clangbuild: clean
 	clang -v
@@ -180,7 +182,7 @@ ppc64fuzz: clean
 	CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static MOREFLAGS="-m64 -static" FUZZER_FLAGS=--no-big-tests $(MAKE) -C $(TESTDIR) fuzztest
 
 gpptest: clean
-	CC=g++ $(MAKE) -C $(PRGDIR) all CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
+	CC=$(CXX) $(MAKE) -C $(PRGDIR) all CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
 
 gcc5test: clean
 	gcc-5 -v
diff --git a/NEWS b/NEWS
index d23a58f02046..59687532fe22 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,18 @@
+v1.3.1
+New license : BSD + GPLv2
+perf: substantially decreased memory usage in Multi-threading mode, thanks to reports by Tino Reichardt (@mcmilk)
+perf: Multi-threading supports up to 256 threads. Cap at 256 when more are requested (#760)
+cli : improved and fixed --list command, by @ib (#772)
+cli : command -vV to list supported formats, by @ib (#771)
+build : fixed binary variants, reported by @svenha (#788)
+build : fix Visual compilation for non x86/x64 targets, reported by Greg Slazinski (@GregSlazinski) (#718)
+API exp : breaking change : ZSTD_getframeHeader() provides more information
+API exp : breaking change : pinned down values of error codes
+doc : fixed huffman example, by Ulrich Kunitz (@ulikunitz)
+new : contrib/adaptive-compression, I/O driven compression strength, by Paul Cruz (@paulcruz74)
+new : contrib/long_distance_matching, statistics by Stella Lau (@stellamplau)
+updated : contrib/linux-kernel, by Nick Terrell (@terrelln)
+
 v1.3.0
 cli : new : `--list` command, by Paul Cruz
 cli : changed : xz/lzma support enabled by default
diff --git a/PATENTS b/PATENTS
deleted file mode 100644
index 15b4a2ea5ca5..000000000000
--- a/PATENTS
+++ /dev/null
@@ -1,33 +0,0 @@
-Additional Grant of Patent Rights Version 2
-
-"Software" means the Zstandard software distributed by Facebook, Inc.
-
-Facebook, Inc. ("Facebook") hereby grants to each recipient of the Software
-("you") a perpetual, worldwide, royalty-free, non-exclusive, irrevocable
-(subject to the termination provision below) license under any Necessary
-Claims, to make, have made, use, sell, offer to sell, import, and otherwise
-transfer the Software. For avoidance of doubt, no license is granted under
-Facebook’s rights in any patent claims that are infringed by (i) modifications
-to the Software made by you or any third party or (ii) the Software in
-combination with any software or other technology.
-
-The license granted hereunder will terminate, automatically and without notice,
-if you (or any of your subsidiaries, corporate affiliates or agents) initiate
-directly or indirectly, or take a direct financial interest in, any Patent
-Assertion: (i) against Facebook or any of its subsidiaries or corporate
-affiliates, (ii) against any party if such Patent Assertion arises in whole or
-in part from any software, technology, product or service of Facebook or any of
-its subsidiaries or corporate affiliates, or (iii) against any party relating
-to the Software. Notwithstanding the foregoing, if Facebook or any of its
-subsidiaries or corporate affiliates files a lawsuit alleging patent
-infringement against you in the first instance, and you respond by filing a
-patent infringement counterclaim in that lawsuit against that party that is
-unrelated to the Software, the license granted hereunder will not terminate
-under section (i) of this paragraph due to such counterclaim.
-
-A "Necessary Claim" is a claim of a patent owned by Facebook that is
-necessarily infringed by the Software standing alone.
-
-A "Patent Assertion" is any lawsuit or other action alleging direct, indirect,
-or contributory infringement or inducement to infringe any patent, including a
-cross-claim or counterclaim.
diff --git a/README.md b/README.md
index f37be4542c69..377ae0843978 100644
--- a/README.md
+++ b/README.md
@@ -134,12 +134,12 @@ Going into `build` directory, you will find additional possibilities :
 
 ### Status
 
-Zstandard is currently deployed within Facebook. It is used daily to compress and decompress very large amounts of data in multiple formats and use cases.
+Zstandard is currently deployed within Facebook. It is used continuously to compress large amounts of data in multiple formats and use cases.
 Zstandard is considered safe for production environments.
 
 ### License
 
-Zstandard is [BSD-licensed](LICENSE). We also provide an [additional patent grant](PATENTS).
+Zstandard is dual-licensed under [BSD](LICENSE) and [GPLv2](COPYING).
 
 ### Contributing
 
diff --git a/circle.yml b/circle.yml
index 218e33bfc333..8c2bd30d330d 100644
--- a/circle.yml
+++ b/circle.yml
@@ -3,7 +3,7 @@ dependencies:
     - sudo dpkg --add-architecture i386
     - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test; sudo apt-get -y -qq update
     - sudo apt-get -y install gcc-powerpc-linux-gnu gcc-arm-linux-gnueabi libc6-dev-armel-cross gcc-aarch64-linux-gnu libc6-dev-arm64-cross
-    - sudo apt-get -y install libstdc++-6-dev clang gcc g++ gcc-5 gcc-6 zlib1g-dev liblzma-dev
+    - sudo apt-get -y install libstdc++-7-dev clang gcc g++ gcc-5 gcc-6 gcc-7 zlib1g-dev liblzma-dev
     - sudo apt-get -y install linux-libc-dev:i386 libc6-dev-i386
 
 test:
@@ -45,7 +45,7 @@ test:
         parallel: true
     - ? |
         if [[ "$CIRCLE_NODE_INDEX" == "0" ]]                                    ; then make ppc64build   && make clean; fi &&
-        if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then true              && make clean; fi #could add another test here
+        if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then make gcc7build    && make clean; fi #could add another test here
       :
         parallel: true
     - ? |
@@ -64,7 +64,7 @@ test:
     #- gcc -v; make -C tests test32 MOREFLAGS="-I/usr/include/x86_64-linux-gnu" && make clean
     #- make uasan               && make clean
     #- make asan32              && make clean
-    #- make -C tests test32 CC=clang MOREFLAGS="-g -fsanitize=address -I/usr/include/x86_64-linux-gnu" 
+    #- make -C tests test32 CC=clang MOREFLAGS="-g -fsanitize=address -I/usr/include/x86_64-linux-gnu"
   # Valgrind tests
     #- CFLAGS="-O1 -g" make -C zlibWrapper valgrindTest && make clean
     #- make -C tests valgrindTest && make clean
diff --git a/doc/educational_decoder/harness.c b/doc/educational_decoder/harness.c
index 683278dfcd01..982e066e28f0 100644
--- a/doc/educational_decoder/harness.c
+++ b/doc/educational_decoder/harness.c
@@ -87,7 +87,7 @@ int main(int argc, char **argv) {
     }
 
     size_t decompressed_size = ZSTD_get_decompressed_size(input, input_size);
-    if (decompressed_size == -1) {
+    if (decompressed_size == (size_t)-1) {
         decompressed_size = MAX_COMPRESSION_RATIO * input_size;
         fprintf(stderr, "WARNING: Compressed data does not contain "
                         "decompressed size, going to assume the compression "
@@ -106,9 +106,15 @@ int main(int argc, char **argv) {
         return 1;
     }
 
+    dictionary_t* const parsed_dict = create_dictionary();
+    if (dict) {
+        parse_dictionary(parsed_dict, dict, dict_size);
+    }
     size_t decompressed =
         ZSTD_decompress_with_dict(output, decompressed_size,
-                                  input, input_size, dict, dict_size);
+                                  input, input_size, parsed_dict);
+
+    free_dictionary(parsed_dict);
 
     write_file(argv[2], output, decompressed);
 
@@ -117,4 +123,3 @@ int main(int argc, char **argv) {
     free(dict);
     input = output = dict = NULL;
 }
-
diff --git a/doc/educational_decoder/zstd_decompress.c b/doc/educational_decoder/zstd_decompress.c
index 7c8d8114d401..af10db528d2a 100644
--- a/doc/educational_decoder/zstd_decompress.c
+++ b/doc/educational_decoder/zstd_decompress.c
@@ -14,21 +14,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-
-/// Zstandard decompression functions.
-/// `dst` must point to a space at least as large as the reconstructed output.
-size_t ZSTD_decompress(void *const dst, const size_t dst_len,
-                       const void *const src, const size_t src_len);
-/// If `dict != NULL` and `dict_len >= 8`, does the same thing as
-/// `ZSTD_decompress` but uses the provided dict
-size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len,
-                                 const void *const src, const size_t src_len,
-                                 const void *const dict, const size_t dict_len);
-
-/// Get the decompressed size of an input stream so memory can be allocated in
-/// advance
-/// Returns -1 if the size can't be determined
-size_t ZSTD_get_decompressed_size(const void *const src, const size_t src_len);
+#include "zstd_decompress.h"
 
 /******* UTILITY MACROS AND TYPES *********************************************/
 // Max block size decompressed size is 128 KB and literal blocks can't be
@@ -108,10 +94,10 @@ static inline size_t IO_istream_len(const istream_t *const in);
 
 /// Advances the stream by `len` bytes, and returns a pointer to the chunk that
 /// was skipped.  The stream must be byte aligned.
-static inline const u8 *IO_read_bytes(istream_t *const in, size_t len);
+static inline const u8 *IO_get_read_ptr(istream_t *const in, size_t len);
 /// Advances the stream by `len` bytes, and returns a pointer to the chunk that
 /// was skipped so it can be written to.
-static inline u8 *IO_write_bytes(ostream_t *const out, size_t len);
+static inline u8 *IO_get_write_ptr(ostream_t *const out, size_t len);
 
 /// Advance the inner state by `len` bytes.  The stream must be byte aligned.
 static inline void IO_advance_input(istream_t *const in, size_t len);
@@ -307,7 +293,7 @@ typedef struct {
 
 /// The decoded contents of a dictionary so that it doesn't have to be repeated
 /// for each frame that uses it
-typedef struct {
+struct dictionary_s {
     // Entropy tables
     HUF_dtable literals_dtable;
     FSE_dtable ll_dtable;
@@ -322,7 +308,7 @@ typedef struct {
     u64 previous_offsets[3];
 
     u32 dictionary_id;
-} dictionary_t;
+};
 
 /// A tuple containing the parts necessary to decode and execute a ZSTD sequence
 /// command
@@ -367,27 +353,36 @@ static void execute_sequences(frame_context_t *const ctx, ostream_t *const out,
                               const sequence_command_t *const sequences,
                               const size_t num_sequences);
 
-// Parse a provided dictionary blob for use in decompression
-static void parse_dictionary(dictionary_t *const dict, const u8 *src,
-                             size_t src_len);
-static void free_dictionary(dictionary_t *const dict);
+// Copies literals and returns the total literal length that was copied
+static u32 copy_literals(const size_t seq, istream_t *litstream,
+                         ostream_t *const out);
+
+// Given an offset code from a sequence command (either an actual offset value
+// or an index for previous offset), computes the correct offset and updates
+// the offset history
+static size_t compute_offset(sequence_command_t seq, u64 *const offset_hist);
+
+// Given an offset, match length, and total output, as well as the frame
+// context for the dictionary, determines if the dictionary is used and
+// executes the copy operation
+static void execute_match_copy(frame_context_t *const ctx, size_t offset,
+                              size_t match_length, size_t total_output,
+                              ostream_t *const out);
+
 /******* END ZSTD HELPER STRUCTS AND PROTOTYPES *******************************/
 
 size_t ZSTD_decompress(void *const dst, const size_t dst_len,
                        const void *const src, const size_t src_len) {
-    return ZSTD_decompress_with_dict(dst, dst_len, src, src_len, NULL, 0);
+    dictionary_t* uninit_dict = create_dictionary();
+    size_t const decomp_size = ZSTD_decompress_with_dict(dst, dst_len, src,
+                                                         src_len, uninit_dict);
+    free_dictionary(uninit_dict);
+    return decomp_size;
 }
 
 size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len,
                                  const void *const src, const size_t src_len,
-                                 const void *const dict,
-                                 const size_t dict_len) {
-    dictionary_t parsed_dict;
-    memset(&parsed_dict, 0, sizeof(dictionary_t));
-    // dict_len < 8 is not a valid dictionary
-    if (dict && dict_len > 8) {
-        parse_dictionary(&parsed_dict, (const u8 *)dict, dict_len);
-    }
+                                 dictionary_t* parsed_dict) {
 
     istream_t in = IO_make_istream(src, src_len);
     ostream_t out = IO_make_ostream(dst, dst_len);
@@ -396,11 +391,9 @@ size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len,
     // Multiple frames can be appended into a single file or stream. A frame is
     // totally independent, has a defined beginning and end, and a set of
     // parameters which tells the decoder how to decompress it."
-    while (IO_istream_len(&in) > 0) {
-        decode_frame(&out, &in, &parsed_dict);
-    }
 
-    free_dictionary(&parsed_dict);
+    /* this decoder assumes decompression of a single frame */
+    decode_frame(&out, &in, parsed_dict);
 
     return out.ptr - (u8 *)dst;
 }
@@ -424,30 +417,6 @@ static void decompress_data(frame_context_t *const ctx, ostream_t *const out,
 static void decode_frame(ostream_t *const out, istream_t *const in,
                          const dictionary_t *const dict) {
     const u32 magic_number = IO_read_bits(in, 32);
-
-    // Skippable frame
-    //
-    // "Magic_Number
-    //
-    // 4 Bytes, little-endian format. Value : 0x184D2A5?, which means any value
-    // from 0x184D2A50 to 0x184D2A5F. All 16 values are valid to identify a
-    // skippable frame."
-    if ((magic_number & ~0xFU) == 0x184D2A50U) {
-        // "Skippable frames allow the insertion of user-defined data into a
-        // flow of concatenated frames. Its design is pretty straightforward,
-        // with the sole objective to allow the decoder to quickly skip over
-        // user-defined data and continue decoding.
-        //
-        // Skippable frames defined in this specification are compatible with
-        // LZ4 ones."
-        const size_t frame_size = IO_read_bits(in, 32);
-
-        // skip over frame
-        IO_advance_input(in, frame_size);
-
-        return;
-    }
-
     // Zstandard frame
     //
     // "Magic_Number
@@ -460,8 +429,8 @@ static void decode_frame(ostream_t *const out, istream_t *const in,
         return;
     }
 
-    // not a real frame
-    ERROR("Invalid magic number");
+    // not a Zstandard frame (skippable frames are not handled by this decoder)
+    ERROR("Tried to decode non-ZSTD frame");
 }
 
 /// Decode a frame that contains compressed data.  Not all frames do as there
@@ -672,8 +641,8 @@ static void decompress_data(frame_context_t *const ctx, ostream_t *const out,
         case 0: {
             // "Raw_Block - this is an uncompressed block. Block_Size is the
             // number of bytes to read and copy."
-            const u8 *const read_ptr = IO_read_bytes(in, block_len);
-            u8 *const write_ptr = IO_write_bytes(out, block_len);
+            const u8 *const read_ptr = IO_get_read_ptr(in, block_len);
+            u8 *const write_ptr = IO_get_write_ptr(out, block_len);
 
             // Copy the raw data into the output
             memcpy(write_ptr, read_ptr, block_len);
@@ -685,8 +654,8 @@ static void decompress_data(frame_context_t *const ctx, ostream_t *const out,
             // "RLE_Block - this is a single byte, repeated N times. In which
             // case, Block_Size is the size to regenerate, while the
             // "compressed" block is just 1 byte (the byte to repeat)."
-            const u8 *const read_ptr = IO_read_bytes(in, 1);
-            u8 *const write_ptr = IO_write_bytes(out, block_len);
+            const u8 *const read_ptr = IO_get_read_ptr(in, 1);
+            u8 *const write_ptr = IO_get_write_ptr(out, block_len);
 
             // Copy `block_len` copies of `read_ptr[0]` to the output
             memset(write_ptr, read_ptr[0], block_len);
@@ -832,13 +801,13 @@ static size_t decode_literals_simple(istream_t *const in, u8 **const literals,
     switch (block_type) {
     case 0: {
         // "Raw_Literals_Block - Literals are stored uncompressed."
-        const u8 *const read_ptr = IO_read_bytes(in, size);
+        const u8 *const read_ptr = IO_get_read_ptr(in, size);
         memcpy(*literals, read_ptr, size);
         break;
     }
     case 1: {
         // "RLE_Literals_Block - Literals consist of a single byte value repeated N times."
-        const u8 *const read_ptr = IO_read_bytes(in, 1);
+        const u8 *const read_ptr = IO_get_read_ptr(in, 1);
         memset(*literals, read_ptr[0], size);
         break;
     }
@@ -949,7 +918,7 @@ static void decode_huf_table(HUF_dtable *const dtable, istream_t *const in) {
         num_symbs = header - 127;
         const size_t bytes = (num_symbs + 1) / 2;
 
-        const u8 *const weight_src = IO_read_bytes(in, bytes);
+        const u8 *const weight_src = IO_get_read_ptr(in, bytes);
 
         for (int i = 0; i < num_symbs; i++) {
             // "They are encoded forward, 2
@@ -1157,7 +1126,7 @@ static void decompress_sequences(frame_context_t *const ctx, istream_t *in,
     }
 
     const size_t len = IO_istream_len(in);
-    const u8 *const src = IO_read_bytes(in, len);
+    const u8 *const src = IO_get_read_ptr(in, len);
 
     // "After writing the last bit containing information, the compressor writes
     // a single 1-bit and then fills the byte with 0-7 0 bits of padding."
@@ -1262,7 +1231,7 @@ static void decode_seq_table(FSE_dtable *const table, istream_t *const in,
     }
     case seq_rle: {
         // "RLE_Mode : it's a single code, repeated Number_of_Sequences times."
-        const u8 symb = IO_read_bytes(in, 1)[0];
+        const u8 symb = IO_get_read_ptr(in, 1)[0];
         FSE_init_dtable_rle(table, symb);
         break;
     }
@@ -1303,145 +1272,146 @@ static void execute_sequences(frame_context_t *const ctx, ostream_t *const out,
 
     for (size_t i = 0; i < num_sequences; i++) {
         const sequence_command_t seq = sequences[i];
-
         {
-            // If the sequence asks for more literals than are left, the
-            // sequence must be corrupted
-            if (seq.literal_length > IO_istream_len(&litstream)) {
-                CORRUPTION();
-            }
-
-            u8 *const write_ptr = IO_write_bytes(out, seq.literal_length);
-            const u8 *const read_ptr =
-                    IO_read_bytes(&litstream, seq.literal_length);
-            // Copy literals to output
-            memcpy(write_ptr, read_ptr, seq.literal_length);
-
-            total_output += seq.literal_length;
+            const u32 literals_size = copy_literals(seq.literal_length, &litstream, out);
+            total_output += literals_size;
         }
 
-        size_t offset;
+        size_t const offset = compute_offset(seq, offset_hist);
 
-        // Offsets are special, we need to handle the repeat offsets
-        if (seq.offset <= 3) {
-            // "The first 3 values define a repeated offset and we will call
-            // them Repeated_Offset1, Repeated_Offset2, and Repeated_Offset3.
-            // They are sorted in recency order, with Repeated_Offset1 meaning
-            // 'most recent one'".
+        size_t const match_length = seq.match_length;
 
-            // Use 0 indexing for the array
-            u32 idx = seq.offset - 1;
-            if (seq.literal_length == 0) {
-                // "There is an exception though, when current sequence's
-                // literals length is 0. In this case, repeated offsets are
-                // shifted by one, so Repeated_Offset1 becomes Repeated_Offset2,
-                // Repeated_Offset2 becomes Repeated_Offset3, and
-                // Repeated_Offset3 becomes Repeated_Offset1 - 1_byte."
-                idx++;
-            }
+        execute_match_copy(ctx, offset, match_length, total_output, out);
 
-            if (idx == 0) {
-                offset = offset_hist[0];
-            } else {
-                // If idx == 3 then literal length was 0 and the offset was 3,
-                // as per the exception listed above
-                offset = idx < 3 ? offset_hist[idx] : offset_hist[0] - 1;
-
-                // If idx == 1 we don't need to modify offset_hist[2], since
-                // we're using the second-most recent code
-                if (idx > 1) {
-                    offset_hist[2] = offset_hist[1];
-                }
-                offset_hist[1] = offset_hist[0];
-                offset_hist[0] = offset;
-            }
-        } else {
-            // When it's not a repeat offset:
-            // "if (Offset_Value > 3) offset = Offset_Value - 3;"
-            offset = seq.offset - 3;
-
-            // Shift back history
-            offset_hist[2] = offset_hist[1];
-            offset_hist[1] = offset_hist[0];
-            offset_hist[0] = offset;
-        }
-
-        size_t match_length = seq.match_length;
-
-        u8 *write_ptr = IO_write_bytes(out, match_length);
-        if (total_output <= ctx->header.window_size) {
-            // In this case offset might go back into the dictionary
-            if (offset > total_output + ctx->dict_content_len) {
-                // The offset goes beyond even the dictionary
-                CORRUPTION();
-            }
-
-            if (offset > total_output) {
-                // "The rest of the dictionary is its content. The content act
-                // as a "past" in front of data to compress or decompress, so it
-                // can be referenced in sequence commands."
-                const size_t dict_copy =
-                    MIN(offset - total_output, match_length);
-                const size_t dict_offset =
-                    ctx->dict_content_len - (offset - total_output);
-
-                memcpy(write_ptr, ctx->dict_content + dict_offset, dict_copy);
-                write_ptr += dict_copy;
-                match_length -= dict_copy;
-            }
-        } else if (offset > ctx->header.window_size) {
-            CORRUPTION();
-        }
-
-        // We must copy byte by byte because the match length might be larger
-        // than the offset
-        // ex: if the output so far was "abc", a command with offset=3 and
-        // match_length=6 would produce "abcabcabc" as the new output
-        for (size_t i = 0; i < match_length; i++) {
-            *write_ptr = *(write_ptr - offset);
-            write_ptr++;
-        }
-
-        total_output += seq.match_length;
+        total_output += match_length;
     }
 
     // Copy any leftover literals
     {
         size_t len = IO_istream_len(&litstream);
-        u8 *const write_ptr = IO_write_bytes(out, len);
-        const u8 *const read_ptr = IO_read_bytes(&litstream, len);
-        memcpy(write_ptr, read_ptr, len);
-
+        copy_literals(len, &litstream, out); 
         total_output += len;
     }
 
     ctx->current_total_output = total_output;
 }
+
+static u32 copy_literals(const size_t literal_length, istream_t *litstream,
+                         ostream_t *const out) {
+    // If the sequence asks for more literals than are left, the
+    // sequence must be corrupted
+    if (literal_length > IO_istream_len(litstream)) {
+        CORRUPTION();
+    }
+
+    u8 *const write_ptr = IO_get_write_ptr(out, literal_length);
+    const u8 *const read_ptr =
+         IO_get_read_ptr(litstream, literal_length);
+    // Copy literals to output
+    memcpy(write_ptr, read_ptr, literal_length);
+
+    return literal_length;
+}
+
+static size_t compute_offset(sequence_command_t seq, u64 *const offset_hist) {
+    size_t offset;
+    // Offsets are special, we need to handle the repeat offsets
+    if (seq.offset <= 3) {
+        // "The first 3 values define a repeated offset and we will call
+        // them Repeated_Offset1, Repeated_Offset2, and Repeated_Offset3.
+        // They are sorted in recency order, with Repeated_Offset1 meaning
+        // 'most recent one'".
+
+        // Use 0 indexing for the array
+        u32 idx = seq.offset - 1;
+        if (seq.literal_length == 0) {
+            // "There is an exception though, when current sequence's
+            // literals length is 0. In this case, repeated offsets are
+            // shifted by one, so Repeated_Offset1 becomes Repeated_Offset2,
+            // Repeated_Offset2 becomes Repeated_Offset3, and
+            // Repeated_Offset3 becomes Repeated_Offset1 - 1_byte."
+            idx++;
+        }
+
+        if (idx == 0) {
+            offset = offset_hist[0];
+        } else {
+            // If idx == 3 then literal length was 0 and the offset was 3,
+            // as per the exception listed above
+            offset = idx < 3 ? offset_hist[idx] : offset_hist[0] - 1;
+
+            // If idx == 1 we don't need to modify offset_hist[2], since
+            // we're using the second-most recent code
+            if (idx > 1) {
+                offset_hist[2] = offset_hist[1];
+            }
+            offset_hist[1] = offset_hist[0];
+            offset_hist[0] = offset;
+        }
+    } else {
+        // When it's not a repeat offset:
+        // "if (Offset_Value > 3) offset = Offset_Value - 3;"
+        offset = seq.offset - 3;
+
+        // Shift back history
+        offset_hist[2] = offset_hist[1];
+        offset_hist[1] = offset_hist[0];
+        offset_hist[0] = offset;
+    }
+    return offset;
+}
+
+static void execute_match_copy(frame_context_t *const ctx, size_t offset,
+                              size_t match_length, size_t total_output,
+                              ostream_t *const out) {
+    u8 *write_ptr = IO_get_write_ptr(out, match_length);
+    if (total_output <= ctx->header.window_size) {
+        // In this case offset might go back into the dictionary
+        if (offset > total_output + ctx->dict_content_len) {
+            // The offset goes beyond even the dictionary
+            CORRUPTION();
+        }
+
+        if (offset > total_output) {
+            // "The rest of the dictionary is its content. The content act
+            // as a "past" in front of data to compress or decompress, so it
+            // can be referenced in sequence commands."
+            const size_t dict_copy =
+                MIN(offset - total_output, match_length);
+            const size_t dict_offset =
+                ctx->dict_content_len - (offset - total_output);
+
+            memcpy(write_ptr, ctx->dict_content + dict_offset, dict_copy);
+            write_ptr += dict_copy;
+            match_length -= dict_copy;
+        }
+    } else if (offset > ctx->header.window_size) {
+        CORRUPTION();
+    }
+
+    // We must copy byte by byte because the match length might be larger
+    // than the offset
+    // ex: if the output so far was "abc", a command with offset=3 and
+    // match_length=6 would produce "abcabcabc" as the new output
+    for (size_t j = 0; j < match_length; j++) {
+        *write_ptr = *(write_ptr - offset);
+        write_ptr++;
+    }
+}
 /******* END SEQUENCE EXECUTION ***********************************************/
 
 /******* OUTPUT SIZE COUNTING *************************************************/
-static void traverse_frame(const frame_header_t *const header, istream_t *const in);
-
 /// Get the decompressed size of an input stream so memory can be allocated in
 /// advance.
-/// This is more complex than the implementation in the reference
-/// implementation, as this API allows for the decompression of multiple
-/// concatenated frames.
+/// This implementation assumes `src` points to a single ZSTD-compressed frame
 size_t ZSTD_get_decompressed_size(const void *src, const size_t src_len) {
     istream_t in = IO_make_istream(src, src_len);
-    size_t dst_size = 0;
 
-    // Each frame header only gives us the size of its frame, so iterate over
-    // all
-    // frames
-    while (IO_istream_len(&in) > 0) {
+    // get decompressed size from ZSTD frame header
+    {
         const u32 magic_number = IO_read_bits(&in, 32);
 
-        if ((magic_number & ~0xFU) == 0x184D2A50U) {
-            // skippable frame, this has no impact on output size
-            const size_t frame_size = IO_read_bits(&in, 32);
-            IO_advance_input(&in, frame_size);
-        } else if (magic_number == 0xFD2FB528U) {
+        if (magic_number == 0xFD2FB528U) {
             // ZSTD frame
             frame_header_t header;
             parse_frame_header(&header, &in);
@@ -1451,68 +1421,42 @@ size_t ZSTD_get_decompressed_size(const void *src, const size_t src_len) {
                 return -1;
             }
 
-            dst_size += header.frame_content_size;
-
-            // Consume the input from the frame to reach the start of the next
-            traverse_frame(&header, &in);
+            return header.frame_content_size;
         } else {
-            // not a real frame
-            ERROR("Invalid magic number");
+            // not a real frame or skippable frame
+            ERROR("ZSTD frame magic number did not match");
         }
     }
-
-    return dst_size;
 }
-
-/// Iterate over each block in a frame to find the end of it, to get to the
-/// start of the next frame
-static void traverse_frame(const frame_header_t *const header, istream_t *const in) {
-    int last_block = 0;
-
-    do {
-        // Parse the block header
-        last_block = IO_read_bits(in, 1);
-        const int block_type = IO_read_bits(in, 2);
-        const size_t block_len = IO_read_bits(in, 21);
-
-        switch (block_type) {
-        case 0: // Raw block, block_len bytes
-            IO_advance_input(in, block_len);
-            break;
-        case 1: // RLE block, 1 byte
-            IO_advance_input(in, 1);
-            break;
-        case 2: // Compressed block, compressed size is block_len
-            IO_advance_input(in, block_len);
-            break;
-        case 3:
-            // Reserved block type
-            CORRUPTION();
-            break;
-        default:
-            IMPOSSIBLE();
-        }
-    } while (!last_block);
-
-    if (header->content_checksum_flag) {
-        IO_advance_input(in, 4);
-    }
-}
-
 /******* END OUTPUT SIZE COUNTING *********************************************/
 
 /******* DICTIONARY PARSING ***************************************************/
+#define DICT_SIZE_ERROR() ERROR("Dictionary size cannot be less than 8 bytes")
+#define NULL_SRC() ERROR("Tried to create dictionary with pointer to null src")
+
+dictionary_t* create_dictionary() {
+    dictionary_t* dict = calloc(1, sizeof(dictionary_t));
+    if (!dict) {
+        BAD_ALLOC();
+    }
+    return dict;
+}
+
 static void init_dictionary_content(dictionary_t *const dict,
                                     istream_t *const in);
 
-static void parse_dictionary(dictionary_t *const dict, const u8 *src,
+void parse_dictionary(dictionary_t *const dict, const void *src,
                              size_t src_len) {
+    const u8 *byte_src = (const u8 *)src;
     memset(dict, 0, sizeof(dictionary_t));
+    if (src == NULL) { /* cannot initialize dictionary with null src */
+        NULL_SRC();
+    }
     if (src_len < 8) {
-        INP_SIZE();
+        DICT_SIZE_ERROR();
     }
 
-    istream_t in = IO_make_istream(src, src_len);
+    istream_t in = IO_make_istream(byte_src, src_len);
 
     const u32 magic_number = IO_read_bits(&in, 32);
     if (magic_number != 0xEC30A437) {
@@ -1564,13 +1508,13 @@ static void init_dictionary_content(dictionary_t *const dict,
         BAD_ALLOC();
     }
 
-    const u8 *const content = IO_read_bytes(in, dict->content_size);
+    const u8 *const content = IO_get_read_ptr(in, dict->content_size);
 
     memcpy(dict->content, content, dict->content_size);
 }
 
 /// Free an allocated dictionary
-static void free_dictionary(dictionary_t *const dict) {
+void free_dictionary(dictionary_t *const dict) {
     HUF_free_dtable(&dict->literals_dtable);
     FSE_free_dtable(&dict->ll_dtable);
     FSE_free_dtable(&dict->of_dtable);
@@ -1579,6 +1523,8 @@ static void free_dictionary(dictionary_t *const dict) {
     free(dict->content);
 
     memset(dict, 0, sizeof(dictionary_t));
+
+    free(dict);
 }
 /******* END DICTIONARY PARSING ***********************************************/
 
@@ -1657,7 +1603,7 @@ static inline size_t IO_istream_len(const istream_t *const in) {
 
 /// Returns a pointer where `len` bytes can be read, and advances the internal
 /// state.  The stream must be byte aligned.
-static inline const u8 *IO_read_bytes(istream_t *const in, size_t len) {
+static inline const u8 *IO_get_read_ptr(istream_t *const in, size_t len) {
     if (len > in->len) {
         INP_SIZE();
     }
@@ -1671,7 +1617,7 @@ static inline const u8 *IO_read_bytes(istream_t *const in, size_t len) {
     return ptr;
 }
 /// Returns a pointer to write `len` bytes to, and advances the internal state
-static inline u8 *IO_write_bytes(ostream_t *const out, size_t len) {
+static inline u8 *IO_get_write_ptr(ostream_t *const out, size_t len) {
     if (len > out->len) {
         OUT_SIZE();
     }
@@ -1710,7 +1656,7 @@ static inline istream_t IO_make_istream(const u8 *in, size_t len) {
 /// `in` must be byte aligned
 static inline istream_t IO_make_sub_istream(istream_t *const in, size_t len) {
     // Consume `len` bytes of the parent stream
-    const u8 *const ptr = IO_read_bytes(in, len);
+    const u8 *const ptr = IO_get_read_ptr(in, len);
 
     // Make a substream using the pointer to those `len` bytes
     return IO_make_istream(ptr, len);
@@ -1814,7 +1760,7 @@ static size_t HUF_decompress_1stream(const HUF_dtable *const dtable,
     if (len == 0) {
         INP_SIZE();
     }
-    const u8 *const src = IO_read_bytes(in, len);
+    const u8 *const src = IO_get_read_ptr(in, len);
 
     // "Each bitstream must be read backward, that is starting from the end down
     // to the beginning. Therefore it's necessary to know the size of each
@@ -2065,7 +2011,7 @@ static size_t FSE_decompress_interleaved2(const FSE_dtable *const dtable,
     if (len == 0) {
         INP_SIZE();
     }
-    const u8 *const src = IO_read_bytes(in, len);
+    const u8 *const src = IO_get_read_ptr(in, len);
 
     // "Each bitstream must be read backward, that is starting from the end down
     // to the beginning. Therefore it's necessary to know the size of each
@@ -2192,7 +2138,7 @@ static void FSE_init_dtable(FSE_dtable *const dtable,
     }
 
     // Now we can fill baseline and num bits
-    for (int i = 0; i < size; i++) {
+    for (size_t i = 0; i < size; i++) {
         u8 symbol = dtable->symbols[i];
         u16 next_state_desc = state_desc[symbol]++;
         // Fills in the table appropriately, next_state_desc increases by symbol
@@ -2355,4 +2301,3 @@ static void FSE_copy_dtable(FSE_dtable *const dst, const FSE_dtable *const src)
     memcpy(dst->new_state_base, src->new_state_base, size * sizeof(u16));
 }
 /******* END FSE PRIMITIVES ***************************************************/
-
diff --git a/doc/educational_decoder/zstd_decompress.h b/doc/educational_decoder/zstd_decompress.h
index 16f4da3eb92e..41009909bfa1 100644
--- a/doc/educational_decoder/zstd_decompress.h
+++ b/doc/educational_decoder/zstd_decompress.h
@@ -7,10 +7,52 @@
  * of patent rights can be found in the PATENTS file in the same directory.
  */
 
-size_t ZSTD_decompress(void *const dst, const size_t dst_len,
-                       const void *const src, const size_t src_len);
-size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len,
-                                 const void *const src, const size_t src_len,
-                                 const void *const dict, const size_t dict_len);
-size_t ZSTD_get_decompressed_size(const void *const src, const size_t src_len);
+/******* EXPOSED TYPES ********************************************************/
+/*
+* Contains the parsed contents of a dictionary
+* This includes Huffman and FSE tables used for decoding and data on offsets
+*/
+typedef struct dictionary_s dictionary_t;
+/******* END EXPOSED TYPES ****************************************************/
 
+/******* DECOMPRESSION FUNCTIONS **********************************************/
+/// Zstandard decompression functions.
+/// `dst` must point to a space at least as large as the reconstructed output.
+size_t ZSTD_decompress(void *const dst, const size_t dst_len,
+                    const void *const src, const size_t src_len);
+
+/// Does the same thing as `ZSTD_decompress`, but uses the dictionary held in
+/// `parsed_dict` (e.g. a `dictionary_t` filled in by `parse_dictionary`)
+size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len,
+                              const void *const src, const size_t src_len,
+                              dictionary_t* parsed_dict);
+
+/// Get the decompressed size of an input stream so memory can be allocated in
+/// advance
+/// Returns (size_t)-1 if the size can't be determined
+/// Assumes decompression of a single frame
+size_t ZSTD_get_decompressed_size(const void *const src, const size_t src_len);
+/******* END DECOMPRESSION FUNCTIONS ******************************************/
+
+/******* DICTIONARY MANAGEMENT ***********************************************/
+/*
+ * Return a valid dictionary_t pointer for use with dictionary initialization
+ * or decompression
+ */
+dictionary_t* create_dictionary();
+
+/*
+ * Parse a provided dictionary blob for use in decompression
+ * `src` -- must point to memory space representing the dictionary
+ * `src_len` -- must provide the dictionary size
+ * `dict` -- will contain the parsed contents of the dictionary and
+ *        can be used for decompression
+ */
+void parse_dictionary(dictionary_t *const dict, const void *src,
+                             size_t src_len);
+
+/*
+ * Free internal Huffman tables, FSE tables, dictionary content, and `dict` itself
+ */
+void free_dictionary(dictionary_t *const dict);
+/******* END DICTIONARY MANAGEMENT *******************************************/
diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md
index 1f212fea2305..aa86d1420eef 100644
--- a/doc/zstd_compression_format.md
+++ b/doc/zstd_compression_format.md
@@ -16,7 +16,7 @@ Distribution of this document is unlimited.
 
 ### Version
 
-0.2.5 (31/03/17)
+0.2.6 (19/08/17)
 
 
 Introduction
@@ -106,7 +106,7 @@ The structure of a single Zstandard frame is following:
 
 | `Magic_Number` | `Frame_Header` |`Data_Block`| [More data blocks] | [`Content_Checksum`] |
 |:--------------:|:--------------:|:----------:| ------------------ |:--------------------:|
-| 4 bytes        |  2-14 bytes    | n bytes    |                    |   0-4 bytes          |
+|  4 bytes       |  2-14 bytes    |  n bytes   |                    |     0-4 bytes        |
 
 __`Magic_Number`__
 
@@ -1249,23 +1249,30 @@ Consequently, a last byte of `0` is not possible.
 And the final-bit-flag itself is not part of the useful bitstream.
 Hence, the last byte contains between 0 and 7 useful bits.
 
-For example, if the literal sequence "0145" was encoded using the prefix codes above,
-it would be encoded as:
-```
-00000001 01110000
-```
+Starting from the end,
+it's possible to read the bitstream in a __little-endian__ fashion,
+keeping track of already used bits. Since the bitstream is encoded in reverse
+order, starting from the end yields the symbols in forward order.
+
+For example, if the literal sequence "0145" was encoded using the above prefix code,
+it would be encoded (in reverse order) as:
 
 |Symbol  |   5  |   4  |  1 | 0 | Padding |
 |--------|------|------|----|---|---------|
-|Encoding|`0000`|`0001`|`01`|`1`| `10000` |
+|Encoding|`0000`|`0001`|`01`|`1`| `00001` |
 
-Starting from the end,
-it's possible to read the bitstream in a __little-endian__ fashion,
-keeping track of already used bits.  Since the bitstream is encoded in reverse
-order, by starting at the end the symbols can be read in forward order.
+Resulting in the following 2-byte bitstream:
+```
+00010000 00001101
+```
 
-Reading the last `Max_Number_of_Bits` bits,
-it's then possible to compare extracted value to decoding table,
+Here is an alternative representation with the symbol codes separated by underscores:
+```
+0001_0000 00001_1_01
+```
+
+Reading the highest `Max_Number_of_Bits` bits,
+it's possible to compare extracted value to decoding table,
 determining the symbol to decode and number of bits to discard.
 
 The process continues up to reading the required number of symbols per stream.
@@ -1516,12 +1523,13 @@ to crosscheck that an implementation build its decoding tables correctly.
 
 Version changes
 ---------------
+- 0.2.6 : fixed an error in Huffman example, by Ulrich Kunitz
 - 0.2.5 : minor typos and clarifications
 - 0.2.4 : section restructuring, by Sean Purcell
 - 0.2.3 : clarified several details, by Sean Purcell
 - 0.2.2 : added predefined codes, by Johannes Rudolph
 - 0.2.1 : clarify field names, by Przemyslaw Skibinski
-- 0.2.0 : numerous format adjustments for zstd v0.8
+- 0.2.0 : numerous format adjustments for zstd v0.8+
 - 0.1.2 : limit Huffman tree depth to 11 bits
 - 0.1.1 : reserved dictID ranges
 - 0.1.0 : initial release
diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html
index cd2b06dd8309..c166e7258d32 100644
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@@ -1,10 +1,10 @@
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<title>zstd 1.3.0 Manual</title>
+<title>zstd 1.3.1 Manual</title>
 </head>
 <body>
-<h1>zstd 1.3.0 Manual</h1>
+<h1>zstd 1.3.1 Manual</h1>
 <hr>
 <a name="Contents"></a><h2>Contents</h2>
 <ol>
@@ -73,27 +73,41 @@
             or an errorCode if it fails (which can be tested using ZSTD_isError()). 
 </p></pre><BR>
 
-<pre><b>unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
-</b><p>  NOTE: This function is planned to be obsolete, in favor of ZSTD_getFrameContentSize().
-  ZSTD_getFrameContentSize() works the same way,
-  returning the decompressed size of a single frame,
-  but distinguishes empty frames from frames with an unknown size, or errors.
-
-  'src' is the start of a zstd compressed frame.
-  @return : content size to be decompressed, as a 64-bits value _if known_, 0 otherwise.
-   note 1 : decompressed size is an optional field, it may not be present, typically in streaming mode.
-            When `return==0`, data to decompress could be any size.
+<pre><b>#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
+#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
+unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
+</b><p>  `src` should point to the start of a ZSTD encoded frame.
+  `srcSize` must be at least as large as the frame header.
+            hint : any size >= `ZSTD_frameHeaderSize_max` is large enough.
+  @return : - decompressed size of the frame in `src`, if known
+            - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
+            - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small)
+   note 1 : a 0 return value means the frame is valid but "empty".
+   note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode.
+            When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
             In which case, it's necessary to use streaming mode to decompress data.
-            Optionally, application can use ZSTD_decompress() while relying on implied limits.
-            (For example, data may be necessarily cut into blocks <= 16 KB).
-   note 2 : decompressed size is always present when compression is done with ZSTD_compress()
-   note 3 : decompressed size can be very large (64-bits value),
+            Optionally, application can rely on some implicit limit,
+            as ZSTD_decompress() only needs an upper bound of decompressed size.
+            (For example, data could be necessarily cut into blocks <= 16 KB).
+   note 3 : decompressed size is always present when compression is done with ZSTD_compress()
+   note 4 : decompressed size can be very large (64-bits value),
             potentially larger than what local system can handle as a single memory segment.
             In which case, it's necessary to use streaming mode to decompress data.
-   note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified.
-            Always ensure result fits within application's authorized limits.
+   note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified.
+            Always ensure return value fits within application's authorized limits.
             Each application can set its own limits.
-   note 5 : when `return==0`, if precise failure cause is needed, use ZSTD_getFrameHeader() to know more. 
+   note 6 : This function replaces ZSTD_getDecompressedSize() 
+</p></pre><BR>
+
+<pre><b>unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
+</b><p>  NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize().
+  Both functions work the same way,
+  but ZSTD_getDecompressedSize() blends
+  "empty", "unknown" and "error" results in the same return value (0),
+  while ZSTD_getFrameContentSize() distinguishes them.
+
+  'src' is the start of a zstd compressed frame.
+  @return : content size to be decompressed, as a 64-bits value _if known and not empty_, 0 otherwise. 
 </p></pre><BR>
 
 <h3>Helper functions</h3><pre></pre><b><pre>int         ZSTD_maxCLevel(void);               </b>/*!< maximum compression level available */<b>
@@ -298,8 +312,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
 <pre><b>size_t ZSTD_DStreamOutSize(void);   </b>/*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */<b>
 </b></pre><BR>
 <a name="Chapter10"></a><h2>START OF ADVANCED AND EXPERIMENTAL FUNCTIONS</h2><pre> The definitions in this section are considered experimental.
- They should never be used with a dynamic library, as they may change in the future.
- They are provided for advanced usages.
+ They should never be used with a dynamic library, as prototypes may change in the future.
+ They are provided for advanced scenarios.
  Use them only in association with static linking.
  
 <BR></pre>
@@ -330,13 +344,6 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
     ZSTD_frameParameters fParams;
 } ZSTD_parameters;
 </b></pre><BR>
-<pre><b>typedef struct {
-    unsigned long long frameContentSize;
-    size_t windowSize;
-    unsigned dictID;
-    unsigned checksumFlag;
-} ZSTD_frameHeader;
-</b></pre><BR>
 <h3>Custom memory allocation functions</h3><pre></pre><b><pre>typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
 typedef void  (*ZSTD_freeFunction) (void* opaque, void* address);
 typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem;
@@ -348,26 +355,15 @@ static const ZSTD_customMem ZSTD_defaultCMem = { NULL, NULL, NULL };
 <pre><b>size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
 </b><p>  `src` should point to the start of a ZSTD encoded frame or skippable frame
   `srcSize` must be at least as large as the frame
-  @return : the compressed size of the frame pointed to by `src`,
+  @return : the compressed size of the first frame starting at `src`,
             suitable to pass to `ZSTD_decompress` or similar,
-            or an error code if given invalid input. 
-</p></pre><BR>
-
-<pre><b>#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
-#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
-unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
-</b><p>  `src` should point to the start of a ZSTD encoded frame.
-  `srcSize` must be at least as large as the frame header.
-       A value >= `ZSTD_frameHeaderSize_max` is guaranteed to be large enough.
-  @return : - decompressed size of the frame pointed to be `src` if known
-            - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
-            - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) 
+            or an error code if input is invalid 
 </p></pre><BR>
 
 <pre><b>unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
 </b><p>  `src` should point the start of a series of ZSTD encoded and/or skippable frames
   `srcSize` must be the _exact_ size of this series
-       (i.e. there should be a frame boundary exactly `srcSize` bytes after `src`)
+       (i.e. there should be a frame boundary exactly at `srcSize` bytes after `src`)
   @return : - decompressed size of all data in all successive frames
             - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN
             - if an error occurred: ZSTD_CONTENTSIZE_ERROR
@@ -375,8 +371,6 @@ unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
    note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
             When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
             In which case, it's necessary to use streaming mode to decompress data.
-            Optionally, application can still use ZSTD_decompress() while relying on implied limits.
-            (For example, data may be necessarily cut into blocks <= 16 KB).
    note 2 : decompressed size is always present when compression is done with ZSTD_compress()
    note 3 : decompressed size can be very large (64-bits value),
             potentially larger than what local system can handle as a single memory segment.
@@ -385,7 +379,7 @@ unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
             Always ensure result fits within application's authorized limits.
             Each application can set its own limits.
    note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to
-            read each contained frame header.  This is efficient as most of the data is skipped,
+            read each contained frame header.  This is fast as most of the data is skipped,
             however it does mean that all frame data must be present and valid. 
 </p></pre><BR>
 
@@ -483,14 +477,15 @@ size_t ZSTD_estimateDDictSize(size_t dictSize, unsigned byReference);
   It is important that dictBuffer outlives CDict, it must remain read accessible throughout the lifetime of CDict 
 </p></pre><BR>
 
-<pre><b>typedef enum { ZSTD_dm_auto=0,        </b>/* dictionary is "full" if it starts with ZSTD_MAGIC_DICTIONARY, rawContent otherwize */<b>
+<pre><b>typedef enum { ZSTD_dm_auto=0,        </b>/* dictionary is "full" if it starts with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */<b>
                ZSTD_dm_rawContent,    </b>/* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */<b>
                ZSTD_dm_fullDict       </b>/* refuses to load a dictionary if it does not respect Zstandard's specification */<b>
 } ZSTD_dictMode_e;
 </b></pre><BR>
 <pre><b>ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
                                       unsigned byReference, ZSTD_dictMode_e dictMode,
-                                      ZSTD_compressionParameters cParams, ZSTD_customMem customMem);
+                                      ZSTD_compressionParameters cParams,
+                                      ZSTD_customMem customMem);
 </b><p>  Create a ZSTD_CDict using external alloc and free, and customized compression parameters 
 </p></pre><BR>
 
@@ -760,7 +755,16 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned lo
   It also returns Frame Size as fparamsPtr->frameContentSize.
 <BR></pre>
 
-<h3>Buffer-less streaming decompression functions</h3><pre></pre><b><pre>size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize);   </b>/**< doesn't consume input */<b>
+<h3>Buffer-less streaming decompression functions</h3><pre></pre><b><pre>typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e;
+typedef struct {
+    unsigned long long frameContentSize; </b>/* ZSTD_CONTENTSIZE_UNKNOWN means this field is not available. 0 means "empty" */<b>
+    unsigned long long windowSize;       </b>/* can be very large, up to <= frameContentSize */<b>
+    ZSTD_frameType_e frameType;          </b>/* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */<b>
+    unsigned headerSize;
+    unsigned dictID;
+    unsigned checksumFlag;
+} ZSTD_frameHeader;
+size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize);   </b>/**< doesn't consume input */<b>
 size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
 size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
 size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
@@ -809,7 +813,9 @@ void   ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
                               * Special: value 0 means "do not change strategy". */
 
     </b>/* frame parameters */<b>
-    ZSTD_p_contentSizeFlag=200, </b>/* Content size is written into frame header _whenever known_ (default:1) */<b>
+    ZSTD_p_contentSizeFlag=200, </b>/* Content size is written into frame header _whenever known_ (default:1)<b>
+                              * note that content size must be known at the beginning,
+                              * it is sent using ZSTD_CCtx_setPledgedSrcSize() */
     ZSTD_p_checksumFlag,     </b>/* A 32-bits checksum of content is written at end of frame (default:0) */<b>
     ZSTD_p_dictIDFlag,       </b>/* When applicable, dictID of dictionary is provided in frame header (default:1) */<b>
 
diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h
index 07b85026c95b..06121f21c5b3 100644
--- a/lib/common/bitstream.h
+++ b/lib/common/bitstream.h
@@ -80,9 +80,9 @@ extern "C" {
 *  bitStream encoding API (write forward)
 ********************************************/
 /* bitStream can mix input from multiple sources.
-*  A critical property of these streams is that they encode and decode in **reverse** direction.
-*  So the first bit sequence you add will be the last to be read, like a LIFO stack.
-*/
+ * A critical property of these streams is that they encode and decode in **reverse** direction.
+ * So the first bit sequence you add will be the last to be read, like a LIFO stack.
+ */
 typedef struct
 {
     size_t bitContainer;
@@ -203,7 +203,7 @@ static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F,
 /*! BIT_initCStream() :
  *  `dstCapacity` must be > sizeof(size_t)
  *  @return : 0 if success,
-              otherwise an error code (can be tested using ERR_isError() ) */
+ *            otherwise an error code (can be tested using ERR_isError()) */
 MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
                                   void* startPtr, size_t dstCapacity)
 {
@@ -217,8 +217,8 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
 }
 
 /*! BIT_addBits() :
-    can add up to 26 bits into `bitC`.
-    Does not check for register overflow ! */
+ *  can add up to 26 bits into `bitC`.
+ *  Note : does not check for register overflow ! */
 MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
                             size_t value, unsigned nbBits)
 {
@@ -268,7 +268,7 @@ MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
 
 /*! BIT_closeCStream() :
  *  @return : size of CStream, in bytes,
-              or 0 if it could not fit into dstBuffer */
+ *            or 0 if it could not fit into dstBuffer */
 MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
 {
     BIT_addBitsFast(bitC, 1, 1);   /* endMark */
@@ -279,14 +279,14 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
 
 
 /*-********************************************************
-* bitStream decoding
+*  bitStream decoding
 **********************************************************/
 /*! BIT_initDStream() :
-*   Initialize a BIT_DStream_t.
-*   `bitD` : a pointer to an already allocated BIT_DStream_t structure.
-*   `srcSize` must be the *exact* size of the bitStream, in bytes.
-*   @return : size of stream (== srcSize) or an errorCode if a problem is detected
-*/
+ *  Initialize a BIT_DStream_t.
+ * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
+ * `srcSize` must be the *exact* size of the bitStream, in bytes.
+ * @return : size of stream (== srcSize), or an errorCode if a problem is detected
+ */
 MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
 {
     if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
@@ -305,29 +305,30 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
         bitD->bitContainer = *(const BYTE*)(bitD->start);
         switch(srcSize)
         {
-	    case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
-	            /* fall-through */
+        case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
+                /* fall-through */
 
-	    case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
-	            /* fall-through */
+        case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
+                /* fall-through */
 
-	    case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
-	            /* fall-through */
+        case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
+                /* fall-through */
 
-	    case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
-	            /* fall-through */
+        case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
+                /* fall-through */
 
-	    case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
-	            /* fall-through */
+        case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
+                /* fall-through */
 
-	    case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8;
-	            /* fall-through */
+        case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8;
+                /* fall-through */
 
-            default: break;
+        default: break;
+        }
+        {   BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
+            bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
+            if (lastByte == 0) return ERROR(corruption_detected);  /* endMark not present */
         }
-        { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
-          bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
-          if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
         bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
     }
 
@@ -363,9 +364,8 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
  *  local register is not modified.
  *  On 32-bits, maxNbBits==24.
  *  On 64-bits, maxNbBits==56.
- *  @return : value extracted
- */
- MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
+ * @return : value extracted */
+MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
 {
 #if defined(__BMI__) && defined(__GNUC__)   /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */
     return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
@@ -392,8 +392,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
 /*! BIT_readBits() :
  *  Read (consume) next n bits from local register and update.
  *  Pay attention to not read more than nbBits contained into local register.
- *  @return : extracted value.
- */
+ * @return : extracted value. */
 MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
 {
     size_t const value = BIT_lookBits(bitD, nbBits);
@@ -402,7 +401,7 @@ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
 }
 
 /*! BIT_readBitsFast() :
-*   unsafe version; only works only if nbBits >= 1 */
+ *  unsafe version; only works if nbBits >= 1 */
 MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
 {
     size_t const value = BIT_lookBitsFast(bitD, nbBits);
@@ -412,10 +411,10 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
 }
 
 /*! BIT_reloadDStream() :
-*   Refill `bitD` from buffer previously set in BIT_initDStream() .
-*   This function is safe, it guarantees it will not read beyond src buffer.
-*   @return : status of `BIT_DStream_t` internal register.
-              if status == BIT_DStream_unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */
+ *  Refill `bitD` from buffer previously set in BIT_initDStream() .
+ *  This function is safe, it guarantees it will not read beyond src buffer.
+ * @return : status of `BIT_DStream_t` internal register.
+ *           when status == BIT_DStream_unfinished, internal register is filled with at least 25 bits (32-bit size_t) or 57 bits (64-bit size_t) */
 MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
 {
     if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* overflow detected, like end of stream */
@@ -446,8 +445,8 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
 }
 
 /*! BIT_endOfDStream() :
-*   @return Tells if DStream has exactly reached its end (all bits consumed).
-*/
+ * @return : 1 if DStream has _exactly_ reached its end (all bits consumed).
+ */
 MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
 {
     return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
diff --git a/lib/common/error_private.c b/lib/common/error_private.c
index 2d752cd23a72..b5b14b509cf1 100644
--- a/lib/common/error_private.c
+++ b/lib/common/error_private.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 /* The purpose of this file is to have a single list of error strings embedded in binary */
@@ -20,19 +20,17 @@ const char* ERR_getErrorString(ERR_enum code)
     case PREFIX(GENERIC):  return "Error (generic)";
     case PREFIX(prefix_unknown): return "Unknown frame descriptor";
     case PREFIX(version_unsupported): return "Version not supported";
-    case PREFIX(parameter_unknown): return "Unknown parameter type";
     case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
-    case PREFIX(frameParameter_unsupportedBy32bits): return "Frame parameter unsupported in 32-bits mode";
     case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
-    case PREFIX(compressionParameter_unsupported): return "Compression parameter is not supported";
-    case PREFIX(compressionParameter_outOfBound): return "Compression parameter is out of bound";
+    case PREFIX(corruption_detected): return "Corrupted block detected";
+    case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
+    case PREFIX(parameter_unsupported): return "Unsupported parameter";
+    case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
     case PREFIX(init_missing): return "Context should be init first";
     case PREFIX(memory_allocation): return "Allocation error : not enough memory";
     case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
     case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
     case PREFIX(srcSize_wrong): return "Src size is incorrect";
-    case PREFIX(corruption_detected): return "Corrupted block detected";
-    case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
     case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
     case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
     case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
diff --git a/lib/common/error_private.h b/lib/common/error_private.h
index 1bc2e4954818..9dd9a87cfac8 100644
--- a/lib/common/error_private.h
+++ b/lib/common/error_private.h
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 /* Note : this module is expected to remain private, do not expose it */
diff --git a/lib/common/fse.h b/lib/common/fse.h
index 6d5d41def19b..1c44f8375078 100644
--- a/lib/common/fse.h
+++ b/lib/common/fse.h
@@ -31,13 +31,14 @@
    You can contact the author at :
    - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
 ****************************************************************** */
-#ifndef FSE_H
-#define FSE_H
 
 #if defined (__cplusplus)
 extern "C" {
 #endif
 
+#ifndef FSE_H
+#define FSE_H
+
 
 /*-*****************************************
 *  Dependencies
@@ -297,8 +298,10 @@ FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<
 If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
 */
 
+#endif  /* FSE_H */
 
-#ifdef FSE_STATIC_LINKING_ONLY
+#if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
+#define FSE_H_FSE_STATIC_LINKING_ONLY
 
 /* *** Dependency *** */
 #include "bitstream.h"
@@ -381,6 +384,11 @@ size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
 size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
 /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
 
+typedef enum {
+   FSE_repeat_none,  /**< Cannot use the previous table */
+   FSE_repeat_check, /**< Can use the previous table but it must be checked */
+   FSE_repeat_valid  /**< Can use the previous table and it is assumed to be valid */
+ } FSE_repeat;
 
 /* *****************************************
 *  FSE symbol compression API
@@ -694,5 +702,3 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
 #if defined (__cplusplus)
 }
 #endif
-
-#endif  /* FSE_H */
diff --git a/lib/common/fse_decompress.c b/lib/common/fse_decompress.c
index 8474a4c079b2..8e3f0035f69a 100644
--- a/lib/common/fse_decompress.c
+++ b/lib/common/fse_decompress.c
@@ -33,35 +33,16 @@
 ****************************************************************** */
 
 
-/* **************************************************************
-*  Compiler specifics
-****************************************************************/
-#ifdef _MSC_VER    /* Visual Studio */
-#  define FORCE_INLINE static __forceinline
-#  include <intrin.h>                    /* For Visual 2005 */
-#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
-#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
-#else
-#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
-#    ifdef __GNUC__
-#      define FORCE_INLINE static inline __attribute__((always_inline))
-#    else
-#      define FORCE_INLINE static inline
-#    endif
-#  else
-#    define FORCE_INLINE static
-#  endif /* __STDC_VERSION__ */
-#endif
-
-
 /* **************************************************************
 *  Includes
 ****************************************************************/
 #include <stdlib.h>     /* malloc, free, qsort */
 #include <string.h>     /* memcpy, memset */
 #include "bitstream.h"
+#include "compiler.h"
 #define FSE_STATIC_LINKING_ONLY
 #include "fse.h"
+#include "error_private.h"
 
 
 /* **************************************************************
@@ -216,7 +197,7 @@ size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
     return 0;
 }
 
-FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
+FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
           void* dst, size_t maxDstSize,
     const void* cSrc, size_t cSrcSize,
     const FSE_DTable* dt, const unsigned fast)
diff --git a/lib/common/huf.h b/lib/common/huf.h
index dabd359915a7..2b3015a84c19 100644
--- a/lib/common/huf.h
+++ b/lib/common/huf.h
@@ -31,13 +31,13 @@
    You can contact the author at :
    - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
 ****************************************************************** */
-#ifndef HUF_H_298734234
-#define HUF_H_298734234
 
 #if defined (__cplusplus)
 extern "C" {
 #endif
 
+#ifndef HUF_H_298734234
+#define HUF_H_298734234
 
 /* *** Dependencies *** */
 #include <stddef.h>    /* size_t */
@@ -124,6 +124,7 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, const
 #define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
 #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
 
+#endif   /* HUF_H_298734234 */
 
 /* ******************************************************************
  *  WARNING !!
@@ -132,7 +133,8 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, const
  *  because they are not guaranteed to remain stable in the future.
  *  Only consider them in association with static linking.
  *******************************************************************/
-#ifdef HUF_STATIC_LINKING_ONLY
+#if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY)
+#define HUF_H_HUF_STATIC_LINKING_ONLY
 
 /* *** Dependencies *** */
 #include "mem.h"   /* U32 */
@@ -295,9 +297,6 @@ size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* c
 
 #endif /* HUF_STATIC_LINKING_ONLY */
 
-
 #if defined (__cplusplus)
 }
 #endif
-
-#endif   /* HUF_H_298734234 */
diff --git a/lib/common/mem.h b/lib/common/mem.h
index b0e5bf60b43b..df85404fb869 100644
--- a/lib/common/mem.h
+++ b/lib/common/mem.h
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 #ifndef MEM_H_MODULE
@@ -110,7 +110,7 @@ Only use if no other choice to achieve best performance on target platform */
 MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
 MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
 MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
-MEM_STATIC U64 MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; }
+MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; }
 
 MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
 MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
@@ -131,7 +131,7 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
 MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
 MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
 MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
-MEM_STATIC U64 MEM_readST(const void* ptr) { return ((const unalign*)ptr)->st; }
+MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalign*)ptr)->st; }
 
 MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
 MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
diff --git a/lib/common/pool.c b/lib/common/pool.c
index 749fa4f2f7b4..a227044f7f1c 100644
--- a/lib/common/pool.c
+++ b/lib/common/pool.c
@@ -1,10 +1,10 @@
-/**
- * Copyright (c) 2016-present, Facebook, Inc.
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
@@ -39,6 +39,12 @@ struct POOL_ctx_s {
     size_t queueHead;
     size_t queueTail;
     size_t queueSize;
+
+    /* The number of threads working on jobs */
+    size_t numThreadsBusy;
+    /* Indicates if the queue is empty */
+    int queueEmpty;
+
     /* The mutex protects the queue */
     pthread_mutex_t queueMutex;
     /* Condition variable for pushers to wait on when the queue is full */
@@ -60,30 +66,41 @@ static void* POOL_thread(void* opaque) {
     for (;;) {
         /* Lock the mutex and wait for a non-empty queue or until shutdown */
         pthread_mutex_lock(&ctx->queueMutex);
-        while (ctx->queueHead == ctx->queueTail && !ctx->shutdown) {
+
+        while (ctx->queueEmpty && !ctx->shutdown) {
             pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);
         }
         /* empty => shutting down: so stop */
-        if (ctx->queueHead == ctx->queueTail) {
+        if (ctx->queueEmpty) {
             pthread_mutex_unlock(&ctx->queueMutex);
             return opaque;
         }
         /* Pop a job off the queue */
         {   POOL_job const job = ctx->queue[ctx->queueHead];
             ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize;
+            ctx->numThreadsBusy++;
+            ctx->queueEmpty = ctx->queueHead == ctx->queueTail;
             /* Unlock the mutex, signal a pusher, and run the job */
             pthread_mutex_unlock(&ctx->queueMutex);
             pthread_cond_signal(&ctx->queuePushCond);
+
             job.function(job.opaque);
-        }
-    }
+
+            /* If the intended queue size was 0, signal after finishing job */
+            if (ctx->queueSize == 1) {
+                pthread_mutex_lock(&ctx->queueMutex);
+                ctx->numThreadsBusy--;
+                pthread_mutex_unlock(&ctx->queueMutex);
+                pthread_cond_signal(&ctx->queuePushCond);
+        }   }
+    }  /* for (;;) */
     /* Unreachable */
 }
 
 POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
     POOL_ctx *ctx;
     /* Check the parameters */
-    if (!numThreads || !queueSize) { return NULL; }
+    if (!numThreads) { return NULL; }
     /* Allocate the context and zero initialize */
     ctx = (POOL_ctx *)calloc(1, sizeof(POOL_ctx));
     if (!ctx) { return NULL; }
@@ -92,15 +109,17 @@ POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
      * and full queues.
      */
     ctx->queueSize = queueSize + 1;
-    ctx->queue = (POOL_job *)malloc(ctx->queueSize * sizeof(POOL_job));
+    ctx->queue = (POOL_job*) malloc(ctx->queueSize * sizeof(POOL_job));
     ctx->queueHead = 0;
     ctx->queueTail = 0;
-    pthread_mutex_init(&ctx->queueMutex, NULL);
-    pthread_cond_init(&ctx->queuePushCond, NULL);
-    pthread_cond_init(&ctx->queuePopCond, NULL);
+    ctx->numThreadsBusy = 0;
+    ctx->queueEmpty = 1;
+    (void)pthread_mutex_init(&ctx->queueMutex, NULL);
+    (void)pthread_cond_init(&ctx->queuePushCond, NULL);
+    (void)pthread_cond_init(&ctx->queuePopCond, NULL);
     ctx->shutdown = 0;
     /* Allocate space for the thread handles */
-    ctx->threads = (pthread_t *)malloc(numThreads * sizeof(pthread_t));
+    ctx->threads = (pthread_t*)malloc(numThreads * sizeof(pthread_t));
     ctx->numThreads = 0;
     /* Check for errors */
     if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; }
@@ -153,22 +172,37 @@ size_t POOL_sizeof(POOL_ctx *ctx) {
         + ctx->numThreads * sizeof(pthread_t);
 }
 
-void POOL_add(void *ctxVoid, POOL_function function, void *opaque) {
-    POOL_ctx *ctx = (POOL_ctx *)ctxVoid;
+/**
+ * Returns 1 if the queue is full and 0 otherwise.
+ *
+ * If the queueSize is 1 (the pool was created with an intended queueSize of 0),
+ * then the queue counts as not full only if a thread is free and no job is waiting.
+ */
+static int isQueueFull(POOL_ctx const* ctx) {
+    if (ctx->queueSize > 1) {
+        return ctx->queueHead == ((ctx->queueTail + 1) % ctx->queueSize);
+    } else {
+        return ctx->numThreadsBusy == ctx->numThreads ||
+               !ctx->queueEmpty;
+    }
+}
+
+void POOL_add(void* ctxVoid, POOL_function function, void *opaque) {
+    POOL_ctx* const ctx = (POOL_ctx*)ctxVoid;
     if (!ctx) { return; }
 
     pthread_mutex_lock(&ctx->queueMutex);
     {   POOL_job const job = {function, opaque};
+
         /* Wait until there is space in the queue for the new job */
-        size_t newTail = (ctx->queueTail + 1) % ctx->queueSize;
-        while (ctx->queueHead == newTail && !ctx->shutdown) {
+        while (isQueueFull(ctx) && !ctx->shutdown) {
           pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
-          newTail = (ctx->queueTail + 1) % ctx->queueSize;
         }
         /* The queue is still going => there is space */
         if (!ctx->shutdown) {
+            ctx->queueEmpty = 0;
             ctx->queue[ctx->queueTail] = job;
-            ctx->queueTail = newTail;
+            ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize;
         }
     }
     pthread_mutex_unlock(&ctx->queueMutex);
@@ -183,22 +217,22 @@ struct POOL_ctx_s {
   int data;
 };
 
-POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
+POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
   (void)numThreads;
   (void)queueSize;
-  return (POOL_ctx *)malloc(sizeof(POOL_ctx));
+  return (POOL_ctx*)malloc(sizeof(POOL_ctx));
 }
 
-void POOL_free(POOL_ctx *ctx) {
-  if (ctx) free(ctx);
+void POOL_free(POOL_ctx* ctx) {
+    free(ctx);
 }
 
-void POOL_add(void *ctx, POOL_function function, void *opaque) {
+void POOL_add(void* ctx, POOL_function function, void* opaque) {
   (void)ctx;
   function(opaque);
 }
 
-size_t POOL_sizeof(POOL_ctx *ctx) {
+size_t POOL_sizeof(POOL_ctx* ctx) {
     if (ctx==NULL) return 0;  /* supports sizeof NULL */
     return sizeof(*ctx);
 }
diff --git a/lib/common/pool.h b/lib/common/pool.h
index 386cd674b7c0..264c5c9ca7ea 100644
--- a/lib/common/pool.h
+++ b/lib/common/pool.h
@@ -1,11 +1,12 @@
-/**
- * Copyright (c) 2016-present, Facebook, Inc.
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
+
 #ifndef POOL_H
 #define POOL_H
 
@@ -19,11 +20,10 @@ extern "C" {
 typedef struct POOL_ctx_s POOL_ctx;
 
 /*! POOL_create() :
-    Create a thread pool with at most `numThreads` threads.
-    `numThreads` must be at least 1.
-    The maximum number of queued jobs before blocking is `queueSize`.
-    `queueSize` must be at least 1.
-    @return : The POOL_ctx pointer on success else NULL.
+ *  Create a thread pool with at most `numThreads` threads.
+ * `numThreads` must be at least 1.
+ *  The maximum number of queued jobs before blocking is `queueSize`.
+ * @return : POOL_ctx pointer on success, else NULL.
 */
 POOL_ctx *POOL_create(size_t numThreads, size_t queueSize);
 
diff --git a/lib/common/threading.h b/lib/common/threading.h
index c0086139ea33..ab09977a86dc 100644
--- a/lib/common/threading.h
+++ b/lib/common/threading.h
@@ -1,4 +1,3 @@
-
 /**
  * Copyright (c) 2016 Tino Reichardt
  * All rights reserved.
@@ -42,14 +41,14 @@ extern "C" {
 
 /* mutex */
 #define pthread_mutex_t           CRITICAL_SECTION
-#define pthread_mutex_init(a,b)   InitializeCriticalSection((a))
+#define pthread_mutex_init(a,b)   (InitializeCriticalSection((a)), 0)
 #define pthread_mutex_destroy(a)  DeleteCriticalSection((a))
 #define pthread_mutex_lock(a)     EnterCriticalSection((a))
 #define pthread_mutex_unlock(a)   LeaveCriticalSection((a))
 
 /* condition variable */
 #define pthread_cond_t             CONDITION_VARIABLE
-#define pthread_cond_init(a, b)    InitializeConditionVariable((a))
+#define pthread_cond_init(a, b)    (InitializeConditionVariable((a)), 0)
 #define pthread_cond_destroy(a)    /* No delete */
 #define pthread_cond_wait(a, b)    SleepConditionVariableCS((a), (b), INFINITE)
 #define pthread_cond_signal(a)     WakeConditionVariable((a))
@@ -80,14 +79,14 @@ int _pthread_join(pthread_t* thread, void** value_ptr);
 #else  /* ZSTD_MULTITHREAD not defined */
 /* No multithreading support */
 
-#define pthread_mutex_t int   /* #define rather than typedef, as sometimes pthread support is implicit, resulting in duplicated symbols */
-#define pthread_mutex_init(a,b)
+#define pthread_mutex_t int   /* #define rather than typedef, because sometimes pthread support is implicit, resulting in duplicated symbols */
+#define pthread_mutex_init(a,b)    ((void)a, 0)
 #define pthread_mutex_destroy(a)
 #define pthread_mutex_lock(a)
 #define pthread_mutex_unlock(a)
 
 #define pthread_cond_t int
-#define pthread_cond_init(a,b)
+#define pthread_cond_init(a,b)     ((void)a, 0)
 #define pthread_cond_destroy(a)
 #define pthread_cond_wait(a,b)
 #define pthread_cond_signal(a)
diff --git a/lib/common/xxhash.c b/lib/common/xxhash.c
index eb44222c5fce..9d9c0e963cbf 100644
--- a/lib/common/xxhash.c
+++ b/lib/common/xxhash.c
@@ -113,19 +113,25 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp
 /* *************************************
 *  Compiler Specific Options
 ***************************************/
-#ifdef _MSC_VER    /* Visual Studio */
-#  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
-#  define FORCE_INLINE static __forceinline
+#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#  define INLINE_KEYWORD inline
 #else
-#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
-#    ifdef __GNUC__
-#      define FORCE_INLINE static inline __attribute__((always_inline))
-#    else
-#      define FORCE_INLINE static inline
-#    endif
-#  else
-#    define FORCE_INLINE static
-#  endif /* __STDC_VERSION__ */
+#  define INLINE_KEYWORD
+#endif
+
+#if defined(__GNUC__)
+#  define FORCE_INLINE_ATTR __attribute__((always_inline))
+#elif defined(_MSC_VER)
+#  define FORCE_INLINE_ATTR __forceinline
+#else
+#  define FORCE_INLINE_ATTR
+#endif
+
+#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
+
+
+#ifdef _MSC_VER
+#  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
 #endif
 
 
@@ -248,7 +254,7 @@ typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
 *****************************/
 typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
 
-FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+FORCE_INLINE_TEMPLATE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
 {
     if (align==XXH_unaligned)
         return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
@@ -256,7 +262,7 @@ FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_a
         return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
 }
 
-FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
+FORCE_INLINE_TEMPLATE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
 {
     return XXH_readLE32_align(ptr, endian, XXH_unaligned);
 }
@@ -266,7 +272,7 @@ static U32 XXH_readBE32(const void* ptr)
     return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
 }
 
-FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+FORCE_INLINE_TEMPLATE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
 {
     if (align==XXH_unaligned)
         return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
@@ -274,7 +280,7 @@ FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_a
         return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr);
 }
 
-FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
+FORCE_INLINE_TEMPLATE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
 {
     return XXH_readLE64_align(ptr, endian, XXH_unaligned);
 }
@@ -335,7 +341,7 @@ static U32 XXH32_round(U32 seed, U32 input)
     return seed;
 }
 
-FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
+FORCE_INLINE_TEMPLATE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
 {
     const BYTE* p = (const BYTE*)input;
     const BYTE* bEnd = p + len;
@@ -435,7 +441,7 @@ static U64 XXH64_mergeRound(U64 acc, U64 val)
     return acc;
 }
 
-FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
+FORCE_INLINE_TEMPLATE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
 {
     const BYTE* p = (const BYTE*)input;
     const BYTE* const bEnd = p + len;
@@ -584,7 +590,7 @@ XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long
 }
 
 
-FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian)
+FORCE_INLINE_TEMPLATE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian)
 {
     const BYTE* p = (const BYTE*)input;
     const BYTE* const bEnd = p + len;
@@ -654,7 +660,7 @@ XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void*
 
 
 
-FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
+FORCE_INLINE_TEMPLATE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
 {
     const BYTE * p = (const BYTE*)state->mem32;
     const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize;
@@ -704,7 +710,7 @@ XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in)
 
 /* **** XXH64 **** */
 
-FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
+FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
 {
     const BYTE* p = (const BYTE*)input;
     const BYTE* const bEnd = p + len;
@@ -771,7 +777,7 @@ XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void*
 
 
 
-FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
+FORCE_INLINE_TEMPLATE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
 {
     const BYTE * p = (const BYTE*)state->mem64;
     const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize;
diff --git a/lib/common/zstd_common.c b/lib/common/zstd_common.c
index f68167238158..08384cabf59b 100644
--- a/lib/common/zstd_common.c
+++ b/lib/common/zstd_common.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
diff --git a/lib/common/zstd_errors.h b/lib/common/zstd_errors.h
index 19f1597aa340..a69387b714a8 100644
--- a/lib/common/zstd_errors.h
+++ b/lib/common/zstd_errors.h
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 #ifndef ZSTD_ERRORS_H_398273423
@@ -37,43 +37,41 @@ extern "C" {
 /*-****************************************
  *  error codes list
  *  note : this API is still considered unstable
- *         it should not be used with a dynamic library
+ *         and shall not be used with a dynamic library.
  *         only static linking is allowed
  ******************************************/
 typedef enum {
-  ZSTD_error_no_error,
-  ZSTD_error_GENERIC,
-  ZSTD_error_prefix_unknown,
-  ZSTD_error_version_unsupported,
-  ZSTD_error_parameter_unknown,
-  ZSTD_error_frameParameter_unsupported,
-  ZSTD_error_frameParameter_unsupportedBy32bits,
-  ZSTD_error_frameParameter_windowTooLarge,
-  ZSTD_error_compressionParameter_unsupported,
-  ZSTD_error_compressionParameter_outOfBound,
-  ZSTD_error_init_missing,
-  ZSTD_error_memory_allocation,
-  ZSTD_error_stage_wrong,
-  ZSTD_error_dstSize_tooSmall,
-  ZSTD_error_srcSize_wrong,
-  ZSTD_error_corruption_detected,
-  ZSTD_error_checksum_wrong,
-  ZSTD_error_tableLog_tooLarge,
-  ZSTD_error_maxSymbolValue_tooLarge,
-  ZSTD_error_maxSymbolValue_tooSmall,
-  ZSTD_error_dictionary_corrupted,
-  ZSTD_error_dictionary_wrong,
-  ZSTD_error_dictionaryCreation_failed,
-  ZSTD_error_frameIndex_tooLarge,
-  ZSTD_error_seekableIO,
-  ZSTD_error_maxCode
+  ZSTD_error_no_error = 0,   /* each enumerator carries an explicit value, with gaps left between entries */
+  ZSTD_error_GENERIC  = 1,
+  ZSTD_error_prefix_unknown                = 10,
+  ZSTD_error_version_unsupported           = 12,
+  ZSTD_error_frameParameter_unsupported    = 14,
+  ZSTD_error_frameParameter_windowTooLarge = 16,
+  ZSTD_error_corruption_detected = 20,
+  ZSTD_error_checksum_wrong      = 22,
+  ZSTD_error_dictionary_corrupted      = 30,
+  ZSTD_error_dictionary_wrong          = 32,
+  ZSTD_error_dictionaryCreation_failed = 34,
+  ZSTD_error_parameter_unsupported   = 40,
+  ZSTD_error_parameter_outOfBound    = 42,
+  ZSTD_error_tableLog_tooLarge       = 44,
+  ZSTD_error_maxSymbolValue_tooLarge = 46,
+  ZSTD_error_maxSymbolValue_tooSmall = 48,
+  ZSTD_error_stage_wrong       = 60,
+  ZSTD_error_init_missing      = 62,
+  ZSTD_error_memory_allocation = 64,
+  ZSTD_error_dstSize_tooSmall = 70,
+  ZSTD_error_srcSize_wrong    = 72,
+  ZSTD_error_frameIndex_tooLarge = 100,
+  ZSTD_error_seekableIO          = 102,
+  ZSTD_error_maxCode = 120  /* never EVER use this value directly, it may change in future versions! Use ZSTD_isError() instead */
 } ZSTD_ErrorCode;
 
 /*! ZSTD_getErrorCode() :
     convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
     which can be used to compare with enum list published above */
 ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
-ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code);
+ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code);   /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */
 
 
 #if defined (__cplusplus)
diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h
index f2c4e6249fb8..2610528608d4 100644
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@@ -1,55 +1,28 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 #ifndef ZSTD_CCOMMON_H_MODULE
 #define ZSTD_CCOMMON_H_MODULE
 
-/*-*******************************************************
-*  Compiler specifics
-*********************************************************/
-#ifdef _MSC_VER    /* Visual Studio */
-#  define FORCE_INLINE static __forceinline
-#  include <intrin.h>                    /* For Visual 2005 */
-#  pragma warning(disable : 4100)        /* disable: C4100: unreferenced formal parameter */
-#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
-#  pragma warning(disable : 4204)        /* disable: C4204: non-constant aggregate initializer */
-#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
-#else
-#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
-#    ifdef __GNUC__
-#      define FORCE_INLINE static inline __attribute__((always_inline))
-#    else
-#      define FORCE_INLINE static inline
-#    endif
-#  else
-#    define FORCE_INLINE static
-#  endif /* __STDC_VERSION__ */
-#endif
-
-#ifdef _MSC_VER
-#  define FORCE_NOINLINE static __declspec(noinline)
-#else
-#  ifdef __GNUC__
-#    define FORCE_NOINLINE static __attribute__((__noinline__))
-#  else
-#    define FORCE_NOINLINE static
-#  endif
-#endif
-
 
 /*-*************************************
 *  Dependencies
 ***************************************/
+#include "compiler.h"
 #include "mem.h"
 #include "error_private.h"
 #define ZSTD_STATIC_LINKING_ONLY
 #include "zstd.h"
+#define FSE_STATIC_LINKING_ONLY
+#include "fse.h"
+#define HUF_STATIC_LINKING_ONLY
+#include "huf.h"
 #ifndef XXH_STATIC_LINKING_ONLY
 #  define XXH_STATIC_LINKING_ONLY  /* XXH64_state_t */
 #endif
@@ -211,20 +184,6 @@ MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd)   /* s
 *********************************************/
 typedef struct ZSTD_stats_s ZSTD_stats_t;
 
-typedef struct {
-    U32 off;
-    U32 len;
-} ZSTD_match_t;
-
-typedef struct {
-    U32 price;
-    U32 off;
-    U32 mlen;
-    U32 litlen;
-    U32 rep[ZSTD_REP_NUM];
-} ZSTD_optimal_t;
-
-
 typedef struct seqDef_s {
     U32 offset;
     U16 litLength;
@@ -242,13 +201,31 @@ typedef struct {
     BYTE* ofCode;
     U32   longLengthID;   /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
     U32   longLengthPos;
-    /* opt */
-    ZSTD_optimal_t* priceTable;
-    ZSTD_match_t* matchTable;
-    U32* matchLengthFreq;
-    U32* litLengthFreq;
+    U32   rep[ZSTD_REP_NUM];
+    U32   repToConfirm[ZSTD_REP_NUM];
+} seqStore_t;
+
+typedef struct {   /* element type of optState_t.matchTable */
+    U32 off;   /* NOTE(review): looks like the match offset - confirm against zstd_opt.h */
+    U32 len;   /* NOTE(review): looks like the match length - confirm against zstd_opt.h */
+} ZSTD_match_t;
+
+typedef struct {   /* element type of optState_t.priceTable */
+    U32 price;
+    U32 off;
+    U32 mlen;
+    U32 litlen;
+    U32 rep[ZSTD_REP_NUM];   /* same length as the rep[] array held in seqStore_t */
+} ZSTD_optimal_t;
+
+typedef struct {
     U32* litFreq;
+    U32* litLengthFreq;
+    U32* matchLengthFreq;
     U32* offCodeFreq;
+    ZSTD_match_t* matchTable;
+    ZSTD_optimal_t* priceTable;
+
     U32  matchLengthSum;
     U32  matchSum;
     U32  litLengthSum;
@@ -264,7 +241,19 @@ typedef struct {
     U32  cachedPrice;
     U32  cachedLitLength;
     const BYTE* cachedLiterals;
-} seqStore_t;
+} optState_t;
+
+typedef struct {   /* compression-side entropy tables, their scratch buffer and their reuse flags, grouped in one object */
+    U32 hufCTable[HUF_CTABLE_SIZE_U32(255)];   /* cast to HUF_CElt* when handed to HUF_compress1X_repeat() / HUF_compress4X_repeat() */
+    FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
+    FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
+    FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
+    U32 workspace[HUF_WORKSPACE_SIZE_U32];   /* passed as the workspace argument of the HUF_compress*_repeat() calls */
+    HUF_repeat hufCTable_repeatMode;   /* set back to HUF_repeat_none by ZSTD_resetCCtx_internal() */
+    FSE_repeat offcode_repeatMode;   /* the three FSE modes are set back to FSE_repeat_none at the same points */
+    FSE_repeat matchlength_repeatMode;
+    FSE_repeat litlength_repeatMode;
+} ZSTD_entropyCTables_t;
 
 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);
 void ZSTD_seqToCodes(const seqStore_t* seqStorePtr);
@@ -331,4 +320,16 @@ size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
 ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict* cdict);
 
 
+typedef struct {   /* type of the bpPtr argument taken by ZSTD_getcBlockSize() */
+    blockType_e blockType;
+    U32 lastBlock;   /* NOTE(review): presumably a "last block of the frame" flag - confirm */
+    U32 origSize;
+} blockProperties_t;
+
+/*! ZSTD_getcBlockSize() :
+*   Provides the size of compressed block from block header `src` */
+size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
+                          blockProperties_t* bpPtr);
+
+
 #endif   /* ZSTD_CCOMMON_H_MODULE */
diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c
index 26e8052ddcce..cc9fa73514ad 100644
--- a/lib/compress/fse_compress.c
+++ b/lib/compress/fse_compress.c
@@ -32,27 +32,6 @@
     - Public forum : https://groups.google.com/forum/#!forum/lz4c
 ****************************************************************** */
 
-/* **************************************************************
-*  Compiler specifics
-****************************************************************/
-#ifdef _MSC_VER    /* Visual Studio */
-#  define FORCE_INLINE static __forceinline
-#  include <intrin.h>                    /* For Visual 2005 */
-#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
-#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
-#else
-#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
-#    ifdef __GNUC__
-#      define FORCE_INLINE static inline __attribute__((always_inline))
-#    else
-#      define FORCE_INLINE static inline
-#    endif
-#  else
-#    define FORCE_INLINE static
-#  endif /* __STDC_VERSION__ */
-#endif
-
-
 /* **************************************************************
 *  Includes
 ****************************************************************/
@@ -60,13 +39,16 @@
 #include <string.h>     /* memcpy, memset */
 #include <stdio.h>      /* printf (debug) */
 #include "bitstream.h"
+#include "compiler.h"
 #define FSE_STATIC_LINKING_ONLY
 #include "fse.h"
+#include "error_private.h"
 
 
 /* **************************************************************
 *  Error Management
 ****************************************************************/
+#define FSE_isError ERR_isError
 #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
 
 
@@ -781,7 +763,7 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
 
 size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
 
-#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return f
+#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
 #define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
 
 /* FSE_compress_wksp() :
diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c
index 7af0789a9c58..2a47c1820526 100644
--- a/lib/compress/huf_compress.c
+++ b/lib/compress/huf_compress.c
@@ -50,13 +50,15 @@
 #include "fse.h"        /* header compression */
 #define HUF_STATIC_LINKING_ONLY
 #include "huf.h"
+#include "error_private.h"
 
 
 /* **************************************************************
 *  Error Management
 ****************************************************************/
+#define HUF_isError ERR_isError
 #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
-#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return f
+#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
 #define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
 
 
@@ -436,7 +438,7 @@ static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt*
 
 size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
 
-#define HUF_FLUSHBITS(s)  (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
+#define HUF_FLUSHBITS(s)  BIT_flushBits(s)
 
 #define HUF_FLUSHBITS_1(stream) \
     if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
@@ -451,7 +453,6 @@ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, si
     BYTE* const oend = ostart + dstSize;
     BYTE* op = ostart;
     size_t n;
-    const unsigned fast = (dstSize >= HUF_BLOCKBOUND(srcSize));
     BIT_CStream_t bitC;
 
     /* init */
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 9300357f2d38..0322c03eb316 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
@@ -36,13 +36,6 @@ static const U32 g_searchStrength = 8;   /* control skip over incompressible dat
 #define HASH_READ_SIZE 8
 typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
 
-/* entropy tables always have same size */
-static size_t const hufCTable_size = HUF_CTABLE_SIZE(255);
-static size_t const litlengthCTable_size = FSE_CTABLE_SIZE(LLFSELog, MaxLL);
-static size_t const offcodeCTable_size = FSE_CTABLE_SIZE(OffFSELog, MaxOff);
-static size_t const matchlengthCTable_size = FSE_CTABLE_SIZE(MLFSELog, MaxML);
-static size_t const entropyScratchSpace_size = HUF_WORKSPACE_SIZE;
-
 
 /*-*************************************
 *  Helper functions
@@ -89,8 +82,6 @@ struct ZSTD_CCtx_s {
     U32   loadedDictEnd;    /* index of end of dictionary */
     U32   forceWindow;      /* force back-references to respect limit of 1<<wLog, even for dictionary */
     ZSTD_compressionStage_e stage;
-    U32   rep[ZSTD_REP_NUM];
-    U32   repToConfirm[ZSTD_REP_NUM];
     U32   dictID;
     int   compressionLevel;
     ZSTD_parameters requestedParams;
@@ -105,16 +96,11 @@ struct ZSTD_CCtx_s {
     size_t staticSize;
 
     seqStore_t seqStore;    /* sequences storage ptrs */
+    optState_t optState;
     U32* hashTable;
     U32* hashTable3;
     U32* chainTable;
-    HUF_repeat hufCTable_repeatMode;
-    HUF_CElt* hufCTable;
-    U32 fseCTables_ready;
-    FSE_CTable* offcodeCTable;
-    FSE_CTable* matchlengthCTable;
-    FSE_CTable* litlengthCTable;
-    unsigned* entropyScratchSpace;
+    ZSTD_entropyCTables_t* entropy;
 
     /* streaming */
     char*  inBuff;
@@ -174,19 +160,9 @@ ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize)
     cctx->workSpaceSize = workspaceSize - sizeof(ZSTD_CCtx);
 
     /* entropy space (never moves) */
-    /* note : this code should be shared with resetCCtx, rather than copy/pasted */
-    {   void* ptr = cctx->workSpace;
-        cctx->hufCTable = (HUF_CElt*)ptr;
-        ptr = (char*)cctx->hufCTable + hufCTable_size;
-        cctx->offcodeCTable = (FSE_CTable*) ptr;
-        ptr = (char*)ptr + offcodeCTable_size;
-        cctx->matchlengthCTable = (FSE_CTable*) ptr;
-        ptr = (char*)ptr + matchlengthCTable_size;
-        cctx->litlengthCTable = (FSE_CTable*) ptr;
-        ptr = (char*)ptr + litlengthCTable_size;
-        assert(((size_t)ptr & 3) == 0);   /* ensure correct alignment */
-        cctx->entropyScratchSpace = (unsigned*) ptr;
-    }
+    if (cctx->workSpaceSize < sizeof(ZSTD_entropyCTables_t)) return NULL;
+    assert(((size_t)cctx->workSpace & 7) == 0);   /* ensure correct alignment */
+    cctx->entropy = (ZSTD_entropyCTables_t*)cctx->workSpace;
 
     return cctx;
 }
@@ -237,7 +213,7 @@ size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned
     ZSTD_STATIC_ASSERT(ZSTD_dm_auto==0);
     ZSTD_STATIC_ASSERT(ZSTD_dm_rawContent==1);
     case ZSTD_p_forceRawDict : cctx->dictMode = (ZSTD_dictMode_e)(value>0); return 0;
-    default: return ERROR(parameter_unknown);
+    default: return ERROR(parameter_unsupported);
     }
 }
 
@@ -251,9 +227,9 @@ static void ZSTD_cLevelToCParams(ZSTD_CCtx* cctx)
     cctx->compressionLevel = ZSTD_CLEVEL_CUSTOM;
 }
 
-#define CLAMPCHECK(val,min,max) {                       \
-    if (((val)<(min)) | ((val)>(max))) {                \
-        return ERROR(compressionParameter_outOfBound);  \
+#define CLAMPCHECK(val,min,max) {            \
+    if (((val)<(min)) | ((val)>(max))) {     \
+        return ERROR(parameter_outOfBound);  \
 }   }
 
 size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value)
@@ -349,7 +325,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v
         /* restrict dictionary mode, to "rawContent" or "fullDict" only */
         ZSTD_STATIC_ASSERT((U32)ZSTD_dm_fullDict > (U32)ZSTD_dm_rawContent);
         if (value > (unsigned)ZSTD_dm_fullDict)
-            return ERROR(compressionParameter_outOfBound);
+            return ERROR(parameter_outOfBound);
         cctx->dictMode = (ZSTD_dictMode_e)value;
         return 0;
 
@@ -370,31 +346,31 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v
         if (value==0) return 0;
         DEBUGLOG(5, " setting nbThreads : %u", value);
 #ifndef ZSTD_MULTITHREAD
-        if (value > 1) return ERROR(compressionParameter_unsupported);
+        if (value > 1) return ERROR(parameter_unsupported);
 #endif
         if ((value>1) && (cctx->nbThreads != value)) {
             if (cctx->staticSize)  /* MT not compatible with static alloc */
-                return ERROR(compressionParameter_unsupported);
+                return ERROR(parameter_unsupported);
             ZSTDMT_freeCCtx(cctx->mtctx);
             cctx->nbThreads = 1;
-            cctx->mtctx = ZSTDMT_createCCtx(value);
+            cctx->mtctx = ZSTDMT_createCCtx_advanced(value, cctx->customMem);
             if (cctx->mtctx == NULL) return ERROR(memory_allocation);
         }
         cctx->nbThreads = value;
         return 0;
 
     case ZSTD_p_jobSize:
-        if (cctx->nbThreads <= 1) return ERROR(compressionParameter_unsupported);
+        if (cctx->nbThreads <= 1) return ERROR(parameter_unsupported);
         assert(cctx->mtctx != NULL);
         return ZSTDMT_setMTCtxParameter(cctx->mtctx, ZSTDMT_p_sectionSize, value);
 
     case ZSTD_p_overlapSizeLog:
         DEBUGLOG(5, " setting overlap with nbThreads == %u", cctx->nbThreads);
-        if (cctx->nbThreads <= 1) return ERROR(compressionParameter_unsupported);
+        if (cctx->nbThreads <= 1) return ERROR(parameter_unsupported);
         assert(cctx->mtctx != NULL);
         return ZSTDMT_setMTCtxParameter(cctx->mtctx, ZSTDMT_p_overlapSectionLog, value);
 
-    default: return ERROR(parameter_unknown);
+    default: return ERROR(parameter_unsupported);
     }
 }
 
@@ -474,7 +450,8 @@ size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
     CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
     CLAMPCHECK(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
     CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
-    if ((U32)(cParams.strategy) > (U32)ZSTD_btultra) return ERROR(compressionParameter_unsupported);
+    if ((U32)(cParams.strategy) > (U32)ZSTD_btultra)
+        return ERROR(parameter_unsupported);
     return 0;
 }
 
@@ -551,9 +528,7 @@ size_t ZSTD_estimateCCtxSize_advanced(ZSTD_compressionParameters cParams)
     size_t const hSize = ((size_t)1) << cParams.hashLog;
     U32    const hashLog3 = (cParams.searchLength>3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, cParams.windowLog);
     size_t const h3Size = ((size_t)1) << hashLog3;
-    size_t const entropySpace = hufCTable_size + litlengthCTable_size
-                              + offcodeCTable_size + matchlengthCTable_size
-                              + entropyScratchSpace_size;
+    size_t const entropySpace = sizeof(ZSTD_entropyCTables_t);
     size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
 
     size_t const optBudget = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
@@ -620,8 +595,8 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_parameters params, U64 ple
     cctx->stage = ZSTDcs_init;
     cctx->dictID = 0;
     cctx->loadedDictEnd = 0;
-    { int i; for (i=0; i<ZSTD_REP_NUM; i++) cctx->rep[i] = repStartValue[i]; }
-    cctx->seqStore.litLengthSum = 0;  /* force reset of btopt stats */
+    { int i; for (i=0; i<ZSTD_REP_NUM; i++) cctx->seqStore.rep[i] = repStartValue[i]; }
+    cctx->optState.litLengthSum = 0;  /* force reset of btopt stats */
     XXH64_reset(&cctx->xxhState, 0);
     return 0;
 }
@@ -641,8 +616,10 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
     if (crp == ZSTDcrp_continue) {
         if (ZSTD_equivalentParams(params.cParams, zc->appliedParams.cParams)) {
             DEBUGLOG(5, "ZSTD_equivalentParams()==1");
-            zc->fseCTables_ready = 0;
-            zc->hufCTable_repeatMode = HUF_repeat_none;
+            zc->entropy->hufCTable_repeatMode = HUF_repeat_none;
+            zc->entropy->offcode_repeatMode = FSE_repeat_none;
+            zc->entropy->matchlength_repeatMode = FSE_repeat_none;
+            zc->entropy->litlength_repeatMode = FSE_repeat_none;
             return ZSTD_continueCCtx(zc, params, pledgedSrcSize);
     }   }
 
@@ -662,9 +639,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
         void* ptr;
 
         /* Check if workSpace is large enough, alloc a new one if needed */
-        {   size_t const entropySpace = hufCTable_size + litlengthCTable_size
-                                  + offcodeCTable_size + matchlengthCTable_size
-                                  + entropyScratchSpace_size;
+        {   size_t const entropySpace = sizeof(ZSTD_entropyCTables_t);
             size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits)) * sizeof(U32)
                                   + (ZSTD_OPT_NUM+1) * (sizeof(ZSTD_match_t)+sizeof(ZSTD_optimal_t));
             size_t const optSpace = ( (params.cParams.strategy == ZSTD_btopt)
@@ -689,16 +664,9 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
                 ptr = zc->workSpace;
 
                 /* entropy space */
-                zc->hufCTable = (HUF_CElt*)ptr;
-                ptr = (char*)zc->hufCTable + hufCTable_size;  /* note : HUF_CElt* is incomplete type, size is estimated via macro */
-                zc->offcodeCTable = (FSE_CTable*) ptr;
-                ptr = (char*)ptr + offcodeCTable_size;
-                zc->matchlengthCTable = (FSE_CTable*) ptr;
-                ptr = (char*)ptr + matchlengthCTable_size;
-                zc->litlengthCTable = (FSE_CTable*) ptr;
-                ptr = (char*)ptr + litlengthCTable_size;
-                assert(((size_t)ptr & 3) == 0);   /* ensure correct alignment */
-                zc->entropyScratchSpace = (unsigned*) ptr;
+                assert(((size_t)zc->workSpace & 3) == 0);   /* ensure correct alignment */
+                assert(zc->workSpaceSize >= sizeof(ZSTD_entropyCTables_t));
+                zc->entropy = (ZSTD_entropyCTables_t*)zc->workSpace;
         }   }
 
         /* init params */
@@ -715,39 +683,35 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
         zc->stage = ZSTDcs_init;
         zc->dictID = 0;
         zc->loadedDictEnd = 0;
-        zc->fseCTables_ready = 0;
-        zc->hufCTable_repeatMode = HUF_repeat_none;
+        zc->entropy->hufCTable_repeatMode = HUF_repeat_none;
+        zc->entropy->offcode_repeatMode = FSE_repeat_none;
+        zc->entropy->matchlength_repeatMode = FSE_repeat_none;
+        zc->entropy->litlength_repeatMode = FSE_repeat_none;
         zc->nextToUpdate = 1;
         zc->nextSrc = NULL;
         zc->base = NULL;
         zc->dictBase = NULL;
         zc->dictLimit = 0;
         zc->lowLimit = 0;
-        { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = repStartValue[i]; }
+        { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->seqStore.rep[i] = repStartValue[i]; }
         zc->hashLog3 = hashLog3;
-        zc->seqStore.litLengthSum = 0;
+        zc->optState.litLengthSum = 0;
 
-        /* ensure entropy tables are close together at the beginning */
-        assert((void*)zc->hufCTable == zc->workSpace);
-        assert((char*)zc->offcodeCTable == (char*)zc->hufCTable + hufCTable_size);
-        assert((char*)zc->matchlengthCTable == (char*)zc->offcodeCTable + offcodeCTable_size);
-        assert((char*)zc->litlengthCTable == (char*)zc->matchlengthCTable + matchlengthCTable_size);
-        assert((char*)zc->entropyScratchSpace == (char*)zc->litlengthCTable + litlengthCTable_size);
-        ptr = (char*)zc->entropyScratchSpace + entropyScratchSpace_size;
+        ptr = zc->entropy + 1;
 
         /* opt parser space */
         if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btultra)) {
             DEBUGLOG(5, "reserving optimal parser space");
             assert(((size_t)ptr & 3) == 0);  /* ensure ptr is properly aligned */
-            zc->seqStore.litFreq = (U32*)ptr;
-            zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<<Litbits);
-            zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1);
-            zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1);
-            ptr = zc->seqStore.offCodeFreq + (MaxOff+1);
-            zc->seqStore.matchTable = (ZSTD_match_t*)ptr;
-            ptr = zc->seqStore.matchTable + ZSTD_OPT_NUM+1;
-            zc->seqStore.priceTable = (ZSTD_optimal_t*)ptr;
-            ptr = zc->seqStore.priceTable + ZSTD_OPT_NUM+1;
+            zc->optState.litFreq = (U32*)ptr;
+            zc->optState.litLengthFreq = zc->optState.litFreq + (1<<Litbits);
+            zc->optState.matchLengthFreq = zc->optState.litLengthFreq + (MaxLL+1);
+            zc->optState.offCodeFreq = zc->optState.matchLengthFreq + (MaxML+1);
+            ptr = zc->optState.offCodeFreq + (MaxOff+1);
+            zc->optState.matchTable = (ZSTD_match_t*)ptr;
+            ptr = zc->optState.matchTable + ZSTD_OPT_NUM+1;
+            zc->optState.priceTable = (ZSTD_optimal_t*)ptr;
+            ptr = zc->optState.priceTable + ZSTD_OPT_NUM+1;
         }
 
         /* table Space */
@@ -783,7 +747,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
  *        do not use with extDict variant ! */
 void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
     int i;
-    for (i=0; i<ZSTD_REP_NUM; i++) cctx->rep[i] = 0;
+    for (i=0; i<ZSTD_REP_NUM; i++) cctx->seqStore.rep[i] = 0;
 }
 
 
@@ -830,16 +794,7 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
     dstCCtx->dictID       = srcCCtx->dictID;
 
     /* copy entropy tables */
-    dstCCtx->fseCTables_ready = srcCCtx->fseCTables_ready;
-    if (srcCCtx->fseCTables_ready) {
-        memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, litlengthCTable_size);
-        memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, matchlengthCTable_size);
-        memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, offcodeCTable_size);
-    }
-    dstCCtx->hufCTable_repeatMode = srcCCtx->hufCTable_repeatMode;
-    if (srcCCtx->hufCTable_repeatMode) {
-        memcpy(dstCCtx->hufCTable, srcCCtx->hufCTable, hufCTable_size);
-    }
+    memcpy(dstCCtx->entropy, srcCCtx->entropy, sizeof(ZSTD_entropyCTables_t));
 
     return 0;
 }
@@ -956,7 +911,8 @@ static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, cons
 
 static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; }
 
-static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
+static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t * entropy,
+                                     ZSTD_strategy strategy,
                                      void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize)
 {
@@ -970,28 +926,28 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
 
     /* small ? don't even attempt compression (speed opt) */
 #   define LITERAL_NOENTROPY 63
-    {   size_t const minLitSize = zc->hufCTable_repeatMode == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY;
+    {   size_t const minLitSize = entropy->hufCTable_repeatMode == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY;
         if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
     }
 
     if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall);   /* not enough space for compression */
-    {   HUF_repeat repeat = zc->hufCTable_repeatMode;
-        int const preferRepeat = zc->appliedParams.cParams.strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
+    {   HUF_repeat repeat = entropy->hufCTable_repeatMode;
+        int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
         if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
         cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
-                                      zc->entropyScratchSpace, entropyScratchSpace_size, zc->hufCTable, &repeat, preferRepeat)
+                                      entropy->workspace, sizeof(entropy->workspace), (HUF_CElt*)entropy->hufCTable, &repeat, preferRepeat)
                                 : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
-                                      zc->entropyScratchSpace, entropyScratchSpace_size, zc->hufCTable, &repeat, preferRepeat);
+                                      entropy->workspace, sizeof(entropy->workspace), (HUF_CElt*)entropy->hufCTable, &repeat, preferRepeat);
         if (repeat != HUF_repeat_none) { hType = set_repeat; }    /* reused the existing table */
-        else { zc->hufCTable_repeatMode = HUF_repeat_check; }       /* now have a table to reuse */
+        else { entropy->hufCTable_repeatMode = HUF_repeat_check; }       /* now have a table to reuse */
     }
 
-    if ((cLitSize==0) | (cLitSize >= srcSize - minGain)) {
-        zc->hufCTable_repeatMode = HUF_repeat_none;
+    if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
+        entropy->hufCTable_repeatMode = HUF_repeat_none;
         return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
     }
     if (cLitSize==1) {
-        zc->hufCTable_repeatMode = HUF_repeat_none;
+        entropy->hufCTable_repeatMode = HUF_repeat_none;
         return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
     }
 
@@ -1062,17 +1018,154 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
         mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
 }
 
-MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
-                              void* dst, size_t dstCapacity,
-                              size_t srcSize)
+MEM_STATIC symbolEncodingType_e ZSTD_selectEncodingType(FSE_repeat* repeatMode,
+        size_t const mostFrequent, size_t nbSeq, U32 defaultNormLog)
 {
-    const int longOffsets = zc->appliedParams.cParams.windowLog > STREAM_ACCUMULATOR_MIN;
-    const seqStore_t* seqStorePtr = &(zc->seqStore);
+#define MIN_SEQ_FOR_DYNAMIC_FSE   64     /* with fewer sequences than this, set_compressed is never selected */
+#define MAX_SEQ_FOR_STATIC_FSE  1000     /* set_repeat is only selected with fewer sequences than this */
+    /* Selects the encoding type of one symbol table, and updates *repeatMode accordingly. Tests are ordered : rle, repeat, basic, compressed. */
+    if ((mostFrequent == nbSeq) && (nbSeq > 2)) {   /* a single symbol accounts for all sequences */
+        *repeatMode = FSE_repeat_check;
+        return set_rle;
+    }
+    if ((*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
+        return set_repeat;   /* *repeatMode is left unchanged */
+    }
+    if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1)))) {
+        *repeatMode = FSE_repeat_valid;
+        return set_basic;
+    }
+    *repeatMode = FSE_repeat_check;   /* none of the above applies : fall back to set_compressed */
+    return set_compressed;
+}
+
+/*! ZSTD_buildCTable() :
+ *  Emits into `dst` the table description required by encoding `type`, and builds the matching `CTable`.
+ *  @return : nb of bytes written into `dst` (0 for set_repeat and set_basic), or an error code */
+MEM_STATIC size_t ZSTD_buildCTable(void* dst, size_t dstCapacity,
+        FSE_CTable* CTable, U32 FSELog, symbolEncodingType_e type,
+        U32* count, U32 max,
+        BYTE const* codeTable, size_t nbSeq,
+        S16 const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+        void* workspace, size_t workspaceSize)
+{
+    BYTE* op = (BYTE*)dst;
+    BYTE const* const oend = op + dstCapacity;
+
+    switch (type) {
+    case set_rle:
+        if (dstCapacity == 0) return ERROR(dstSize_tooSmall);   /* the symbol byte written below needs room : never write past `dst` */
+        *op = codeTable[0];
+        CHECK_F(FSE_buildCTable_rle(CTable, (BYTE)max));
+        return 1;
+    case set_repeat: return 0;   /* `CTable` is left untouched and nothing is written */
+    case set_basic:   /* table is built from the default distribution : nothing is written */
+        CHECK_F(FSE_buildCTable_wksp(CTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize));
+        return 0;
+    case set_compressed: {
+        S16 norm[MaxSeq + 1];
+        size_t nbSeq_1 = nbSeq;
+        const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
+        if (count[codeTable[nbSeq-1]] > 1) { count[codeTable[nbSeq-1]]--; nbSeq_1--; }   /* last symbol is discounted from statistics, unless it is its only occurrence */
+        CHECK_F(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max));
+        {   size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog);   /* overflow protected */
+            if (FSE_isError(NCountSize)) return NCountSize;
+            CHECK_F(FSE_buildCTable_wksp(CTable, norm, max, tableLog, workspace, workspaceSize));
+            return NCountSize;
+        }
+    }
+    default: return assert(0), ERROR(GENERIC);
+    }
+}
+
+MEM_STATIC size_t ZSTD_encodeSequences(void* dst, size_t dstCapacity,
+    FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+    FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+    FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+    seqDef const* sequences, size_t nbSeq, int longOffsets)
+{   /* Writes the sequences bitstream into dst. @return : size of the bitstream in bytes, or an error code. Index nbSeq-1 is read, so nbSeq must be >= 1. */
+    BIT_CStream_t blockStream;
+    FSE_CState_t  stateMatchLength;
+    FSE_CState_t  stateOffsetBits;
+    FSE_CState_t  stateLitLength;
+
+    CHECK_E(BIT_initCStream(&blockStream, dst, dstCapacity), dstSize_tooSmall); /* not enough space remaining */
+
+    /* first symbols : the last sequence (nbSeq-1) initializes the 3 states, then its extra bits are written */
+    FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
+    FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  ofCodeTable[nbSeq-1]);
+    FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
+    if (MEM_32bits()) BIT_flushBits(&blockStream);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
+    if (MEM_32bits()) BIT_flushBits(&blockStream);
+    if (longOffsets) {   /* offset bits are written in up to 2 parts, with a flush in between */
+        U32 const ofBits = ofCodeTable[nbSeq-1];
+        int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+        if (extraBits) {
+            BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
+            BIT_flushBits(&blockStream);
+        }
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
+                    ofBits - extraBits);
+    } else {
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
+    }
+    BIT_flushBits(&blockStream);
+
+    {   size_t n;   /* remaining sequences are processed from nbSeq-2 down to 0 */
+        for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow : n wraps around after 0, which makes n<nbSeq false and ends the loop */
+            BYTE const llCode = llCodeTable[n];
+            BYTE const ofCode = ofCodeTable[n];
+            BYTE const mlCode = mlCodeTable[n];
+            U32  const llBits = LL_bits[llCode];
+            U32  const ofBits = ofCode;                                     /* 32b*/  /* 64b*/
+            U32  const mlBits = ML_bits[mlCode];
+                                                                            /* (7)*/  /* (7)*/
+            FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
+            FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 24 */  /* 24 */
+            if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
+            FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);        /* 16 */  /* 33 */
+            if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
+                BIT_flushBits(&blockStream);                                /* (7)*/
+            BIT_addBits(&blockStream, sequences[n].litLength, llBits);
+            if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
+            BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
+            if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
+            if (longOffsets) {   /* same split of the offset bits as for the first sequence written */
+                int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+                if (extraBits) {
+                    BIT_addBits(&blockStream, sequences[n].offset, extraBits);
+                    BIT_flushBits(&blockStream);                            /* (7)*/
+                }
+                BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
+                            ofBits - extraBits);                            /* 31 */
+            } else {
+                BIT_addBits(&blockStream, sequences[n].offset, ofBits);     /* 31 */
+            }
+            BIT_flushBits(&blockStream);                                    /* (7)*/
+    }   }
+
+    FSE_flushCState(&blockStream, &stateMatchLength);
+    FSE_flushCState(&blockStream, &stateOffsetBits);
+    FSE_flushCState(&blockStream, &stateLitLength);
+
+    {   size_t const streamSize = BIT_closeCStream(&blockStream);
+        if (streamSize==0) return ERROR(dstSize_tooSmall);   /* not enough space */
+        return streamSize;
+    }
+}
+
+MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
+                              ZSTD_entropyCTables_t* entropy,
+                              ZSTD_compressionParameters const* cParams,
+                              void* dst, size_t dstCapacity)
+{
+    const int longOffsets = cParams->windowLog > STREAM_ACCUMULATOR_MIN;
     U32 count[MaxSeq+1];
-    S16 norm[MaxSeq+1];
-    FSE_CTable* CTable_LitLength = zc->litlengthCTable;
-    FSE_CTable* CTable_OffsetBits = zc->offcodeCTable;
-    FSE_CTable* CTable_MatchLength = zc->matchlengthCTable;
+    FSE_CTable* CTable_LitLength = entropy->litlengthCTable;
+    FSE_CTable* CTable_OffsetBits = entropy->offcodeCTable;
+    FSE_CTable* CTable_MatchLength = entropy->matchlengthCTable;
     U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
     const seqDef* const sequences = seqStorePtr->sequencesStart;
     const BYTE* const ofCodeTable = seqStorePtr->ofCode;
@@ -1083,13 +1176,16 @@ MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
     BYTE* op = ostart;
     size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
     BYTE* seqHead;
-    BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)];
+
+    ZSTD_STATIC_ASSERT(sizeof(entropy->workspace) >= (1<<MAX(MLFSELog,LLFSELog)));
 
     /* Compress literals */
     {   const BYTE* const literals = seqStorePtr->litStart;
         size_t const litSize = seqStorePtr->lit - literals;
-        size_t const cSize = ZSTD_compressLiterals(zc, op, dstCapacity, literals, litSize);
-        if (ZSTD_isError(cSize)) return cSize;
+        size_t const cSize = ZSTD_compressLiterals(
+                entropy, cParams->strategy, op, dstCapacity, literals, litSize);
+        if (ZSTD_isError(cSize))
+          return cSize;
         op += cSize;
     }
 
@@ -1098,179 +1194,91 @@ MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
     if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq;
     else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
     else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
-    if (nbSeq==0) goto _check_compressibility;
+    if (nbSeq==0) return op - ostart;
 
     /* seqHead : flags for FSE encoding type */
     seqHead = op++;
 
-#define MIN_SEQ_FOR_DYNAMIC_FSE   64
-#define MAX_SEQ_FOR_STATIC_FSE  1000
-
     /* convert length/distances into codes */
     ZSTD_seqToCodes(seqStorePtr);
-
     /* CTable for Literal Lengths */
     {   U32 max = MaxLL;
-        size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, zc->entropyScratchSpace);
-        if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
-            *op++ = llCodeTable[0];
-            FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
-            LLtype = set_rle;
-        } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
-            LLtype = set_repeat;
-        } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) {
-            FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
-            LLtype = set_basic;
-        } else {
-            size_t nbSeq_1 = nbSeq;
-            const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
-            if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
-            FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
-            { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
-              if (FSE_isError(NCountSize)) return NCountSize;
-              op += NCountSize; }
-            FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
-            LLtype = set_compressed;
+        size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, entropy->workspace);
+        LLtype = ZSTD_selectEncodingType(&entropy->litlength_repeatMode, mostFrequent, nbSeq, LL_defaultNormLog);
+        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
+                    count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
+                    entropy->workspace, sizeof(entropy->workspace));
+            if (ZSTD_isError(countSize)) return countSize;
+            op += countSize;
     }   }
-
     /* CTable for Offsets */
     {   U32 max = MaxOff;
-        size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, zc->entropyScratchSpace);
-        if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
-            *op++ = ofCodeTable[0];
-            FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
-            Offtype = set_rle;
-        } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
-            Offtype = set_repeat;
-        } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) {
-            FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
-            Offtype = set_basic;
-        } else {
-            size_t nbSeq_1 = nbSeq;
-            const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
-            if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; }
-            FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
-            { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
-              if (FSE_isError(NCountSize)) return NCountSize;
-              op += NCountSize; }
-            FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
-            Offtype = set_compressed;
+        size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, entropy->workspace);
+        Offtype = ZSTD_selectEncodingType(&entropy->offcode_repeatMode, mostFrequent, nbSeq, OF_defaultNormLog);
+        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
+                    count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, MaxOff,
+                    entropy->workspace, sizeof(entropy->workspace));
+            if (ZSTD_isError(countSize)) return countSize;
+            op += countSize;
     }   }
-
     /* CTable for MatchLengths */
     {   U32 max = MaxML;
-        size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, zc->entropyScratchSpace);
-        if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
-            *op++ = *mlCodeTable;
-            FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
-            MLtype = set_rle;
-        } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
-            MLtype = set_repeat;
-        } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) {
-            FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
-            MLtype = set_basic;
-        } else {
-            size_t nbSeq_1 = nbSeq;
-            const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
-            if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; }
-            FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
-            { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
-              if (FSE_isError(NCountSize)) return NCountSize;
-              op += NCountSize; }
-            FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
-            MLtype = set_compressed;
+        size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, entropy->workspace);
+        MLtype = ZSTD_selectEncodingType(&entropy->matchlength_repeatMode, mostFrequent, nbSeq, ML_defaultNormLog);
+        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
+                    count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
+                    entropy->workspace, sizeof(entropy->workspace));
+            if (ZSTD_isError(countSize)) return countSize;
+            op += countSize;
     }   }
 
     *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
-    zc->fseCTables_ready = 0;
 
-    /* Encoding Sequences */
-    {   BIT_CStream_t blockStream;
-        FSE_CState_t  stateMatchLength;
-        FSE_CState_t  stateOffsetBits;
-        FSE_CState_t  stateLitLength;
-
-        CHECK_E(BIT_initCStream(&blockStream, op, oend-op), dstSize_tooSmall); /* not enough space remaining */
-
-        /* first symbols */
-        FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
-        FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  ofCodeTable[nbSeq-1]);
-        FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
-        BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
-        if (MEM_32bits()) BIT_flushBits(&blockStream);
-        BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
-        if (MEM_32bits()) BIT_flushBits(&blockStream);
-        if (longOffsets) {
-            U32 const ofBits = ofCodeTable[nbSeq-1];
-            int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
-            if (extraBits) {
-                BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
-                BIT_flushBits(&blockStream);
-            }
-            BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
-                        ofBits - extraBits);
-        } else {
-            BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
-        }
-        BIT_flushBits(&blockStream);
-
-        {   size_t n;
-            for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow */
-                BYTE const llCode = llCodeTable[n];
-                BYTE const ofCode = ofCodeTable[n];
-                BYTE const mlCode = mlCodeTable[n];
-                U32  const llBits = LL_bits[llCode];
-                U32  const ofBits = ofCode;                                     /* 32b*/  /* 64b*/
-                U32  const mlBits = ML_bits[mlCode];
-                                                                                /* (7)*/  /* (7)*/
-                FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
-                FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 24 */  /* 24 */
-                if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
-                FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);        /* 16 */  /* 33 */
-                if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
-                    BIT_flushBits(&blockStream);                                /* (7)*/
-                BIT_addBits(&blockStream, sequences[n].litLength, llBits);
-                if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
-                BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
-                if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
-                if (longOffsets) {
-                    int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
-                    if (extraBits) {
-                        BIT_addBits(&blockStream, sequences[n].offset, extraBits);
-                        BIT_flushBits(&blockStream);                            /* (7)*/
-                    }
-                    BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
-                                ofBits - extraBits);                            /* 31 */
-                } else {
-                    BIT_addBits(&blockStream, sequences[n].offset, ofBits);     /* 31 */
-                }
-                BIT_flushBits(&blockStream);                                    /* (7)*/
-        }   }
-
-        FSE_flushCState(&blockStream, &stateMatchLength);
-        FSE_flushCState(&blockStream, &stateOffsetBits);
-        FSE_flushCState(&blockStream, &stateLitLength);
-
-        {   size_t const streamSize = BIT_closeCStream(&blockStream);
-            if (streamSize==0) return ERROR(dstSize_tooSmall);   /* not enough space */
-            op += streamSize;
-    }   }
-
-    /* check compressibility */
-_check_compressibility:
-    {   size_t const minGain = ZSTD_minGain(srcSize);
-        size_t const maxCSize = srcSize - minGain;
-        if ((size_t)(op-ostart) >= maxCSize) {
-            zc->hufCTable_repeatMode = HUF_repeat_none;
-            return 0;
-    }   }
-
-    /* confirm repcodes */
-    { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->repToConfirm[i]; }
+    {   size_t const streamSize = ZSTD_encodeSequences(op, oend - op,
+                CTable_MatchLength, mlCodeTable,
+                CTable_OffsetBits, ofCodeTable,
+                CTable_LitLength, llCodeTable,
+                sequences, nbSeq, longOffsets);
+        if (ZSTD_isError(streamSize)) return streamSize;
+        op += streamSize;
+    }
 
     return op - ostart;
 }
 
+MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr,
+                              ZSTD_entropyCTables_t* entropy,
+                              ZSTD_compressionParameters const* cParams,
+                              void* dst, size_t dstCapacity,
+                              size_t srcSize)
+{   /* @return : compressed size, or 0 when the block is considered not compressible, or an error code */
+    size_t const cSize = ZSTD_compressSequences_internal(seqStorePtr, entropy, cParams,
+                                                         dst, dstCapacity);
+    size_t const minGain = ZSTD_minGain(srcSize);
+    size_t const maxCSize = srcSize - minGain;
+    /* If srcSize <= dstCapacity, then there is enough space to write a
+     * raw uncompressed block. Since we ran out of space, the block must not
+     * be compressible, so fall back to a raw uncompressed block.
+     */
+    int const uncompressibleError = cSize == ERROR(dstSize_tooSmall) && srcSize <= dstCapacity;
+
+    if (ZSTD_isError(cSize) && !uncompressibleError)
+        return cSize;   /* any other error is forwarded unchanged */
+    /* Check compressibility : a result of maxCSize bytes or more is discarded, all repeat modes are reset, and 0 is returned */
+    if (cSize >= maxCSize || uncompressibleError) {
+        entropy->hufCTable_repeatMode = HUF_repeat_none;
+        entropy->offcode_repeatMode = FSE_repeat_none;
+        entropy->matchlength_repeatMode = FSE_repeat_none;
+        entropy->litlength_repeatMode = FSE_repeat_none;
+        return 0;   /* note : repcodes are not confirmed on this path */
+    }
+    assert(!ZSTD_isError(cSize));   /* every error case has already returned above */
+
+    /* confirm repcodes : rep[] takes the values stored in repToConfirm[] */
+    { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqStorePtr->rep[i] = seqStorePtr->repToConfirm[i]; }
+    return cSize;
+}
+
 
 /*! ZSTD_storeSeq() :
     Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
@@ -1475,7 +1483,7 @@ static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls)
 }
 
 
-FORCE_INLINE
+FORCE_INLINE_TEMPLATE
 void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
                                const void* src, size_t srcSize,
                                const U32 mls)
@@ -1491,7 +1499,7 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
     const BYTE* const lowest = base + lowestIndex;
     const BYTE* const iend = istart + srcSize;
     const BYTE* const ilimit = iend - HASH_READ_SIZE;
-    U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1];
+    U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1];
     U32 offsetSaved = 0;
 
     /* init */
@@ -1552,8 +1560,8 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
     }   }   }
 
     /* save reps for next block */
-    cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
-    cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
+    seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
+    seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -1601,7 +1609,7 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
     const BYTE* const dictEnd = dictBase + dictLimit;
     const BYTE* const iend = istart + srcSize;
     const BYTE* const ilimit = iend - 8;
-    U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1];
+    U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1];
 
     /* Search Loop */
     while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
@@ -1667,7 +1675,7 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
     }   }   }
 
     /* save reps for next block */
-    ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
+    seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -1718,7 +1726,7 @@ static void ZSTD_fillDoubleHashTable (ZSTD_CCtx* cctx, const void* end, const U3
 }
 
 
-FORCE_INLINE
+FORCE_INLINE_TEMPLATE
 void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
                                  const void* src, size_t srcSize,
                                  const U32 mls)
@@ -1736,7 +1744,7 @@ void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
     const BYTE* const lowest = base + lowestIndex;
     const BYTE* const iend = istart + srcSize;
     const BYTE* const ilimit = iend - HASH_READ_SIZE;
-    U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1];
+    U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1];
     U32 offsetSaved = 0;
 
     /* init */
@@ -1823,8 +1831,8 @@ void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
     }   }   }
 
     /* save reps for next block */
-    cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
-    cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
+    seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
+    seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -1873,7 +1881,7 @@ static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
     const BYTE* const dictEnd = dictBase + dictLimit;
     const BYTE* const iend = istart + srcSize;
     const BYTE* const ilimit = iend - 8;
-    U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1];
+    U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1];
 
     /* Search Loop */
     while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
@@ -1973,7 +1981,7 @@ static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
     }   }   }
 
     /* save reps for next block */
-    ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
+    seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -2276,7 +2284,7 @@ static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
 
 /* Update chains up to ip (excluded)
    Assumption : always within prefix (i.e. not within extDict) */
-FORCE_INLINE
+FORCE_INLINE_TEMPLATE
 U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
 {
     U32* const hashTable  = zc->hashTable;
@@ -2300,7 +2308,7 @@ U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
 
 
 /* inlining is important to hardwire a hot branch (template emulation) */
-FORCE_INLINE
+FORCE_INLINE_TEMPLATE
 size_t ZSTD_HcFindBestMatch_generic (
                         ZSTD_CCtx* zc,   /* Index table will be updated */
                         const BYTE* const ip, const BYTE* const iLimit,
@@ -2352,7 +2360,7 @@ size_t ZSTD_HcFindBestMatch_generic (
 }
 
 
-FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS (
+FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
                         ZSTD_CCtx* zc,
                         const BYTE* ip, const BYTE* const iLimit,
                         size_t* offsetPtr,
@@ -2369,7 +2377,7 @@ FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS (
 }
 
 
-FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
+FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
                         ZSTD_CCtx* zc,
                         const BYTE* ip, const BYTE* const iLimit,
                         size_t* offsetPtr,
@@ -2389,7 +2397,7 @@ FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
 /* *******************************
 *  Common parser - lazy strategy
 *********************************/
-FORCE_INLINE
+FORCE_INLINE_TEMPLATE
 void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
                                      const void* src, size_t srcSize,
                                      const U32 searchMethod, const U32 depth)
@@ -2409,7 +2417,7 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
                         size_t* offsetPtr,
                         U32 maxNbAttempts, U32 matchLengthSearch);
     searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
-    U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1], savedOffset=0;
+    U32 offset_1 = seqStorePtr->rep[0], offset_2 = seqStorePtr->rep[1], savedOffset=0;
 
     /* init */
     ip += (ip==base);
@@ -2519,8 +2527,8 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
     }   }
 
     /* Save reps for next block */
-    ctx->repToConfirm[0] = offset_1 ? offset_1 : savedOffset;
-    ctx->repToConfirm[1] = offset_2 ? offset_2 : savedOffset;
+    seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : savedOffset;
+    seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : savedOffset;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -2551,7 +2559,7 @@ static void ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t sr
 }
 
 
-FORCE_INLINE
+FORCE_INLINE_TEMPLATE
 void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
                                      const void* src, size_t srcSize,
                                      const U32 searchMethod, const U32 depth)
@@ -2578,7 +2586,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
                         U32 maxNbAttempts, U32 matchLengthSearch);
     searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
 
-    U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1];
+    U32 offset_1 = seqStorePtr->rep[0], offset_2 = seqStorePtr->rep[1];
 
     /* init */
     ctx->nextToUpdate3 = ctx->nextToUpdate;
@@ -2714,7 +2722,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
     }   }
 
     /* Save reps for next block */
-    ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
+    seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -2823,7 +2831,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCa
     if (current > zc->nextToUpdate + 384)
         zc->nextToUpdate = current - MIN(192, (U32)(current - zc->nextToUpdate - 384));   /* limited update after finding a very long match */
     blockCompressor(zc, src, srcSize);
-    return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize);
+    return ZSTD_compressSequences(&zc->seqStore, zc->entropy, &zc->appliedParams.cParams, dst, dstCapacity, srcSize);
 }
 
 
@@ -3000,7 +3008,6 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
         return fhSize;
 }
 
-
 size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize)
@@ -3106,13 +3113,14 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t
     const BYTE* const dictEnd = dictPtr + dictSize;
     short offcodeNCount[MaxOff+1];
     unsigned offcodeMaxValue = MaxOff;
-    BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)];
+
+    ZSTD_STATIC_ASSERT(sizeof(cctx->entropy->workspace) >= (1<<MAX(MLFSELog,LLFSELog)));
 
     dictPtr += 4;   /* skip magic number */
     cctx->dictID = cctx->appliedParams.fParams.noDictIDFlag ? 0 :  MEM_readLE32(dictPtr);
     dictPtr += 4;
 
-    {   size_t const hufHeaderSize = HUF_readCTable(cctx->hufCTable, 255, dictPtr, dictEnd-dictPtr);
+    {   size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)cctx->entropy->hufCTable, 255, dictPtr, dictEnd-dictPtr);
         if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
         dictPtr += hufHeaderSize;
     }
@@ -3122,7 +3130,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t
         if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
         if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
         /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
-        CHECK_E( FSE_buildCTable_wksp(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, scratchBuffer, sizeof(scratchBuffer)),
+        CHECK_E( FSE_buildCTable_wksp(cctx->entropy->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, cctx->entropy->workspace, sizeof(cctx->entropy->workspace)),
                  dictionary_corrupted);
         dictPtr += offcodeHeaderSize;
     }
@@ -3134,7 +3142,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t
         if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
         /* Every match length code must have non-zero probability */
         CHECK_F( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
-        CHECK_E( FSE_buildCTable_wksp(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, scratchBuffer, sizeof(scratchBuffer)),
+        CHECK_E( FSE_buildCTable_wksp(cctx->entropy->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, cctx->entropy->workspace, sizeof(cctx->entropy->workspace)),
                  dictionary_corrupted);
         dictPtr += matchlengthHeaderSize;
     }
@@ -3146,15 +3154,15 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t
         if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
         /* Every literal length code must have non-zero probability */
         CHECK_F( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
-        CHECK_E( FSE_buildCTable_wksp(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, scratchBuffer, sizeof(scratchBuffer)),
+        CHECK_E( FSE_buildCTable_wksp(cctx->entropy->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, cctx->entropy->workspace, sizeof(cctx->entropy->workspace)),
                  dictionary_corrupted);
         dictPtr += litlengthHeaderSize;
     }
 
     if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
-    cctx->rep[0] = MEM_readLE32(dictPtr+0);
-    cctx->rep[1] = MEM_readLE32(dictPtr+4);
-    cctx->rep[2] = MEM_readLE32(dictPtr+8);
+    cctx->seqStore.rep[0] = MEM_readLE32(dictPtr+0);
+    cctx->seqStore.rep[1] = MEM_readLE32(dictPtr+4);
+    cctx->seqStore.rep[2] = MEM_readLE32(dictPtr+8);
     dictPtr += 12;
 
     {   size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
@@ -3168,12 +3176,14 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t
         /* All repCodes must be <= dictContentSize and != 0*/
         {   U32 u;
             for (u=0; u<3; u++) {
-                if (cctx->rep[u] == 0) return ERROR(dictionary_corrupted);
-                if (cctx->rep[u] > dictContentSize) return ERROR(dictionary_corrupted);
+                if (cctx->seqStore.rep[u] == 0) return ERROR(dictionary_corrupted);
+                if (cctx->seqStore.rep[u] > dictContentSize) return ERROR(dictionary_corrupted);
         }   }
 
-        cctx->fseCTables_ready = 1;
-        cctx->hufCTable_repeatMode = HUF_repeat_valid;
+        cctx->entropy->hufCTable_repeatMode = HUF_repeat_valid;
+        cctx->entropy->offcode_repeatMode = FSE_repeat_valid;
+        cctx->entropy->matchlength_repeatMode = FSE_repeat_valid;
+        cctx->entropy->litlength_repeatMode = FSE_repeat_valid;
         return ZSTD_loadDictionaryContent(cctx, dictPtr, dictContentSize);
     }
 }
diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h
index e8e98915ea31..ae24732c7d84 100644
--- a/lib/compress/zstd_opt.h
+++ b/lib/compress/zstd_opt.h
@@ -1,10 +1,10 @@
-/**
- * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
@@ -22,173 +22,173 @@
 /*-*************************************
 *  Price functions for optimal parser
 ***************************************/
-FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t* ssPtr)
+static void ZSTD_setLog2Prices(optState_t* optPtr)
 {
-    ssPtr->log2matchLengthSum = ZSTD_highbit32(ssPtr->matchLengthSum+1);
-    ssPtr->log2litLengthSum = ZSTD_highbit32(ssPtr->litLengthSum+1);
-    ssPtr->log2litSum = ZSTD_highbit32(ssPtr->litSum+1);
-    ssPtr->log2offCodeSum = ZSTD_highbit32(ssPtr->offCodeSum+1);
-    ssPtr->factor = 1 + ((ssPtr->litSum>>5) / ssPtr->litLengthSum) + ((ssPtr->litSum<<1) / (ssPtr->litSum + ssPtr->matchSum));
+    optPtr->log2matchLengthSum = ZSTD_highbit32(optPtr->matchLengthSum+1);
+    optPtr->log2litLengthSum = ZSTD_highbit32(optPtr->litLengthSum+1);
+    optPtr->log2litSum = ZSTD_highbit32(optPtr->litSum+1);
+    optPtr->log2offCodeSum = ZSTD_highbit32(optPtr->offCodeSum+1);
+    optPtr->factor = 1 + ((optPtr->litSum>>5) / optPtr->litLengthSum) + ((optPtr->litSum<<1) / (optPtr->litSum + optPtr->matchSum));
 }
 
 
-MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr, const BYTE* src, size_t srcSize)
+static void ZSTD_rescaleFreqs(optState_t* optPtr, const BYTE* src, size_t srcSize)
 {
     unsigned u;
 
-    ssPtr->cachedLiterals = NULL;
-    ssPtr->cachedPrice = ssPtr->cachedLitLength = 0;
-    ssPtr->staticPrices = 0;
+    optPtr->cachedLiterals = NULL;
+    optPtr->cachedPrice = optPtr->cachedLitLength = 0;
+    optPtr->staticPrices = 0;
 
-    if (ssPtr->litLengthSum == 0) {
-        if (srcSize <= 1024) ssPtr->staticPrices = 1;
+    if (optPtr->litLengthSum == 0) {
+        if (srcSize <= 1024) optPtr->staticPrices = 1;
 
-        assert(ssPtr->litFreq!=NULL);
+        assert(optPtr->litFreq!=NULL);
         for (u=0; u<=MaxLit; u++)
-            ssPtr->litFreq[u] = 0;
+            optPtr->litFreq[u] = 0;
         for (u=0; u<srcSize; u++)
-            ssPtr->litFreq[src[u]]++;
+            optPtr->litFreq[src[u]]++;
 
-        ssPtr->litSum = 0;
-        ssPtr->litLengthSum = MaxLL+1;
-        ssPtr->matchLengthSum = MaxML+1;
-        ssPtr->offCodeSum = (MaxOff+1);
-        ssPtr->matchSum = (ZSTD_LITFREQ_ADD<<Litbits);
+        optPtr->litSum = 0;
+        optPtr->litLengthSum = MaxLL+1;
+        optPtr->matchLengthSum = MaxML+1;
+        optPtr->offCodeSum = (MaxOff+1);
+        optPtr->matchSum = (ZSTD_LITFREQ_ADD<<Litbits);
 
         for (u=0; u<=MaxLit; u++) {
-            ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV);
-            ssPtr->litSum += ssPtr->litFreq[u];
+            optPtr->litFreq[u] = 1 + (optPtr->litFreq[u]>>ZSTD_FREQ_DIV);
+            optPtr->litSum += optPtr->litFreq[u];
         }
         for (u=0; u<=MaxLL; u++)
-            ssPtr->litLengthFreq[u] = 1;
+            optPtr->litLengthFreq[u] = 1;
         for (u=0; u<=MaxML; u++)
-            ssPtr->matchLengthFreq[u] = 1;
+            optPtr->matchLengthFreq[u] = 1;
         for (u=0; u<=MaxOff; u++)
-            ssPtr->offCodeFreq[u] = 1;
+            optPtr->offCodeFreq[u] = 1;
     } else {
-        ssPtr->matchLengthSum = 0;
-        ssPtr->litLengthSum = 0;
-        ssPtr->offCodeSum = 0;
-        ssPtr->matchSum = 0;
-        ssPtr->litSum = 0;
+        optPtr->matchLengthSum = 0;
+        optPtr->litLengthSum = 0;
+        optPtr->offCodeSum = 0;
+        optPtr->matchSum = 0;
+        optPtr->litSum = 0;
 
         for (u=0; u<=MaxLit; u++) {
-            ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>(ZSTD_FREQ_DIV+1));
-            ssPtr->litSum += ssPtr->litFreq[u];
+            optPtr->litFreq[u] = 1 + (optPtr->litFreq[u]>>(ZSTD_FREQ_DIV+1));
+            optPtr->litSum += optPtr->litFreq[u];
         }
         for (u=0; u<=MaxLL; u++) {
-            ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
-            ssPtr->litLengthSum += ssPtr->litLengthFreq[u];
+            optPtr->litLengthFreq[u] = 1 + (optPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
+            optPtr->litLengthSum += optPtr->litLengthFreq[u];
         }
         for (u=0; u<=MaxML; u++) {
-            ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV);
-            ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u];
-            ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3);
+            optPtr->matchLengthFreq[u] = 1 + (optPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV);
+            optPtr->matchLengthSum += optPtr->matchLengthFreq[u];
+            optPtr->matchSum += optPtr->matchLengthFreq[u] * (u + 3);
         }
-        ssPtr->matchSum *= ZSTD_LITFREQ_ADD;
+        optPtr->matchSum *= ZSTD_LITFREQ_ADD;
         for (u=0; u<=MaxOff; u++) {
-            ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
-            ssPtr->offCodeSum += ssPtr->offCodeFreq[u];
+            optPtr->offCodeFreq[u] = 1 + (optPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
+            optPtr->offCodeSum += optPtr->offCodeFreq[u];
         }
     }
 
-    ZSTD_setLog2Prices(ssPtr);
+    ZSTD_setLog2Prices(optPtr);
 }
 
 
-FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BYTE* literals)
+static U32 ZSTD_getLiteralPrice(optState_t* optPtr, U32 litLength, const BYTE* literals)
 {
     U32 price, u;
 
-    if (ssPtr->staticPrices)
+    if (optPtr->staticPrices)
         return ZSTD_highbit32((U32)litLength+1) + (litLength*6);
 
     if (litLength == 0)
-        return ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[0]+1);
+        return optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[0]+1);
 
     /* literals */
-    if (ssPtr->cachedLiterals == literals) {
-        U32 const additional = litLength - ssPtr->cachedLitLength;
-        const BYTE* literals2 = ssPtr->cachedLiterals + ssPtr->cachedLitLength;
-        price = ssPtr->cachedPrice + additional * ssPtr->log2litSum;
+    if (optPtr->cachedLiterals == literals) {
+        U32 const additional = litLength - optPtr->cachedLitLength;
+        const BYTE* literals2 = optPtr->cachedLiterals + optPtr->cachedLitLength;
+        price = optPtr->cachedPrice + additional * optPtr->log2litSum;
         for (u=0; u < additional; u++)
-            price -= ZSTD_highbit32(ssPtr->litFreq[literals2[u]]+1);
-        ssPtr->cachedPrice = price;
-        ssPtr->cachedLitLength = litLength;
+            price -= ZSTD_highbit32(optPtr->litFreq[literals2[u]]+1);
+        optPtr->cachedPrice = price;
+        optPtr->cachedLitLength = litLength;
     } else {
-        price = litLength * ssPtr->log2litSum;
+        price = litLength * optPtr->log2litSum;
         for (u=0; u < litLength; u++)
-            price -= ZSTD_highbit32(ssPtr->litFreq[literals[u]]+1);
+            price -= ZSTD_highbit32(optPtr->litFreq[literals[u]]+1);
 
         if (litLength >= 12) {
-            ssPtr->cachedLiterals = literals;
-            ssPtr->cachedPrice = price;
-            ssPtr->cachedLitLength = litLength;
+            optPtr->cachedLiterals = literals;
+            optPtr->cachedPrice = price;
+            optPtr->cachedLitLength = litLength;
         }
     }
 
     /* literal Length */
     {   const BYTE LL_deltaCode = 19;
         const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
-        price += LL_bits[llCode] + ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[llCode]+1);
+        price += LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
     }
 
     return price;
 }
 
 
-FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra)
+FORCE_INLINE_TEMPLATE U32 ZSTD_getPrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra)
 {
     /* offset */
     U32 price;
     BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
 
-    if (seqStorePtr->staticPrices)
-        return ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit32((U32)matchLength+1) + 16 + offCode;
+    if (optPtr->staticPrices)
+        return ZSTD_getLiteralPrice(optPtr, litLength, literals) + ZSTD_highbit32((U32)matchLength+1) + 16 + offCode;
 
-    price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1);
+    price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1);
     if (!ultra && offCode >= 20) price += (offCode-19)*2;
 
     /* match Length */
     {   const BYTE ML_deltaCode = 36;
         const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
-        price += ML_bits[mlCode] + seqStorePtr->log2matchLengthSum - ZSTD_highbit32(seqStorePtr->matchLengthFreq[mlCode]+1);
+        price += ML_bits[mlCode] + optPtr->log2matchLengthSum - ZSTD_highbit32(optPtr->matchLengthFreq[mlCode]+1);
     }
 
-    return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + seqStorePtr->factor;
+    return price + ZSTD_getLiteralPrice(optPtr, litLength, literals) + optPtr->factor;
 }
 
 
-MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength)
+static void ZSTD_updatePrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength)
 {
     U32 u;
 
     /* literals */
-    seqStorePtr->litSum += litLength*ZSTD_LITFREQ_ADD;
+    optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
     for (u=0; u < litLength; u++)
-        seqStorePtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
+        optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
 
     /* literal Length */
     {   const BYTE LL_deltaCode = 19;
         const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
-        seqStorePtr->litLengthFreq[llCode]++;
-        seqStorePtr->litLengthSum++;
+        optPtr->litLengthFreq[llCode]++;
+        optPtr->litLengthSum++;
     }
 
     /* match offset */
     {   BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
-        seqStorePtr->offCodeSum++;
-        seqStorePtr->offCodeFreq[offCode]++;
+        optPtr->offCodeSum++;
+        optPtr->offCodeFreq[offCode]++;
     }
 
     /* match Length */
     {   const BYTE ML_deltaCode = 36;
         const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
-        seqStorePtr->matchLengthFreq[mlCode]++;
-        seqStorePtr->matchLengthSum++;
+        optPtr->matchLengthFreq[mlCode]++;
+        optPtr->matchLengthSum++;
     }
 
-    ZSTD_setLog2Prices(seqStorePtr);
+    ZSTD_setLog2Prices(optPtr);
 }
 
 
@@ -203,7 +203,7 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
 
 
 /* function safe only for comparisons */
-MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
+static U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
 {
     switch (length)
     {
@@ -219,7 +219,7 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
 
 /* Update hashTable3 up to ip (excluded)
    Assumption : always within prefix (i.e. not within extDict) */
-FORCE_INLINE
+static
 U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip)
 {
     U32* const hashTable3  = zc->hashTable3;
@@ -412,11 +412,12 @@ static U32 ZSTD_BtGetAllMatches_selectMLS_extDict (
 /*-*******************************
 *  Optimal parser
 *********************************/
-FORCE_INLINE
+FORCE_INLINE_TEMPLATE
 void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
                                     const void* src, size_t srcSize, const int ultra)
 {
     seqStore_t* seqStorePtr = &(ctx->seqStore);
+    optState_t* optStatePtr = &(ctx->optState);
     const BYTE* const istart = (const BYTE*)src;
     const BYTE* ip = istart;
     const BYTE* anchor = istart;
@@ -430,16 +431,16 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
     const U32 mls = ctx->appliedParams.cParams.searchLength;
     const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;
 
-    ZSTD_optimal_t* opt = seqStorePtr->priceTable;
-    ZSTD_match_t* matches = seqStorePtr->matchTable;
+    ZSTD_optimal_t* opt = optStatePtr->priceTable;
+    ZSTD_match_t* matches = optStatePtr->matchTable;
     const BYTE* inr;
     U32 offset, rep[ZSTD_REP_NUM];
 
     /* init */
     ctx->nextToUpdate3 = ctx->nextToUpdate;
-    ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize);
+    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize);
     ip += (ip==prefixStart);
-    { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }
+    { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=seqStorePtr->rep[i]; }
 
     /* Match Loop */
     while (ip < ilimit) {
@@ -462,7 +463,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
                     }
                     best_off = i - (ip == anchor);
                     do {
-                        price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
+                        price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
                         if (mlen > last_pos || price < opt[mlen].price)
                             SET_PRICE(mlen, mlen, i, litlen, price);   /* note : macro modifies last_pos */
                         mlen--;
@@ -487,7 +488,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
             mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
             best_mlen = matches[u].len;
             while (mlen <= best_mlen) {
-                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
+                price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
                 if (mlen > last_pos || price < opt[mlen].price)
                     SET_PRICE(mlen, mlen, matches[u].off, litlen, price);   /* note : macro modifies last_pos */
                 mlen++;
@@ -507,12 +508,12 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
            if (opt[cur-1].mlen == 1) {
                 litlen = opt[cur-1].litlen + 1;
                 if (cur > litlen) {
-                    price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen);
+                    price = opt[cur - litlen].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-litlen);
                 } else
-                    price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor);
+                    price = ZSTD_getLiteralPrice(optStatePtr, litlen, anchor);
            } else {
                 litlen = 1;
-                price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1);
+                price = opt[cur - 1].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-1);
            }
 
            if (cur > last_pos || price <= opt[cur].price)
@@ -554,12 +555,12 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
                            if (opt[cur].mlen == 1) {
                                 litlen = opt[cur].litlen;
                                 if (cur > litlen) {
-                                    price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
+                                    price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
                                 } else
-                                    price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
+                                    price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
                             } else {
                                 litlen = 0;
-                                price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
+                                price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
                             }
 
                             if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
@@ -586,12 +587,12 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
                     if (opt[cur].mlen == 1) {
                         litlen = opt[cur].litlen;
                         if (cur > litlen)
-                            price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
+                            price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
                         else
-                            price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
+                            price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
                     } else {
                         litlen = 0;
-                        price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
+                        price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
                     }
 
                     if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
@@ -645,13 +646,13 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
                 if (litLength==0) offset--;
             }
 
-            ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
+            ZSTD_updatePrice(optStatePtr, litLength, anchor, offset, mlen-MINMATCH);
             ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
             anchor = ip = ip + mlen;
     }    }   /* for (cur=0; cur < last_pos; ) */
 
     /* Save reps for next block */
-    { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; }
+    { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqStorePtr->repToConfirm[i] = rep[i]; }
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -661,11 +662,12 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
 }
 
 
-FORCE_INLINE
+FORCE_INLINE_TEMPLATE
 void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
                                      const void* src, size_t srcSize, const int ultra)
 {
     seqStore_t* seqStorePtr = &(ctx->seqStore);
+    optState_t* optStatePtr = &(ctx->optState);
     const BYTE* const istart = (const BYTE*)src;
     const BYTE* ip = istart;
     const BYTE* anchor = istart;
@@ -683,16 +685,16 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
     const U32 mls = ctx->appliedParams.cParams.searchLength;
     const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;
 
-    ZSTD_optimal_t* opt = seqStorePtr->priceTable;
-    ZSTD_match_t* matches = seqStorePtr->matchTable;
+    ZSTD_optimal_t* opt = optStatePtr->priceTable;
+    ZSTD_match_t* matches = optStatePtr->matchTable;
     const BYTE* inr;
 
     /* init */
     U32 offset, rep[ZSTD_REP_NUM];
-    { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }
+    { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=seqStorePtr->rep[i]; }
 
     ctx->nextToUpdate3 = ctx->nextToUpdate;
-    ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize);
+    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize);
     ip += (ip==prefixStart);
 
     /* Match Loop */
@@ -726,7 +728,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
                     best_off = i - (ip==anchor);
                     litlen = opt[0].litlen;
                     do {
-                        price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
+                        price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
                         if (mlen > last_pos || price < opt[mlen].price)
                             SET_PRICE(mlen, mlen, i, litlen, price);   /* note : macro modifies last_pos */
                         mlen--;
@@ -756,7 +758,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
             best_mlen = matches[u].len;
             litlen = opt[0].litlen;
             while (mlen <= best_mlen) {
-                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
+                price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
                 if (mlen > last_pos || price < opt[mlen].price)
                     SET_PRICE(mlen, mlen, matches[u].off, litlen, price);
                 mlen++;
@@ -773,12 +775,12 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
             if (opt[cur-1].mlen == 1) {
                 litlen = opt[cur-1].litlen + 1;
                 if (cur > litlen) {
-                    price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen);
+                    price = opt[cur - litlen].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-litlen);
                 } else
-                    price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor);
+                    price = ZSTD_getLiteralPrice(optStatePtr, litlen, anchor);
             } else {
                 litlen = 1;
-                price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1);
+                price = opt[cur - 1].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-1);
             }
 
             if (cur > last_pos || price <= opt[cur].price)
@@ -826,12 +828,12 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
                             if (opt[cur].mlen == 1) {
                                 litlen = opt[cur].litlen;
                                 if (cur > litlen) {
-                                    price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
+                                    price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
                                 } else
-                                    price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
+                                    price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
                             } else {
                                 litlen = 0;
-                                price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
+                                price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
                             }
 
                             if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
@@ -858,12 +860,12 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
                     if (opt[cur].mlen == 1) {
                         litlen = opt[cur].litlen;
                         if (cur > litlen)
-                            price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
+                            price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
                         else
-                            price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
+                            price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
                     } else {
                         litlen = 0;
-                        price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
+                        price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
                     }
 
                     if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
@@ -918,13 +920,13 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
                 if (litLength==0) offset--;
             }
 
-            ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
+            ZSTD_updatePrice(optStatePtr, litLength, anchor, offset, mlen-MINMATCH);
             ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
             anchor = ip = ip + mlen;
     }    }   /* for (cur=0; cur < last_pos; ) */
 
     /* Save reps for next block */
-    { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; }
+    { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqStorePtr->repToConfirm[i] = rep[i]; }
 
     /* Last Literals */
     {   size_t lastLLSize = iend - anchor;
diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 0cee01eacb86..8564bc439214 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -1,15 +1,16 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
 /* ======   Tuning parameters   ====== */
-#define ZSTDMT_NBTHREADS_MAX 128
+#define ZSTDMT_NBTHREADS_MAX 256
+#define ZSTDMT_OVERLAPLOG_DEFAULT 6
 
 
 /* ======   Compiler specifics   ====== */
@@ -73,6 +74,7 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
 
 
 /* =====   Buffer Pool   ===== */
+/* A single buffer pool may be used by multiple threads in parallel (see poolMutex). */
 
 typedef struct buffer_s {
     void* start;
@@ -82,6 +84,8 @@ typedef struct buffer_s {
 static const buffer_t g_nullBuffer = { NULL, 0 };
 
 typedef struct ZSTDMT_bufferPool_s {
+    pthread_mutex_t poolMutex;
+    size_t bufferSize;
     unsigned totalBuffers;
     unsigned nbBuffers;
     ZSTD_customMem cMem;
@@ -90,10 +94,15 @@ typedef struct ZSTDMT_bufferPool_s {
 
 static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads, ZSTD_customMem cMem)
 {
-    unsigned const maxNbBuffers = 2*nbThreads + 2;
+    unsigned const maxNbBuffers = 2*nbThreads + 3;
     ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_calloc(
         sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
     if (bufPool==NULL) return NULL;
+    if (pthread_mutex_init(&bufPool->poolMutex, NULL)) {
+        ZSTD_free(bufPool, cMem);
+        return NULL;
+    }
+    bufPool->bufferSize = 64 KB;
     bufPool->totalBuffers = maxNbBuffers;
     bufPool->nbBuffers = 0;
     bufPool->cMem = cMem;
@@ -106,6 +115,7 @@ static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
     if (!bufPool) return;   /* compatibility with free on NULL */
     for (u=0; u<bufPool->totalBuffers; u++)
         ZSTD_free(bufPool->bTable[u].start, bufPool->cMem);
+    pthread_mutex_destroy(&bufPool->poolMutex);
     ZSTD_free(bufPool, bufPool->cMem);
 }
 
@@ -116,65 +126,85 @@ static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
                             + (bufPool->totalBuffers - 1) * sizeof(buffer_t);
     unsigned u;
     size_t totalBufferSize = 0;
+    pthread_mutex_lock(&bufPool->poolMutex);
     for (u=0; u<bufPool->totalBuffers; u++)
         totalBufferSize += bufPool->bTable[u].size;
+    pthread_mutex_unlock(&bufPool->poolMutex);
 
     return poolSize + totalBufferSize;
 }
 
-/** ZSTDMT_getBuffer() :
- *  assumption : invocation from main thread only ! */
-static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize)
+static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* bufPool, size_t bSize)
 {
-    if (pool->nbBuffers) {   /* try to use an existing buffer */
-        buffer_t const buf = pool->bTable[--(pool->nbBuffers)];
+    bufPool->bufferSize = bSize;
+}
+
+/** ZSTDMT_getBuffer() :
+ *  assumption : bufPool must be valid */
+static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
+{
+    size_t const bSize = bufPool->bufferSize;
+    DEBUGLOG(5, "ZSTDMT_getBuffer");
+    pthread_mutex_lock(&bufPool->poolMutex);
+    if (bufPool->nbBuffers) {   /* try to use an existing buffer */
+        buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)];
         size_t const availBufferSize = buf.size;
-        if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize))
+        if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize)) {
             /* large enough, but not too much */
+            pthread_mutex_unlock(&bufPool->poolMutex);
             return buf;
+        }
         /* size conditions not respected : scratch this buffer, create new one */
-        ZSTD_free(buf.start, pool->cMem);
+        DEBUGLOG(5, "existing buffer does not meet size conditions => freeing");
+        ZSTD_free(buf.start, bufPool->cMem);
     }
+    pthread_mutex_unlock(&bufPool->poolMutex);
     /* create new buffer */
+    DEBUGLOG(5, "create a new buffer");
     {   buffer_t buffer;
-        void* const start = ZSTD_malloc(bSize, pool->cMem);
-        if (start==NULL) bSize = 0;
+        void* const start = ZSTD_malloc(bSize, bufPool->cMem);
         buffer.start = start;   /* note : start can be NULL if malloc fails ! */
-        buffer.size = bSize;
+        buffer.size = (start==NULL) ? 0 : bSize;
         return buffer;
     }
 }
 
 /* store buffer for later re-use, up to pool capacity */
-static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf)
+static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
 {
-    if (buf.start == NULL) return;   /* release on NULL */
-    if (pool->nbBuffers < pool->totalBuffers) {
-        pool->bTable[pool->nbBuffers++] = buf;   /* store for later re-use */
+    if (buf.start == NULL) return;   /* compatible with release on NULL */
+    DEBUGLOG(5, "ZSTDMT_releaseBuffer");
+    pthread_mutex_lock(&bufPool->poolMutex);
+    if (bufPool->nbBuffers < bufPool->totalBuffers) {
+        bufPool->bTable[bufPool->nbBuffers++] = buf;  /* stored for later use */
+        pthread_mutex_unlock(&bufPool->poolMutex);
         return;
     }
+    pthread_mutex_unlock(&bufPool->poolMutex);
     /* Reached bufferPool capacity (should not happen) */
-    ZSTD_free(buf.start, pool->cMem);
+    DEBUGLOG(5, "buffer pool capacity reached => freeing ");
+    ZSTD_free(buf.start, bufPool->cMem);
 }
 
 
 /* =====   CCtx Pool   ===== */
+/* a single CCtx Pool can be invoked from multiple threads in parallel */
 
 typedef struct {
+    pthread_mutex_t poolMutex;
     unsigned totalCCtx;
     unsigned availCCtx;
     ZSTD_customMem cMem;
     ZSTD_CCtx* cctx[1];   /* variable size */
 } ZSTDMT_CCtxPool;
 
-/* assumption : CCtxPool invocation only from main thread */
-
 /* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
 static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
 {
     unsigned u;
     for (u=0; u<pool->totalCCtx; u++)
         ZSTD_freeCCtx(pool->cctx[u]);  /* note : compatible with free on NULL */
+    pthread_mutex_destroy(&pool->poolMutex);
     ZSTD_free(pool, pool->cMem);
 }
 
@@ -186,6 +216,10 @@ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads,
     ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
         sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*), cMem);
     if (!cctxPool) return NULL;
+    if (pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
+        ZSTD_free(cctxPool, cMem);
+        return NULL;
+    }
     cctxPool->cMem = cMem;
     cctxPool->totalCCtx = nbThreads;
     cctxPool->availCCtx = 1;   /* at least one cctx for single-thread mode */
@@ -198,50 +232,57 @@ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads,
 /* only works during initialization phase, not during compression */
 static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
 {
-    unsigned const nbThreads = cctxPool->totalCCtx;
-    size_t const poolSize = sizeof(*cctxPool)
-                            + (nbThreads-1)*sizeof(ZSTD_CCtx*);
-    unsigned u;
-    size_t totalCCtxSize = 0;
-    for (u=0; u<nbThreads; u++)
-        totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
-
-    return poolSize + totalCCtxSize;
+    pthread_mutex_lock(&cctxPool->poolMutex);
+    {   unsigned const nbThreads = cctxPool->totalCCtx;
+        size_t const poolSize = sizeof(*cctxPool)
+                                + (nbThreads-1)*sizeof(ZSTD_CCtx*);
+        unsigned u;
+        size_t totalCCtxSize = 0;
+        for (u=0; u<nbThreads; u++) {
+            totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
+        }
+        pthread_mutex_unlock(&cctxPool->poolMutex);
+        return poolSize + totalCCtxSize;
+    }
 }
 
-static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* pool)
+static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* cctxPool)
 {
-    if (pool->availCCtx) {
-        pool->availCCtx--;
-        return pool->cctx[pool->availCCtx];
-    }
-    return ZSTD_createCCtx();   /* note : can be NULL, when creation fails ! */
+    DEBUGLOG(5, "ZSTDMT_getCCtx");
+    pthread_mutex_lock(&cctxPool->poolMutex);
+    if (cctxPool->availCCtx) {
+        cctxPool->availCCtx--;
+        {   ZSTD_CCtx* const cctx = cctxPool->cctx[cctxPool->availCCtx];
+            pthread_mutex_unlock(&cctxPool->poolMutex);
+            return cctx;
+    }   }
+    pthread_mutex_unlock(&cctxPool->poolMutex);
+    DEBUGLOG(5, "create one more CCtx");
+    return ZSTD_createCCtx_advanced(cctxPool->cMem);   /* note : can be NULL, when creation fails ! */
 }
 
 static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
 {
     if (cctx==NULL) return;   /* compatibility with release on NULL */
+    pthread_mutex_lock(&pool->poolMutex);
     if (pool->availCCtx < pool->totalCCtx)
         pool->cctx[pool->availCCtx++] = cctx;
-    else
+    else {
         /* pool overflow : should not happen, since totalCCtx==nbThreads */
+        DEBUGLOG(5, "CCtx pool overflow : free cctx");
         ZSTD_freeCCtx(cctx);
+    }
+    pthread_mutex_unlock(&pool->poolMutex);
 }
 
 
 /* =====   Thread worker   ===== */
 
 typedef struct {
-    buffer_t buffer;
-    size_t filled;
-} inBuff_t;
-
-typedef struct {
-    ZSTD_CCtx* cctx;
     buffer_t src;
     const void* srcStart;
-    size_t   srcSize;
     size_t   dictSize;
+    size_t   srcSize;
     buffer_t dstBuff;
     size_t   cSize;
     size_t   dstFlushed;
@@ -253,6 +294,8 @@ typedef struct {
     pthread_cond_t* jobCompleted_cond;
     ZSTD_parameters params;
     const ZSTD_CDict* cdict;
+    ZSTDMT_CCtxPool* cctxPool;
+    ZSTDMT_bufferPool* bufPool;
     unsigned long long fullFrameSize;
 } ZSTDMT_jobDescription;
 
@@ -260,37 +303,56 @@ typedef struct {
 void ZSTDMT_compressChunk(void* jobDescription)
 {
     ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
+    ZSTD_CCtx* cctx = ZSTDMT_getCCtx(job->cctxPool);
     const void* const src = (const char*)job->srcStart + job->dictSize;
-    buffer_t const dstBuff = job->dstBuff;
+    buffer_t dstBuff = job->dstBuff;
     DEBUGLOG(5, "job (first:%u) (last:%u) : dictSize %u, srcSize %u",
                  job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize);
+
+    if (cctx==NULL) {
+        job->cSize = ERROR(memory_allocation);
+        goto _endJob;
+    }
+
+    if (dstBuff.start == NULL) {
+        dstBuff = ZSTDMT_getBuffer(job->bufPool);
+        if (dstBuff.start==NULL) {
+            job->cSize = ERROR(memory_allocation);
+            goto _endJob;
+        }
+        job->dstBuff = dstBuff;
+    }
+
     if (job->cdict) {  /* should only happen for first segment */
-        size_t const initError = ZSTD_compressBegin_usingCDict_advanced(job->cctx, job->cdict, job->params.fParams, job->fullFrameSize);
+        size_t const initError = ZSTD_compressBegin_usingCDict_advanced(cctx, job->cdict, job->params.fParams, job->fullFrameSize);
         DEBUGLOG(5, "using CDict");
         if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
     } else {  /* srcStart points at reloaded section */
         if (!job->firstChunk) job->params.fParams.contentSizeFlag = 0;  /* ensure no srcSize control */
-        {   size_t const dictModeError = ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceRawDict, 1);  /* Force loading dictionary in "content-only" mode (no header analysis) */
-            size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize);
-            if (ZSTD_isError(initError) || ZSTD_isError(dictModeError)) { job->cSize = initError; goto _endJob; }
-            ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceWindow, 1);
+        {   size_t const dictModeError = ZSTD_setCCtxParameter(cctx, ZSTD_p_forceRawDict, 1);  /* Force loading dictionary in "content-only" mode (no header analysis) */
+            size_t const initError = ZSTD_compressBegin_advanced(cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize);
+            if (ZSTD_isError(initError) || ZSTD_isError(dictModeError)) { job->cSize = ZSTD_isError(initError) ? initError : dictModeError; goto _endJob; }  /* report whichever call failed, so cSize always carries an error code here */
+            ZSTD_setCCtxParameter(cctx, ZSTD_p_forceWindow, 1);
     }   }
     if (!job->firstChunk) {  /* flush and overwrite frame header when it's not first segment */
-        size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, 0);
+        size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, 0);
         if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; }
-        ZSTD_invalidateRepCodes(job->cctx);
+        ZSTD_invalidateRepCodes(cctx);
     }
 
     DEBUGLOG(5, "Compressing : ");
     DEBUG_PRINTHEX(4, job->srcStart, 12);
     job->cSize = (job->lastChunk) ?
-                 ZSTD_compressEnd     (job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
-                 ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
+                 ZSTD_compressEnd     (cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
+                 ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
     DEBUGLOG(5, "compressed %u bytes into %u bytes   (first:%u) (last:%u)",
                 (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);
     DEBUGLOG(5, "dstBuff.size : %u ; => %s", (U32)dstBuff.size, ZSTD_getErrorName(job->cSize));
 
 _endJob:
+    ZSTDMT_releaseCCtx(job->cctxPool, cctx);
+    ZSTDMT_releaseBuffer(job->bufPool, job->src);
+    job->src = g_nullBuffer; job->srcStart = NULL;
     PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex);
     job->jobCompleted = 1;
     job->jobScanned = 0;
@@ -303,15 +365,19 @@ void ZSTDMT_compressChunk(void* jobDescription)
 /* =====   Multi-threaded compression   ===== */
 /* ------------------------------------------ */
 
+typedef struct {
+    buffer_t buffer;
+    size_t filled;
+} inBuff_t;
+
 struct ZSTDMT_CCtx_s {
     POOL_ctx* factory;
     ZSTDMT_jobDescription* jobs;
-    ZSTDMT_bufferPool* buffPool;
+    ZSTDMT_bufferPool* bufPool;
     ZSTDMT_CCtxPool* cctxPool;
     pthread_mutex_t jobCompleted_mutex;
     pthread_cond_t jobCompleted_cond;
     size_t targetSectionSize;
-    size_t marginSize;
     size_t inBuffSize;
     size_t dictSize;
     size_t targetDictSize;
@@ -324,7 +390,7 @@ struct ZSTDMT_CCtx_s {
     unsigned nextJobID;
     unsigned frameEnded;
     unsigned allJobsCompleted;
-    unsigned overlapRLog;
+    unsigned overlapLog;
     unsigned long long frameContentSize;
     size_t sectionSize;
     ZSTD_customMem cMem;
@@ -347,7 +413,8 @@ ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem)
     U32 nbJobs = nbThreads + 2;
     DEBUGLOG(3, "ZSTDMT_createCCtx_advanced");
 
-    if ((nbThreads < 1) | (nbThreads > ZSTDMT_NBTHREADS_MAX)) return NULL;
+    if (nbThreads < 1) return NULL;
+    nbThreads = MIN(nbThreads , ZSTDMT_NBTHREADS_MAX);
     if ((cMem.customAlloc!=NULL) ^ (cMem.customFree!=NULL))
         /* invalid custom allocator */
         return NULL;
@@ -358,18 +425,24 @@ ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem)
     mtctx->nbThreads = nbThreads;
     mtctx->allJobsCompleted = 1;
     mtctx->sectionSize = 0;
-    mtctx->overlapRLog = 3;
-    mtctx->factory = POOL_create(nbThreads, 1);
+    mtctx->overlapLog = ZSTDMT_OVERLAPLOG_DEFAULT;
+    mtctx->factory = POOL_create(nbThreads, 0);
     mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, cMem);
     mtctx->jobIDMask = nbJobs - 1;
-    mtctx->buffPool = ZSTDMT_createBufferPool(nbThreads, cMem);
+    mtctx->bufPool = ZSTDMT_createBufferPool(nbThreads, cMem);
     mtctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads, cMem);
-    if (!mtctx->factory | !mtctx->jobs | !mtctx->buffPool | !mtctx->cctxPool) {
+    if (!mtctx->factory | !mtctx->jobs | !mtctx->bufPool | !mtctx->cctxPool) {
+        ZSTDMT_freeCCtx(mtctx);
+        return NULL;
+    }
+    if (pthread_mutex_init(&mtctx->jobCompleted_mutex, NULL)) {
+        ZSTDMT_freeCCtx(mtctx);
+        return NULL;
+    }
+    if (pthread_cond_init(&mtctx->jobCompleted_cond, NULL)) {
         ZSTDMT_freeCCtx(mtctx);
         return NULL;
     }
-    pthread_mutex_init(&mtctx->jobCompleted_mutex, NULL);   /* Todo : check init function return */
-    pthread_cond_init(&mtctx->jobCompleted_cond, NULL);
     DEBUGLOG(3, "mt_cctx created, for %u threads", nbThreads);
     return mtctx;
 }
@@ -386,15 +459,13 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
     unsigned jobID;
     DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
     for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
-        ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].dstBuff);
+        ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
         mtctx->jobs[jobID].dstBuff = g_nullBuffer;
-        ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].src);
+        ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].src);
         mtctx->jobs[jobID].src = g_nullBuffer;
-        ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[jobID].cctx);
-        mtctx->jobs[jobID].cctx = NULL;
     }
     memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
-    ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer);
+    ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->inBuff.buffer);
     mtctx->inBuff.buffer = g_nullBuffer;
     mtctx->allJobsCompleted = 1;
 }
@@ -404,7 +475,7 @@ size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
     if (mtctx==NULL) return 0;   /* compatible with free on NULL */
     POOL_free(mtctx->factory);
     if (!mtctx->allJobsCompleted) ZSTDMT_releaseAllJobResources(mtctx); /* stop workers first */
-    ZSTDMT_freeBufferPool(mtctx->buffPool);  /* release job resources into pools first */
+    ZSTDMT_freeBufferPool(mtctx->bufPool);  /* release job resources into pools first */
     ZSTD_free(mtctx->jobs, mtctx->cMem);
-    ZSTDMT_freeCCtxPool(mtctx->cctxPool);
+    if (mtctx->cctxPool) ZSTDMT_freeCCtxPool(mtctx->cctxPool);   /* pool creation may have failed (NULL) : ZSTDMT_freeCCtxPool() dereferences its argument */
     ZSTD_freeCDict(mtctx->cdictLocal);
@@ -418,11 +489,11 @@ size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx)
 {
     if (mtctx == NULL) return 0;   /* supports sizeof NULL */
     return sizeof(*mtctx)
-        + POOL_sizeof(mtctx->factory)
-        + ZSTDMT_sizeof_bufferPool(mtctx->buffPool)
-        + (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription)
-        + ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool)
-        + ZSTD_sizeof_CDict(mtctx->cdictLocal);
+            + POOL_sizeof(mtctx->factory)
+            + ZSTDMT_sizeof_bufferPool(mtctx->bufPool)
+            + (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription)
+            + ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool)
+            + ZSTD_sizeof_CDict(mtctx->cdictLocal);
 }
 
 size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value)
@@ -434,10 +505,10 @@ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter,
         return 0;
     case ZSTDMT_p_overlapSectionLog :
         DEBUGLOG(5, "ZSTDMT_p_overlapSectionLog : %u", value);
-        mtctx->overlapRLog = (value >= 9) ? 0 : 9 - value;
+        mtctx->overlapLog = (value >= 9) ? 9 : value;
         return 0;
     default :
-        return ERROR(compressionParameter_unsupported);
+        return ERROR(parameter_unsupported);
     }
 }
 
@@ -459,12 +530,13 @@ static unsigned computeNbChunks(size_t srcSize, unsigned windowLog, unsigned nbT
 
 
 size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
-                           void* dst, size_t dstCapacity,
-                     const void* src, size_t srcSize,
-                     const ZSTD_CDict* cdict,
-                           ZSTD_parameters const params,
-                           unsigned overlapRLog)
+                               void* dst, size_t dstCapacity,
+                         const void* src, size_t srcSize,
+                         const ZSTD_CDict* cdict,
+                               ZSTD_parameters const params,
+                               unsigned overlapLog)
 {
+    unsigned const overlapRLog = (overlapLog>9) ? 0 : 9-overlapLog;
     size_t const overlapSize = (overlapRLog>=9) ? 0 : (size_t)1 << (params.cParams.windowLog - overlapRLog);
     unsigned nbChunks = computeNbChunks(srcSize, params.cParams.windowLog, mtctx->nbThreads);
     size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks;
@@ -473,6 +545,7 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
     size_t remainingSrcSize = srcSize;
     unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbChunks : (unsigned)(dstCapacity / ZSTD_compressBound(avgChunkSize));  /* presumes avgChunkSize >= 256 KB, which should be the case */
     size_t frameStartPos = 0, dstBufferPos = 0;
+    XXH64_state_t xxh64;
 
     DEBUGLOG(4, "nbChunks  : %2u   (chunkSize : %u bytes)   ", nbChunks, (U32)avgChunkSize);
     if (nbChunks==1) {   /* fallback to single-thread mode */
@@ -480,7 +553,9 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
         if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, params.fParams);
         return ZSTD_compress_advanced(cctx, dst, dstCapacity, src, srcSize, NULL, 0, params);
     }
-    assert(avgChunkSize >= 256 KB);  /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), which is useful to avoid allocating extra buffers */
+    assert(avgChunkSize >= 256 KB);  /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), which is required for compressWithinDst */
+    ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgChunkSize) );
+    XXH64_reset(&xxh64, 0);
 
     if (nbChunks > mtctx->jobIDMask+1) {  /* enlarge job table */
         U32 nbJobs = nbChunks;
@@ -496,17 +571,10 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
             size_t const chunkSize = MIN(remainingSrcSize, avgChunkSize);
             size_t const dstBufferCapacity = ZSTD_compressBound(chunkSize);
             buffer_t const dstAsBuffer = { (char*)dst + dstBufferPos, dstBufferCapacity };
-            buffer_t const dstBuffer = u < compressWithinDst ? dstAsBuffer : ZSTDMT_getBuffer(mtctx->buffPool, dstBufferCapacity);
-            ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(mtctx->cctxPool);
+            buffer_t const dstBuffer = u < compressWithinDst ? dstAsBuffer : g_nullBuffer;
             size_t dictSize = u ? overlapSize : 0;
 
-            if ((cctx==NULL) || (dstBuffer.start==NULL)) {
-                mtctx->jobs[u].cSize = ERROR(memory_allocation);   /* job result */
-                mtctx->jobs[u].jobCompleted = 1;
-                nbChunks = u+1;   /* only wait and free u jobs, instead of initially expected nbChunks ones */
-                break;   /* let's wait for previous jobs to complete, but don't start new ones */
-            }
-
+            mtctx->jobs[u].src = g_nullBuffer;
             mtctx->jobs[u].srcStart = srcStart + frameStartPos - dictSize;
             mtctx->jobs[u].dictSize = dictSize;
             mtctx->jobs[u].srcSize = chunkSize;
@@ -516,13 +584,18 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
             /* do not calculate checksum within sections, but write it in header for first section */
             if (u!=0) mtctx->jobs[u].params.fParams.checksumFlag = 0;
             mtctx->jobs[u].dstBuff = dstBuffer;
-            mtctx->jobs[u].cctx = cctx;
+            mtctx->jobs[u].cctxPool = mtctx->cctxPool;
+            mtctx->jobs[u].bufPool = mtctx->bufPool;
             mtctx->jobs[u].firstChunk = (u==0);
             mtctx->jobs[u].lastChunk = (u==nbChunks-1);
             mtctx->jobs[u].jobCompleted = 0;
             mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex;
             mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond;
 
+            if (params.fParams.checksumFlag) {
+                XXH64_update(&xxh64, srcStart + frameStartPos, chunkSize);
+            }
+
             DEBUGLOG(5, "posting job %u   (%u bytes)", u, (U32)chunkSize);
             DEBUG_PRINTHEX(6, mtctx->jobs[u].srcStart, 12);
             POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]);
@@ -533,8 +606,8 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
     }   }
 
     /* collect result */
-    {   unsigned chunkID;
-        size_t error = 0, dstPos = 0;
+    {   size_t error = 0, dstPos = 0;
+        unsigned chunkID;
         for (chunkID=0; chunkID<nbChunks; chunkID++) {
             DEBUGLOG(5, "waiting for chunk %u ", chunkID);
             PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex);
@@ -545,8 +618,6 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
             pthread_mutex_unlock(&mtctx->jobCompleted_mutex);
             DEBUGLOG(5, "ready to write chunk %u ", chunkID);
 
-            ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[chunkID].cctx);
-            mtctx->jobs[chunkID].cctx = NULL;
             mtctx->jobs[chunkID].srcStart = NULL;
             {   size_t const cSize = mtctx->jobs[chunkID].cSize;
                 if (ZSTD_isError(cSize)) error = cSize;
@@ -556,13 +627,25 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
                         memmove((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize);  /* may overlap when chunk compressed within dst */
                     if (chunkID >= compressWithinDst) {  /* chunk compressed into its own buffer, which must be released */
                         DEBUGLOG(5, "releasing buffer %u>=%u", chunkID, compressWithinDst);
-                        ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[chunkID].dstBuff);
+                        ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[chunkID].dstBuff);
                     }
                     mtctx->jobs[chunkID].dstBuff = g_nullBuffer;
                 }
                 dstPos += cSize ;
             }
-        }
+        }  /* for (chunkID=0; chunkID<nbChunks; chunkID++) */
+
+        DEBUGLOG(4, "checksumFlag : %u ", params.fParams.checksumFlag);
+        if (params.fParams.checksumFlag) {
+            U32 const checksum = (U32)XXH64_digest(&xxh64);
+            if (dstPos + 4 > dstCapacity) {
+                error = ERROR(dstSize_tooSmall);
+            } else {
+                DEBUGLOG(4, "writing checksum : %08X \n", checksum);
+                MEM_writeLE32((char*)dst + dstPos, checksum);
+                dstPos += 4;
+        }   }
+
         if (!error) DEBUGLOG(4, "compressed size : %u  ", (U32)dstPos);
         return error ? error : dstPos;
     }
@@ -574,10 +657,10 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
                      const void* src, size_t srcSize,
                            int compressionLevel)
 {
-    U32 const overlapRLog = (compressionLevel >= ZSTD_maxCLevel()) ? 0 : 3;
+    U32 const overlapLog = (compressionLevel >= ZSTD_maxCLevel()) ? 9 : ZSTDMT_OVERLAPLOG_DEFAULT;
     ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
     params.fParams.contentSizeFlag = 1;
-    return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapRLog);
+    return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapLog);
 }
 
 
@@ -615,8 +698,8 @@ size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
     if (zcs->nbThreads==1) {
         DEBUGLOG(4, "single thread mode");
         return ZSTD_initCStream_internal(zcs->cctxPool->cctx[0],
-                                dict, dictSize, cdict,
-                                params, pledgedSrcSize);
+                                        dict, dictSize, cdict,
+                                        params, pledgedSrcSize);
     }
 
     if (zcs->allJobsCompleted == 0) {   /* previous compression not correctly finished */
@@ -642,18 +725,16 @@ size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
         zcs->cdict = cdict;
     }
 
-    zcs->targetDictSize = (zcs->overlapRLog>=9) ? 0 : (size_t)1 << (zcs->params.cParams.windowLog - zcs->overlapRLog);
-    DEBUGLOG(4, "overlapRLog : %u ", zcs->overlapRLog);
+    zcs->targetDictSize = (zcs->overlapLog==0) ? 0 : (size_t)1 << (zcs->params.cParams.windowLog - (9 - zcs->overlapLog));
+    DEBUGLOG(4, "overlapLog : %u ", zcs->overlapLog);
     DEBUGLOG(4, "overlap Size : %u KB", (U32)(zcs->targetDictSize>>10));
     zcs->targetSectionSize = zcs->sectionSize ? zcs->sectionSize : (size_t)1 << (zcs->params.cParams.windowLog + 2);
     zcs->targetSectionSize = MAX(ZSTDMT_SECTION_SIZE_MIN, zcs->targetSectionSize);
     zcs->targetSectionSize = MAX(zcs->targetDictSize, zcs->targetSectionSize);
     DEBUGLOG(4, "Section Size : %u KB", (U32)(zcs->targetSectionSize>>10));
-    zcs->marginSize = zcs->targetSectionSize >> 2;
-    zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize + zcs->marginSize;
-    zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
-    if (zcs->inBuff.buffer.start == NULL) return ERROR(memory_allocation);
-    zcs->inBuff.filled = 0;
+    zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize;
+    ZSTDMT_setBufferSize(zcs->bufPool, MAX(zcs->inBuffSize, ZSTD_compressBound(zcs->targetSectionSize)) );
+    zcs->inBuff.buffer = g_nullBuffer;
     zcs->dictSize = 0;
     zcs->doneJobID = 0;
     zcs->nextJobID = 0;
@@ -664,8 +745,9 @@ size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
 }
 
 size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
-                                const void* dict, size_t dictSize,
-                                ZSTD_parameters params, unsigned long long pledgedSrcSize)
+                             const void* dict, size_t dictSize,
+                                   ZSTD_parameters params,
+                                   unsigned long long pledgedSrcSize)
 {
     DEBUGLOG(5, "ZSTDMT_initCStream_advanced");
     return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, NULL, params, pledgedSrcSize);
@@ -701,19 +783,8 @@ size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) {
 
 static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsigned endFrame)
 {
-    size_t const dstBufferCapacity = ZSTD_compressBound(srcSize);
-    buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity);
-    ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool);
     unsigned const jobID = zcs->nextJobID & zcs->jobIDMask;
 
-    if ((cctx==NULL) || (dstBuffer.start==NULL)) {
-        zcs->jobs[jobID].jobCompleted = 1;
-        zcs->nextJobID++;
-        ZSTDMT_waitForAllJobsCompleted(zcs);
-        ZSTDMT_releaseAllJobResources(zcs);
-        return ERROR(memory_allocation);
-    }
-
     DEBUGLOG(4, "preparing job %u to compress %u bytes with %u preload ",
                 zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize);
     zcs->jobs[jobID].src = zcs->inBuff.buffer;
@@ -726,8 +797,9 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
     if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0;
     zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL;
     zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize;
-    zcs->jobs[jobID].dstBuff = dstBuffer;
-    zcs->jobs[jobID].cctx = cctx;
+    zcs->jobs[jobID].dstBuff = g_nullBuffer;
+    zcs->jobs[jobID].cctxPool = zcs->cctxPool;
+    zcs->jobs[jobID].bufPool = zcs->bufPool;
     zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0);
     zcs->jobs[jobID].lastChunk = endFrame;
     zcs->jobs[jobID].jobCompleted = 0;
@@ -735,11 +807,13 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
     zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex;
     zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond;
 
+    if (zcs->params.fParams.checksumFlag)
+        XXH64_update(&zcs->xxhState, (const char*)zcs->inBuff.buffer.start + zcs->dictSize, srcSize);
+
     /* get a new buffer for next input */
     if (!endFrame) {
         size_t const newDictSize = MIN(srcSize + zcs->dictSize, zcs->targetDictSize);
-        DEBUGLOG(5, "ZSTDMT_createCompressionJob::endFrame = %u", endFrame);
-        zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
+        zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->bufPool);
         if (zcs->inBuff.buffer.start == NULL) {   /* not enough memory to allocate next input buffer */
             zcs->jobs[jobID].jobCompleted = 1;
             zcs->nextJobID++;
@@ -747,26 +821,20 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
             ZSTDMT_releaseAllJobResources(zcs);
             return ERROR(memory_allocation);
         }
-        DEBUGLOG(5, "inBuff currently filled to %u", (U32)zcs->inBuff.filled);
         zcs->inBuff.filled -= srcSize + zcs->dictSize - newDictSize;
-        DEBUGLOG(5, "new job : inBuff filled to %u, with %u dict and %u src",
-                    (U32)zcs->inBuff.filled, (U32)newDictSize,
-                    (U32)(zcs->inBuff.filled - newDictSize));
         memmove(zcs->inBuff.buffer.start,
             (const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize,
             zcs->inBuff.filled);
-        DEBUGLOG(5, "new inBuff pre-filled");
         zcs->dictSize = newDictSize;
     } else {   /* if (endFrame==1) */
-        DEBUGLOG(5, "ZSTDMT_createCompressionJob::endFrame = %u", endFrame);
         zcs->inBuff.buffer = g_nullBuffer;
         zcs->inBuff.filled = 0;
         zcs->dictSize = 0;
         zcs->frameEnded = 1;
-        if (zcs->nextJobID == 0)
+        if (zcs->nextJobID == 0) {
             /* single chunk exception : checksum is calculated directly within worker thread */
             zcs->params.fParams.checksumFlag = 0;
-    }
+    }   }
 
     DEBUGLOG(4, "posting job %u : %u bytes  (end:%u) (note : doneJob = %u=>%u)",
                 zcs->nextJobID,
@@ -804,11 +872,8 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
                 ZSTDMT_releaseAllJobResources(zcs);
                 return job.cSize;
             }
-            ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx);
-            zcs->jobs[wJobID].cctx = NULL;
             DEBUGLOG(5, "zcs->params.fParams.checksumFlag : %u ", zcs->params.fParams.checksumFlag);
             if (zcs->params.fParams.checksumFlag) {
-                XXH64_update(&zcs->xxhState, (const char*)job.srcStart + job.dictSize, job.srcSize);
                 if (zcs->frameEnded && (zcs->doneJobID+1 == zcs->nextJobID)) {  /* write checksum at end of last section */
                     U32 const checksum = (U32)XXH64_digest(&zcs->xxhState);
                     DEBUGLOG(5, "writing checksum : %08X \n", checksum);
@@ -816,9 +881,6 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
                     job.cSize += 4;
                     zcs->jobs[wJobID].cSize += 4;
             }   }
-            ZSTDMT_releaseBuffer(zcs->buffPool, job.src);
-            zcs->jobs[wJobID].srcStart = NULL;
-            zcs->jobs[wJobID].src = g_nullBuffer;
             zcs->jobs[wJobID].jobScanned = 1;
         }
         {   size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos);
@@ -828,7 +890,7 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
             job.dstFlushed += toWrite;
         }
         if (job.dstFlushed == job.cSize) {   /* output buffer fully flushed => move to next one */
-            ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff);
+            ZSTDMT_releaseBuffer(zcs->bufPool, job.dstBuff);
             zcs->jobs[wJobID].dstBuff = g_nullBuffer;
             zcs->jobs[wJobID].jobCompleted = 0;
             zcs->doneJobID++;
@@ -852,18 +914,18 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
                                      ZSTD_inBuffer* input,
                                      ZSTD_EndDirective endOp)
 {
-    size_t const newJobThreshold = mtctx->dictSize + mtctx->targetSectionSize + mtctx->marginSize;
+    size_t const newJobThreshold = mtctx->dictSize + mtctx->targetSectionSize;
     assert(output->pos <= output->size);
     assert(input->pos  <= input->size);
     if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) {
         /* current frame being ended. Only flush/end are allowed. Or start new frame with init */
         return ERROR(stage_wrong);
     }
-    if (mtctx->nbThreads==1) {
+    if (mtctx->nbThreads==1) {  /* delegate to single-thread (synchronous) */
         return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp);
     }
 
-    /* single-pass shortcut (note : this is blocking-mode) */
+    /* single-pass shortcut (note : this is synchronous-mode) */
     if ( (mtctx->nextJobID==0)      /* just started */
       && (mtctx->inBuff.filled==0)  /* nothing buffered */
       && (endOp==ZSTD_e_end)        /* end order */
@@ -871,24 +933,29 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
         size_t const cSize = ZSTDMT_compress_advanced(mtctx,
                 (char*)output->dst + output->pos, output->size - output->pos,
                 (const char*)input->src + input->pos, input->size - input->pos,
-                mtctx->cdict, mtctx->params, mtctx->overlapRLog);
+                mtctx->cdict, mtctx->params, mtctx->overlapLog);
         if (ZSTD_isError(cSize)) return cSize;
         input->pos = input->size;
         output->pos += cSize;
-        ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer);  /* was allocated in initStream */
+        ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->inBuff.buffer);  /* was allocated in initStream */
         mtctx->allJobsCompleted = 1;
         mtctx->frameEnded = 1;
         return 0;
     }
 
     /* fill input buffer */
-    if ((input->src) && (mtctx->inBuff.buffer.start)) {   /* support NULL input */
-        size_t const toLoad = MIN(input->size - input->pos, mtctx->inBuffSize - mtctx->inBuff.filled);
-        DEBUGLOG(2, "inBuff:%08X;  inBuffSize=%u;  ToCopy=%u", (U32)(size_t)mtctx->inBuff.buffer.start, (U32)mtctx->inBuffSize, (U32)toLoad);
-        memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad);
-        input->pos += toLoad;
-        mtctx->inBuff.filled += toLoad;
-    }
+    if (input->size > input->pos) {   /* support NULL input */
+        if (mtctx->inBuff.buffer.start == NULL) {
+            mtctx->inBuff.buffer = ZSTDMT_getBuffer(mtctx->bufPool);
+            if (mtctx->inBuff.buffer.start == NULL) return ERROR(memory_allocation);
+            mtctx->inBuff.filled = 0;
+        }
+        {   size_t const toLoad = MIN(input->size - input->pos, mtctx->inBuffSize - mtctx->inBuff.filled);
+            DEBUGLOG(5, "inBuff:%08X;  inBuffSize=%u;  ToCopy=%u", (U32)(size_t)mtctx->inBuff.buffer.start, (U32)mtctx->inBuffSize, (U32)toLoad);
+            memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad);
+            input->pos += toLoad;
+            mtctx->inBuff.filled += toLoad;
+    }   }
 
     if ( (mtctx->inBuff.filled >= newJobThreshold)  /* filled enough : let's compress */
       && (mtctx->nextJobID <= mtctx->doneJobID + mtctx->jobIDMask) ) {   /* avoid overwriting job round buffer */
diff --git a/lib/compress/zstdmt_compress.h b/lib/compress/zstdmt_compress.h
index fad63b6d8610..0f0fc2b03fc6 100644
--- a/lib/compress/zstdmt_compress.h
+++ b/lib/compress/zstdmt_compress.h
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
  #ifndef ZSTDMT_COMPRESS_H
@@ -15,10 +15,11 @@
  #endif
 
 
-/* Note : All prototypes defined in this file are labelled experimental.
- *        No guarantee of API continuity is provided on any of them.
- *        In fact, the expectation is that these prototypes will be replaced
- *        by ZSTD_compress_generic() API in the near future */
+/* Note : This is an internal API.
+ *        Some methods are still exposed (ZSTDLIB_API),
+ *        because it used to be the only way to invoke MT compression.
+ *        Now, it's recommended to use ZSTD_compress_generic() instead.
+ *        These methods will stop being exposed in a future version */
 
 /* ===   Dependencies   === */
 #include <stddef.h>                /* size_t */
@@ -67,7 +68,7 @@ ZSTDLIB_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
                                      const void* src, size_t srcSize,
                                      const ZSTD_CDict* cdict,
                                            ZSTD_parameters const params,
-                                           unsigned overlapRLog);
+                                           unsigned overlapLog);
 
 ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
                                         const void* dict, size_t dictSize,   /* dict can be released after init, a local copy is preserved within zcs */
diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c
index 2a1b70ea5ef2..79ded96bf613 100644
--- a/lib/decompress/huf_decompress.c
+++ b/lib/decompress/huf_decompress.c
@@ -32,38 +32,22 @@
     - Public forum : https://groups.google.com/forum/#!forum/lz4c
 ****************************************************************** */
 
-/* **************************************************************
-*  Compiler specifics
-****************************************************************/
-#ifdef _MSC_VER    /* Visual Studio */
-#  define FORCE_INLINE static __forceinline
-#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
-#else
-#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
-#    ifdef __GNUC__
-#      define FORCE_INLINE static inline __attribute__((always_inline))
-#    else
-#      define FORCE_INLINE static inline
-#    endif
-#  else
-#    define FORCE_INLINE static
-#  endif /* __STDC_VERSION__ */
-#endif
-
-
 /* **************************************************************
 *  Dependencies
 ****************************************************************/
 #include <string.h>     /* memcpy, memset */
 #include "bitstream.h"  /* BIT_* */
+#include "compiler.h"
 #include "fse.h"        /* header compression */
 #define HUF_STATIC_LINKING_ONLY
 #include "huf.h"
+#include "error_private.h"
 
 
 /* **************************************************************
 *  Error Management
 ****************************************************************/
+#define HUF_isError ERR_isError
 #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
 
 
@@ -180,7 +164,7 @@ static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, con
     if (MEM_64bits()) \
         HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
 
-FORCE_INLINE size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
+HINT_INLINE size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
 {
     BYTE* const pStart = p;
 
@@ -639,7 +623,7 @@ static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DE
     if (MEM_64bits()) \
         ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
 
-FORCE_INLINE size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
+HINT_INLINE size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
 {
     BYTE* const pStart = p;
 
@@ -917,11 +901,11 @@ static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, qu
 *   Tells which decoder is likely to decode faster,
 *   based on a set of pre-determined metrics.
 *   @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
-*   Assumption : 0 < cSrcSize < dstSize <= 128 KB */
+*   Assumption : 0 < cSrcSize, dstSize <= 128 KB */
 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
 {
     /* decoder timing evaluation */
-    U32 const Q = (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 since dstSize > cSrcSize */
+    U32 const Q = cSrcSize >= dstSize ? 15 : (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 */
     U32 const D256 = (U32)(dstSize >> 8);
     U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
     U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
@@ -977,7 +961,7 @@ size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
 {
     /* validation checks */
     if (dstSize == 0) return ERROR(dstSize_tooSmall);
-    if ((cSrcSize >= dstSize) || (cSrcSize <= 1)) return ERROR(corruption_detected);   /* invalid */
+    if (cSrcSize == 0) return ERROR(corruption_detected);
 
     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
         return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index 003d703a5eb3..d2bc545e52c5 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
@@ -53,15 +53,6 @@
 #  include "zstd_legacy.h"
 #endif
 
-#if defined(_MSC_VER) && !defined(_M_IA64)  /* _mm_prefetch() is not defined for ia64 */
-#  include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
-#  define ZSTD_PREFETCH(ptr)   _mm_prefetch((const char*)ptr, _MM_HINT_T0)
-#elif defined(__GNUC__)
-#  define ZSTD_PREFETCH(ptr)   __builtin_prefetch(ptr, 0, 0)
-#else
-#  define ZSTD_PREFETCH(ptr)   /* disabled */
-#endif
-
 
 /*-*************************************
 *  Errors
@@ -95,7 +86,7 @@ typedef struct {
     HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)];  /* can accommodate HUF_decompress4X */
     U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
     U32 rep[ZSTD_REP_NUM];
-} ZSTD_entropyTables_t;
+} ZSTD_entropyDTables_t;
 
 struct ZSTD_DCtx_s
 {
@@ -103,7 +94,7 @@ struct ZSTD_DCtx_s
     const FSE_DTable* MLTptr;
     const FSE_DTable* OFTptr;
     const HUF_DTable* HUFptr;
-    ZSTD_entropyTables_t entropy;
+    ZSTD_entropyDTables_t entropy;
     const void* previousDstEnd;   /* detect continuity */
     const void* base;             /* start of current segment */
     const void* vBase;            /* virtual start of previous segment if it was just before current one */
@@ -304,15 +295,18 @@ size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t src
                 return ZSTD_skippableHeaderSize; /* magic number + frame length */
             memset(zfhPtr, 0, sizeof(*zfhPtr));
             zfhPtr->frameContentSize = MEM_readLE32((const char *)src + 4);
-            zfhPtr->windowSize = 0; /* windowSize==0 means a frame is skippable */
+            zfhPtr->frameType = ZSTD_skippableFrame;
+            zfhPtr->windowSize = 0;
             return 0;
         }
         return ERROR(prefix_unknown);
     }
 
     /* ensure there is enough `srcSize` to fully read/decode frame header */
-    { size_t const fhsize = ZSTD_frameHeaderSize(src, srcSize);
-      if (srcSize < fhsize) return fhsize; }
+    {   size_t const fhsize = ZSTD_frameHeaderSize(src, srcSize);
+        if (srcSize < fhsize) return fhsize;
+        zfhPtr->headerSize = (U32)fhsize;
+    }
 
     {   BYTE const fhdByte = ip[4];
         size_t pos = 5;
@@ -320,24 +314,23 @@ size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t src
         U32 const checksumFlag = (fhdByte>>2)&1;
         U32 const singleSegment = (fhdByte>>5)&1;
         U32 const fcsID = fhdByte>>6;
-        U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX;
-        U32 windowSize = 0;
+        U64 windowSize = 0;
         U32 dictID = 0;
-        U64 frameContentSize = 0;
+        U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN;
         if ((fhdByte & 0x08) != 0)
-            return ERROR(frameParameter_unsupported);   /* reserved bits, must be zero */
+            return ERROR(frameParameter_unsupported); /* reserved bits, must be zero */
+
         if (!singleSegment) {
             BYTE const wlByte = ip[pos++];
             U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
             if (windowLog > ZSTD_WINDOWLOG_MAX)
                 return ERROR(frameParameter_windowTooLarge);
-            windowSize = (1U << windowLog);
+            windowSize = (1ULL << windowLog);
             windowSize += (windowSize >> 3) * (wlByte&7);
         }
-
         switch(dictIDSizeCode)
         {
-            default:   /* impossible */
+            default: assert(0);  /* impossible */
             case 0 : break;
             case 1 : dictID = ip[pos]; pos++; break;
             case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break;
@@ -345,14 +338,15 @@ size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t src
         }
         switch(fcsID)
         {
-            default:   /* impossible */
+            default: assert(0);  /* impossible */
             case 0 : if (singleSegment) frameContentSize = ip[pos]; break;
             case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break;
             case 2 : frameContentSize = MEM_readLE32(ip+pos); break;
             case 3 : frameContentSize = MEM_readLE64(ip+pos); break;
         }
-        if (!windowSize) windowSize = (U32)frameContentSize;
-        if (windowSize > windowSizeMax) return ERROR(frameParameter_windowTooLarge);
+        if (singleSegment) windowSize = frameContentSize;
+
+        zfhPtr->frameType = ZSTD_frame;
         zfhPtr->frameContentSize = frameContentSize;
         zfhPtr->windowSize = windowSize;
         zfhPtr->dictID = dictID;
@@ -362,10 +356,10 @@ size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t src
 }
 
 /** ZSTD_getFrameContentSize() :
-*   compatible with legacy mode
-*   @return : decompressed size of the single frame pointed to be `src` if known, otherwise
-*             - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
-*             - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */
+ *  compatible with legacy mode
+ * @return : decompressed size of the single frame pointed to by `src` if known, otherwise
+ *         - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
+ *         - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */
 unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize)
 {
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
@@ -374,17 +368,14 @@ unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize)
         return ret == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : ret;
     }
 #endif
-    {   ZSTD_frameHeader fParams;
-        if (ZSTD_getFrameHeader(&fParams, src, srcSize) != 0) return ZSTD_CONTENTSIZE_ERROR;
-        if (fParams.windowSize == 0) {
-            /* Either skippable or empty frame, size == 0 either way */
+    {   ZSTD_frameHeader zfh;
+        if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0)
+            return ZSTD_CONTENTSIZE_ERROR;
+        if (zfh.frameType == ZSTD_skippableFrame) {
             return 0;
-        } else if (fParams.frameContentSize != 0) {
-            return fParams.frameContentSize;
         } else {
-            return ZSTD_CONTENTSIZE_UNKNOWN;
-        }
-    }
+            return zfh.frameContentSize;
+    }   }
 }
 
 /** ZSTD_findDecompressedSize() :
@@ -442,7 +433,8 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
 *   compatible with legacy mode
 *   @return : decompressed size if known, 0 otherwise
               note : 0 can mean any of the following :
-                   - decompressed size is not present within frame header
+                   - frame content is empty
+                   - decompressed size field is not present in frame header
                    - frame header unknown / not supported
                    - frame header not complete (`srcSize` too small) */
 unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize)
@@ -460,19 +452,13 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he
     size_t const result = ZSTD_getFrameHeader(&(dctx->fParams), src, headerSize);
     if (ZSTD_isError(result)) return result;  /* invalid header */
     if (result>0) return ERROR(srcSize_wrong);   /* headerSize too small */
-    if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID)) return ERROR(dictionary_wrong);
+    if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID))
+        return ERROR(dictionary_wrong);
     if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0);
     return 0;
 }
 
 
-typedef struct
-{
-    blockType_e blockType;
-    U32 lastBlock;
-    U32 origSize;
-} blockProperties_t;
-
 /*! ZSTD_getcBlockSize() :
 *   Provides the size of compressed block from block header `src` */
 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
@@ -958,7 +944,7 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState)
 }
 
 
-FORCE_INLINE
+HINT_INLINE
 size_t ZSTD_execSequence(BYTE* op,
                          BYTE* const oend, seq_t sequence,
                          const BYTE** litPtr, const BYTE* const litLimit,
@@ -1102,7 +1088,7 @@ static size_t ZSTD_decompressSequences(
 }
 
 
-FORCE_INLINE seq_t ZSTD_decodeSequenceLong_generic(seqState_t* seqState, int const longOffsets)
+FORCE_INLINE_TEMPLATE seq_t ZSTD_decodeSequenceLong_generic(seqState_t* seqState, int const longOffsets)
 {
     seq_t seq;
 
@@ -1202,7 +1188,7 @@ static seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, unsigned const window
     }
 }
 
-FORCE_INLINE
+HINT_INLINE
 size_t ZSTD_execSequenceLong(BYTE* op,
                                 BYTE* const oend, seq_t sequence,
                                 const BYTE** litPtr, const BYTE* const litLimit,
@@ -1338,7 +1324,7 @@ static size_t ZSTD_decompressSequencesLong(
             seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, windowSize32);
             size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd);
             if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
-            ZSTD_PREFETCH(sequence.match);
+            PREFETCH(sequence.match);
             sequences[seqNb&STOSEQ_MASK] = sequence;
             op += oneSeqSize;
         }
@@ -1440,28 +1426,26 @@ size_t ZSTD_generateNxBytes(void* dst, size_t dstCapacity, BYTE byte, size_t len
 size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
 {
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
-    if (ZSTD_isLegacy(src, srcSize)) return ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
+    if (ZSTD_isLegacy(src, srcSize))
+        return ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
 #endif
-    if (srcSize >= ZSTD_skippableHeaderSize &&
-            (MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
+    if ( (srcSize >= ZSTD_skippableHeaderSize)
+      && (MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START ) {
         return ZSTD_skippableHeaderSize + MEM_readLE32((const BYTE*)src + 4);
     } else {
         const BYTE* ip = (const BYTE*)src;
         const BYTE* const ipstart = ip;
         size_t remainingSize = srcSize;
-        ZSTD_frameHeader fParams;
+        ZSTD_frameHeader zfh;
 
-        size_t const headerSize = ZSTD_frameHeaderSize(ip, remainingSize);
-        if (ZSTD_isError(headerSize)) return headerSize;
-
-        /* Frame Header */
-        {   size_t const ret = ZSTD_getFrameHeader(&fParams, ip, remainingSize);
+        /* Extract Frame Header */
+        {   size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize);
             if (ZSTD_isError(ret)) return ret;
             if (ret > 0) return ERROR(srcSize_wrong);
         }
 
-        ip += headerSize;
-        remainingSize -= headerSize;
+        ip += zfh.headerSize;
+        remainingSize -= zfh.headerSize;
 
         /* Loop on each block */
         while (1) {
@@ -1469,7 +1453,8 @@ size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
             size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
             if (ZSTD_isError(cBlockSize)) return cBlockSize;
 
-            if (ZSTD_blockHeaderSize + cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+            if (ZSTD_blockHeaderSize + cBlockSize > remainingSize)
+                return ERROR(srcSize_wrong);
 
             ip += ZSTD_blockHeaderSize + cBlockSize;
             remainingSize -= ZSTD_blockHeaderSize + cBlockSize;
@@ -1477,7 +1462,7 @@ size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
             if (blockProperties.lastBlock) break;
         }
 
-        if (fParams.checksumFlag) {   /* Frame content checksum */
+        if (zfh.checksumFlag) {   /* Final frame content checksum */
             if (remainingSize < 4) return ERROR(srcSize_wrong);
             ip += 4;
             remainingSize -= 4;
@@ -1490,8 +1475,8 @@ size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
 /*! ZSTD_decompressFrame() :
 *   @dctx must be properly initialized */
 static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
-                                 void* dst, size_t dstCapacity,
-                                 const void** srcPtr, size_t *srcSizePtr)
+                                   void* dst, size_t dstCapacity,
+                             const void** srcPtr, size_t *srcSizePtr)
 {
     const BYTE* ip = (const BYTE*)(*srcPtr);
     BYTE* const ostart = (BYTE* const)dst;
@@ -1500,13 +1485,15 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
     size_t remainingSize = *srcSizePtr;
 
     /* check */
-    if (remainingSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    if (remainingSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize)
+        return ERROR(srcSize_wrong);
 
     /* Frame Header */
     {   size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_frameHeaderSize_prefix);
         if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
-        if (remainingSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
-        CHECK_F(ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize));
+        if (remainingSize < frameHeaderSize+ZSTD_blockHeaderSize)
+            return ERROR(srcSize_wrong);
+        CHECK_F( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) );
         ip += frameHeaderSize; remainingSize -= frameHeaderSize;
     }
 
@@ -1538,14 +1525,15 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
         }
 
         if (ZSTD_isError(decodedSize)) return decodedSize;
-        if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, op, decodedSize);
+        if (dctx->fParams.checksumFlag)
+            XXH64_update(&dctx->xxhState, op, decodedSize);
         op += decodedSize;
         ip += cBlockSize;
         remainingSize -= cBlockSize;
         if (blockProperties.lastBlock) break;
     }
 
-    if (dctx->fParams.checksumFlag) {   /* Frame content checksum verification */
+    if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */
         U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState);
         U32 checkRead;
         if (remainingSize<4) return ERROR(checksum_wrong);
@@ -1567,17 +1555,13 @@ static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict);
 static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
                                         void* dst, size_t dstCapacity,
                                   const void* src, size_t srcSize,
-                                  const void *dict, size_t dictSize,
+                                  const void* dict, size_t dictSize,
                                   const ZSTD_DDict* ddict)
 {
     void* const dststart = dst;
+    assert(dict==NULL || ddict==NULL);  /* either dict or ddict set, not both */
 
     if (ddict) {
-        if (dict) {
-            /* programmer error, these two cases should be mutually exclusive */
-            return ERROR(GENERIC);
-        }
-
         dict = ZSTD_DDictDictContent(ddict);
         dictSize = ZSTD_DDictDictSize(ddict);
     }
@@ -1590,7 +1574,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
             size_t decodedSize;
             size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
             if (ZSTD_isError(frameSize)) return frameSize;
-            /* legacy support is incompatible with static dctx */
+            /* legacy support is not compatible with static dctx */
             if (dctx->staticSize) return ERROR(memory_allocation);
 
             decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize);
@@ -1613,16 +1597,13 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
                     return ERROR(srcSize_wrong);
                 skippableSize = MEM_readLE32((const BYTE *)src + 4) +
                                 ZSTD_skippableHeaderSize;
-                if (srcSize < skippableSize) {
-                    return ERROR(srcSize_wrong);
-                }
+                if (srcSize < skippableSize) return ERROR(srcSize_wrong);
 
                 src = (const BYTE *)src + skippableSize;
                 srcSize -= skippableSize;
                 continue;
-            } else {
-                return ERROR(prefix_unknown);
             }
+            return ERROR(prefix_unknown);
         }
 
         if (ddict) {
@@ -1638,12 +1619,11 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
         {   const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
                                                     &src, &srcSize);
             if (ZSTD_isError(res)) return res;
-            /* don't need to bounds check this, ZSTD_decompressFrame will have
-             * already */
+            /* no need to bounds-check here : ZSTD_decompressFrame has already done so */
             dst = (BYTE*)dst + res;
             dstCapacity -= res;
         }
-    }
+    }  /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */
 
     if (srcSize) return ERROR(srcSize_wrong); /* input not entirely consumed */
 
@@ -1742,7 +1722,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
             return 0;
         }
         dctx->expected = 0;   /* not necessary to copy more */
-
+        /* fall-through */
     case ZSTDds_decodeFrameHeader:
         assert(src != NULL);
         memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected);
@@ -1853,7 +1833,7 @@ static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dict
 /* ZSTD_loadEntropy() :
  * dict : must point at beginning of a valid zstd dictionary
  * @return : size of entropy tables read */
-static size_t ZSTD_loadEntropy(ZSTD_entropyTables_t* entropy, const void* const dict, size_t const dictSize)
+static size_t ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy, const void* const dict, size_t const dictSize)
 {
     const BYTE* dictPtr = (const BYTE*)dict;
     const BYTE* const dictEnd = dictPtr + dictSize;
@@ -1931,8 +1911,9 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict
 
 size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
 {
-    CHECK_F(ZSTD_decompressBegin(dctx));
-    if (dict && dictSize) CHECK_E(ZSTD_decompress_insertDictionary(dctx, dict, dictSize), dictionary_corrupted);
+    CHECK_F( ZSTD_decompressBegin(dctx) );
+    if (dict && dictSize)
+        CHECK_E(ZSTD_decompress_insertDictionary(dctx, dict, dictSize), dictionary_corrupted);
     return 0;
 }
 
@@ -1943,7 +1924,7 @@ struct ZSTD_DDict_s {
     void* dictBuffer;
     const void* dictContent;
     size_t dictSize;
-    ZSTD_entropyTables_t entropy;
+    ZSTD_entropyDTables_t entropy;
     U32 dictID;
     U32 entropyPresent;
     ZSTD_customMem cMem;
@@ -1961,7 +1942,7 @@ static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict)
 
 size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dstDCtx, const ZSTD_DDict* ddict)
 {
-    CHECK_F(ZSTD_decompressBegin(dstDCtx));
+    CHECK_F( ZSTD_decompressBegin(dstDCtx) );
     if (ddict) {   /* support begin on NULL */
         dstDCtx->dictID = ddict->dictID;
         dstDCtx->base = ddict->dictContent;
@@ -2142,7 +2123,7 @@ unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
  *  ZSTD_getFrameHeader(), which will provide a more precise error code. */
 unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize)
 {
-    ZSTD_frameHeader zfp = { 0 , 0 , 0 , 0 };
+    ZSTD_frameHeader zfp = { 0, 0, ZSTD_frame, 0, 0, 0 };
     size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize);
     if (ZSTD_isError(hError)) return 0;
     return zfp.dictID;
@@ -2237,7 +2218,7 @@ size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds,
 {
     switch(paramType)
     {
-        default : return ERROR(parameter_unknown);
+        default : return ERROR(parameter_unsupported);
         case DStream_p_maxWindowSize : zds->maxWindowSize = paramValue ? paramValue : (U32)(-1); break;
     }
     return 0;
@@ -2254,16 +2235,19 @@ size_t ZSTD_estimateDStreamSize(size_t windowSize)
     size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
     size_t const inBuffSize = blockSize;  /* no block can be larger */
     size_t const outBuffSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2);
-    return sizeof(ZSTD_DStream) + ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize;
+    return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize;
 }
 
 ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize)
 {
-    ZSTD_frameHeader fh;
-    size_t const err = ZSTD_getFrameHeader(&fh, src, srcSize);
+    U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX;
+    ZSTD_frameHeader zfh;
+    size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize);
     if (ZSTD_isError(err)) return err;
     if (err>0) return ERROR(srcSize_wrong);
-    return ZSTD_estimateDStreamSize(fh.windowSize);
+    if (zfh.windowSize > windowSizeMax)
+        return ERROR(frameParameter_windowTooLarge);
+    return ZSTD_estimateDStreamSize((size_t)zfh.windowSize);
 }
 
 
@@ -2314,16 +2298,14 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
                         size_t const dictSize = zds->ddict ? zds->ddict->dictSize : 0;
                         /* legacy support is incompatible with static dctx */
                         if (zds->staticSize) return ERROR(memory_allocation);
-                        CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext, zds->previousLegacyVersion, legacyVersion,
-                                                       dict, dictSize));
+                        CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext,
+                                    zds->previousLegacyVersion, legacyVersion,
+                                    dict, dictSize));
                         zds->legacyVersion = zds->previousLegacyVersion = legacyVersion;
-                        return ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input);
-                    } else {
-                        return hSize; /* error */
+                        return ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input);
                     }
-#else
-                    return hSize;
 #endif
+                    return hSize; /* error */
                 }
                 if (hSize != 0) {   /* need more input */
                     size_t const toLoad = hSize - zds->lhSize;   /* if hSize!=0, hSize > zds->lhSize */
@@ -2374,8 +2356,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
             if (zds->fParams.windowSize > zds->maxWindowSize) return ERROR(frameParameter_windowTooLarge);
 
             /* Adapt buffer sizes to frame header instructions */
-            {   size_t const blockSize = MIN(zds->fParams.windowSize, ZSTD_BLOCKSIZE_MAX);
-                size_t const neededOutSize = zds->fParams.windowSize + blockSize + WILDCOPY_OVERLENGTH * 2;
+            {   size_t const blockSize = (size_t)(MIN(zds->fParams.windowSize, ZSTD_BLOCKSIZE_MAX));
+                size_t const neededOutSize = (size_t)(zds->fParams.windowSize + blockSize + WILDCOPY_OVERLENGTH * 2);
                 zds->blockSize = blockSize;
                 if ((zds->inBuffSize < blockSize) || (zds->outBuffSize < neededOutSize)) {
                     size_t const bufferSize = blockSize + neededOutSize;
@@ -2400,7 +2382,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
                     zds->outBuffSize = neededOutSize;
             }   }
             zds->streamStage = zdss_read;
-            /* pass-through */
+            /* fall-through */
 
         case zdss_read:
             DEBUGLOG(5, "stage zdss_read");
@@ -2425,8 +2407,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
             }   }
             if (ip==iend) { someMoreWork = 0; break; }   /* no more input */
             zds->streamStage = zdss_load;
-            /* pass-through */
-
+            /* fall-through */
         case zdss_load:
             {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds);
                 size_t const toLoad = neededInSize - zds->inPos;   /* should always be <= remaining space within inBuff */
@@ -2448,8 +2429,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
                     zds->outEnd = zds->outStart +  decodedSize;
             }   }
             zds->streamStage = zdss_flush;
-            /* pass-through */
-
+            /* fall-through */
         case zdss_flush:
             {   size_t const toFlushSize = zds->outEnd - zds->outStart;
                 size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize);
diff --git a/lib/deprecated/zbuff.h b/lib/deprecated/zbuff.h
index f62091976c76..e6ea84ad3b45 100644
--- a/lib/deprecated/zbuff.h
+++ b/lib/deprecated/zbuff.h
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 /* ***************************************************************
diff --git a/lib/deprecated/zbuff_common.c b/lib/deprecated/zbuff_common.c
index 9fff6eb2095e..2de45bec1705 100644
--- a/lib/deprecated/zbuff_common.c
+++ b/lib/deprecated/zbuff_common.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 /*-*************************************
@@ -23,4 +23,3 @@ unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); }
 /*! ZBUFF_getErrorName() :
 *   provides error code string from function result (useful for debugging) */
 const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
-
diff --git a/lib/deprecated/zbuff_compress.c b/lib/deprecated/zbuff_compress.c
index 5a37a0027ae6..4444e95d8fd1 100644
--- a/lib/deprecated/zbuff_compress.c
+++ b/lib/deprecated/zbuff_compress.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
diff --git a/lib/deprecated/zbuff_decompress.c b/lib/deprecated/zbuff_decompress.c
index d9c155e08eb8..a819d7f40429 100644
--- a/lib/deprecated/zbuff_decompress.c
+++ b/lib/deprecated/zbuff_decompress.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c
index 06c1b9fadb7a..3d445ae8b81d 100644
--- a/lib/dictBuilder/cover.c
+++ b/lib/dictBuilder/cover.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 /* *****************************************************************************
@@ -714,11 +714,9 @@ typedef struct COVER_best_s {
  * Initialize the `COVER_best_t`.
  */
 static void COVER_best_init(COVER_best_t *best) {
-  if (!best) {
-    return;
-  }
-  pthread_mutex_init(&best->mutex, NULL);
-  pthread_cond_init(&best->cond, NULL);
+  if (best==NULL) return; /* compatible with init on NULL */
+  (void)pthread_mutex_init(&best->mutex, NULL);
+  (void)pthread_cond_init(&best->cond, NULL);
   best->liveJobs = 0;
   best->dict = NULL;
   best->dictSize = 0;
diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c
index 742586eacdd2..c2871c2ccfbc 100644
--- a/lib/dictBuilder/zdict.c
+++ b/lib/dictBuilder/zdict.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
@@ -695,7 +695,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
         DISPLAYLEVEL(1, "Not enough memory \n");
         goto _cleanup;
     }
-    if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionary_wrong); goto _cleanup; }   /* too large dictionary */
+    if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionaryCreation_failed); goto _cleanup; }   /* too large dictionary */
     for (u=0; u<256; u++) countLit[u] = 1;   /* any character must be described */
     for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1;
     for (u=0; u<=MaxML; u++) matchLengthCount[u] = 1;
diff --git a/lib/dictBuilder/zdict.h b/lib/dictBuilder/zdict.h
index 7bfbb351a1dd..3d72a465e5e7 100644
--- a/lib/dictBuilder/zdict.h
+++ b/lib/dictBuilder/zdict.h
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 #ifndef DICTBUILDER_H_001
diff --git a/lib/legacy/zstd_legacy.h b/lib/legacy/zstd_legacy.h
index 3c9798f880e0..1126e24669f3 100644
--- a/lib/legacy/zstd_legacy.h
+++ b/lib/legacy/zstd_legacy.h
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 #ifndef ZSTD_LEGACY_H
@@ -123,6 +123,7 @@ MEM_STATIC size_t ZSTD_decompressLegacy(
                const void* dict,size_t dictSize)
 {
     U32 const version = ZSTD_isLegacy(src, compressedSize);
+    (void)dst; (void)dstCapacity; (void)dict; (void)dictSize;  /* unused when ZSTD_LEGACY_SUPPORT >= 8 */
     switch(version)
     {
 #if (ZSTD_LEGACY_SUPPORT <= 1)
@@ -223,6 +224,7 @@ MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version)
         case 1 :
         case 2 :
         case 3 :
+            (void)legacyContext;
             return ERROR(version_unsupported);
 #if (ZSTD_LEGACY_SUPPORT <= 4)
         case 4 : return ZBUFFv04_freeDCtx((ZBUFFv04_DCtx*)legacyContext);
@@ -250,6 +252,7 @@ MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U
         case 1 :
         case 2 :
         case 3 :
+            (void)dict; (void)dictSize;
             return 0;
 #if (ZSTD_LEGACY_SUPPORT <= 4)
         case 4 :
@@ -306,6 +309,7 @@ MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version,
         case 1 :
         case 2 :
         case 3 :
+            (void)legacyContext; (void)output; (void)input;
             return ERROR(version_unsupported);
 #if (ZSTD_LEGACY_SUPPORT <= 4)
         case 4 :
diff --git a/lib/legacy/zstd_v01.c b/lib/legacy/zstd_v01.c
index cf5354d6a9b6..45f421ae6f2c 100644
--- a/lib/legacy/zstd_v01.c
+++ b/lib/legacy/zstd_v01.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
diff --git a/lib/legacy/zstd_v01.h b/lib/legacy/zstd_v01.h
index 13cb3acfdc3b..a91c6a133bac 100644
--- a/lib/legacy/zstd_v01.h
+++ b/lib/legacy/zstd_v01.h
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 #ifndef ZSTD_V01_H_28739879432
diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c
index 3cf8f4778250..dc1ec0e7c678 100644
--- a/lib/legacy/zstd_v02.c
+++ b/lib/legacy/zstd_v02.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
diff --git a/lib/legacy/zstd_v02.h b/lib/legacy/zstd_v02.h
index d14f0293cbe2..63cb3b8d578f 100644
--- a/lib/legacy/zstd_v02.h
+++ b/lib/legacy/zstd_v02.h
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 #ifndef ZSTD_V02_H_4174539423
diff --git a/lib/legacy/zstd_v03.c b/lib/legacy/zstd_v03.c
index f438330a4692..8257de7e6cd3 100644
--- a/lib/legacy/zstd_v03.c
+++ b/lib/legacy/zstd_v03.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
diff --git a/lib/legacy/zstd_v03.h b/lib/legacy/zstd_v03.h
index 07f7597bb7c3..e38e0109b136 100644
--- a/lib/legacy/zstd_v03.h
+++ b/lib/legacy/zstd_v03.h
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 #ifndef ZSTD_V03_H_298734209782
diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c
index 8b8e23cb09cb..951561a6cc8c 100644
--- a/lib/legacy/zstd_v04.c
+++ b/lib/legacy/zstd_v04.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
@@ -2776,7 +2776,7 @@ static size_t ZSTD_decodeFrameHeader_Part2(ZSTD_DCtx* zc, const void* src, size_
     size_t result;
     if (srcSize != zc->headerSize) return ERROR(srcSize_wrong);
     result = ZSTD_getFrameParams(&(zc->params), src, srcSize);
-    if ((MEM_32bits()) && (zc->params.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bits);
+    if ((MEM_32bits()) && (zc->params.windowLog > 25)) return ERROR(frameParameter_unsupported);
     return result;
 }
 
diff --git a/lib/legacy/zstd_v04.h b/lib/legacy/zstd_v04.h
index 1b5439d39249..a7d6623305f2 100644
--- a/lib/legacy/zstd_v04.h
+++ b/lib/legacy/zstd_v04.h
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 #ifndef ZSTD_V04_H_91868324769238
diff --git a/lib/legacy/zstd_v05.c b/lib/legacy/zstd_v05.c
index e929618a3bf5..4a1d4d4bd931 100644
--- a/lib/legacy/zstd_v05.c
+++ b/lib/legacy/zstd_v05.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
@@ -2888,7 +2888,7 @@ static size_t ZSTDv05_decodeFrameHeader_Part2(ZSTDv05_DCtx* zc, const void* src,
     if (srcSize != zc->headerSize)
         return ERROR(srcSize_wrong);
     result = ZSTDv05_getFrameParams(&(zc->params), src, srcSize);
-    if ((MEM_32bits()) && (zc->params.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bits);
+    if ((MEM_32bits()) && (zc->params.windowLog > 25)) return ERROR(frameParameter_unsupported);
     return result;
 }
 
diff --git a/lib/legacy/zstd_v05.h b/lib/legacy/zstd_v05.h
index 8ce662fd9fc6..a333bd127bd8 100644
--- a/lib/legacy/zstd_v05.h
+++ b/lib/legacy/zstd_v05.h
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 #ifndef ZSTDv05_H
diff --git a/lib/legacy/zstd_v06.c b/lib/legacy/zstd_v06.c
index 26f0929da6fd..a285a09016f6 100644
--- a/lib/legacy/zstd_v06.c
+++ b/lib/legacy/zstd_v06.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
@@ -3084,7 +3084,7 @@ size_t ZSTDv06_getFrameParams(ZSTDv06_frameParams* fparamsPtr, const void* src,
 static size_t ZSTDv06_decodeFrameHeader(ZSTDv06_DCtx* zc, const void* src, size_t srcSize)
 {
     size_t const result = ZSTDv06_getFrameParams(&(zc->fParams), src, srcSize);
-    if ((MEM_32bits()) && (zc->fParams.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bits);
+    if ((MEM_32bits()) && (zc->fParams.windowLog > 25)) return ERROR(frameParameter_unsupported);
     return result;
 }
 
diff --git a/lib/legacy/zstd_v06.h b/lib/legacy/zstd_v06.h
index 10c9c7725974..ee043a179090 100644
--- a/lib/legacy/zstd_v06.h
+++ b/lib/legacy/zstd_v06.h
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 #ifndef ZSTDv06_H
diff --git a/lib/legacy/zstd_v07.c b/lib/legacy/zstd_v07.c
index 6669b71cea40..ad392e90b61e 100644
--- a/lib/legacy/zstd_v07.c
+++ b/lib/legacy/zstd_v07.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
diff --git a/lib/legacy/zstd_v07.h b/lib/legacy/zstd_v07.h
index cc95c661bc9a..68d18e9636bf 100644
--- a/lib/legacy/zstd_v07.h
+++ b/lib/legacy/zstd_v07.h
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 #ifndef ZSTDv07_H_235446
diff --git a/lib/zstd.h b/lib/zstd.h
index 58e9a5606db8..13b4563fd69a 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -2,11 +2,10 @@
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
-
 #if defined (__cplusplus)
 extern "C" {
 #endif
@@ -59,7 +58,7 @@ extern "C" {
 /*------   Version   ------*/
 #define ZSTD_VERSION_MAJOR    1
 #define ZSTD_VERSION_MINOR    3
-#define ZSTD_VERSION_RELEASE  0
+#define ZSTD_VERSION_RELEASE  1
 
 #define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
 ZSTDLIB_API unsigned ZSTD_versionNumber(void);   /**< useful to check dll version */
@@ -425,13 +424,6 @@ typedef struct {
     ZSTD_frameParameters fParams;
 } ZSTD_parameters;
 
-typedef struct {
-    unsigned long long frameContentSize;
-    size_t windowSize;
-    unsigned dictID;
-    unsigned checksumFlag;
-} ZSTD_frameHeader;
-
 /*= Custom memory allocation functions */
 typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
 typedef void  (*ZSTD_freeFunction) (void* opaque, void* address);
@@ -809,7 +801,6 @@ ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstC
 ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 
 
-
 /*-
   Buffer-less streaming decompression (synchronous mode)
 
@@ -874,6 +865,15 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapaci
 */
 
 /*=====   Buffer-less streaming decompression functions  =====*/
+typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e;
+typedef struct {
+    unsigned long long frameContentSize; /* ZSTD_CONTENTSIZE_UNKNOWN means this field is not available. 0 means "empty" */
+    unsigned long long windowSize;       /* can be very large, up to frameContentSize */
+    ZSTD_frameType_e frameType;          /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */
+    unsigned headerSize;
+    unsigned dictID;
+    unsigned checksumFlag;
+} ZSTD_frameHeader;
 ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize);   /**< doesn't consume input */
 ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
 ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
@@ -953,7 +953,9 @@ typedef enum {
                               * Special: value 0 means "do not change strategy". */
 
     /* frame parameters */
-    ZSTD_p_contentSizeFlag=200, /* Content size is written into frame header _whenever known_ (default:1) */
+    ZSTD_p_contentSizeFlag=200, /* Content size is written into frame header _whenever known_ (default:1)
+                              * note that content size must be known at the beginning,
+                              * it is sent using ZSTD_CCtx_setPledgedSrcSize() */
     ZSTD_p_checksumFlag,     /* A 32-bits checksum of content is written at end of frame (default:0) */
     ZSTD_p_dictIDFlag,       /* When applicable, dictID of dictionary is provided in frame header (default:1) */
 
diff --git a/programs/.gitignore b/programs/.gitignore
index eeaf051d6edf..701830c777c2 100644
--- a/programs/.gitignore
+++ b/programs/.gitignore
@@ -1,8 +1,12 @@
 # local binary (Makefile)
 zstd
 zstd32
+zstd4
 zstd-compress
 zstd-decompress
+zstd-frugal
+zstd-small
+zstd-nolegacy
 
 # Object files
 *.o
diff --git a/programs/Makefile b/programs/Makefile
index 8b080d446606..c5469cfc4def 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -39,9 +39,10 @@ endif
 
 CPPFLAGS+= -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
            -I$(ZSTDDIR)/dictBuilder \
+           -DZSTD_NEWAPI \
            -DXXH_NAMESPACE=ZSTD_   # because xxhash.o already compiled with this macro from library
 CFLAGS  ?= -O3
-DEBUGFLAGS = -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
+DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
             -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
             -Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \
             -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
@@ -131,12 +132,15 @@ else
 LZ4_MSG := $(NO_LZ4_MSG)
 endif
 
-.PHONY: default all clean clean_decomp_o install uninstall generate_res
-
+.PHONY: default
 default: zstd-release
 
+.PHONY: all
 all: zstd
 
+.PHONY: allVariants
+allVariants: zstd zstd-compress zstd-decompress zstd-small zstd-nolegacy
+
 $(ZSTDDECOMP_O): CFLAGS += $(ALIGN_LOOP)
 
 zstd zstd4 : CPPFLAGS += $(THREAD_CPP) $(ZLIBCPP) $(LZMACPP)
@@ -153,8 +157,9 @@ zstd zstd4 : $(ZSTDLIB_FILES) zstdcli.o fileio.o bench.o datagen.o dibio.o
 ifneq (,$(filter Windows%,$(OS)))
 	windres/generate_res.bat
 endif
-	$(CC) $(FLAGS) $^ $(RES_FILE) -o zstd$(EXT) $(LDFLAGS)
+	$(CC) $(FLAGS) $^ $(RES_FILE) -o $@$(EXT) $(LDFLAGS)
 
+.PHONY: zstd-release
 zstd-release: DEBUGFLAGS :=
 zstd-release: zstd
 
@@ -165,8 +170,8 @@ ifneq (,$(filter Windows%,$(OS)))
 endif
 	$(CC) -m32 $(FLAGS) $^ $(RES32_FILE) -o $@$(EXT)
 
-zstd-nolegacy : clean_decomp_o
-	$(MAKE) zstd ZSTD_LEGACY_SUPPORT=0
+zstd-nolegacy : $(ZSTD_FILES) $(ZDICT_FILES) zstdcli.o fileio.c bench.o datagen.o dibio.o
+	$(CC) $(FLAGS) $^ -o $@$(EXT) $(LDFLAGS)
 
 zstd-nomt : THREAD_CPP :=
 zstd-nomt : THREAD_LD :=
@@ -197,9 +202,9 @@ zstd-pgo : clean zstd
 	$(MAKE) zstd MOREFLAGS=-fprofile-use
 
 # minimal target, with only zstd compression and decompression. no bench. no legacy.
-zstd-small: CFLAGS = "-Os -s"
+zstd-small: CFLAGS = -Os -s
 zstd-frugal zstd-small: $(ZSTD_FILES) zstdcli.c fileio.c
-	$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT $^ -o zstd$(EXT)
+	$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT $^ -o $@$(EXT)
 
 zstd-decompress: $(ZSTDCOMMON_FILES) $(ZSTDDECOMP_FILES) zstdcli.c fileio.c
 	$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NOCOMPRESS $^ -o $@$(EXT)
@@ -207,34 +212,37 @@ zstd-decompress: $(ZSTDCOMMON_FILES) $(ZSTDDECOMP_FILES) zstdcli.c fileio.c
 zstd-compress: $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) zstdcli.c fileio.c
 	$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NODECOMPRESS $^ -o $@$(EXT)
 
-# zstd is now built with Multi-threading by default
+# zstd is now built with multithreading enabled by default
 zstdmt: zstd
 
+.PHONY: generate_res
 generate_res:
 	windres/generate_res.bat
 
+.PHONY: clean
 clean:
 	$(MAKE) -C $(ZSTDDIR) clean
 	@$(RM) $(ZSTDDIR)/decompress/*.o $(ZSTDDIR)/decompress/zstd_decompress.gcda
 	@$(RM) core *.o tmp* result* *.gcda dictionary *.zst \
         zstd$(EXT) zstd32$(EXT) zstd-compress$(EXT) zstd-decompress$(EXT) \
+        zstd-small$(EXT) zstd-frugal$(EXT) zstd-nolegacy$(EXT) zstd4$(EXT) \
         *.gcda default.profraw have_zlib$(EXT)
 	@echo Cleaning completed
 
-clean_decomp_o:
-	@$(RM) $(ZSTDDECOMP_O)
-
 MD2ROFF = ronn
 MD2ROFF_FLAGS = --roff --warnings --manual="User Commands" --organization="zstd $(ZSTD_VERSION)"
 
 zstd.1: zstd.1.md
 	cat $^ | $(MD2ROFF) $(MD2ROFF_FLAGS) | sed -n '/^\.\\\".*/!p' > $@
 
+.PHONY: man
 man: zstd.1
 
+.PHONY: clean-man
 clean-man:
 	rm zstd.1
 
+.PHONY: preview-man
 preview-man: clean-man man
 	man ./zstd.1
 
@@ -243,6 +251,10 @@ preview-man: clean-man man
 #-----------------------------------------------------------------------------
 ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS))
 
+.PHONY: list
+list:
+	@$(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | sort | egrep -v -e '^[^[:alnum:]]' -e '^$@$$' | xargs
+
 ifneq (,$(filter $(shell uname),SunOS))
 INSTALL ?= ginstall
 else
@@ -263,6 +275,7 @@ INSTALL_PROGRAM ?= $(INSTALL) -m 755
 INSTALL_SCRIPT  ?= $(INSTALL) -m 755
 INSTALL_MAN     ?= $(INSTALL) -m 644
 
+.PHONY: install
 install: zstd
 	@echo Installing binaries
 	@$(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR)/ $(DESTDIR)$(MANDIR)/
@@ -278,6 +291,7 @@ install: zstd
 	@ln -sf zstd.1 $(DESTDIR)$(MANDIR)/unzstd.1
 	@echo zstd installation completed
 
+.PHONY: uninstall
 uninstall:
 	@$(RM) $(DESTDIR)$(BINDIR)/zstdgrep
 	@$(RM) $(DESTDIR)$(BINDIR)/zstdless
@@ -288,4 +302,5 @@ uninstall:
 	@$(RM) $(DESTDIR)$(MANDIR)/unzstd.1
 	@$(RM) $(DESTDIR)$(MANDIR)/zstd.1
 	@echo zstd programs successfully uninstalled
+
 endif
diff --git a/programs/README.md b/programs/README.md
index bd8fba069952..8b65dfdb3f24 100644
--- a/programs/README.md
+++ b/programs/README.md
@@ -3,44 +3,54 @@ Command Line Interface for Zstandard library
 
 Command Line Interface (CLI) can be created using the `make` command without any additional parameters.
 There are however other Makefile targets that create different variations of CLI:
-- `zstd` : default CLI supporting gzip-like arguments; includes dictionary builder, benchmark, and support for decompression of legacy zstd versions
-- `zstd32` : Same as `zstd`, but forced to compile in 32-bits mode
-- `zstd_nolegacy` : Same as `zstd` except of support for decompression of legacy zstd versions
-- `zstd-small` : CLI optimized for minimal size; without dictionary builder, benchmark, and support for decompression of legacy zstd versions
-- `zstd-compress` : compressor-only version of CLI; without dictionary builder, benchmark, and support for decompression of legacy zstd versions
-- `zstd-decompress` : decompressor-only version of CLI; without dictionary builder, benchmark, and support for decompression of legacy zstd versions
+- `zstd` : default CLI supporting gzip-like arguments; includes dictionary builder, benchmark, and support for decompression of legacy zstd formats
+- `zstd-nolegacy` : Same as `zstd` but without support for legacy zstd formats
+- `zstd-small` : CLI optimized for minimal size; no dictionary builder, no benchmark, and no support for legacy zstd formats
+- `zstd-compress` : version of CLI which can only compress into zstd format
+- `zstd-decompress` : version of CLI which can only decompress zstd format
 
 
 #### Compilation variables
-`zstd` tries to detect and use the following features automatically :
+`zstd` scope can be altered by modifying the following compilation variables :
 
 - __HAVE_THREAD__ : multithreading is automatically enabled when `pthread` is detected.
-  It's possible to disable multithread support, by either compiling `zstd-nomt` target or using HAVE_THREAD=0 variable.
+  It's possible to disable multithread support, by setting HAVE_THREAD=0.
   Example : make zstd HAVE_THREAD=0
   It's also possible to force compilation with multithread support, using HAVE_THREAD=1.
   In which case, linking stage will fail if `pthread` library cannot be found.
   This might be useful to prevent silent feature disabling.
 
 - __HAVE_ZLIB__ : `zstd` can compress and decompress files in `.gz` format.
-  This is done through command `--format=gzip`.
+  This is requested with the command `--format=gzip`.
   Alternatively, symlinks named `gzip` or `gunzip` will mimic intended behavior.
   `.gz` support is automatically enabled when `zlib` library is detected at build time.
-  It's possible to disable `.gz` support, by either compiling `zstd-nogz` target or using HAVE_ZLIB=0 variable.
+  It's possible to disable `.gz` support, by setting HAVE_ZLIB=0.
   Example : make zstd HAVE_ZLIB=0
   It's also possible to force compilation with zlib support, using HAVE_ZLIB=1.
   In which case, linking stage will fail if `zlib` library cannot be found.
   This might be useful to prevent silent feature disabling.
 
 - __HAVE_LZMA__ : `zstd` can compress and decompress files in `.xz` and `.lzma` formats.
-  This is done through commands `--format=xz` and `--format=lzma` respectively.
+  This is requested with the commands `--format=xz` and `--format=lzma` respectively.
   Alternatively, symlinks named `xz`, `unxz`, `lzma`, or `unlzma` will mimic intended behavior.
   `.xz` and `.lzma` support is automatically enabled when `lzma` library is detected at build time.
-  It's possible to disable `.xz` and `.lzma` support, by either compiling `zstd-noxz` target or using HAVE_LZMA=0 variable.
+  It's possible to disable `.xz` and `.lzma` support, by setting HAVE_LZMA=0.
   Example : make zstd HAVE_LZMA=0
   It's also possible to force compilation with lzma support, using HAVE_LZMA=1.
   In which case, linking stage will fail if `lzma` library cannot be found.
   This might be useful to prevent silent feature disabling.
 
+- __ZSTD_LEGACY_SUPPORT__ : `zstd` can decompress files compressed by older versions of `zstd`.
+  Starting v0.8.0, all versions of `zstd` produce frames compliant with the [specification](../doc/zstd_compression_format.md), and are therefore compatible.
+  But older versions (< v0.8.0) produced different, incompatible, frames.
+  By default, `zstd` supports decoding legacy formats >= v0.4.0 (`ZSTD_LEGACY_SUPPORT=4`).
+  This can be altered by modifying this compilation variable.
+  `ZSTD_LEGACY_SUPPORT=1` means "support all formats >= v0.1.0".
+  `ZSTD_LEGACY_SUPPORT=2` means "support all formats >= v0.2.0", and so on.
+  `ZSTD_LEGACY_SUPPORT=0` means _DO NOT_ support any legacy format.
+  If `ZSTD_LEGACY_SUPPORT >= 8`, it's the same as `0`, since there is no legacy format after `7`.
+  Note : `zstd` only supports decoding older formats, and cannot generate any legacy format.
+
 
 #### Aggregation of parameters
 CLI supports aggregation of parameters i.e. `-b1`, `-e18`, and `-i1` can be joined into `-b1e18i1`.
@@ -61,7 +71,7 @@ will rely more and more on previously decoded content to compress the rest of th
 
 Usage of the dictionary builder and created dictionaries with CLI:
 
-1. Create the dictionary : `zstd --train FullPathToTrainingSet/* -o dictionaryName`
+1. Create the dictionary : `zstd --train PathToTrainingSet/* -o dictionaryName`
 2. Compress with the dictionary: `zstd FILE -D dictionaryName`
 3. Decompress with the dictionary: `zstd --decompress FILE.zst -D dictionaryName`
 
@@ -70,8 +80,8 @@ Usage of the dictionary builder and created dictionaries with CLI:
 CLI includes in-memory compression benchmark module for zstd.
 The benchmark is conducted using given filenames. The files are read into memory and joined together.
 It makes benchmark more precise as it eliminates I/O overhead.
-Many filenames can be supplied as multiple parameters, parameters with wildcards or
-names of directories can be used as parameters with the `-r` option.
+Multiple filenames can be supplied, either as separate parameters or with wildcards;
+names of directories can also be used as parameters with the `-r` option.
 
 The benchmark measures ratio, compressed size, compression and decompression speed.
 One can select compression levels starting from `-b` and ending with `-e`.
@@ -101,13 +111,14 @@ Advanced arguments :
  -v     : verbose mode; specify multiple times to increase verbosity
  -q     : suppress warnings; specify twice to suppress errors too
  -c     : force write to standard output, even if it is the console
+ -l     : print information about zstd compressed files
 --ultra : enable levels beyond 19, up to 22 (requires more memory)
- -T#    : use # threads for compression (default:1)
- -B#    : select size of each job (default:0==automatic)
 --no-dictID : don't write dictID into header (dictionary compression)
 --[no-]check : integrity check (default:enabled)
  -r     : operate recursively on directories
 --format=gzip : compress files to the .gz format
+--format=xz : compress files to the .xz format
+--format=lzma : compress files to the .lzma format
 --test  : test compressed file integrity
 --[no-]sparse : sparse mode (default:disabled)
  -M#    : Set a memory usage limit for decompression
diff --git a/programs/bench.c b/programs/bench.c
index f9493e3b0707..2b48a4663a86 100644
--- a/programs/bench.c
+++ b/programs/bench.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
diff --git a/programs/bench.h b/programs/bench.h
index 77a527f8ff82..5f8d61a25b30 100644
--- a/programs/bench.h
+++ b/programs/bench.h
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
diff --git a/programs/datagen.c b/programs/datagen.c
index d0116b97232f..b1da8e78b31e 100644
--- a/programs/datagen.c
+++ b/programs/datagen.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
diff --git a/programs/datagen.h b/programs/datagen.h
index 094056b696ca..5b1b7c47cc5d 100644
--- a/programs/datagen.h
+++ b/programs/datagen.h
@@ -1,11 +1,13 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
+
+
 #ifndef DATAGEN_H
 #define DATAGEN_H
 
diff --git a/programs/dibio.c b/programs/dibio.c
index 31cde5c95db1..ab2dc285a273 100644
--- a/programs/dibio.c
+++ b/programs/dibio.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
diff --git a/programs/dibio.h b/programs/dibio.h
index 84f7d580283d..0227239b26db 100644
--- a/programs/dibio.h
+++ b/programs/dibio.h
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 /* This library is designed for a single-threaded console application.
diff --git a/programs/fileio.c b/programs/fileio.c
index 1dd8008e8495..65b4c7579194 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
@@ -925,230 +925,6 @@ int FIO_compressFilename(const char* dstFileName, const char* srcFileName,
     return result;
 }
 
-typedef struct {
-    int numActualFrames;
-    int numSkippableFrames;
-    unsigned long long decompressedSize;
-    int decompUnavailable;
-    unsigned long long compressedSize;
-    int usesCheck;
-} fileInfo_t;
-
-/*
- * Reads information from file, stores in *info
- * if successful, returns 0, returns 1 for frame analysis error, returns 2 for file not compressed with zstd
- * returns 3 for cases in which file could not be opened.
- */
-static int getFileInfo(fileInfo_t* info, const char* inFileName){
-    int detectError = 0;
-    FILE* const srcFile = FIO_openSrcFile(inFileName);
-    if (srcFile == NULL) {
-        DISPLAY("Error: could not open source file %s\n", inFileName);
-        return 3;
-    }
-    info->compressedSize = (unsigned long long)UTIL_getFileSize(inFileName);
-    /* begin analyzing frame */
-    for ( ; ; ) {
-        BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
-        size_t const numBytesRead = fread(headerBuffer, 1, sizeof(headerBuffer), srcFile);
-        if (numBytesRead < ZSTD_frameHeaderSize_min) {
-            if (feof(srcFile) && numBytesRead == 0 && info->compressedSize > 0) {
-                break;
-            }
-            else if (feof(srcFile)) {
-                DISPLAY("Error: reached end of file with incomplete frame\n");
-                detectError = 2;
-                break;
-            }
-            else {
-                DISPLAY("Error: did not reach end of file but ran out of frames\n");
-                detectError = 1;
-                break;
-            }
-        }
-        {
-            U32 const magicNumber = MEM_readLE32(headerBuffer);
-            if (magicNumber == ZSTD_MAGICNUMBER) {
-                U64 const frameContentSize = ZSTD_getFrameContentSize(headerBuffer, numBytesRead);
-                if (frameContentSize == ZSTD_CONTENTSIZE_ERROR || frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN) {
-                    info->decompUnavailable = 1;
-                }
-                else {
-                    info->decompressedSize += frameContentSize;
-                }
-                {
-                    /* move to the end of the frame header */
-                    size_t const headerSize = ZSTD_frameHeaderSize(headerBuffer, numBytesRead);
-                    if (ZSTD_isError(headerSize)) {
-                        DISPLAY("Error: could not determine frame header size\n");
-                        detectError = 1;
-                        break;
-                    }
-                    {
-                        int const ret = fseek(srcFile, ((long)headerSize)-((long)numBytesRead), SEEK_CUR);
-                        if (ret != 0) {
-                            DISPLAY("Error: could not move to end of frame header\n");
-                            detectError = 1;
-                            break;
-                        }
-                    }
-                }
-
-                /* skip the rest of the blocks in the frame */
-                {
-                    int lastBlock = 0;
-                    do {
-                        BYTE blockHeaderBuffer[3];
-                        U32 blockHeader;
-                        int blockSize;
-                        size_t const readBytes = fread(blockHeaderBuffer, 1, 3, srcFile);
-                        if (readBytes != 3) {
-                            DISPLAY("There was a problem reading the block header\n");
-                            detectError = 1;
-                            break;
-                        }
-                        blockHeader = MEM_readLE24(blockHeaderBuffer);
-                        lastBlock = blockHeader & 1;
-                        blockSize = blockHeader >> 3;
-                        {
-                            int const ret = fseek(srcFile, blockSize, SEEK_CUR);
-                            if (ret != 0) {
-                                DISPLAY("Error: could not skip to end of block\n");
-                                detectError = 1;
-                                break;
-                            }
-                        }
-                    } while (lastBlock != 1);
-
-                    if (detectError) {
-                        break;
-                    }
-                }
-                {
-                    /* check if checksum is used */
-                    BYTE const frameHeaderDescriptor = headerBuffer[4];
-                    int const contentChecksumFlag = (frameHeaderDescriptor & (1 << 2)) >> 2;
-                    if (contentChecksumFlag) {
-                        int const ret = fseek(srcFile, 4, SEEK_CUR);
-                        info->usesCheck = 1;
-                        if (ret != 0) {
-                            DISPLAY("Error: could not skip past checksum\n");
-                            detectError = 1;
-                            break;
-                        }
-                    }
-                }
-                info->numActualFrames++;
-            }
-            else if (magicNumber == ZSTD_MAGIC_SKIPPABLE_START) {
-                BYTE frameSizeBuffer[4];
-                size_t const readBytes = fread(frameSizeBuffer, 1, 4, srcFile);
-                if (readBytes != 4) {
-                    DISPLAY("There was an error reading skippable frame size");
-                    detectError = 1;
-                    break;
-                }
-                {
-                    U32 const frameSize = MEM_readLE32(frameSizeBuffer);
-                    int const ret = LONG_SEEK(srcFile, frameSize, SEEK_CUR);
-                    if (ret != 0) {
-                        DISPLAY("Error: could not find end of skippable frame\n");
-                        detectError = 1;
-                        break;
-                    }
-                }
-                info->numSkippableFrames++;
-            }
-            else {
-                detectError = 2;
-                break;
-            }
-        }
-    }
-    fclose(srcFile);
-    return detectError;
-}
-
-static void displayInfo(const char* inFileName, fileInfo_t* info, int displayLevel){
-    double const compressedSizeMB = (double)info->compressedSize/(1 MB);
-    double const decompressedSizeMB = (double)info->decompressedSize/(1 MB);
-    double const ratio = (info->compressedSize == 0) ? 0 : ((double)info->decompressedSize)/info->compressedSize;
-    const char* const checkString = (info->usesCheck ? "XXH64" : "None");
-    if (displayLevel <= 2) {
-        if (!info->decompUnavailable) {
-            DISPLAYOUT("Skippable  Non-Skippable  Compressed  Uncompressed  Ratio  Check  Filename\n");
-            DISPLAYOUT("%9d  %13d  %7.2f MB  %9.2f MB  %5.3f  %5s  %s\n",
-                    info->numSkippableFrames, info->numActualFrames, compressedSizeMB, decompressedSizeMB,
-                    ratio, checkString, inFileName);
-        }
-        else {
-            DISPLAYOUT("Skippable  Non-Skippable  Compressed  Check  Filename\n");
-            DISPLAYOUT("%9d  %13d  %7.2f MB  %5s  %s\n",
-                    info->numSkippableFrames, info->numActualFrames, compressedSizeMB, checkString, inFileName);
-        }
-    }
-    else{
-        DISPLAYOUT("# Zstandard Frames: %d\n", info->numActualFrames);
-        DISPLAYOUT("# Skippable Frames: %d\n", info->numSkippableFrames);
-        DISPLAYOUT("Compressed Size: %.2f MB (%llu B)\n", compressedSizeMB, info->compressedSize);
-        if (!info->decompUnavailable) {
-            DISPLAYOUT("Decompressed Size: %.2f MB (%llu B)\n", decompressedSizeMB, info->decompressedSize);
-            DISPLAYOUT("Ratio: %.4f\n", ratio);
-        }
-        DISPLAYOUT("Check: %s\n", checkString);
-        DISPLAYOUT("\n");
-    }
-}
-
-
-static int FIO_listFile(const char* inFileName, int displayLevel, unsigned fileNo, unsigned numFiles){
-    /* initialize info to avoid warnings */
-    fileInfo_t info;
-    memset(&info, 0, sizeof(info));
-    DISPLAYOUT("%s (%u/%u):\n", inFileName, fileNo, numFiles);
-    {
-        int const error = getFileInfo(&info, inFileName);
-        if (error == 1) {
-            /* display error, but provide output */
-            DISPLAY("An error occurred with getting file info\n");
-        }
-        else if (error == 2) {
-            DISPLAYOUT("File %s not compressed with zstd\n", inFileName);
-            if (displayLevel > 2) {
-                DISPLAYOUT("\n");
-            }
-            return 1;
-        }
-        else if (error == 3) {
-            /* error occurred with opening the file */
-            if (displayLevel > 2) {
-                DISPLAYOUT("\n");
-            }
-            return 1;
-        }
-        displayInfo(inFileName, &info, displayLevel);
-        return error;
-    }
-}
-
-int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel){
-    if (numFiles == 0) {
-        DISPLAYOUT("No files given\n");
-        return 0;
-    }
-    DISPLAYOUT("===========================================\n");
-    DISPLAYOUT("Printing information about compressed files\n");
-    DISPLAYOUT("===========================================\n");
-    DISPLAYOUT("Number of files listed: %u\n", numFiles);
-    {
-        int error = 0;
-        unsigned u;
-        for (u=0; u<numFiles;u++) {
-            error |= FIO_listFile(filenameTable[u], displayLevel, u+1, numFiles);
-        }
-        return error;
-    }
-}
 
 int FIO_compressMultipleFilenames(const char** inFileNamesTable, unsigned nbFiles,
                                   const char* suffix,
@@ -1194,10 +970,8 @@ int FIO_compressMultipleFilenames(const char** inFileNamesTable, unsigned nbFile
             missed_files += FIO_compressFilename_dstFile(ress, dstFileName, inFileNamesTable[u], compressionLevel);
     }   }
 
-    /* Close & Free */
     FIO_freeCResources(ress);
     free(dstFileName);
-
     return missed_files;
 }
 
@@ -1208,8 +982,8 @@ int FIO_compressMultipleFilenames(const char** inFileNamesTable, unsigned nbFile
 #ifndef ZSTD_NODECOMPRESS
 
 /* **************************************************************************
-*  Decompression
-****************************************************************************/
+ *  Decompression
+ ***************************************************************************/
 typedef struct {
     void*  srcBuffer;
     size_t srcBufferLoaded;
@@ -1616,7 +1390,7 @@ static unsigned long long FIO_decompressLz4Frame(dRess_t* ress,
             /* Write Block */
             if (decodedBytes) {
                 if (fwrite(ress->dstBuffer, 1, decodedBytes, ress->dstFile) != decodedBytes) {
-                    DISPLAYLEVEL(1, "zstd: %s \n", strerr(errno));
+                    DISPLAYLEVEL(1, "zstd: %s \n", strerror(errno));
                     decodingError = 1; break;
                 }
                 filesize += decodedBytes;
@@ -1878,4 +1652,231 @@ int FIO_decompressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles
     return missingFiles + skippedFiles;
 }
 
+
+
+/* **************************************************************************
+ *  .zst file info (--list command)
+ ***************************************************************************/
+
+typedef struct {
+    int numActualFrames;                  /* nb of zstd frames found (magic number == ZSTD_MAGICNUMBER) */
+    int numSkippableFrames;               /* nb of skippable frames found */
+    unsigned long long decompressedSize;  /* sum of the content sizes declared by the zstd frame headers */
+    int decompUnavailable;                /* 1 if at least one frame has an unknown or unreadable content size */
+    unsigned long long compressedSize;    /* size of the file, as returned by UTIL_getFileSize() */
+    int usesCheck;                        /* 1 if at least one zstd frame sets the content checksum flag */
+} fileInfo_t;
+
+/** getFileInfo() :
+ *  Walks through every frame of inFileName and stores the totals in *info.
+ *  Frame counters and decompressedSize are only incremented : *info must be zeroed by the caller.
+ * @return : 0 if successful
+ *           1 for frame analysis error (failed read or seek, invalid header, unsupported block type)
+ *           2 for file not compressed with zstd (unknown magic number, empty file, truncated frame start)
+ *           3 for cases in which file could not be opened. */
+static int getFileInfo(fileInfo_t* info, const char* inFileName){
+    int detectError = 0;
+    FILE* const srcFile = FIO_openSrcFile(inFileName);
+    if (srcFile == NULL) {
+        DISPLAY("Error: could not open source file %s\n", inFileName);
+        return 3;
+    }
+    info->compressedSize = (unsigned long long)UTIL_getFileSize(inFileName);
+
+    /* begin analyzing frame : one iteration per frame, every exit of the loop reaches fclose() below */
+    for ( ; ; ) {
+        BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
+        size_t const numBytesRead = fread(headerBuffer, 1, sizeof(headerBuffer), srcFile);   /* reads ahead : the surplus is given back by the relative seeks below */
+        if (numBytesRead < ZSTD_frameHeaderSize_min) {   /* not enough bytes to hold a frame header */
+            if (feof(srcFile) && numBytesRead == 0 && info->compressedSize > 0) {   /* nothing left to read in a non-empty file : normal termination */
+                break;
+            }
+            else if (feof(srcFile)) {   /* empty file, or file ending on a few stray bytes */
+                DISPLAY("Error: reached end of file with incomplete frame\n");
+                detectError = 2;
+                break;
+            }
+            else {   /* short read without end of file */
+                DISPLAY("Error: did not reach end of file but ran out of frames\n");
+                detectError = 1;
+                break;
+            }
+        }
+        {   U32 const magicNumber = MEM_readLE32(headerBuffer);
+            /* Zstandard frame */
+            if (magicNumber == ZSTD_MAGICNUMBER) {
+                U64 const frameContentSize = ZSTD_getFrameContentSize(headerBuffer, numBytesRead);
+                if (frameContentSize == ZSTD_CONTENTSIZE_ERROR || frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN) {
+                    info->decompUnavailable = 1;   /* not an error : the frame is still counted, only the total size is no longer reported */
+                } else {
+                    info->decompressedSize += frameContentSize;
+                }
+                /* move to the end of the frame header */
+                {   size_t const headerSize = ZSTD_frameHeaderSize(headerBuffer, numBytesRead);
+                    if (ZSTD_isError(headerSize)) {
+                        DISPLAY("Error: could not determine frame header size\n");
+                        detectError = 1;
+                        break;
+                    }
+                    {   int const ret = fseek(srcFile, ((long)headerSize)-((long)numBytesRead), SEEK_CUR);   /* from the end of the read-ahead to the first byte after the header */
+                        if (ret != 0) {
+                            DISPLAY("Error: could not move to end of frame header\n");
+                            detectError = 1;
+                            break;
+                }   }   }
+
+                /* skip the rest of the blocks in the frame : only the 3-byte block headers are read */
+                {   int lastBlock = 0;
+                    do {
+                        BYTE blockHeaderBuffer[3];
+                        size_t const readBytes = fread(blockHeaderBuffer, 1, 3, srcFile);
+                        if (readBytes != 3) {
+                            DISPLAY("There was a problem reading the block header\n");
+                            detectError = 1;
+                            break;
+                        }
+                        {   U32 const blockHeader = MEM_readLE24(blockHeaderBuffer);   /* bit 0 : last block flag, bits 1-2 : block type, upper bits : size */
+                            U32 const blockTypeID = (blockHeader >> 1) & 3;
+                            U32 const isRLE = (blockTypeID == 1);
+                            U32 const isWrongBlock = (blockTypeID == 3);
+                            long const blockSize = isRLE ? 1 : (long)(blockHeader >> 3);   /* an RLE block is skipped as a single byte, whatever its size field */
+                            if (isWrongBlock) {
+                                DISPLAY("Error: unsupported block type \n");
+                                detectError = 1;
+                                break;
+                            }
+                            lastBlock = blockHeader & 1;
+                            {   int const ret = fseek(srcFile, blockSize, SEEK_CUR);
+                                if (ret != 0) {
+                                    DISPLAY("Error: could not skip to end of block\n");
+                                    detectError = 1;
+                                    break;
+                        }   }   }
+                    } while (lastBlock != 1);
+
+                    if (detectError) break;   /* the break statements above only left the do-while loop */
+                }
+
+                /* check if checksum is used : bit 2 of the byte following the 4-byte magic number */
+                {   BYTE const frameHeaderDescriptor = headerBuffer[4];
+                    int const contentChecksumFlag = (frameHeaderDescriptor & (1 << 2)) >> 2;
+                    if (contentChecksumFlag) {
+                        int const ret = fseek(srcFile, 4, SEEK_CUR);   /* skips the 4 bytes following the last block */
+                        info->usesCheck = 1;
+                        if (ret != 0) {
+                            DISPLAY("Error: could not skip past checksum\n");
+                            detectError = 1;
+                            break;
+                }   }   }
+                info->numActualFrames++;
+            }
+            /* Skippable frame : the low 4 bits of the magic number are ignored */
+            else if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
+                U32 const frameSize = MEM_readLE32(headerBuffer + 4);   /* 4-byte size field, right after the magic number */
+                long const seek = (long)(8 + frameSize - numBytesRead);   /* 8 = magic number + size field; numBytesRead were already consumed */
+                int const ret = LONG_SEEK(srcFile, seek, SEEK_CUR);
+                if (ret != 0) {
+                    DISPLAY("Error: could not find end of skippable frame\n");
+                    detectError = 1;
+                    break;
+                }
+                info->numSkippableFrames++;
+            }
+            /* unknown content : reported to the caller as "not compressed with zstd", no message here */
+            else {
+                detectError = 2;
+                break;
+            }
+        }
+    }  /* end analyzing frame */
+    fclose(srcFile);
+    return detectError;
+}
+
+/* displayInfo() : prints *info as one table row (displayLevel <= 2) or as a detailed report (displayLevel > 2).
+ * Sizes are scaled to KB when the compressed size is below 1 MB, to MB otherwise; unitStr names the unit used. */
+static void displayInfo(const char* inFileName, fileInfo_t* info, int displayLevel){
+    unsigned const unit = info->compressedSize < (1 MB) ? (1 KB) : (1 MB);
+    const char* const unitStr = info->compressedSize < (1 MB) ? "KB" : "MB";
+    double const compressedSizeUnit = (double)info->compressedSize / unit;
+    double const decompressedSizeUnit = (double)info->decompressedSize / unit;
+    double const ratio = (info->compressedSize == 0) ? 0 : ((double)info->decompressedSize)/info->compressedSize;   /* empty file : avoids dividing by 0 */
+    const char* const checkString = (info->usesCheck ? "XXH64" : "None");
+    if (displayLevel <= 2) {
+        if (!info->decompUnavailable) {
+            DISPLAYOUT("Skippable  Non-Skippable  Compressed  Uncompressed  Ratio  Check  Filename\n");
+            DISPLAYOUT("%9d  %13d  %7.2f %2s  %9.2f %2s  %5.3f  %5s  %s\n",
+                    info->numSkippableFrames, info->numActualFrames,
+                    compressedSizeUnit, unitStr, decompressedSizeUnit, unitStr,
+                    ratio, checkString, inFileName);
+        } else {   /* decompressed size unknown : the Uncompressed and Ratio columns are left out */
+            DISPLAYOUT("Skippable  Non-Skippable  Compressed  Check  Filename\n");
+            DISPLAYOUT("%9d  %13d  %7.2f %2s  %5s  %s\n",   /* unit label comes from unitStr : the value may be in KB */
+                    info->numSkippableFrames, info->numActualFrames,
+                    compressedSizeUnit, unitStr, checkString, inFileName);
+        }
+    } else {
+        DISPLAYOUT("# Zstandard Frames: %d\n", info->numActualFrames);
+        DISPLAYOUT("# Skippable Frames: %d\n", info->numSkippableFrames);
+        DISPLAYOUT("Compressed Size: %.2f %2s (%llu B)\n", compressedSizeUnit, unitStr, info->compressedSize);
+        if (!info->decompUnavailable) {
+            DISPLAYOUT("Decompressed Size: %.2f %2s (%llu B)\n", decompressedSizeUnit, unitStr, info->decompressedSize);
+            DISPLAYOUT("Ratio: %.4f\n", ratio);
+        }
+        DISPLAYOUT("Check: %s\n", checkString);
+        DISPLAYOUT("\n");
+    }
+}
+
+
+/* FIO_listFile() : prints the "name (fileNo/numFiles):" banner, then what getFileInfo() gathered.
+ * @return : 0 if successful, 1 if the file could not be opened, is not zstd, or failed analysis */
+static int FIO_listFile(const char* inFileName, int displayLevel, unsigned fileNo, unsigned numFiles){
+    fileInfo_t info;
+    int status;
+
+    memset(&info, 0, sizeof(info));   /* getFileInfo() only increments the counters */
+    DISPLAYOUT("%s (%u/%u):\n", inFileName, fileNo, numFiles);
+    status = getFileInfo(&info, inFileName);
+
+    /* status 2 (not zstd) and 3 (cannot open) : no info to display, they only differ by the message */
+    if ((status == 2) || (status == 3)) {
+        if (status == 2) {
+            DISPLAYOUT("File %s not compressed with zstd\n", inFileName);
+        }
+        if (displayLevel > 2) {
+            DISPLAYOUT("\n");
+        }
+        return 1;
+    }
+
+    /* status 1 (frame analysis error) : report it, but still print what was collected */
+    if (status == 1) {
+        DISPLAY("An error occurred with getting file info\n");
+    }
+
+    displayInfo(inFileName, &info, displayLevel);
+    return status;
+}
+
+/* FIO_listMultipleFiles() : lists every file of filenameTable, after a short banner.
+ * @return : bitwise OR of all FIO_listFile() results, hence 0 only if every file succeeded (or none was given) */
+int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel){
+    int anyError = 0;
+    unsigned fileIdx = 0;
+    if (numFiles == 0) {
+        DISPLAYOUT("No files given\n");
+        return 0;
+    }
+    DISPLAYOUT("===========================================\n");
+    DISPLAYOUT("Printing information about compressed files\n");
+    DISPLAYOUT("===========================================\n");
+    DISPLAYOUT("Number of files listed: %u\n", numFiles);
+    for ( ; fileIdx < numFiles; fileIdx++) {   /* fileIdx+1 : files are numbered from 1 on display */
+        anyError |= FIO_listFile(filenameTable[fileIdx], displayLevel, fileIdx+1, numFiles);
+    }
+    return anyError;
+}
+
+
 #endif /* #ifndef ZSTD_NODECOMPRESS */
diff --git a/programs/fileio.h b/programs/fileio.h
index 9d9167df9ef5..8008e97dd5f3 100644
--- a/programs/fileio.h
+++ b/programs/fileio.h
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
diff --git a/programs/platform.h b/programs/platform.h
index 74412cde332e..fb2e9b173d2a 100644
--- a/programs/platform.h
+++ b/programs/platform.h
@@ -1,12 +1,10 @@
-/**
- * platform.h - compiler and OS detection
- *
- * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 #ifndef PLATFORM_H_MODULE
diff --git a/programs/util.h b/programs/util.h
index dd971e0f884b..7b553661cde3 100644
--- a/programs/util.h
+++ b/programs/util.h
@@ -1,12 +1,10 @@
-/**
- * util.h - utility functions
- *
- * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 #ifndef UTIL_H_MODULE
diff --git a/programs/zstd.1 b/programs/zstd.1
index 5df45db2153e..5a91eea281f4 100644
--- a/programs/zstd.1
+++ b/programs/zstd.1
@@ -1,5 +1,5 @@
 .
-.TH "ZSTD" "1" "June 2017" "zstd 1.3.0" "User Commands"
+.TH "ZSTD" "1" "August 2017" "zstd 1.3.1" "User Commands"
 .
 .SH "NAME"
 \fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files
@@ -105,7 +105,7 @@ unlocks high compression levels 20+ (maximum 22), using a lot more memory\. Note
 .
 .TP
 \fB\-T#\fR, \fB\-\-threads=#\fR
-Compress using \fB#\fR threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\.
+Compress using \fB#\fR threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. In all cases, the number of threads is capped at ZSTDMT_NBTHREADS_MAX==256\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\.
 .
 .TP
 \fB\-D file\fR
@@ -149,7 +149,7 @@ display help/long help and exit
 .
 .TP
 \fB\-V\fR, \fB\-\-version\fR
-display version number and exit
+display version number and exit\. Advanced : \fB\-vV\fR also displays supported formats\. \fB\-vvV\fR also displays POSIX support\.
 .
 .TP
 \fB\-v\fR
diff --git a/programs/zstd.1.md b/programs/zstd.1.md
index 24e25a2f3af1..4310afa1aaf8 100644
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@ -108,6 +108,7 @@ the last one takes effect.
 * `-T#`, `--threads=#`:
     Compress using `#` threads (default: 1).
     If `#` is 0, attempt to detect and use the number of physical CPU cores.
+    In all cases, the number of threads is capped at ZSTDMT_NBTHREADS_MAX==256.
     This modifier does nothing if `zstd` is compiled without multithread support.
 * `-D file`:
     use `file` as Dictionary to compress or decompress FILE(s)
@@ -139,7 +140,9 @@ the last one takes effect.
 * `-h`/`-H`, `--help`:
     display help/long help and exit
 * `-V`, `--version`:
-    display version number and exit
+    display version number and exit.
+    Advanced : `-vV` also displays supported formats.
+    `-vvV` also displays POSIX support.
 * `-v`:
     verbose mode
 * `-q`, `--quiet`:
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index 35772e0a7140..e7eb71db6e37 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
@@ -16,7 +16,7 @@
 #endif
 
 #ifndef ZSTDCLI_CLEVEL_MAX
-#  define ZSTDCLI_CLEVEL_MAX 19   /* when not using --ultra */
+#  define ZSTDCLI_CLEVEL_MAX 19   /* without using --ultra */
 #endif
 
 
@@ -26,14 +26,15 @@
 **************************************/
 #include "platform.h" /* IS_CONSOLE, PLATFORM_POSIX_VERSION */
 #include "util.h"     /* UTIL_HAS_CREATEFILELIST, UTIL_createFileList */
+#include <stdio.h>    /* fprintf(), stdin, stdout, stderr */
 #include <string.h>   /* strcmp, strlen */
 #include <errno.h>    /* errno */
-#include "fileio.h"
+#include "fileio.h"   /* stdinmark, stdoutmark, ZSTD_EXTENSION */
 #ifndef ZSTD_NOBENCH
 #  include "bench.h"  /* BMK_benchFiles, BMK_SetNbSeconds */
 #endif
 #ifndef ZSTD_NODICT
-#  include "dibio.h"
+#  include "dibio.h"  /* ZDICT_cover_params_t, DiB_trainFromFiles() */
 #endif
 #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_maxCLevel */
 #include "zstd.h"     /* ZSTD_VERSION_STRING */
@@ -64,7 +65,7 @@
 #define MB *(1 <<20)
 #define GB *(1U<<30)
 
-#define DEFAULT_DISPLAY_LEVEL 2
+#define DISPLAY_LEVEL_DEFAULT 2
 
 static const char*    g_defaultDictName = "dictionary";
 static const unsigned g_defaultMaxDictSize = 110 KB;
@@ -79,7 +80,7 @@ static U32 g_overlapLog = OVERLAP_LOG_DEFAULT;
 **************************************/
 #define DISPLAY(...)         fprintf(g_displayOut, __VA_ARGS__)
 #define DISPLAYLEVEL(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } }
-static int g_displayLevel = DEFAULT_DISPLAY_LEVEL;   /* 0 : no display,  1: errors,  2 : + result + interaction + warnings,  3 : + progression,  4 : + information */
+static int g_displayLevel = DISPLAY_LEVEL_DEFAULT;   /* 0 : no display,  1: errors,  2 : + result + interaction + warnings,  3 : + progression,  4 : + information */
 static FILE* g_displayOut;
 
 
@@ -88,12 +89,12 @@ static FILE* g_displayOut;
 **************************************/
 static int usage(const char* programName)
 {
-    DISPLAY( "Usage :\n");
-    DISPLAY( "      %s [args] [FILE(s)] [-o file]\n", programName);
+    DISPLAY( "Usage : \n");
+    DISPLAY( "      %s [args] [FILE(s)] [-o file] \n", programName);
     DISPLAY( "\n");
-    DISPLAY( "FILE    : a filename\n");
+    DISPLAY( "FILE    : a filename \n");
     DISPLAY( "          with no FILE, or when FILE is - , read standard input\n");
-    DISPLAY( "Arguments :\n");
+    DISPLAY( "Arguments : \n");
 #ifndef ZSTD_NOCOMPRESS
     DISPLAY( " -#     : # compression level (1-%d, default:%d) \n", ZSTDCLI_CLEVEL_MAX, ZSTDCLI_CLEVEL_DEFAULT);
 #endif
@@ -105,7 +106,7 @@ static int usage(const char* programName)
     DISPLAY( " -f     : overwrite output without prompting and (de)compress links \n");
     DISPLAY( "--rm    : remove source file(s) after successful de/compression \n");
     DISPLAY( " -k     : preserve source file(s) (default) \n");
-    DISPLAY( " -h/-H  : display help/long help and exit\n");
+    DISPLAY( " -h/-H  : display help/long help and exit \n");
     return 0;
 }
 
@@ -114,12 +115,12 @@ static int usage_advanced(const char* programName)
     DISPLAY(WELCOME_MESSAGE);
     usage(programName);
     DISPLAY( "\n");
-    DISPLAY( "Advanced arguments :\n");
-    DISPLAY( " -V     : display Version number and exit\n");
+    DISPLAY( "Advanced arguments : \n");
+    DISPLAY( " -V     : display Version number and exit \n");
     DISPLAY( " -v     : verbose mode; specify multiple times to increase verbosity\n");
     DISPLAY( " -q     : suppress warnings; specify twice to suppress errors too\n");
     DISPLAY( " -c     : force write to standard output, even if it is the console\n");
-    DISPLAY( " -l     : print information about zstd compressed files.\n");
+    DISPLAY( " -l     : print information about zstd compressed files \n");
 #ifndef ZSTD_NOCOMPRESS
     DISPLAY( "--ultra : enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel());
 #ifdef ZSTD_MULTITHREAD
@@ -151,11 +152,10 @@ static int usage_advanced(const char* programName)
 #endif
 #endif
     DISPLAY( " -M#    : Set a memory usage limit for decompression \n");
-    DISPLAY( "--list  : list information about a zstd compressed file \n");
     DISPLAY( "--      : All arguments after \"--\" are treated as files \n");
 #ifndef ZSTD_NODICT
     DISPLAY( "\n");
-    DISPLAY( "Dictionary builder :\n");
+    DISPLAY( "Dictionary builder : \n");
     DISPLAY( "--train ## : create a dictionary from a training set of files \n");
     DISPLAY( "--train-cover[=k=#,d=#,steps=#] : use the cover algorithm with optional args\n");
     DISPLAY( "--train-legacy[=s=#] : use the legacy algorithm with selectivity (default: %u)\n", g_defaultSelectivityLevel);
@@ -165,12 +165,12 @@ static int usage_advanced(const char* programName)
 #endif
 #ifndef ZSTD_NOBENCH
     DISPLAY( "\n");
-    DISPLAY( "Benchmark arguments :\n");
+    DISPLAY( "Benchmark arguments : \n");
     DISPLAY( " -b#    : benchmark file(s), using # compression level (default : 1) \n");
     DISPLAY( " -e#    : test all compression levels from -bX to # (default: 1)\n");
-    DISPLAY( " -i#    : minimum evaluation time in seconds (default : 3s)\n");
+    DISPLAY( " -i#    : minimum evaluation time in seconds (default : 3s) \n");
     DISPLAY( " -B#    : cut file into independent blocks of size # (default: no block)\n");
-    DISPLAY( "--priority=rt : set process priority to real-time\n");
+    DISPLAY( "--priority=rt : set process priority to real-time \n");
 #endif
     return 0;
 }
@@ -313,6 +313,35 @@ static unsigned parseCompressionParameters(const char* stringPtr, ZSTD_compressi
     return 1;
 }
 
+static void printVersion(void)   /* prints the version banner, plus build details when verbosity was raised */
+{
+    DISPLAY(WELCOME_MESSAGE);   /* unconditional : does not depend on g_displayLevel */
+    /* format support : a single line built piece by piece, shown at display level >= 3 (documented as -vV) */
+    DISPLAYLEVEL(3, "*** supports: zstd");
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>0) && (ZSTD_LEGACY_SUPPORT<8)
+    DISPLAYLEVEL(3, ", zstd legacy v0.%d+", ZSTD_LEGACY_SUPPORT);
+#endif
+#ifdef ZSTD_GZCOMPRESS
+    DISPLAYLEVEL(3, ", gzip");
+#endif
+#ifdef ZSTD_LZ4COMPRESS
+    DISPLAYLEVEL(3, ", lz4");
+#endif
+#ifdef ZSTD_LZMACOMPRESS
+    DISPLAYLEVEL(3, ", lzma, xz ");
+#endif
+    DISPLAYLEVEL(3, "\n");   /* terminates the list of supported formats */
+    /* posix support : one line per macro defined at compile time, shown at display level >= 4 (documented as -vvV) */
+#ifdef _POSIX_C_SOURCE
+    DISPLAYLEVEL(4, "_POSIX_C_SOURCE defined: %ldL\n", (long) _POSIX_C_SOURCE);
+#endif
+#ifdef _POSIX_VERSION
+    DISPLAYLEVEL(4, "_POSIX_VERSION defined: %ldL \n", (long) _POSIX_VERSION);
+#endif
+#ifdef PLATFORM_POSIX_VERSION
+    DISPLAYLEVEL(4, "PLATFORM_POSIX_VERSION defined: %ldL\n", (long) PLATFORM_POSIX_VERSION);
+#endif
+}
 
 typedef enum { zom_compress, zom_decompress, zom_test, zom_bench, zom_train, zom_list } zstd_operation_mode;
 
@@ -492,7 +521,7 @@ int main(int argCount, const char* argv[])
                     switch(argument[0])
                     {
                         /* Display help */
-                    case 'V': g_displayOut=stdout; DISPLAY(WELCOME_MESSAGE); CLEAN_RETURN(0);   /* Version Only */
+                    case 'V': g_displayOut=stdout; printVersion(); CLEAN_RETURN(0);   /* Version Only */
                     case 'H':
                     case 'h': g_displayOut=stdout; CLEAN_RETURN(usage_advanced(programName));
 
@@ -635,24 +664,18 @@ int main(int argCount, const char* argv[])
         filenameTable[filenameIdx++] = argument;
     }
 
-    if (lastCommand) { DISPLAY("error : command must be followed by argument \n"); CLEAN_RETURN(1); }  /* forgotten argument */
+    if (lastCommand) { /* forgotten argument */
+        DISPLAY("error : command must be followed by argument \n");
+        CLEAN_RETURN(1);
+    }
 
     /* Welcome message (if verbose) */
     DISPLAYLEVEL(3, WELCOME_MESSAGE);
-#ifdef _POSIX_C_SOURCE
-    DISPLAYLEVEL(4, "_POSIX_C_SOURCE defined: %ldL\n", (long) _POSIX_C_SOURCE);
-#endif
-#ifdef _POSIX_VERSION
-    DISPLAYLEVEL(4, "_POSIX_VERSION defined: %ldL\n", (long) _POSIX_VERSION);
-#endif
-#ifdef PLATFORM_POSIX_VERSION
-    DISPLAYLEVEL(4, "PLATFORM_POSIX_VERSION defined: %ldL\n", (long) PLATFORM_POSIX_VERSION);
-#endif
 
     if (nbThreads == 0) {
         /* try to guess */
         nbThreads = UTIL_countPhysicalCores();
-        DISPLAYLEVEL(3, "Note: %d physical core(s) detected\n", nbThreads);
+        DISPLAYLEVEL(3, "Note: %d physical core(s) detected \n", nbThreads);
     }
 
     g_utilDisplayLevel = g_displayLevel;
@@ -679,10 +702,17 @@ int main(int argCount, const char* argv[])
         }
     }
 #endif
+
     if (operation == zom_list) {
+#ifndef ZSTD_NODECOMPRESS
         int const ret = FIO_listMultipleFiles(filenameIdx, filenameTable, g_displayLevel);
         CLEAN_RETURN(ret);
+#else
+        DISPLAY("file information is not supported \n");
+        CLEAN_RETURN(1);
+#endif
     }
+
     /* Check if benchmark is selected */
     if (operation==zom_bench) {
 #ifndef ZSTD_NOBENCH
@@ -692,7 +722,7 @@ int main(int argCount, const char* argv[])
         BMK_setNbSeconds(bench_nbSeconds);
         BMK_benchFiles(filenameTable, filenameIdx, dictFileName, cLevel, cLevelLast, &compressionParams, setRealTimePrio);
 #endif
-        (void)bench_nbSeconds;
+        (void)bench_nbSeconds; (void)blockSize; (void)setRealTimePrio;
         goto _end;
     }
 
@@ -719,6 +749,10 @@ int main(int argCount, const char* argv[])
         goto _end;
     }
 
+#ifndef ZSTD_NODECOMPRESS
+    if (operation==zom_test) { outFileName=nulmark; FIO_setRemoveSrcFile(0); } /* test mode */
+#endif
+
     /* No input filename ==> use stdin and stdout */
     filenameIdx += !filenameIdx;   /* filenameTable[0] is stdin by default */
     if (!strcmp(filenameTable[0], stdinmark) && !outFileName) outFileName = stdoutmark;   /* when input is stdin, default output is stdout */
@@ -759,11 +793,11 @@ int main(int argCount, const char* argv[])
         else
           operationResult = FIO_compressMultipleFilenames(filenameTable, filenameIdx, outFileName ? outFileName : suffix, dictFileName, cLevel, &compressionParams);
 #else
+        (void)suffix;
         DISPLAY("Compression not supported\n");
 #endif
     } else {  /* decompression or test */
 #ifndef ZSTD_NODECOMPRESS
-        if (operation==zom_test) { outFileName=nulmark; FIO_setRemoveSrcFile(0); } /* test mode */
         FIO_setMemLimit(memLimit);
         if (filenameIdx==1 && outFileName)
             operationResult = FIO_decompressFilename(outFileName, filenameTable[0], dictFileName);
diff --git a/tests/Makefile b/tests/Makefile
index 82f12887bf9b..3be79c159057 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -73,13 +73,13 @@ DECODECORPUS_TESTTIME ?= -T30
 
 default: fullbench
 
-all: fullbench fuzzer zstreamtest paramgrill datagen zbufftest decodecorpus
+all: fullbench fuzzer zstreamtest paramgrill datagen decodecorpus
 
-all32: fullbench32 fuzzer32 zstreamtest32 zbufftest32
+all32: fullbench32 fuzzer32 zstreamtest32
 
-allnothread: fullbench fuzzer paramgrill datagen zbufftest decodecorpus
+allnothread: fullbench fuzzer paramgrill datagen  decodecorpus
 
-dll: fuzzer-dll zstreamtest-dll zbufftest-dll
+dll: fuzzer-dll zstreamtest-dll
 
 zstd:
 	$(MAKE) -C $(PRGDIR) $@
@@ -108,11 +108,11 @@ fullbench-dll: $(PRGDIR)/datagen.c fullbench.c
 	$(MAKE) -C $(ZSTDDIR) libzstd
 	$(CC) $(FLAGS) $^ -o $@$(EXT) -DZSTD_DLL_IMPORT=1 $(ZSTDDIR)/dll/libzstd.dll
 
-fuzzer   : $(ZSTD_FILES) $(ZDICT_FILES) $(PRGDIR)/datagen.c fuzzer.c
-	$(CC)      $(FLAGS) $^ -o $@$(EXT)
-
-fuzzer32 : $(ZSTD_FILES) $(ZDICT_FILES) $(PRGDIR)/datagen.c fuzzer.c
-	$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
+fuzzer : CPPFLAGS += $(MULTITHREAD_CPP)
+fuzzer : LDFLAGS += $(MULTITHREAD_LD)
+fuzzer32: CFLAGS += -m32
+fuzzer fuzzer32 : $(ZSTD_FILES) $(ZDICT_FILES) $(PRGDIR)/datagen.c fuzzer.c
+	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
 fuzzer-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd
 fuzzer-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/datagen.c fuzzer.c
@@ -192,7 +192,7 @@ else
 	$(CC) $(FLAGS) $^ -o $@$(EXT) -Wl,-rpath=$(ZSTDDIR) $(ZSTDDIR)/libzstd.so
 endif
 
-pool  : pool.c $(ZSTDDIR)/common/pool.c $(ZSTDDIR)/common/threading.c
+poolTests  : poolTests.c $(ZSTDDIR)/common/pool.c $(ZSTDDIR)/common/threading.c
 	$(CC)    $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT)
 
 namespaceTest:
@@ -213,7 +213,7 @@ clean:
         fuzzer-dll$(EXT) zstreamtest-dll$(EXT) zbufftest-dll$(EXT)\
         zstreamtest$(EXT) zstreamtest32$(EXT) \
         datagen$(EXT) paramgrill$(EXT) roundTripCrash$(EXT) longmatch$(EXT) \
-        symbols$(EXT) invalidDictionaries$(EXT) legacy$(EXT) pool$(EXT) \
+        symbols$(EXT) invalidDictionaries$(EXT) legacy$(EXT) poolTests$(EXT) \
 	decodecorpus$(EXT)
 	@echo Cleaning completed
 
@@ -280,7 +280,7 @@ test-zstd: zstd zstd-playTests
 test-zstd32: ZSTD = $(PRGDIR)/zstd32
 test-zstd32: zstd32 zstd-playTests
 
-test-zstd-nolegacy: ZSTD = $(PRGDIR)/zstd
+test-zstd-nolegacy: ZSTD = $(PRGDIR)/zstd-nolegacy
 test-zstd-nolegacy: zstd-nolegacy zstd-playTests
 
 test-gzstd: gzstd
@@ -375,7 +375,7 @@ test-decodecorpus-cli: decodecorpus
 	cd ..
 	@rm -rf testdir
 
-test-pool: pool
-	$(QEMU_SYS) ./pool
+test-pool: poolTests
+	$(QEMU_SYS) ./poolTests
 
 endif
diff --git a/tests/datagencli.c b/tests/datagencli.c
index 8a81939d16de..bf9601f20976 100644
--- a/tests/datagencli.c
+++ b/tests/datagencli.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c
index eaf07457894f..23166bd67f0a 100644
--- a/tests/decodecorpus.c
+++ b/tests/decodecorpus.c
@@ -1,10 +1,10 @@
-/**
- * Copyright (c) 2017-present, Facebook, Inc.
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 #include <limits.h>
diff --git a/tests/files/huffman-compressed-larger b/tests/files/huffman-compressed-larger
new file mode 100644
index 0000000000000000000000000000000000000000..f594f1ae9816a52054935aab96eec94c4ffe14b7
GIT binary patch
literal 143
zcmV;A0C4{(wJ-eyP!$9KW`-Ueka7hx0Dvr|0RR9101)1^uIswqo=F4%0Du4hfEEB3
z02Tl|2JCX%cWtRD`Y@9(#KBM?3<Lqd2l|LKemocwY80ZFKr5DNr=tOhzaC5&JIIH{
xGqKTRE|$tf1Ys37TWuzbbzfzF{`>Vi8VgP*Qj(lfvmE1c0RaG879=~%<1Ug5Gim?;

literal 0
HcmV?d00001

diff --git a/tests/fullbench.c b/tests/fullbench.c
index 81de5157b8e1..78a70940f9d2 100644
--- a/tests/fullbench.c
+++ b/tests/fullbench.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
@@ -91,12 +91,6 @@ static size_t BMK_findMaxMem(U64 requiredMem)
 /*_*******************************************************
 *  Benchmark wrappers
 *********************************************************/
-typedef struct {
-    blockType_e blockType;
-    U32 unusedBits;
-    U32 origSize;
-} blockProperties_t;
-
 size_t local_ZSTD_compress(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize)
 {
     (void)buff2;
@@ -120,7 +114,6 @@ size_t local_ZSTD_decodeLiteralsBlock(void* dst, size_t dstSize, void* buff2, co
     return ZSTD_decodeLiteralsBlock((ZSTD_DCtx*)g_zdc, buff2, g_cSize);
 }
 
-extern size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr);
 extern size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeq, const void* src, size_t srcSize);
 size_t local_ZSTD_decodeSeqHeaders(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize)
 {
diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile
new file mode 100644
index 000000000000..da22ed0d0baa
--- /dev/null
+++ b/tests/fuzz/Makefile
@@ -0,0 +1,108 @@
+# ##########################################################################
+# Copyright (c) 2016-present, Facebook, Inc.
+# All rights reserved.
+#
+# This Makefile is validated for Linux, and macOS targets
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree. An additional grant
+# of patent rights can be found in the PATENTS file in the same directory.
+# ##########################################################################
+
+CFLAGS ?= -O3
+CXXFLAGS ?= -O3
+
+ZSTDDIR = ../../lib
+PRGDIR = ../../programs
+
+FUZZ_CPPFLAGS := -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
+	-I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR) \
+	-DZSTD_DEBUG=1 -DMEM_FORCE_MEMORY_ACCESS=0 \
+	-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION $(CPPFLAGS)
+FUZZ_CFLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
+	-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
+	-Wstrict-prototypes -Wundef -Wformat-security \
+	-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
+	-Wredundant-decls \
+	-g -fno-omit-frame-pointer $(CFLAGS)
+FUZZ_CXXFLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
+	-Wstrict-aliasing=1 -Wswitch-enum \
+	-Wdeclaration-after-statement -Wstrict-prototypes -Wundef \
+	-Wformat-security -Wvla -Wformat=2 -Winit-self -Wfloat-equal \
+	-Wwrite-strings -Wredundant-decls \
+	-g -fno-omit-frame-pointer -std=c++11 $(CXXFLAGS)
+FUZZ_LDFLAGS := $(LDFLAGS)
+FUZZ_ARFLAGS := $(ARFLAGS)
+FUZZ_TARGET_FLAGS = $(FUZZ_CPPFLAGS) $(FUZZ_CXXFLAGS) $(FUZZ_LDFLAGS)
+
+FUZZ_HEADERS := fuzz_helpers.h fuzz.h
+
+ZSTDCOMMON_FILES := $(ZSTDDIR)/common/*.c
+ZSTDCOMP_FILES   := $(ZSTDDIR)/compress/*.c
+ZSTDDECOMP_FILES := $(ZSTDDIR)/decompress/*.c
+ZSTD_FILES       := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES)
+
+ZSTD_OBJ  := $(patsubst %.c,%.o, $(wildcard $(ZSTD_FILES)))
+
+LIBFUZZER ?= -lFuzzer
+
+.PHONY: default all clean
+
+default: all
+
+all: simple_round_trip stream_round_trip simple_decompress stream_decompress
+
+%.o: %.c
+	$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $^ -c -o $@
+
+simple_round_trip: $(FUZZ_HEADERS) $(ZSTD_OBJ) simple_round_trip.o
+	$(CXX) $(FUZZ_TARGET_FLAGS) $(ZSTD_OBJ) simple_round_trip.o $(LIBFUZZER) -o $@
+
+stream_round_trip: $(FUZZ_HEADERS) $(ZSTD_OBJ) stream_round_trip.o
+	$(CXX) $(FUZZ_TARGET_FLAGS) $(ZSTD_OBJ) stream_round_trip.o $(LIBFUZZER) -o $@
+
+simple_decompress: $(FUZZ_HEADERS) $(ZSTD_OBJ) simple_decompress.o
+	$(CXX) $(FUZZ_TARGET_FLAGS) $(ZSTD_OBJ) simple_decompress.o $(LIBFUZZER) -o $@
+
+stream_decompress: $(FUZZ_HEADERS) $(ZSTD_OBJ) stream_decompress.o
+	$(CXX) $(FUZZ_TARGET_FLAGS) $(ZSTD_OBJ) stream_decompress.o $(LIBFUZZER) -o $@
+
+libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h regression_driver.o
+	$(AR) $(FUZZ_ARFLAGS) $@ regression_driver.o
+
+%-regression: libregression.a
+	$(RM) $*
+	$(MAKE) $* LDFLAGS="$(FUZZ_LDFLAGS) -L." LIBFUZZER=-lregression
+
+%-regression-test: %-regression
+	./$* corpora/$*
+
+regression-test: \
+	simple_round_trip-regression-test \
+	stream_round_trip-regression-test \
+	simple_decompress-regression-test \
+	stream_decompress-regression-test
+
+%-msan: clean
+	$(MAKE) $* CFLAGS="-fsanitize=memory $(FUZZ_CFLAGS)" \
+		CXXFLAGS="-fsanitize=memory $(FUZZ_CXXFLAGS)"
+
+UASAN_FLAGS := -fsanitize=address,undefined -fno-sanitize-recover=undefined \
+	-fno-sanitize=pointer-overflow
+%-uasan: clean
+	$(MAKE) $* CFLAGS="$(FUZZ_CFLAGS) $(UASAN_FLAGS)" \
+		CXXFLAGS="$(FUZZ_CXXFLAGS) $(UASAN_FLAGS)"
+
+# Install libfuzzer (not usable for MSAN testing)
+# Provided for convenience. To use this library run make libFuzzer and
+# set LDFLAGS=-L.
+.PHONY: libFuzzer
+libFuzzer:
+	@$(RM) -rf Fuzzer
+	@git clone https://chromium.googlesource.com/chromium/llvm-project/llvm/lib/Fuzzer
+	@./Fuzzer/build.sh
+
+clean:
+	@$(MAKE) -C $(ZSTDDIR) clean
+	@$(RM) -f *.a *.o
+	@$(RM) -f simple_round_trip stream_round_trip simple_decompress stream_decompress
diff --git a/tests/fuzz/README.md b/tests/fuzz/README.md
new file mode 100644
index 000000000000..38a4f3d1ab02
--- /dev/null
+++ b/tests/fuzz/README.md
@@ -0,0 +1,34 @@
+# Fuzzing
+
+Each fuzzing target can be built with multiple engines.
+
+## LibFuzzer
+
+You can install `libFuzzer` with `make libFuzzer`. Then you can make each target
+with `make target LDFLAGS=-L. CC=clang CXX=clang++`.
+
+## AFL
+
+The regression driver also serves as a binary for `afl-fuzz`. You can make each
+target with one of these commands:
+
+```
+make target-regression CC=afl-clang CXX=afl-clang++
+AFL_MSAN=1 make target-regression-msan CC=afl-clang CXX=afl-clang++
+AFL_ASAN=1 make target-regression-uasan CC=afl-clang CXX=afl-clang++
+```
+
+Then run as `./target @@`.
+
+## Regression Testing
+
+Each fuzz target has a corpus checked into the repo under `fuzz/corpora/`.
+You can run regression tests on the corpora to ensure that inputs which
+previously exposed bugs still pass. You can make these targets to run the
+regression tests with different sanitizers.
+
+```
+make regression-test
+make regression-test-msan
+make regression-test-uasan
+```
diff --git a/tests/fuzz/fuzz.h b/tests/fuzz/fuzz.h
new file mode 100644
index 000000000000..5b71aba89b43
--- /dev/null
+++ b/tests/fuzz/fuzz.h
@@ -0,0 +1,52 @@
+/**
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+/**
+ * Fuzz target interface.
+ * Fuzz targets have some common parameters passed as macros during compilation.
+ * Check the documentation for each individual fuzzer for more parameters.
+ *
+ * @param STATEFULL_FUZZING:
+ *        Define this to reuse state between fuzzer runs. This can be useful to
+ *        test code paths which are only executed when contexts are reused.
+ *        WARNING: Makes reproducing crashes much harder.
+ *        Default: Not defined.
+ * @param FUZZ_RNG_SEED_SIZE:
+ *        The number of bytes of the source to look at when constructing a seed
+ *        for the deterministic RNG.
+ *        Default: 128.
+ * @param ZSTD_DEBUG:
+ *        This is a parameter for the zstd library. Defining `ZSTD_DEBUG=1`
+ *        enables assert() statements in the zstd library. Higher levels enable
+ *        logging, so aren't recommended. Defining `ZSTD_DEBUG=1` is
+ *        recommended.
+ * @param MEM_FORCE_MEMORY_ACCESS:
+ *        This flag controls how the zstd library accesses unaligned memory.
+ *        It can be undefined, or 0 through 2. If it is undefined, it selects
+ *        the method to use based on the compiler. If testing with UBSAN set
+ *        MEM_FORCE_MEMORY_ACCESS=0 to use the standard compliant method.
+ * @param FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ *        This is the canonical flag to enable deterministic builds for fuzzing.
+ *        Changes to zstd for fuzzing are gated behind this define.
+ *        It is recommended to define this when building zstd for fuzzing.
+ */
+
+#ifndef FUZZ_H
+#define FUZZ_H
+
+#ifndef FUZZ_RNG_SEED_SIZE
+#  define FUZZ_RNG_SEED_SIZE 128
+#endif
+
+#include <stddef.h>
+#include <stdint.h>
+
+int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size);
+
+#endif
diff --git a/tests/fuzz/fuzz_helpers.h b/tests/fuzz/fuzz_helpers.h
new file mode 100644
index 000000000000..5f07fa4de935
--- /dev/null
+++ b/tests/fuzz/fuzz_helpers.h
@@ -0,0 +1,70 @@
+/**
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+/**
+ * Helper functions for fuzzing.
+ */
+
+#ifndef FUZZ_HELPERS_H
+#define FUZZ_HELPERS_H
+
+#include "fuzz.h"
+#include "xxhash.h"
+#include <stdint.h>
+#include <stdio.h>
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+#define FUZZ_QUOTE_IMPL(str) #str
+#define FUZZ_QUOTE(str) FUZZ_QUOTE_IMPL(str)
+
+/**
+ * Always-enabled asserts: report the failed condition on stderr, then abort().
+ */
+#define FUZZ_ASSERT_MSG(cond, msg)                                             \
+  ((cond) ? (void)0                                                            \
+          : (fprintf(stderr, "%s: %u: Assertion: `%s' failed. %s\n", __FILE__, \
+                     (unsigned)__LINE__, FUZZ_QUOTE(cond), (msg)),             \
+             abort()))
+#define FUZZ_ASSERT(cond) FUZZ_ASSERT_MSG((cond), "")
+
+#if defined(__GNUC__)
+#define FUZZ_STATIC static __inline __attribute__((unused))
+#elif defined(__cplusplus) ||                                                  \
+    (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#define FUZZ_STATIC static inline
+#elif defined(_MSC_VER)
+#define FUZZ_STATIC static __inline
+#else
+#define FUZZ_STATIC static
+#endif
+
+/**
+ * Deterministically constructs a seed based on the fuzz input.
+ * Only looks at the first FUZZ_RNG_SEED_SIZE bytes of the input.
+ */
+FUZZ_STATIC uint32_t FUZZ_seed(const uint8_t *src, size_t size) {
+  size_t const toHash = MIN(FUZZ_RNG_SEED_SIZE, size);
+  return XXH32(src, toHash, 0);
+}
+
+#define FUZZ_rotl32(x, r) (((x) << (r)) | ((x) >> (32 - (r))))
+FUZZ_STATIC uint32_t FUZZ_rand(uint32_t *state) {
+  static const uint32_t prime1 = 2654435761U;
+  static const uint32_t prime2 = 2246822519U;
+  uint32_t rand32 = *state;
+  rand32 *= prime1;
+  rand32 += prime2;
+  rand32 = FUZZ_rotl32(rand32, 13);
+  *state = rand32;
+  return rand32 >> 5;
+}
+
+#endif
diff --git a/tests/fuzz/regression_driver.c b/tests/fuzz/regression_driver.c
new file mode 100644
index 000000000000..eee5f0a2a2b2
--- /dev/null
+++ b/tests/fuzz/regression_driver.c
@@ -0,0 +1,69 @@
+/**
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+#include "fuzz.h"
+#include "fuzz_helpers.h"
+#include "util.h"
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char const **argv) {
+  size_t const kMaxFileSize = (size_t)1 << 20;
+  int const kFollowLinks = 1;
+  char *fileNamesBuf = NULL;
+  char const **files = argv + 1;
+  unsigned numFiles = argc - 1;
+  uint8_t *buffer = NULL;
+  size_t bufferSize = 0;
+  unsigned i;
+  int ret;
+
+#ifdef UTIL_HAS_CREATEFILELIST
+  files = UTIL_createFileList(files, numFiles, &fileNamesBuf, &numFiles,
+                              kFollowLinks);
+  FUZZ_ASSERT(files);
+#endif
+
+  for (i = 0; i < numFiles; ++i) {
+    char const *fileName = files[i];
+    size_t const fileSize = UTIL_getFileSize(fileName);
+    size_t readSize;
+    FILE *file;
+
+    /* Check that it is a regular file, and that the fileSize is valid */
+    FUZZ_ASSERT_MSG(UTIL_isRegFile(fileName), fileName);
+    FUZZ_ASSERT_MSG(fileSize <= kMaxFileSize, fileName);
+    /* Ensure we have a large enough buffer allocated */
+    if (fileSize > bufferSize) {
+      free(buffer);
+      buffer = (uint8_t *)malloc(fileSize);
+      FUZZ_ASSERT_MSG(buffer, fileName);
+      bufferSize = fileSize;
+    }
+    /* Open the file */
+    file = fopen(fileName, "rb");
+    FUZZ_ASSERT_MSG(file, fileName);
+    /* Read the file */
+    readSize = fread(buffer, 1, fileSize, file);
+    FUZZ_ASSERT_MSG(readSize == fileSize, fileName);
+    /* Close the file */
+    fclose(file);
+    /* Run the fuzz target */
+    LLVMFuzzerTestOneInput(buffer, fileSize);
+  }
+
+  ret = 0;
+  free(buffer);
+#ifdef UTIL_HAS_CREATEFILELIST
+  UTIL_freeFileList(files, fileNamesBuf);
+#endif
+  return ret;
+}
diff --git a/tests/fuzz/simple_decompress.c b/tests/fuzz/simple_decompress.c
new file mode 100644
index 000000000000..c22ad7c5301c
--- /dev/null
+++ b/tests/fuzz/simple_decompress.c
@@ -0,0 +1,46 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+/**
+ * This fuzz target attempts to decompress the fuzzed data with the simple
+ * decompression function to ensure the decompressor never crashes.
+ */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "fuzz_helpers.h"
+#include "zstd.h"
+
+static ZSTD_DCtx *dctx = NULL;
+static void* rBuf = NULL;
+static size_t bufSize = 0;
+
+int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
+{
+    size_t const neededBufSize = MAX(20 * size, (size_t)256 << 10);
+
+    /* Allocate all buffers and contexts if not already allocated */
+    if (neededBufSize > bufSize) {
+        free(rBuf);
+        rBuf = malloc(neededBufSize);
+        bufSize = neededBufSize;
+        FUZZ_ASSERT(rBuf);
+    }
+    if (!dctx) {
+        dctx = ZSTD_createDCtx();
+        FUZZ_ASSERT(dctx);
+    }
+    ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, src, size);
+
+#ifndef STATEFULL_FUZZING
+    ZSTD_freeDCtx(dctx); dctx = NULL;
+#endif
+    return 0;
+}
diff --git a/tests/fuzz/simple_round_trip.c b/tests/fuzz/simple_round_trip.c
new file mode 100644
index 000000000000..703ea582630a
--- /dev/null
+++ b/tests/fuzz/simple_round_trip.c
@@ -0,0 +1,81 @@
+/**
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+/**
+ * This fuzz target performs a zstd round-trip test (compress & decompress),
+ * compares the result with the original, and calls abort() on corruption.
+ */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "fuzz_helpers.h"
+#include "zstd.h"
+
+static const int kMaxClevel = 19;
+
+static ZSTD_CCtx *cctx = NULL;
+static ZSTD_DCtx *dctx = NULL;
+static void* cBuf = NULL;
+static void* rBuf = NULL;
+static size_t bufSize = 0;
+static uint32_t seed;
+
+static size_t roundTripTest(void *result, size_t resultCapacity,
+                            void *compressed, size_t compressedCapacity,
+                            const void *src, size_t srcSize)
+{
+  int const cLevel = FUZZ_rand(&seed) % kMaxClevel;
+  size_t const cSize = ZSTD_compressCCtx(cctx, compressed, compressedCapacity,
+                                         src, srcSize, cLevel);
+  if (ZSTD_isError(cSize)) {
+    fprintf(stderr, "Compression error: %s\n", ZSTD_getErrorName(cSize));
+    return cSize;
+  }
+  return ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize);
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
+{
+    size_t const neededBufSize = ZSTD_compressBound(size);
+
+    seed = FUZZ_seed(src, size);
+
+    /* Allocate all buffers and contexts if not already allocated */
+    if (neededBufSize > bufSize) {
+        free(cBuf);
+        free(rBuf);
+        cBuf = malloc(neededBufSize);
+        rBuf = malloc(neededBufSize);
+        bufSize = neededBufSize;
+        FUZZ_ASSERT(cBuf && rBuf);
+    }
+    if (!cctx) {
+        cctx = ZSTD_createCCtx();
+        FUZZ_ASSERT(cctx);
+    }
+    if (!dctx) {
+        dctx = ZSTD_createDCtx();
+        FUZZ_ASSERT(dctx);
+    }
+
+    {
+        size_t const result =
+            roundTripTest(rBuf, neededBufSize, cBuf, neededBufSize, src, size);
+        FUZZ_ASSERT_MSG(!ZSTD_isError(result), ZSTD_getErrorName(result));
+        FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size");
+        FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!");
+    }
+#ifndef STATEFULL_FUZZING
+    ZSTD_freeCCtx(cctx); cctx = NULL;
+    ZSTD_freeDCtx(dctx); dctx = NULL;
+#endif
+    return 0;
+}
diff --git a/tests/fuzz/stream_decompress.c b/tests/fuzz/stream_decompress.c
new file mode 100644
index 000000000000..778a426dec7c
--- /dev/null
+++ b/tests/fuzz/stream_decompress.c
@@ -0,0 +1,85 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+/**
+ * This fuzz target attempts to decompress the fuzzed data with the streaming
+ * decompression API to ensure the decompressor never crashes.
+ */
+
+#define ZSTD_STATIC_LINKING_ONLY
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "fuzz_helpers.h"
+#include "zstd.h"
+
+static size_t const kBufSize = ZSTD_BLOCKSIZE_ABSOLUTEMAX;  /* capacity of buf; upper bound of each random output chunk */
+
+static ZSTD_DStream *dstream = NULL;  /* created on first use */
+static void* buf = NULL;              /* output scratch buffer of kBufSize bytes, allocated once */
+static uint32_t seed;                 /* RNG state, re-derived from every fuzz input; file-local like in the sibling targets */
+
+static ZSTD_outBuffer makeOutBuffer(void)
+{
+  ZSTD_outBuffer buffer = { buf, 0, 0 };
+
+  buffer.size = (FUZZ_rand(&seed) % kBufSize) + 1;
+  FUZZ_ASSERT(buffer.size <= kBufSize);
+
+  return buffer;
+}
+
+static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size)
+{
+  ZSTD_inBuffer buffer = { *src, 0, 0 };
+
+  FUZZ_ASSERT(*size > 0);
+  buffer.size = (FUZZ_rand(&seed) % *size) + 1;
+  FUZZ_ASSERT(buffer.size <= *size);
+  *src += buffer.size;
+  *size -= buffer.size;
+
+  return buffer;
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
+{
+    seed = FUZZ_seed(src, size);
+
+    /* Allocate all buffers and contexts if not already allocated */
+    if (!buf) {
+      buf = malloc(kBufSize);
+      FUZZ_ASSERT(buf);
+    }
+
+    if (!dstream) {
+        dstream = ZSTD_createDStream();
+        FUZZ_ASSERT(dstream);
+        FUZZ_ASSERT(!ZSTD_isError(ZSTD_initDStream(dstream)));
+    } else {
+        FUZZ_ASSERT(!ZSTD_isError(ZSTD_resetDStream(dstream)));
+    }
+
+    while (size > 0) {
+        ZSTD_inBuffer in = makeInBuffer(&src, &size);
+        while (in.pos != in.size) {
+            ZSTD_outBuffer out = makeOutBuffer();
+            size_t const rc = ZSTD_decompressStream(dstream, &out, &in);
+            if (ZSTD_isError(rc)) goto error;
+            if (rc == 0) FUZZ_ASSERT(!ZSTD_isError(ZSTD_resetDStream(dstream)));
+        }
+    }
+
+error:
+#ifndef STATEFULL_FUZZING
+    ZSTD_freeDStream(dstream); dstream = NULL;
+#endif
+    return 0;
+}
diff --git a/tests/fuzz/stream_round_trip.c b/tests/fuzz/stream_round_trip.c
new file mode 100644
index 000000000000..17c7dfdd29a7
--- /dev/null
+++ b/tests/fuzz/stream_round_trip.c
@@ -0,0 +1,153 @@
+/**
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+/**
+ * This fuzz target performs a zstd round-trip test (compress & decompress),
+ * compares the result with the original, and calls abort() on corruption.
+ */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "fuzz_helpers.h"
+#include "zstd.h"
+
+static const int kMaxClevel = 19;
+
+static ZSTD_CStream *cstream = NULL;
+static ZSTD_DCtx *dctx = NULL;
+static uint8_t* cBuf = NULL;
+static uint8_t* rBuf = NULL;
+static size_t bufSize = 0;
+static uint32_t seed;
+
+static ZSTD_outBuffer makeOutBuffer(uint8_t *dst, size_t capacity)
+{
+  ZSTD_outBuffer buffer = { dst, 0, 0 };
+
+  FUZZ_ASSERT(capacity > 0);
+  buffer.size = (FUZZ_rand(&seed) % capacity) + 1;
+  FUZZ_ASSERT(buffer.size <= capacity);
+
+  return buffer;
+}
+
+static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size)
+{
+  ZSTD_inBuffer buffer = { *src, 0, 0 };
+
+  FUZZ_ASSERT(*size > 0);
+  buffer.size = (FUZZ_rand(&seed) % *size) + 1;
+  FUZZ_ASSERT(buffer.size <= *size);
+  *src += buffer.size;
+  *size -= buffer.size;
+
+  return buffer;
+}
+
+static size_t compress(uint8_t *dst, size_t capacity,
+                       const uint8_t *src, size_t srcSize)
+{
+    int cLevel = FUZZ_rand(&seed) % kMaxClevel;
+    size_t dstSize = 0;
+    FUZZ_ASSERT(!ZSTD_isError(ZSTD_initCStream(cstream, cLevel)));
+
+    while (srcSize > 0) {
+        ZSTD_inBuffer in = makeInBuffer(&src, &srcSize);
+        /* Mode controls the action. If mode == -1 we pick a new mode */
+        int mode = -1;
+        while (in.pos < in.size) {
+          ZSTD_outBuffer out = makeOutBuffer(dst, capacity);
+          /* Previous action finished, pick a new mode. */
+          if (mode == -1) mode = FUZZ_rand(&seed) % 10;
+          switch (mode) {
+            case 0: /* fall-through */
+            case 1: /* fall-through */
+            case 2: {
+                size_t const ret = ZSTD_flushStream(cstream, &out);
+                FUZZ_ASSERT_MSG(!ZSTD_isError(ret), ZSTD_getErrorName(ret));
+                if (ret == 0) mode = -1;
+                break;
+            }
+            case 3: {
+                size_t ret = ZSTD_endStream(cstream, &out);
+                FUZZ_ASSERT_MSG(!ZSTD_isError(ret), ZSTD_getErrorName(ret));
+                /* Reset the compressor when the frame is finished */
+                if (ret == 0) {
+                    cLevel = FUZZ_rand(&seed) % kMaxClevel;
+                    ret = ZSTD_initCStream(cstream, cLevel);
+                    FUZZ_ASSERT(!ZSTD_isError(ret));
+                    mode = -1;
+                }
+                break;
+            }
+            default: {
+                size_t const ret = ZSTD_compressStream(cstream, &out, &in);
+                FUZZ_ASSERT_MSG(!ZSTD_isError(ret), ZSTD_getErrorName(ret));
+                mode = -1;
+            }
+          }
+          dst += out.pos;
+          dstSize += out.pos;
+          capacity -= out.pos;
+        }
+    }
+    for (;;) {
+        ZSTD_outBuffer out = makeOutBuffer(dst, capacity);
+        size_t const ret = ZSTD_endStream(cstream, &out);
+        FUZZ_ASSERT_MSG(!ZSTD_isError(ret), ZSTD_getErrorName(ret));
+
+        dst += out.pos;
+        dstSize += out.pos;
+        capacity -= out.pos;
+        if (ret == 0) break;
+    }
+    return dstSize;
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
+{
+    size_t const neededBufSize = ZSTD_compressBound(size) * 2;
+
+    seed = FUZZ_seed(src, size);
+
+    /* Allocate all buffers and contexts if not already allocated */
+    if (neededBufSize > bufSize) {
+        free(cBuf);
+        free(rBuf);
+        cBuf = (uint8_t*)malloc(neededBufSize);
+        rBuf = (uint8_t*)malloc(neededBufSize);
+        bufSize = neededBufSize;
+        FUZZ_ASSERT(cBuf && rBuf);
+    }
+    if (!cstream) {
+        cstream = ZSTD_createCStream();
+        FUZZ_ASSERT(cstream);
+    }
+    if (!dctx) {
+        dctx = ZSTD_createDCtx();
+        FUZZ_ASSERT(dctx);
+    }
+
+    {
+        size_t const cSize = compress(cBuf, neededBufSize, src, size);
+        size_t const rSize =
+            ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, cBuf, cSize);
+        FUZZ_ASSERT_MSG(!ZSTD_isError(rSize), ZSTD_getErrorName(rSize));
+        FUZZ_ASSERT_MSG(rSize == size, "Incorrect regenerated size");
+        FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!");
+    }
+
+#ifndef STATEFULL_FUZZING
+    ZSTD_freeCStream(cstream); cstream = NULL;
+    ZSTD_freeDCtx(dctx); dctx = NULL;
+#endif
+    return 0;
+}
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index b8f514785542..0c13a6e488ad 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
@@ -51,14 +51,14 @@ static const U32 nbTestsDefault = 30000;
 /*-************************************
 *  Display Macros
 **************************************/
-#define DISPLAY(...)          fprintf(stderr, __VA_ARGS__)
+#define DISPLAY(...)          fprintf(stdout, __VA_ARGS__)
 #define DISPLAYLEVEL(l, ...)  if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
 static U32 g_displayLevel = 2;
 
 #define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
             if ((FUZ_clockSpan(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \
             { g_displayClock = clock(); DISPLAY(__VA_ARGS__); \
-            if (g_displayLevel>=4) fflush(stderr); } }
+            if (g_displayLevel>=4) fflush(stdout); } }
 static const clock_t g_refreshRate = CLOCKS_PER_SEC / 6;
 static clock_t g_displayClock = 0;
 
@@ -97,7 +97,165 @@ static unsigned FUZ_highbit32(U32 v32)
 
 
 /*=============================================
-*   Basic Unit tests
+*   Memory Tests
+=============================================*/
+#if defined(__APPLE__) && defined(__MACH__)
+
+#include <malloc/malloc.h>    /* malloc_size */
+
+typedef struct {
+    unsigned long long totalMalloc;
+    size_t currentMalloc;
+    size_t peakMalloc;
+    unsigned nbMalloc;
+    unsigned nbFree;
+} mallocCounter_t;
+
+static const mallocCounter_t INIT_MALLOC_COUNTER = { 0, 0, 0, 0, 0 };
+
+static void* FUZ_mallocDebug(void* counter, size_t size)
+{
+    mallocCounter_t* const mcPtr = (mallocCounter_t*)counter;
+    void* const ptr = malloc(size);
+    if (ptr==NULL) return NULL;
+    DISPLAYLEVEL(4, "allocating %u KB => effectively %u KB \n",
+        (U32)(size >> 10), (U32)(malloc_size(ptr) >> 10));  /* OS-X specific */
+    mcPtr->totalMalloc += size;
+    mcPtr->currentMalloc += size;
+    if (mcPtr->currentMalloc > mcPtr->peakMalloc)
+        mcPtr->peakMalloc = mcPtr->currentMalloc;
+    mcPtr->nbMalloc += 1;
+    return ptr;
+}
+
+static void FUZ_freeDebug(void* counter, void* address)
+{
+    mallocCounter_t* const mcPtr = (mallocCounter_t*)counter;
+    DISPLAYLEVEL(4, "freeing %u KB \n", (U32)(malloc_size(address) >> 10));
+    mcPtr->nbFree += 1;
+    mcPtr->currentMalloc -= malloc_size(address);  /* OS-X specific */
+    free(address);
+}
+
+static void FUZ_displayMallocStats(mallocCounter_t count)
+{
+    DISPLAYLEVEL(3, "peak:%6u KB,  nbMallocs:%2u, total:%6u KB \n",
+        (U32)(count.peakMalloc >> 10),
+        count.nbMalloc,
+        (U32)(count.totalMalloc >> 10));
+}
+
+#define CHECK_Z(f) {                               \
+    size_t const err = f;                          \
+    if (ZSTD_isError(err)) {                       \
+        DISPLAY("Error => %s : %s ",               \
+                #f, ZSTD_getErrorName(err));       \
+        exit(1);                                   \
+}   }
+
+/* FUZ_mallocTests() : displays allocation statistics of several compression APIs (display level >= 3 only); test groups numbered below `part` are skipped. @return : 0 */
+static int FUZ_mallocTests(unsigned seed, double compressibility, unsigned part)
+{
+    size_t const inSize = 64 MB + 16 MB + 4 MB + 1 MB + 256 KB + 64 KB; /* 85.3 MB */
+    size_t const outSize = ZSTD_compressBound(inSize);
+    void* const inBuffer = malloc(inSize);
+    void* const outBuffer = malloc(outSize);
+
+    /* tests only played in verbose mode, as they are long; release the buffers allocated above before leaving */
+    if (g_displayLevel<3) { free(inBuffer); free(outBuffer); return 0; }
+    /* Create compressible noise */
+    if (!inBuffer || !outBuffer) {
+        DISPLAY("Not enough memory, aborting\n");
+        exit(1);
+    }
+    RDG_genBuffer(inBuffer, inSize, compressibility, 0. /*auto*/, seed);
+
+    /* simple compression tests */
+    if (part <= 1)
+    {   int compressionLevel;
+        for (compressionLevel=1; compressionLevel<=6; compressionLevel++) {
+            mallocCounter_t malcount = INIT_MALLOC_COUNTER;
+            ZSTD_customMem const cMem = { FUZ_mallocDebug, FUZ_freeDebug, &malcount };
+            ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(cMem);
+            CHECK_Z( ZSTD_compressCCtx(cctx, outBuffer, outSize, inBuffer, inSize, compressionLevel) );
+            ZSTD_freeCCtx(cctx);
+            DISPLAYLEVEL(3, "compressCCtx level %i : ", compressionLevel);
+            FUZ_displayMallocStats(malcount);
+    }   }
+
+    /* streaming compression tests */
+    if (part <= 2)
+    {   int compressionLevel;
+        for (compressionLevel=1; compressionLevel<=6; compressionLevel++) {
+            mallocCounter_t malcount = INIT_MALLOC_COUNTER;
+            ZSTD_customMem const cMem = { FUZ_mallocDebug, FUZ_freeDebug, &malcount };
+            ZSTD_CCtx* const cstream = ZSTD_createCStream_advanced(cMem);
+            ZSTD_outBuffer out = { outBuffer, outSize, 0 };
+            ZSTD_inBuffer in = { inBuffer, inSize, 0 };
+            CHECK_Z( ZSTD_initCStream(cstream, compressionLevel) );
+            CHECK_Z( ZSTD_compressStream(cstream, &out, &in) );
+            CHECK_Z( ZSTD_endStream(cstream, &out) );
+            ZSTD_freeCStream(cstream);
+            DISPLAYLEVEL(3, "compressStream level %i : ", compressionLevel);
+            FUZ_displayMallocStats(malcount);
+    }   }
+
+    /* advanced MT API test */
+    if (part <= 3)
+    {   U32 nbThreads;
+        for (nbThreads=1; nbThreads<=4; nbThreads++) {
+            int compressionLevel;
+            for (compressionLevel=1; compressionLevel<=6; compressionLevel++) {
+                mallocCounter_t malcount = INIT_MALLOC_COUNTER;
+                ZSTD_customMem const cMem = { FUZ_mallocDebug, FUZ_freeDebug, &malcount };
+                ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(cMem);
+                ZSTD_outBuffer out = { outBuffer, outSize, 0 };
+                ZSTD_inBuffer in = { inBuffer, inSize, 0 };
+                CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_p_compressionLevel, (U32)compressionLevel) );
+                CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_p_nbThreads, nbThreads) );
+                /* loop until fully flushed; CHECK_Z() exits on an error code instead of spinning forever on it */
+                for (;;) { size_t const remaining = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end); CHECK_Z(remaining); if (remaining==0) break; }
+                ZSTD_freeCCtx(cctx);
+                DISPLAYLEVEL(3, "compress_generic,-T%u,end level %i : ", nbThreads, compressionLevel);
+                FUZ_displayMallocStats(malcount);
+    }   }   }
+
+    /* advanced MT streaming API test */
+    if (part <= 4)
+    {   U32 nbThreads;
+        for (nbThreads=1; nbThreads<=4; nbThreads++) {
+            int compressionLevel;
+            for (compressionLevel=1; compressionLevel<=6; compressionLevel++) {
+                mallocCounter_t malcount = INIT_MALLOC_COUNTER;
+                ZSTD_customMem const cMem = { FUZ_mallocDebug, FUZ_freeDebug, &malcount };
+                ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(cMem);
+                ZSTD_outBuffer out = { outBuffer, outSize, 0 };
+                ZSTD_inBuffer in = { inBuffer, inSize, 0 };
+                CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_p_compressionLevel, (U32)compressionLevel) );
+                CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_p_nbThreads, nbThreads) );
+                CHECK_Z( ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_continue) );
+                /* loop until fully flushed; CHECK_Z() exits on an error code instead of spinning forever on it */
+                for (;;) { size_t const remaining = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end); CHECK_Z(remaining); if (remaining==0) break; }
+                ZSTD_freeCCtx(cctx);
+                DISPLAYLEVEL(3, "compress_generic,-T%u,continue level %i : ", nbThreads, compressionLevel);
+                FUZ_displayMallocStats(malcount);
+    }   }   }
+    free(inBuffer); free(outBuffer);
+    return 0;
+}
+
+#else
+
+static int FUZ_mallocTests(unsigned seed, double compressibility, unsigned part)
+{
+    (void)seed; (void)compressibility; (void)part;
+    return 0;
+}
+
+#endif
+
+/*=============================================
+*   Unit tests
 =============================================*/
 
 #define CHECK_V(var, fn)  size_t const var = fn; if (ZSTD_isError(var)) goto _output_error
@@ -108,7 +266,8 @@ static int basicUnitTests(U32 seed, double compressibility)
 {
     size_t const CNBuffSize = 5 MB;
     void* const CNBuffer = malloc(CNBuffSize);
-    void* const compressedBuffer = malloc(ZSTD_compressBound(CNBuffSize));
+    size_t const compressedBufferSize = ZSTD_compressBound(CNBuffSize);
+    void* const compressedBuffer = malloc(compressedBufferSize);
     void* const decodedBuffer = malloc(CNBuffSize);
     ZSTD_DCtx* dctx = ZSTD_createDCtx();
     int testResult = 0;
@@ -136,10 +295,20 @@ static int basicUnitTests(U32 seed, double compressibility)
 
 
     DISPLAYLEVEL(4, "test%3i : compress %u bytes : ", testNb++, (U32)CNBuffSize);
-    CHECKPLUS(r, ZSTD_compress(compressedBuffer, ZSTD_compressBound(CNBuffSize),
-                               CNBuffer, CNBuffSize, 1),
-              cSize=r );
-    DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBuffSize*100);
+    {   ZSTD_CCtx* cctx = ZSTD_createCCtx();
+        if (cctx==NULL) goto _output_error;
+        CHECKPLUS(r, ZSTD_compressCCtx(cctx,
+                            compressedBuffer, compressedBufferSize,
+                            CNBuffer, CNBuffSize, 1),
+                  cSize=r );
+        DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBuffSize*100);
+
+        DISPLAYLEVEL(4, "test%3i : size of cctx for level 1 : ", testNb++);
+        {   size_t const cctxSize = ZSTD_sizeof_CCtx(cctx);
+            DISPLAYLEVEL(4, "%u bytes \n", (U32)cctxSize);
+        }
+        ZSTD_freeCCtx(cctx);
+    }
 
 
     DISPLAYLEVEL(4, "test%3i : ZSTD_getFrameContentSize test : ", testNb++);
@@ -216,7 +385,7 @@ static int basicUnitTests(U32 seed, double compressibility)
 
             DISPLAYLEVEL(4, "test%3i : simple compression test with static CCtx : ", testNb++);
             CHECKPLUS(r, ZSTD_compressCCtx(staticCCtx,
-                            compressedBuffer, ZSTD_compressBound(CNBuffSize),
+                            compressedBuffer, compressedBufferSize,
                             CNBuffer, CNBuffSize, STATIC_CCTX_LEVEL),
                       cSize=r );
             DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n",
@@ -285,7 +454,7 @@ static int basicUnitTests(U32 seed, double compressibility)
 
         DISPLAYLEVEL(4, "test%3i : compress %u bytes with 2 threads : ", testNb++, (U32)CNBuffSize);
         CHECKPLUS(r, ZSTDMT_compressCCtx(mtctx,
-                                compressedBuffer, ZSTD_compressBound(CNBuffSize),
+                                compressedBuffer, compressedBufferSize,
                                 CNBuffer, CNBuffSize,
                                 1),
                   cSize=r );
@@ -311,6 +480,23 @@ static int basicUnitTests(U32 seed, double compressibility)
         }   }
         DISPLAYLEVEL(4, "OK \n");
 
+        DISPLAYLEVEL(4, "test%3i : compress -T2 with checksum : ", testNb++);
+        {   ZSTD_parameters params = ZSTD_getParams(1, CNBuffSize, 0);
+            params.fParams.checksumFlag = 1;
+            params.fParams.contentSizeFlag = 1;
+            CHECKPLUS(r, ZSTDMT_compress_advanced(mtctx,
+                                    compressedBuffer, compressedBufferSize,
+                                    CNBuffer, CNBuffSize,
+                                    NULL, params, 3 /*overlapRLog*/),
+                      cSize=r );
+        }
+        DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBuffSize*100);
+
+        DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", testNb++, (U32)CNBuffSize);
+        { size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize);
+          if (r != CNBuffSize) goto _output_error; }
+        DISPLAYLEVEL(4, "OK \n");
+
         ZSTDMT_freeCCtx(mtctx);
     }
 
@@ -372,7 +558,7 @@ static int basicUnitTests(U32 seed, double compressibility)
 
         DISPLAYLEVEL(4, "test%3i : compress with flat dictionary : ", testNb++);
         cSize = 0;
-        CHECKPLUS(r, ZSTD_compressEnd(ctxOrig, compressedBuffer, ZSTD_compressBound(CNBuffSize),
+        CHECKPLUS(r, ZSTD_compressEnd(ctxOrig, compressedBuffer, compressedBufferSize,
                                            (const char*)CNBuffer + dictSize, CNBuffSize - dictSize),
                   cSize += r);
         DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBuffSize*100);
@@ -388,7 +574,7 @@ static int basicUnitTests(U32 seed, double compressibility)
         DISPLAYLEVEL(4, "test%3i : compress with duplicated context : ", testNb++);
         {   size_t const cSizeOrig = cSize;
             cSize = 0;
-            CHECKPLUS(r, ZSTD_compressEnd(ctxDuplicated, compressedBuffer, ZSTD_compressBound(CNBuffSize),
+            CHECKPLUS(r, ZSTD_compressEnd(ctxDuplicated, compressedBuffer, compressedBufferSize,
                                                (const char*)CNBuffer + dictSize, CNBuffSize - dictSize),
                       cSize += r);
             if (cSize != cSizeOrig) goto _output_error;   /* should be identical ==> same size */
@@ -434,9 +620,9 @@ static int basicUnitTests(U32 seed, double compressibility)
             CHECKPLUS(r, ZSTD_compressEnd(ctxDuplicated, compressedBuffer, ZSTD_compressBound(testSize),
                                           (const char*)CNBuffer + dictSize, testSize),
                       cSize = r);
-            {   ZSTD_frameHeader fp;
-                if (ZSTD_getFrameHeader(&fp, compressedBuffer, cSize)) goto _output_error;
-                if ((fp.frameContentSize != testSize) && (fp.frameContentSize != 0)) goto _output_error;
+            {   ZSTD_frameHeader zfh;
+                if (ZSTD_getFrameHeader(&zfh, compressedBuffer, cSize)) goto _output_error;
+                if ((zfh.frameContentSize != testSize) && (zfh.frameContentSize != 0)) goto _output_error;
         }   }
         DISPLAYLEVEL(4, "OK \n");
 
@@ -473,7 +659,7 @@ static int basicUnitTests(U32 seed, double compressibility)
         DISPLAYLEVEL(4, "OK : %u \n", dictID);
 
         DISPLAYLEVEL(4, "test%3i : compress with dictionary : ", testNb++);
-        cSize = ZSTD_compress_usingDict(cctx, compressedBuffer, ZSTD_compressBound(CNBuffSize),
+        cSize = ZSTD_compress_usingDict(cctx, compressedBuffer, compressedBufferSize,
                                         CNBuffer, CNBuffSize,
                                         dictBuffer, dictSize, 4);
         if (ZSTD_isError(cSize)) goto _output_error;
@@ -511,7 +697,7 @@ static int basicUnitTests(U32 seed, double compressibility)
                                             1 /* byReference */, ZSTD_dm_auto,
                                             cParams, ZSTD_defaultCMem);
             DISPLAYLEVEL(4, "(size : %u) : ", (U32)ZSTD_sizeof_CDict(cdict));
-            cSize = ZSTD_compress_usingCDict(cctx, compressedBuffer, ZSTD_compressBound(CNBuffSize),
+            cSize = ZSTD_compress_usingCDict(cctx, compressedBuffer, compressedBufferSize,
                                                  CNBuffer, CNBuffSize, cdict);
             ZSTD_freeCDict(cdict);
             if (ZSTD_isError(cSize)) goto _output_error;
@@ -546,7 +732,7 @@ static int basicUnitTests(U32 seed, double compressibility)
                     goto _output_error;
                 }
                 cSize = ZSTD_compress_usingCDict(cctx,
-                                compressedBuffer, ZSTD_compressBound(CNBuffSize),
+                                compressedBuffer, compressedBufferSize,
                                 CNBuffer, CNBuffSize, cdict);
                 if (ZSTD_isError(cSize)) {
                     DISPLAY("ZSTD_compress_usingCDict failed ");
@@ -560,7 +746,7 @@ static int basicUnitTests(U32 seed, double compressibility)
         {   ZSTD_frameParameters const fParams = { 0 /* frameSize */, 1 /* checksum */, 1 /* noDictID*/ };
             ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize);
             ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, 1 /*byRef*/, ZSTD_dm_auto, cParams, ZSTD_defaultCMem);
-            cSize = ZSTD_compress_usingCDict_advanced(cctx, compressedBuffer, ZSTD_compressBound(CNBuffSize),
+            cSize = ZSTD_compress_usingCDict_advanced(cctx, compressedBuffer, compressedBufferSize,
                                                  CNBuffer, CNBuffSize, cdict, fParams);
             ZSTD_freeCDict(cdict);
             if (ZSTD_isError(cSize)) goto _output_error;
@@ -584,7 +770,7 @@ static int basicUnitTests(U32 seed, double compressibility)
         DISPLAYLEVEL(4, "test%3i : ZSTD_compress_advanced, no dictID : ", testNb++);
         {   ZSTD_parameters p = ZSTD_getParams(3, CNBuffSize, dictSize);
             p.fParams.noDictIDFlag = 1;
-            cSize = ZSTD_compress_advanced(cctx, compressedBuffer, ZSTD_compressBound(CNBuffSize),
+            cSize = ZSTD_compress_advanced(cctx, compressedBuffer, compressedBufferSize,
                                            CNBuffer, CNBuffSize,
                                            dictBuffer, dictSize, p);
             if (ZSTD_isError(cSize)) goto _output_error;
@@ -821,6 +1007,42 @@ static int basicUnitTests(U32 seed, double compressibility)
       if (r != _3BYTESTESTLENGTH) goto _output_error; }
     DISPLAYLEVEL(4, "OK \n");
 
+    DISPLAYLEVEL(4, "test%3i : incompressible data and ill suited dictionary : ", testNb++);
+    RDG_genBuffer(CNBuffer, CNBuffSize, 0.0, 0.1, seed);
+    {   /* Train a dictionary on low characters */
+        size_t dictSize = 16 KB;
+        void* const dictBuffer = malloc(dictSize);
+        size_t const totalSampleSize = 1 MB;
+        size_t const sampleUnitSize = 8 KB;
+        U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize);
+        size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t));
+        if (!dictBuffer || !samplesSizes) goto _output_error;
+        { U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
+        dictSize = ZDICT_trainFromBuffer(dictBuffer, dictSize, CNBuffer, samplesSizes, nbSamples);
+        if (ZDICT_isError(dictSize)) goto _output_error;
+        /* Reverse the characters to make the dictionary ill suited */
+        {   U32 u;
+            for (u = 0; u < CNBuffSize; ++u) {
+              ((BYTE*)CNBuffer)[u] = 255 - ((BYTE*)CNBuffer)[u];
+            }
+        }
+        {   /* Compress the data */
+            size_t const inputSize = 500;
+            size_t const outputSize = ZSTD_compressBound(inputSize);
+            void* const outputBuffer = malloc(outputSize);
+            ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+            if (!outputBuffer || !cctx) goto _output_error;
+            CHECK(ZSTD_compress_usingDict(cctx, outputBuffer, outputSize, CNBuffer, inputSize, dictBuffer, dictSize, 1));
+            free(outputBuffer);
+            ZSTD_freeCCtx(cctx);
+        }
+
+        free(dictBuffer);
+        free(samplesSizes);
+    }
+    DISPLAYLEVEL(4, "OK \n");
+
+
     /* findFrameCompressedSize on skippable frames */
     DISPLAYLEVEL(4, "test%3i : frame compressed size of skippable frame : ", testNb++);
     {   const char* frame = "\x50\x2a\x4d\x18\x05\x0\x0\0abcde";
@@ -892,6 +1114,7 @@ static size_t FUZ_randomLength(U32* seed, U32 maxLog)
         goto _output_error;                                   \
 }   }
 
+#undef CHECK_Z
 #define CHECK_Z(f) {                                          \
     size_t const err = f;                                     \
     if (ZSTD_isError(err)) {                                  \
@@ -1006,17 +1229,17 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD
                   CHECK(endCheck != endMark, "ZSTD_compressCCtx : dst buffer overflow"); }
         }   }
 
+        /* frame header decompression test */
+        {   ZSTD_frameHeader zfh;
+            CHECK_Z( ZSTD_getFrameHeader(&zfh, cBuffer, cSize) );
+            CHECK(zfh.frameContentSize != sampleSize, "Frame content size incorrect");
+        }
+
         /* Decompressed size test */
         {   unsigned long long const rSize = ZSTD_findDecompressedSize(cBuffer, cSize);
             CHECK(rSize != sampleSize, "decompressed size incorrect");
         }
 
-        /* frame header decompression test */
-        {   ZSTD_frameHeader dParams;
-            CHECK_Z( ZSTD_getFrameHeader(&dParams, cBuffer, cSize) );
-            CHECK(dParams.frameContentSize != sampleSize, "Frame content size incorrect");
-        }
-
         /* successful decompression test */
         {   size_t const margin = (FUZ_rand(&lseed) & 1) ? 0 : (FUZ_rand(&lseed) & 31) + 1;
             size_t const dSize = ZSTD_decompress(dstBuffer, sampleSize + margin, cBuffer, cSize);
@@ -1223,6 +1446,19 @@ static unsigned readU32FromChar(const char** stringPtr)
     return result;
 }
 
+/** longCommandWArg() :
+ *  tests whether *stringPtr begins with the prefix longCommand.
+ *  On a match, *stringPtr is advanced just past the prefix and 1 is returned.
+ *  Otherwise *stringPtr is left untouched and 0 is returned.
+ */
+static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
+{
+    size_t const prefixLen = strlen(longCommand);
+    if (strncmp(*stringPtr, longCommand, prefixLen) != 0) return 0;
+    *stringPtr += prefixLen;
+    return 1;
+}
+
 int main(int argc, const char** argv)
 {
     U32 seed = 0;
@@ -1235,6 +1471,7 @@ int main(int argc, const char** argv)
     U32 mainPause = 0;
     U32 maxDuration = 0;
     int bigTests = 1;
+    U32 memTestsOnly = 0;
     const char* const programName = argv[0];
 
     /* Check command line */
@@ -1245,6 +1482,9 @@ int main(int argc, const char** argv)
         /* Handle commands. Aggregated commands are allowed */
         if (argument[0]=='-') {
 
+            if (longCommandWArg(&argument, "--memtest=")) { memTestsOnly = readU32FromChar(&argument); continue; }
+
+            if (!strcmp(argument, "--memtest")) { memTestsOnly=1; continue; }
             if (!strcmp(argument, "--no-big-tests")) { bigTests=0; continue; }
 
             argument++;
@@ -1316,6 +1556,11 @@ int main(int argc, const char** argv)
     DISPLAY("Seed = %u\n", seed);
     if (proba!=FUZ_compressibility_default) DISPLAY("Compressibility : %u%%\n", proba);
 
+    if (memTestsOnly) {
+        g_displayLevel = MAX(3, g_displayLevel);
+        return FUZ_mallocTests(seed, ((double)proba) / 100, memTestsOnly);
+    }
+
     if (nbTests < testNb) nbTests = testNb;
 
     if (testNb==0)
diff --git a/tests/invalidDictionaries.c b/tests/invalidDictionaries.c
index fe8b23b5e19f..83fe439d4c43 100644
--- a/tests/invalidDictionaries.c
+++ b/tests/invalidDictionaries.c
@@ -1,3 +1,12 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ */
+
 #include <stddef.h>
 #include "zstd.h"
 
diff --git a/tests/legacy.c b/tests/legacy.c
index e84e31273734..962b2c9c3c9d 100644
--- a/tests/legacy.c
+++ b/tests/legacy.c
@@ -1,10 +1,10 @@
-/**
- * Copyright (c) 2017-present, Facebook, Inc.
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 /*
@@ -226,4 +226,3 @@ const char* const EXPECTED =
     "snowden is snowed in / he's now then in his snow den / when does the snow end?\n"
     "goodbye little dog / you dug some holes in your day / they'll be hard to fill.\n"
     "when life shuts a door, / just open it. it’s a door. / that is how doors work.\n";
-
diff --git a/tests/longmatch.c b/tests/longmatch.c
index 61b81b359a40..ef79337f56d5 100644
--- a/tests/longmatch.c
+++ b/tests/longmatch.c
@@ -1,3 +1,13 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ */
+
+
 #include <stdio.h>
 #include <stddef.h>
 #include <stdlib.h>
diff --git a/tests/namespaceTest.c b/tests/namespaceTest.c
index dd63186d119f..6f6c74fd63d9 100644
--- a/tests/namespaceTest.c
+++ b/tests/namespaceTest.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
diff --git a/tests/paramgrill.c b/tests/paramgrill.c
index da06ccb52aab..ed13e1dacca8 100644
--- a/tests/paramgrill.c
+++ b/tests/paramgrill.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
diff --git a/tests/playTests.sh b/tests/playTests.sh
index 2e1cc6826f11..bc8584e7a9f0 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -7,17 +7,17 @@ die() {
 
 roundTripTest() {
     if [ -n "$3" ]; then
-        local_c="$3"
-        local_p="$2"
+        cLevel="$3"     # appended to "$ZSTD --ultra -v" below
+        proba="$2"      # extra argument forwarded to ./datagen
     else
-        local_c="$2"
-        local_p=""
+        cLevel="$2"
+        proba=""        # two-argument form: nothing extra for ./datagen
     fi
 
     rm -f tmp1 tmp2
-    $ECHO "roundTripTest: ./datagen $1 $local_p | $ZSTD -v$local_c | $ZSTD -d"
-    ./datagen $1 $local_p | $MD5SUM > tmp1
-    ./datagen $1 $local_p | $ZSTD --ultra -v$local_c | $ZSTD -d  | $MD5SUM > tmp2
+    $ECHO "roundTripTest: ./datagen $1 $proba | $ZSTD -v$cLevel | $ZSTD -d"
+    ./datagen $1 $proba | $MD5SUM > tmp1
+    ./datagen $1 $proba | $ZSTD --ultra -v$cLevel | $ZSTD -d  | $MD5SUM > tmp2
     $DIFF -q tmp1 tmp2
 }
 
@@ -383,6 +383,14 @@ $ZSTD -t --rm tmp1.zst
 test -f tmp1.zst   # check file is still present
 split -b16384 tmp1.zst tmpSplit.
 $ZSTD -t tmpSplit.* && die "bad file not detected !"
+./datagen | $ZSTD -c | $ZSTD -t
+
+
+
+$ECHO "\n**** golden files tests **** "
+
+$ZSTD -t -r files
+$ZSTD -c -r files | $ZSTD -t
 
 
 $ECHO "\n**** benchmark mode tests **** "
@@ -625,16 +633,15 @@ roundTripTest -g35000000 -P75 10
 roundTripTest -g35000000 -P75 11
 roundTripTest -g35000000 -P75 12
 
-roundTripTest -g18000000 -P80 13
-roundTripTest -g18000000 -P80 14
-roundTripTest -g18000000 -P80 15
-roundTripTest -g18000000 -P80 16
-roundTripTest -g18000000 -P80 17
+roundTripTest -g18000013 -P80 13
+roundTripTest -g18000014 -P80 14
+roundTripTest -g18000015 -P80 15
+roundTripTest -g18000016 -P80 16
+roundTripTest -g18000017 -P80 17
+roundTripTest -g18000018 -P94 18
+roundTripTest -g18000019 -P94 19
 
-roundTripTest -g50000000 -P94 18
-roundTripTest -g50000000 -P94 19
-
-roundTripTest -g99000000 -P99 20
+roundTripTest -g68000020 -P99 20
 roundTripTest -g6000000000 -P99 1
 
 fileRoundTripTest -g4193M -P99 1
@@ -644,7 +651,8 @@ then
     $ECHO "\n**** zstdmt long round-trip tests **** "
     roundTripTest -g99000000 -P99 "20 -T2"
     roundTripTest -g6000000000 -P99 "1 -T2"
-    fileRoundTripTest -g4193M -P98 " -T0"
+    roundTripTest -g1500000000 -P97 "1 -T999"
+    fileRoundTripTest -g4195M -P98 " -T0"
 else
     $ECHO "\n**** no multithreading, skipping zstdmt tests **** "
 fi
diff --git a/tests/pool.c b/tests/poolTests.c
similarity index 62%
rename from tests/pool.c
rename to tests/poolTests.c
index adc5947df629..f3d5c382ae57 100644
--- a/tests/pool.c
+++ b/tests/poolTests.c
@@ -1,5 +1,16 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ */
+
+
 #include "pool.h"
 #include "threading.h"
+#include "util.h"
 #include <stddef.h>
 #include <stdio.h>
 
@@ -50,21 +61,45 @@ int testOrder(size_t numThreads, size_t queueSize) {
   return 0;
 }
 
+void waitFn(void *opaque) {   /* job queued by testWait(): ignores its argument */
+  (void)opaque;
+  UTIL_sleepMilli(1);   /* presumably a 1 ms pause; helper is not shown here */
+}
+
+/* Tests for deadlock: queue 16 waitFn jobs on a fresh pool, then free the pool */
+int testWait(size_t numThreads, size_t queueSize) {
+  struct data data;   /* left uninitialized; waitFn never reads its argument */
+  POOL_ctx *ctx = POOL_create(numThreads, queueSize);
+  ASSERT_TRUE(ctx);
+  {
+    size_t i;
+    for (i = 0; i < 16; ++i) {
+        POOL_add(ctx, &waitFn, &data);
+    }
+  }
+  POOL_free(ctx);
+  return 0;
+}
+
 int main(int argc, const char **argv) {
   size_t numThreads;
   for (numThreads = 1; numThreads <= 4; ++numThreads) {
     size_t queueSize;
-    for (queueSize = 1; queueSize <= 2; ++queueSize) {
+    for (queueSize = 0; queueSize <= 2; ++queueSize) {   /* queue size 0 is exercised too */
       if (testOrder(numThreads, queueSize)) {
         printf("FAIL: testOrder\n");
         return 1;
       }
+      if (testWait(numThreads, queueSize)) {
+        printf("FAIL: testWait\n");
+        return 1;
+      }
     }
   }
   printf("PASS: testOrder\n");
   (void)argc;
   (void)argv;
-  return (POOL_create(0, 1) || POOL_create(1, 0)) ? printf("FAIL: testInvalid\n"), 1
-                                                  : printf("PASS: testInvalid\n"), 0;
+  return (POOL_create(0, 1)) ? (printf("FAIL: testInvalid\n"), 1)   /* branches parenthesized: a bare ", 0" would */
+                             : (printf("PASS: testInvalid\n"), 0);  /* apply to the whole ?: and always return 0 */
   return 0;
 }
diff --git a/tests/roundTripCrash.c b/tests/roundTripCrash.c
index 77c6737eebdb..0b478f6cab3e 100644
--- a/tests/roundTripCrash.c
+++ b/tests/roundTripCrash.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 /*
diff --git a/tests/symbols.c b/tests/symbols.c
index 8920187f37f7..f08542dbf669 100644
--- a/tests/symbols.c
+++ b/tests/symbols.c
@@ -1,3 +1,13 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ */
+
+
 #include <stdio.h>
 #include "zstd_errors.h"
 #define ZSTD_STATIC_LINKING_ONLY
diff --git a/tests/zbufftest.c b/tests/zbufftest.c
index 601aa808d027..fe08fdab5bed 100644
--- a/tests/zbufftest.c
+++ b/tests/zbufftest.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index 9b2b8eaf81b7..dd044342ea80 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
@@ -95,19 +95,6 @@ unsigned int FUZ_rand(unsigned int* seedPtr)
     return rand32 >> 5;
 }
 
-static void* allocFunction(void* opaque, size_t size)
-{
-    void* address = malloc(size);
-    (void)opaque;
-    return address;
-}
-
-static void freeFunction(void* opaque, void* address)
-{
-    (void)opaque;
-    free(address);
-}
-
 
 /*======================================================
 *   Basic Unit tests
@@ -1390,13 +1377,12 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double
         /* multi-segments compression test */
         XXH64_reset(&xxhState, 0);
         {   ZSTD_outBuffer outBuff = { cBuffer, cBufferSize, 0 } ;
-            U32 n;
-            for (n=0, cSize=0, totalTestSize=0 ; totalTestSize < maxTestSize ; n++) {
+            for (cSize=0, totalTestSize=0 ; (totalTestSize < maxTestSize) ; ) {
                 /* compress random chunks into randomly sized dst buffers */
                 size_t const randomSrcSize = FUZ_randomLength(&lseed, maxSampleLog);
                 size_t const srcSize = MIN(maxTestSize-totalTestSize, randomSrcSize);
                 size_t const srcStart = FUZ_rand(&lseed) % (srcBufferSize - srcSize);
-                size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
+                size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog+1);
                 size_t const dstBuffSize = MIN(cBufferSize - cSize, randomDstSize);
                 ZSTD_EndDirective const flush = (FUZ_rand(&lseed) & 15) ? ZSTD_e_continue : ZSTD_e_flush;
                 ZSTD_inBuffer inBuff = { srcBuffer+srcStart, srcSize, 0 };
@@ -1415,7 +1401,7 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double
             {   size_t remainingToFlush = (size_t)(-1);
                 while (remainingToFlush) {
                     ZSTD_inBuffer inBuff = { NULL, 0, 0 };
-                    size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
+                    size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog+1);
                     size_t const adjustedDstSize = MIN(cBufferSize - cSize, randomDstSize);
                     outBuff.size = outBuff.pos + adjustedDstSize;
                     DISPLAYLEVEL(5, "End-flush into dst buffer of size %u \n", (U32)adjustedDstSize);
@@ -1543,7 +1529,6 @@ int main(int argc, const char** argv)
     int bigTests = (sizeof(size_t) == 8);
     e_api selected_api = simple_api;
     const char* const programName = argv[0];
-    ZSTD_customMem const customMem = { allocFunction, freeFunction, NULL };
     ZSTD_customMem const customNULL = ZSTD_defaultCMem;
 
     /* Check command line */
@@ -1657,10 +1642,7 @@ int main(int argc, const char** argv)
 
     if (testNb==0) {
         result = basicUnitTests(0, ((double)proba) / 100, customNULL);  /* constant seed for predictability */
-        if (!result) {
-            DISPLAYLEVEL(3, "Unit tests using customMem :\n")
-            result = basicUnitTests(0, ((double)proba) / 100, customMem);  /* use custom memory allocation functions */
-    }   }
+    }
 
     if (!result) {
         switch(selected_api)
diff --git a/zlibWrapper/examples/zwrapbench.c b/zlibWrapper/examples/zwrapbench.c
index 1fc2117f69a7..050c9db63218 100644
--- a/zlibWrapper/examples/zwrapbench.c
+++ b/zlibWrapper/examples/zwrapbench.c
@@ -1,10 +1,10 @@
-/**
- * Copyright (c) 2016-present, Yann Collet, Przemyslaw Skibinski, Facebook, Inc.
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
diff --git a/zlibWrapper/gzcompatibility.h b/zlibWrapper/gzcompatibility.h
index e2ec1addb732..ac9020acc15b 100644
--- a/zlibWrapper/gzcompatibility.h
+++ b/zlibWrapper/gzcompatibility.h
@@ -1,10 +1,10 @@
-/**
- * Copyright (c) 2016-present, Przemyslaw Skibinski, Facebook, Inc.
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
@@ -12,9 +12,9 @@
 #if ZLIB_VERNUM <= 0x1240
 ZEXTERN int ZEXPORT gzclose_r OF((gzFile file));
 ZEXTERN int ZEXPORT gzclose_w OF((gzFile file));
-ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size)); 
+ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size));
 ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile file));
- 
+
 #if !defined(_WIN32) && defined(Z_LARGE64)
 #  define z_off64_t off64_t
 #else
@@ -38,7 +38,7 @@ struct gzFile_s {
 
 #if ZLIB_VERNUM <= 0x1270
 #if defined(_WIN32) && !defined(Z_SOLO)
-#    include <stddef.h>         /* for wchar_t */ 
+#    include <stddef.h>         /* for wchar_t */
 ZEXTERN gzFile         ZEXPORT gzopen_w OF((const wchar_t *path,
                                             const char *mode));
 #endif
diff --git a/zlibWrapper/gzlib.c b/zlibWrapper/gzlib.c
index aa94206a8811..8235cff4fda1 100644
--- a/zlibWrapper/gzlib.c
+++ b/zlibWrapper/gzlib.c
@@ -1,5 +1,5 @@
 /* gzlib.c contains minimal changes required to be compiled with zlibWrapper:
- * - gz_statep was converted to union to work with -Wstrict-aliasing=1      */ 
+ * - gz_statep was converted to union to work with -Wstrict-aliasing=1      */
 
 /* gzlib.c -- zlib functions common to reading and writing gzip files
  * Copyright (C) 2004-2017 Mark Adler
diff --git a/zlibWrapper/gzread.c b/zlibWrapper/gzread.c
index d37aaa1d4c25..88fc06c77f42 100644
--- a/zlibWrapper/gzread.c
+++ b/zlibWrapper/gzread.c
@@ -1,6 +1,6 @@
 /* gzread.c contains minimal changes required to be compiled with zlibWrapper:
- * - gz_statep was converted to union to work with -Wstrict-aliasing=1      */ 
- 
+ * - gz_statep was converted to union to work with -Wstrict-aliasing=1      */
+
  /* gzread.c -- zlib functions for reading gzip files
  * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler
  * For conditions of distribution and use, see http://www.zlib.net/zlib_license.html
diff --git a/zlibWrapper/gzwrite.c b/zlibWrapper/gzwrite.c
index bcda4774aeeb..d1250b90084d 100644
--- a/zlibWrapper/gzwrite.c
+++ b/zlibWrapper/gzwrite.c
@@ -1,6 +1,6 @@
 /* gzwrite.c contains minimal changes required to be compiled with zlibWrapper:
- * - gz_statep was converted to union to work with -Wstrict-aliasing=1      */ 
- 
+ * - gz_statep was converted to union to work with -Wstrict-aliasing=1      */
+
  /* gzwrite.c -- zlib functions for writing gzip files
  * Copyright (C) 2004-2017 Mark Adler
  * For conditions of distribution and use, see http://www.zlib.net/zlib_license.html
diff --git a/zlibWrapper/zstd_zlibwrapper.c b/zlibWrapper/zstd_zlibwrapper.c
index ade3b88cd1f4..272369a2874c 100644
--- a/zlibWrapper/zstd_zlibwrapper.c
+++ b/zlibWrapper/zstd_zlibwrapper.c
@@ -1,10 +1,10 @@
-/**
- * Copyright (c) 2016-present, Przemyslaw Skibinski, Facebook, Inc.
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 
diff --git a/zlibWrapper/zstd_zlibwrapper.h b/zlibWrapper/zstd_zlibwrapper.h
index 0ebd87612eea..f8f36800928e 100644
--- a/zlibWrapper/zstd_zlibwrapper.h
+++ b/zlibWrapper/zstd_zlibwrapper.h
@@ -1,10 +1,10 @@
-/**
- * Copyright (c) 2016-present, Przemyslaw Skibinski, Facebook, Inc.
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */
 
 #ifndef ZSTD_ZLIBWRAPPER_H
@@ -32,7 +32,7 @@ const char * zstdVersion(void);
 /*** COMPRESSION ***/
 /* ZWRAP_useZSTDcompression() enables/disables zstd compression during runtime.
    By default zstd compression is disabled. To enable zstd compression please use one of the methods:
-   - compilation with the additional option -DZWRAP_USE_ZSTD=1 
+   - compilation with the additional option -DZWRAP_USE_ZSTD=1
    - using '#define ZWRAP_USE_ZSTD 1' in source code before '#include "zstd_zlibwrapper.h"'
    - calling ZWRAP_useZSTDcompression(1)
    All above-mentioned methods will enable zstd compression for all threads.
@@ -45,13 +45,13 @@ int ZWRAP_isUsingZSTDcompression(void);
 /* Changes a pledged source size for a given compression stream.
    It will change ZSTD compression parameters what may improve compression speed and/or ratio.
    The function should be called just after deflateInit() or deflateReset() and before deflate() or deflateSetDictionary().
-   It's only helpful when data is compressed in blocks. 
-   There will be no change in case of deflateInit() or deflateReset() immediately followed by deflate(strm, Z_FINISH) 
+   It's only helpful when data is compressed in blocks.
+   There will be no change in case of deflateInit() or deflateReset() immediately followed by deflate(strm, Z_FINISH)
    as this case is automatically detected.  */
 int ZWRAP_setPledgedSrcSize(z_streamp strm, unsigned long long pledgedSrcSize);
 
 /* Similar to deflateReset but preserves dictionary set using deflateSetDictionary.
-   It should improve compression speed because there will be less calls to deflateSetDictionary 
+   It should improve compression speed because there will be fewer calls to deflateSetDictionary.
    When using zlib compression this method redirects to deflateReset. */
 int ZWRAP_deflateReset_keepDict(z_streamp strm);