import zstd 1.4.1

parent 3f774a5e86
commit fa94c7381a

CHANGELOG (30 lines changed)

@@ -1,3 +1,33 @@
v1.4.1
bug: Fix data corruption in niche use cases by @terrelln (#1659)
bug: Fuzz legacy modes, fix uncovered bugs by @terrelln (#1593, #1594, #1595)
bug: Fix out of bounds read by @terrelln (#1590)
perf: Improve decode speed by ~7% by @mgrice (#1668)
perf: Slightly improved compression ratio of level 3 and 4 (ZSTD_dfast) by @cyan4973 (#1681)
perf: Slightly faster compression speed when re-using a context by @cyan4973 (#1658)
perf: Improve compression ratio for small windowLog by @cyan4973 (#1624)
perf: Faster compression speed in high compression mode for repetitive data by @terrelln (#1635)
api: Add parameter to generate smaller dictionaries by @tyler-tran (#1656)
cli: Recognize symlinks when built in C99 mode by @felixhandte (#1640)
cli: Expose cpu load indicator for each file on -vv mode by @ephiepark (#1631)
cli: Restrict read permissions on destination files by @chungy (#1644)
cli: zstdgrep: handle -f flag by @felixhandte (#1618)
cli: zstdcat: follow symlinks by @vejnar (#1604)
doc: Remove extra size limit on compressed blocks by @felixhandte (#1689)
doc: Fix typo by @yk-tanigawa (#1633)
doc: Improve documentation on streaming buffer sizes by @cyan4973 (#1629)
build: CMake: support building with LZ4 by @leeyoung624 (#1626)
build: CMake: install zstdless and zstdgrep by @leeyoung624 (#1647)
build: CMake: respect existing uninstall target by @j301scott (#1619)
build: Make: skip multithread tests when built without support by @michaelforney (#1620)
build: Make: Fix examples/ test target by @sjnam (#1603)
build: Meson: rename options out of deprecated namespace by @lzutao (#1665)
build: Meson: fix build by @lzutao (#1602)
build: Visual Studio: don't export symbols in static lib by @scharan (#1650)
build: Visual Studio: fix linking by @absotively (#1639)
build: Fix MinGW-W64 build by @myzhang1029 (#1600)
misc: Expand decodecorpus coverage by @ephiepark (#1664)

v1.4.0
perf: Improve level 1 compression speed in most scenarios by 6% by @gbtucker and @terrelln
api: Move the advanced API, including all functions in the staging section, to the stable section
doc/zstd_compression_format.md

@@ -16,7 +16,7 @@ Distribution of this document is unlimited.

### Version

0.3.1 (25/10/18)
0.3.2 (17/07/19)


Introduction

@@ -390,9 +390,7 @@ A block can contain any number of bytes (even zero), up to
- Window_Size
- 128 KB

A `Compressed_Block` has the extra restriction that `Block_Size` is always
strictly less than the decompressed size.
If this condition cannot be respected,
If this condition cannot be respected when generating a `Compressed_Block`,
the block must be sent uncompressed instead (`Raw_Block`).


@@ -1655,6 +1653,7 @@ or at least provide a meaningful error code explaining for which reason it cannot

Version changes
---------------
- 0.3.2 : remove additional block size restriction on compressed blocks
- 0.3.1 : minor clarification regarding offset history update rules
- 0.3.0 : minor edits to match RFC8478
- 0.2.9 : clarifications for huffman weights direct representation, by Ulrich Kunitz
doc/zstd_manual.html

@@ -1,46 +1,36 @@
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
<title>zstd 1.4.0 Manual</title>
<title>zstd 1.4.1 Manual</title>
</head>
<body>
<h1>zstd 1.4.0 Manual</h1>
<h1>zstd 1.4.1 Manual</h1>
<hr>
<a name="Contents"></a><h2>Contents</h2>
<ol>
<li><a href="#Chapter1">Introduction</a></li>
<li><a href="#Chapter2">Version</a></li>
<li><a href="#Chapter3">Default constant</a></li>
<li><a href="#Chapter4">Constants</a></li>
<li><a href="#Chapter5">Simple API</a></li>
<li><a href="#Chapter6">Explicit context</a></li>
<li><a href="#Chapter7">Advanced compression API</a></li>
<li><a href="#Chapter8">Advanced decompression API</a></li>
<li><a href="#Chapter9">Streaming</a></li>
<li><a href="#Chapter10">Streaming compression - HowTo</a></li>
<li><a href="#Chapter11">This is a legacy streaming API, and can be replaced by ZSTD_CCtx_reset() and</a></li>
<li><a href="#Chapter12">Equivalent to:</a></li>
<li><a href="#Chapter13">Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).</a></li>
<li><a href="#Chapter14">Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush).</a></li>
<li><a href="#Chapter15">Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end).</a></li>
<li><a href="#Chapter16">Streaming decompression - HowTo</a></li>
<li><a href="#Chapter17">Simple dictionary API</a></li>
<li><a href="#Chapter18">Bulk processing dictionary API</a></li>
<li><a href="#Chapter19">Dictionary helper functions</a></li>
<li><a href="#Chapter20">Advanced dictionary and prefix API</a></li>
<li><a href="#Chapter21">ADVANCED AND EXPERIMENTAL FUNCTIONS</a></li>
<li><a href="#Chapter22">experimental API (static linking only)</a></li>
<li><a href="#Chapter23">Frame size functions</a></li>
<li><a href="#Chapter24">ZSTD_decompressBound() :</a></li>
<li><a href="#Chapter25">Memory management</a></li>
<li><a href="#Chapter26">Advanced compression functions</a></li>
<li><a href="#Chapter27">Advanced decompression functions</a></li>
<li><a href="#Chapter28">Advanced streaming functions</a></li>
<li><a href="#Chapter29">Buffer-less and synchronous inner streaming functions</a></li>
<li><a href="#Chapter30">Buffer-less streaming compression (synchronous mode)</a></li>
<li><a href="#Chapter31">Buffer-less streaming decompression (synchronous mode)</a></li>
<li><a href="#Chapter32">ZSTD_getFrameHeader() :</a></li>
<li><a href="#Chapter33">Block level API</a></li>
<li><a href="#Chapter3">Simple API</a></li>
<li><a href="#Chapter4">Explicit context</a></li>
<li><a href="#Chapter5">Advanced compression API</a></li>
<li><a href="#Chapter6">Advanced decompression API</a></li>
<li><a href="#Chapter7">Streaming</a></li>
<li><a href="#Chapter8">Streaming compression - HowTo</a></li>
<li><a href="#Chapter9">Streaming decompression - HowTo</a></li>
<li><a href="#Chapter10">Simple dictionary API</a></li>
<li><a href="#Chapter11">Bulk processing dictionary API</a></li>
<li><a href="#Chapter12">Dictionary helper functions</a></li>
<li><a href="#Chapter13">Advanced dictionary and prefix API</a></li>
<li><a href="#Chapter14">experimental API (static linking only)</a></li>
<li><a href="#Chapter15">Frame size functions</a></li>
<li><a href="#Chapter16">Memory management</a></li>
<li><a href="#Chapter17">Advanced compression functions</a></li>
<li><a href="#Chapter18">Advanced decompression functions</a></li>
<li><a href="#Chapter19">Advanced streaming functions</a></li>
<li><a href="#Chapter20">Buffer-less and synchronous inner streaming functions</a></li>
<li><a href="#Chapter21">Buffer-less streaming compression (synchronous mode)</a></li>
<li><a href="#Chapter22">Buffer-less streaming decompression (synchronous mode)</a></li>
<li><a href="#Chapter23">Block level API</a></li>
</ol>
<hr>
<a name="Chapter1"></a><h2>Introduction</h2><pre>

@@ -78,11 +68,7 @@

<pre><b>unsigned ZSTD_versionNumber(void);   </b>/**< to check runtime library version */<b>
</b></pre><BR>
<a name="Chapter3"></a><h2>Default constant</h2><pre></pre>

<a name="Chapter4"></a><h2>Constants</h2><pre></pre>

<a name="Chapter5"></a><h2>Simple API</h2><pre></pre>
<a name="Chapter3"></a><h2>Simple API</h2><pre></pre>

<pre><b>size_t ZSTD_compress( void* dst, size_t dstCapacity,
                      const void* src, size_t srcSize,

@@ -152,12 +138,17 @@ const char* ZSTD_getErrorName(size_t code);   </b>/*!< provides readable string from an error code */<b>
int         ZSTD_minCLevel(void);  </b>/*!< minimum negative compression level allowed */<b>
int         ZSTD_maxCLevel(void);  </b>/*!< maximum compression level available */<b>
</pre></b><BR>
<a name="Chapter6"></a><h2>Explicit context</h2><pre></pre>
<a name="Chapter4"></a><h2>Explicit context</h2><pre></pre>

<h3>Compression context</h3><pre>  When compressing many times,
  it is recommended to allocate a context just once, and re-use it for each successive compression operation.
  it is recommended to allocate a context just once,
  and re-use it for each successive compression operation.
  This will make the workload friendlier for the system's memory.
  Use one context per thread for parallel execution in multi-threaded environments.
  Note : re-using context is just a speed / resource optimization.
         It doesn't change the compression ratio, which remains identical.
  Note 2 : In multi-threaded environments,
         use one different context per thread for parallel execution.

</pre><b><pre>typedef struct ZSTD_CCtx_s ZSTD_CCtx;
ZSTD_CCtx* ZSTD_createCCtx(void);
size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);

@@ -189,7 +180,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);

</p></pre><BR>

<a name="Chapter7"></a><h2>Advanced compression API</h2><pre></pre>
<a name="Chapter5"></a><h2>Advanced compression API</h2><pre></pre>

<pre><b>typedef enum { ZSTD_fast=1,
               ZSTD_dfast=2,

@@ -332,6 +323,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
     * ZSTD_c_forceMaxWindow
     * ZSTD_c_forceAttachDict
     * ZSTD_c_literalCompressionMode
     * ZSTD_c_targetCBlockSize
     * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
     * note : never ever use experimentalParam? names directly;
     *        also, the enums values themselves are unstable and can still change.

@@ -341,6 +333,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
     ZSTD_c_experimentalParam3=1000,
     ZSTD_c_experimentalParam4=1001,
     ZSTD_c_experimentalParam5=1002,
     ZSTD_c_experimentalParam6=1003,
} ZSTD_cParameter;
</b></pre><BR>
<pre><b>typedef struct {

@@ -424,7 +417,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);

</p></pre><BR>

<a name="Chapter8"></a><h2>Advanced decompression API</h2><pre></pre>
<a name="Chapter6"></a><h2>Advanced decompression API</h2><pre></pre>

<pre><b>typedef enum {

@@ -472,7 +465,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);

</p></pre><BR>

<a name="Chapter9"></a><h2>Streaming</h2><pre></pre>
<a name="Chapter7"></a><h2>Streaming</h2><pre></pre>

<pre><b>typedef struct ZSTD_inBuffer_s {
  const void* src;    </b>/**< start of input buffer */<b>

@@ -486,7 +479,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
  size_t pos;         </b>/**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */<b>
} ZSTD_outBuffer;
</b></pre><BR>
<a name="Chapter10"></a><h2>Streaming compression - HowTo</h2><pre>
<a name="Chapter8"></a><h2>Streaming compression - HowTo</h2><pre>
  A ZSTD_CStream object is required to track streaming operation.
  Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
  ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
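
Editorial note: a minimal sketch of the streaming-compression loop this HowTo describes, assuming open FILE* handles and omitting error checks for brevity.

```c
#include <stdio.h>
#include <stdlib.h>
#include <zstd.h>

static void compress_stream(FILE* fin, FILE* fout, int level)
{
    size_t const inSize  = ZSTD_CStreamInSize();   /* recommended buffer sizes */
    size_t const outSize = ZSTD_CStreamOutSize();
    void* const inBuf  = malloc(inSize);
    void* const outBuf = malloc(outSize);
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level);

    for (;;) {
        size_t const readSz = fread(inBuf, 1, inSize, fin);
        int const lastChunk = (readSz < inSize);
        ZSTD_EndDirective const mode = lastChunk ? ZSTD_e_end : ZSTD_e_continue;
        ZSTD_inBuffer input = { inBuf, readSz, 0 };
        int finished;
        do {
            ZSTD_outBuffer output = { outBuf, outSize, 0 };
            size_t const remaining = ZSTD_compressStream2(cctx, &output, &input, mode);
            fwrite(outBuf, 1, output.pos, fout);
            /* on ZSTD_e_end, keep looping until the frame is fully flushed */
            finished = lastChunk ? (remaining == 0) : (input.pos == input.size);
        } while (!finished);
        if (lastChunk) break;
    }
    ZSTD_freeCCtx(cctx); free(inBuf); free(outBuf);
}
```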

@@ -592,31 +585,28 @@ size_t ZSTD_freeCStream(ZSTD_CStream* zcs);

<pre><b>size_t ZSTD_CStreamInSize(void);    </b>/**< recommended size for input buffer */<b>
</b></pre><BR>
<pre><b>size_t ZSTD_CStreamOutSize(void);   </b>/**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */<b>
<pre><b>size_t ZSTD_CStreamOutSize(void);   </b>/**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */<b>
</b></pre><BR>
<a name="Chapter11"></a><h2>This is a legacy streaming API, and can be replaced by ZSTD_CCtx_reset() and</h2><pre> ZSTD_compressStream2(). It is redundant, but is still fully supported.
Advanced parameters and dictionary compression can only be used through the
new API.
<BR></pre>

<a name="Chapter12"></a><h2>Equivalent to:</h2><pre>
<pre><b>size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
</b>/*!<b>
 * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).
 * NOTE: The return value is different. ZSTD_compressStream() returns a hint for
 * the next read size (if non-zero and not an error). ZSTD_compressStream2()
 * returns the minimum nb of bytes left to flush (if non-zero and not an error).
 */
size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
</b>/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */<b>
size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
</b>/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */<b>
size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
</b><p>
    ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
    ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
    ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);

<BR></pre>
</p></pre><BR>

<a name="Chapter13"></a><h2>Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).</h2><pre> NOTE: The return value is different. ZSTD_compressStream() returns a hint for
the next read size (if non-zero and not an error). ZSTD_compressStream2()
returns the number of bytes left to flush (if non-zero and not an error).
<BR></pre>

<a name="Chapter14"></a><h2>Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush).</h2><pre></pre>

<a name="Chapter15"></a><h2>Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end).</h2><pre></pre>

<a name="Chapter16"></a><h2>Streaming decompression - HowTo</h2><pre>
<a name="Chapter9"></a><h2>Streaming decompression - HowTo</h2><pre>
  A ZSTD_DStream object is required to track streaming operations.
  Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
  ZSTD_DStream objects can be re-used multiple times.

@@ -647,14 +637,12 @@ size_t ZSTD_freeCStream(ZSTD_CStream* zcs);

<h3>ZSTD_DStream management functions</h3><pre></pre><b><pre>ZSTD_DStream* ZSTD_createDStream(void);
size_t ZSTD_freeDStream(ZSTD_DStream* zds);
</pre></b><BR>
<h3>Streaming decompression functions</h3><pre></pre><b><pre>size_t ZSTD_initDStream(ZSTD_DStream* zds);
size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
</pre></b><BR>
<h3>Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR>
<pre><b>size_t ZSTD_DStreamInSize(void);    </b>/*!< recommended size for input buffer */<b>
</b></pre><BR>
<pre><b>size_t ZSTD_DStreamOutSize(void);   </b>/*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */<b>
</b></pre><BR>
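
Editorial note: the matching decompression loop, again a sketch assuming open FILE* handles with abbreviated error handling.

```c
#include <stdio.h>
#include <stdlib.h>
#include <zstd.h>

static void decompress_stream(FILE* fin, FILE* fout)
{
    size_t const inSize  = ZSTD_DStreamInSize();
    size_t const outSize = ZSTD_DStreamOutSize();
    void* const inBuf  = malloc(inSize);
    void* const outBuf = malloc(outSize);
    ZSTD_DStream* const dstream = ZSTD_createDStream();
    ZSTD_initDStream(dstream);

    size_t readSz;
    while ((readSz = fread(inBuf, 1, inSize, fin)) != 0) {
        ZSTD_inBuffer input = { inBuf, readSz, 0 };
        while (input.pos < input.size) {
            ZSTD_outBuffer output = { outBuf, outSize, 0 };
            size_t const ret = ZSTD_decompressStream(dstream, &output, &input);
            if (ZSTD_isError(ret)) break;        /* corrupt or truncated input */
            fwrite(outBuf, 1, output.pos, fout);
        }
    }
    ZSTD_freeDStream(dstream); free(inBuf); free(outBuf);
}
```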

<a name="Chapter17"></a><h2>Simple dictionary API</h2><pre></pre>
<a name="Chapter10"></a><h2>Simple dictionary API</h2><pre></pre>

<pre><b>size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
                               void* dst, size_t dstCapacity,

@@ -680,7 +668,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
    Note : When `dict == NULL || dictSize < 8` no dictionary is used.
</p></pre><BR>
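
Editorial note: a fragment showing one-shot compression with a raw dictionary buffer, as described above; `dictBuf`, `dictSize`, `dst`, `src` and their sizes are assumed inputs.

```c
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
size_t const cSize = ZSTD_compress_usingDict(cctx,
                        dst, dstCapacity,
                        src, srcSize,
                        dictBuf, dictSize,
                        3 /* compression level */);
ZSTD_freeCCtx(cctx);
```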

<a name="Chapter18"></a><h2>Bulk processing dictionary API</h2><pre></pre>
<a name="Chapter11"></a><h2>Bulk processing dictionary API</h2><pre></pre>

<pre><b>ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
                             int compressionLevel);

@@ -723,7 +711,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
    Recommended when same dictionary is used multiple times.
</p></pre><BR>
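
Editorial note: a fragment sketching the bulk pattern, digesting the dictionary once and reusing it across inputs; `cctx`, `dictBuf`, `nbFiles` and the buffers are assumed to exist.

```c
ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuf, dictSize, 3);
for (int i = 0; i < nbFiles; i++) {
    ZSTD_compress_usingCDict(cctx, dst, dstCapacity,
                             srcs[i], srcSizes[i], cdict);  /* digest reused */
}
ZSTD_freeCDict(cdict);
```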

<a name="Chapter19"></a><h2>Dictionary helper functions</h2><pre></pre>
<a name="Chapter12"></a><h2>Dictionary helper functions</h2><pre></pre>

<pre><b>unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
</b><p> Provides the dictID stored within dictionary.

@@ -749,7 +737,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
    When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code.
</p></pre><BR>

<a name="Chapter20"></a><h2>Advanced dictionary and prefix API</h2><pre>
<a name="Chapter13"></a><h2>Advanced dictionary and prefix API</h2><pre>
  This API allows dictionaries to be used with ZSTD_compress2(),
  ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and
  only reset when the context is reset with ZSTD_reset_parameters or
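
Editorial note: a fragment illustrating the "sticky" behaviour described above; buffer names are placeholders and error checks are omitted.

```c
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
ZSTD_CCtx_loadDictionary(cctx, dictBuf, dictSize);          /* sticky across frames */
ZSTD_compress2(cctx, dst1, cap1, src1, size1);              /* uses the dictionary */
ZSTD_compress2(cctx, dst2, cap2, src2, size2);              /* still uses it */
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);   /* now cleared */
```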

@@ -867,15 +855,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
    Note that object memory usage can evolve (increase or decrease) over time.
</p></pre><BR>

<a name="Chapter21"></a><h2>ADVANCED AND EXPERIMENTAL FUNCTIONS</h2><pre>
  The definitions in the following section are considered experimental.
  They are provided for advanced scenarios.
  They should never be used with a dynamic library, as prototypes may change in the future.
  Use them only in association with static linking.

<BR></pre>

<a name="Chapter22"></a><h2>experimental API (static linking only)</h2><pre>
<a name="Chapter14"></a><h2>experimental API (static linking only)</h2><pre>
  The following symbols and constants
  are not planned to join "stable API" status in the near future.
  They can still change in future versions.

@@ -973,7 +953,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
    ZSTD_lcm_uncompressed = 2,  </b>/**< Always emit uncompressed literals. */<b>
} ZSTD_literalCompressionMode_e;
</b></pre><BR>
<a name="Chapter23"></a><h2>Frame size functions</h2><pre></pre>
<a name="Chapter15"></a><h2>Frame size functions</h2><pre></pre>

<pre><b>unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
</b><p> `src` should point to the start of a series of ZSTD encoded and/or skippable frames

@@ -998,7 +978,8 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
    however it does mean that all frame data must be present and valid.
</p></pre><BR>

<a name="Chapter24"></a><h2>ZSTD_decompressBound() :</h2><pre> `src` should point to the start of a series of ZSTD encoded and/or skippable frames
<pre><b>unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize);
</b><p> `src` should point to the start of a series of ZSTD encoded and/or skippable frames
    `srcSize` must be the _exact_ size of this series
    (i.e. there should be a frame boundary at `src + srcSize`)
    @return : - upper-bound for the decompressed size of all data in all successive frames

@@ -1010,7 +991,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
    note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by:
        upper-bound = # blocks * min(128 KB, Window_Size)

<BR></pre>
</p></pre><BR>
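
Editorial note: a fragment sketching how a caller could use the bound to size its destination buffer; this function lives in the experimental (static linking only) section, and error checks are abbreviated.

```c
unsigned long long const bound = ZSTD_decompressBound(cSrc, cSrcSize);
if (bound != ZSTD_CONTENTSIZE_ERROR) {
    void* const dst = malloc((size_t)bound);    /* always large enough */
    size_t const dSize = ZSTD_decompress(dst, (size_t)bound, cSrc, cSrcSize);
    /* ... use dSize bytes of dst ... */
    free(dst);
}
```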

<pre><b>size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
</b><p> srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX.

@@ -1018,7 +999,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
    or an error code (if srcSize is too small)
</p></pre><BR>

<a name="Chapter25"></a><h2>Memory management</h2><pre></pre>
<a name="Chapter16"></a><h2>Memory management</h2><pre></pre>

<pre><b>size_t ZSTD_estimateCCtxSize(int compressionLevel);
size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);

@@ -1098,7 +1079,7 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL };  </b>/**< this constant defers to stdlib's functions */<b>

</p></pre><BR>

<a name="Chapter26"></a><h2>Advanced compression functions</h2><pre></pre>
<a name="Chapter17"></a><h2>Advanced compression functions</h2><pre></pre>

<pre><b>ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
</b><p> Create a digested dictionary for compression

@@ -1243,7 +1224,7 @@ size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);

</p></pre><BR>

<a name="Chapter27"></a><h2>Advanced decompression functions</h2><pre></pre>
<a name="Chapter18"></a><h2>Advanced decompression functions</h2><pre></pre>

<pre><b>unsigned ZSTD_isFrame(const void* buffer, size_t size);
</b><p> Tells if the content of `buffer` starts with a valid Frame Identifier.

@@ -1305,7 +1286,7 @@ size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);

</p></pre><BR>

<a name="Chapter28"></a><h2>Advanced streaming functions</h2><pre>  Warning : most of these functions are now redundant with the Advanced API.
<a name="Chapter19"></a><h2>Advanced streaming functions</h2><pre>  Warning : most of these functions are now redundant with the Advanced API.
  Once Advanced API reaches "stable" status,
  redundant functions will be deprecated, and then at some point removed.
<BR></pre>

@@ -1407,18 +1388,41 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict*

</p></pre><BR>

<h3>Advanced Streaming decompression functions</h3><pre></pre><b><pre>size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); </b>/**< note: no dictionary will be used if dict == NULL or dictSize < 8 */<b>
size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); </b>/**< note : ddict is referenced, it must outlive decompression session */<b>
size_t ZSTD_resetDStream(ZSTD_DStream* zds); </b>/**< re-use decompression parameters from previous init; saves dictionary loading */<b>
<h3>Advanced Streaming decompression functions</h3><pre></pre><b><pre></b>/**<b>
 * This function is deprecated, and is equivalent to:
 *
 *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
 *     ZSTD_DCtx_loadDictionary(zds, dict, dictSize);
 *
 * note: no dictionary will be used if dict == NULL or dictSize < 8
 */
size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
</b>/**<b>
 * This function is deprecated, and is equivalent to:
 *
 *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
 *     ZSTD_DCtx_refDDict(zds, ddict);
 *
 * note : ddict is referenced, it must outlive decompression session
 */
size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);
</b>/**<b>
 * This function is deprecated, and is equivalent to:
 *
 *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
 *
 * re-use decompression parameters from previous init; saves dictionary loading
 */
size_t ZSTD_resetDStream(ZSTD_DStream* zds);
</pre></b><BR>

<a name="Chapter29"></a><h2>Buffer-less and synchronous inner streaming functions</h2><pre>
<a name="Chapter20"></a><h2>Buffer-less and synchronous inner streaming functions</h2><pre>
  This is an advanced API, giving full control over buffer management, for users who need direct control over memory.
  But it's also a complex one, with several restrictions, documented below.
  Prefer normal streaming API for an easier experience.

<BR></pre>

<a name="Chapter30"></a><h2>Buffer-less streaming compression (synchronous mode)</h2><pre>
<a name="Chapter21"></a><h2>Buffer-less streaming compression (synchronous mode)</h2><pre>
  A ZSTD_CCtx object is required to track streaming operations.
  Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
  ZSTD_CCtx object can be re-used multiple times within successive compression operations.

@@ -1454,7 +1458,7 @@ size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize);   </b>/* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */<b>
size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize);   </b>/**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */<b>
</pre></b><BR>
<a name="Chapter31"></a><h2>Buffer-less streaming decompression (synchronous mode)</h2><pre>
<a name="Chapter22"></a><h2>Buffer-less streaming decompression (synchronous mode)</h2><pre>
  A ZSTD_DCtx object is required to track streaming operations.
  Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
  A ZSTD_DCtx object can be re-used multiple times.

@@ -1536,23 +1540,21 @@ typedef struct {
    unsigned checksumFlag;
} ZSTD_frameHeader;
</pre></b><BR>
<a name="Chapter32"></a><h2>ZSTD_getFrameHeader() :</h2><pre> decode Frame Header, or requires larger `srcSize`.
<pre><b>size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize);   </b>/**< doesn't consume input */<b>
</b>/*! ZSTD_getFrameHeader_advanced() :<b>
 * same as ZSTD_getFrameHeader(),
 * with added capability to select a format (like ZSTD_f_zstd1_magicless) */
size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize);   </b>/**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */<b>
</b><p> decode Frame Header, or requires larger `srcSize`.
    @return : 0, `zfhPtr` is correctly filled,
             >0, `srcSize` is too small, value is wanted `srcSize` amount,
             or an error code, which can be tested using ZSTD_isError()
<BR></pre>

<pre><b>size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize);   </b>/**< doesn't consume input */<b>
</b></pre><BR>
<pre><b>size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize);   </b>/**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */<b>
</b><p> same as ZSTD_getFrameHeader(),
    with added capability to select a format (like ZSTD_f_zstd1_magicless)
</p></pre><BR>
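
Editorial note: a fragment exercising the return-value convention spelled out above (0 = filled, >0 = wanted srcSize, otherwise an error code); buffer names are placeholders.

```c
ZSTD_frameHeader zfh;
size_t const ret = ZSTD_getFrameHeader(&zfh, buf, avail);
if (ZSTD_isError(ret)) {
    /* invalid or unrecognized frame header */
} else if (ret > 0) {
    /* header incomplete: `ret` bytes are wanted in total, read more and retry */
} else {
    /* zfh is filled: zfh.frameContentSize, zfh.windowSize, zfh.checksumFlag ... */
}
```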

<pre><b>typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
</b></pre><BR>
<a name="Chapter33"></a><h2>Block level API</h2><pre></pre>
<a name="Chapter23"></a><h2>Block level API</h2><pre></pre>

<pre><b></b><p> Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
    The user will have to keep track of the information required to regenerate data, such as compressed and content sizes.


examples/Makefile

@@ -77,7 +77,6 @@ test: all
	@echo -- Edge cases detection
	! ./streaming_decompression tmp    # invalid input, must fail
	! ./simple_decompression tmp       # invalid input, must fail
	! ./simple_decompression tmp.zst   # unknown input size, must fail
	touch tmpNull                      # create 0-size file
	./simple_compression tmpNull
	./simple_decompression tmpNull.zst # 0-size frame : must work

lib/Makefile

@@ -17,6 +17,7 @@ LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT))
LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT))
LIBVER := $(shell echo $(LIBVER_SCRIPT))
VERSION?= $(LIBVER)
CCVER := $(shell $(CC) --version)

CPPFLAGS+= -I. -I./common -DXXH_NAMESPACE=ZSTD_
ifeq ($(OS),Windows_NT)   # MinGW assumed

@@ -45,6 +46,10 @@ ZDICT_FILES := $(sort $(wildcard dictBuilder/*.c))
ZDEPR_FILES := $(sort $(wildcard deprecated/*.c))
ZSTD_FILES := $(ZSTDCOMMON_FILES)

ifeq ($(findstring GCC,$(CCVER)),GCC)
decompress/zstd_decompress_block.o : CFLAGS+=-fno-tree-vectorize
endif

ZSTD_LEGACY_SUPPORT ?= 5
ZSTD_LIB_COMPRESSION ?= 1
ZSTD_LIB_DECOMPRESSION ?= 1


lib/common/compiler.h

@@ -127,6 +127,13 @@
    }                                     \
}

/* vectorization */
#if !defined(__clang__) && defined(__GNUC__)
#  define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
#else
#  define DONT_VECTORIZE
#endif
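
Editorial note: a sketch of how a function would opt out of GCC auto-vectorization with the macro above; on non-GCC compilers the attribute expands to nothing, so the declaration stays portable. The function itself is hypothetical.

```c
/* keep this copy loop scalar under GCC's tree vectorizer */
static DONT_VECTORIZE void copy_bytes(char* dst, const char* src, size_t n)
{
    for (size_t i = 0; i < n; i++) dst[i] = src[i];
}
```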

/* disable warnings */
#ifdef _MSC_VER    /* Visual Studio */
#  include <intrin.h>                    /* For Visual 2005 */


lib/common/zstd_internal.h

@@ -34,7 +34,6 @@
#endif
#include "xxhash.h"                /* XXH_reset, update, digest */

#if defined (__cplusplus)
extern "C" {
#endif

@@ -193,19 +192,72 @@ static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
*   Shared functions to include for inlining
*********************************************/
static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }

#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); }
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }

#define WILDCOPY_OVERLENGTH 8
#define VECLEN 16

typedef enum {
    ZSTD_no_overlap,
    ZSTD_overlap_src_before_dst,
    /*  ZSTD_overlap_dst_before_src, */
} ZSTD_overlap_e;

/*! ZSTD_wildcopy() :
 *  custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
#define WILDCOPY_OVERLENGTH 8
MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
{
    ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
    const BYTE* ip = (const BYTE*)src;
    BYTE* op = (BYTE*)dst;
    BYTE* const oend = op + length;
    do
        COPY8(op, ip)
    while (op < oend);

    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
    if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) {
        do
            COPY8(op, ip)
        while (op < oend);
    }
    else {
        if ((length & 8) == 0)
            COPY8(op, ip);
        do {
            COPY16(op, ip);
        }
        while (op < oend);
    }
}

/*! ZSTD_wildcopy_16min() :
 *  same semantics as ZSTD_wildcopy() except guaranteed to be able to copy 16 bytes at the start */
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
{
    ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
    const BYTE* ip = (const BYTE*)src;
    BYTE* op = (BYTE*)dst;
    BYTE* const oend = op + length;

    assert(length >= 8);
    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));

    if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) {
        do
            COPY8(op, ip)
        while (op < oend);
    }
    else {
        if ((length & 8) == 0)
            COPY8(op, ip);
        do {
            COPY16(op, ip);
        }
        while (op < oend);
    }
}

MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd)   /* should be faster for decoding, but strangely, not verified on all platforms */
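
Editorial note: because ZSTD_wildcopy() may write up to WILDCOPY_OVERLENGTH bytes beyond `length`, callers must leave slack in the destination. A hypothetical caller would size its buffer like this:

```c
/* reserve WILDCOPY_OVERLENGTH extra bytes so the over-write stays in bounds */
BYTE* const out = (BYTE*)malloc(dstSize + WILDCOPY_OVERLENGTH);
ZSTD_wildcopy(out, in, (ptrdiff_t)dstSize, ZSTD_no_overlap);   /* fast 16-byte copies */
```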

lib/compress/zstd_compress.c

@@ -385,6 +385,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
        bounds.upperBound = ZSTD_lcm_uncompressed;
        return bounds;

    case ZSTD_c_targetCBlockSize:
        bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN;
        bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
        return bounds;

    default:
        {   ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 };
            return boundError;

@@ -452,6 +457,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
    case ZSTD_c_ldmHashRateLog:
    case ZSTD_c_forceAttachDict:
    case ZSTD_c_literalCompressionMode:
    case ZSTD_c_targetCBlockSize:
    default:
        return 0;
    }

@@ -497,6 +503,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
    case ZSTD_c_ldmHashLog:
    case ZSTD_c_ldmMinMatch:
    case ZSTD_c_ldmBucketSizeLog:
    case ZSTD_c_targetCBlockSize:
        break;

    default: RETURN_ERROR(parameter_unsupported);

@@ -671,6 +678,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
        CCtxParams->ldmParams.hashRateLog = value;
        return CCtxParams->ldmParams.hashRateLog;

    case ZSTD_c_targetCBlockSize :
        if (value!=0)   /* 0 ==> default */
            BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
        CCtxParams->targetCBlockSize = value;
        return CCtxParams->targetCBlockSize;

    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
    }
}
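
Editorial note: a sketch of selecting the new target-compressed-block-size parameter through the public setter wired up above. In v1.4.1 this parameter is experimental, so ZSTD_STATIC_LINKING_ONLY must be defined before including zstd.h; the 1024-byte target is illustrative.

```c
#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>

void set_small_blocks(ZSTD_CCtx* cctx)
{
    /* ask the compressor to aim for ~1 KB compressed blocks (0 = default) */
    size_t const r = ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 1024);
    if (ZSTD_isError(r)) {
        /* value outside [ZSTD_TARGETCBLOCKSIZE_MIN, ZSTD_TARGETCBLOCKSIZE_MAX] */
    }
}
```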

@@ -692,13 +705,13 @@ size_t ZSTD_CCtxParams_getParameter(
        *value = CCtxParams->compressionLevel;
        break;
    case ZSTD_c_windowLog :
        *value = CCtxParams->cParams.windowLog;
        *value = (int)CCtxParams->cParams.windowLog;
        break;
    case ZSTD_c_hashLog :
        *value = CCtxParams->cParams.hashLog;
        *value = (int)CCtxParams->cParams.hashLog;
        break;
    case ZSTD_c_chainLog :
        *value = CCtxParams->cParams.chainLog;
        *value = (int)CCtxParams->cParams.chainLog;
        break;
    case ZSTD_c_searchLog :
        *value = CCtxParams->cParams.searchLog;

@@ -773,6 +786,9 @@ size_t ZSTD_CCtxParams_getParameter(
    case ZSTD_c_ldmHashRateLog :
        *value = CCtxParams->ldmParams.hashRateLog;
        break;
    case ZSTD_c_targetCBlockSize :
        *value = (int)CCtxParams->targetCBlockSize;
        break;
    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
    }
    return 0;

@@ -930,12 +946,12 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
    @return : 0, or an error code if one value is beyond authorized range */
size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
{
    BOUNDCHECK(ZSTD_c_windowLog, cParams.windowLog);
    BOUNDCHECK(ZSTD_c_chainLog,  cParams.chainLog);
    BOUNDCHECK(ZSTD_c_hashLog,   cParams.hashLog);
    BOUNDCHECK(ZSTD_c_searchLog, cParams.searchLog);
    BOUNDCHECK(ZSTD_c_minMatch,  cParams.minMatch);
    BOUNDCHECK(ZSTD_c_targetLength,cParams.targetLength);
    BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog);
    BOUNDCHECK(ZSTD_c_chainLog,  (int)cParams.chainLog);
    BOUNDCHECK(ZSTD_c_hashLog,   (int)cParams.hashLog);
    BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog);
    BOUNDCHECK(ZSTD_c_minMatch,  (int)cParams.minMatch);
    BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength);
    BOUNDCHECK(ZSTD_c_strategy,  cParams.strategy);
    return 0;
}

@@ -951,7 +967,7 @@ ZSTD_clampCParams(ZSTD_compressionParameters cParams)
        if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
        else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
    }
#   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, int)
#   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
    CLAMP(ZSTD_c_windowLog, cParams.windowLog);
    CLAMP(ZSTD_c_chainLog,  cParams.chainLog);
    CLAMP(ZSTD_c_hashLog,   cParams.hashLog);
@@ -1282,15 +1298,14 @@ static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
}

/*! ZSTD_invalidateMatchState()
 * Invalidate all the matches in the match finder tables.
 * Requires nextSrc and base to be set (can be NULL).
 *  Invalidate all the matches in the match finder tables.
 *  Requires nextSrc and base to be set (can be NULL).
 */
static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
{
    ZSTD_window_clear(&ms->window);

    ms->nextToUpdate = ms->window.dictLimit;
    ms->nextToUpdate3 = ms->window.dictLimit;
    ms->loadedDictEnd = 0;
    ms->opt.litLengthSum = 0;  /* force reset of btopt stats */
    ms->dictMatchState = NULL;

@@ -1327,15 +1342,17 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pledgedSrcSize)

typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e;

typedef enum { ZSTD_resetTarget_CDict, ZSTD_resetTarget_CCtx } ZSTD_resetTarget_e;

static void*
ZSTD_reset_matchState(ZSTD_matchState_t* ms,
                      void* ptr,
                      const ZSTD_compressionParameters* cParams,
                      ZSTD_compResetPolicy_e const crp, U32 const forCCtx)
                      ZSTD_compResetPolicy_e const crp, ZSTD_resetTarget_e const forWho)
{
    size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
    size_t const hSize = ((size_t)1) << cParams->hashLog;
    U32    const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
    U32    const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
    size_t const h3Size = ((size_t)1) << hashLog3;
    size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);

@@ -1349,7 +1366,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
    ZSTD_invalidateMatchState(ms);

    /* opt parser space */
    if (forCCtx && (cParams->strategy >= ZSTD_btopt)) {
    if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
        DEBUGLOG(4, "reserving optimal parser space");
        ms->opt.litFreq = (unsigned*)ptr;
        ms->opt.litLengthFreq = ms->opt.litFreq + (1<<Litbits);

@@ -1377,6 +1394,19 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
    return ptr;
}

/* ZSTD_indexTooCloseToMax() :
 * minor optimization : prefer memset() rather than reduceIndex()
 * which is measurably slow in some circumstances (reported for Visual Studio).
 * Works when re-using a context for a lot of smallish inputs :
 * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN,
 * memset() will be triggered before reduceIndex().
 */
#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB)
static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)
{
    return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
}

#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 /* define "workspace is too large" as this number of times larger than needed */
#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128  /* when workspace is continuously too large
                                                 * during at least this number of times,
@@ -1388,7 +1418,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
    note : `params` are assumed fully validated at this stage */
static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
                                      ZSTD_CCtx_params params,
                                      U64 pledgedSrcSize,
                                      U64 const pledgedSrcSize,
                                      ZSTD_compResetPolicy_e const crp,
                                      ZSTD_buffered_policy_e const zbuff)
{

@@ -1400,13 +1430,21 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
        if (ZSTD_equivalentParams(zc->appliedParams, params,
                                  zc->inBuffSize,
                                  zc->seqStore.maxNbSeq, zc->seqStore.maxNbLit,
                                  zbuff, pledgedSrcSize)) {
            DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode (wLog1=%u, blockSize1=%zu)",
                        zc->appliedParams.cParams.windowLog, zc->blockSize);
                                  zbuff, pledgedSrcSize) ) {
            DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> consider continue mode");
            zc->workSpaceOversizedDuration += (zc->workSpaceOversizedDuration > 0);   /* if it was too large, it still is */
            if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION)
            if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION) {
                DEBUGLOG(4, "continue mode confirmed (wLog1=%u, blockSize1=%zu)",
                            zc->appliedParams.cParams.windowLog, zc->blockSize);
                if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) {
                    /* prefer a reset, faster than a rescale */
                    ZSTD_reset_matchState(&zc->blockState.matchState,
                                          zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32,
                                          &params.cParams,
                                          crp, ZSTD_resetTarget_CCtx);
                }
                return ZSTD_continueCCtx(zc, params, pledgedSrcSize);
    }   }
    }   }   }
    DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx");

    if (params.ldmParams.enableLdm) {

@@ -1449,7 +1487,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
        DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);

        if (workSpaceTooSmall || workSpaceWasteful) {
            DEBUGLOG(4, "Need to resize workSpaceSize from %zuKB to %zuKB",
            DEBUGLOG(4, "Resize workSpaceSize from %zuKB to %zuKB",
                        zc->workSpaceSize >> 10,
                        neededSpace >> 10);

@@ -1491,7 +1529,10 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,

        ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);

        ptr = zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32;
        ptr = ZSTD_reset_matchState(&zc->blockState.matchState,
                                    zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32,
                                    &params.cParams,
                                    crp, ZSTD_resetTarget_CCtx);

        /* ldm hash table */
        /* initialize bucketOffsets table later for pointer alignment */

@@ -1509,8 +1550,6 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
        }
        assert(((size_t)ptr & 3) == 0);  /* ensure ptr is properly aligned */

        ptr = ZSTD_reset_matchState(&zc->blockState.matchState, ptr, &params.cParams, crp, /* forCCtx */ 1);

        /* sequences storage */
        zc->seqStore.maxNbSeq = maxNbSeq;
        zc->seqStore.sequencesStart = (seqDef*)ptr;

@@ -1587,15 +1626,14 @@ static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
     * handled in _enforceMaxDist */
}

static size_t ZSTD_resetCCtx_byAttachingCDict(
    ZSTD_CCtx* cctx,
    const ZSTD_CDict* cdict,
    ZSTD_CCtx_params params,
    U64 pledgedSrcSize,
    ZSTD_buffered_policy_e zbuff)
static size_t
ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
                                const ZSTD_CDict* cdict,
                                ZSTD_CCtx_params params,
                                U64 pledgedSrcSize,
                                ZSTD_buffered_policy_e zbuff)
{
    {
        const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
    {   const ZSTD_compressionParameters* const cdict_cParams = &cdict->matchState.cParams;
        unsigned const windowLog = params.cParams.windowLog;
        assert(windowLog != 0);
        /* Resize working context table params for input only, since the dict

@@ -1607,8 +1645,7 @@ static size_t ZSTD_resetCCtx_byAttachingCDict(
        assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
    }

    {
        const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
    {   const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
                                  - cdict->matchState.window.base);
        const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
        if (cdictLen == 0) {

@@ -1625,9 +1662,9 @@ static size_t ZSTD_resetCCtx_byAttachingCDict(
                    cctx->blockState.matchState.window.base + cdictEnd;
                ZSTD_window_clear(&cctx->blockState.matchState.window);
            }
            /* loadedDictEnd is expressed within the referential of the active context */
            cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
        }
    }
    }   }
    cctx->dictID = cdict->dictID;

@@ -1681,7 +1718,6 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
        ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
        dstMatchState->window       = srcMatchState->window;
        dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
        dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3;
        dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
    }

@@ -1761,7 +1797,6 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
        ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
        dstMatchState->window       = srcMatchState->window;
        dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
        dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3;
        dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
    }
    dstCCtx->dictID = srcCCtx->dictID;

@@ -1831,16 +1866,15 @@ static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue)

/*! ZSTD_reduceIndex() :
*   rescale all indexes to avoid future overflow (indexes are U32) */
static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue)
{
    ZSTD_matchState_t* const ms = &zc->blockState.matchState;
    {   U32 const hSize = (U32)1 << zc->appliedParams.cParams.hashLog;
    {   U32 const hSize = (U32)1 << params->cParams.hashLog;
        ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
    }

    if (zc->appliedParams.cParams.strategy != ZSTD_fast) {
        U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog;
        if (zc->appliedParams.cParams.strategy == ZSTD_btlazy2)
    if (params->cParams.strategy != ZSTD_fast) {
        U32 const chainSize = (U32)1 << params->cParams.chainLog;
        if (params->cParams.strategy == ZSTD_btlazy2)
            ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
        else
            ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);

@@ -2524,6 +2558,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
        op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
    else
        op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
    assert(op <= oend);
    if (nbSeq==0) {
        /* Copy the old tables over as if we repeated them */
        memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));

@@ -2532,6 +2567,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,

    /* seqHead : flags for FSE encoding type */
    seqHead = op++;
    assert(op <= oend);

    /* convert length/distances into codes */
    ZSTD_seqToCodes(seqStorePtr);

@@ -2555,6 +2591,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
        if (LLtype == set_compressed)
            lastNCount = op;
        op += countSize;
        assert(op <= oend);
    }   }
    /* build CTable for Offsets */
    {   unsigned max = MaxOff;

@@ -2577,6 +2614,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
        if (Offtype == set_compressed)
            lastNCount = op;
        op += countSize;
        assert(op <= oend);
    }   }
    /* build CTable for MatchLengths */
    {   unsigned max = MaxML;

@@ -2597,6 +2635,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
        if (MLtype == set_compressed)
            lastNCount = op;
        op += countSize;
        assert(op <= oend);
    }   }

    *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));

@@ -2610,6 +2649,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
                                        longOffsets, bmi2);
        FORWARD_IF_ERROR(bitstreamSize);
        op += bitstreamSize;
        assert(op <= oend);
        /* zstd versions <= 1.3.4 mistakenly report corruption when
         * FSE_readNCount() receives a buffer < 4 bytes.
         * Fixed by https://github.com/facebook/zstd/pull/1146.
@@ -2721,30 +2761,24 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr)
    ssPtr->longLengthID = 0;
}

static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
                                        void* dst, size_t dstCapacity,
                                        const void* src, size_t srcSize)
typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;

static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
{
    ZSTD_matchState_t* const ms = &zc->blockState.matchState;
    size_t cSize;
    DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
                (unsigned)dstCapacity, (unsigned)ms->window.dictLimit, (unsigned)ms->nextToUpdate);
    DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize);
    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);

    /* Assert that we have correctly flushed the ctx params into the ms's copy */
    ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);

    if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
        ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch);
        cSize = 0;
        goto out;  /* don't even attempt compression below a certain srcSize */
        return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */
    }
    ZSTD_resetSeqStore(&(zc->seqStore));
    /* required for optimal parser to read stats from dictionary */
    ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy;
    /* tell the optimal parser how we expect to compress literals */
    ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode;

    /* a gap between an attached dict and the current window is not safe,
     * they must remain adjacent,
     * and when that stops being the case, the dict must be unset */

@@ -2798,6 +2832,21 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
        {   const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
            ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
    }   }
    return ZSTDbss_compress;
}

static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
                                        void* dst, size_t dstCapacity,
                                        const void* src, size_t srcSize)
{
    size_t cSize;
    DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate);

    {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
        FORWARD_IF_ERROR(bss);
        if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
    }

    /* encode sequences and literals */
    cSize = ZSTD_compressSequences(&zc->seqStore,

@@ -2826,6 +2875,25 @@ out:
}


static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, void const* ip, void const* iend)
{
    if (ZSTD_window_needOverflowCorrection(ms->window, iend)) {
        U32 const maxDist = (U32)1 << params->cParams.windowLog;
        U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
        U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
        ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
        ZSTD_reduceIndex(ms, params, correction);
        if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
        else ms->nextToUpdate -= correction;
        /* invalidate dictionaries on overflow correction */
        ms->loadedDictEnd = 0;
        ms->dictMatchState = NULL;
    }
}
/*! ZSTD_compress_frameChunk() :
*   Compress a chunk of data into one or multiple blocks.
*   All blocks will be terminated, all input will be consumed.

@@ -2844,7 +2912,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
    BYTE* const ostart = (BYTE*)dst;
    BYTE* op = ostart;
    U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;
    assert(cctx->appliedParams.cParams.windowLog <= 31);
    assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);

    DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
    if (cctx->appliedParams.fParams.checksumFlag && srcSize)

@@ -2859,19 +2927,10 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
                        "not enough space to store compressed block");
        if (remaining < blockSize) blockSize = remaining;

        if (ZSTD_window_needOverflowCorrection(ms->window, ip + blockSize)) {
            U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy);
            U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
            ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
            ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
            ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
            ZSTD_reduceIndex(cctx, correction);
            if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
            else ms->nextToUpdate -= correction;
            ms->loadedDictEnd = 0;
            ms->dictMatchState = NULL;
        }
        ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
        ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, ip, ip + blockSize);
        ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);

        /* Ensure hash/chain table insertion resumes no sooner than lowlimit */
        if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;

        {   size_t cSize = ZSTD_compressBlock_internal(cctx,

@@ -2899,7 +2958,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
    }   }

    if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
    return op-ostart;
    return (size_t)(op-ostart);
}


@@ -2991,6 +3050,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams,
                                       cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
        FORWARD_IF_ERROR(fhSize);
        assert(fhSize <= dstCapacity);
        dstCapacity -= fhSize;
        dst = (char*)dst + fhSize;
        cctx->stage = ZSTDcs_ongoing;

@@ -3007,18 +3067,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,

    if (!frame) {
        /* overflow check and correction for block mode */
        if (ZSTD_window_needOverflowCorrection(ms->window, (const char*)src + srcSize)) {
            U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy);
            U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, 1 << cctx->appliedParams.cParams.windowLog, src);
            ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
            ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
            ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
            ZSTD_reduceIndex(cctx, correction);
            if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
            else ms->nextToUpdate -= correction;
            ms->loadedDictEnd = 0;
            ms->dictMatchState = NULL;
        }
        ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, src, (BYTE const*)src + srcSize);
    }

    DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
@ -3074,7 +3123,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
|
||||
const void* src, size_t srcSize,
|
||||
ZSTD_dictTableLoadMethod_e dtlm)
|
||||
{
|
||||
const BYTE* const ip = (const BYTE*) src;
|
||||
const BYTE* ip = (const BYTE*) src;
|
||||
const BYTE* const iend = ip + srcSize;
|
||||
|
||||
ZSTD_window_update(&ms->window, src, srcSize);
|
||||
@ -3085,32 +3134,42 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
|
||||
|
||||
if (srcSize <= HASH_READ_SIZE) return 0;
|
||||
|
||||
switch(params->cParams.strategy)
|
||||
{
|
||||
case ZSTD_fast:
|
||||
ZSTD_fillHashTable(ms, iend, dtlm);
|
||||
break;
|
||||
case ZSTD_dfast:
|
||||
ZSTD_fillDoubleHashTable(ms, iend, dtlm);
|
||||
break;
|
||||
while (iend - ip > HASH_READ_SIZE) {
|
||||
size_t const remaining = iend - ip;
|
||||
size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX);
|
||||
const BYTE* const ichunk = ip + chunk;
|
||||
|
||||
case ZSTD_greedy:
|
||||
case ZSTD_lazy:
|
||||
case ZSTD_lazy2:
|
||||
if (srcSize >= HASH_READ_SIZE)
|
||||
ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE);
|
||||
break;
|
||||
ZSTD_overflowCorrectIfNeeded(ms, params, ip, ichunk);
|
||||
|
||||
case ZSTD_btlazy2: /* we want the dictionary table fully sorted */
|
||||
case ZSTD_btopt:
|
||||
case ZSTD_btultra:
|
||||
case ZSTD_btultra2:
|
||||
if (srcSize >= HASH_READ_SIZE)
|
||||
ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
|
||||
break;
|
||||
switch(params->cParams.strategy)
|
||||
{
|
||||
case ZSTD_fast:
|
||||
ZSTD_fillHashTable(ms, ichunk, dtlm);
|
||||
break;
|
||||
case ZSTD_dfast:
|
||||
ZSTD_fillDoubleHashTable(ms, ichunk, dtlm);
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(0); /* not possible : not a valid strategy id */
|
||||
case ZSTD_greedy:
|
||||
case ZSTD_lazy:
|
||||
case ZSTD_lazy2:
|
||||
if (chunk >= HASH_READ_SIZE)
|
||||
ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE);
|
||||
break;
|
||||
|
||||
case ZSTD_btlazy2: /* we want the dictionary table fully sorted */
|
||||
case ZSTD_btopt:
|
||||
case ZSTD_btultra:
|
||||
case ZSTD_btultra2:
|
||||
if (chunk >= HASH_READ_SIZE)
|
||||
ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk);
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(0); /* not possible : not a valid strategy id */
|
||||
}
|
||||
|
||||
ip = ichunk;
|
||||
}
|
||||
|
||||
ms->nextToUpdate = (U32)(iend - ms->window.base);
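Dictionary content is now inserted in slices of at most ZSTD_CHUNKSIZE_MAX, with ZSTD_overflowCorrectIfNeeded() run before each slice, so a very large dictionary can no longer push indexes past the overflow threshold in a single step. The chunking pattern in isolation, under assumed names (CHUNK_MAX stands in for ZSTD_CHUNKSIZE_MAX):

/* Chunked-processing skeleton; illustrative, not the zstd code. */
#include <stddef.h>

#define CHUNK_MAX ((size_t)1 << 30)   /* assumed stand-in for ZSTD_CHUNKSIZE_MAX */

static void process_in_chunks(const unsigned char *ip, const unsigned char *iend,
                              void (*maintain)(void),   /* e.g. overflow correction */
                              void (*consume)(const unsigned char *, size_t))
{
    while (ip < iend) {
        size_t const remaining = (size_t)(iend - ip);
        size_t const chunk = remaining < CHUNK_MAX ? remaining : CHUNK_MAX;
        maintain();           /* bookkeeping runs between slices */
        consume(ip, chunk);   /* fill tables for this slice only */
        ip += chunk;
    }
}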

@@ -3297,12 +3356,11 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,

FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
ZSTDcrp_continue, zbuff) );
{
size_t const dictID = ZSTD_compress_insertDictionary(
{ size_t const dictID = ZSTD_compress_insertDictionary(
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
&params, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace);
FORWARD_IF_ERROR(dictID);
assert(dictID <= (size_t)(U32)-1);
assert(dictID <= UINT_MAX);
cctx->dictID = (U32)dictID;
}
return 0;

@@ -3555,10 +3613,10 @@ static size_t ZSTD_initCDict_internal(

/* Reset the state to no dictionary */
ZSTD_reset_compressedBlockState(&cdict->cBlockState);
{ void* const end = ZSTD_reset_matchState(
&cdict->matchState,
(U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32,
&cParams, ZSTDcrp_continue, /* forCCtx */ 0);
{ void* const end = ZSTD_reset_matchState(&cdict->matchState,
(U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32,
&cParams,
ZSTDcrp_continue, ZSTD_resetTarget_CDict);
assert(end == (char*)cdict->workspace + cdict->workspaceSize);
(void)end;
}

@@ -4068,7 +4126,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
case zcss_flush:
DEBUGLOG(5, "flush stage");
{ size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
size_t const flushed = ZSTD_limitCopy(op, oend-op,
size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op),
zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
(unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed);

@@ -4262,7 +4320,7 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */
/* single thread mode : attempt to calculate remaining to flush more precisely */
{ size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
size_t const checksumSize = zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4;
size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4);
size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize;
DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush);
return toFlush;

@@ -33,13 +33,13 @@ extern "C" {
***************************************/
#define kSearchStrength 8
#define HASH_READ_SIZE 8
#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 now means "unsorted".
#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
It's not a big deal though : candidate will just be sorted again.
Additionally, candidate position 1 will be lost.
But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy
Constant required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
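A tiny sketch of the sentinel-index idea this comment describes; the names below are hypothetical:

/* Index 1 is reserved as a marker, so it can never name a real candidate. */
#include <stdint.h>

#define UNSORTED_MARK 1u   /* "children of this node are not sorted yet" */

static int is_real_candidate(uint32_t childIndex)
{
    /* 0 terminates a chain, 1 is the reserved marker; anything else is a
     * genuine position, even after table re-use with a different strategy */
    return (childIndex != 0) && (childIndex != UNSORTED_MARK);
}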

/*-*************************************

@@ -128,21 +128,20 @@ typedef struct {
BYTE const* base; /* All regular indexes relative to this position */
BYTE const* dictBase; /* extDict indexes relative to this position */
U32 dictLimit; /* below that point, need extDict */
U32 lowLimit; /* below that point, no more data */
U32 lowLimit; /* below that point, no more valid data */
} ZSTD_window_t;

typedef struct ZSTD_matchState_t ZSTD_matchState_t;
struct ZSTD_matchState_t {
ZSTD_window_t window; /* State for window round buffer management */
U32 loadedDictEnd; /* index of end of dictionary */
U32 loadedDictEnd; /* index of end of dictionary, within context's referential. When dict referential is copied into active context (i.e. not attached), effectively same value as dictSize, since referential starts from zero */
U32 nextToUpdate; /* index from which to continue table update */
U32 nextToUpdate3; /* index from which to continue table update */
U32 hashLog3; /* dispatch table : larger == faster, more memory */
U32* hashTable;
U32* hashTable3;
U32* chainTable;
optState_t opt; /* optimal parser state */
const ZSTD_matchState_t * dictMatchState;
const ZSTD_matchState_t* dictMatchState;
ZSTD_compressionParameters cParams;
};

@@ -195,6 +194,9 @@ struct ZSTD_CCtx_params_s {
int compressionLevel;
int forceWindow; /* force back-references to respect limit of
* 1<<wLog, even for dictionary */
size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize.
* No target when targetCBlockSize == 0.
* There is no guarantee on compressed block size */

ZSTD_dictAttachPref_e attachDictPref;
ZSTD_literalCompressionMode_e literalCompressionMode;

@@ -324,7 +326,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
/* copy Literals */
assert(seqStorePtr->maxNbLit <= 128 KB);
assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
ZSTD_wildcopy(seqStorePtr->lit, literals, litLength, ZSTD_no_overlap);
seqStorePtr->lit += litLength;

/* literal Length */

@@ -564,6 +566,9 @@ MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64
/*-*************************************
* Round buffer management
***************************************/
#if (ZSTD_WINDOWLOG_MAX_64 > 31)
# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
#endif
/* Max current allowed */
#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
/* Maximum chunk size before overflow correction needs to be called again */

@@ -675,31 +680,49 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
* Updates lowLimit so that:
* (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
*
* This allows a simple check that index >= lowLimit to see if index is valid.
* This must be called before a block compression call, with srcEnd as the block
* source end.
* It ensures index is valid as long as index >= lowLimit.
* This must be called before a block compression call.
*
* If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
* This is because dictionaries are allowed to be referenced as long as the last
* byte of the dictionary is in the window, but once they are out of range,
* they cannot be referenced. If loadedDictEndPtr is NULL, we use
* loadedDictEnd == 0.
* loadedDictEnd is only defined if a dictionary is in use for current compression.
* As the name implies, loadedDictEnd represents the index at end of dictionary.
* The value lies within context's referential, it can be directly compared to blockEndIdx.
*
* In normal dict mode, the dict is between lowLimit and dictLimit. In
* dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary
* is below them. forceWindow and dictMatchState are therefore incompatible.
* If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
* If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
* This is because dictionaries are allowed to be referenced fully
* as long as the last byte of the dictionary is in the window.
* Once input has progressed beyond window size, dictionary cannot be referenced anymore.
*
* In normal dict mode, the dictionary lies between lowLimit and dictLimit.
* In dictMatchState mode, lowLimit and dictLimit are the same,
* and the dictionary is below them.
* forceWindow and dictMatchState are therefore incompatible.
*/
MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
void const* srcEnd,
U32 maxDist,
U32* loadedDictEndPtr,
const void* blockEnd,
U32 maxDist,
U32* loadedDictEndPtr,
const ZSTD_matchState_t** dictMatchStatePtr)
{
U32 const blockEndIdx = (U32)((BYTE const*)srcEnd - window->base);
U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u",
(unsigned)blockEndIdx, (unsigned)maxDist);
U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);

/* - When there is no dictionary : loadedDictEnd == 0.
In which case, the test (blockEndIdx > maxDist) is merely to avoid
overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
- When there is a standard dictionary :
Index referential is copied from the dictionary,
which means it starts from 0.
In which case, loadedDictEnd == dictSize,
and it makes sense to compare `blockEndIdx > maxDist + dictSize`
since `blockEndIdx` also starts from zero.
- When there is an attached dictionary :
loadedDictEnd is expressed within the referential of the context,
so it can be directly compared against blockEndIdx.
*/
if (blockEndIdx > maxDist + loadedDictEnd) {
U32 const newLowLimit = blockEndIdx - maxDist;
if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;

@@ -708,10 +731,31 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
(unsigned)window->dictLimit, (unsigned)window->lowLimit);
window->dictLimit = window->lowLimit;
}
if (loadedDictEndPtr)
*loadedDictEndPtr = 0;
if (dictMatchStatePtr)
*dictMatchStatePtr = NULL;
/* On reaching window size, dictionaries are invalidated */
if (loadedDictEndPtr) *loadedDictEndPtr = 0;
if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
}
}
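A self-contained illustration of the lowLimit update rule above, using assumed example values:

/* Standalone check of the clamping arithmetic (values are examples). */
#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint32_t const maxDist       = 1u << 20;  /* window size */
    uint32_t const loadedDictEnd = 0;         /* no dictionary in use */
    uint32_t const blockEndIdx   = 5000000u;
    uint32_t lowLimit = 0;

    if (blockEndIdx > maxDist + loadedDictEnd) {
        uint32_t const newLowLimit = blockEndIdx - maxDist;  /* no underflow: guarded above */
        if (lowLimit < newLowLimit) lowLimit = newLowLimit;
    }
    assert(lowLimit == 5000000u - (1u << 20));
    /* any match index >= lowLimit is now within reach of an offset <= maxDist */
    return 0;
}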

/* Similar to ZSTD_window_enforceMaxDist(),
* but only invalidates dictionary
* when input progresses beyond window size. */
MEM_STATIC void
ZSTD_checkDictValidity(ZSTD_window_t* window,
const void* blockEnd,
U32 maxDist,
U32* loadedDictEndPtr,
const ZSTD_matchState_t** dictMatchStatePtr)
{
U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);

if (loadedDictEnd && (blockEndIdx > maxDist + loadedDictEnd)) {
/* On reaching window size, dictionaries are invalidated */
if (loadedDictEndPtr) *loadedDictEndPtr = 0;
if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
}
}
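Taken together, the two helpers split the old behaviour: ZSTD_window_enforceMaxDist() still advances lowLimit and drops the dictionary as a side effect, while ZSTD_checkDictValidity() leaves the window limits untouched and only invalidates the dictionary once loadedDictEnd is non-zero and the input has genuinely progressed beyond window size. This is what allows ZSTD_compress_frameChunk() above to replace its enforceMaxDist call with ZSTD_overflowCorrectIfNeeded() plus ZSTD_checkDictValidity().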

@@ -43,8 +43,7 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
/* Only load extra positions for ZSTD_dtlm_full */
if (dtlm == ZSTD_dtlm_fast)
break;
}
}
} }
}

@@ -63,7 +62,10 @@ size_t ZSTD_compressBlock_doubleFast_generic(
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const U32 prefixLowestIndex = ms->window.dictLimit;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 lowestValid = ms->window.dictLimit;
const U32 maxDistance = 1U << cParams->windowLog;
const U32 prefixLowestIndex = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
const BYTE* const prefixLowest = base + prefixLowestIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
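The expression introduced for prefixLowestIndex recurs throughout this commit (the fast, double-fast, lazy and btopt searchers all gain it); extracted on its own, with an illustrative helper name:

/* Oldest index still reachable by an offset <= maxDistance, never below
 * the oldest valid index. Helper name is illustrative. */
#include <stdint.h>

static uint32_t clamp_to_window(uint32_t endIndex, uint32_t validLow, uint32_t maxDistance)
{
    return (endIndex - validLow > maxDistance) ? endIndex - maxDistance : validLow;
}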

@@ -95,8 +97,15 @@ size_t ZSTD_compressBlock_doubleFast_generic(
dictCParams->chainLog : hBitsS;
const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart);

DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");

assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);

/* if a dictionary is attached, it must be within window range */
if (dictMode == ZSTD_dictMatchState) {
assert(lowestValid + maxDistance >= endIndex);
}

/* init */
ip += (dictAndPrefixLength == 0);
if (dictMode == ZSTD_noDict) {

@@ -138,7 +147,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
goto _match_stored;
}

@@ -147,7 +156,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
goto _match_stored;
}

@@ -170,8 +179,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
goto _match_found;
}
}
} }

if (matchIndexS > prefixLowestIndex) {
/* check prefix short match */

@@ -186,16 +194,14 @@ size_t ZSTD_compressBlock_doubleFast_generic(

if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
goto _search_next_long;
}
}
} }

ip += ((ip-anchor) >> kSearchStrength) + 1;
continue;

_search_next_long:

{
size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
{ size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
U32 const matchIndexL3 = hashLong[hl3];
const BYTE* matchL3 = base + matchIndexL3;

@@ -221,9 +227,7 @@ _search_next_long:
offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
goto _match_found;
}
}
}
} } }

/* if no long +1 match, explore the short match we found */
if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {

@@ -242,7 +246,7 @@ _match_found:
offset_2 = offset_1;
offset_1 = offset;

ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);

_match_stored:
/* match found */

@@ -250,11 +254,14 @@ _match_stored:
anchor = ip;

if (ip <= ilimit) {
/* Fill Table */
hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] =
hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] =
hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
/* Complementary insertion */
/* done after iLimit test, as candidates could be > iend-8 */
{ U32 const indexToInsert = current+2;
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
}

/* check immediate repcode */
if (dictMode == ZSTD_dictMatchState) {

@@ -278,8 +285,7 @@ _match_stored:
continue;
}
break;
}
}
} }

if (dictMode == ZSTD_noDict) {
while ( (ip <= ilimit)

@@ -294,14 +300,15 @@ _match_stored:
ip += rLength;
anchor = ip;
continue; /* faster when present ... (?) */
} } } }
} } }
} /* while (ip < ilimit) */

/* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved;
rep[1] = offset_2 ? offset_2 : offsetSaved;

/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}
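The repeated `return iend - anchor;` to `return (size_t)(iend - anchor);` edits in this and the following files are conversion-warning hygiene: pointer subtraction yields a signed ptrdiff_t, and the explicit cast documents that the difference is non-negative by construction. In miniature:

/* Pointer subtraction is signed; the cast asserts the precondition ip >= anchor. */
#include <stddef.h>

static size_t span_length(const unsigned char *anchor, const unsigned char *ip)
{
    return (size_t)(ip - anchor);   /* value-preserving because ip >= anchor */
}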

@@ -360,10 +367,15 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* anchor = istart;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
const U32 prefixStartIndex = ms->window.dictLimit;
const BYTE* const base = ms->window.base;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 lowestValid = ms->window.lowLimit;
const U32 lowLimit = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
const U32 dictStartIndex = lowLimit;
const U32 dictLimit = ms->window.dictLimit;
const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
const BYTE* const prefixStart = base + prefixStartIndex;
const U32 dictStartIndex = ms->window.lowLimit;
const BYTE* const dictBase = ms->window.dictBase;
const BYTE* const dictStart = dictBase + dictStartIndex;
const BYTE* const dictEnd = dictBase + prefixStartIndex;

@@ -371,6 +383,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(

DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);

/* if extDict is invalidated due to maxDistance, switch to "regular" variant */
if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);

/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);

@@ -396,7 +412,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
} else {
if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;

@@ -407,7 +423,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);

} else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);

@@ -432,23 +448,27 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
}
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);

} else {
ip += ((ip-anchor) >> kSearchStrength) + 1;
continue;
} }

/* found a match : store it */
/* move to next sequence start */
ip += mLength;
anchor = ip;

if (ip <= ilimit) {
/* Fill Table */
hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;
hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2;
hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
/* Complementary insertion */
/* done after iLimit test, as candidates could be > iend-8 */
{ U32 const indexToInsert = current+2;
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
}

/* check immediate repcode */
while (ip <= ilimit) {
U32 const current2 = (U32)(ip-base);

@@ -475,7 +495,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
rep[1] = offset_2;

/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}

@@ -13,7 +13,8 @@

void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
const void* const end,
ZSTD_dictTableLoadMethod_e dtlm)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const hashTable = ms->hashTable;

@@ -41,6 +42,7 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
} } } }
}

FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_fast_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],

@@ -58,7 +60,10 @@ size_t ZSTD_compressBlock_fast_generic(
const BYTE* ip0 = istart;
const BYTE* ip1;
const BYTE* anchor = istart;
const U32 prefixStartIndex = ms->window.dictLimit;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 validStartIndex = ms->window.dictLimit;
const U32 prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex;
const BYTE* const prefixStart = base + prefixStartIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;

@@ -165,7 +170,7 @@ _match: /* Requires: ip0, match0, offcode */
rep[1] = offset_2 ? offset_2 : offsetSaved;

/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}

@@ -222,8 +227,15 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart);
const U32 dictHLog = dictCParams->hashLog;

/* otherwise, we would get index underflow when translating a dict index
* into a local index */
/* if a dictionary is still attached, it necessarily means that
* it is within window size. So we just check it. */
const U32 maxDistance = 1U << cParams->windowLog;
const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
assert(endIndex - prefixStartIndex <= maxDistance);
(void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */

/* ensure there will be no no underflow
* when translating a dict index into a local index */
assert(prefixStartIndex >= (U32)(dictEnd - dictBase));

/* init */

@@ -251,7 +263,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
} else if ( (matchIndex <= prefixStartIndex) ) {
size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
U32 const dictMatchIndex = dictHashTable[dictHash];

@@ -271,7 +283,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
} /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
}
} else if (MEM_read32(match) != MEM_read32(ip)) {
/* it's not a match, and we're not going to check the dictionary */

@@ -286,7 +298,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
&& (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
}

/* match found */

@@ -327,7 +339,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
rep[1] = offset_2 ? offset_2 : offsetSaved;

/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}

size_t ZSTD_compressBlock_fast_dictMatchState(

@@ -366,15 +378,24 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const U32 dictStartIndex = ms->window.lowLimit;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 validLow = ms->window.lowLimit;
const U32 lowLimit = (endIndex - validLow > maxDistance) ? endIndex - maxDistance : validLow;
const U32 dictStartIndex = lowLimit;
const BYTE* const dictStart = dictBase + dictStartIndex;
const U32 prefixStartIndex = ms->window.dictLimit;
const U32 dictLimit = ms->window.dictLimit;
const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;
const BYTE* const prefixStart = base + prefixStartIndex;
const BYTE* const dictEnd = dictBase + prefixStartIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
U32 offset_1=rep[0], offset_2=rep[1];

/* switch to "regular" variant if extDict is invalidated due to maxDistance */
if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);

/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
const size_t h = ZSTD_hashPtr(ip, hlog, mls);

@@ -394,7 +415,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
} else {
if ( (matchIndex < dictStartIndex) ||
(MEM_read32(match) != MEM_read32(ip)) ) {

@@ -410,7 +431,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
offset = current - matchIndex;
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} }

/* found a match : store it */

@@ -445,7 +466,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
rep[1] = offset_2;

/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}

@@ -83,7 +83,10 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
U32* largerPtr = smallerPtr + 1;
U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
U32 dummy32; /* to be nullified at the end */
U32 const windowLow = ms->window.lowLimit;
U32 const windowValid = ms->window.lowLimit;
U32 const maxDistance = 1U << cParams->windowLog;
U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;

DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
current, dictLimit, windowLow);

@@ -239,7 +242,9 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,

const BYTE* const base = ms->window.base;
U32 const current = (U32)(ip-base);
U32 const windowLow = ms->window.lowLimit;
U32 const maxDistance = 1U << cParams->windowLog;
U32 const windowValid = ms->window.lowLimit;
U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;

U32* const bt = ms->chainTable;
U32 const btLog = cParams->chainLog - 1;

@@ -490,8 +495,10 @@ size_t ZSTD_HcFindBestMatch_generic (
const U32 dictLimit = ms->window.dictLimit;
const BYTE* const prefixStart = base + dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const U32 lowLimit = ms->window.lowLimit;
const U32 current = (U32)(ip-base);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 lowValid = ms->window.lowLimit;
const U32 lowLimit = (current - lowValid > maxDistance) ? current - maxDistance : lowValid;
const U32 minChain = current > chainSize ? current - chainSize : 0;
U32 nbAttempts = 1U << cParams->searchLog;
size_t ml=4-1;

@@ -653,7 +660,6 @@ size_t ZSTD_compressBlock_lazy_generic(

/* init */
ip += (dictAndPrefixLength == 0);
ms->nextToUpdate3 = ms->nextToUpdate;
if (dictMode == ZSTD_noDict) {
U32 const maxRep = (U32)(ip - prefixLowest);
if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;

@@ -933,7 +939,6 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
U32 offset_1 = rep[0], offset_2 = rep[1];

/* init */
ms->nextToUpdate3 = ms->nextToUpdate;
ip += (ip == prefixStart);

/* Match Loop */

@@ -447,7 +447,7 @@ size_t ZSTD_ldm_generateSequences(
if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
U32 const ldmHSize = 1U << params->hashLog;
U32 const correction = ZSTD_window_correctOverflow(
&ldmState->window, /* cycleLog */ 0, maxDist, src);
&ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
}
/* 2. We enforce the maximum offset allowed.

@@ -255,13 +255,13 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
* to provide a cost which is directly comparable to a match ending at same position */
static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel)
{
if (optPtr->priceType >= zop_predef) return WEIGHT(litLength, optLevel);
if (optPtr->priceType >= zop_predef) return (int)WEIGHT(litLength, optLevel);

/* dynamic statistics */
{ U32 const llCode = ZSTD_LLcode(litLength);
int const contribution = (LL_bits[llCode] * BITCOST_MULTIPLIER)
+ WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */
- WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
int const contribution = (int)(LL_bits[llCode] * BITCOST_MULTIPLIER)
+ (int)WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */
- (int)WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
#if 1
return contribution;
#else

@@ -278,7 +278,7 @@ static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLe
const optState_t* const optPtr,
int optLevel)
{
int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
int const contribution = (int)ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
+ ZSTD_litLengthContribution(litLength, optPtr, optLevel);
return contribution;
}

@@ -372,13 +372,15 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)

/* Update hashTable3 up to ip (excluded)
Assumption : always within prefix (i.e. not within extDict) */
static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE* const ip)
static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
U32* nextToUpdate3,
const BYTE* const ip)
{
U32* const hashTable3 = ms->hashTable3;
U32 const hashLog3 = ms->hashLog3;
const BYTE* const base = ms->window.base;
U32 idx = ms->nextToUpdate3;
U32 const target = ms->nextToUpdate3 = (U32)(ip - base);
U32 idx = *nextToUpdate3;
U32 const target = (U32)(ip - base);
size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
assert(hashLog3 > 0);

@@ -387,6 +389,7 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE*
idx++;
}

*nextToUpdate3 = target;
return hashTable3[hash3];
}
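With nextToUpdate3 removed from ZSTD_matchState_t (see the struct change earlier in this commit), the hash3 cursor now lives on the caller's stack and is threaded through by pointer. The shape of that refactor, reduced to essentials with illustrative names:

/* Cursor passed by pointer instead of stored in a long-lived struct. */
#include <stdint.h>

static uint32_t insert_up_to(uint32_t *cursor, uint32_t target)
{
    uint32_t idx = *cursor;
    while (idx < target) {
        /* ... insert position idx into the table ... */
        idx++;
    }
    *cursor = target;   /* only the caller's local advances; no hidden state */
    return target;
}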

@@ -503,9 +506,11 @@ static U32 ZSTD_insertBt1(
} }

*smallerPtr = *largerPtr = 0;
if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */
assert(matchEndIdx > current + 8);
return matchEndIdx - (current + 8);
{ U32 positions = 0;
if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
assert(matchEndIdx > current + 8);
return MAX(positions, matchEndIdx - (current + 8));
}
}

FORCE_INLINE_TEMPLATE

@@ -520,8 +525,13 @@ void ZSTD_updateTree_internal(
DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
idx, target, dictMode);

while(idx < target)
idx += ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
while(idx < target) {
U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
assert(idx < (U32)(idx + forward));
idx += forward;
}
assert((size_t)(ip - base) <= (size_t)(U32)(-1));
assert((size_t)(iend - base) <= (size_t)(U32)(-1));
ms->nextToUpdate = target;
}
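Because matchEndIdx > current + 8 is asserted, the new return value MAX(positions, matchEndIdx - (current + 8)) is always at least 1, so each iteration of the rewritten loop makes progress, and the added assert also guards against U32 wrap-around. The termination argument in miniature:

/* Progress guarantee of the update loop (illustrative reduction). */
#include <assert.h>
#include <stdint.h>

static uint32_t insert_one(uint32_t idx) { (void)idx; return 1; }  /* always >= 1 */

static void update_range(uint32_t idx, uint32_t target)
{
    while (idx < target) {
        uint32_t const forward = insert_one(idx);
        assert(forward >= 1);          /* forward progress */
        assert(idx < idx + forward);   /* no U32 wrap */
        idx += forward;
    }
}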

@@ -531,16 +541,18 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {

FORCE_INLINE_TEMPLATE
U32 ZSTD_insertBtAndGetAllMatches (
ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
ZSTD_matchState_t* ms,
U32* nextToUpdate3,
const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
U32 rep[ZSTD_REP_NUM],
const U32 rep[ZSTD_REP_NUM],
U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
ZSTD_match_t* matches,
const U32 lengthToBeat,
U32 const mls /* template */)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
U32 const maxDistance = 1U << cParams->windowLog;
const BYTE* const base = ms->window.base;
U32 const current = (U32)(ip-base);
U32 const hashLog = cParams->hashLog;

@@ -556,8 +568,9 @@ U32 ZSTD_insertBtAndGetAllMatches (
U32 const dictLimit = ms->window.dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const prefixStart = base + dictLimit;
U32 const btLow = btMask >= current ? 0 : current - btMask;
U32 const windowLow = ms->window.lowLimit;
U32 const btLow = (btMask >= current) ? 0 : current - btMask;
U32 const windowValid = ms->window.lowLimit;
U32 const windowLow = ((current - windowValid) > maxDistance) ? current - maxDistance : windowValid;
U32 const matchLow = windowLow ? windowLow : 1;
U32* smallerPtr = bt + 2*(current&btMask);
U32* largerPtr = bt + 2*(current&btMask) + 1;

@@ -627,7 +640,7 @@ U32 ZSTD_insertBtAndGetAllMatches (

/* HC3 match finder */
if ((mls == 3) /*static*/ && (bestLength < mls)) {
U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip);
U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
if ((matchIndex3 >= matchLow)
& (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
size_t mlen;

@@ -653,9 +666,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
(ip+mlen == iLimit) ) { /* best possible length */
ms->nextToUpdate = current+1; /* skip insertion */
return 1;
}
}
}
} } }
/* no dictMatchState lookup: dicts don't have a populated HC3 table */
}

@@ -760,10 +771,13 @@ U32 ZSTD_insertBtAndGetAllMatches (

FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */
ZSTD_matchState_t* ms,
U32* nextToUpdate3,
const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
U32 rep[ZSTD_REP_NUM], U32 const ll0,
ZSTD_match_t* matches, U32 const lengthToBeat)
const U32 rep[ZSTD_REP_NUM],
U32 const ll0,
U32 const lengthToBeat)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32 const matchLengthSearch = cParams->minMatch;

@@ -772,12 +786,12 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
switch(matchLengthSearch)
{
case 3 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 3);
case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
default :
case 4 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 4);
case 5 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 5);
case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
case 7 :
case 6 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 6);
case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
}
}

@@ -853,6 +867,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,

U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
U32 nextToUpdate3 = ms->nextToUpdate;

ZSTD_optimal_t* const opt = optStatePtr->priceTable;
ZSTD_match_t* const matches = optStatePtr->matchTable;

@@ -862,7 +877,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
(U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
assert(optLevel <= 2);
ms->nextToUpdate3 = ms->nextToUpdate;
ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
ip += (ip==prefixStart);

@@ -873,7 +887,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
/* find first match */
{ U32 const litlen = (U32)(ip - anchor);
U32 const ll0 = !litlen;
U32 const nbMatches = ZSTD_BtGetAllMatches(ms, ip, iend, dictMode, rep, ll0, matches, minMatch);
U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
if (!nbMatches) { ip++; continue; }

/* initialize opt[0] */

@@ -970,7 +984,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
U32 const previousPrice = opt[cur].price;
U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
U32 const nbMatches = ZSTD_BtGetAllMatches(ms, inr, iend, dictMode, opt[cur].rep, ll0, matches, minMatch);
U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
U32 matchNb;
if (!nbMatches) {
DEBUGLOG(7, "rPos:%u : no match found", cur);

@@ -1094,7 +1108,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
} /* while (ip < ilimit) */

/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}

@@ -1158,7 +1172,6 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
ms->window.dictLimit += (U32)srcSize;
ms->window.lowLimit = ms->window.dictLimit;
ms->nextToUpdate = ms->window.dictLimit;
ms->nextToUpdate3 = ms->window.dictLimit;

/* re-inforce weight of collected statistics */
ZSTD_upscaleStats(&ms->opt);

@@ -1129,9 +1129,14 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed;
assert(flushed <= produced);
assert(jobPtr->consumed <= jobPtr->src.size);
toFlush = produced - flushed;
if (toFlush==0 && (jobPtr->consumed >= jobPtr->src.size)) {
/* doneJobID is not-fully-flushed, but toFlush==0 : doneJobID should be compressing some more data */
/* if toFlush==0, nothing is available to flush.
* However, jobID is expected to still be active:
* if jobID was already completed and fully flushed,
* ZSTDMT_flushProduced() should have already moved onto next job.
* Therefore, some input has not yet been consumed. */
if (toFlush==0) {
assert(jobPtr->consumed < jobPtr->src.size);
}
}

@@ -1148,12 +1153,16 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)

static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
{
if (params.ldmParams.enableLdm)
unsigned jobLog;
if (params.ldmParams.enableLdm) {
/* In Long Range Mode, the windowLog is typically oversized.
* In which case, it's preferable to determine the jobSize
* based on chainLog instead. */
return MAX(21, params.cParams.chainLog + 4);
return MAX(20, params.cParams.windowLog + 2);
jobLog = MAX(21, params.cParams.chainLog + 4);
} else {
jobLog = MAX(20, params.cParams.windowLog + 2);
}
return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX);
}
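A worked example of the job-log computation above, with assumed parameter values; the new MIN clamp is what keeps 1ULL << jobLog within ZSTDMT_JOBSIZE_MAX (compare the assert added in ZSTDMT_initCStream_internal below):

/* Mirrors the logic above with local macros; parameter values are examples. */
#include <stdio.h>

#define JOBLOG_MAX 30u                      /* 64-bit value of ZSTDMT_JOBLOG_MAX */
#define MAXU(a,b) ((a) > (b) ? (a) : (b))
#define MINU(a,b) ((a) < (b) ? (a) : (b))

int main(void)
{
    unsigned const chainLog = 24, windowLog = 27;
    unsigned const ldmJobLog    = MINU(MAXU(21u, chainLog + 4), JOBLOG_MAX);   /* 28 */
    unsigned const normalJobLog = MINU(MAXU(20u, windowLog + 2), JOBLOG_MAX);  /* 29 */
    printf("ldm jobLog: %u, normal jobLog: %u\n", ldmJobLog, normalJobLog);
    return 0;
}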

static int ZSTDMT_overlapLog_default(ZSTD_strategy strat)

@@ -1197,7 +1206,7 @@ static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params)
ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
- overlapRLog;
}
assert(0 <= ovLog && ovLog <= 30);
assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX);
DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
return (ovLog==0) ? 0 : (size_t)1 << ovLog;

@@ -1391,7 +1400,7 @@ size_t ZSTDMT_initCStream_internal(
FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) );

if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;

mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
if (mtctx->singleBlockingThread) {

@@ -1432,6 +1441,8 @@ size_t ZSTDMT_initCStream_internal(
if (mtctx->targetSectionSize == 0) {
mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
}
assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);

if (params.rsyncable) {
/* Aim for the targetsectionSize as the average job size. */
U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);

@@ -50,6 +50,7 @@
#ifndef ZSTDMT_JOBSIZE_MIN
# define ZSTDMT_JOBSIZE_MIN (1 MB)
#endif
#define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30)
#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))

@@ -360,8 +360,11 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize)
sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
frameParameter_unsupported);

return skippableHeaderSize + sizeU32;
{
size_t const skippableSize = skippableHeaderSize + sizeU32;
RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong);
return skippableSize;
}
}
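The function now performs two guards in sequence: detect U32 wrap in `sizeU32 + ZSTD_SKIPPABLEHEADERSIZE`, then reject frames that claim to extend past the available input. The same pattern as a standalone sketch, with a hypothetical helper:

/* Overflow-then-bounds validation (hypothetical helper, returns 0 on failure). */
#include <stddef.h>
#include <stdint.h>

static size_t checked_frame_size(uint32_t payload, uint32_t header, size_t srcSize)
{
    if ((uint32_t)(payload + header) < payload) return 0;   /* U32 wrap detected */
    {   size_t const total = (size_t)header + payload;      /* safe in size_t */
        if (total > srcSize) return 0;                      /* truncated input */
        return total;
    }
}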

/** ZSTD_findDecompressedSize() :

@@ -378,11 +381,10 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)

if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
size_t const skippableSize = readSkippableFrameSize(src, srcSize);
if (ZSTD_isError(skippableSize))
return skippableSize;
if (srcSize < skippableSize) {
if (ZSTD_isError(skippableSize)) {
return ZSTD_CONTENTSIZE_ERROR;
}
assert(skippableSize <= srcSize);

src = (const BYTE *)src + skippableSize;
srcSize -= skippableSize;

@@ -467,6 +469,8 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
&& (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
assert(ZSTD_isError(frameSizeInfo.compressedSize) ||
frameSizeInfo.compressedSize <= srcSize);
return frameSizeInfo;
} else {
const BYTE* ip = (const BYTE*)src;

@@ -529,7 +533,6 @@ size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
return frameSizeInfo.compressedSize;
}

/** ZSTD_decompressBound() :
* compatible with legacy mode
* `src` must point to the start of a ZSTD frame or a skippeable frame

@@ -546,6 +549,7 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
return ZSTD_CONTENTSIZE_ERROR;
assert(srcSize >= compressedSize);
src = (const BYTE*)src + compressedSize;
srcSize -= compressedSize;
bound += decompressedBound;

@@ -738,9 +742,8 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
(unsigned)magicNumber, ZSTD_MAGICNUMBER);
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
size_t const skippableSize = readSkippableFrameSize(src, srcSize);
if (ZSTD_isError(skippableSize))
return skippableSize;
RETURN_ERROR_IF(srcSize < skippableSize, srcSize_wrong);
FORWARD_IF_ERROR(skippableSize);
assert(skippableSize <= srcSize);

src = (const BYTE *)src + skippableSize;
srcSize -= skippableSize;

@@ -505,7 +505,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
*nbSeqPtr = nbSeq;

/* FSE table descriptors */
RETURN_ERROR_IF(ip+4 > iend, srcSize_wrong); /* minimum possible size */
RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong); /* minimum possible size: 1 byte for symbol encoding types */
{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);

@@ -637,9 +637,10 @@ size_t ZSTD_execSequence(BYTE* op,
if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);

/* copy Literals */
ZSTD_copy8(op, *litPtr);
if (sequence.litLength > 8)
ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
else
ZSTD_copy8(op, *litPtr);
op = oLitEnd;
*litPtr = iLitEnd; /* update for next sequence */

@@ -686,13 +687,13 @@ size_t ZSTD_execSequence(BYTE* op,

if (oMatchEnd > oend-(16-MINMATCH)) {
if (op < oend_w) {
ZSTD_wildcopy(op, match, oend_w - op);
ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
match += oend_w - op;
op = oend_w;
}
while (op < oMatchEnd) *op++ = *match++;
} else {
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
}
return sequenceLength;
}
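ZSTD_wildcopy() grows an overlap hint because LZ77 match copies may read bytes that the same copy has just written: whenever offset < length, the copy must proceed in order so the repeated pattern replicates (memmove semantics would be wrong here, since memmove preserves the source bytes instead of propagating them). A runnable illustration:

/* Match copy with self-overlap: bytes must be replicated in order. */
#include <assert.h>
#include <string.h>

static void match_copy(unsigned char *op, size_t offset, size_t length)
{
    const unsigned char *match = op - offset;
    if (offset >= length) {
        memcpy(op, match, length);                       /* disjoint: fast path OK */
    } else {
        size_t i;
        for (i = 0; i < length; i++) op[i] = match[i];   /* ordered byte copy */
    }
}

int main(void)
{
    unsigned char buf[16] = "abc";
    match_copy(buf + 3, 3, 6);                 /* replicates "abc" twice more */
    assert(memcmp(buf, "abcabcabc", 9) == 0);
    return 0;
}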
|
||||
@ -717,9 +718,11 @@ size_t ZSTD_execSequenceLong(BYTE* op,
|
||||
if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
|
||||
|
||||
/* copy Literals */
|
||||
ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
|
||||
if (sequence.litLength > 8)
|
||||
ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
|
||||
ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
|
||||
else
|
||||
ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
|
||||
|
||||
op = oLitEnd;
|
||||
*litPtr = iLitEnd; /* update for next sequence */
|
||||
|
||||
@@ -766,13 +769,13 @@ size_t ZSTD_execSequenceLong(BYTE* op,

     if (oMatchEnd > oend-(16-MINMATCH)) {
         if (op < oend_w) {
-            ZSTD_wildcopy(op, match, oend_w - op);
+            ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
             match += oend_w - op;
             op = oend_w;
         }
         while (op < oMatchEnd) *op++ = *match++;
     } else {
-        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);   /* works even if matchLength < 8 */
     }
     return sequenceLength;
 }
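Editor's note: the new fourth argument threads overlap information into the copy routine. For LZ77 match copies, the source may precede the destination by only a few bytes inside the same buffer, so a fixed-stride wide copy must be overlap-aware. A standalone sketch of the distinction, with generic names (ZSTD_wildcopy itself is internal to the library, and its exact implementation differs):

    #include <stddef.h>
    #include <string.h>

    typedef enum { no_overlap, overlap_src_before_dst } overlap_e;

    /* Illustrative wide copy: 8 bytes per step; like the real wildcopy it may
     * write a little past `length` (callers guarantee slack space). src and
     * dst are assumed to point into the same output buffer. */
    static void wildcopy_sketch(void* dst, const void* src, ptrdiff_t length, overlap_e ovtype) {
        unsigned char* op = (unsigned char*)dst;
        const unsigned char* ip = (const unsigned char*)src;
        if (ovtype == overlap_src_before_dst && (op - ip) < 8) {
            /* closer than one stride: copy byte-by-byte so earlier writes
             * are visible to later reads (repeating-pattern match semantics) */
            while (length-- > 0) *op++ = *ip++;
        } else {
            unsigned char* const oend = op + length;
            do { memcpy(op, ip, 8); op += 8; ip += 8; } while (op < oend);
        }
    }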
@@ -889,6 +892,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
 }

 FORCE_INLINE_TEMPLATE size_t
+DONT_VECTORIZE
 ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
                                void* dst, size_t maxDstSize,
                          const void* seqStart, size_t seqSize, int nbSeq,
@@ -918,6 +922,11 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
         ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
         ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);

+        ZSTD_STATIC_ASSERT(
+                BIT_DStream_unfinished < BIT_DStream_completed &&
+                BIT_DStream_endOfBuffer < BIT_DStream_completed &&
+                BIT_DStream_completed < BIT_DStream_overflow);
+
         for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
             nbSeq--;
             {   seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
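Editor's note: the added static assert pins down the ordering of the BIT_DStream status enum that the loop condition `<= BIT_DStream_completed` silently relies on. A minimal standalone illustration of the same pattern (names here are illustrative, not the zstd enum itself):

    #include <assert.h>

    /* The "<=" comparison is only meaningful because the still-consuming
     * states sort strictly before "completed", and "overflow" sorts after. */
    typedef enum { stream_unfinished = 0, stream_endOfBuffer = 1,
                   stream_completed = 2, stream_overflow = 3 } stream_status;

    /* C11 compile-time check, analogous to ZSTD_STATIC_ASSERT */
    static_assert(stream_unfinished < stream_completed &&
                  stream_endOfBuffer < stream_completed &&
                  stream_completed < stream_overflow,
                  "decode loop condition depends on this ordering");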
@@ -930,6 +939,7 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
         /* check if reached exact end */
         DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
         RETURN_ERROR_IF(nbSeq, corruption_detected);
+        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected);
         /* save reps for next block */
         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
     }
@@ -1131,6 +1141,7 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,

 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
 static TARGET_ATTRIBUTE("bmi2") size_t
+DONT_VECTORIZE
 ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
                                  void* dst, size_t maxDstSize,
                            const void* seqStart, size_t seqSize, int nbSeq,

@@ -526,10 +526,10 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
  * Prepare a context for dictionary building.
  * The context is only dependent on the parameter `d` and can used multiple
  * times.
- * Returns 1 on success or zero on error.
+ * Returns 0 on success or error code on error.
  * The context must be destroyed with `COVER_ctx_destroy()`.
  */
-static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
+static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
                           const size_t *samplesSizes, unsigned nbSamples,
                           unsigned d, double splitPoint) {
   const BYTE *const samples = (const BYTE *)samplesBuffer;
@@ -544,17 +544,17 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
       totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
     DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
                  (unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
-    return 0;
+    return ERROR(srcSize_wrong);
   }
   /* Check if there are at least 5 training samples */
   if (nbTrainSamples < 5) {
     DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
-    return 0;
+    return ERROR(srcSize_wrong);
   }
   /* Check if there's testing sample */
   if (nbTestSamples < 1) {
     DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
-    return 0;
+    return ERROR(srcSize_wrong);
   }
   /* Zero the context */
   memset(ctx, 0, sizeof(*ctx));
@@ -577,7 +577,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
   if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
     DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
     COVER_ctx_destroy(ctx);
-    return 0;
+    return ERROR(memory_allocation);
   }
   ctx->freqs = NULL;
   ctx->d = d;
@@ -624,7 +624,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
           (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
   ctx->freqs = ctx->suffix;
   ctx->suffix = NULL;
-  return 1;
+  return 0;
 }

 void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
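Editor's note: this hunk switches COVER_ctx_init from a boolean convention (1 = success) to zstd's usual size_t error-code convention, which the later call-site hunks then consume. A minimal sketch of the convention, assuming the library-internal error_private.h macros (ERROR() encodes a code into a size_t, ERR_isError detects it):

    #include "error_private.h"   /* internal header: ERROR(), ERR_isError() */

    /* Hypothetical helper following the same convention as the new
     * COVER_ctx_init: 0 on success, an encoded error code otherwise. */
    static size_t example_init(int resourcesOk) {
        if (!resourcesOk) return ERROR(memory_allocation);
        return 0;
    }

    static size_t example_caller(void) {
        size_t const initVal = example_init(1);
        if (ERR_isError(initVal)) return initVal;  /* propagate unchanged */
        /* ... use the initialized context ... */
        return 0;
    }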
@@ -729,11 +729,11 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
   /* Checks */
   if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
     DISPLAYLEVEL(1, "Cover parameters incorrect\n");
-    return ERROR(GENERIC);
+    return ERROR(parameter_outOfBound);
   }
   if (nbSamples == 0) {
     DISPLAYLEVEL(1, "Cover must have at least one input file\n");
-    return ERROR(GENERIC);
+    return ERROR(srcSize_wrong);
   }
   if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
     DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@@ -741,15 +741,18 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
     return ERROR(dstSize_tooSmall);
   }
   /* Initialize context and activeDmers */
-  if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
-                      parameters.d, parameters.splitPoint)) {
-    return ERROR(GENERIC);
+  {
+    size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
+                      parameters.d, parameters.splitPoint);
+    if (ZSTD_isError(initVal)) {
+      return initVal;
+    }
   }
   COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel);
   if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
     DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
     COVER_ctx_destroy(&ctx);
-    return ERROR(GENERIC);
+    return ERROR(memory_allocation);
   }

   DISPLAYLEVEL(2, "Building dictionary\n");
@@ -810,7 +813,7 @@ size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
           cctx, dst, dstCapacity, samples + offsets[i],
           samplesSizes[i], cdict);
       if (ZSTD_isError(size)) {
-        totalCompressedSize = ERROR(GENERIC);
+        totalCompressedSize = size;
         goto _compressCleanup;
       }
       totalCompressedSize += size;
@@ -886,9 +889,11 @@ void COVER_best_start(COVER_best_t *best) {
  * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
  * If this dictionary is the best so far save it and its parameters.
  */
-void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
-                       ZDICT_cover_params_t parameters, void *dict,
-                       size_t dictSize) {
+void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
+                       COVER_dictSelection_t selection) {
+  void* dict = selection.dictContent;
+  size_t compressedSize = selection.totalCompressedSize;
+  size_t dictSize = selection.dictSize;
   if (!best) {
     return;
   }
@@ -914,6 +919,9 @@ void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
       }
     }
     /* Save the dictionary, parameters, and size */
+    if (!dict) {
+      return;
+    }
     memcpy(best->dict, dict, dictSize);
     best->dictSize = dictSize;
     best->parameters = parameters;
@@ -926,6 +934,111 @@ void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
   }
 }

+COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
+  COVER_dictSelection_t selection = { NULL, 0, error };
+  return selection;
+}
+
+unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
+  return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent);
+}
+
+void COVER_dictSelectionFree(COVER_dictSelection_t selection){
+  free(selection.dictContent);
+}
+
+COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
+        size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
+        size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
+
+  size_t largestDict = 0;
+  size_t largestCompressed = 0;
+  BYTE* customDictContentEnd = customDictContent + dictContentSize;
+
+  BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize);
+  BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize);
+  double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
+
+  if (!largestDictbuffer || !candidateDictBuffer) {
+    free(largestDictbuffer);
+    free(candidateDictBuffer);
+    return COVER_dictSelectionError(dictContentSize);
+  }
+
+  /* Initial dictionary size and compressed size */
+  memcpy(largestDictbuffer, customDictContent, dictContentSize);
+  dictContentSize = ZDICT_finalizeDictionary(
+    largestDictbuffer, dictContentSize, customDictContent, dictContentSize,
+    samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
+
+  if (ZDICT_isError(dictContentSize)) {
+    free(largestDictbuffer);
+    free(candidateDictBuffer);
+    return COVER_dictSelectionError(dictContentSize);
+  }
+
+  totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
+                                                       samplesBuffer, offsets,
+                                                       nbCheckSamples, nbSamples,
+                                                       largestDictbuffer, dictContentSize);
+
+  if (ZSTD_isError(totalCompressedSize)) {
+    free(largestDictbuffer);
+    free(candidateDictBuffer);
+    return COVER_dictSelectionError(totalCompressedSize);
+  }
+
+  if (params.shrinkDict == 0) {
+    COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
+    free(candidateDictBuffer);
+    return selection;
+  }
+
+  largestDict = dictContentSize;
+  largestCompressed = totalCompressedSize;
+  dictContentSize = ZDICT_DICTSIZE_MIN;
+
+  /* Largest dict is initially at least ZDICT_DICTSIZE_MIN */
+  while (dictContentSize < largestDict) {
+    memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
+    dictContentSize = ZDICT_finalizeDictionary(
+      candidateDictBuffer, dictContentSize, customDictContentEnd - dictContentSize, dictContentSize,
+      samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
+
+    if (ZDICT_isError(dictContentSize)) {
+      free(largestDictbuffer);
+      free(candidateDictBuffer);
+      return COVER_dictSelectionError(dictContentSize);
+
+    }
+
+    totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
+                                                         samplesBuffer, offsets,
+                                                         nbCheckSamples, nbSamples,
+                                                         candidateDictBuffer, dictContentSize);
+
+    if (ZSTD_isError(totalCompressedSize)) {
+      free(largestDictbuffer);
+      free(candidateDictBuffer);
+      return COVER_dictSelectionError(totalCompressedSize);
+    }
+
+    if (totalCompressedSize <= largestCompressed * regressionTolerance) {
+      COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
+      free(largestDictbuffer);
+      return selection;
+    }
+    dictContentSize *= 2;
+  }
+  dictContentSize = largestDict;
+  totalCompressedSize = largestCompressed;
+  {
+    COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
+    free(candidateDictBuffer);
+    return selection;
+  }
+}
+
 /**
  * Parameters for COVER_tryParameters().
  */
@@ -951,6 +1064,7 @@ static void COVER_tryParameters(void *opaque) {
   /* Allocate space for hash table, dict, and freqs */
   COVER_map_t activeDmers;
   BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
+  COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
   U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
   if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
     DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
@@ -966,29 +1080,21 @@ static void COVER_tryParameters(void *opaque) {
   {
     const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
                                               dictBufferCapacity, parameters);
-    dictBufferCapacity = ZDICT_finalizeDictionary(
-        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
-        ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples,
-        parameters.zParams);
-    if (ZDICT_isError(dictBufferCapacity)) {
-      DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
+    selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
+        ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
+        totalCompressedSize);
+
+    if (COVER_dictSelectionIsError(selection)) {
+      DISPLAYLEVEL(1, "Failed to select dictionary\n");
       goto _cleanup;
     }
   }
-  /* Check total compressed size */
-  totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
-                                                       ctx->samples, ctx->offsets,
-                                                       ctx->nbTrainSamples, ctx->nbSamples,
-                                                       dict, dictBufferCapacity);

 _cleanup:
-  COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
-                    dictBufferCapacity);
+  free(dict);
+  COVER_best_finish(data->best, parameters, selection);
   free(data);
   COVER_map_destroy(&activeDmers);
-  if (dict) {
-    free(dict);
-  }
+  COVER_dictSelectionFree(selection);
   if (freqs) {
     free(freqs);
   }
@@ -1010,6 +1116,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
   const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
   const unsigned kIterations =
       (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
+  const unsigned shrinkDict = 0;
   /* Local variables */
   const int displayLevel = parameters->zParams.notificationLevel;
   unsigned iteration = 1;
@@ -1022,15 +1129,15 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
   /* Checks */
   if (splitPoint <= 0 || splitPoint > 1) {
     LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
-    return ERROR(GENERIC);
+    return ERROR(parameter_outOfBound);
   }
   if (kMinK < kMaxD || kMaxK < kMinK) {
     LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
-    return ERROR(GENERIC);
+    return ERROR(parameter_outOfBound);
   }
   if (nbSamples == 0) {
     DISPLAYLEVEL(1, "Cover must have at least one input file\n");
-    return ERROR(GENERIC);
+    return ERROR(srcSize_wrong);
   }
   if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
     DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@@ -1054,11 +1161,14 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
       /* Initialize the context for this value of d */
       COVER_ctx_t ctx;
       LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
-      if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint)) {
-        LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
-        COVER_best_destroy(&best);
-        POOL_free(pool);
-        return ERROR(GENERIC);
+      {
+        const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint);
+        if (ZSTD_isError(initVal)) {
+          LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
+          COVER_best_destroy(&best);
+          POOL_free(pool);
+          return initVal;
+        }
       }
       if (!warned) {
         COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel);
@@ -1075,7 +1185,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
         COVER_best_destroy(&best);
         COVER_ctx_destroy(&ctx);
         POOL_free(pool);
-        return ERROR(GENERIC);
+        return ERROR(memory_allocation);
       }
       data->ctx = &ctx;
       data->best = &best;
@@ -1085,6 +1195,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
       data->parameters.d = d;
       data->parameters.splitPoint = splitPoint;
       data->parameters.steps = kSteps;
+      data->parameters.shrinkDict = shrinkDict;
       data->parameters.zParams.notificationLevel = g_displayLevel;
       /* Check the parameters */
       if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {

@@ -46,6 +46,15 @@ typedef struct {
   U32 size;
 } COVER_epoch_info_t;

+/**
+ * Struct used for the dictionary selection function.
+ */
+typedef struct COVER_dictSelection {
+  BYTE* dictContent;
+  size_t dictSize;
+  size_t totalCompressedSize;
+} COVER_dictSelection_t;
+
 /**
  * Computes the number of epochs and the size of each epoch.
  * We will make sure that each epoch gets at least 10 * k bytes.
@@ -107,6 +116,32 @@ void COVER_best_start(COVER_best_t *best);
  * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
  * If this dictionary is the best so far save it and its parameters.
  */
-void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
-                       ZDICT_cover_params_t parameters, void *dict,
-                       size_t dictSize);
+void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
+                       COVER_dictSelection_t selection);
+/**
+ * Error function for COVER_selectDict function. Checks if the return
+ * value is an error.
+ */
+unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection);
+
+/**
+ * Error function for COVER_selectDict function. Returns a struct where
+ * return.totalCompressedSize is a ZSTD error.
+ */
+COVER_dictSelection_t COVER_dictSelectionError(size_t error);
+
+/**
+ * Always call after selectDict is called to free up used memory from
+ * newly created dictionary.
+ */
+void COVER_dictSelectionFree(COVER_dictSelection_t selection);
+
+/**
+ * Called to finalize the dictionary and select one based on whether or not
+ * the shrink-dict flag was enabled. If enabled the dictionary used is the
+ * smallest dictionary within a specified regression of the compressed size
+ * from the largest dictionary.
+ */
+COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
+        size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
+        size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
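Editor's note: taken together, the new cover.h declarations imply a small lifecycle for a selection result. A hedged sketch of that flow, assuming cover.h's declarations are in scope; all arguments are placeholders for what a real training run would supply (in cover.c the call sits inside COVER_tryParameters() after COVER_buildDictionary()):

    #include "cover.h"   /* COVER_selectDict and friends, as declared above */

    void select_and_report(BYTE* dictTail, size_t tailSize,
                           const BYTE* samples, const size_t* sampleSizes,
                           unsigned nbFinalize, size_t nbCheck, size_t nbSamples,
                           ZDICT_cover_params_t params, size_t* offsets,
                           COVER_best_t* best) {
        COVER_dictSelection_t selection =
            COVER_selectDict(dictTail, tailSize, samples, sampleSizes,
                             nbFinalize, nbCheck, nbSamples, params, offsets, 0);
        if (COVER_dictSelectionIsError(selection)) {
            COVER_dictSelectionFree(selection);  /* frees dictContent (may be NULL) */
            return;
        }
        /* hand the candidate to the best-tracker; it copies what it keeps */
        COVER_best_finish(best, params, selection);
        COVER_dictSelectionFree(selection);
    }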
@@ -287,10 +287,10 @@ FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)
  * Prepare a context for dictionary building.
  * The context is only dependent on the parameter `d` and can used multiple
  * times.
- * Returns 1 on success or zero on error.
+ * Returns 0 on success or error code on error.
  * The context must be destroyed with `FASTCOVER_ctx_destroy()`.
  */
-static int
+static size_t
 FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
                    const void* samplesBuffer,
                    const size_t* samplesSizes, unsigned nbSamples,
@@ -310,19 +310,19 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
         totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
         DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
                     (unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
-        return 0;
+        return ERROR(srcSize_wrong);
    }

    /* Check if there are at least 5 training samples */
    if (nbTrainSamples < 5) {
        DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples);
-        return 0;
+        return ERROR(srcSize_wrong);
    }

    /* Check if there's testing sample */
    if (nbTestSamples < 1) {
        DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples);
-        return 0;
+        return ERROR(srcSize_wrong);
    }

    /* Zero the context */
@@ -347,7 +347,7 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
    if (ctx->offsets == NULL) {
        DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n");
        FASTCOVER_ctx_destroy(ctx);
-        return 0;
+        return ERROR(memory_allocation);
    }

    /* Fill offsets from the samplesSizes */
@@ -364,13 +364,13 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
    if (ctx->freqs == NULL) {
        DISPLAYLEVEL(1, "Failed to allocate frequency table \n");
        FASTCOVER_ctx_destroy(ctx);
-        return 0;
+        return ERROR(memory_allocation);
    }

    DISPLAYLEVEL(2, "Computing frequencies\n");
    FASTCOVER_computeFrequency(ctx->freqs, ctx);

-    return 1;
+    return 0;
 }

@@ -435,7 +435,6 @@ FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx,
    return tail;
 }

-
 /**
  * Parameters for FASTCOVER_tryParameters().
  */
@@ -464,6 +463,7 @@ static void FASTCOVER_tryParameters(void *opaque)
    U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16));
    /* Allocate space for hash table, dict, and freqs */
    BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
+   COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
    U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
    if (!segmentFreqs || !dict || !freqs) {
        DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
@@ -473,27 +473,24 @@ static void FASTCOVER_tryParameters(void *opaque)
    memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32));
    /* Build the dictionary */
    { const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity,
-                                                    parameters, segmentFreqs);
+                                                   parameters, segmentFreqs);
+
      const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
-     dictBufferCapacity = ZDICT_finalizeDictionary(
-         dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
-         ctx->samples, ctx->samplesSizes, nbFinalizeSamples, parameters.zParams);
-     if (ZDICT_isError(dictBufferCapacity)) {
-         DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
+     selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
+         ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
+         totalCompressedSize);
+
+     if (COVER_dictSelectionIsError(selection)) {
+         DISPLAYLEVEL(1, "Failed to select dictionary\n");
          goto _cleanup;
      }
    }
-   /* Check total compressed size */
-   totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
-                                                        ctx->samples, ctx->offsets,
-                                                        ctx->nbTrainSamples, ctx->nbSamples,
-                                                        dict, dictBufferCapacity);
 _cleanup:
-   COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
-                     dictBufferCapacity);
+   free(dict);
+   COVER_best_finish(data->best, parameters, selection);
    free(data);
    free(segmentFreqs);
-   free(dict);
+   COVER_dictSelectionFree(selection);
    free(freqs);
 }

@@ -508,6 +505,7 @@ FASTCOVER_convertToCoverParams(ZDICT_fastCover_params_t fastCoverParams,
     coverParams->nbThreads = fastCoverParams.nbThreads;
     coverParams->splitPoint = fastCoverParams.splitPoint;
     coverParams->zParams = fastCoverParams.zParams;
+    coverParams->shrinkDict = fastCoverParams.shrinkDict;
 }

@@ -524,6 +522,7 @@ FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams,
     fastCoverParams->f = f;
     fastCoverParams->accel = accel;
     fastCoverParams->zParams = coverParams.zParams;
+    fastCoverParams->shrinkDict = coverParams.shrinkDict;
 }

@@ -550,11 +549,11 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
    if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f,
                                   parameters.accel)) {
      DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
-     return ERROR(GENERIC);
+     return ERROR(parameter_outOfBound);
    }
    if (nbSamples == 0) {
      DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
-     return ERROR(GENERIC);
+     return ERROR(srcSize_wrong);
    }
    if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
      DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@@ -564,11 +563,14 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
    /* Assign corresponding FASTCOVER_accel_t to accelParams*/
    accelParams = FASTCOVER_defaultAccelParameters[parameters.accel];
    /* Initialize context */
-   if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
+   {
+     size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
                             coverParams.d, parameters.splitPoint, parameters.f,
-                            accelParams)) {
-     DISPLAYLEVEL(1, "Failed to initialize context\n");
-     return ERROR(GENERIC);
+                            accelParams);
+     if (ZSTD_isError(initVal)) {
+       DISPLAYLEVEL(1, "Failed to initialize context\n");
+       return initVal;
+     }
    }
    COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel);
    /* Build the dictionary */
@@ -616,6 +618,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
        (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
    const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f;
    const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
+   const unsigned shrinkDict = 0;
    /* Local variables */
    const int displayLevel = parameters->zParams.notificationLevel;
    unsigned iteration = 1;
@@ -627,19 +630,19 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
    /* Checks */
    if (splitPoint <= 0 || splitPoint > 1) {
      LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");
-     return ERROR(GENERIC);
+     return ERROR(parameter_outOfBound);
    }
    if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) {
      LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n");
-     return ERROR(GENERIC);
+     return ERROR(parameter_outOfBound);
    }
    if (kMinK < kMaxD || kMaxK < kMinK) {
      LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n");
-     return ERROR(GENERIC);
+     return ERROR(parameter_outOfBound);
    }
    if (nbSamples == 0) {
      LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n");
-     return ERROR(GENERIC);
+     return ERROR(srcSize_wrong);
    }
    if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
      LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n",
@@ -666,11 +669,14 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
      /* Initialize the context for this value of d */
      FASTCOVER_ctx_t ctx;
      LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
-     if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams)) {
-       LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
-       COVER_best_destroy(&best);
-       POOL_free(pool);
-       return ERROR(GENERIC);
+     {
+       size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams);
+       if (ZSTD_isError(initVal)) {
+         LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
+         COVER_best_destroy(&best);
+         POOL_free(pool);
+         return initVal;
+       }
      }
      if (!warned) {
        COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel);
@@ -687,7 +693,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
        COVER_best_destroy(&best);
        FASTCOVER_ctx_destroy(&ctx);
        POOL_free(pool);
-       return ERROR(GENERIC);
+       return ERROR(memory_allocation);
      }
      data->ctx = &ctx;
      data->best = &best;
@@ -697,6 +703,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
      data->parameters.d = d;
      data->parameters.splitPoint = splitPoint;
      data->parameters.steps = kSteps;
+     data->parameters.shrinkDict = shrinkDict;
      data->parameters.zParams.notificationLevel = g_displayLevel;
      /* Check the parameters */
      if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,

@@ -741,7 +741,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
     /* analyze, build stats, starting with literals */
     {   size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
         if (HUF_isError(maxNbBits)) {
-            eSize = ERROR(GENERIC);
+            eSize = maxNbBits;
             DISPLAYLEVEL(1, " HUF_buildCTable error \n");
             goto _cleanup;
         }
@@ -764,7 +764,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
     total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
     errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax);
     if (FSE_isError(errorCode)) {
-        eSize = ERROR(GENERIC);
+        eSize = errorCode;
         DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
         goto _cleanup;
     }
@@ -773,7 +773,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
     total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u];
     errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
     if (FSE_isError(errorCode)) {
-        eSize = ERROR(GENERIC);
+        eSize = errorCode;
         DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
         goto _cleanup;
     }
@@ -782,7 +782,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
     total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
     errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
     if (FSE_isError(errorCode)) {
-        eSize = ERROR(GENERIC);
+        eSize = errorCode;
         DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
         goto _cleanup;
     }
@@ -791,7 +791,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
     /* write result to buffer */
     {   size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog);
         if (HUF_isError(hhSize)) {
-            eSize = ERROR(GENERIC);
+            eSize = hhSize;
             DISPLAYLEVEL(1, "HUF_writeCTable error \n");
             goto _cleanup;
         }
@@ -802,7 +802,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,

     {   size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
         if (FSE_isError(ohSize)) {
-            eSize = ERROR(GENERIC);
+            eSize = ohSize;
             DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n");
             goto _cleanup;
         }
@@ -813,7 +813,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,

     {   size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog);
         if (FSE_isError(mhSize)) {
-            eSize = ERROR(GENERIC);
+            eSize = mhSize;
             DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n");
             goto _cleanup;
         }
@@ -824,7 +824,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,

     {   size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog);
         if (FSE_isError(lhSize)) {
-            eSize = ERROR(GENERIC);
+            eSize = lhSize;
             DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n");
             goto _cleanup;
         }
@@ -834,7 +834,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
     }

     if (maxDstSize<12) {
-        eSize = ERROR(GENERIC);
+        eSize = ERROR(dstSize_tooSmall);
         DISPLAYLEVEL(1, "not enough space to write RepOffsets \n");
         goto _cleanup;
     }

@@ -94,6 +94,8 @@ typedef struct {
     unsigned steps;              /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
     unsigned nbThreads;          /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
     double splitPoint;           /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */
+    unsigned shrinkDict;         /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */
+    unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. */
     ZDICT_params_t zParams;
 } ZDICT_cover_params_t;

@@ -105,6 +107,9 @@ typedef struct {
     unsigned nbThreads;          /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
     double splitPoint;           /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */
     unsigned accel;              /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default(1) */
+    unsigned shrinkDict;         /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */
+    unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. */
+
     ZDICT_params_t zParams;
 } ZDICT_fastCover_params_t;

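Editor's note: these two fields are the user-facing switch for the dictionary-shrinking behavior added in this commit (the "smaller dictionaries" API, #1656). A hedged usage sketch against the public zdict.h API; the sample plumbing is elided and the 10% regression bound is an arbitrary illustrative choice:

    #include <string.h>
    #include "zdict.h"

    /* Train a cover dictionary that may be shrunk: accept the smallest
     * candidate whose total compressed size is at most 10% worse than
     * what the largest dictionary achieves. */
    size_t train_shrinking_dict(void* dictBuffer, size_t dictCapacity,
                                const void* samples, const size_t* sampleSizes,
                                unsigned nbSamples) {
        ZDICT_cover_params_t params;
        memset(&params, 0, sizeof(params));   /* zeroed fields select defaults */
        params.shrinkDict = 1;                /* enable shrinking */
        params.shrinkDictMaxRegression = 10;  /* tolerate <= 10% regression */
        return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictCapacity,
                                                   samples, sampleSizes, nbSamples,
                                                   &params);
    }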
@@ -238,6 +238,10 @@ MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size
             frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
             break;
     }
+    if (!ZSTD_isError(frameSizeInfo.compressedSize) && frameSizeInfo.compressedSize > srcSize) {
+        frameSizeInfo.compressedSize = ERROR(srcSize_wrong);
+        frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
+    }
     return frameSizeInfo;
 }

@@ -1073,99 +1073,102 @@ static size_t HUF_decompress_usingDTable(   /* -3% slower when non static */
                             const void* cSrc, size_t cSrcSize,
                             const U16* DTable)
 {
-    BYTE* const ostart = (BYTE*) dst;
-    BYTE* op = ostart;
-    BYTE* const omax = op + maxDstSize;
-    BYTE* const olimit = omax-15;
-
-    const void* ptr = DTable;
-    const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
-    const U32 dtLog = DTable[0];
-    size_t errorCode;
-    U32 reloadStatus;
-
-    /* Init */
-
-    const U16* jumpTable = (const U16*)cSrc;
-    const size_t length1 = FSE_readLE16(jumpTable);
-    const size_t length2 = FSE_readLE16(jumpTable+1);
-    const size_t length3 = FSE_readLE16(jumpTable+2);
-    const size_t length4 = cSrcSize - 6 - length1 - length2 - length3;   // check coherency !!
-    const char* const start1 = (const char*)(cSrc) + 6;
-    const char* const start2 = start1 + length1;
-    const char* const start3 = start2 + length2;
-    const char* const start4 = start3 + length3;
-    FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
-
-    if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
-
-    errorCode = FSE_initDStream(&bitD1, start1, length1);
-    if (FSE_isError(errorCode)) return errorCode;
-    errorCode = FSE_initDStream(&bitD2, start2, length2);
-    if (FSE_isError(errorCode)) return errorCode;
-    errorCode = FSE_initDStream(&bitD3, start3, length3);
-    if (FSE_isError(errorCode)) return errorCode;
-    errorCode = FSE_initDStream(&bitD4, start4, length4);
-    if (FSE_isError(errorCode)) return errorCode;
-
-    reloadStatus=FSE_reloadDStream(&bitD2);
-
-    /* 16 symbols per loop */
-    for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit);   /* D2-3-4 are supposed to be synchronized and finish together */
-        op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
-    {
-#define HUF_DECODE_SYMBOL_0(n, Dstream) \
-        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
-
-#define HUF_DECODE_SYMBOL_1(n, Dstream) \
-        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
-        if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
-
-#define HUF_DECODE_SYMBOL_2(n, Dstream) \
-        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
-        if (FSE_32bits()) FSE_reloadDStream(&Dstream)
-
-        HUF_DECODE_SYMBOL_1( 0, bitD1);
-        HUF_DECODE_SYMBOL_1( 1, bitD2);
-        HUF_DECODE_SYMBOL_1( 2, bitD3);
-        HUF_DECODE_SYMBOL_1( 3, bitD4);
-        HUF_DECODE_SYMBOL_2( 4, bitD1);
-        HUF_DECODE_SYMBOL_2( 5, bitD2);
-        HUF_DECODE_SYMBOL_2( 6, bitD3);
-        HUF_DECODE_SYMBOL_2( 7, bitD4);
-        HUF_DECODE_SYMBOL_1( 8, bitD1);
-        HUF_DECODE_SYMBOL_1( 9, bitD2);
-        HUF_DECODE_SYMBOL_1(10, bitD3);
-        HUF_DECODE_SYMBOL_1(11, bitD4);
-        HUF_DECODE_SYMBOL_0(12, bitD1);
-        HUF_DECODE_SYMBOL_0(13, bitD2);
-        HUF_DECODE_SYMBOL_0(14, bitD3);
-        HUF_DECODE_SYMBOL_0(15, bitD4);
-    }
-
-    if (reloadStatus!=FSE_DStream_completed)   /* not complete : some bitStream might be FSE_DStream_unfinished */
-        return (size_t)-FSE_ERROR_corruptionDetected;
-
-    /* tail */
-    {
-        // bitTail = bitD1;   // *much* slower : -20% !??!
-        FSE_DStream_t bitTail;
-        bitTail.ptr = bitD1.ptr;
-        bitTail.bitsConsumed = bitD1.bitsConsumed;
-        bitTail.bitContainer = bitD1.bitContainer;   // required in case of FSE_DStream_endOfBuffer
-        bitTail.start = start1;
-        for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
-        {
-            HUF_DECODE_SYMBOL_0(0, bitTail);
-        }
-
-        if (FSE_endOfDStream(&bitTail))
-            return op-ostart;
-    }
-
-    if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
-
-    return (size_t)-FSE_ERROR_corruptionDetected;
-}
+    if (cSrcSize < 6) return (size_t)-FSE_ERROR_srcSize_wrong;
+    {
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* op = ostart;
+        BYTE* const omax = op + maxDstSize;
+        BYTE* const olimit = omax-15;
+
+        const void* ptr = DTable;
+        const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+        U32 reloadStatus;
+
+        /* Init */
+
+        const U16* jumpTable = (const U16*)cSrc;
+        const size_t length1 = FSE_readLE16(jumpTable);
+        const size_t length2 = FSE_readLE16(jumpTable+1);
+        const size_t length3 = FSE_readLE16(jumpTable+2);
+        const size_t length4 = cSrcSize - 6 - length1 - length2 - length3;   // check coherency !!
+        const char* const start1 = (const char*)(cSrc) + 6;
+        const char* const start2 = start1 + length1;
+        const char* const start3 = start2 + length2;
+        const char* const start4 = start3 + length3;
+        FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
+
+        if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+
+        errorCode = FSE_initDStream(&bitD1, start1, length1);
+        if (FSE_isError(errorCode)) return errorCode;
+        errorCode = FSE_initDStream(&bitD2, start2, length2);
+        if (FSE_isError(errorCode)) return errorCode;
+        errorCode = FSE_initDStream(&bitD3, start3, length3);
+        if (FSE_isError(errorCode)) return errorCode;
+        errorCode = FSE_initDStream(&bitD4, start4, length4);
+        if (FSE_isError(errorCode)) return errorCode;
+
+        reloadStatus=FSE_reloadDStream(&bitD2);
+
+        /* 16 symbols per loop */
+        for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit);   /* D2-3-4 are supposed to be synchronized and finish together */
+            op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
+        {
+#define HUF_DECODE_SYMBOL_0(n, Dstream) \
+            op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
+
+#define HUF_DECODE_SYMBOL_1(n, Dstream) \
+            op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
+            if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
+
+#define HUF_DECODE_SYMBOL_2(n, Dstream) \
+            op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
+            if (FSE_32bits()) FSE_reloadDStream(&Dstream)
+
+            HUF_DECODE_SYMBOL_1( 0, bitD1);
+            HUF_DECODE_SYMBOL_1( 1, bitD2);
+            HUF_DECODE_SYMBOL_1( 2, bitD3);
+            HUF_DECODE_SYMBOL_1( 3, bitD4);
+            HUF_DECODE_SYMBOL_2( 4, bitD1);
+            HUF_DECODE_SYMBOL_2( 5, bitD2);
+            HUF_DECODE_SYMBOL_2( 6, bitD3);
+            HUF_DECODE_SYMBOL_2( 7, bitD4);
+            HUF_DECODE_SYMBOL_1( 8, bitD1);
+            HUF_DECODE_SYMBOL_1( 9, bitD2);
+            HUF_DECODE_SYMBOL_1(10, bitD3);
+            HUF_DECODE_SYMBOL_1(11, bitD4);
+            HUF_DECODE_SYMBOL_0(12, bitD1);
+            HUF_DECODE_SYMBOL_0(13, bitD2);
+            HUF_DECODE_SYMBOL_0(14, bitD3);
+            HUF_DECODE_SYMBOL_0(15, bitD4);
+        }
+
+        if (reloadStatus!=FSE_DStream_completed)   /* not complete : some bitStream might be FSE_DStream_unfinished */
+            return (size_t)-FSE_ERROR_corruptionDetected;
+
+        /* tail */
+        {
+            // bitTail = bitD1;   // *much* slower : -20% !??!
+            FSE_DStream_t bitTail;
+            bitTail.ptr = bitD1.ptr;
+            bitTail.bitsConsumed = bitD1.bitsConsumed;
+            bitTail.bitContainer = bitD1.bitContainer;   // required in case of FSE_DStream_endOfBuffer
+            bitTail.start = start1;
+            for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
+            {
+                HUF_DECODE_SYMBOL_0(0, bitTail);
+            }
+
+            if (FSE_endOfDStream(&bitTail))
+                return op-ostart;
+        }
+
+        if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
+
+        return (size_t)-FSE_ERROR_corruptionDetected;
+    }
+}
@@ -1355,8 +1358,6 @@ static unsigned ZSTD_isLittleEndian(void)

 static U16 ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }

-static U32 ZSTD_read32(const void* p) { U32 r; memcpy(&r, p, sizeof(r)); return r; }
-
 static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }

 static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
@@ -1381,16 +1382,9 @@ static U16 ZSTD_readLE16(const void* memPtr)
     }
 }

-static U32 ZSTD_readLE32(const void* memPtr)
+static U32 ZSTD_readLE24(const void* memPtr)
 {
-    if (ZSTD_isLittleEndian())
-        return ZSTD_read32(memPtr);
-    else
-    {
-        const BYTE* p = (const BYTE*)memPtr;
-        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
-    }
+    return ZSTD_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
 }

 static U32 ZSTD_readBE32(const void* memPtr)
@@ -1704,13 +1698,13 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
     seqState->prevOffset = seq->offset;
     if (litLength == MaxLL)
     {
-        U32 add = dumps<de ? *dumps++ : 0;
+        const U32 add = dumps<de ? *dumps++ : 0;
         if (add < 255) litLength += add;
         else
         {
             if (dumps<=(de-3))
             {
-                litLength = ZSTD_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+                litLength = ZSTD_readLE24(dumps);
                 dumps += 3;
             }
         }
@@ -1732,13 +1726,13 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
     matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
     if (matchLength == MaxML)
     {
-        U32 add = dumps<de ? *dumps++ : 0;
+        const U32 add = dumps<de ? *dumps++ : 0;
         if (add < 255) matchLength += add;
         else
         {
             if (dumps<=(de-3))
             {
-                matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+                matchLength = ZSTD_readLE24(dumps);
                 dumps += 3;
             }
         }
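Editor's note: across the legacy decoders, the pattern being installed is the same one: never read the "dumps" side-channel past its end "de", and clamp the cursor afterwards so a corrupted input degrades into a detectable error rather than an out-of-bounds read (the #1593-#1595 legacy fuzzing fixes). A standalone sketch of the guard with generic names; the exact clamping differs slightly between legacy versions:

    #include <stdint.h>

    /* Read an optional extra-length byte, then possibly a 3-byte
     * little-endian field, without ever dereferencing past `end`.
     * Mirrors the hardened dumps/de pattern from these hunks. */
    static uint32_t read_extra(const uint8_t **cursor, const uint8_t *end,
                               uint32_t base) {
        const uint8_t *p = *cursor;
        const uint32_t add = (p < end) ? *p++ : 0;       /* guarded byte read */
        if (add < 255) {
            base += add;
        } else if (p + 3 <= end) {                       /* guarded 3-byte read */
            base = (uint32_t)p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16);
            p += 3;
        }
        /* late correction, as in the diff: data is corrupt anyway, stay in bounds
         * (assumes end points past at least one valid byte) */
        if (p >= end) p = end - 1;
        *cursor = p;
        return base;
    }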
@@ -217,6 +217,11 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
     }
 }

+MEM_STATIC U32 MEM_readLE24(const void* memPtr)
+{
+    return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
+}
+
 MEM_STATIC U32 MEM_readLE32(const void* memPtr)
 {
     if (MEM_isLittleEndian())
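Editor's note: the new helper composes a 24-bit little-endian read from a 16-bit read plus the third byte shifted into the high bits. A quick worked check of the arithmetic, standalone and using portable stdint types instead of the library's U32/BYTE:

    #include <stdint.h>
    #include <assert.h>

    /* Same formula as MEM_readLE24, in portable form. */
    static uint32_t readLE24(const uint8_t *p) {
        const uint32_t le16 = (uint32_t)p[0] | ((uint32_t)p[1] << 8);
        return le16 + ((uint32_t)p[2] << 16);
    }

    int main(void) {
        const uint8_t buf[3] = { 0x01, 0x02, 0x03 };
        assert(readLE24(buf) == 0x030201);  /* bytes compose low-to-high */
        return 0;
    }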
@@ -3043,11 +3048,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
     seqState->prevOffset = seq->offset;
     if (litLength == MaxLL)
     {
-        U32 add = *dumps++;
+        const U32 add = dumps<de ? *dumps++ : 0;
         if (add < 255) litLength += add;
-        else
+        else if (dumps + 3 <= de)
         {
-            litLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            litLength = MEM_readLE24(dumps);
             dumps += 3;
         }
+        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
@@ -3073,11 +3078,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
     matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
     if (matchLength == MaxML)
     {
-        U32 add = *dumps++;
+        const U32 add = dumps<de ? *dumps++ : 0;
         if (add < 255) matchLength += add;
-        else
+        else if (dumps + 3 <= de)
         {
-            matchLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            matchLength = MEM_readLE24(dumps);
             dumps += 3;
         }
+        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
@@ -219,6 +219,11 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
     }
 }

+MEM_STATIC U32 MEM_readLE24(const void* memPtr)
+{
+    return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
+}
+
 MEM_STATIC U32 MEM_readLE32(const void* memPtr)
 {
     if (MEM_isLittleEndian())
@@ -2684,11 +2689,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
     seqState->prevOffset = seq->offset;
     if (litLength == MaxLL)
     {
-        U32 add = *dumps++;
+        const U32 add = dumps<de ? *dumps++ : 0;
         if (add < 255) litLength += add;
-        else
+        else if (dumps + 3 <= de)
         {
-            litLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            litLength = MEM_readLE24(dumps);
             dumps += 3;
         }
+        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
@@ -2714,11 +2719,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
     matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
     if (matchLength == MaxML)
     {
-        U32 add = *dumps++;
+        const U32 add = dumps<de ? *dumps++ : 0;
         if (add < 255) matchLength += add;
-        else
+        else if (dumps + 3 <= de)
        {
-            matchLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            matchLength = MEM_readLE24(dumps);
             dumps += 3;
         }
+        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
@@ -189,6 +189,11 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
     }
 }

+MEM_STATIC U32 MEM_readLE24(const void* memPtr)
+{
+    return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
+}
+
 MEM_STATIC U32 MEM_readLE32(const void* memPtr)
 {
     if (MEM_isLittleEndian())
@@ -2808,13 +2813,12 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
     litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
     prevOffset = litLength ? seq->offset : seqState->prevOffset;
     if (litLength == MaxLL) {
-        U32 add = *dumps++;
+        const U32 add = dumps<de ? *dumps++ : 0;
         if (add < 255) litLength += add;
-        else {
-            litLength = dumps[0] + (dumps[1]<<8) + (dumps[2]<<16);
+        else if (dumps + 3 <= de) {
+            litLength = MEM_readLE24(dumps);
             dumps += 3;
         }
-        if (dumps > de) { litLength = MaxLL+255; }   /* late correction, to avoid using uninitialized memory */
+        if (dumps >= de) { dumps = de-1; }   /* late correction, to avoid read overflow (data is now corrupted anyway) */
     }

@@ -2837,13 +2841,12 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
     /* MatchLength */
     matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
     if (matchLength == MaxML) {
-        U32 add = *dumps++;
+        const U32 add = dumps<de ? *dumps++ : 0;
         if (add < 255) matchLength += add;
-        else {
-            matchLength = dumps[0] + (dumps[1]<<8) + (dumps[2]<<16);
+        else if (dumps + 3 <= de){
+            matchLength = MEM_readLE24(dumps);
             dumps += 3;
         }
-        if (dumps > de) { matchLength = MaxML+255; }   /* late correction, to avoid using uninitialized memory */
+        if (dumps >= de) { dumps = de-1; }   /* late correction, to avoid read overflow (data is now corrupted anyway) */
     }
     matchLength += MINMATCH;

@@ -218,6 +218,11 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
     }
 }

+MEM_STATIC U32 MEM_readLE24(const void* memPtr)
+{
+    return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
+}
+
 MEM_STATIC U32 MEM_readLE32(const void* memPtr)
 {
     if (MEM_isLittleEndian())
@ -1998,91 +2003,92 @@ size_t HUFv05_decompress4X2_usingDTable(
|
||||
const void* cSrc, size_t cSrcSize,
|
||||
const U16* DTable)
|
||||
{
|
||||
const BYTE* const istart = (const BYTE*) cSrc;
|
||||
BYTE* const ostart = (BYTE*) dst;
|
||||
BYTE* const oend = ostart + dstSize;
|
||||
const void* const dtPtr = DTable;
|
||||
const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr) +1;
|
||||
const U32 dtLog = DTable[0];
|
||||
size_t errorCode;
|
||||
|
||||
/* Init */
|
||||
BITv05_DStream_t bitD1;
|
||||
BITv05_DStream_t bitD2;
|
||||
BITv05_DStream_t bitD3;
|
||||
BITv05_DStream_t bitD4;
|
||||
const size_t length1 = MEM_readLE16(istart);
|
||||
const size_t length2 = MEM_readLE16(istart+2);
|
||||
const size_t length3 = MEM_readLE16(istart+4);
|
||||
size_t length4;
|
||||
const BYTE* const istart1 = istart + 6; /* jumpTable */
|
||||
const BYTE* const istart2 = istart1 + length1;
|
||||
const BYTE* const istart3 = istart2 + length2;
|
||||
const BYTE* const istart4 = istart3 + length3;
|
||||
const size_t segmentSize = (dstSize+3) / 4;
|
||||
BYTE* const opStart2 = ostart + segmentSize;
|
||||
BYTE* const opStart3 = opStart2 + segmentSize;
|
||||
BYTE* const opStart4 = opStart3 + segmentSize;
|
||||
BYTE* op1 = ostart;
|
||||
BYTE* op2 = opStart2;
|
||||
BYTE* op3 = opStart3;
|
||||
BYTE* op4 = opStart4;
|
||||
U32 endSignal;
|
||||
|
||||
/* Check */
|
||||
if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */
{
const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
const void* const dtPtr = DTable;
const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr) +1;
const U32 dtLog = DTable[0];
size_t errorCode;

length4 = cSrcSize - (length1 + length2 + length3 + 6);
if (length4 > cSrcSize) return ERROR(corruption_detected);  /* overflow */
errorCode = BITv05_initDStream(&bitD1, istart1, length1);
if (HUFv05_isError(errorCode)) return errorCode;
errorCode = BITv05_initDStream(&bitD2, istart2, length2);
if (HUFv05_isError(errorCode)) return errorCode;
errorCode = BITv05_initDStream(&bitD3, istart3, length3);
if (HUFv05_isError(errorCode)) return errorCode;
errorCode = BITv05_initDStream(&bitD4, istart4, length4);
if (HUFv05_isError(errorCode)) return errorCode;
/* Init */
BITv05_DStream_t bitD1;
BITv05_DStream_t bitD2;
BITv05_DStream_t bitD3;
BITv05_DStream_t bitD4;
const size_t length1 = MEM_readLE16(istart);
const size_t length2 = MEM_readLE16(istart+2);
const size_t length3 = MEM_readLE16(istart+4);
size_t length4;
const BYTE* const istart1 = istart + 6;  /* jumpTable */
const BYTE* const istart2 = istart1 + length1;
const BYTE* const istart3 = istart2 + length2;
const BYTE* const istart4 = istart3 + length3;
const size_t segmentSize = (dstSize+3) / 4;
BYTE* const opStart2 = ostart + segmentSize;
BYTE* const opStart3 = opStart2 + segmentSize;
BYTE* const opStart4 = opStart3 + segmentSize;
BYTE* op1 = ostart;
BYTE* op2 = opStart2;
BYTE* op3 = opStart3;
BYTE* op4 = opStart4;
U32 endSignal;

/* 16-32 symbols per loop (4-8 symbols per stream) */
endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) {
HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
HUFv05_DECODE_SYMBOLX2_1(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_1(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_1(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_1(op4, &bitD4);
HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
HUFv05_DECODE_SYMBOLX2_0(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_0(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_0(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_0(op4, &bitD4);
length4 = cSrcSize - (length1 + length2 + length3 + 6);
if (length4 > cSrcSize) return ERROR(corruption_detected);  /* overflow */
errorCode = BITv05_initDStream(&bitD1, istart1, length1);
if (HUFv05_isError(errorCode)) return errorCode;
errorCode = BITv05_initDStream(&bitD2, istart2, length2);
if (HUFv05_isError(errorCode)) return errorCode;
errorCode = BITv05_initDStream(&bitD3, istart3, length3);
if (HUFv05_isError(errorCode)) return errorCode;
errorCode = BITv05_initDStream(&bitD4, istart4, length4);
if (HUFv05_isError(errorCode)) return errorCode;

/* 16-32 symbols per loop (4-8 symbols per stream) */
endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) {
HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
HUFv05_DECODE_SYMBOLX2_1(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_1(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_1(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_1(op4, &bitD4);
HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
HUFv05_DECODE_SYMBOLX2_0(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_0(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_0(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_0(op4, &bitD4);
endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
}

/* check corruption */
if (op1 > opStart2) return ERROR(corruption_detected);
if (op2 > opStart3) return ERROR(corruption_detected);
if (op3 > opStart4) return ERROR(corruption_detected);
/* note : op4 supposed already verified within main loop */

/* finish bitStreams one by one */
HUFv05_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
HUFv05_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
HUFv05_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
HUFv05_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);

/* check */
endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4);
if (!endSignal) return ERROR(corruption_detected);

/* decoded size */
return dstSize;
}

/* check corruption */
if (op1 > opStart2) return ERROR(corruption_detected);
if (op2 > opStart3) return ERROR(corruption_detected);
if (op3 > opStart4) return ERROR(corruption_detected);
/* note : op4 supposed already verified within main loop */

/* finish bitStreams one by one */
HUFv05_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
HUFv05_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
HUFv05_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
HUFv05_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);

/* check */
endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4);
if (!endSignal) return ERROR(corruption_detected);

/* decoded size */
return dstSize;
}
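For context, the hunk above hardens the legacy HUFv05 4-stream decoder: the first three stream sizes come from a 6-byte little-endian jump table, the fourth is inferred from the total, and a corrupted jump table can make that inference wrap around. A minimal sketch of the parsing step, with hypothetical names (illustration only, not the decoder itself):

    #include <stddef.h>

    /* Split a 4-stream Huffman segment, rejecting corrupted jump tables.
     * Mirrors the checks above : >= 10 bytes, and no size_t underflow on length4. */
    static int splitFourStreams(const unsigned char* src, size_t srcSize,
                                const unsigned char* starts[4], size_t lengths[4])
    {
        size_t l1, l2, l3, l4;
        if (srcSize < 10) return 1;                   /* jump table (6) + 1 byte per stream */
        l1 = (size_t)src[0] | ((size_t)src[1] << 8);  /* MEM_readLE16(istart) */
        l2 = (size_t)src[2] | ((size_t)src[3] << 8);
        l3 = (size_t)src[4] | ((size_t)src[5] << 8);
        l4 = srcSize - (l1 + l2 + l3 + 6);
        if (l4 > srcSize) return 1;                   /* underflow => corrupted input */
        starts[0] = src + 6;
        starts[1] = starts[0] + l1;
        starts[2] = starts[1] + l2;
        starts[3] = starts[2] + l3;
        lengths[0] = l1; lengths[1] = l2; lengths[2] = l3; lengths[3] = l4;
        return 0;
    }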
@ -3150,14 +3156,13 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
litLength = FSEv05_peakSymbol(&(seqState->stateLL));
prevOffset = litLength ? seq->offset : seqState->prevOffset;
if (litLength == MaxLL) {
U32 add = *dumps++;
const U32 add = *dumps++;
if (add < 255) litLength += add;
else {
litLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no risk : dumps is always followed by seq tables > 1 byte */
else if (dumps + 3 <= de) {
litLength = MEM_readLE24(dumps);
if (litLength&1) litLength>>=1, dumps += 3;
else litLength = (U16)(litLength)>>1, dumps += 2;
}
if (dumps > de) { litLength = MaxLL+255; }  /* late correction, to avoid using uninitialized memory */
if (dumps >= de) { dumps = de-1; }  /* late correction, to avoid read overflow (data is now corrupted anyway) */
}

@ -3184,14 +3189,13 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
/* MatchLength */
matchLength = FSEv05_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
if (matchLength == MaxML) {
U32 add = *dumps++;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) matchLength += add;
else {
matchLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
else if (dumps + 3 <= de) {
matchLength = MEM_readLE24(dumps);
if (matchLength&1) matchLength>>=1, dumps += 3;
else matchLength = (U16)(matchLength)>>1, dumps += 2;
}
if (dumps > de) { matchLength = MaxML+255; }  /* late correction, to avoid using uninitialized memory */
if (dumps >= de) { dumps = de-1; }  /* late correction, to avoid read overflow (data is now corrupted anyway) */
}
matchLength += MINMATCH;
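Both hunks above replace an unconditional 3-byte read from the `dumps` area with a bounds-checked one. A sketch of the bounded-read pattern, using a hypothetical helper (the real code inlines it as MEM_readLE24 guarded by `dumps + 3 <= de`):

    /* Read a 24-bit little-endian value only when 3 bytes remain before `end`.
     * Returns 0 when out of bounds ; the caller treats that as "no extension". */
    static unsigned readLE24_bounded(const unsigned char* p, const unsigned char* end)
    {
        if (end - p < 3) return 0;
        return (unsigned)p[0] | ((unsigned)p[1] << 8) | ((unsigned)p[2] << 16);
    }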
@ -3242,14 +3242,12 @@ static size_t ZSTDv06_decodeSeqHeaders(int* nbSeqPtr,
}

/* FSE table descriptors */
if (ip + 4 > iend) return ERROR(srcSize_wrong);  /* min : header byte + all 3 are "raw", hence no header, but at least xxLog bits per type */
{ U32 const LLtype = *ip >> 6;
U32 const Offtype = (*ip >> 4) & 3;
U32 const MLtype = (*ip >> 2) & 3;
ip++;

/* check */
if (ip > iend-3) return ERROR(srcSize_wrong);  /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */

/* Build DTables */
{ size_t const bhSize = ZSTDv06_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable);
if (ZSTDv06_isError(bhSize)) return ERROR(corruption_detected);

@ -3672,7 +3670,7 @@ void ZSTDv06_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cS
blockProperties_t blockProperties = { bt_compressed, 0 };

/* Frame Header */
{ size_t const frameHeaderSize = ZSTDv06_frameHeaderSize(src, ZSTDv06_frameHeaderSize_min);
{ size_t const frameHeaderSize = ZSTDv06_frameHeaderSize(src, srcSize);
if (ZSTDv06_isError(frameHeaderSize)) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, frameHeaderSize);
return;
@ -3470,14 +3470,12 @@ static size_t ZSTDv07_decodeSeqHeaders(int* nbSeqPtr,
}

/* FSE table descriptors */
if (ip + 4 > iend) return ERROR(srcSize_wrong);  /* min : header byte + all 3 are "raw", hence no header, but at least xxLog bits per type */
{ U32 const LLtype = *ip >> 6;
U32 const OFtype = (*ip >> 4) & 3;
U32 const MLtype = (*ip >> 2) & 3;
ip++;

/* check */
if (ip > iend-3) return ERROR(srcSize_wrong);  /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */

/* Build DTables */
{ size_t const llhSize = ZSTDv07_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable);
if (ZSTDv07_isError(llhSize)) return ERROR(corruption_detected);

@ -3918,7 +3916,7 @@ void ZSTDv07_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cS
}

/* Frame Header */
{ size_t const frameHeaderSize = ZSTDv07_frameHeaderSize(src, ZSTDv07_frameHeaderSize_min);
{ size_t const frameHeaderSize = ZSTDv07_frameHeaderSize(src, srcSize);
if (ZSTDv07_isError(frameHeaderSize)) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, frameHeaderSize);
return;
76
lib/zstd.h
@ -71,7 +71,7 @@ extern "C" {
/*------   Version   ------*/
#define ZSTD_VERSION_MAJOR 1
#define ZSTD_VERSION_MINOR 4
#define ZSTD_VERSION_RELEASE 0
#define ZSTD_VERSION_RELEASE 1

#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
ZSTDLIB_API unsigned ZSTD_versionNumber(void);  /**< to check runtime library version */
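Following the macro above, a release encodes as MAJOR*100*100 + MINOR*100 + RELEASE, so this v1.4.1 header yields 10401. A small self-contained check comparing the compile-time constant against the runtime library (both symbols are part of the stable API):

    #include <stdio.h>
    #include <zstd.h>

    int main(void)
    {
        /* v1.4.1 : 1*100*100 + 4*100 + 1 == 10401 */
        printf("compiled against : %u\n", (unsigned)ZSTD_VERSION_NUMBER);
        printf("running with     : %u\n", ZSTD_versionNumber());
        return 0;
    }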
@ -82,16 +82,16 @@ ZSTDLIB_API unsigned ZSTD_versionNumber(void);   /**< to check runtime library v
#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
ZSTDLIB_API const char* ZSTD_versionString(void);  /* requires v1.3.0+ */

/***************************************
* Default constant
***************************************/
/* *************************************
* Default constant
***************************************/
#ifndef ZSTD_CLEVEL_DEFAULT
#  define ZSTD_CLEVEL_DEFAULT 3
#endif

/***************************************
* Constants
***************************************/
/* *************************************
* Constants
***************************************/

/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */
#define ZSTD_MAGICNUMBER 0xFD2FB528  /* valid since v0.8.0 */
@ -183,9 +183,14 @@ ZSTDLIB_API int ZSTD_maxCLevel(void);   /*!< maximum compres
***************************************/
/*= Compression context
* When compressing many times,
* it is recommended to allocate a context just once, and re-use it for each successive compression operation.
* it is recommended to allocate a context just once,
* and re-use it for each successive compression operation.
* This will make workload friendlier for system's memory.
* Use one context per thread for parallel execution in multi-threaded environments. */
* Note : re-using context is just a speed / resource optimization.
*        It doesn't change the compression ratio, which remains identical.
* Note 2 : In multi-threaded environments,
*        use one different context per thread for parallel execution.
*/
typedef struct ZSTD_CCtx_s ZSTD_CCtx;
ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx);
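A minimal sketch of the reuse pattern this comment recommends, compressing several inputs through one context (the helper name and buffer handling are placeholders; ZSTD_compressCCtx() is stable API):

    #include <zstd.h>

    /* Compress n buffers with a single reused context.
     * Returns 0 on success, or the first zstd error code. */
    static size_t compress_many(ZSTD_CCtx* cctx, void* dst, size_t dstCap,
                                const void* const srcs[], const size_t srcSizes[], int n)
    {
        int i;
        for (i = 0; i < n; i++) {
            size_t const r = ZSTD_compressCCtx(cctx, dst, dstCap,
                                               srcs[i], srcSizes[i], 3 /* level */);
            if (ZSTD_isError(r)) return r;
            /* ... consume r bytes of dst ... */
        }
        return 0;
    }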
@ -380,6 +385,7 @@ typedef enum {
* ZSTD_c_forceMaxWindow
* ZSTD_c_forceAttachDict
* ZSTD_c_literalCompressionMode
* ZSTD_c_targetCBlockSize
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly;
* also, the enums values themselves are unstable and can still change.
@ -389,6 +395,7 @@ typedef enum {
ZSTD_c_experimentalParam3=1000,
ZSTD_c_experimentalParam4=1001,
ZSTD_c_experimentalParam5=1002,
ZSTD_c_experimentalParam6=1003,
} ZSTD_cParameter;

typedef struct {
@ -657,17 +664,33 @@ ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
ZSTD_inBuffer* input,
ZSTD_EndDirective endOp);

ZSTDLIB_API size_t ZSTD_CStreamInSize(void);  /**< recommended size for input buffer */
ZSTDLIB_API size_t ZSTD_CStreamOutSize(void);  /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */

/*******************************************************************************
* This is a legacy streaming API, and can be replaced by ZSTD_CCtx_reset() and
* ZSTD_compressStream2(). It is redundant, but is still fully supported.
/* These buffer sizes are softly recommended.
* They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output.
* Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(),
* reducing the amount of memory shuffling and buffering, resulting in minor performance savings.
*
* However, note that these recommendations are from the perspective of a C caller program.
* If the streaming interface is invoked from some other language,
* especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo,
* a major performance rule is to reduce crossing such interface to an absolute minimum.
* It's not rare that performance ends being spent more into the interface, rather than compression itself.
* In which cases, prefer using large buffers, as large as practical,
* for both input and output, to reduce the nb of roundtrips.
*/
ZSTDLIB_API size_t ZSTD_CStreamInSize(void);  /**< recommended size for input buffer */
ZSTDLIB_API size_t ZSTD_CStreamOutSize(void);  /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */
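A sketch of a streaming loop sized with these recommendations; the function name is a placeholder and error handling is kept minimal:

    #include <stdio.h>
    #include <stdlib.h>
    #include <zstd.h>

    static int stream_compress(ZSTD_CCtx* cctx, FILE* fin, FILE* fout)
    {
        size_t const inCap  = ZSTD_CStreamInSize();   /* recommended input size */
        size_t const outCap = ZSTD_CStreamOutSize();  /* recommended output size */
        void* const inBuf  = malloc(inCap);
        void* const outBuf = malloc(outCap);
        if (!inBuf || !outBuf) { free(inBuf); free(outBuf); return 1; }
        for (;;) {
            size_t const readSize = fread(inBuf, 1, inCap, fin);
            int const lastChunk = (readSize < inCap);
            ZSTD_EndDirective const mode = lastChunk ? ZSTD_e_end : ZSTD_e_continue;
            ZSTD_inBuffer in = { inBuf, readSize, 0 };
            int done = 0;
            while (!done) {
                ZSTD_outBuffer out = { outBuf, outCap, 0 };
                size_t const remaining = ZSTD_compressStream2(cctx, &out, &in, mode);
                if (ZSTD_isError(remaining)) { free(inBuf); free(outBuf); return 1; }
                fwrite(outBuf, 1, out.pos, fout);
                /* at end-of-input, loop until the frame is fully flushed */
                done = lastChunk ? (remaining == 0) : (in.pos == in.size);
            }
            if (lastChunk) break;
        }
        free(inBuf); free(outBuf);
        return 0;
    }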
/* *****************************************************************************
* This following is a legacy streaming API.
* It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2().
* It is redundant, but remains fully supported.
* Advanced parameters and dictionary compression can only be used through the
* new API.
******************************************************************************/

/**
/*!
* Equivalent to:
*
*     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
@ -675,16 +698,16 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void);   /**< recommended size for output
*     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
*/
ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
/**
/*!
* Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).
* NOTE: The return value is different. ZSTD_compressStream() returns a hint for
* the next read size (if non-zero and not an error). ZSTD_compressStream2()
* returns the number of bytes left to flush (if non-zero and not an error).
* returns the minimum nb of bytes left to flush (if non-zero and not an error).
*/
ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
/** Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
/** Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
@ -969,7 +992,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
#endif  /* ZSTD_H_235446 */

/****************************************************************************************
/* **************************************************************************************
* ADVANCED AND EXPERIMENTAL FUNCTIONS
****************************************************************************************
* The definitions in the following section are considered experimental.
@ -1037,6 +1060,10 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
#define ZSTD_LDM_HASHRATELOG_MIN 0
#define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)

/* Advanced parameter bounds */
#define ZSTD_TARGETCBLOCKSIZE_MIN 64
#define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX

/* internal */
#define ZSTD_HASHLOG3_MAX 17
@ -1162,7 +1189,7 @@ typedef enum {
* however it does mean that all frame data must be present and valid. */
ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);

/** ZSTD_decompressBound() :
/*! ZSTD_decompressBound() :
* `src` should point to the start of a series of ZSTD encoded and/or skippable frames
* `srcSize` must be the _exact_ size of this series
* (i.e. there should be a frame boundary at `src + srcSize`)
@ -1409,6 +1436,11 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
*/
#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5

/* Tries to fit compressed block size to be around targetCBlockSize.
* No target when targetCBlockSize == 0.
* There is no guarantee on compressed block size (default:0) */
#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
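A sketch of opting into this parameter: being experimental, it requires ZSTD_STATIC_LINKING_ONLY before including zstd.h, and the 1024-byte target below is just an example value inside the documented bounds:

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    static int set_target_block_size(ZSTD_CCtx* cctx)
    {
        /* 1024 lies within [ZSTD_TARGETCBLOCKSIZE_MIN, ZSTD_TARGETCBLOCKSIZE_MAX] */
        size_t const r = ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 1024);
        return ZSTD_isError(r) ? 1 : 0;
    }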
/*! ZSTD_CCtx_getParameter() :
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
* and store it into int* value.
@ -1843,7 +1875,7 @@ typedef struct {
unsigned checksumFlag;
} ZSTD_frameHeader;

/** ZSTD_getFrameHeader() :
/*! ZSTD_getFrameHeader() :
* decode Frame Header, or requires larger `srcSize`.
* @return : 0, `zfhPtr` is correctly filled,
*          >0, `srcSize` is too small, value is wanted `srcSize` amount,
1
programs/.gitignore
vendored
@ -33,4 +33,5 @@ afl

# Misc files
*.bat
!windres/generate_res.bat
dirTest*
@ -37,7 +37,7 @@ There are however other Makefile targets that create different variations of CLI
`.gz` support is automatically enabled when `zlib` library is detected at build time.
It's possible to disable `.gz` support, by setting `HAVE_ZLIB=0`.
Example : `make zstd HAVE_ZLIB=0`
It's also possible to force compilation with zlib support, `using HAVE_ZLIB=1`.
It's also possible to force compilation with zlib support, using `HAVE_ZLIB=1`.
In which case, linking stage will fail if `zlib` library cannot be found.
This is useful to prevent silent feature disabling.

@ -45,7 +45,7 @@ There are however other Makefile targets that create different variations of CLI
This is ordered through commands `--format=xz` and `--format=lzma` respectively.
Alternatively, symlinks named `xz`, `unxz`, `lzma`, or `unlzma` will mimic intended behavior.
`.xz` and `.lzma` support is automatically enabled when `lzma` library is detected at build time.
It's possible to disable `.xz` and `.lzma` support, by setting `HAVE_LZMA=0` .
It's possible to disable `.xz` and `.lzma` support, by setting `HAVE_LZMA=0`.
Example : `make zstd HAVE_LZMA=0`
It's also possible to force compilation with lzma support, using `HAVE_LZMA=1`.
In which case, linking stage will fail if `lzma` library cannot be found.
@ -15,7 +15,6 @@
***************************************/
#include <stdlib.h>  /* malloc, free */
#include <string.h>  /* memset */
#undef NDEBUG  /* assert must not be disabled */
#include <assert.h>  /* assert */

#include "timefn.h"  /* UTIL_time_t, UTIL_getTime */
@ -54,6 +53,9 @@
return retValue; \
}

/* Abort execution if a condition is not met */
#define CONTROL(c)  { if (!(c)) { DEBUGOUTPUT("error: %s \n", #c); abort(); } }

/* *************************************
* Benchmarking an arbitrary function
@ -68,13 +70,13 @@ int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)
* check outcome validity first, using BMK_isValid_runResult() */
BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
{
assert(outcome.error_tag_never_ever_use_directly == 0);
CONTROL(outcome.error_tag_never_ever_use_directly == 0);
return outcome.internal_never_ever_use_directly;
}

size_t BMK_extract_errorResult(BMK_runOutcome_t outcome)
{
assert(outcome.error_tag_never_ever_use_directly != 0);
CONTROL(outcome.error_tag_never_ever_use_directly != 0);
return outcome.error_result_never_ever_use_directly;
}
@ -175,7 +175,7 @@ static void clearHandler(void)

#if !defined(BACKTRACE_ENABLE)
/* automatic detector : backtrace enabled by default on linux+glibc and osx */
# if (defined(__linux__) && defined(__GLIBC__)) \
# if (defined(__linux__) && (defined(__GLIBC__) && !defined(__UCLIBC__))) \
|| (defined(__APPLE__) && defined(__MACH__))
#  define BACKTRACE_ENABLE 1
# else
@ -269,6 +269,13 @@ void FIO_addAbortHandler()
else
return -1;
}
static __int64 LONG_TELL(FILE* file) {
LARGE_INTEGER off, newOff;
off.QuadPart = 0;
newOff.QuadPart = 0;
SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, &newOff, FILE_CURRENT);
return newOff.QuadPart;
}
#else
# define LONG_SEEK fseek
# define LONG_TELL ftell
@ -297,6 +304,7 @@ struct FIO_prefs_s {
int ldmMinMatch;
int ldmBucketSizeLog;
int ldmHashRateLog;
size_t targetCBlockSize;
ZSTD_literalCompressionMode_e literalCompressionMode;

/* IO preferences */
@ -341,6 +349,7 @@ FIO_prefs_t* FIO_createPreferences(void)
ret->ldmMinMatch = 0;
ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET;
ret->targetCBlockSize = 0;
ret->literalCompressionMode = ZSTD_lcm_auto;
return ret;
}
@ -409,6 +418,10 @@ void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) {
prefs->rsyncable = rsyncable;
}

void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize) {
prefs->targetCBlockSize = targetCBlockSize;
}

void FIO_setLiteralCompressionMode(
FIO_prefs_t* const prefs,
ZSTD_literalCompressionMode_e mode) {
@ -557,8 +570,11 @@ static FILE* FIO_openDstFile(FIO_prefs_t* const prefs, const char* srcFileName,
} }

{ FILE* const f = fopen( dstFileName, "wb" );
if (f == NULL)
if (f == NULL) {
DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
} else {
chmod(dstFileName, 00600);
}
return f;
}
}
@ -649,6 +665,8 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, prefs->checksumFlag) );
/* compression level */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) );
/* max compressed block size */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetCBlockSize, (int)prefs->targetCBlockSize) );
/* long distance matching */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) );
@ -1158,6 +1176,8 @@ FIO_compressFilename_internal(FIO_prefs_t* const prefs,
const char* dstFileName, const char* srcFileName,
int compressionLevel)
{
UTIL_time_t const timeStart = UTIL_getTime();
clock_t const cpuStart = clock();
U64 readsize = 0;
U64 compressedfilesize = 0;
U64 const fileSize = UTIL_getFileSize(srcFileName);
@ -1210,6 +1230,15 @@ FIO_compressFilename_internal(FIO_prefs_t* const prefs,
(unsigned long long)readsize, (unsigned long long) compressedfilesize,
dstFileName);

/* Elapsed Time and CPU Load */
{ clock_t const cpuEnd = clock();
double const cpuLoad_s = (double)(cpuEnd - cpuStart) / CLOCKS_PER_SEC;
U64 const timeLength_ns = UTIL_clockSpanNano(timeStart);
double const timeLength_s = (double)timeLength_ns / 1000000000;
double const cpuLoad_pct = (cpuLoad_s / timeLength_s) * 100;
DISPLAYLEVEL(4, "%-20s : Completed in %.2f sec  (cpu load : %.0f%%)\n",
srcFileName, timeLength_s, cpuLoad_pct);
}
return 0;
}
@ -1332,15 +1361,12 @@ int FIO_compressFilename(FIO_prefs_t* const prefs,
const char* dictFileName, int compressionLevel,
ZSTD_compressionParameters comprParams)
{
clock_t const start = clock();
U64 const fileSize = UTIL_getFileSize(srcFileName);
U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize;

cRess_t const ress = FIO_createCResources(prefs, dictFileName, compressionLevel, srcSize, comprParams);
int const result = FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel);

double const seconds = (double)(clock() - start) / CLOCKS_PER_SEC;
DISPLAYLEVEL(4, "Completed in %.2f sec \n", seconds);

FIO_freeCResources(ress);
return result;

@ -71,6 +71,7 @@ void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog);
void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag);
void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse);  /**< 0: no sparse; 1: disable on stdout; 2: always enabled */
void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable);
void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize);
void FIO_setLiteralCompressionMode(
FIO_prefs_t* const prefs,
ZSTD_literalCompressionMode_e mode);
@ -107,19 +107,11 @@ int UTIL_isSameFile(const char* file1, const char* file2)
U32 UTIL_isLink(const char* infilename)
{
/* macro guards, as defined in : https://linux.die.net/man/2/lstat */
#ifndef __STRICT_ANSI__
#if defined(_BSD_SOURCE) \
|| (defined(_XOPEN_SOURCE) && (_XOPEN_SOURCE >= 500)) \
|| (defined(_XOPEN_SOURCE) && defined(_XOPEN_SOURCE_EXTENDED)) \
|| (defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L)) \
|| (defined(__APPLE__) && defined(__MACH__)) \
|| defined(__OpenBSD__) \
|| defined(__FreeBSD__)
#if PLATFORM_POSIX_VERSION >= 200112L
int r;
stat_t statbuf;
r = lstat(infilename, &statbuf);
if (!r && S_ISLNK(statbuf.st_mode)) return 1;
#endif
#endif
(void)infilename;
return 0;
@ -1,5 +1,5 @@
.
.TH "ZSTD" "1" "December 2018" "zstd 1.3.8" "User Commands"
.TH "ZSTD" "1" "July 2019" "zstd 1.4.1" "User Commands"
.
.SH "NAME"
\fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files
@ -187,6 +187,10 @@ verbose mode
suppress warnings, interactivity, and notifications\. specify twice to suppress errors too\.
.
.TP
\fB\-\-no\-progress\fR
do not display the progress bar, but keep all other messages\.
.
.TP
\fB\-C\fR, \fB\-\-[no\-]check\fR
add integrity check computed from uncompressed data (default: enabled)
.
@ -141,6 +141,7 @@ static int usage_advanced(const char* programName)
DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog);
DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1);
DISPLAY( "--adapt : dynamically adapt compression level to I/O conditions \n");
DISPLAY( "--target-compressed-block-size=# : make compressed block near targeted size \n");
#ifdef ZSTD_MULTITHREAD
DISPLAY( " -T#    : spawns # compression threads (default: 1, 0==# cores) \n");
DISPLAY( " -B#    : select size of each job (default: 0==automatic) \n");
@ -179,8 +180,8 @@ static int usage_advanced(const char* programName)
DISPLAY( "\n");
DISPLAY( "Dictionary builder : \n");
DISPLAY( "--train ## : create a dictionary from a training set of files \n");
DISPLAY( "--train-cover[=k=#,d=#,steps=#,split=#] : use the cover algorithm with optional args\n");
DISPLAY( "--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#] : use the fast cover algorithm with optional args\n");
DISPLAY( "--train-cover[=k=#,d=#,steps=#,split=#,shrink[=#]] : use the cover algorithm with optional args\n");
DISPLAY( "--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#,shrink[=#]] : use the fast cover algorithm with optional args\n");
DISPLAY( "--train-legacy[=s=#] : use the legacy algorithm with selectivity (default: %u)\n", g_defaultSelectivityLevel);
DISPLAY( " -o file : `file` is dictionary name (default: %s) \n", g_defaultDictName);
DISPLAY( "--maxdict=# : limit dictionary to specified size (default: %u) \n", g_defaultMaxDictSize);
@ -299,6 +300,7 @@ static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
* @return 1 means that cover parameters were correct
* @return 0 in case of malformed parameters
*/
static const unsigned kDefaultRegression = 1;
static unsigned parseCoverParameters(const char* stringPtr, ZDICT_cover_params_t* params)
{
memset(params, 0, sizeof(*params));
@ -311,10 +313,23 @@ static unsigned parseCoverParameters(const char* stringPtr, ZDICT_cover_params_t
params->splitPoint = (double)splitPercentage / 100.0;
if (stringPtr[0]==',') { stringPtr++; continue; } else break;
}
if (longCommandWArg(&stringPtr, "shrink")) {
params->shrinkDictMaxRegression = kDefaultRegression;
params->shrinkDict = 1;
if (stringPtr[0]=='=') {
stringPtr++;
params->shrinkDictMaxRegression = readU32FromChar(&stringPtr);
}
if (stringPtr[0]==',') {
stringPtr++;
continue;
}
else break;
}
return 0;
}
if (stringPtr[0] != 0) return 0;
DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nsteps=%u\nsplit=%u\n", params->k, params->d, params->steps, (unsigned)(params->splitPoint * 100));
DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nsteps=%u\nsplit=%u\nshrink%u\n", params->k, params->d, params->steps, (unsigned)(params->splitPoint * 100), params->shrinkDictMaxRegression);
return 1;
}
@ -338,10 +353,23 @@ static unsigned parseFastCoverParameters(const char* stringPtr, ZDICT_fastCover_
params->splitPoint = (double)splitPercentage / 100.0;
if (stringPtr[0]==',') { stringPtr++; continue; } else break;
}
if (longCommandWArg(&stringPtr, "shrink")) {
params->shrinkDictMaxRegression = kDefaultRegression;
params->shrinkDict = 1;
if (stringPtr[0]=='=') {
stringPtr++;
params->shrinkDictMaxRegression = readU32FromChar(&stringPtr);
}
if (stringPtr[0]==',') {
stringPtr++;
continue;
}
else break;
}
return 0;
}
if (stringPtr[0] != 0) return 0;
DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", params->k, params->d, params->f, params->steps, (unsigned)(params->splitPoint * 100), params->accel);
DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\nshrink=%u\n", params->k, params->d, params->f, params->steps, (unsigned)(params->splitPoint * 100), params->accel, params->shrinkDictMaxRegression);
return 1;
}

@ -367,6 +395,8 @@ static ZDICT_cover_params_t defaultCoverParams(void)
params.d = 8;
params.steps = 4;
params.splitPoint = 1.0;
params.shrinkDict = 0;
params.shrinkDictMaxRegression = kDefaultRegression;
return params;
}

@ -379,6 +409,8 @@ static ZDICT_fastCover_params_t defaultFastCoverParams(void)
params.steps = 4;
params.splitPoint = 0.75;  /* different from default splitPoint of cover */
params.accel = DEFAULT_ACCEL;
params.shrinkDict = 0;
params.shrinkDictMaxRegression = kDefaultRegression;
return params;
}
#endif
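A sketch of what `--train-cover=shrink` configures, assuming the experimental ZDICT_optimizeTrainFromBuffer_cover() entry point from zdict.h; the parameter values mirror the defaults above, and the exact unit of the regression bound is an assumption:

    #define ZDICT_STATIC_LINKING_ONLY
    #include <string.h>
    #include <zdict.h>

    static size_t train_shrunk_dict(void* dictBuf, size_t dictCap,
                                    const void* samples, const size_t* sampleSizes,
                                    unsigned nbSamples)
    {
        ZDICT_cover_params_t params;
        memset(&params, 0, sizeof(params));
        params.k = 200;
        params.d = 8;
        params.steps = 4;
        params.splitPoint = 1.0;
        params.shrinkDict = 1;               /* try progressively smaller dictionaries */
        params.shrinkDictMaxRegression = 1;  /* kDefaultRegression in the CLI code above */
        return ZDICT_optimizeTrainFromBuffer_cover(dictBuf, dictCap,
                                                   samples, sampleSizes, nbSamples,
                                                   &params);
    }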
@ -555,6 +587,7 @@ int main(int argCount, const char* argv[])
const char* suffix = ZSTD_EXTENSION;
unsigned maxDictSize = g_defaultMaxDictSize;
unsigned dictID = 0;
size_t targetCBlockSize = 0;
int dictCLevel = g_defaultDictCLevel;
unsigned dictSelect = g_defaultSelectivityLevel;
#ifdef UTIL_HAS_CREATEFILELIST
@ -588,11 +621,11 @@ int main(int argCount, const char* argv[])
/* preset behaviors */
if (exeNameMatch(programName, ZSTD_ZSTDMT)) nbWorkers=0, singleThread=0;
if (exeNameMatch(programName, ZSTD_UNZSTD)) operation=zom_decompress;
if (exeNameMatch(programName, ZSTD_CAT)) { operation=zom_decompress; forceStdout=1; FIO_overwriteMode(prefs); outFileName=stdoutmark; g_displayLevel=1; }  /* supports multiple formats */
if (exeNameMatch(programName, ZSTD_ZCAT)) { operation=zom_decompress; forceStdout=1; FIO_overwriteMode(prefs); outFileName=stdoutmark; g_displayLevel=1; }  /* behave like zcat, also supports multiple formats */
if (exeNameMatch(programName, ZSTD_CAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; outFileName=stdoutmark; g_displayLevel=1; }  /* supports multiple formats */
if (exeNameMatch(programName, ZSTD_ZCAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; outFileName=stdoutmark; g_displayLevel=1; }  /* behave like zcat, also supports multiple formats */
if (exeNameMatch(programName, ZSTD_GZ)) { suffix = GZ_EXTENSION; FIO_setCompressionType(prefs, FIO_gzipCompression); FIO_setRemoveSrcFile(prefs, 1); }  /* behave like gzip */
if (exeNameMatch(programName, ZSTD_GUNZIP)) { operation=zom_decompress; FIO_setRemoveSrcFile(prefs, 1); }  /* behave like gunzip, also supports multiple formats */
if (exeNameMatch(programName, ZSTD_GZCAT)) { operation=zom_decompress; forceStdout=1; FIO_overwriteMode(prefs); outFileName=stdoutmark; g_displayLevel=1; }  /* behave like gzcat, also supports multiple formats */
if (exeNameMatch(programName, ZSTD_GZCAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; outFileName=stdoutmark; g_displayLevel=1; }  /* behave like gzcat, also supports multiple formats */
if (exeNameMatch(programName, ZSTD_LZMA)) { suffix = LZMA_EXTENSION; FIO_setCompressionType(prefs, FIO_lzmaCompression); FIO_setRemoveSrcFile(prefs, 1); }  /* behave like lzma */
if (exeNameMatch(programName, ZSTD_UNLZMA)) { operation=zom_decompress; FIO_setCompressionType(prefs, FIO_lzmaCompression); FIO_setRemoveSrcFile(prefs, 1); }  /* behave like unlzma, also supports multiple formats */
if (exeNameMatch(programName, ZSTD_XZ)) { suffix = XZ_EXTENSION; FIO_setCompressionType(prefs, FIO_xzCompression); FIO_setRemoveSrcFile(prefs, 1); }  /* behave like xz */
@ -711,6 +744,7 @@ int main(int argCount, const char* argv[])
if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; }
if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--long")) {
unsigned ldmWindowLog = 0;
ldmFlag = 1;
@ -1115,6 +1149,7 @@ int main(int argCount, const char* argv[])
FIO_setAdaptMin(prefs, adaptMin);
FIO_setAdaptMax(prefs, adaptMax);
FIO_setRsyncable(prefs, rsyncable);
FIO_setTargetCBlockSize(prefs, targetCBlockSize);
FIO_setLiteralCompressionMode(prefs, literalCompressionMode);
if (adaptMin > cLevel) cLevel = adaptMin;
if (adaptMax < cLevel) cLevel = adaptMax;
@ -1124,7 +1159,7 @@ int main(int argCount, const char* argv[])
else
operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams);
#else
(void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode;  /* not used when ZSTD_NOCOMPRESS set */
(void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize;  /* not used when ZSTD_NOCOMPRESS set */
DISPLAY("Compression not supported \n");
#endif
} else {  /* decompression or test */
@ -58,6 +58,9 @@ while [ "$#" -gt 0 ] && [ "${endofopts}" -eq 0 ]; do
shift 2
break
;;
-f)
pattern_found=2
;;
*)
;;
esac
@ -117,7 +120,11 @@ else
set -f
while [ "$#" -gt 0 ]; do
# shellcheck disable=SC2086
"${zcat}" -fq -- "$1" | "${grep}" --label="${1}" ${grep_args} -- "${pattern}" -
if [ $pattern_found -eq 2 ]; then
"${zcat}" -fq -- "$1" | "${grep}" --label="${1}" ${grep_args} -- -
else
"${zcat}" -fq -- "$1" | "${grep}" --label="${1}" ${grep_args} -- "${pattern}" -
fi
[ "$?" -ne 0 ] && EXIT_CODE=1
shift
done
@ -1,5 +1,5 @@
.
.TH "ZSTDGREP" "1" "December 2018" "zstd 1.3.8" "User Commands"
.TH "ZSTDGREP" "1" "July 2019" "zstd 1.4.1" "User Commands"
.
.SH "NAME"
\fBzstdgrep\fR \- print lines matching a pattern in zstandard\-compressed files

@ -1,5 +1,5 @@
.
.TH "ZSTDLESS" "1" "December 2018" "zstd 1.3.8" "User Commands"
.TH "ZSTDLESS" "1" "July 2019" "zstd 1.4.1" "User Commands"
.
.SH "NAME"
\fBzstdless\fR \- view zstandard\-compressed files
1
tests/.gitignore
vendored
@ -55,6 +55,7 @@ _*
tmp*
*.zst
*.gz
!gzip/hufts-segv.gz
result
out
*.zstd
@ -215,6 +215,9 @@ roundTripCrash : $(ZSTD_OBJECTS) roundTripCrash.c
longmatch : $(ZSTD_OBJECTS) longmatch.c
	$(CC) $(FLAGS) $^ -o $@$(EXT)

bigdict: $(ZSTDMT_OBJECTS) $(PRGDIR)/datagen.c bigdict.c
	$(CC) $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT)

invalidDictionaries : $(ZSTD_OBJECTS) invalidDictionaries.c
	$(CC) $(FLAGS) $^ -o $@$(EXT)

@ -247,7 +250,7 @@ clean:
	$(MAKE) -C $(ZSTDDIR) clean
	$(MAKE) -C $(PRGDIR) clean
	@$(RM) -fR $(TESTARTEFACT)
	@$(RM) -f core *.o tmp* result* *.gcda dictionary *.zst \
	@$(RM) -f core *.o tmp* *.tmp result* *.gcda dictionary *.zst \
	$(PRGDIR)/zstd$(EXT) $(PRGDIR)/zstd32$(EXT) \
	fullbench$(EXT) fullbench32$(EXT) \
	fullbench-lib$(EXT) fullbench-dll$(EXT) \
@ -256,7 +259,7 @@ clean:
	zstreamtest$(EXT) zstreamtest32$(EXT) \
	datagen$(EXT) paramgrill$(EXT) roundTripCrash$(EXT) longmatch$(EXT) \
	symbols$(EXT) invalidDictionaries$(EXT) legacy$(EXT) poolTests$(EXT) \
	decodecorpus$(EXT) checkTag$(EXT)
	decodecorpus$(EXT) checkTag$(EXT) bigdict$(EXT)
	@echo Cleaning completed

@ -359,6 +362,9 @@ test-zstdgrep: gzstd
	-echo 'hello world' > test.txt && $(PRGDIR)/zstd test.txt
	env ZCAT=/tmp/zstdcat $(PRGDIR)/zstdgrep hello test.txt.zst
	env ZCAT=/tmp/zstdcat $(PRGDIR)/zstdgrep weird test.txt.zst && return 1 || return 0
	-echo 'hello' > pattern.txt
	env ZCAT=/tmp/zstdcat $(PRGDIR)/zstdgrep -f pattern.txt test.txt.zst
	$(RM) test.txt test.txt.zst pattern.txt

test-fullbench: fullbench datagen
	$(QEMU_SYS) ./fullbench -i1

@ -394,6 +400,9 @@ test-zstream32: zstreamtest32
test-longmatch: longmatch
	$(QEMU_SYS) ./longmatch

test-bigdict: bigdict
	$(QEMU_SYS) ./bigdict

test-invalidDictionaries: invalidDictionaries
	$(QEMU_SYS) ./invalidDictionaries
128
tests/bigdict.c
Normal file
@ -0,0 +1,128 @@
/*
 * Copyright (c) 2017-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include <assert.h>
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdint.h>
#include "datagen.h"
#include "mem.h"
#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

static int
compress(ZSTD_CCtx* cctx, ZSTD_DCtx* dctx,
         void* dst, size_t dstCapacity,
         void const* src, size_t srcSize,
         void* roundtrip, ZSTD_EndDirective end)
{
    ZSTD_inBuffer in = {src, srcSize, 0};
    ZSTD_outBuffer out = {dst, dstCapacity, 0};
    int ended = 0;

    while (!ended && (in.pos < in.size || out.pos > 0)) {
        size_t rc;
        out.pos = 0;
        rc = ZSTD_compressStream2(cctx, &out, &in, end);
        if (ZSTD_isError(rc))
            return 1;
        if (end == ZSTD_e_end && rc == 0)
            ended = 1;
        {
            ZSTD_inBuffer rtIn = {dst, out.pos, 0};
            ZSTD_outBuffer rtOut = {roundtrip, srcSize, 0};
            rc = 1;
            while (rtIn.pos < rtIn.size || rtOut.pos > 0) {
                rtOut.pos = 0;
                rc = ZSTD_decompressStream(dctx, &rtOut, &rtIn);
                if (ZSTD_isError(rc)) {
                    fprintf(stderr, "Decompression error: %s\n", ZSTD_getErrorName(rc));
                    return 1;
                }
                if (rc == 0)
                    break;
            }
            if (ended && rc != 0) {
                fprintf(stderr, "Frame not finished!\n");
                return 1;
            }
        }
    }

    return 0;
}

int main(int argc, const char** argv)
{
    ZSTD_CCtx* cctx = ZSTD_createCCtx();
    ZSTD_DCtx* dctx = ZSTD_createDCtx();
    const size_t dataSize = (size_t)1 << 30;
    const size_t outSize = ZSTD_compressBound(dataSize);
    const size_t bufferSize = (size_t)1 << 31;
    char* buffer = (char*)malloc(bufferSize);
    void* out = malloc(outSize);
    void* roundtrip = malloc(dataSize);
    (void)argc;
    (void)argv;

    if (!buffer || !out || !roundtrip || !cctx || !dctx) {
        fprintf(stderr, "Allocation failure\n");
        return 1;
    }

    if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 31)))
        return 1;
    if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 1)))
        return 1;
    if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_overlapLog, 9)))
        return 1;
    if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1)))
        return 1;
    if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, ZSTD_btopt)))
        return 1;
    if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetLength, 7)))
        return 1;
    if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, 7)))
        return 1;
    if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_searchLog, 1)))
        return 1;
    if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, 10)))
        return 1;
    if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_chainLog, 10)))
        return 1;

    if (ZSTD_isError(ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 31)))
        return 1;

    RDG_genBuffer(buffer, bufferSize, 1.0, 0.0, 0xbeefcafe);

    /* Compress 30 GB */
    {
        int i;
        for (i = 0; i < 10; ++i) {
            fprintf(stderr, "Compressing 1 GB\n");
            if (compress(cctx, dctx, out, outSize, buffer, dataSize, roundtrip, ZSTD_e_continue))
                return 1;
        }
    }
    fprintf(stderr, "Compressing 1 GB\n");
    if (compress(cctx, dctx, out, outSize, buffer, dataSize, roundtrip, ZSTD_e_end))
        return 1;

    fprintf(stderr, "Success!\n");

    free(roundtrip);
    free(out);
    free(buffer);
    ZSTD_freeDCtx(dctx);
    ZSTD_freeCCtx(cctx);
    return 0;
}
@ -840,16 +840,16 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
{ unsigned max = MaxLL;
size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP, sizeof(WKSP));  /* cannot fail */
assert(!HIST_isError(mostFrequent));
if (mostFrequent == nbSeq) {
/* do RLE if we have the chance */
*op++ = llCodeTable[0];
FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
LLtype = set_rle;
} else if (frame->stats.fseInit && !(RAND(seed) & 3) &&
if (frame->stats.fseInit && !(RAND(seed) & 3) &&
isSymbolSubset(llCodeTable, nbSeq,
frame->stats.litlengthSymbolSet, 35)) {
/* maybe do repeat mode if we're allowed to */
LLtype = set_repeat;
} else if (mostFrequent == nbSeq) {
/* do RLE if we have the chance */
*op++ = llCodeTable[0];
FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
LLtype = set_rle;
} else if (!(RAND(seed) & 3)) {
/* maybe use the default distribution */
FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
@ -872,14 +872,14 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
{ unsigned max = MaxOff;
size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP, sizeof(WKSP));  /* cannot fail */
assert(!HIST_isError(mostFrequent));
if (mostFrequent == nbSeq) {
*op++ = ofCodeTable[0];
FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
Offtype = set_rle;
} else if (frame->stats.fseInit && !(RAND(seed) & 3) &&
if (frame->stats.fseInit && !(RAND(seed) & 3) &&
isSymbolSubset(ofCodeTable, nbSeq,
frame->stats.offsetSymbolSet, 28)) {
Offtype = set_repeat;
} else if (mostFrequent == nbSeq) {
*op++ = ofCodeTable[0];
FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
Offtype = set_rle;
} else if (!(RAND(seed) & 3)) {
FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, DefaultMaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
Offtype = set_basic;
@ -900,14 +900,14 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
{ unsigned max = MaxML;
size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP, sizeof(WKSP));  /* cannot fail */
assert(!HIST_isError(mostFrequent));
if (mostFrequent == nbSeq) {
*op++ = *mlCodeTable;
FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
MLtype = set_rle;
} else if (frame->stats.fseInit && !(RAND(seed) & 3) &&
if (frame->stats.fseInit && !(RAND(seed) & 3) &&
isSymbolSubset(mlCodeTable, nbSeq,
frame->stats.matchlengthSymbolSet, 52)) {
MLtype = set_repeat;
} else if (mostFrequent == nbSeq) {
*op++ = *mlCodeTable;
FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
MLtype = set_rle;
} else if (!(RAND(seed) & 3)) {
/* sometimes do default distribution */
FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
@ -15,7 +15,7 @@
***************************************/
#include "util.h"  /* Compiler options, UTIL_GetFileSize */
#include <stdlib.h>  /* malloc */
#include <stdio.h>  /* fprintf, fopen, ftello64 */
#include <assert.h>  /* assert */
#include <assert.h>

#include "timefn.h"  /* UTIL_clockSpanNano, UTIL_getTime */
#include "mem.h"  /* U32 */
@ -31,8 +31,8 @@
#include "zstd.h"  /* ZSTD_versionString */
#include "util.h"  /* time functions */
#include "datagen.h"
#include "benchfn.h"  /* CustomBench*/
#include "benchzstd.h"  /* MB_UNIT */
#include "benchfn.h"  /* CustomBench */
#include "benchzstd.h"  /* MB_UNIT */

/*_************************************
@ -51,7 +51,7 @@
#define DEFAULT_CLEVEL 1

#define COMPRESSIBILITY_DEFAULT 0.50
static const size_t g_sampleSize = 10000000;
static const size_t kSampleSizeDefault = 10000000;

#define TIMELOOP_NANOSEC (1*1000000000ULL)  /* 1 second */

@ -61,12 +61,12 @@ static const size_t g_sampleSize = 10000000;
**************************************/
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)

#define CONTROL(c) { if (!(c)) { abort(); } }  /* like assert(), but cannot be disabled */

/*_************************************
* Benchmark Parameters
**************************************/
static unsigned g_nbIterations = NBLOOPS;
static double g_compressibility = COMPRESSIBILITY_DEFAULT;

/*_*******************************************************
@ -100,12 +100,12 @@ static ZSTD_CCtx* g_zcc = NULL;
static size_t
local_ZSTD_compress(const void* src, size_t srcSize,
void* dst, size_t dstSize,
void* buff2)
void* payload)
{
ZSTD_parameters p;
ZSTD_frameParameters f = { 1 /* contentSizeHeader*/, 0, 0 };
p.fParams = f;
p.cParams = *(ZSTD_compressionParameters*)buff2;
p.cParams = *(ZSTD_compressionParameters*)payload;
return ZSTD_compress_advanced (g_zcc, dst, dstSize, src, srcSize, NULL ,0, p);
//return ZSTD_compress(dst, dstSize, src, srcSize, cLevel);
}
@ -126,7 +126,7 @@ extern size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* ctx, const void* src, size_t s
static size_t local_ZSTD_decodeLiteralsBlock(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2)
{
(void)src; (void)srcSize; (void)dst; (void)dstSize;
return ZSTD_decodeLiteralsBlock((ZSTD_DCtx*)g_zdc, buff2, g_cSize);
return ZSTD_decodeLiteralsBlock(g_zdc, buff2, g_cSize);
}

static size_t local_ZSTD_decodeSeqHeaders(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2)
@ -141,14 +141,14 @@ static ZSTD_CStream* g_cstream= NULL;
static size_t
local_ZSTD_compressStream(const void* src, size_t srcSize,
void* dst, size_t dstCapacity,
void* buff2)
void* payload)
{
ZSTD_outBuffer buffOut;
ZSTD_inBuffer buffIn;
ZSTD_parameters p;
ZSTD_frameParameters f = {1 /* contentSizeHeader*/, 0, 0};
p.fParams = f;
p.cParams = *(ZSTD_compressionParameters*)buff2;
p.cParams = *(ZSTD_compressionParameters*)payload;
ZSTD_initCStream_advanced(g_cstream, NULL, 0, p, ZSTD_CONTENTSIZE_UNKNOWN);
buffOut.dst = dst;
buffOut.size = dstCapacity;
@ -161,23 +161,39 @@ local_ZSTD_compressStream(const void* src, size_t srcSize,
return buffOut.pos;
}

static size_t
local_ZSTD_compressStream_freshCCtx(const void* src, size_t srcSize,
void* dst, size_t dstCapacity,
void* payload)
{
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
size_t r;
assert(cctx != NULL);

r = local_ZSTD_compressStream(src, srcSize, dst, dstCapacity, payload);

ZSTD_freeCCtx(cctx);

return r;
}
static size_t
|
||||
local_ZSTD_compress_generic_end(const void* src, size_t srcSize,
|
||||
void* dst, size_t dstCapacity,
|
||||
void* buff2)
|
||||
void* payload)
|
||||
{
|
||||
(void)buff2;
|
||||
(void)payload;
|
||||
return ZSTD_compress2(g_cstream, dst, dstCapacity, src, srcSize);
|
||||
}
|
||||
|
||||
static size_t
|
||||
local_ZSTD_compress_generic_continue(const void* src, size_t srcSize,
|
||||
void* dst, size_t dstCapacity,
|
||||
void* buff2)
|
||||
void* payload)
|
||||
{
|
||||
ZSTD_outBuffer buffOut;
|
||||
ZSTD_inBuffer buffIn;
|
||||
(void)buff2;
|
||||
(void)payload;
|
||||
buffOut.dst = dst;
|
||||
buffOut.size = dstCapacity;
|
||||
buffOut.pos = 0;
|
||||
@ -192,9 +208,9 @@ local_ZSTD_compress_generic_continue(const void* src, size_t srcSize,
|
||||
static size_t
|
||||
local_ZSTD_compress_generic_T2_end(const void* src, size_t srcSize,
|
||||
void* dst, size_t dstCapacity,
|
||||
void* buff2)
|
||||
void* payload)
|
||||
{
|
||||
(void)buff2;
|
||||
(void)payload;
|
||||
ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_nbWorkers, 2);
|
||||
return ZSTD_compress2(g_cstream, dst, dstCapacity, src, srcSize);
|
||||
}
|
||||
@ -202,11 +218,11 @@ local_ZSTD_compress_generic_T2_end(const void* src, size_t srcSize,
|
||||
static size_t
|
||||
local_ZSTD_compress_generic_T2_continue(const void* src, size_t srcSize,
|
||||
void* dst, size_t dstCapacity,
|
||||
void* buff2)
|
||||
void* payload)
|
||||
{
|
||||
ZSTD_outBuffer buffOut;
|
||||
ZSTD_inBuffer buffIn;
|
||||
(void)buff2;
|
||||
(void)payload;
|
||||
ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_nbWorkers, 2);
|
||||
buffOut.dst = dst;
|
||||
buffOut.size = dstCapacity;
|
||||
@ -242,27 +258,28 @@ local_ZSTD_decompressStream(const void* src, size_t srcSize,
|
||||
#ifndef ZSTD_DLL_IMPORT
|
||||
static size_t local_ZSTD_compressContinue(const void* src, size_t srcSize,
|
||||
void* dst, size_t dstCapacity,
|
||||
void* buff2)
|
||||
void* payload)
|
||||
{
|
||||
ZSTD_parameters p;
|
||||
ZSTD_frameParameters f = { 1 /* contentSizeHeader*/, 0, 0 };
|
||||
p.fParams = f;
|
||||
p.cParams = *(ZSTD_compressionParameters*)buff2;
|
||||
p.cParams = *(ZSTD_compressionParameters*)payload;
|
||||
ZSTD_compressBegin_advanced(g_zcc, NULL, 0, p, srcSize);
|
||||
return ZSTD_compressEnd(g_zcc, dst, dstCapacity, src, srcSize);
|
||||
}
#define FIRST_BLOCK_SIZE 8
static size_t local_ZSTD_compressContinue_extDict(const void* src, size_t srcSize,
void* dst, size_t dstCapacity,
void* buff2)
static size_t
local_ZSTD_compressContinue_extDict(const void* src, size_t srcSize,
void* dst, size_t dstCapacity,
void* payload)
{
BYTE firstBlockBuf[FIRST_BLOCK_SIZE];

ZSTD_parameters p;
ZSTD_frameParameters f = { 1, 0, 0 };
ZSTD_frameParameters const f = { 1, 0, 0 };
p.fParams = f;
p.cParams = *(ZSTD_compressionParameters*)buff2;
p.cParams = *(ZSTD_compressionParameters*)payload;
ZSTD_compressBegin_advanced(g_zcc, NULL, 0, p, srcSize);
memcpy(firstBlockBuf, src, FIRST_BLOCK_SIZE);

@ -318,7 +335,7 @@ static int benchMem(unsigned benchNb,
size_t dstBuffSize = ZSTD_compressBound(srcSize);
BYTE* dstBuff;
void* dstBuff2;
void* buff2;
void* payload;
const char* benchName;
BMK_benchFn_t benchFunction;
int errorcode = 0;
@ -355,6 +372,9 @@ static int benchMem(unsigned benchNb,
case 42:
benchFunction = local_ZSTD_decompressStream; benchName = "decompressStream";
break;
case 43:
benchFunction = local_ZSTD_compressStream_freshCCtx; benchName = "compressStream_freshCCtx";
break;
case 51:
benchFunction = local_ZSTD_compress_generic_continue; benchName = "compress_generic, continue";
break;
@ -379,7 +399,7 @@ static int benchMem(unsigned benchNb,
free(dstBuff); free(dstBuff2);
return 12;
}
buff2 = dstBuff2;
payload = dstBuff2;
if (g_zcc==NULL) g_zcc = ZSTD_createCCtx();
if (g_zdc==NULL) g_zdc = ZSTD_createDCtx();
if (g_cstream==NULL) g_cstream = ZSTD_createCStream();
@ -412,62 +432,66 @@ static int benchMem(unsigned benchNb,
switch(benchNb)
{
case 1:
buff2 = &cparams;
payload = &cparams;
break;
case 2:
g_cSize = ZSTD_compress(buff2, dstBuffSize, src, srcSize, cLevel);
g_cSize = ZSTD_compress(dstBuff2, dstBuffSize, src, srcSize, cLevel);
break;
#ifndef ZSTD_DLL_IMPORT
case 11:
buff2 = &cparams;
payload = &cparams;
break;
case 12:
buff2 = &cparams;
payload = &cparams;
break;
case 13 :
g_cSize = ZSTD_compress(buff2, dstBuffSize, src, srcSize, cLevel);
g_cSize = ZSTD_compress(dstBuff2, dstBuffSize, src, srcSize, cLevel);
break;
case 31: /* ZSTD_decodeLiteralsBlock */
{ blockProperties_t bp;
ZSTD_frameHeader zfp;
size_t frameHeaderSize, skippedSize;
case 31: /* ZSTD_decodeLiteralsBlock : starts literals block in dstBuff2 */
{ size_t frameHeaderSize;
g_cSize = ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, cLevel);
frameHeaderSize = ZSTD_getFrameHeader(&zfp, dstBuff, ZSTD_FRAMEHEADERSIZE_MIN);
if (frameHeaderSize==0) frameHeaderSize = ZSTD_FRAMEHEADERSIZE_MIN;
ZSTD_getcBlockSize(dstBuff+frameHeaderSize, dstBuffSize, &bp); /* Get 1st block type */
if (bp.blockType != bt_compressed) {
DISPLAY("ZSTD_decodeLiteralsBlock : impossible to test on this sample (not compressible)\n");
goto _cleanOut;
frameHeaderSize = ZSTD_frameHeaderSize(dstBuff, ZSTD_FRAMEHEADERSIZE_PREFIX);
CONTROL(!ZSTD_isError(frameHeaderSize));
/* check block is compressible, hence contains a literals section */
{ blockProperties_t bp;
ZSTD_getcBlockSize(dstBuff+frameHeaderSize, dstBuffSize, &bp); /* Get 1st block type */
if (bp.blockType != bt_compressed) {
DISPLAY("ZSTD_decodeLiteralsBlock : impossible to test on this sample (not compressible)\n");
goto _cleanOut;
} }
{ size_t const skippedSize = frameHeaderSize + ZSTD_blockHeaderSize;
memcpy(dstBuff2, dstBuff+skippedSize, g_cSize-skippedSize);
}
skippedSize = frameHeaderSize + ZSTD_blockHeaderSize;
memcpy(buff2, dstBuff+skippedSize, g_cSize-skippedSize);
srcSize = srcSize > 128 KB ? 128 KB : srcSize; /* speed relative to block */
ZSTD_decompressBegin(g_zdc);
break;
}
case 32: /* ZSTD_decodeSeqHeaders */
{ blockProperties_t bp;
ZSTD_frameHeader zfp;
const BYTE* ip = dstBuff;
const BYTE* iend;
size_t frameHeaderSize, cBlockSize;
ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, cLevel); /* it would be better to use direct block compression here */
g_cSize = ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, cLevel);
frameHeaderSize = ZSTD_getFrameHeader(&zfp, dstBuff, ZSTD_FRAMEHEADERSIZE_MIN);
if (frameHeaderSize==0) frameHeaderSize = ZSTD_FRAMEHEADERSIZE_MIN;
ip += frameHeaderSize; /* Skip frame Header */
cBlockSize = ZSTD_getcBlockSize(ip, dstBuffSize, &bp); /* Get 1st block type */
if (bp.blockType != bt_compressed) {
DISPLAY("ZSTD_decodeSeqHeaders : impossible to test on this sample (not compressible)\n");
goto _cleanOut;
{ size_t const cSize = ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, cLevel);
CONTROL(cSize > ZSTD_FRAMEHEADERSIZE_PREFIX);
}
iend = ip + ZSTD_blockHeaderSize + cBlockSize; /* End of first block */
ip += ZSTD_blockHeaderSize; /* skip block header */
/* Skip frame Header */
{ size_t const frameHeaderSize = ZSTD_frameHeaderSize(dstBuff, ZSTD_FRAMEHEADERSIZE_PREFIX);
CONTROL(!ZSTD_isError(frameHeaderSize));
ip += frameHeaderSize;
}
/* Find end of block */
{ size_t const cBlockSize = ZSTD_getcBlockSize(ip, dstBuffSize, &bp); /* Get 1st block type */
if (bp.blockType != bt_compressed) {
DISPLAY("ZSTD_decodeSeqHeaders : impossible to test on this sample (not compressible)\n");
goto _cleanOut;
}
iend = ip + ZSTD_blockHeaderSize + cBlockSize; /* End of first block */
}
ip += ZSTD_blockHeaderSize; /* skip block header */
ZSTD_decompressBegin(g_zdc);
assert(iend > ip);
CONTROL(iend > ip);
ip += ZSTD_decodeLiteralsBlock(g_zdc, ip, (size_t)(iend-ip)); /* skip literal segment */
g_cSize = (size_t)(iend-ip);
memcpy(buff2, ip, g_cSize); /* copy rest of block (it starts by SeqHeader) */
memcpy(dstBuff2, ip, g_cSize); /* copy rest of block (it starts by SeqHeader) */
srcSize = srcSize > 128 KB ? 128 KB : srcSize; /* speed relative to block */
break;
}
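The case 31/32 setup above walks a freshly compressed frame by hand: skip the frame header, then read the first block header to learn the block's type and size. A standalone sketch of that header decode, following the 3-byte little-endian Block_Header layout from the format specification (bit 0 Last_Block, bits 1-2 Block_Type, bits 3-23 Block_Size); not from this diff:

#include <stdint.h>
#include <stdio.h>

typedef struct { int lastBlock; int blockType; uint32_t blockSize; } BlockHeader;

static BlockHeader readBlockHeader(const uint8_t h[3])
{
    /* 24-bit little-endian read, then split into the three fields */
    uint32_t const v = (uint32_t)h[0] | ((uint32_t)h[1] << 8) | ((uint32_t)h[2] << 16);
    BlockHeader bh;
    bh.lastBlock = (int)(v & 1);
    bh.blockType = (int)((v >> 1) & 3);   /* 0=Raw, 1=RLE, 2=Compressed */
    bh.blockSize = v >> 3;
    return bh;
}

int main(void)
{
    uint8_t const h[3] = { 0x15, 0x00, 0x00 };  /* last=1, type=2, size=2 */
    BlockHeader const bh = readBlockHeader(h);
    printf("last=%d type=%d size=%u\n", bh.lastBlock, bh.blockType, bh.blockSize);
    return 0;
}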
@ -476,10 +500,13 @@ static int benchMem(unsigned benchNb,
goto _cleanOut;
#endif
case 41 :
buff2 = &cparams;
payload = &cparams;
break;
case 42 :
g_cSize = ZSTD_compress(buff2, dstBuffSize, src, srcSize, cLevel);
g_cSize = ZSTD_compress(payload, dstBuffSize, src, srcSize, cLevel);
break;
case 43 :
payload = &cparams;
break;

/* test functions */
@ -498,10 +525,10 @@ static int benchMem(unsigned benchNb,
BMK_runTime_t bestResult;
bestResult.sumOfReturn = 0;
bestResult.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000; /* hopefully large enough : must be larger than any potential measurement */
assert(tfs != NULL);
CONTROL(tfs != NULL);

bp.benchFn = benchFunction;
bp.benchPayload = buff2;
bp.benchPayload = payload;
bp.initFn = NULL;
bp.initPayload = NULL;
bp.errorFn = ZSTD_isError;
@ -549,21 +576,19 @@ _cleanOut:


static int benchSample(U32 benchNb,
size_t benchedSize, double compressibility,
int cLevel, ZSTD_compressionParameters cparams)
{
size_t const benchedSize = g_sampleSize;
const char* const name = "Sample 10MiB";

/* Allocation */
void* const origBuff = malloc(benchedSize);
if (!origBuff) { DISPLAY("\nError: not enough memory!\n"); return 12; }

/* Fill buffer */
RDG_genBuffer(origBuff, benchedSize, g_compressibility, 0.0, 0);
RDG_genBuffer(origBuff, benchedSize, compressibility, 0.0, 0);

/* bench */
DISPLAY("\r%70s\r", "");
DISPLAY(" %s : \n", name);
DISPLAY(" Sample %u bytes : \n", (unsigned)benchedSize);
if (benchNb) {
benchMem(benchNb, origBuff, benchedSize, cLevel, cparams);
} else { /* 0 == run all tests */
@ -696,10 +721,11 @@ static int usage_advanced(const char* exename)
usage(exename);
DISPLAY( "\nAdvanced options :\n");
DISPLAY( " -b# : test only function # \n");
DISPLAY( " -i# : iteration loops [1-9](default : %i)\n", NBLOOPS);
DISPLAY( " -P# : sample compressibility (default : %.1f%%)\n", COMPRESSIBILITY_DEFAULT * 100);
DISPLAY( " -l# : benchmark functions at that compression level (default : %i)\n", DEFAULT_CLEVEL);
DISPLAY( " --zstd : custom parameter selection. Format same as zstdcli \n");
DISPLAY( " -P# : sample compressibility (default : %.1f%%)\n", COMPRESSIBILITY_DEFAULT * 100);
DISPLAY( " -B# : sample size (default : %u)\n", (unsigned)kSampleSizeDefault);
DISPLAY( " -i# : iteration loops [1-9](default : %i)\n", NBLOOPS);
return 0;
}

@ -718,13 +744,15 @@ int main(int argc, const char** argv)
U32 benchNb = 0, main_pause = 0;
int cLevel = DEFAULT_CLEVEL;
ZSTD_compressionParameters cparams = ZSTD_getCParams(cLevel, 0, 0);
size_t sampleSize = kSampleSizeDefault;
double compressibility = COMPRESSIBILITY_DEFAULT;

DISPLAY(WELCOME_MESSAGE);
if (argc<1) return badusage(exename);

for (argNb=1; argNb<argc; argNb++) {
const char* argument = argv[argNb];
assert(argument != NULL);
CONTROL(argument != NULL);

if (longCommandWArg(&argument, "--zstd=")) {
for ( ; ;) {
@ -767,21 +795,29 @@ int main(int argc, const char** argv)
benchNb = readU32FromChar(&argument);
break;

/* Modify Nb Iterations */
case 'i':
/* Select compression level to use */
case 'l':
argument++;
g_nbIterations = readU32FromChar(&argument);
cLevel = (int)readU32FromChar(&argument);
cparams = ZSTD_getCParams(cLevel, 0, 0);
break;

/* Select compressibility of synthetic sample */
case 'P':
argument++;
g_compressibility = (double)readU32FromChar(&argument) / 100.;
compressibility = (double)readU32FromChar(&argument) / 100.;
break;
case 'l':

/* Select size of synthetic sample */
case 'B':
argument++;
cLevel = (int)readU32FromChar(&argument);
cparams = ZSTD_getCParams(cLevel, 0, 0);
sampleSize = (size_t)readU32FromChar(&argument);
break;

/* Modify Nb Iterations */
case 'i':
argument++;
g_nbIterations = readU32FromChar(&argument);
break;

/* Unknown command */
@ -798,7 +834,7 @@ int main(int argc, const char** argv)


if (filenamesStart==0) /* no input file */
result = benchSample(benchNb, cLevel, cparams);
result = benchSample(benchNb, sampleSize, compressibility, cLevel, cparams);
else
result = benchFiles(benchNb, argv+filenamesStart, argc-filenamesStart, cLevel, cparams);
@ -26,8 +26,8 @@ ZSTDDIR = ../../lib
PRGDIR = ../../programs

FUZZ_CPPFLAGS := -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
-I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR) \
-DZSTD_MULTITHREAD $(CPPFLAGS)
-I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(ZSTDDIR)/legacy \
-I$(PRGDIR) -DZSTD_MULTITHREAD -DZSTD_LEGACY_SUPPORT=1 $(CPPFLAGS)
FUZZ_EXTRA_FLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
-Wstrict-prototypes -Wundef \
@ -47,12 +47,14 @@ ZSTDCOMMON_SRC := $(ZSTDDIR)/common/*.c
ZSTDCOMP_SRC := $(ZSTDDIR)/compress/*.c
ZSTDDECOMP_SRC := $(ZSTDDIR)/decompress/*.c
ZSTDDICT_SRC := $(ZSTDDIR)/dictBuilder/*.c
ZSTDLEGACY_SRC := $(ZSTDDIR)/legacy/*.c
FUZZ_SRC := \
$(FUZZ_SRC) \
$(ZSTDDECOMP_SRC) \
$(ZSTDCOMMON_SRC) \
$(ZSTDCOMP_SRC) \
$(ZSTDDICT_SRC)
$(ZSTDDICT_SRC) \
$(ZSTDLEGACY_SRC)

FUZZ_OBJ := $(patsubst %.c,%.o, $(wildcard $(FUZZ_SRC)))

@ -69,7 +71,9 @@ FUZZ_TARGETS := \
stream_decompress \
block_decompress \
dictionary_round_trip \
dictionary_decompress
dictionary_decompress \
zstd_frame_info \
simple_compress

all: $(FUZZ_TARGETS)

@ -100,6 +104,12 @@ dictionary_round_trip: $(FUZZ_HEADERS) $(FUZZ_OBJ) dictionary_round_trip.o
dictionary_decompress: $(FUZZ_HEADERS) $(FUZZ_OBJ) dictionary_decompress.o
	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) dictionary_decompress.o $(LIB_FUZZING_ENGINE) -o $@

simple_compress: $(FUZZ_HEADERS) $(FUZZ_OBJ) simple_compress.o
	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) simple_compress.o $(LIB_FUZZING_ENGINE) -o $@

zstd_frame_info: $(FUZZ_HEADERS) $(FUZZ_OBJ) zstd_frame_info.o
	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) zstd_frame_info.o $(LIB_FUZZING_ENGINE) -o $@

libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c regression_driver.o
	$(AR) $(FUZZ_ARFLAGS) $@ regression_driver.o

@ -122,6 +132,9 @@ corpora/%: corpora/%_seed_corpus.zip
.PHONY: corpora
corpora: $(patsubst %,corpora/%,$(FUZZ_TARGETS))

.PHONY: seedcorpora
seedcorpora: $(patsubst %,corpora/%_seed_corpus.zip,$(FUZZ_TARGETS))

regressiontest: corpora
	CC="$(CC)" CXX="$(CXX)" CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(LDFLAGS)" $(PYTHON) ./fuzz.py build all
	$(PYTHON) ./fuzz.py regression all
@ -130,7 +143,9 @@ clean:
	@$(MAKE) -C $(ZSTDDIR) clean
	@$(RM) *.a *.o
	@$(RM) simple_round_trip stream_round_trip simple_decompress \
stream_decompress block_decompress block_round_trip
stream_decompress block_decompress block_round_trip \
simple_compress dictionary_round_trip dictionary_decompress \
zstd_frame_info

cleanall:
	@$(RM) -r Fuzzer

@ -1,2 +0,0 @@
[libfuzzer]
max_len = 8192
@ -20,43 +20,42 @@
#include "zstd_helpers.h"

static ZSTD_DCtx *dctx = NULL;
static void* rBuf = NULL;
static size_t bufSize = 0;

int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
FUZZ_dict_t dict;
size_t neededBufSize;

uint32_t seed = FUZZ_seed(&src, &size);
neededBufSize = MAX(20 * size, (size_t)256 << 10);
FUZZ_dict_t dict;
ZSTD_DDict* ddict = NULL;
int i;

/* Allocate all buffers and contexts if not already allocated */
if (neededBufSize > bufSize) {
free(rBuf);
rBuf = malloc(neededBufSize);
bufSize = neededBufSize;
FUZZ_ASSERT(rBuf);
}
if (!dctx) {
dctx = ZSTD_createDCtx();
FUZZ_ASSERT(dctx);
}
dict = FUZZ_train(src, size, &seed);
if (FUZZ_rand32(&seed, 0, 1) == 0) {
ZSTD_decompress_usingDict(dctx,
rBuf, neededBufSize,
src, size,
dict.buff, dict.size);
ddict = ZSTD_createDDict(dict.buff, dict.size);
FUZZ_ASSERT(ddict);
} else {
FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced(
dctx, dict.buff, dict.size,
(ZSTD_dictLoadMethod_e)FUZZ_rand32(&seed, 0, 1),
(ZSTD_dictContentType_e)FUZZ_rand32(&seed, 0, 2)));
ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, src, size);
}

/* Run it 10 times over 10 output sizes. Reuse the context and dict. */
for (i = 0; i < 10; ++i) {
size_t const bufSize = FUZZ_rand32(&seed, 0, 2 * size);
void* rBuf = malloc(bufSize);
FUZZ_ASSERT(rBuf);
if (ddict) {
ZSTD_decompress_usingDDict(dctx, rBuf, bufSize, src, size, ddict);
} else {
ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size);
}
free(rBuf);
}
free(dict.buff);
ZSTD_freeDDict(ddict);
#ifndef STATEFUL_FUZZING
ZSTD_freeDCtx(dctx); dctx = NULL;
#endif
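The rewritten target now exercises two dictionary paths: a digested ZSTD_DDict, or a dictionary loaded into the DCtx via ZSTD_DCtx_loadDictionary_advanced. For reference, a self-contained round-trip sketch of the DDict path (buffer names and the level are assumptions, and null checks are abbreviated):

#include "zstd.h"

/* Compress with a raw dictionary, then decompress through a DDict.
 * Returns the decompressed size, or a zstd error code. */
size_t roundtrip_with_ddict(const void* dictBuf, size_t dictSize,
                            const void* src, size_t srcSize,
                            void* scratch, size_t scratchCapacity,
                            void* out, size_t outCapacity)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
    ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuf, dictSize);
    size_t const cSize = ZSTD_compress_usingDict(cctx, scratch, scratchCapacity,
                                                 src, srcSize,
                                                 dictBuf, dictSize, 3 /*level*/);
    size_t dSize = cSize;  /* propagate the error code if compression failed */
    if (!ZSTD_isError(cSize))
        dSize = ZSTD_decompress_usingDDict(dctx, out, outCapacity,
                                           scratch, cSize, ddict);
    ZSTD_freeDDict(ddict);
    ZSTD_freeDCtx(dctx);
    ZSTD_freeCCtx(cctx);
    return dSize;
}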
@ -36,6 +36,8 @@ TARGETS = [
'block_decompress',
'dictionary_round_trip',
'dictionary_decompress',
'zstd_frame_info',
'simple_compress',
]
ALL_TARGETS = TARGETS + ['all']
FUZZ_RNG_SEED_SIZE = 4

47 tests/fuzz/simple_compress.c Normal file
@ -0,0 +1,47 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/

/**
* This fuzz target attempts to compress the fuzzed data with the simple
* compression function, using an output buffer that may be too small, to
* ensure that the compressor never crashes.
*/

#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include "fuzz_helpers.h"
#include "zstd.h"

static ZSTD_CCtx *cctx = NULL;

int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
uint32_t seed = FUZZ_seed(&src, &size);
size_t const maxSize = ZSTD_compressBound(size);
int i;
if (!cctx) {
cctx = ZSTD_createCCtx();
FUZZ_ASSERT(cctx);
}
/* Run it 10 times over 10 output sizes. Reuse the context. */
for (i = 0; i < 10; ++i) {
int const level = (int)FUZZ_rand32(&seed, 0, 19 + 3) - 3; /* [-3, 19] */
size_t const bufSize = FUZZ_rand32(&seed, 0, maxSize);
void* rBuf = malloc(bufSize);
FUZZ_ASSERT(rBuf);
ZSTD_compressCCtx(cctx, rBuf, bufSize, src, size, level);
free(rBuf);
}

#ifndef STATEFUL_FUZZING
ZSTD_freeCCtx(cctx); cctx = NULL;
#endif
return 0;
}
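The contract this target checks is that an undersized destination buffer must produce a clean error, never a crash. A standalone illustration of that contract (the input string is arbitrary):

#include <stdio.h>
#include <string.h>
#include "zstd.h"

int main(void)
{
    const char src[] = "some moderately repetitive input, repetitive input";
    char dst[4];  /* deliberately smaller than any possible compressed frame */
    size_t const r = ZSTD_compress(dst, sizeof(dst), src, strlen(src), 1);
    if (ZSTD_isError(r)) {
        printf("expected failure: %s\n", ZSTD_getErrorName(r));
        return 0;
    }
    return 1;  /* unexpectedly succeeded */
}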
@ -19,28 +19,24 @@
#include "zstd.h"

static ZSTD_DCtx *dctx = NULL;
static void* rBuf = NULL;
static size_t bufSize = 0;

int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
size_t neededBufSize;

FUZZ_seed(&src, &size);
neededBufSize = MAX(20 * size, (size_t)256 << 10);

/* Allocate all buffers and contexts if not already allocated */
if (neededBufSize > bufSize) {
free(rBuf);
rBuf = malloc(neededBufSize);
bufSize = neededBufSize;
FUZZ_ASSERT(rBuf);
}
uint32_t seed = FUZZ_seed(&src, &size);
int i;
if (!dctx) {
dctx = ZSTD_createDCtx();
FUZZ_ASSERT(dctx);
}
ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, src, size);
/* Run it 10 times over 10 output sizes. Reuse the context. */
for (i = 0; i < 10; ++i) {
size_t const bufSize = FUZZ_rand32(&seed, 0, 2 * size);
void* rBuf = malloc(bufSize);
FUZZ_ASSERT(rBuf);
ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size);
free(rBuf);
}

#ifndef STATEFUL_FUZZING
ZSTD_freeDCtx(dctx); dctx = NULL;

43 tests/fuzz/zstd_frame_info.c Normal file
@ -0,0 +1,43 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/

/**
* This fuzz target fuzzes all of the helper functions that consume compressed
* input.
*/

#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include "fuzz_helpers.h"
#include "zstd_helpers.h"

int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
ZSTD_frameHeader zfh;
/* Consume the seed to be compatible with the corpora of other decompression
* fuzzers.
*/
FUZZ_seed(&src, &size);
/* You can fuzz any helper functions here that are fast, and take zstd
* compressed data as input. E.g. don't expect the input to be a dictionary,
* so don't fuzz ZSTD_getDictID_fromDict().
*/
ZSTD_getFrameContentSize(src, size);
ZSTD_getDecompressedSize(src, size);
ZSTD_findFrameCompressedSize(src, size);
ZSTD_getDictID_fromFrame(src, size);
ZSTD_findDecompressedSize(src, size);
ZSTD_decompressBound(src, size);
ZSTD_frameHeaderSize(src, size);
ZSTD_isFrame(src, size);
ZSTD_getFrameHeader(&zfh, src, size);
ZSTD_getFrameHeader_advanced(&zfh, src, size, ZSTD_f_zstd1);
return 0;
}
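All of these helpers must tolerate arbitrary input; on a valid frame they report its metadata. A small sketch of the non-fuzzing use, assuming only that the compression step succeeds (ZSTD_getFrameHeader returns 0 on success, >0 if it needs more input, or an error code; ZSTD_frameHeader lives behind ZSTD_STATIC_LINKING_ONLY):

#define ZSTD_STATIC_LINKING_ONLY
#include <stdio.h>
#include <string.h>
#include "zstd.h"

int main(void)
{
    const char src[] = "hello hello hello hello";
    char frame[128];
    size_t const cSize = ZSTD_compress(frame, sizeof(frame), src, strlen(src), 1);
    ZSTD_frameHeader zfh;
    if (!ZSTD_isError(cSize) && ZSTD_getFrameHeader(&zfh, frame, cSize) == 0) {
        printf("content size: %llu, window size: %llu\n",
               (unsigned long long)zfh.frameContentSize,
               (unsigned long long)zfh.windowSize);
    }
    return 0;
}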
@ -62,10 +62,12 @@ static U32 g_displayLevel = 2;
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;

#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
if (g_displayLevel>=4) fflush(stderr); } }
#define DISPLAYUPDATE(l, ...) \
if (g_displayLevel>=l) { \
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
if (g_displayLevel>=4) fflush(stderr); } \
}


/*-*******************************************************
@ -73,7 +75,7 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
*********************************************************/
#undef MIN
#undef MAX
/* Declaring the function is it isn't unused */
/* Declaring the function, to avoid -Wmissing-prototype */
void FUZ_bug976(void);
void FUZ_bug976(void)
{ /* these constants shall not depend on MIN() macro */
@ -247,7 +249,7 @@ static int FUZ_mallocTests_internal(unsigned seed, double compressibility, unsig

/* advanced MT streaming API test */
if (part <= 4)
{ unsigned nbThreads;
{ int nbThreads;
for (nbThreads=1; nbThreads<=4; nbThreads++) {
int compressionLevel;
for (compressionLevel=1; compressionLevel<=6; compressionLevel++) {
@ -261,7 +263,7 @@ static int FUZ_mallocTests_internal(unsigned seed, double compressibility, unsig
CHECK_Z( ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_continue) );
while ( ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end) ) {}
ZSTD_freeCCtx(cctx);
DISPLAYLEVEL(3, "compress_generic,-T%u,continue level %i : ",
DISPLAYLEVEL(3, "compress_generic,-T%i,continue level %i : ",
nbThreads, compressionLevel);
FUZ_displayMallocStats(malcount);
} } }
@ -768,13 +770,11 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(3, "OK \n");

DISPLAYLEVEL(3, "test%3i : init CCtx for small level %u (should work again) : ", testNb++, 1);
{ size_t const r = ZSTD_compressBegin(staticCCtx, 1);
if (ZSTD_isError(r)) goto _output_error; }
CHECK( ZSTD_compressBegin(staticCCtx, 1) );
DISPLAYLEVEL(3, "OK \n");

DISPLAYLEVEL(3, "test%3i : init CStream for small level %u : ", testNb++, 1);
{ size_t const r = ZSTD_initCStream(staticCCtx, 1);
if (ZSTD_isError(r)) goto _output_error; }
CHECK( ZSTD_initCStream(staticCCtx, 1) );
DISPLAYLEVEL(3, "OK \n");

DISPLAYLEVEL(3, "test%3i : init CStream with dictionary (should fail) : ", testNb++);
@ -1059,7 +1059,7 @@ static int basicUnitTests(U32 seed, double compressibility)
/* Dictionary and dictBuilder tests */
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
size_t const dictBufferCapacity = 16 KB;
void* dictBuffer = malloc(dictBufferCapacity);
void* const dictBuffer = malloc(dictBufferCapacity);
size_t const totalSampleSize = 1 MB;
size_t const sampleUnitSize = 8 KB;
U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize);
@ -1104,6 +1104,22 @@ static int basicUnitTests(U32 seed, double compressibility)
}
DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)dictSize);

DISPLAYLEVEL(3, "test%3i : COVER dictBuilder with shrinkDict: ", testNb++);
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
{ ZDICT_cover_params_t coverParams;
memset(&coverParams, 0, sizeof(coverParams));
coverParams.steps = 8;
coverParams.nbThreads = 4;
coverParams.shrinkDict = 1;
coverParams.shrinkDictMaxRegression = 1;
dictSize = ZDICT_optimizeTrainFromBuffer_cover(
dictBuffer, dictBufferCapacity,
CNBuffer, samplesSizes, nbSamples/8, /* less samples for faster tests */
&coverParams);
if (ZDICT_isError(dictSize)) goto _output_error;
}
DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)dictSize);

DISPLAYLEVEL(3, "test%3i : Multithreaded FASTCOVER dictBuilder : ", testNb++);
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
{ ZDICT_fastCover_params_t fastCoverParams;
@ -1118,6 +1134,22 @@ static int basicUnitTests(U32 seed, double compressibility)
}
DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)dictSize);

DISPLAYLEVEL(3, "test%3i : FASTCOVER dictBuilder with shrinkDict: ", testNb++);
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
{ ZDICT_fastCover_params_t fastCoverParams;
memset(&fastCoverParams, 0, sizeof(fastCoverParams));
fastCoverParams.steps = 8;
fastCoverParams.nbThreads = 4;
fastCoverParams.shrinkDict = 1;
fastCoverParams.shrinkDictMaxRegression = 1;
dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(
dictBuffer, dictBufferCapacity,
CNBuffer, samplesSizes, nbSamples,
&fastCoverParams);
if (ZDICT_isError(dictSize)) goto _output_error;
}
DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)dictSize);

DISPLAYLEVEL(3, "test%3i : check dictID : ", testNb++);
dictID = ZDICT_getDictID(dictBuffer, dictSize);
if (dictID==0) goto _output_error;
@ -1164,6 +1196,7 @@ static int basicUnitTests(U32 seed, double compressibility)
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize,
ZSTD_dlm_byRef, ZSTD_dct_auto,
cParams, ZSTD_defaultCMem);
assert(cdict != NULL);
DISPLAYLEVEL(3, "(size : %u) : ", (unsigned)ZSTD_sizeof_CDict(cdict));
cSize = ZSTD_compress_usingCDict(cctx, compressedBuffer, compressedBufferSize,
CNBuffer, CNBuffSize, cdict);
@ -1221,8 +1254,11 @@ static int basicUnitTests(U32 seed, double compressibility)
{ ZSTD_frameParameters const fParams = { 0 /* frameSize */, 1 /* checksum */, 1 /* noDictID*/ };
ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize);
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cParams, ZSTD_defaultCMem);
cSize = ZSTD_compress_usingCDict_advanced(cctx, compressedBuffer, compressedBufferSize,
CNBuffer, CNBuffSize, cdict, fParams);
assert(cdict != NULL);
cSize = ZSTD_compress_usingCDict_advanced(cctx,
compressedBuffer, compressedBufferSize,
CNBuffer, CNBuffSize,
cdict, fParams);
ZSTD_freeCDict(cdict);
if (ZSTD_isError(cSize)) goto _output_error;
}
@ -1235,7 +1271,8 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(3, "OK (unknown)\n");

DISPLAYLEVEL(3, "test%3i : frame built without dictID should be decompressible : ", testNb++);
{ ZSTD_DCtx* const dctx = ZSTD_createDCtx(); assert(dctx != NULL);
{ ZSTD_DCtx* const dctx = ZSTD_createDCtx();
assert(dctx != NULL);
CHECKPLUS(r, ZSTD_decompress_usingDict(dctx,
decodedBuffer, CNBuffSize,
compressedBuffer, cSize,
@ -2459,7 +2496,7 @@ static unsigned readU32FromChar(const char** stringPtr)
* If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
* @return 0 and doesn't modify *stringPtr otherwise.
*/
static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
static int longCommandWArg(const char** stringPtr, const char* longCommand)
{
size_t const comSize = strlen(longCommand);
int const result = !strncmp(*stringPtr, longCommand, comSize);
@ -2519,7 +2556,7 @@ int main(int argc, const char** argv)

case 'i':
argument++; maxDuration = 0;
nbTests = readU32FromChar(&argument);
nbTests = (int)readU32FromChar(&argument);
break;

case 'T':
@ -2539,12 +2576,12 @@ int main(int argc, const char** argv)

case 't':
argument++;
testNb = readU32FromChar(&argument);
testNb = (int)readU32FromChar(&argument);
break;

case 'P': /* compressibility % */
argument++;
proba = readU32FromChar(&argument);
proba = (int)readU32FromChar(&argument);
if (proba>100) proba = 100;
break;
@ -609,8 +609,8 @@ compareResultLT(const BMK_benchResult_t result1, const BMK_benchResult_t result2

static constraint_t relaxTarget(constraint_t target) {
target.cMem = (U32)-1;
target.cSpeed *= ((double)g_strictness) / 100;
target.dSpeed *= ((double)g_strictness) / 100;
target.cSpeed = (target.cSpeed * g_strictness) / 100;
target.dSpeed = (target.dSpeed * g_strictness) / 100;
return target;
}
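The replacement performs the percentage scaling entirely in integer arithmetic, multiplying before dividing so precision is kept without a round-trip through double. A tiny standalone illustration (function name hypothetical; assumes the product does not overflow):

#include <stdio.h>

/* Scale x by pct percent using integers only: multiply first, then divide. */
static unsigned long long scale_pct(unsigned long long x, unsigned pct)
{
    return (x * pct) / 100;
}

int main(void)
{
    printf("%llu\n", scale_pct(12345ULL, 90));  /* prints 11110 (truncated) */
    return 0;
}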
@ -1737,8 +1737,8 @@ static int allBench(BMK_benchResult_t* resultPtr,

/* optimistic assumption of benchres */
{ BMK_benchResult_t resultMax = benchres;
resultMax.cSpeed *= uncertaintyConstantC * VARIANCE;
resultMax.dSpeed *= uncertaintyConstantD * VARIANCE;
resultMax.cSpeed = (unsigned long long)(resultMax.cSpeed * uncertaintyConstantC * VARIANCE);
resultMax.dSpeed = (unsigned long long)(resultMax.dSpeed * uncertaintyConstantD * VARIANCE);

/* disregard infeasible results in feas mode */
/* disregard if resultMax < winner in infeas mode */
@ -2429,9 +2429,9 @@ optimizeForSize(const char* const * const fileNamesTable, const size_t nbFiles,
}

g_lvltarget = winner.result;
g_lvltarget.cSpeed *= ((double)g_strictness) / 100;
g_lvltarget.dSpeed *= ((double)g_strictness) / 100;
g_lvltarget.cSize /= ((double)g_strictness) / 100;
g_lvltarget.cSpeed = (g_lvltarget.cSpeed * g_strictness) / 100;
g_lvltarget.dSpeed = (g_lvltarget.dSpeed * g_strictness) / 100;
g_lvltarget.cSize = (g_lvltarget.cSize * 100) / g_strictness;

target.cSpeed = (U32)g_lvltarget.cSpeed;
target.dSpeed = (U32)g_lvltarget.dSpeed;

File diff suppressed because it is too large
@ -90,6 +90,7 @@ static int testWait(size_t numThreads, size_t queueSize) {

typedef struct {
ZSTD_pthread_mutex_t mut;
int countdown;
int val;
int max;
ZSTD_pthread_cond_t cond;
@ -97,48 +98,56 @@ typedef struct {

static void waitLongFn(void *opaque) {
poolTest_t* const test = (poolTest_t*) opaque;
UTIL_sleepMilli(10);
ZSTD_pthread_mutex_lock(&test->mut);
test->val = test->val + 1;
if (test->val == test->max)
ZSTD_pthread_cond_signal(&test->cond);
test->val++;
if (test->val > test->max)
test->max = test->val;
ZSTD_pthread_mutex_unlock(&test->mut);

UTIL_sleepMilli(10);

ZSTD_pthread_mutex_lock(&test->mut);
test->val--;
test->countdown--;
if (test->countdown == 0)
ZSTD_pthread_cond_signal(&test->cond);
ZSTD_pthread_mutex_unlock(&test->mut);
}

static int testThreadReduction_internal(POOL_ctx* ctx, poolTest_t test)
{
int const nbWaits = 16;
UTIL_time_t startTime;
U64 time4threads, time2threads;

test.countdown = nbWaits;
test.val = 0;
test.max = nbWaits;
test.max = 0;

startTime = UTIL_getTime();
{ int i;
for (i=0; i<nbWaits; i++)
POOL_add(ctx, &waitLongFn, &test);
}
ZSTD_pthread_mutex_lock(&test.mut);
ZSTD_pthread_cond_wait(&test.cond, &test.mut);
ASSERT_EQ(test.val, nbWaits);
while (test.countdown > 0)
ZSTD_pthread_cond_wait(&test.cond, &test.mut);
ASSERT_EQ(test.val, 0);
ASSERT_EQ(test.max, 4);
ZSTD_pthread_mutex_unlock(&test.mut);
time4threads = UTIL_clockSpanNano(startTime);

ASSERT_EQ( POOL_resize(ctx, 2/*nbThreads*/) , 0 );
test.countdown = nbWaits;
test.val = 0;
startTime = UTIL_getTime();
test.max = 0;
{ int i;
for (i=0; i<nbWaits; i++)
POOL_add(ctx, &waitLongFn, &test);
}
ZSTD_pthread_mutex_lock(&test.mut);
ZSTD_pthread_cond_wait(&test.cond, &test.mut);
ASSERT_EQ(test.val, nbWaits);
while (test.countdown > 0)
ZSTD_pthread_cond_wait(&test.cond, &test.mut);
ASSERT_EQ(test.val, 0);
ASSERT_EQ(test.max, 2);
ZSTD_pthread_mutex_unlock(&test.mut);
time2threads = UTIL_clockSpanNano(startTime);

if (time4threads >= time2threads) return 1; /* check 4 threads were effectively faster than 2 */
return 0;
}

@ -246,7 +255,7 @@ int main(int argc, const char **argv) {
printf("FAIL: thread reduction not effective \n");
return 1;
} else {
printf("SUCCESS: thread reduction effective (slower execution) \n");
printf("SUCCESS: thread reduction effective \n");
}

if (testAbruptEnding()) {

File diff suppressed because it is too large
@ -1184,6 +1184,58 @@ static int basicUnitTests(U32 seed, double compressibility)
}
DISPLAYLEVEL(3, "OK \n");

/* Small Sequence Section bug */
DISPLAYLEVEL(3, "test%3i : decompress blocks with small sequences section : ", testNb++);
{ /* This test consists of 3 blocks. Each block has one sequence.
The sequence has literal length of 10, match length of 10 and offset of 10.
The sequence values and compression modes for the blocks are as follows:
The order of values is ll, ml, of.
- First block : (10, 7, 13) (rle, rle, rle)
- size of sequences section: 6 bytes (1 byte for nbSeq, 1 byte for encoding mode, 3 bytes for rle, 1 byte bitstream)
- Second block : (10, 7, 1) (repeat, repeat, rle)
- size of sequences section: 4 bytes (1 byte for nbSeq, 1 byte for encoding mode, 1 byte for rle, 1 byte bitstream)
- Third block : (10, 7, 1) (repeat, repeat, repeat)
- size of sequences section: 3 bytes (1 byte for nbSeq, 1 byte for encoding mode, 1 byte bitstream) */

unsigned char compressed[] = {
0x28, 0xb5, 0x2f, 0xfd, 0x24, 0x3c, 0x35, 0x01, 0x00, 0xf0, 0x85, 0x08,
0xc2, 0xc4, 0x70, 0xcf, 0xd7, 0xc0, 0x96, 0x7e, 0x4c, 0x6b, 0xa9, 0x8b,
0xbc, 0xc5, 0xb6, 0xd9, 0x7f, 0x4c, 0xf1, 0x05, 0xa6, 0x54, 0xef, 0xac,
0x69, 0x94, 0x89, 0x1c, 0x03, 0x44, 0x0a, 0x07, 0x00, 0xb4, 0x04, 0x80,
0x40, 0x0a, 0xa4
};
unsigned int compressedSize = 51;
unsigned char decompressed[] = {
0x85, 0x08, 0xc2, 0xc4, 0x70, 0xcf, 0xd7, 0xc0, 0x96, 0x7e, 0x85, 0x08,
0xc2, 0xc4, 0x70, 0xcf, 0xd7, 0xc0, 0x96, 0x7e, 0x4c, 0x6b, 0xa9, 0x8b,
0xbc, 0xc5, 0xb6, 0xd9, 0x7f, 0x4c, 0x4c, 0x6b, 0xa9, 0x8b, 0xbc, 0xc5,
0xb6, 0xd9, 0x7f, 0x4c, 0xf1, 0x05, 0xa6, 0x54, 0xef, 0xac, 0x69, 0x94,
0x89, 0x1c, 0xf1, 0x05, 0xa6, 0x54, 0xef, 0xac, 0x69, 0x94, 0x89, 0x1c
};
unsigned int decompressedSize = 60;

ZSTD_DStream* const zds = ZSTD_createDStream();
if (zds==NULL) goto _output_error;

CHECK_Z( ZSTD_initDStream(zds) );
inBuff.src = compressed;
inBuff.size = compressedSize;
inBuff.pos = 0;
outBuff.dst = decodedBuffer;
outBuff.size = CNBufferSize;
outBuff.pos = 0;

CHECK(ZSTD_decompressStream(zds, &outBuff, &inBuff) != 0,
"Decompress did not reach the end of frame");
CHECK(inBuff.pos != inBuff.size, "Decompress did not fully consume input");
CHECK(outBuff.pos != decompressedSize, "Decompressed size does not match");
CHECK(memcmp(outBuff.dst, decompressed, decompressedSize) != 0,
"Decompressed data does not match");

ZSTD_freeDStream(zds);
}
DISPLAYLEVEL(3, "OK \n");

DISPLAYLEVEL(3, "test%3i : dictionary + uncompressible block + reusing tables checks offset table validity: ", testNb++);
{ ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(
dictionary.start, dictionary.filled,

9 zlibWrapper/.gitignore vendored
@ -1,11 +1,14 @@
# object artifacts
*.o

# Default result files
_*
example.*
example
example_zstd.*
example_gz.*
fitblk.*
fitblk
fitblk_zstd.*
zwrapbench.*
zwrapbench
foo.gz

minigzip