import zstd 1.4.0
This commit is contained in:
parent
af73257b09
commit
3f774a5e86
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/vendor/zstd/dist/; revision=346361 svn path=/vendor/zstd/1.4.0/; revision=346362; tag=vendor/zstd/1.4.0
461
CHANGELOG
Normal file
461
CHANGELOG
Normal file
@ -0,0 +1,461 @@
|
||||
v1.4.0
|
||||
perf: Improve level 1 compression speed in most scenarios by 6% by @gbtucker and @terrelln
|
||||
api: Move the advanced API, including all functions in the staging section, to the stable section
|
||||
api: Make ZSTD_e_flush and ZSTD_e_end block for maximum forward progress
|
||||
api: Rename ZSTD_CCtxParam_getParameter to ZSTD_CCtxParams_getParameter
|
||||
api: Rename ZSTD_CCtxParam_setParameter to ZSTD_CCtxParams_setParameter
|
||||
api: Don't export ZSTDMT functions from the shared library by default
|
||||
api: Require ZSTD_MULTITHREAD to be defined to use ZSTDMT
|
||||
api: Add ZSTD_decompressBound() to provide an upper bound on decompressed size by @shakeelrao
|
||||
api: Fix ZSTD_decompressDCtx() corner cases with a dictionary
|
||||
api: Move ZSTD_getDictID_*() functions to the stable section
|
||||
api: Add ZSTD_c_literalCompressionMode flag to enable or disable literal compression by @terrelln
|
||||
api: Allow compression parameters to be set when a dictionary is used
|
||||
api: Allow setting parameters before or after ZSTD_CCtx_loadDictionary() is called
|
||||
api: Fix ZSTD_estimateCStreamSize_usingCCtxParams()
|
||||
api: Setting ZSTD_d_maxWindowLog to 0 means use the default
|
||||
cli: Ensure that a dictionary is not used to compress itself by @shakeelrao
|
||||
cli: Add --[no-]compress-literals flag to enable or disable literal compression
|
||||
doc: Update the examples to use the advanced API
|
||||
doc: Explain how to transition from old streaming functions to the advanced API in the header
|
||||
build: Improve the Windows release packages
|
||||
build: Improve CMake build by @hjmjohnson
|
||||
build: Build fixes for FreeBSD by @lwhsu
|
||||
build: Remove redundant warnings by @thatsafunnyname
|
||||
build: Fix tests on OpenBSD by @bket
|
||||
build: Extend fuzzer build system to work with the new clang engine
|
||||
build: CMake now creates the libzstd.so.1 symlink
|
||||
build: Improve Menson build by @lzutao
|
||||
misc: Fix symbolic link detection on FreeBSD
|
||||
misc: Use physical core count for -T0 on FreeBSD by @cemeyer
|
||||
misc: Fix zstd --list on truncated files by @kostmo
|
||||
misc: Improve logging in debug mode by @felixhandte
|
||||
misc: Add CirrusCI tests by @lwhsu
|
||||
misc: Optimize dictionary memory usage in corner cases
|
||||
misc: Improve the dictionary builder on small or homogeneous data
|
||||
misc: Fix spelling across the repo by @jsoref
|
||||
|
||||
v1.3.8
|
||||
perf: better decompression speed on large files (+7%) and cold dictionaries (+15%)
|
||||
perf: slightly better compression ratio at high compression modes
|
||||
api : finalized advanced API, last stage before "stable" status
|
||||
api : new --rsyncable mode, by @terrelln
|
||||
api : support decompression of empty frames into NULL (used to be an error) (#1385)
|
||||
build: new set of macros to build a minimal size decoder, by @felixhandte
|
||||
build: fix compilation on MIPS32, reported by @clbr (#1441)
|
||||
build: fix compilation with multiple -arch flags, by @ryandesign
|
||||
build: highly upgraded meson build, by @lzutao
|
||||
build: improved buck support, by @obelisk
|
||||
build: fix cmake script : can create debug build, by @pitrou
|
||||
build: Makefile : grep works on both colored consoles and systems without color support
|
||||
build: fixed zstd-pgo, by @bmwiedemann
|
||||
cli : support ZSTD_CLEVEL environment variable, by @yijinfb (#1423)
|
||||
cli : --no-progress flag, preserving final summary (#1371), by @terrelln
|
||||
cli : ensure destination file is not source file (#1422)
|
||||
cli : clearer error messages, especially when input file not present
|
||||
doc : clarified zstd_compression_format.md, by @ulikunitz
|
||||
misc: fixed zstdgrep, returns 1 on failure, by @lzutao
|
||||
misc: NEWS renamed as CHANGELOG, in accordance with fboss
|
||||
|
||||
v1.3.7
|
||||
perf: slightly better decompression speed on clang (depending on hardware target)
|
||||
fix : performance of dictionary compression for small input < 4 KB at levels 9 and 10
|
||||
build: no longer build backtrace by default in release mode; restrict further automatic mode
|
||||
build: control backtrace support through build macro BACKTRACE
|
||||
misc: added man pages for zstdless and zstdgrep, by @samrussell
|
||||
|
||||
v1.3.6
|
||||
perf: much faster dictionary builder, by @jenniferliu
|
||||
perf: faster dictionary compression on small data when using multiple contexts, by @felixhandte
|
||||
perf: faster dictionary decompression when using a very large number of dictionaries simultaneously
|
||||
cli : fix : does no longer overwrite destination when source does not exist (#1082)
|
||||
cli : new command --adapt, for automatic compression level adaptation
|
||||
api : fix : block api can be streamed with > 4 GB, reported by @catid
|
||||
api : reduced ZSTD_DDict size by 2 KB
|
||||
api : minimum negative compression level is defined, and can be queried using ZSTD_minCLevel().
|
||||
build: support Haiku target, by @korli
|
||||
build: Read Legacy format is limited to v0.5+ by default. Can be changed at compile time with macro ZSTD_LEGACY_SUPPORT.
|
||||
doc : zstd_compression_format.md updated to match wording in IETF RFC 8478
|
||||
misc: tests/paramgrill, a parameter optimizer, by @GeorgeLu97
|
||||
|
||||
v1.3.5
|
||||
perf: much faster dictionary compression, by @felixhandte
|
||||
perf: small quality improvement for dictionary generation, by @terrelln
|
||||
perf: slightly improved high compression levels (notably level 19)
|
||||
mem : automatic memory release for long duration contexts
|
||||
cli : fix : overlapLog can be manually set
|
||||
cli : fix : decoding invalid lz4 frames
|
||||
api : fix : performance degradation for dictionary compression when using advanced API, by @terrelln
|
||||
api : change : clarify ZSTD_CCtx_reset() vs ZSTD_CCtx_resetParameters(), by @terrelln
|
||||
build: select custom libzstd scope through control macros, by @GeorgeLu97
|
||||
build: OpenBSD patch, by @bket
|
||||
build: make and make all are compatible with -j
|
||||
doc : clarify zstd_compression_format.md, updated for IETF RFC process
|
||||
misc: pzstd compatible with reproducible compilation, by @lamby
|
||||
|
||||
v1.3.4
|
||||
perf: faster speed (especially decoding speed) on recent cpus (haswell+)
|
||||
perf: much better performance associating --long with multi-threading, by @terrelln
|
||||
perf: better compression at levels 13-15
|
||||
cli : asynchronous compression by default, for faster experience (use --single-thread for former behavior)
|
||||
cli : smoother status report in multi-threading mode
|
||||
cli : added command --fast=#, for faster compression modes
|
||||
cli : fix crash when not overwriting existing files, by Pádraig Brady (@pixelb)
|
||||
api : `nbThreads` becomes `nbWorkers` : 1 triggers asynchronous mode
|
||||
api : compression levels can be negative, for even more speed
|
||||
api : ZSTD_getFrameProgression() : get precise progress status of ZSTDMT anytime
|
||||
api : ZSTDMT can accept new compression parameters during compression
|
||||
api : implemented all advanced dictionary decompression prototypes
|
||||
build: improved meson recipe, by Shawn Landden (@shawnl)
|
||||
build: VS2017 scripts, by @HaydnTrigg
|
||||
misc: all /contrib projects fixed
|
||||
misc: added /contrib/docker script by @gyscos
|
||||
|
||||
v1.3.3
|
||||
perf: faster zstd_opt strategy (levels 16-19)
|
||||
fix : bug #944 : multithreading with shared ditionary and large data, reported by @gsliepen
|
||||
cli : fix : content size written in header by default
|
||||
cli : fix : improved LZ4 format support, by @felixhandte
|
||||
cli : new : hidden command `-S`, to benchmark multiple files while generating one result per file
|
||||
api : fix : support large skippable frames, by @terrelln
|
||||
api : fix : streaming interface was adding a useless 3-bytes null block to small frames
|
||||
api : change : when setting `pledgedSrcSize`, use `ZSTD_CONTENTSIZE_UNKNOWN` macro value to mean "unknown"
|
||||
build: fix : compilation under rhel6 and centos6, reported by @pixelb
|
||||
build: added `check` target
|
||||
|
||||
v1.3.2
|
||||
new : long range mode, using --long command, by Stella Lau (@stellamplau)
|
||||
new : ability to generate and decode magicless frames (#591)
|
||||
changed : maximum nb of threads reduced to 200, to avoid address space exhaustion in 32-bits mode
|
||||
fix : multi-threading compression works with custom allocators
|
||||
fix : ZSTD_sizeof_CStream() was over-evaluating memory usage
|
||||
fix : a rare compression bug when compression generates very large distances and bunch of other conditions (only possible at --ultra -22)
|
||||
fix : 32-bits build can now decode large offsets (levels 21+)
|
||||
cli : added LZ4 frame support by default, by Felix Handte (@felixhandte)
|
||||
cli : improved --list output
|
||||
cli : new : can split input file for dictionary training, using command -B#
|
||||
cli : new : clean operation artefact on Ctrl-C interruption
|
||||
cli : fix : do not change /dev/null permissions when using command -t with root access, reported by @mike155 (#851)
|
||||
cli : fix : write file size in header in multiple-files mode
|
||||
api : added macro ZSTD_COMPRESSBOUND() for static allocation
|
||||
api : experimental : new advanced decompression API
|
||||
api : fix : sizeof_CCtx() used to over-estimate
|
||||
build: fix : no-multithread variant compiles without pool.c dependency, reported by Mitchell Blank Jr (@mitchblank) (#819)
|
||||
build: better compatibility with reproducible builds, by Bernhard M. Wiedemann (@bmwiedemann) (#818)
|
||||
example : added streaming_memory_usage
|
||||
license : changed /examples license to BSD + GPLv2
|
||||
license : fix a few header files to reflect new license (#825)
|
||||
|
||||
v1.3.1
|
||||
New license : BSD + GPLv2
|
||||
perf: substantially decreased memory usage in Multi-threading mode, thanks to reports by Tino Reichardt (@mcmilk)
|
||||
perf: Multi-threading supports up to 256 threads. Cap at 256 when more are requested (#760)
|
||||
cli : improved and fixed --list command, by @ib (#772)
|
||||
cli : command -vV to list supported formats, by @ib (#771)
|
||||
build : fixed binary variants, reported by @svenha (#788)
|
||||
build : fix Visual compilation for non x86/x64 targets, reported by Greg Slazinski (@GregSlazinski) (#718)
|
||||
API exp : breaking change : ZSTD_getframeHeader() provides more information
|
||||
API exp : breaking change : pinned down values of error codes
|
||||
doc : fixed huffman example, by Ulrich Kunitz (@ulikunitz)
|
||||
new : contrib/adaptive-compression, I/O driven compression strength, by Paul Cruz (@paulcruz74)
|
||||
new : contrib/long_distance_matching, statistics by Stella Lau (@stellamplau)
|
||||
updated : contrib/linux-kernel, by Nick Terrell (@terrelln)
|
||||
|
||||
v1.3.0
|
||||
cli : new : `--list` command, by Paul Cruz
|
||||
cli : changed : xz/lzma support enabled by default
|
||||
cli : changed : `-t *` continue processing list after a decompression error
|
||||
API : added : ZSTD_versionString()
|
||||
API : promoted to stable status : ZSTD_getFrameContentSize(), by Sean Purcell
|
||||
API exp : new advanced API : ZSTD_compress_generic(), ZSTD_CCtx_setParameter()
|
||||
API exp : new : API for static or external allocation : ZSTD_initStatic?Ctx()
|
||||
API exp : added : ZSTD_decompressBegin_usingDDict(), requested by Guy Riddle (#700)
|
||||
API exp : clarified memory estimation / measurement functions.
|
||||
API exp : changed : strongest strategy renamed ZSTD_btultra, fastest strategy ZSTD_fast set to 1
|
||||
tools : decodecorpus can generate random dictionary-compressed samples, by Paul Cruz
|
||||
new : contrib/seekable_format, demo and API, by Sean Purcell
|
||||
changed : contrib/linux-kernel, updated version and license, by Nick Terrell
|
||||
|
||||
v1.2.0
|
||||
cli : changed : Multithreading enabled by default (use target zstd-nomt or HAVE_THREAD=0 to disable)
|
||||
cli : new : command -T0 means "detect and use nb of cores", by Sean Purcell
|
||||
cli : new : zstdmt symlink hardwired to `zstd -T0`
|
||||
cli : new : command --threads=# (#671)
|
||||
cli : changed : cover dictionary builder by default, for improved quality, by Nick Terrell
|
||||
cli : new : commands --train-cover and --train-legacy, to select dictionary algorithm and parameters
|
||||
cli : experimental targets `zstd4` and `xzstd4`, with support for lz4 format, by Sean Purcell
|
||||
cli : fix : does not output compressed data on console
|
||||
cli : fix : ignore symbolic links unless --force specified,
|
||||
API : breaking change : ZSTD_createCDict_advanced(), only use compressionParameters as argument
|
||||
API : added : prototypes ZSTD_*_usingCDict_advanced(), for direct control over frameParameters.
|
||||
API : improved: ZSTDMT_compressCCtx() reduced memory usage
|
||||
API : fix : ZSTDMT_compressCCtx() now provides srcSize in header (#634)
|
||||
API : fix : src size stored in frame header is controlled at end of frame
|
||||
API : fix : enforced consistent rules for pledgedSrcSize==0 (#641)
|
||||
API : fix : error code "GENERIC" replaced by "dstSizeTooSmall" when appropriate
|
||||
build: improved cmake script, by @Majlen
|
||||
build: enabled Multi-threading support for *BSD, by Baptiste Daroussin
|
||||
tools: updated Paramgrill. Command -O# provides best parameters for sample and speed target.
|
||||
new : contrib/linux-kernel version, by Nick Terrell
|
||||
|
||||
v1.1.4
|
||||
cli : new : can compress in *.gz format, using --format=gzip command, by Przemyslaw Skibinski
|
||||
cli : new : advanced benchmark command --priority=rt
|
||||
cli : fix : write on sparse-enabled file systems in 32-bits mode, by @ds77
|
||||
cli : fix : --rm remains silent when input is stdin
|
||||
cli : experimental : xzstd, with support for xz/lzma decoding, by Przemyslaw Skibinski
|
||||
speed : improved decompression speed in streaming mode for single shot scenarios (+5%)
|
||||
memory: DDict (decompression dictionary) memory usage down from 150 KB to 20 KB
|
||||
arch: 32-bits variant able to generate and decode very long matches (>32 MB), by Sean Purcell
|
||||
API : new : ZSTD_findFrameCompressedSize(), ZSTD_getFrameContentSize(), ZSTD_findDecompressedSize()
|
||||
API : changed : dropped support of legacy versions <= v0.3 (can be changed by modifying ZSTD_LEGACY_SUPPORT value)
|
||||
build : new: meson build system in contrib/meson, by Dima Krasner
|
||||
build : improved cmake script, by @Majlen
|
||||
build : added -Wformat-security flag, as recommended by Padraig Brady
|
||||
doc : new : educational decoder, by Sean Purcell
|
||||
|
||||
v1.1.3
|
||||
cli : zstd can decompress .gz files (can be disabled with `make zstd-nogz` or `make HAVE_ZLIB=0`)
|
||||
cli : new : experimental target `make zstdmt`, with multi-threading support
|
||||
cli : new : improved dictionary builder "cover" (experimental), by Nick Terrell, based on prior work by Giuseppe Ottaviano.
|
||||
cli : new : advanced commands for detailed parameters, by Przemyslaw Skibinski
|
||||
cli : fix zstdless on Mac OS-X, by Andrew Janke
|
||||
cli : fix #232 "compress non-files"
|
||||
dictBuilder : improved dictionary generation quality, thanks to Nick Terrell
|
||||
API : new : lib/compress/ZSTDMT_compress.h multithreading API (experimental)
|
||||
API : new : ZSTD_create?Dict_byReference(), requested by Bartosz Taudul
|
||||
API : new : ZDICT_finalizeDictionary()
|
||||
API : fix : ZSTD_initCStream_usingCDict() properly writes dictID into frame header, by Gregory Szorc (#511)
|
||||
API : fix : all symbols properly exposed in libzstd, by Nick Terrell
|
||||
build : support for Solaris target, by Przemyslaw Skibinski
|
||||
doc : clarified specification, by Sean Purcell
|
||||
|
||||
v1.1.2
|
||||
API : streaming : decompression : changed : automatic implicit reset when chain-decoding new frames without init
|
||||
API : experimental : added : dictID retrieval functions, and ZSTD_initCStream_srcSize()
|
||||
API : zbuff : changed : prototypes now generate deprecation warnings
|
||||
lib : improved : faster decompression speed at ultra compression settings and 32-bits mode
|
||||
lib : changed : only public ZSTD_ symbols are now exposed
|
||||
lib : changed : reduced usage of stack memory
|
||||
lib : fixed : several corner case bugs, by Nick Terrell
|
||||
cli : new : gzstd, experimental version able to decode .gz files, by Przemyslaw Skibinski
|
||||
cli : new : preserve file attributes
|
||||
cli : new : added zstdless and zstdgrep tools
|
||||
cli : fixed : status displays total amount decoded, even for file consisting of multiple frames (like pzstd)
|
||||
cli : fixed : zstdcat
|
||||
zlib_wrapper : added support for gz* functions, by Przemyslaw Skibinski
|
||||
install : better compatibility with FreeBSD, by Dimitry Andric
|
||||
source tree : changed : zbuff source files moved to lib/deprecated
|
||||
|
||||
v1.1.1
|
||||
New : command -M#, --memory=, --memlimit=, --memlimit-decompress= to limit allowed memory consumption
|
||||
New : doc/zstd_manual.html, by Przemyslaw Skibinski
|
||||
Improved : slightly better compression ratio at --ultra levels (>= 20)
|
||||
Improved : better memory usage when using streaming compression API, thanks to @Rogier-5 report
|
||||
Added : API : ZSTD_initCStream_usingCDict(), ZSTD_initDStream_usingDDict() (experimental section)
|
||||
Added : example/multiple_streaming_compression.c
|
||||
Changed : zstd_errors.h is now installed within /include (and replaces errors_public.h)
|
||||
Updated man page
|
||||
Fixed : zstd-small, zstd-compress and zstd-decompress compilation targets
|
||||
|
||||
v1.1.0
|
||||
New : contrib/pzstd, parallel version of zstd, by Nick Terrell
|
||||
added : NetBSD install target (#338)
|
||||
Improved : speed for batches of small files
|
||||
Improved : speed of zlib wrapper, by Przemyslaw Skibinski
|
||||
Changed : libzstd on Windows supports legacy formats, by Christophe Chevalier
|
||||
Fixed : CLI -d output to stdout by default when input is stdin (#322)
|
||||
Fixed : CLI correctly detects console on Mac OS-X
|
||||
Fixed : CLI supports recursive mode `-r` on Mac OS-X
|
||||
Fixed : Legacy decoders use unified error codes, reported by benrg (#341), fixed by Przemyslaw Skibinski
|
||||
Fixed : compatibility with OpenBSD, reported by Juan Francisco Cantero Hurtado (#319)
|
||||
Fixed : compatibility with Hurd, by Przemyslaw Skibinski (#365)
|
||||
Fixed : zstd-pgo, reported by octoploid (#329)
|
||||
|
||||
v1.0.0
|
||||
Change Licensing, all project is now BSD, Copyright Facebook
|
||||
Small decompression speed improvement
|
||||
API : Streaming API supports legacy format
|
||||
API : ZDICT_getDictID(), ZSTD_sizeof_{CCtx, DCtx, CStream, DStream}(), ZSTD_setDStreamParameter()
|
||||
CLI supports legacy formats v0.4+
|
||||
Fixed : compression fails on certain huge files, reported by Jesse McGrew
|
||||
Enhanced documentation, by Przemyslaw Skibinski
|
||||
|
||||
v0.8.1
|
||||
New streaming API
|
||||
Changed : --ultra now enables levels beyond 19
|
||||
Changed : -i# now selects benchmark time in second
|
||||
Fixed : ZSTD_compress* can now compress > 4 GB in a single pass, reported by Nick Terrell
|
||||
Fixed : speed regression on specific patterns (#272)
|
||||
Fixed : support for Z_SYNC_FLUSH, by Dmitry Krot (#291)
|
||||
Fixed : ICC compilation, by Przemyslaw Skibinski
|
||||
|
||||
v0.8.0
|
||||
Improved : better speed on clang and gcc -O2, thanks to Eric Biggers
|
||||
New : Build on FreeBSD and DragonFly, thanks to JrMarino
|
||||
Changed : modified API : ZSTD_compressEnd()
|
||||
Fixed : legacy mode with ZSTD_HEAPMODE=0, by Christopher Bergqvist
|
||||
Fixed : premature end of frame when zero-sized raw block, reported by Eric Biggers
|
||||
Fixed : large dictionaries (> 384 KB), reported by Ilona Papava
|
||||
Fixed : checksum correctly checked in single-pass mode
|
||||
Fixed : combined --test amd --rm, reported by Andreas M. Nilsson
|
||||
Modified : minor compression level adaptations
|
||||
Updated : compression format specification to v0.2.0
|
||||
changed : zstd.h moved to /lib directory
|
||||
|
||||
v0.7.5
|
||||
Transition version, supporting decoding of v0.8.x
|
||||
|
||||
v0.7.4
|
||||
Added : homebrew for Mac, by Daniel Cade
|
||||
Added : more examples
|
||||
Fixed : segfault when using small dictionaries, reported by Felix Handte
|
||||
Modified : default compression level for CLI is now 3
|
||||
Updated : specification, to v0.1.1
|
||||
|
||||
v0.7.3
|
||||
New : compression format specification
|
||||
New : `--` separator, stating that all following arguments are file names. Suggested by Chip Turner.
|
||||
New : `ZSTD_getDecompressedSize()`
|
||||
New : OpenBSD target, by Juan Francisco Cantero Hurtado
|
||||
New : `examples` directory
|
||||
fixed : dictBuilder using HC levels, reported by Bartosz Taudul
|
||||
fixed : legacy support from ZSTD_decompress_usingDDict(), reported by Felix Handte
|
||||
fixed : multi-blocks decoding with intermediate uncompressed blocks, reported by Greg Slazinski
|
||||
modified : removed "mem.h" and "error_public.h" dependencies from "zstd.h" (experimental section)
|
||||
modified : legacy functions no longer need magic number
|
||||
|
||||
v0.7.2
|
||||
fixed : ZSTD_decompressBlock() using multiple consecutive blocks. Reported by Greg Slazinski.
|
||||
fixed : potential segfault on very large files (many gigabytes). Reported by Chip Turner.
|
||||
fixed : CLI displays system error message when destination file cannot be created (#231). Reported by Chip Turner.
|
||||
|
||||
v0.7.1
|
||||
fixed : ZBUFF_compressEnd() called multiple times with too small `dst` buffer, reported by Christophe Chevalier
|
||||
fixed : dictBuilder fails if first sample is too small, reported by Руслан Ковалёв
|
||||
fixed : corruption issue, reported by cj
|
||||
modified : checksum enabled by default in command line mode
|
||||
|
||||
v0.7.0
|
||||
New : Support for directory compression, using `-r`, thanks to Przemyslaw Skibinski
|
||||
New : Command `--rm`, to remove source file after successful de/compression
|
||||
New : Visual build scripts, by Christophe Chevalier
|
||||
New : Support for Sparse File-systems (do not use space for zero-filled sectors)
|
||||
New : Frame checksum support
|
||||
New : Support pass-through mode (when using `-df`)
|
||||
API : more efficient Dictionary API : `ZSTD_compress_usingCDict()`, `ZSTD_decompress_usingDDict()`
|
||||
API : create dictionary files from custom content, by Giuseppe Ottaviano
|
||||
API : support for custom malloc/free functions
|
||||
New : controllable Dictionary ID
|
||||
New : Support for skippable frames
|
||||
|
||||
v0.6.1
|
||||
New : zlib wrapper API, thanks to Przemyslaw Skibinski
|
||||
New : Ability to compile compressor / decompressor separately
|
||||
Changed : new lib directory structure
|
||||
Fixed : Legacy codec v0.5 compatible with dictionary decompression
|
||||
Fixed : Decoder corruption error (#173)
|
||||
Fixed : null-string roundtrip (#176)
|
||||
New : benchmark mode can select directory as input
|
||||
Experimental : midipix support, VMS support
|
||||
|
||||
v0.6.0
|
||||
Stronger high compression modes, thanks to Przemyslaw Skibinski
|
||||
API : ZSTD_getFrameParams() provides size of decompressed content
|
||||
New : highest compression modes require `--ultra` command to fully unleash their capacity
|
||||
Fixed : zstd cli return error code > 0 and removes dst file artifact when decompression fails, thanks to Chip Turner
|
||||
|
||||
v0.5.1
|
||||
New : Optimal parsing => Very high compression modes, thanks to Przemyslaw Skibinski
|
||||
Changed : Dictionary builder integrated into libzstd and zstd cli
|
||||
Changed (!) : zstd cli now uses "multiple input files" as default mode. See `zstd -h`.
|
||||
Fix : high compression modes for big-endian platforms
|
||||
New : zstd cli : `-t` | `--test` command
|
||||
|
||||
v0.5.0
|
||||
New : dictionary builder utility
|
||||
Changed : streaming & dictionary API
|
||||
Improved : better compression of small data
|
||||
|
||||
v0.4.7
|
||||
Improved : small compression speed improvement in HC mode
|
||||
Changed : `zstd_decompress.c` has ZSTD_LEGACY_SUPPORT to 0 by default
|
||||
fix : bt search bug
|
||||
|
||||
v0.4.6
|
||||
fix : fast compression mode on Windows
|
||||
New : cmake configuration file, thanks to Artyom Dymchenko
|
||||
Improved : high compression mode on repetitive data
|
||||
New : block-level API
|
||||
New : ZSTD_duplicateCCtx()
|
||||
|
||||
v0.4.5
|
||||
new : -m/--multiple : compress/decompress multiple files
|
||||
|
||||
v0.4.4
|
||||
Fixed : high compression modes for Windows 32 bits
|
||||
new : external dictionary API extended to buffered mode and accessible through command line
|
||||
new : windows DLL project, thanks to Christophe Chevalier
|
||||
|
||||
v0.4.3 :
|
||||
new : external dictionary API
|
||||
new : zstd-frugal
|
||||
|
||||
v0.4.2 :
|
||||
Generic minor improvements for small blocks
|
||||
Fixed : big-endian compatibility, by Peter Harris (#85)
|
||||
|
||||
v0.4.1
|
||||
Fixed : ZSTD_LEGACY_SUPPORT=0 build mode (reported by Luben)
|
||||
removed `zstd.c`
|
||||
|
||||
v0.4.0
|
||||
Command line utility compatible with high compression levels
|
||||
Removed zstdhc => merged into zstd
|
||||
Added : ZBUFF API (see zstd_buffered.h)
|
||||
Rolling buffer support
|
||||
|
||||
v0.3.6
|
||||
small blocks params
|
||||
|
||||
v0.3.5
|
||||
minor generic compression improvements
|
||||
|
||||
v0.3.4
|
||||
Faster fast cLevels
|
||||
|
||||
v0.3.3
|
||||
Small compression ratio improvement
|
||||
|
||||
v0.3.2
|
||||
Fixed Visual Studio
|
||||
|
||||
v0.3.1 :
|
||||
Small compression ratio improvement
|
||||
|
||||
v0.3
|
||||
HC mode : compression levels 2-26
|
||||
|
||||
v0.2.2
|
||||
Fix : Visual Studio 2013 & 2015 release compilation, by Christophe Chevalier
|
||||
|
||||
v0.2.1
|
||||
Fix : Read errors, advanced fuzzer tests, by Hanno Böck
|
||||
|
||||
v0.2.0
|
||||
**Breaking format change**
|
||||
Faster decompression speed
|
||||
Can still decode v0.1 format
|
||||
|
||||
v0.1.3
|
||||
fix uninitialization warning, reported by Evan Nemerson
|
||||
|
||||
v0.1.2
|
||||
frame concatenation support
|
||||
|
||||
v0.1.1
|
||||
fix compression bug
|
||||
detects write-flush errors
|
||||
|
||||
v0.1.0
|
||||
first release
|
8
Makefile
8
Makefile
@ -156,7 +156,7 @@ list:
|
||||
done \
|
||||
} | column -t -s $$'\t'
|
||||
|
||||
.PHONY: install clangtest armtest usan asan uasan
|
||||
.PHONY: install armtest usan asan uasan
|
||||
install:
|
||||
@$(MAKE) -C $(ZSTDDIR) $@
|
||||
@$(MAKE) -C $(PRGDIR) $@
|
||||
@ -188,7 +188,7 @@ gcc7build: clean
|
||||
.PHONY: clangbuild
|
||||
clangbuild: clean
|
||||
clang -v
|
||||
CXX=clang++ CC=clang $(MAKE) all MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion -Wdocumentation"
|
||||
CXX=clang++ CC=clang CFLAGS="-Werror -Wconversion -Wno-sign-conversion -Wdocumentation" $(MAKE) all
|
||||
|
||||
m32build: clean
|
||||
gcc -v
|
||||
@ -232,10 +232,6 @@ gcc6test: clean
|
||||
gcc-6 -v
|
||||
$(MAKE) all CC=gcc-6 MOREFLAGS="-Werror"
|
||||
|
||||
clangtest: clean
|
||||
clang -v
|
||||
$(MAKE) all CXX=clang++ CC=clang MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion -Wdocumentation"
|
||||
|
||||
armtest: clean
|
||||
$(MAKE) -C $(TESTDIR) datagen # use native, faster
|
||||
$(MAKE) -C $(TESTDIR) test CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static ZSTDRTTEST= MOREFLAGS="-Werror -static" FUZZER_FLAGS=--no-big-tests
|
||||
|
25
README.md
25
README.md
@ -14,6 +14,7 @@ a list of known ports and bindings is provided on [Zstandard homepage](http://ww
|
||||
[![Build Status][travisDevBadge]][travisLink]
|
||||
[![Build status][AppveyorDevBadge]][AppveyorLink]
|
||||
[![Build status][CircleDevBadge]][CircleLink]
|
||||
[![Build status][CirrusDevBadge]][CirrusLink]
|
||||
|
||||
[travisDevBadge]: https://travis-ci.org/facebook/zstd.svg?branch=dev "Continuous Integration test suite"
|
||||
[travisLink]: https://travis-ci.org/facebook/zstd
|
||||
@ -21,14 +22,16 @@ a list of known ports and bindings is provided on [Zstandard homepage](http://ww
|
||||
[AppveyorLink]: https://ci.appveyor.com/project/YannCollet/zstd-p0yf0
|
||||
[CircleDevBadge]: https://circleci.com/gh/facebook/zstd/tree/dev.svg?style=shield "Short test suite"
|
||||
[CircleLink]: https://circleci.com/gh/facebook/zstd
|
||||
[CirrusDevBadge]: https://api.cirrus-ci.com/github/facebook/zstd.svg?branch=dev
|
||||
[CirrusLink]: https://cirrus-ci.com/github/facebook/zstd
|
||||
|
||||
## Benchmarks
|
||||
|
||||
For reference, several fast compression algorithms were tested and compared
|
||||
on a server running Linux Debian (`Linux version 4.14.0-3-amd64`),
|
||||
with a Core i7-6700K CPU @ 4.0GHz,
|
||||
on a server running Arch Linux (`Linux version 5.0.5-arch1-1`),
|
||||
with a Core i9-9900K CPU @ 5.0GHz,
|
||||
using [lzbench], an open-source in-memory benchmark by @inikep
|
||||
compiled with [gcc] 7.3.0,
|
||||
compiled with [gcc] 8.2.1,
|
||||
on the [Silesia compression corpus].
|
||||
|
||||
[lzbench]: https://github.com/inikep/lzbench
|
||||
@ -37,14 +40,14 @@ on the [Silesia compression corpus].
|
||||
|
||||
| Compressor name | Ratio | Compression| Decompress.|
|
||||
| --------------- | ------| -----------| ---------- |
|
||||
| **zstd 1.3.4 -1** | 2.877 | 470 MB/s | 1380 MB/s |
|
||||
| zlib 1.2.11 -1 | 2.743 | 110 MB/s | 400 MB/s |
|
||||
| brotli 1.0.2 -0 | 2.701 | 410 MB/s | 430 MB/s |
|
||||
| quicklz 1.5.0 -1 | 2.238 | 550 MB/s | 710 MB/s |
|
||||
| lzo1x 2.09 -1 | 2.108 | 650 MB/s | 830 MB/s |
|
||||
| lz4 1.8.1 | 2.101 | 750 MB/s | 3700 MB/s |
|
||||
| snappy 1.1.4 | 2.091 | 530 MB/s | 1800 MB/s |
|
||||
| lzf 3.6 -1 | 2.077 | 400 MB/s | 860 MB/s |
|
||||
| **zstd 1.4.0 -1** | 2.884 | 530 MB/s | 1360 MB/s |
|
||||
| zlib 1.2.11 -1 | 2.743 | 110 MB/s | 440 MB/s |
|
||||
| brotli 1.0.7 -0 | 2.701 | 430 MB/s | 470 MB/s |
|
||||
| quicklz 1.5.0 -1 | 2.238 | 600 MB/s | 800 MB/s |
|
||||
| lzo1x 2.09 -1 | 2.106 | 680 MB/s | 950 MB/s |
|
||||
| lz4 1.8.3 | 2.101 | 800 MB/s | 4220 MB/s |
|
||||
| snappy 1.1.4 | 2.073 | 580 MB/s | 2020 MB/s |
|
||||
| lzf 3.6 -1 | 2.077 | 440 MB/s | 930 MB/s |
|
||||
|
||||
[zlib]: http://www.zlib.net/
|
||||
[LZ4]: http://www.lz4.org/
|
||||
|
@ -13,7 +13,7 @@ CPPFLAGS += -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
|
||||
CFLAGS ?= -O3
|
||||
CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
|
||||
-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
|
||||
-Wstrict-prototypes -Wundef -Wformat-security \
|
||||
-Wstrict-prototypes -Wundef \
|
||||
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
|
||||
-Wredundant-decls
|
||||
CFLAGS += $(DEBUGFLAGS)
|
||||
@ -22,10 +22,10 @@ FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MULTITHREAD_LDFLAGS)
|
||||
|
||||
all: adapt datagen
|
||||
|
||||
adapt: $(ZSTD_FILES) $(PRGDIR)/util.c adapt.c
|
||||
adapt: $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c adapt.c
|
||||
$(CC) $(FLAGS) $^ -o $@
|
||||
|
||||
adapt-debug: $(ZSTD_FILES) $(PRGDIR)/util.c adapt.c
|
||||
adapt-debug: $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c adapt.c
|
||||
$(CC) $(FLAGS) -DDEBUG_MODE=2 $^ -o adapt
|
||||
|
||||
datagen : $(PRGDIR)/datagen.c datagencli.c
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include <string.h> /* memset */
|
||||
#include "zstd_internal.h"
|
||||
#include "util.h"
|
||||
#include "timefn.h" /* UTIL_time_t, UTIL_getTime, UTIL_getSpanTimeMicro */
|
||||
|
||||
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
|
||||
#define PRINT(...) fprintf(stdout, __VA_ARGS__)
|
||||
|
20
contrib/docker/Dockerfile
Normal file
20
contrib/docker/Dockerfile
Normal file
@ -0,0 +1,20 @@
|
||||
# Dockerfile
|
||||
# First image to build the binary
|
||||
FROM alpine as builder
|
||||
|
||||
RUN apk --no-cache add make gcc libc-dev
|
||||
COPY . /src
|
||||
RUN mkdir /pkg && cd /src && make && make DESTDIR=/pkg install
|
||||
|
||||
# Second minimal image to only keep the built binary
|
||||
FROM alpine
|
||||
|
||||
# Copy the built files
|
||||
COPY --from=builder /pkg /
|
||||
|
||||
# Copy the license as well
|
||||
RUN mkdir -p /usr/local/share/licenses/zstd
|
||||
COPY --from=builder /src/LICENSE /usr/local/share/licences/zstd/
|
||||
|
||||
# Just run `zstd` if no other command is given
|
||||
CMD ["/usr/local/bin/zstd"]
|
20
contrib/docker/README.md
Normal file
20
contrib/docker/README.md
Normal file
@ -0,0 +1,20 @@
|
||||
|
||||
## Requirement
|
||||
|
||||
The `Dockerfile` script requires a version of `docker` >= 17.05
|
||||
|
||||
## Installing docker
|
||||
|
||||
The official docker install docs use a ppa with a modern version available:
|
||||
https://docs.docker.com/install/linux/docker-ce/ubuntu/
|
||||
|
||||
## How to run
|
||||
|
||||
`docker build -t zstd .`
|
||||
|
||||
## test
|
||||
|
||||
```
|
||||
echo foo | docker run -i --rm zstd | docker run -i --rm zstd zstdcat
|
||||
foo
|
||||
```
|
@ -0,0 +1,44 @@
|
||||
ARG :=
|
||||
|
||||
CC ?= gcc
|
||||
CFLAGS ?= -O3
|
||||
INCLUDES := -I ../randomDictBuilder -I ../../../programs -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder
|
||||
|
||||
RANDOM_FILE := ../randomDictBuilder/random.c
|
||||
IO_FILE := ../randomDictBuilder/io.c
|
||||
|
||||
all: run clean
|
||||
|
||||
.PHONY: run
|
||||
run: benchmark
|
||||
echo "Benchmarking with $(ARG)"
|
||||
./benchmark $(ARG)
|
||||
|
||||
.PHONY: test
|
||||
test: benchmarkTest clean
|
||||
|
||||
.PHONY: benchmarkTest
|
||||
benchmarkTest: benchmark test.sh
|
||||
sh test.sh
|
||||
|
||||
benchmark: benchmark.o io.o random.o libzstd.a
|
||||
$(CC) $(CFLAGS) benchmark.o io.o random.o libzstd.a -o benchmark
|
||||
|
||||
benchmark.o: benchmark.c
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c benchmark.c
|
||||
|
||||
random.o: $(RANDOM_FILE)
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c $(RANDOM_FILE)
|
||||
|
||||
io.o: $(IO_FILE)
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c $(IO_FILE)
|
||||
|
||||
libzstd.a:
|
||||
$(MAKE) -C ../../../lib libzstd.a
|
||||
mv ../../../lib/libzstd.a .
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
rm -f *.o benchmark libzstd.a
|
||||
$(MAKE) -C ../../../lib clean
|
||||
echo "Cleaning is completed"
|
@ -0,0 +1,849 @@
|
||||
Benchmarking Dictionary Builder
|
||||
|
||||
### Permitted Argument:
|
||||
Input File/Directory (in=fileName): required; file/directory used to build dictionary; if directory, will operate recursively for files inside directory; can include multiple files/directories, each following "in="
|
||||
|
||||
###Running Test:
|
||||
make test
|
||||
|
||||
###Usage:
|
||||
Benchmark given input files: make ARG= followed by permitted arguments
|
||||
|
||||
### Examples:
|
||||
make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
|
||||
|
||||
###Benchmarking Result:
|
||||
- First Cover is optimize cover, second Cover uses optimized d and k from first one.
|
||||
- For every f value of fastCover, the first one is optimize fastCover and the second one uses optimized d and k from first one. This is run for accel values from 1 to 10.
|
||||
- Fourth column is chosen d and fifth column is chosen k
|
||||
|
||||
github:
|
||||
NODICT 0.000004 2.999642
|
||||
RANDOM 0.024560 8.791189
|
||||
LEGACY 0.727109 8.173529
|
||||
COVER 40.565676 10.652243 8 1298
|
||||
COVER 3.608284 10.652243 8 1298
|
||||
FAST f=15 a=1 4.181024 10.570882 8 1154
|
||||
FAST f=15 a=1 0.040788 10.570882 8 1154
|
||||
FAST f=15 a=2 3.548352 10.574287 6 1970
|
||||
FAST f=15 a=2 0.035535 10.574287 6 1970
|
||||
FAST f=15 a=3 3.287364 10.613950 6 1010
|
||||
FAST f=15 a=3 0.032182 10.613950 6 1010
|
||||
FAST f=15 a=4 3.184976 10.573883 6 1058
|
||||
FAST f=15 a=4 0.029878 10.573883 6 1058
|
||||
FAST f=15 a=5 3.045513 10.580640 8 1154
|
||||
FAST f=15 a=5 0.022162 10.580640 8 1154
|
||||
FAST f=15 a=6 3.003296 10.583677 6 1010
|
||||
FAST f=15 a=6 0.028091 10.583677 6 1010
|
||||
FAST f=15 a=7 2.952655 10.622551 6 1106
|
||||
FAST f=15 a=7 0.02724 10.622551 6 1106
|
||||
FAST f=15 a=8 2.945674 10.614657 6 1010
|
||||
FAST f=15 a=8 0.027264 10.614657 6 1010
|
||||
FAST f=15 a=9 3.153439 10.564018 8 1154
|
||||
FAST f=15 a=9 0.020635 10.564018 8 1154
|
||||
FAST f=15 a=10 2.950416 10.511454 6 1010
|
||||
FAST f=15 a=10 0.026606 10.511454 6 1010
|
||||
FAST f=16 a=1 3.970029 10.681035 8 1154
|
||||
FAST f=16 a=1 0.038188 10.681035 8 1154
|
||||
FAST f=16 a=2 3.422892 10.484978 6 1874
|
||||
FAST f=16 a=2 0.034702 10.484978 6 1874
|
||||
FAST f=16 a=3 3.215836 10.632631 8 1154
|
||||
FAST f=16 a=3 0.026084 10.632631 8 1154
|
||||
FAST f=16 a=4 3.081353 10.626533 6 1106
|
||||
FAST f=16 a=4 0.030032 10.626533 6 1106
|
||||
FAST f=16 a=5 3.041241 10.545027 8 1922
|
||||
FAST f=16 a=5 0.022882 10.545027 8 1922
|
||||
FAST f=16 a=6 2.989390 10.638284 6 1874
|
||||
FAST f=16 a=6 0.028308 10.638284 6 1874
|
||||
FAST f=16 a=7 3.001581 10.797136 6 1106
|
||||
FAST f=16 a=7 0.027479 10.797136 6 1106
|
||||
FAST f=16 a=8 2.984107 10.658356 8 1058
|
||||
FAST f=16 a=8 0.021099 10.658356 8 1058
|
||||
FAST f=16 a=9 2.925788 10.523869 6 1010
|
||||
FAST f=16 a=9 0.026905 10.523869 6 1010
|
||||
FAST f=16 a=10 2.889605 10.745841 6 1874
|
||||
FAST f=16 a=10 0.026846 10.745841 6 1874
|
||||
FAST f=17 a=1 4.031953 10.672080 8 1202
|
||||
FAST f=17 a=1 0.040658 10.672080 8 1202
|
||||
FAST f=17 a=2 3.458107 10.589352 8 1106
|
||||
FAST f=17 a=2 0.02926 10.589352 8 1106
|
||||
FAST f=17 a=3 3.291189 10.662714 8 1154
|
||||
FAST f=17 a=3 0.026531 10.662714 8 1154
|
||||
FAST f=17 a=4 3.154950 10.549456 8 1346
|
||||
FAST f=17 a=4 0.024991 10.549456 8 1346
|
||||
FAST f=17 a=5 3.092271 10.541670 6 1202
|
||||
FAST f=17 a=5 0.038285 10.541670 6 1202
|
||||
FAST f=17 a=6 3.166146 10.729112 6 1874
|
||||
FAST f=17 a=6 0.038217 10.729112 6 1874
|
||||
FAST f=17 a=7 3.035467 10.810485 6 1106
|
||||
FAST f=17 a=7 0.036655 10.810485 6 1106
|
||||
FAST f=17 a=8 3.035668 10.530532 6 1058
|
||||
FAST f=17 a=8 0.037715 10.530532 6 1058
|
||||
FAST f=17 a=9 2.987917 10.589802 8 1922
|
||||
FAST f=17 a=9 0.02217 10.589802 8 1922
|
||||
FAST f=17 a=10 2.981647 10.722579 8 1106
|
||||
FAST f=17 a=10 0.021948 10.722579 8 1106
|
||||
FAST f=18 a=1 4.067144 10.634943 8 1154
|
||||
FAST f=18 a=1 0.041386 10.634943 8 1154
|
||||
FAST f=18 a=2 3.507377 10.546230 6 1970
|
||||
FAST f=18 a=2 0.037572 10.546230 6 1970
|
||||
FAST f=18 a=3 3.323015 10.648061 8 1154
|
||||
FAST f=18 a=3 0.028306 10.648061 8 1154
|
||||
FAST f=18 a=4 3.216735 10.705402 6 1010
|
||||
FAST f=18 a=4 0.030755 10.705402 6 1010
|
||||
FAST f=18 a=5 3.175794 10.588154 8 1874
|
||||
FAST f=18 a=5 0.025315 10.588154 8 1874
|
||||
FAST f=18 a=6 3.127459 10.751104 8 1106
|
||||
FAST f=18 a=6 0.023897 10.751104 8 1106
|
||||
FAST f=18 a=7 3.083017 10.780402 6 1106
|
||||
FAST f=18 a=7 0.029158 10.780402 6 1106
|
||||
FAST f=18 a=8 3.069700 10.547226 8 1346
|
||||
FAST f=18 a=8 0.024046 10.547226 8 1346
|
||||
FAST f=18 a=9 3.056591 10.674759 6 1010
|
||||
FAST f=18 a=9 0.028496 10.674759 6 1010
|
||||
FAST f=18 a=10 3.063588 10.737578 8 1106
|
||||
FAST f=18 a=10 0.023033 10.737578 8 1106
|
||||
FAST f=19 a=1 4.164041 10.650333 8 1154
|
||||
FAST f=19 a=1 0.042906 10.650333 8 1154
|
||||
FAST f=19 a=2 3.585409 10.577066 6 1058
|
||||
FAST f=19 a=2 0.038994 10.577066 6 1058
|
||||
FAST f=19 a=3 3.439643 10.639403 8 1154
|
||||
FAST f=19 a=3 0.028427 10.639403 8 1154
|
||||
FAST f=19 a=4 3.268869 10.554410 8 1298
|
||||
FAST f=19 a=4 0.026866 10.554410 8 1298
|
||||
FAST f=19 a=5 3.238225 10.615109 6 1010
|
||||
FAST f=19 a=5 0.03078 10.615109 6 1010
|
||||
FAST f=19 a=6 3.199558 10.609782 6 1874
|
||||
FAST f=19 a=6 0.030099 10.609782 6 1874
|
||||
FAST f=19 a=7 3.132395 10.794753 6 1106
|
||||
FAST f=19 a=7 0.028964 10.794753 6 1106
|
||||
FAST f=19 a=8 3.148446 10.554842 8 1298
|
||||
FAST f=19 a=8 0.024277 10.554842 8 1298
|
||||
FAST f=19 a=9 3.108324 10.668763 6 1010
|
||||
FAST f=19 a=9 0.02896 10.668763 6 1010
|
||||
FAST f=19 a=10 3.159863 10.757347 8 1106
|
||||
FAST f=19 a=10 0.023351 10.757347 8 1106
|
||||
FAST f=20 a=1 4.462698 10.661788 8 1154
|
||||
FAST f=20 a=1 0.047174 10.661788 8 1154
|
||||
FAST f=20 a=2 3.820269 10.678612 6 1106
|
||||
FAST f=20 a=2 0.040807 10.678612 6 1106
|
||||
FAST f=20 a=3 3.644955 10.648424 8 1154
|
||||
FAST f=20 a=3 0.031398 10.648424 8 1154
|
||||
FAST f=20 a=4 3.546257 10.559756 8 1298
|
||||
FAST f=20 a=4 0.029856 10.559756 8 1298
|
||||
FAST f=20 a=5 3.485248 10.646637 6 1010
|
||||
FAST f=20 a=5 0.033756 10.646637 6 1010
|
||||
FAST f=20 a=6 3.490438 10.775824 8 1106
|
||||
FAST f=20 a=6 0.028338 10.775824 8 1106
|
||||
FAST f=20 a=7 3.631289 10.801795 6 1106
|
||||
FAST f=20 a=7 0.035228 10.801795 6 1106
|
||||
FAST f=20 a=8 3.758936 10.545116 8 1346
|
||||
FAST f=20 a=8 0.027495 10.545116 8 1346
|
||||
FAST f=20 a=9 3.707024 10.677454 6 1010
|
||||
FAST f=20 a=9 0.031326 10.677454 6 1010
|
||||
FAST f=20 a=10 3.586593 10.756017 8 1106
|
||||
FAST f=20 a=10 0.027122 10.756017 8 1106
|
||||
FAST f=21 a=1 5.701396 10.655398 8 1154
|
||||
FAST f=21 a=1 0.067744 10.655398 8 1154
|
||||
FAST f=21 a=2 5.270542 10.650743 6 1106
|
||||
FAST f=21 a=2 0.052999 10.650743 6 1106
|
||||
FAST f=21 a=3 4.945294 10.652380 8 1154
|
||||
FAST f=21 a=3 0.052678 10.652380 8 1154
|
||||
FAST f=21 a=4 4.894079 10.543185 8 1298
|
||||
FAST f=21 a=4 0.04997 10.543185 8 1298
|
||||
FAST f=21 a=5 4.785417 10.630321 6 1010
|
||||
FAST f=21 a=5 0.045294 10.630321 6 1010
|
||||
FAST f=21 a=6 4.789381 10.664477 6 1874
|
||||
FAST f=21 a=6 0.046578 10.664477 6 1874
|
||||
FAST f=21 a=7 4.302955 10.805179 6 1106
|
||||
FAST f=21 a=7 0.041205 10.805179 6 1106
|
||||
FAST f=21 a=8 4.034630 10.551211 8 1298
|
||||
FAST f=21 a=8 0.040121 10.551211 8 1298
|
||||
FAST f=21 a=9 4.523868 10.799114 6 1010
|
||||
FAST f=21 a=9 0.043592 10.799114 6 1010
|
||||
FAST f=21 a=10 4.760736 10.750255 8 1106
|
||||
FAST f=21 a=10 0.043483 10.750255 8 1106
|
||||
FAST f=22 a=1 6.743064 10.640537 8 1154
|
||||
FAST f=22 a=1 0.086967 10.640537 8 1154
|
||||
FAST f=22 a=2 6.121739 10.626638 6 1970
|
||||
FAST f=22 a=2 0.066337 10.626638 6 1970
|
||||
FAST f=22 a=3 5.248851 10.640688 8 1154
|
||||
FAST f=22 a=3 0.054935 10.640688 8 1154
|
||||
FAST f=22 a=4 5.436579 10.588333 8 1298
|
||||
FAST f=22 a=4 0.064113 10.588333 8 1298
|
||||
FAST f=22 a=5 5.812815 10.652653 6 1010
|
||||
FAST f=22 a=5 0.058189 10.652653 6 1010
|
||||
FAST f=22 a=6 5.745472 10.666437 6 1874
|
||||
FAST f=22 a=6 0.057188 10.666437 6 1874
|
||||
FAST f=22 a=7 5.716393 10.806911 6 1106
|
||||
FAST f=22 a=7 0.056 10.806911 6 1106
|
||||
FAST f=22 a=8 5.698799 10.530784 8 1298
|
||||
FAST f=22 a=8 0.0583 10.530784 8 1298
|
||||
FAST f=22 a=9 5.710533 10.777391 6 1010
|
||||
FAST f=22 a=9 0.054945 10.777391 6 1010
|
||||
FAST f=22 a=10 5.685395 10.745023 8 1106
|
||||
FAST f=22 a=10 0.056526 10.745023 8 1106
|
||||
FAST f=23 a=1 7.836923 10.638828 8 1154
|
||||
FAST f=23 a=1 0.099522 10.638828 8 1154
|
||||
FAST f=23 a=2 6.627834 10.631061 6 1970
|
||||
FAST f=23 a=2 0.066769 10.631061 6 1970
|
||||
FAST f=23 a=3 5.602533 10.647288 8 1154
|
||||
FAST f=23 a=3 0.064513 10.647288 8 1154
|
||||
FAST f=23 a=4 6.005580 10.568747 8 1298
|
||||
FAST f=23 a=4 0.062022 10.568747 8 1298
|
||||
FAST f=23 a=5 5.481816 10.676921 6 1010
|
||||
FAST f=23 a=5 0.058959 10.676921 6 1010
|
||||
FAST f=23 a=6 5.460444 10.666194 6 1874
|
||||
FAST f=23 a=6 0.057687 10.666194 6 1874
|
||||
FAST f=23 a=7 5.659822 10.800377 6 1106
|
||||
FAST f=23 a=7 0.06783 10.800377 6 1106
|
||||
FAST f=23 a=8 6.826940 10.522167 8 1298
|
||||
FAST f=23 a=8 0.070533 10.522167 8 1298
|
||||
FAST f=23 a=9 6.804757 10.577799 8 1682
|
||||
FAST f=23 a=9 0.069949 10.577799 8 1682
|
||||
FAST f=23 a=10 6.774933 10.742093 8 1106
|
||||
FAST f=23 a=10 0.068395 10.742093 8 1106
|
||||
FAST f=24 a=1 8.444110 10.632783 8 1154
|
||||
FAST f=24 a=1 0.094357 10.632783 8 1154
|
||||
FAST f=24 a=2 7.289578 10.631061 6 1970
|
||||
FAST f=24 a=2 0.098515 10.631061 6 1970
|
||||
FAST f=24 a=3 8.619780 10.646289 8 1154
|
||||
FAST f=24 a=3 0.098041 10.646289 8 1154
|
||||
FAST f=24 a=4 8.508455 10.555199 8 1298
|
||||
FAST f=24 a=4 0.093885 10.555199 8 1298
|
||||
FAST f=24 a=5 8.471145 10.674363 6 1010
|
||||
FAST f=24 a=5 0.088676 10.674363 6 1010
|
||||
FAST f=24 a=6 8.426727 10.667228 6 1874
|
||||
FAST f=24 a=6 0.087247 10.667228 6 1874
|
||||
FAST f=24 a=7 8.356826 10.803027 6 1106
|
||||
FAST f=24 a=7 0.085835 10.803027 6 1106
|
||||
FAST f=24 a=8 6.756811 10.522049 8 1298
|
||||
FAST f=24 a=8 0.07107 10.522049 8 1298
|
||||
FAST f=24 a=9 6.548169 10.571882 8 1682
|
||||
FAST f=24 a=9 0.0713 10.571882 8 1682
|
||||
FAST f=24 a=10 8.238079 10.736453 8 1106
|
||||
FAST f=24 a=10 0.07004 10.736453 8 1106
|
||||
|
||||
|
||||
hg-commands:
|
||||
NODICT 0.000005 2.425276
|
||||
RANDOM 0.046332 3.490331
|
||||
LEGACY 0.720351 3.911682
|
||||
COVER 45.507731 4.132653 8 386
|
||||
COVER 1.868810 4.132653 8 386
|
||||
FAST f=15 a=1 4.561427 3.866894 8 1202
|
||||
FAST f=15 a=1 0.048946 3.866894 8 1202
|
||||
FAST f=15 a=2 3.574462 3.892119 8 1538
|
||||
FAST f=15 a=2 0.033677 3.892119 8 1538
|
||||
FAST f=15 a=3 3.230227 3.888791 6 1346
|
||||
FAST f=15 a=3 0.034312 3.888791 6 1346
|
||||
FAST f=15 a=4 3.042388 3.899739 8 1010
|
||||
FAST f=15 a=4 0.024307 3.899739 8 1010
|
||||
FAST f=15 a=5 2.800148 3.896220 8 818
|
||||
FAST f=15 a=5 0.022331 3.896220 8 818
|
||||
FAST f=15 a=6 2.706518 3.882039 8 578
|
||||
FAST f=15 a=6 0.020955 3.882039 8 578
|
||||
FAST f=15 a=7 2.701820 3.885430 6 866
|
||||
FAST f=15 a=7 0.026074 3.885430 6 866
|
||||
FAST f=15 a=8 2.604445 3.906932 8 1826
|
||||
FAST f=15 a=8 0.021789 3.906932 8 1826
|
||||
FAST f=15 a=9 2.598568 3.870324 6 1682
|
||||
FAST f=15 a=9 0.026004 3.870324 6 1682
|
||||
FAST f=15 a=10 2.575920 3.920783 8 1442
|
||||
FAST f=15 a=10 0.020228 3.920783 8 1442
|
||||
FAST f=16 a=1 4.630623 4.001430 8 770
|
||||
FAST f=16 a=1 0.047497 4.001430 8 770
|
||||
FAST f=16 a=2 3.674721 3.974431 8 1874
|
||||
FAST f=16 a=2 0.035761 3.974431 8 1874
|
||||
FAST f=16 a=3 3.338384 3.978703 8 1010
|
||||
FAST f=16 a=3 0.029436 3.978703 8 1010
|
||||
FAST f=16 a=4 3.004412 3.983035 8 1010
|
||||
FAST f=16 a=4 0.025744 3.983035 8 1010
|
||||
FAST f=16 a=5 2.881892 3.987710 8 770
|
||||
FAST f=16 a=5 0.023211 3.987710 8 770
|
||||
FAST f=16 a=6 2.807410 3.952717 8 1298
|
||||
FAST f=16 a=6 0.023199 3.952717 8 1298
|
||||
FAST f=16 a=7 2.819623 3.994627 8 770
|
||||
FAST f=16 a=7 0.021806 3.994627 8 770
|
||||
FAST f=16 a=8 2.740092 3.954032 8 1826
|
||||
FAST f=16 a=8 0.0226 3.954032 8 1826
|
||||
FAST f=16 a=9 2.682564 3.969879 6 1442
|
||||
FAST f=16 a=9 0.026324 3.969879 6 1442
|
||||
FAST f=16 a=10 2.657959 3.969755 8 674
|
||||
FAST f=16 a=10 0.020413 3.969755 8 674
|
||||
FAST f=17 a=1 4.729228 4.046000 8 530
|
||||
FAST f=17 a=1 0.049703 4.046000 8 530
|
||||
FAST f=17 a=2 3.764510 3.991519 8 1970
|
||||
FAST f=17 a=2 0.038195 3.991519 8 1970
|
||||
FAST f=17 a=3 3.416992 4.006296 6 914
|
||||
FAST f=17 a=3 0.036244 4.006296 6 914
|
||||
FAST f=17 a=4 3.145626 3.979182 8 1970
|
||||
FAST f=17 a=4 0.028676 3.979182 8 1970
|
||||
FAST f=17 a=5 2.995070 4.050070 8 770
|
||||
FAST f=17 a=5 0.025707 4.050070 8 770
|
||||
FAST f=17 a=6 2.911833 4.040024 8 770
|
||||
FAST f=17 a=6 0.02453 4.040024 8 770
|
||||
FAST f=17 a=7 2.894796 4.015884 8 818
|
||||
FAST f=17 a=7 0.023956 4.015884 8 818
|
||||
FAST f=17 a=8 2.789962 4.039303 8 530
|
||||
FAST f=17 a=8 0.023219 4.039303 8 530
|
||||
FAST f=17 a=9 2.787625 3.996762 8 1634
|
||||
FAST f=17 a=9 0.023651 3.996762 8 1634
|
||||
FAST f=17 a=10 2.754796 4.005059 8 1058
|
||||
FAST f=17 a=10 0.022537 4.005059 8 1058
|
||||
FAST f=18 a=1 4.779117 4.038214 8 242
|
||||
FAST f=18 a=1 0.048814 4.038214 8 242
|
||||
FAST f=18 a=2 3.829753 4.045768 8 722
|
||||
FAST f=18 a=2 0.036541 4.045768 8 722
|
||||
FAST f=18 a=3 3.495053 4.021497 8 770
|
||||
FAST f=18 a=3 0.032648 4.021497 8 770
|
||||
FAST f=18 a=4 3.221395 4.039623 8 770
|
||||
FAST f=18 a=4 0.027818 4.039623 8 770
|
||||
FAST f=18 a=5 3.059369 4.050414 8 530
|
||||
FAST f=18 a=5 0.026296 4.050414 8 530
|
||||
FAST f=18 a=6 3.019292 4.010714 6 962
|
||||
FAST f=18 a=6 0.031104 4.010714 6 962
|
||||
FAST f=18 a=7 2.949322 4.031439 6 770
|
||||
FAST f=18 a=7 0.030745 4.031439 6 770
|
||||
FAST f=18 a=8 2.876425 4.032088 6 386
|
||||
FAST f=18 a=8 0.027407 4.032088 6 386
|
||||
FAST f=18 a=9 2.850958 4.053372 8 674
|
||||
FAST f=18 a=9 0.023799 4.053372 8 674
|
||||
FAST f=18 a=10 2.884352 4.020148 8 1730
|
||||
FAST f=18 a=10 0.024401 4.020148 8 1730
|
||||
FAST f=19 a=1 4.815669 4.061203 8 674
|
||||
FAST f=19 a=1 0.051425 4.061203 8 674
|
||||
FAST f=19 a=2 3.951356 4.013822 8 1442
|
||||
FAST f=19 a=2 0.039968 4.013822 8 1442
|
||||
FAST f=19 a=3 3.554682 4.050425 8 722
|
||||
FAST f=19 a=3 0.032725 4.050425 8 722
|
||||
FAST f=19 a=4 3.242585 4.054677 8 722
|
||||
FAST f=19 a=4 0.028194 4.054677 8 722
|
||||
FAST f=19 a=5 3.105909 4.064524 8 818
|
||||
FAST f=19 a=5 0.02675 4.064524 8 818
|
||||
FAST f=19 a=6 3.059901 4.036857 8 1250
|
||||
FAST f=19 a=6 0.026396 4.036857 8 1250
|
||||
FAST f=19 a=7 3.016151 4.068234 6 770
|
||||
FAST f=19 a=7 0.031501 4.068234 6 770
|
||||
FAST f=19 a=8 2.962902 4.077509 8 530
|
||||
FAST f=19 a=8 0.023333 4.077509 8 530
|
||||
FAST f=19 a=9 2.899607 4.067328 8 530
|
||||
FAST f=19 a=9 0.024553 4.067328 8 530
|
||||
FAST f=19 a=10 2.950978 4.059901 8 434
|
||||
FAST f=19 a=10 0.023852 4.059901 8 434
|
||||
FAST f=20 a=1 5.259834 4.027579 8 1634
|
||||
FAST f=20 a=1 0.061123 4.027579 8 1634
|
||||
FAST f=20 a=2 4.382150 4.025093 8 1634
|
||||
FAST f=20 a=2 0.048009 4.025093 8 1634
|
||||
FAST f=20 a=3 4.104323 4.060842 8 530
|
||||
FAST f=20 a=3 0.040965 4.060842 8 530
|
||||
FAST f=20 a=4 3.853340 4.023504 6 914
|
||||
FAST f=20 a=4 0.041072 4.023504 6 914
|
||||
FAST f=20 a=5 3.728841 4.018089 6 1634
|
||||
FAST f=20 a=5 0.037469 4.018089 6 1634
|
||||
FAST f=20 a=6 3.683045 4.069138 8 578
|
||||
FAST f=20 a=6 0.028011 4.069138 8 578
|
||||
FAST f=20 a=7 3.726973 4.063160 8 722
|
||||
FAST f=20 a=7 0.028437 4.063160 8 722
|
||||
FAST f=20 a=8 3.555073 4.057690 8 386
|
||||
FAST f=20 a=8 0.027588 4.057690 8 386
|
||||
FAST f=20 a=9 3.551095 4.067253 8 482
|
||||
FAST f=20 a=9 0.025976 4.067253 8 482
|
||||
FAST f=20 a=10 3.490127 4.068518 8 530
|
||||
FAST f=20 a=10 0.025971 4.068518 8 530
|
||||
FAST f=21 a=1 7.343816 4.064945 8 770
|
||||
FAST f=21 a=1 0.085035 4.064945 8 770
|
||||
FAST f=21 a=2 5.930894 4.048206 8 386
|
||||
FAST f=21 a=2 0.067349 4.048206 8 386
|
||||
FAST f=21 a=3 6.770775 4.063417 8 578
|
||||
FAST f=21 a=3 0.077104 4.063417 8 578
|
||||
FAST f=21 a=4 6.889409 4.066761 8 626
|
||||
FAST f=21 a=4 0.0717 4.066761 8 626
|
||||
FAST f=21 a=5 6.714896 4.051813 8 914
|
||||
FAST f=21 a=5 0.071026 4.051813 8 914
|
||||
FAST f=21 a=6 6.539890 4.047263 8 1922
|
||||
FAST f=21 a=6 0.07127 4.047263 8 1922
|
||||
FAST f=21 a=7 6.511052 4.068373 8 482
|
||||
FAST f=21 a=7 0.065467 4.068373 8 482
|
||||
FAST f=21 a=8 6.458788 4.071597 8 482
|
||||
FAST f=21 a=8 0.063817 4.071597 8 482
|
||||
FAST f=21 a=9 6.377591 4.052905 8 434
|
||||
FAST f=21 a=9 0.063112 4.052905 8 434
|
||||
FAST f=21 a=10 6.360752 4.047773 8 530
|
||||
FAST f=21 a=10 0.063606 4.047773 8 530
|
||||
FAST f=22 a=1 10.523471 4.040812 8 962
|
||||
FAST f=22 a=1 0.14214 4.040812 8 962
|
||||
FAST f=22 a=2 9.454758 4.059396 8 914
|
||||
FAST f=22 a=2 0.118343 4.059396 8 914
|
||||
FAST f=22 a=3 9.043197 4.043019 8 1922
|
||||
FAST f=22 a=3 0.109798 4.043019 8 1922
|
||||
FAST f=22 a=4 8.716261 4.044819 8 770
|
||||
FAST f=22 a=4 0.099687 4.044819 8 770
|
||||
FAST f=22 a=5 8.529472 4.070576 8 530
|
||||
FAST f=22 a=5 0.093127 4.070576 8 530
|
||||
FAST f=22 a=6 8.424241 4.070565 8 722
|
||||
FAST f=22 a=6 0.093703 4.070565 8 722
|
||||
FAST f=22 a=7 8.403391 4.070591 8 578
|
||||
FAST f=22 a=7 0.089763 4.070591 8 578
|
||||
FAST f=22 a=8 8.285221 4.089171 8 530
|
||||
FAST f=22 a=8 0.087716 4.089171 8 530
|
||||
FAST f=22 a=9 8.282506 4.047470 8 722
|
||||
FAST f=22 a=9 0.089773 4.047470 8 722
|
||||
FAST f=22 a=10 8.241809 4.064151 8 818
|
||||
FAST f=22 a=10 0.090413 4.064151 8 818
|
||||
FAST f=23 a=1 12.389208 4.051635 6 530
|
||||
FAST f=23 a=1 0.147796 4.051635 6 530
|
||||
FAST f=23 a=2 11.300910 4.042835 6 914
|
||||
FAST f=23 a=2 0.133178 4.042835 6 914
|
||||
FAST f=23 a=3 10.879455 4.047415 8 626
|
||||
FAST f=23 a=3 0.129571 4.047415 8 626
|
||||
FAST f=23 a=4 10.522718 4.038269 6 914
|
||||
FAST f=23 a=4 0.118121 4.038269 6 914
|
||||
FAST f=23 a=5 10.348043 4.066884 8 434
|
||||
FAST f=23 a=5 0.112098 4.066884 8 434
|
||||
FAST f=23 a=6 10.238630 4.048635 8 1010
|
||||
FAST f=23 a=6 0.120281 4.048635 8 1010
|
||||
FAST f=23 a=7 10.213255 4.061809 8 530
|
||||
FAST f=23 a=7 0.1121 4.061809 8 530
|
||||
FAST f=23 a=8 10.107879 4.074104 8 818
|
||||
FAST f=23 a=8 0.116544 4.074104 8 818
|
||||
FAST f=23 a=9 10.063424 4.064811 8 674
|
||||
FAST f=23 a=9 0.109045 4.064811 8 674
|
||||
FAST f=23 a=10 10.035801 4.054918 8 530
|
||||
FAST f=23 a=10 0.108735 4.054918 8 530
|
||||
FAST f=24 a=1 14.963878 4.073490 8 722
|
||||
FAST f=24 a=1 0.206344 4.073490 8 722
|
||||
FAST f=24 a=2 13.833472 4.036100 8 962
|
||||
FAST f=24 a=2 0.17486 4.036100 8 962
|
||||
FAST f=24 a=3 13.404631 4.026281 6 1106
|
||||
FAST f=24 a=3 0.153961 4.026281 6 1106
|
||||
FAST f=24 a=4 13.041164 4.065448 8 674
|
||||
FAST f=24 a=4 0.155509 4.065448 8 674
|
||||
FAST f=24 a=5 12.879412 4.054636 8 674
|
||||
FAST f=24 a=5 0.148282 4.054636 8 674
|
||||
FAST f=24 a=6 12.773736 4.081376 8 530
|
||||
FAST f=24 a=6 0.142563 4.081376 8 530
|
||||
FAST f=24 a=7 12.711310 4.059834 8 770
|
||||
FAST f=24 a=7 0.149321 4.059834 8 770
|
||||
FAST f=24 a=8 12.635459 4.052050 8 1298
|
||||
FAST f=24 a=8 0.15095 4.052050 8 1298
|
||||
FAST f=24 a=9 12.558104 4.076516 8 722
|
||||
FAST f=24 a=9 0.144361 4.076516 8 722
|
||||
FAST f=24 a=10 10.661348 4.062137 8 818
|
||||
FAST f=24 a=10 0.108232 4.062137 8 818
|
||||
|
||||
|
||||
hg-changelog:
|
||||
NODICT 0.000017 1.377590
|
||||
RANDOM 0.186171 2.097487
|
||||
LEGACY 1.670867 2.058907
|
||||
COVER 173.561948 2.189685 8 98
|
||||
COVER 4.811180 2.189685 8 98
|
||||
FAST f=15 a=1 18.685906 2.129682 8 434
|
||||
FAST f=15 a=1 0.173376 2.129682 8 434
|
||||
FAST f=15 a=2 12.928259 2.131890 8 482
|
||||
FAST f=15 a=2 0.102582 2.131890 8 482
|
||||
FAST f=15 a=3 11.132343 2.128027 8 386
|
||||
FAST f=15 a=3 0.077122 2.128027 8 386
|
||||
FAST f=15 a=4 10.120683 2.125797 8 434
|
||||
FAST f=15 a=4 0.065175 2.125797 8 434
|
||||
FAST f=15 a=5 9.479092 2.127697 8 386
|
||||
FAST f=15 a=5 0.057905 2.127697 8 386
|
||||
FAST f=15 a=6 9.159523 2.127132 8 1682
|
||||
FAST f=15 a=6 0.058604 2.127132 8 1682
|
||||
FAST f=15 a=7 8.724003 2.129914 8 434
|
||||
FAST f=15 a=7 0.0493 2.129914 8 434
|
||||
FAST f=15 a=8 8.595001 2.127137 8 338
|
||||
FAST f=15 a=8 0.0474 2.127137 8 338
|
||||
FAST f=15 a=9 8.356405 2.125512 8 482
|
||||
FAST f=15 a=9 0.046126 2.125512 8 482
|
||||
FAST f=15 a=10 8.207111 2.126066 8 338
|
||||
FAST f=15 a=10 0.043292 2.126066 8 338
|
||||
FAST f=16 a=1 18.464436 2.144040 8 242
|
||||
FAST f=16 a=1 0.172156 2.144040 8 242
|
||||
FAST f=16 a=2 12.844825 2.148171 8 194
|
||||
FAST f=16 a=2 0.099619 2.148171 8 194
|
||||
FAST f=16 a=3 11.082568 2.140837 8 290
|
||||
FAST f=16 a=3 0.079165 2.140837 8 290
|
||||
FAST f=16 a=4 10.066749 2.144405 8 386
|
||||
FAST f=16 a=4 0.068411 2.144405 8 386
|
||||
FAST f=16 a=5 9.501121 2.140720 8 386
|
||||
FAST f=16 a=5 0.061316 2.140720 8 386
|
||||
FAST f=16 a=6 9.179332 2.139478 8 386
|
||||
FAST f=16 a=6 0.056322 2.139478 8 386
|
||||
FAST f=16 a=7 8.849438 2.142412 8 194
|
||||
FAST f=16 a=7 0.050493 2.142412 8 194
|
||||
FAST f=16 a=8 8.810919 2.143454 8 434
|
||||
FAST f=16 a=8 0.051304 2.143454 8 434
|
||||
FAST f=16 a=9 8.553900 2.140339 8 194
|
||||
FAST f=16 a=9 0.047285 2.140339 8 194
|
||||
FAST f=16 a=10 8.398027 2.143130 8 386
|
||||
FAST f=16 a=10 0.046386 2.143130 8 386
|
||||
FAST f=17 a=1 18.644657 2.157192 8 98
|
||||
FAST f=17 a=1 0.173884 2.157192 8 98
|
||||
FAST f=17 a=2 13.071242 2.159830 8 146
|
||||
FAST f=17 a=2 0.10388 2.159830 8 146
|
||||
FAST f=17 a=3 11.332366 2.153654 6 194
|
||||
FAST f=17 a=3 0.08983 2.153654 6 194
|
||||
FAST f=17 a=4 10.362413 2.156813 8 242
|
||||
FAST f=17 a=4 0.070389 2.156813 8 242
|
||||
FAST f=17 a=5 9.808159 2.155098 6 338
|
||||
FAST f=17 a=5 0.072661 2.155098 6 338
|
||||
FAST f=17 a=6 9.451165 2.153845 6 146
|
||||
FAST f=17 a=6 0.064959 2.153845 6 146
|
||||
FAST f=17 a=7 9.163097 2.155424 6 242
|
||||
FAST f=17 a=7 0.064323 2.155424 6 242
|
||||
FAST f=17 a=8 9.047276 2.156640 8 242
|
||||
FAST f=17 a=8 0.053382 2.156640 8 242
|
||||
FAST f=17 a=9 8.807671 2.152396 8 146
|
||||
FAST f=17 a=9 0.049617 2.152396 8 146
|
||||
FAST f=17 a=10 8.649827 2.152370 8 146
|
||||
FAST f=17 a=10 0.047849 2.152370 8 146
|
||||
FAST f=18 a=1 18.809502 2.168116 8 98
|
||||
FAST f=18 a=1 0.175226 2.168116 8 98
|
||||
FAST f=18 a=2 13.756502 2.170870 6 242
|
||||
FAST f=18 a=2 0.119507 2.170870 6 242
|
||||
FAST f=18 a=3 12.059748 2.163094 6 98
|
||||
FAST f=18 a=3 0.093912 2.163094 6 98
|
||||
FAST f=18 a=4 11.410294 2.172372 8 98
|
||||
FAST f=18 a=4 0.073048 2.172372 8 98
|
||||
FAST f=18 a=5 10.560297 2.166388 8 98
|
||||
FAST f=18 a=5 0.065136 2.166388 8 98
|
||||
FAST f=18 a=6 10.071390 2.162672 8 98
|
||||
FAST f=18 a=6 0.059402 2.162672 8 98
|
||||
FAST f=18 a=7 10.084214 2.166624 6 194
|
||||
FAST f=18 a=7 0.073276 2.166624 6 194
|
||||
FAST f=18 a=8 9.953226 2.167454 8 98
|
||||
FAST f=18 a=8 0.053659 2.167454 8 98
|
||||
FAST f=18 a=9 8.982461 2.161593 6 146
|
||||
FAST f=18 a=9 0.05955 2.161593 6 146
|
||||
FAST f=18 a=10 8.986092 2.164373 6 242
|
||||
FAST f=18 a=10 0.059135 2.164373 6 242
|
||||
FAST f=19 a=1 18.908277 2.176021 8 98
|
||||
FAST f=19 a=1 0.177316 2.176021 8 98
|
||||
FAST f=19 a=2 13.471313 2.176103 8 98
|
||||
FAST f=19 a=2 0.106344 2.176103 8 98
|
||||
FAST f=19 a=3 11.571406 2.172812 8 98
|
||||
FAST f=19 a=3 0.083293 2.172812 8 98
|
||||
FAST f=19 a=4 10.632775 2.177770 6 146
|
||||
FAST f=19 a=4 0.079864 2.177770 6 146
|
||||
FAST f=19 a=5 10.030190 2.175574 6 146
|
||||
FAST f=19 a=5 0.07223 2.175574 6 146
|
||||
FAST f=19 a=6 9.717818 2.169997 8 98
|
||||
FAST f=19 a=6 0.060049 2.169997 8 98
|
||||
FAST f=19 a=7 9.397531 2.172770 8 146
|
||||
FAST f=19 a=7 0.057188 2.172770 8 146
|
||||
FAST f=19 a=8 9.281061 2.175822 8 98
|
||||
FAST f=19 a=8 0.053711 2.175822 8 98
|
||||
FAST f=19 a=9 9.165242 2.169849 6 146
|
||||
FAST f=19 a=9 0.059898 2.169849 6 146
|
||||
FAST f=19 a=10 9.048763 2.173394 8 98
|
||||
FAST f=19 a=10 0.049757 2.173394 8 98
|
||||
FAST f=20 a=1 21.166917 2.183923 6 98
|
||||
FAST f=20 a=1 0.205425 2.183923 6 98
|
||||
FAST f=20 a=2 15.642753 2.182349 6 98
|
||||
FAST f=20 a=2 0.135957 2.182349 6 98
|
||||
FAST f=20 a=3 14.053730 2.173544 6 98
|
||||
FAST f=20 a=3 0.11266 2.173544 6 98
|
||||
FAST f=20 a=4 15.270019 2.183656 8 98
|
||||
FAST f=20 a=4 0.107892 2.183656 8 98
|
||||
FAST f=20 a=5 15.497927 2.174661 6 98
|
||||
FAST f=20 a=5 0.100305 2.174661 6 98
|
||||
FAST f=20 a=6 13.973505 2.172391 8 98
|
||||
FAST f=20 a=6 0.087565 2.172391 8 98
|
||||
FAST f=20 a=7 14.083296 2.172443 8 98
|
||||
FAST f=20 a=7 0.078062 2.172443 8 98
|
||||
FAST f=20 a=8 12.560048 2.175581 8 98
|
||||
FAST f=20 a=8 0.070282 2.175581 8 98
|
||||
FAST f=20 a=9 13.078645 2.173975 6 146
|
||||
FAST f=20 a=9 0.081041 2.173975 6 146
|
||||
FAST f=20 a=10 12.823328 2.177778 8 98
|
||||
FAST f=20 a=10 0.074522 2.177778 8 98
|
||||
FAST f=21 a=1 29.825370 2.183057 6 98
|
||||
FAST f=21 a=1 0.334453 2.183057 6 98
|
||||
FAST f=21 a=2 29.476474 2.182752 8 98
|
||||
FAST f=21 a=2 0.286602 2.182752 8 98
|
||||
FAST f=21 a=3 25.937186 2.175867 8 98
|
||||
FAST f=21 a=3 0.17626 2.175867 8 98
|
||||
FAST f=21 a=4 20.413865 2.179780 8 98
|
||||
FAST f=21 a=4 0.206085 2.179780 8 98
|
||||
FAST f=21 a=5 20.541889 2.178328 6 146
|
||||
FAST f=21 a=5 0.199157 2.178328 6 146
|
||||
FAST f=21 a=6 21.090670 2.174443 6 146
|
||||
FAST f=21 a=6 0.190645 2.174443 6 146
|
||||
FAST f=21 a=7 20.221569 2.177384 6 146
|
||||
FAST f=21 a=7 0.184278 2.177384 6 146
|
||||
FAST f=21 a=8 20.322357 2.179456 6 98
|
||||
FAST f=21 a=8 0.178458 2.179456 6 98
|
||||
FAST f=21 a=9 20.683912 2.174396 6 146
|
||||
FAST f=21 a=9 0.190829 2.174396 6 146
|
||||
FAST f=21 a=10 20.840865 2.174905 8 98
|
||||
FAST f=21 a=10 0.172515 2.174905 8 98
|
||||
FAST f=22 a=1 36.822827 2.181612 6 98
|
||||
FAST f=22 a=1 0.437389 2.181612 6 98
|
||||
FAST f=22 a=2 30.616902 2.183142 8 98
|
||||
FAST f=22 a=2 0.324284 2.183142 8 98
|
||||
FAST f=22 a=3 28.472482 2.178130 8 98
|
||||
FAST f=22 a=3 0.236538 2.178130 8 98
|
||||
FAST f=22 a=4 25.847028 2.181878 8 98
|
||||
FAST f=22 a=4 0.263744 2.181878 8 98
|
||||
FAST f=22 a=5 27.095881 2.180775 8 98
|
||||
FAST f=22 a=5 0.24988 2.180775 8 98
|
||||
FAST f=22 a=6 25.939172 2.170916 8 98
|
||||
FAST f=22 a=6 0.240033 2.170916 8 98
|
||||
FAST f=22 a=7 27.064194 2.177849 8 98
|
||||
FAST f=22 a=7 0.242383 2.177849 8 98
|
||||
FAST f=22 a=8 25.140221 2.178216 8 98
|
||||
FAST f=22 a=8 0.237601 2.178216 8 98
|
||||
FAST f=22 a=9 25.505283 2.177455 6 146
|
||||
FAST f=22 a=9 0.223217 2.177455 6 146
|
||||
FAST f=22 a=10 24.529362 2.176705 6 98
|
||||
FAST f=22 a=10 0.222876 2.176705 6 98
|
||||
FAST f=23 a=1 39.127310 2.183006 6 98
|
||||
FAST f=23 a=1 0.417338 2.183006 6 98
|
||||
FAST f=23 a=2 32.468161 2.183524 6 98
|
||||
FAST f=23 a=2 0.351645 2.183524 6 98
|
||||
FAST f=23 a=3 31.577620 2.172604 6 98
|
||||
FAST f=23 a=3 0.319659 2.172604 6 98
|
||||
FAST f=23 a=4 30.129247 2.183932 6 98
|
||||
FAST f=23 a=4 0.307239 2.183932 6 98
|
||||
FAST f=23 a=5 29.103376 2.183529 6 146
|
||||
FAST f=23 a=5 0.285533 2.183529 6 146
|
||||
FAST f=23 a=6 29.776045 2.174367 8 98
|
||||
FAST f=23 a=6 0.276846 2.174367 8 98
|
||||
FAST f=23 a=7 28.940407 2.178022 6 146
|
||||
FAST f=23 a=7 0.274082 2.178022 6 146
|
||||
FAST f=23 a=8 29.256009 2.179462 6 98
|
||||
FAST f=23 a=8 0.26949 2.179462 6 98
|
||||
FAST f=23 a=9 29.347312 2.170407 8 98
|
||||
FAST f=23 a=9 0.265034 2.170407 8 98
|
||||
FAST f=23 a=10 29.140081 2.171762 8 98
|
||||
FAST f=23 a=10 0.259183 2.171762 8 98
|
||||
FAST f=24 a=1 44.871179 2.182115 6 98
|
||||
FAST f=24 a=1 0.509433 2.182115 6 98
|
||||
FAST f=24 a=2 38.694867 2.180549 8 98
|
||||
FAST f=24 a=2 0.406695 2.180549 8 98
|
||||
FAST f=24 a=3 38.363769 2.172821 8 98
|
||||
FAST f=24 a=3 0.359581 2.172821 8 98
|
||||
FAST f=24 a=4 36.580797 2.184142 8 98
|
||||
FAST f=24 a=4 0.340614 2.184142 8 98
|
||||
FAST f=24 a=5 33.125701 2.183301 8 98
|
||||
FAST f=24 a=5 0.324874 2.183301 8 98
|
||||
FAST f=24 a=6 34.776068 2.173019 6 146
|
||||
FAST f=24 a=6 0.340397 2.173019 6 146
|
||||
FAST f=24 a=7 34.417625 2.176561 6 146
|
||||
FAST f=24 a=7 0.308223 2.176561 6 146
|
||||
FAST f=24 a=8 35.470291 2.182161 6 98
|
||||
FAST f=24 a=8 0.307724 2.182161 6 98
|
||||
FAST f=24 a=9 34.927252 2.172682 6 146
|
||||
FAST f=24 a=9 0.300598 2.172682 6 146
|
||||
FAST f=24 a=10 33.238355 2.173395 6 98
|
||||
FAST f=24 a=10 0.249916 2.173395 6 98
|
||||
|
||||
|
||||
hg-manifest:
|
||||
NODICT 0.000004 1.866377
|
||||
RANDOM 0.696346 2.309436
|
||||
LEGACY 7.064527 2.506977
|
||||
COVER 876.312865 2.582528 8 434
|
||||
COVER 35.684533 2.582528 8 434
|
||||
FAST f=15 a=1 76.618201 2.404013 8 1202
|
||||
FAST f=15 a=1 0.700722 2.404013 8 1202
|
||||
FAST f=15 a=2 49.213058 2.409248 6 1826
|
||||
FAST f=15 a=2 0.473393 2.409248 6 1826
|
||||
FAST f=15 a=3 41.753197 2.409677 8 1490
|
||||
FAST f=15 a=3 0.336848 2.409677 8 1490
|
||||
FAST f=15 a=4 38.648295 2.407996 8 1538
|
||||
FAST f=15 a=4 0.283952 2.407996 8 1538
|
||||
FAST f=15 a=5 36.144936 2.402895 8 1874
|
||||
FAST f=15 a=5 0.270128 2.402895 8 1874
|
||||
FAST f=15 a=6 35.484675 2.394873 8 1586
|
||||
FAST f=15 a=6 0.251637 2.394873 8 1586
|
||||
FAST f=15 a=7 34.280599 2.397311 8 1778
|
||||
FAST f=15 a=7 0.23984 2.397311 8 1778
|
||||
FAST f=15 a=8 32.122572 2.396089 6 1490
|
||||
FAST f=15 a=8 0.251508 2.396089 6 1490
|
||||
FAST f=15 a=9 29.909842 2.390092 6 1970
|
||||
FAST f=15 a=9 0.251233 2.390092 6 1970
|
||||
FAST f=15 a=10 30.102938 2.400086 6 1682
|
||||
FAST f=15 a=10 0.23688 2.400086 6 1682
|
||||
FAST f=16 a=1 67.750401 2.475460 6 1346
|
||||
FAST f=16 a=1 0.796035 2.475460 6 1346
|
||||
FAST f=16 a=2 52.812027 2.480860 6 1730
|
||||
FAST f=16 a=2 0.480384 2.480860 6 1730
|
||||
FAST f=16 a=3 44.179259 2.469304 8 1970
|
||||
FAST f=16 a=3 0.332657 2.469304 8 1970
|
||||
FAST f=16 a=4 37.612728 2.478208 6 1970
|
||||
FAST f=16 a=4 0.32498 2.478208 6 1970
|
||||
FAST f=16 a=5 35.056222 2.475568 6 1298
|
||||
FAST f=16 a=5 0.302824 2.475568 6 1298
|
||||
FAST f=16 a=6 34.713012 2.486079 8 1730
|
||||
FAST f=16 a=6 0.24755 2.486079 8 1730
|
||||
FAST f=16 a=7 33.713687 2.477180 6 1682
|
||||
FAST f=16 a=7 0.280358 2.477180 6 1682
|
||||
FAST f=16 a=8 31.571412 2.475418 8 1538
|
||||
FAST f=16 a=8 0.241241 2.475418 8 1538
|
||||
FAST f=16 a=9 31.608069 2.478263 8 1922
|
||||
FAST f=16 a=9 0.241764 2.478263 8 1922
|
||||
FAST f=16 a=10 31.358002 2.472263 8 1442
|
||||
FAST f=16 a=10 0.221661 2.472263 8 1442
|
||||
FAST f=17 a=1 66.185775 2.536085 6 1346
|
||||
FAST f=17 a=1 0.713549 2.536085 6 1346
|
||||
FAST f=17 a=2 50.365000 2.546105 8 1298
|
||||
FAST f=17 a=2 0.467846 2.546105 8 1298
|
||||
FAST f=17 a=3 42.712843 2.536250 8 1298
|
||||
FAST f=17 a=3 0.34047 2.536250 8 1298
|
||||
FAST f=17 a=4 39.514227 2.535555 8 1442
|
||||
FAST f=17 a=4 0.302989 2.535555 8 1442
|
||||
FAST f=17 a=5 35.189292 2.524925 8 1202
|
||||
FAST f=17 a=5 0.273451 2.524925 8 1202
|
||||
FAST f=17 a=6 35.791683 2.523466 8 1202
|
||||
FAST f=17 a=6 0.268261 2.523466 8 1202
|
||||
FAST f=17 a=7 37.416136 2.526625 6 1010
|
||||
FAST f=17 a=7 0.277558 2.526625 6 1010
|
||||
FAST f=17 a=8 37.084707 2.533274 6 1250
|
||||
FAST f=17 a=8 0.285104 2.533274 6 1250
|
||||
FAST f=17 a=9 34.183814 2.532765 8 1298
|
||||
FAST f=17 a=9 0.235133 2.532765 8 1298
|
||||
FAST f=17 a=10 31.149235 2.528722 8 1346
|
||||
FAST f=17 a=10 0.232679 2.528722 8 1346
|
||||
FAST f=18 a=1 72.942176 2.559857 6 386
|
||||
FAST f=18 a=1 0.718618 2.559857 6 386
|
||||
FAST f=18 a=2 51.690440 2.559572 8 290
|
||||
FAST f=18 a=2 0.403978 2.559572 8 290
|
||||
FAST f=18 a=3 45.344908 2.561040 8 962
|
||||
FAST f=18 a=3 0.357205 2.561040 8 962
|
||||
FAST f=18 a=4 39.804522 2.558446 8 1010
|
||||
FAST f=18 a=4 0.310526 2.558446 8 1010
|
||||
FAST f=18 a=5 38.134888 2.561811 8 626
|
||||
FAST f=18 a=5 0.273743 2.561811 8 626
|
||||
FAST f=18 a=6 35.091890 2.555518 8 722
|
||||
FAST f=18 a=6 0.260135 2.555518 8 722
|
||||
FAST f=18 a=7 34.639523 2.562938 8 290
|
||||
FAST f=18 a=7 0.234294 2.562938 8 290
|
||||
FAST f=18 a=8 36.076431 2.563567 8 1586
|
||||
FAST f=18 a=8 0.274075 2.563567 8 1586
|
||||
FAST f=18 a=9 36.376433 2.560950 8 722
|
||||
FAST f=18 a=9 0.240106 2.560950 8 722
|
||||
FAST f=18 a=10 32.624790 2.559340 8 578
|
||||
FAST f=18 a=10 0.234704 2.559340 8 578
|
||||
FAST f=19 a=1 70.513761 2.572441 8 194
|
||||
FAST f=19 a=1 0.726112 2.572441 8 194
|
||||
FAST f=19 a=2 59.263032 2.574560 8 482
|
||||
FAST f=19 a=2 0.451554 2.574560 8 482
|
||||
FAST f=19 a=3 51.509594 2.571546 6 194
|
||||
FAST f=19 a=3 0.393014 2.571546 6 194
|
||||
FAST f=19 a=4 55.393906 2.573386 8 482
|
||||
FAST f=19 a=4 0.38819 2.573386 8 482
|
||||
FAST f=19 a=5 43.201736 2.567589 8 674
|
||||
FAST f=19 a=5 0.292155 2.567589 8 674
|
||||
FAST f=19 a=6 42.911687 2.572666 6 434
|
||||
FAST f=19 a=6 0.303988 2.572666 6 434
|
||||
FAST f=19 a=7 44.687591 2.573613 6 290
|
||||
FAST f=19 a=7 0.308721 2.573613 6 290
|
||||
FAST f=19 a=8 37.372868 2.571039 6 194
|
||||
FAST f=19 a=8 0.287137 2.571039 6 194
|
||||
FAST f=19 a=9 36.074230 2.566473 6 482
|
||||
FAST f=19 a=9 0.280721 2.566473 6 482
|
||||
FAST f=19 a=10 33.731720 2.570306 8 194
|
||||
FAST f=19 a=10 0.224073 2.570306 8 194
|
||||
FAST f=20 a=1 79.670634 2.581146 6 290
|
||||
FAST f=20 a=1 0.899986 2.581146 6 290
|
||||
FAST f=20 a=2 58.827141 2.579782 8 386
|
||||
FAST f=20 a=2 0.602288 2.579782 8 386
|
||||
FAST f=20 a=3 51.289004 2.579627 8 722
|
||||
FAST f=20 a=3 0.446091 2.579627 8 722
|
||||
FAST f=20 a=4 47.711068 2.581508 8 722
|
||||
FAST f=20 a=4 0.473007 2.581508 8 722
|
||||
FAST f=20 a=5 47.402929 2.578062 6 434
|
||||
FAST f=20 a=5 0.497131 2.578062 6 434
|
||||
FAST f=20 a=6 54.797102 2.577365 8 482
|
||||
FAST f=20 a=6 0.515061 2.577365 8 482
|
||||
FAST f=20 a=7 51.370877 2.583050 8 386
|
||||
FAST f=20 a=7 0.402878 2.583050 8 386
|
||||
FAST f=20 a=8 51.437931 2.574875 6 242
|
||||
FAST f=20 a=8 0.453094 2.574875 6 242
|
||||
FAST f=20 a=9 44.105456 2.576700 6 242
|
||||
FAST f=20 a=9 0.456633 2.576700 6 242
|
||||
FAST f=20 a=10 44.447580 2.578305 8 338
|
||||
FAST f=20 a=10 0.409121 2.578305 8 338
|
||||
FAST f=21 a=1 113.031686 2.582449 6 242
|
||||
FAST f=21 a=1 1.456971 2.582449 6 242
|
||||
FAST f=21 a=2 97.700932 2.582124 8 194
|
||||
FAST f=21 a=2 1.072078 2.582124 8 194
|
||||
FAST f=21 a=3 96.563648 2.585479 8 434
|
||||
FAST f=21 a=3 0.949528 2.585479 8 434
|
||||
FAST f=21 a=4 90.597813 2.582366 6 386
|
||||
FAST f=21 a=4 0.76944 2.582366 6 386
|
||||
FAST f=21 a=5 86.815980 2.579043 8 434
|
||||
FAST f=21 a=5 0.858167 2.579043 8 434
|
||||
FAST f=21 a=6 91.235820 2.578378 8 530
|
||||
FAST f=21 a=6 0.684274 2.578378 8 530
|
||||
FAST f=21 a=7 84.392788 2.581243 8 386
|
||||
FAST f=21 a=7 0.814386 2.581243 8 386
|
||||
FAST f=21 a=8 82.052310 2.582547 8 338
|
||||
FAST f=21 a=8 0.822633 2.582547 8 338
|
||||
FAST f=21 a=9 74.696074 2.579319 8 194
|
||||
FAST f=21 a=9 0.811028 2.579319 8 194
|
||||
FAST f=21 a=10 76.211170 2.578766 8 290
|
||||
FAST f=21 a=10 0.809715 2.578766 8 290
|
||||
FAST f=22 a=1 138.976871 2.580478 8 194
|
||||
FAST f=22 a=1 1.748932 2.580478 8 194
|
||||
FAST f=22 a=2 120.164097 2.583633 8 386
|
||||
FAST f=22 a=2 1.333239 2.583633 8 386
|
||||
FAST f=22 a=3 111.986474 2.582566 6 194
|
||||
FAST f=22 a=3 1.305734 2.582566 6 194
|
||||
FAST f=22 a=4 108.548148 2.583068 6 194
|
||||
FAST f=22 a=4 1.314026 2.583068 6 194
|
||||
FAST f=22 a=5 103.173017 2.583495 6 290
|
||||
FAST f=22 a=5 1.228664 2.583495 6 290
|
||||
FAST f=22 a=6 108.421262 2.582349 8 530
|
||||
FAST f=22 a=6 1.076773 2.582349 8 530
|
||||
FAST f=22 a=7 103.284127 2.581022 8 386
|
||||
FAST f=22 a=7 1.112117 2.581022 8 386
|
||||
FAST f=22 a=8 96.330279 2.581073 8 290
|
||||
FAST f=22 a=8 1.109303 2.581073 8 290
|
||||
FAST f=22 a=9 97.651348 2.580075 6 194
|
||||
FAST f=22 a=9 0.933032 2.580075 6 194
|
||||
FAST f=22 a=10 101.660621 2.584886 8 194
|
||||
FAST f=22 a=10 0.796823 2.584886 8 194
|
||||
FAST f=23 a=1 159.322978 2.581474 6 242
|
||||
FAST f=23 a=1 2.015878 2.581474 6 242
|
||||
FAST f=23 a=2 134.331775 2.581619 8 194
|
||||
FAST f=23 a=2 1.545845 2.581619 8 194
|
||||
FAST f=23 a=3 127.724552 2.579888 6 338
|
||||
FAST f=23 a=3 1.444496 2.579888 6 338
|
||||
FAST f=23 a=4 126.077675 2.578137 6 242
|
||||
FAST f=23 a=4 1.364394 2.578137 6 242
|
||||
FAST f=23 a=5 124.914027 2.580843 8 338
|
||||
FAST f=23 a=5 1.116059 2.580843 8 338
|
||||
FAST f=23 a=6 122.874153 2.577637 6 338
|
||||
FAST f=23 a=6 1.164584 2.577637 6 338
|
||||
FAST f=23 a=7 123.099257 2.582715 6 386
|
||||
FAST f=23 a=7 1.354042 2.582715 6 386
|
||||
FAST f=23 a=8 122.026753 2.577681 8 194
|
||||
FAST f=23 a=8 1.210966 2.577681 8 194
|
||||
FAST f=23 a=9 121.164312 2.584599 6 290
|
||||
FAST f=23 a=9 1.174859 2.584599 6 290
|
||||
FAST f=23 a=10 117.462222 2.580358 8 194
|
||||
FAST f=23 a=10 1.075258 2.580358 8 194
|
||||
FAST f=24 a=1 169.539659 2.581642 6 194
|
||||
FAST f=24 a=1 1.916804 2.581642 6 194
|
||||
FAST f=24 a=2 160.539270 2.580421 6 290
|
||||
FAST f=24 a=2 1.71087 2.580421 6 290
|
||||
FAST f=24 a=3 155.455874 2.580449 6 242
|
||||
FAST f=24 a=3 1.60307 2.580449 6 242
|
||||
FAST f=24 a=4 147.630320 2.582953 6 338
|
||||
FAST f=24 a=4 1.396364 2.582953 6 338
|
||||
FAST f=24 a=5 133.767428 2.580589 6 290
|
||||
FAST f=24 a=5 1.19933 2.580589 6 290
|
||||
FAST f=24 a=6 146.437535 2.579453 8 194
|
||||
FAST f=24 a=6 1.385405 2.579453 8 194
|
||||
FAST f=24 a=7 147.227507 2.584155 8 386
|
||||
FAST f=24 a=7 1.48942 2.584155 8 386
|
||||
FAST f=24 a=8 138.005773 2.584115 8 194
|
||||
FAST f=24 a=8 1.352 2.584115 8 194
|
||||
FAST f=24 a=9 141.442625 2.582902 8 290
|
||||
FAST f=24 a=9 1.39647 2.582902 8 290
|
||||
FAST f=24 a=10 142.157446 2.582701 8 434
|
||||
FAST f=24 a=10 1.498889 2.582701 8 434
|
@ -0,0 +1,442 @@
|
||||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* strcmp, strlen */
|
||||
#include <errno.h> /* errno */
|
||||
#include <ctype.h>
|
||||
#include <time.h>
|
||||
#include "random.h"
|
||||
#include "dictBuilder.h"
|
||||
#include "zstd_internal.h" /* includes zstd.h */
|
||||
#include "io.h"
|
||||
#include "util.h"
|
||||
#include "zdict.h"
|
||||
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Console display
|
||||
***************************************/
|
||||
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
|
||||
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
|
||||
|
||||
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
|
||||
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
|
||||
|
||||
#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
|
||||
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
|
||||
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
|
||||
if (displayLevel>=4) fflush(stderr); } } }
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Exceptions
|
||||
***************************************/
|
||||
#ifndef DEBUG
|
||||
# define DEBUG 0
|
||||
#endif
|
||||
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
|
||||
#define EXM_THROW(error, ...) \
|
||||
{ \
|
||||
DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
|
||||
DISPLAY("Error %i : ", error); \
|
||||
DISPLAY(__VA_ARGS__); \
|
||||
DISPLAY("\n"); \
|
||||
exit(error); \
|
||||
}
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
static const unsigned g_defaultMaxDictSize = 110 KB;
|
||||
#define DEFAULT_CLEVEL 3
|
||||
#define DEFAULT_DISPLAYLEVEL 2
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Struct
|
||||
***************************************/
|
||||
typedef struct {
|
||||
const void* dictBuffer;
|
||||
size_t dictSize;
|
||||
} dictInfo;
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Dictionary related operations
|
||||
***************************************/
|
||||
/** createDictFromFiles() :
|
||||
* Based on type of param given, train dictionary using the corresponding algorithm
|
||||
* @return dictInfo containing dictionary buffer and dictionary size
|
||||
*/
|
||||
dictInfo* createDictFromFiles(sampleInfo *info, unsigned maxDictSize,
|
||||
ZDICT_random_params_t *randomParams, ZDICT_cover_params_t *coverParams,
|
||||
ZDICT_legacy_params_t *legacyParams, ZDICT_fastCover_params_t *fastParams) {
|
||||
unsigned const displayLevel = randomParams ? randomParams->zParams.notificationLevel :
|
||||
coverParams ? coverParams->zParams.notificationLevel :
|
||||
legacyParams ? legacyParams->zParams.notificationLevel :
|
||||
fastParams ? fastParams->zParams.notificationLevel :
|
||||
DEFAULT_DISPLAYLEVEL; /* no dict */
|
||||
void* const dictBuffer = malloc(maxDictSize);
|
||||
|
||||
dictInfo* dInfo = NULL;
|
||||
|
||||
/* Checks */
|
||||
if (!dictBuffer)
|
||||
EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
|
||||
|
||||
{ size_t dictSize;
|
||||
if(randomParams) {
|
||||
dictSize = ZDICT_trainFromBuffer_random(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, *randomParams);
|
||||
}else if(coverParams) {
|
||||
/* Run the optimize version if either k or d is not provided */
|
||||
if (!coverParams->d || !coverParams->k){
|
||||
dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, coverParams);
|
||||
} else {
|
||||
dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, *coverParams);
|
||||
}
|
||||
} else if(legacyParams) {
|
||||
dictSize = ZDICT_trainFromBuffer_legacy(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, *legacyParams);
|
||||
} else if(fastParams) {
|
||||
/* Run the optimize version if either k or d is not provided */
|
||||
if (!fastParams->d || !fastParams->k) {
|
||||
dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, fastParams);
|
||||
} else {
|
||||
dictSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, *fastParams);
|
||||
}
|
||||
} else {
|
||||
dictSize = 0;
|
||||
}
|
||||
if (ZDICT_isError(dictSize)) {
|
||||
DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
|
||||
free(dictBuffer);
|
||||
return dInfo;
|
||||
}
|
||||
dInfo = (dictInfo *)malloc(sizeof(dictInfo));
|
||||
dInfo->dictBuffer = dictBuffer;
|
||||
dInfo->dictSize = dictSize;
|
||||
}
|
||||
return dInfo;
|
||||
}
|
||||
|
||||
|
||||
/** compressWithDict() :
|
||||
* Compress samples from sample buffer given dictionary stored on dictionary buffer and compression level
|
||||
* @return compression ratio
|
||||
*/
|
||||
double compressWithDict(sampleInfo *srcInfo, dictInfo* dInfo, int compressionLevel, int displayLevel) {
|
||||
/* Local variables */
|
||||
size_t totalCompressedSize = 0;
|
||||
size_t totalOriginalSize = 0;
|
||||
const unsigned hasDict = dInfo->dictSize > 0 ? 1 : 0;
|
||||
double cRatio;
|
||||
size_t dstCapacity;
|
||||
int i;
|
||||
|
||||
/* Pointers */
|
||||
ZSTD_CDict *cdict = NULL;
|
||||
ZSTD_CCtx* cctx = NULL;
|
||||
size_t *offsets = NULL;
|
||||
void* dst = NULL;
|
||||
|
||||
/* Allocate dst with enough space to compress the maximum sized sample */
|
||||
{
|
||||
size_t maxSampleSize = 0;
|
||||
for (i = 0; i < srcInfo->nbSamples; i++) {
|
||||
maxSampleSize = MAX(srcInfo->samplesSizes[i], maxSampleSize);
|
||||
}
|
||||
dstCapacity = ZSTD_compressBound(maxSampleSize);
|
||||
dst = malloc(dstCapacity);
|
||||
}
|
||||
|
||||
/* Calculate offset for each sample */
|
||||
offsets = (size_t *)malloc((srcInfo->nbSamples + 1) * sizeof(size_t));
|
||||
offsets[0] = 0;
|
||||
for (i = 1; i <= srcInfo->nbSamples; i++) {
|
||||
offsets[i] = offsets[i - 1] + srcInfo->samplesSizes[i - 1];
|
||||
}
|
||||
|
||||
/* Create the cctx */
|
||||
cctx = ZSTD_createCCtx();
|
||||
if(!cctx || !dst) {
|
||||
cRatio = -1;
|
||||
goto _cleanup;
|
||||
}
|
||||
|
||||
/* Create CDict if there's a dictionary stored on buffer */
|
||||
if (hasDict) {
|
||||
cdict = ZSTD_createCDict(dInfo->dictBuffer, dInfo->dictSize, compressionLevel);
|
||||
if(!cdict) {
|
||||
cRatio = -1;
|
||||
goto _cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
/* Compress each sample and sum their sizes*/
|
||||
const BYTE *const samples = (const BYTE *)srcInfo->srcBuffer;
|
||||
for (i = 0; i < srcInfo->nbSamples; i++) {
|
||||
size_t compressedSize;
|
||||
if(hasDict) {
|
||||
compressedSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity, samples + offsets[i], srcInfo->samplesSizes[i], cdict);
|
||||
} else {
|
||||
compressedSize = ZSTD_compressCCtx(cctx, dst, dstCapacity,samples + offsets[i], srcInfo->samplesSizes[i], compressionLevel);
|
||||
}
|
||||
if (ZSTD_isError(compressedSize)) {
|
||||
cRatio = -1;
|
||||
goto _cleanup;
|
||||
}
|
||||
totalCompressedSize += compressedSize;
|
||||
}
|
||||
|
||||
/* Sum original sizes */
|
||||
for (i = 0; i<srcInfo->nbSamples; i++) {
|
||||
totalOriginalSize += srcInfo->samplesSizes[i];
|
||||
}
|
||||
|
||||
/* Calculate compression ratio */
|
||||
DISPLAYLEVEL(2, "original size is %lu\n", totalOriginalSize);
|
||||
DISPLAYLEVEL(2, "compressed size is %lu\n", totalCompressedSize);
|
||||
cRatio = (double)totalOriginalSize/(double)totalCompressedSize;
|
||||
|
||||
_cleanup:
|
||||
free(dst);
|
||||
free(offsets);
|
||||
ZSTD_freeCCtx(cctx);
|
||||
ZSTD_freeCDict(cdict);
|
||||
return cRatio;
|
||||
}
|
||||
|
||||
|
||||
/** FreeDictInfo() :
|
||||
* Free memory allocated for dictInfo
|
||||
*/
|
||||
void freeDictInfo(dictInfo* info) {
|
||||
if (!info) return;
|
||||
if (info->dictBuffer) free((void*)(info->dictBuffer));
|
||||
free(info);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*-********************************************************
|
||||
* Benchmarking functions
|
||||
**********************************************************/
|
||||
/** benchmarkDictBuilder() :
|
||||
* Measure how long a dictionary builder takes and compression ratio with the dictionary built
|
||||
* @return 0 if benchmark successfully, 1 otherwise
|
||||
*/
|
||||
int benchmarkDictBuilder(sampleInfo *srcInfo, unsigned maxDictSize, ZDICT_random_params_t *randomParam,
|
||||
ZDICT_cover_params_t *coverParam, ZDICT_legacy_params_t *legacyParam,
|
||||
ZDICT_fastCover_params_t *fastParam) {
|
||||
/* Local variables */
|
||||
const unsigned displayLevel = randomParam ? randomParam->zParams.notificationLevel :
|
||||
coverParam ? coverParam->zParams.notificationLevel :
|
||||
legacyParam ? legacyParam->zParams.notificationLevel :
|
||||
fastParam ? fastParam->zParams.notificationLevel:
|
||||
DEFAULT_DISPLAYLEVEL; /* no dict */
|
||||
const char* name = randomParam ? "RANDOM" :
|
||||
coverParam ? "COVER" :
|
||||
legacyParam ? "LEGACY" :
|
||||
fastParam ? "FAST":
|
||||
"NODICT"; /* no dict */
|
||||
const unsigned cLevel = randomParam ? randomParam->zParams.compressionLevel :
|
||||
coverParam ? coverParam->zParams.compressionLevel :
|
||||
legacyParam ? legacyParam->zParams.compressionLevel :
|
||||
fastParam ? fastParam->zParams.compressionLevel:
|
||||
DEFAULT_CLEVEL; /* no dict */
|
||||
int result = 0;
|
||||
|
||||
/* Calculate speed */
|
||||
const UTIL_time_t begin = UTIL_getTime();
|
||||
dictInfo* dInfo = createDictFromFiles(srcInfo, maxDictSize, randomParam, coverParam, legacyParam, fastParam);
|
||||
const U64 timeMicro = UTIL_clockSpanMicro(begin);
|
||||
const double timeSec = timeMicro / (double)SEC_TO_MICRO;
|
||||
if (!dInfo) {
|
||||
DISPLAYLEVEL(1, "%s does not train successfully\n", name);
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
}
|
||||
DISPLAYLEVEL(1, "%s took %f seconds to execute \n", name, timeSec);
|
||||
|
||||
/* Calculate compression ratio */
|
||||
const double cRatio = compressWithDict(srcInfo, dInfo, cLevel, displayLevel);
|
||||
if (cRatio < 0) {
|
||||
DISPLAYLEVEL(1, "Compressing with %s dictionary does not work\n", name);
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
|
||||
}
|
||||
DISPLAYLEVEL(1, "Compression ratio with %s dictionary is %f\n", name, cRatio);
|
||||
|
||||
_cleanup:
|
||||
freeDictInfo(dInfo);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int main(int argCount, const char* argv[])
|
||||
{
|
||||
const int displayLevel = DEFAULT_DISPLAYLEVEL;
|
||||
const char* programName = argv[0];
|
||||
int result = 0;
|
||||
|
||||
/* Initialize arguments to default values */
|
||||
unsigned k = 200;
|
||||
unsigned d = 8;
|
||||
unsigned f;
|
||||
unsigned accel;
|
||||
unsigned i;
|
||||
const unsigned cLevel = DEFAULT_CLEVEL;
|
||||
const unsigned dictID = 0;
|
||||
const unsigned maxDictSize = g_defaultMaxDictSize;
|
||||
|
||||
/* Initialize table to store input files */
|
||||
const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));
|
||||
unsigned filenameIdx = 0;
|
||||
|
||||
char* fileNamesBuf = NULL;
|
||||
unsigned fileNamesNb = filenameIdx;
|
||||
const int followLinks = 0;
|
||||
const char** extendedFileList = NULL;
|
||||
|
||||
/* Parse arguments */
|
||||
for (i = 1; i < argCount; i++) {
|
||||
const char* argument = argv[i];
|
||||
if (longCommandWArg(&argument, "in=")) {
|
||||
filenameTable[filenameIdx] = argument;
|
||||
filenameIdx++;
|
||||
continue;
|
||||
}
|
||||
DISPLAYLEVEL(1, "benchmark: Incorrect parameters\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Get the list of all files recursively (because followLinks==0)*/
|
||||
extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
|
||||
&fileNamesNb, followLinks);
|
||||
if (extendedFileList) {
|
||||
unsigned u;
|
||||
for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
|
||||
free((void*)filenameTable);
|
||||
filenameTable = extendedFileList;
|
||||
filenameIdx = fileNamesNb;
|
||||
}
|
||||
|
||||
/* get sampleInfo */
|
||||
size_t blockSize = 0;
|
||||
sampleInfo* srcInfo= getSampleInfo(filenameTable,
|
||||
filenameIdx, blockSize, maxDictSize, displayLevel);
|
||||
|
||||
/* set up zParams */
|
||||
ZDICT_params_t zParams;
|
||||
zParams.compressionLevel = cLevel;
|
||||
zParams.notificationLevel = displayLevel;
|
||||
zParams.dictID = dictID;
|
||||
|
||||
/* with no dict */
|
||||
{
|
||||
const int noDictResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, NULL);
|
||||
if(noDictResult) {
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
/* for random */
|
||||
{
|
||||
ZDICT_random_params_t randomParam;
|
||||
randomParam.zParams = zParams;
|
||||
randomParam.k = k;
|
||||
const int randomResult = benchmarkDictBuilder(srcInfo, maxDictSize, &randomParam, NULL, NULL, NULL);
|
||||
DISPLAYLEVEL(2, "k=%u\n", randomParam.k);
|
||||
if(randomResult) {
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
/* for legacy */
|
||||
{
|
||||
ZDICT_legacy_params_t legacyParam;
|
||||
legacyParam.zParams = zParams;
|
||||
legacyParam.selectivityLevel = 9;
|
||||
const int legacyResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, &legacyParam, NULL);
|
||||
DISPLAYLEVEL(2, "selectivityLevel=%u\n", legacyParam.selectivityLevel);
|
||||
if(legacyResult) {
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
/* for cover */
|
||||
{
|
||||
/* for cover (optimizing k and d) */
|
||||
ZDICT_cover_params_t coverParam;
|
||||
memset(&coverParam, 0, sizeof(coverParam));
|
||||
coverParam.zParams = zParams;
|
||||
coverParam.splitPoint = 1.0;
|
||||
coverParam.steps = 40;
|
||||
coverParam.nbThreads = 1;
|
||||
const int coverOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL, NULL);
|
||||
DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", coverParam.k, coverParam.d, coverParam.steps, (unsigned)(coverParam.splitPoint * 100));
|
||||
if(coverOptResult) {
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
}
|
||||
|
||||
/* for cover (with k and d provided) */
|
||||
const int coverResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL, NULL);
|
||||
DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", coverParam.k, coverParam.d, coverParam.steps, (unsigned)(coverParam.splitPoint * 100));
|
||||
if(coverResult) {
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* for fastCover */
|
||||
for (f = 15; f < 25; f++){
|
||||
DISPLAYLEVEL(2, "current f is %u\n", f);
|
||||
for (accel = 1; accel < 11; accel++) {
|
||||
DISPLAYLEVEL(2, "current accel is %u\n", accel);
|
||||
/* for fastCover (optimizing k and d) */
|
||||
ZDICT_fastCover_params_t fastParam;
|
||||
memset(&fastParam, 0, sizeof(fastParam));
|
||||
fastParam.zParams = zParams;
|
||||
fastParam.f = f;
|
||||
fastParam.steps = 40;
|
||||
fastParam.nbThreads = 1;
|
||||
fastParam.accel = accel;
|
||||
const int fastOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam);
|
||||
DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100), fastParam.accel);
|
||||
if(fastOptResult) {
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
}
|
||||
|
||||
/* for fastCover (with k and d provided) */
|
||||
for (i = 0; i < 5; i++) {
|
||||
const int fastResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam);
|
||||
DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100), fastParam.accel);
|
||||
if(fastResult) {
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Free allocated memory */
|
||||
_cleanup:
|
||||
UTIL_freeFileList(extendedFileList, fileNamesBuf);
|
||||
freeSampleInfo(srcInfo);
|
||||
return result;
|
||||
}
|
@ -0,0 +1,6 @@
|
||||
/* ZDICT_trainFromBuffer_legacy() :
|
||||
* issue : samplesBuffer need to be followed by a noisy guard band.
|
||||
* work around : duplicate the buffer, and add the noise */
|
||||
size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
|
||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
||||
ZDICT_legacy_params_t params);
|
2
contrib/experimental_dict_builders/benchmarkDictBuilder/test.sh
Executable file
2
contrib/experimental_dict_builders/benchmarkDictBuilder/test.sh
Executable file
@ -0,0 +1,2 @@
|
||||
echo "Benchmark with in=../../lib/common"
|
||||
./benchmark in=../../../lib/common
|
54
contrib/experimental_dict_builders/fastCover/Makefile
Normal file
54
contrib/experimental_dict_builders/fastCover/Makefile
Normal file
@ -0,0 +1,54 @@
|
||||
ARG :=
|
||||
|
||||
CC ?= gcc
|
||||
CFLAGS ?= -O3 -g
|
||||
INCLUDES := -I ../../../programs -I ../randomDictBuilder -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder
|
||||
|
||||
IO_FILE := ../randomDictBuilder/io.c
|
||||
|
||||
TEST_INPUT := ../../../lib
|
||||
TEST_OUTPUT := fastCoverDict
|
||||
|
||||
all: main run clean
|
||||
|
||||
.PHONY: test
|
||||
test: main testrun testshell clean
|
||||
|
||||
.PHONY: run
|
||||
run:
|
||||
echo "Building a fastCover dictionary with given arguments"
|
||||
./main $(ARG)
|
||||
|
||||
main: main.o io.o fastCover.o libzstd.a
|
||||
$(CC) $(CFLAGS) main.o io.o fastCover.o libzstd.a -o main
|
||||
|
||||
main.o: main.c
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c main.c
|
||||
|
||||
fastCover.o: fastCover.c
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c fastCover.c
|
||||
|
||||
io.o: $(IO_FILE)
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c $(IO_FILE)
|
||||
|
||||
libzstd.a:
|
||||
$(MAKE) MOREFLAGS=-g -C ../../../lib libzstd.a
|
||||
mv ../../../lib/libzstd.a .
|
||||
|
||||
.PHONY: testrun
|
||||
testrun: main
|
||||
echo "Run with $(TEST_INPUT) and $(TEST_OUTPUT) "
|
||||
./main in=$(TEST_INPUT) out=$(TEST_OUTPUT)
|
||||
zstd -be3 -D $(TEST_OUTPUT) -r $(TEST_INPUT) -q
|
||||
rm -f $(TEST_OUTPUT)
|
||||
|
||||
.PHONY: testshell
|
||||
testshell: test.sh
|
||||
sh test.sh
|
||||
echo "Finish running test.sh"
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
rm -f *.o main libzstd.a
|
||||
$(MAKE) -C ../../../lib clean
|
||||
echo "Cleaning is completed"
|
24
contrib/experimental_dict_builders/fastCover/README.md
Normal file
24
contrib/experimental_dict_builders/fastCover/README.md
Normal file
@ -0,0 +1,24 @@
|
||||
FastCover Dictionary Builder
|
||||
|
||||
### Permitted Arguments:
|
||||
Input File/Directory (in=fileName): required; file/directory used to build dictionary; if directory, will operate recursively for files inside directory; can include multiple files/directories, each following "in="
|
||||
Output Dictionary (out=dictName): if not provided, default to fastCoverDict
|
||||
Dictionary ID (dictID=#): nonnegative number; if not provided, default to 0
|
||||
Maximum Dictionary Size (maxdict=#): positive number; in bytes, if not provided, default to 110KB
|
||||
Size of Selected Segment (k=#): positive number; in bytes; if not provided, default to 200
|
||||
Size of Dmer (d=#): either 6 or 8; if not provided, default to 8
|
||||
Number of steps (steps=#): positive number, if not provided, default to 32
|
||||
Percentage of samples used for training(split=#): positive number; if not provided, default to 100
|
||||
|
||||
|
||||
###Running Test:
|
||||
make test
|
||||
|
||||
|
||||
###Usage:
|
||||
To build a FASTCOVER dictionary with the provided arguments: make ARG= followed by arguments
|
||||
If k or d is not provided, the optimize version of FASTCOVER is run.
|
||||
|
||||
### Examples:
|
||||
make ARG="in=../../../lib/dictBuilder out=dict100 dictID=520"
|
||||
make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
|
809
contrib/experimental_dict_builders/fastCover/fastCover.c
Normal file
809
contrib/experimental_dict_builders/fastCover/fastCover.c
Normal file
@ -0,0 +1,809 @@
|
||||
/*-*************************************
|
||||
* Dependencies
|
||||
***************************************/
|
||||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* memset */
|
||||
#include <time.h> /* clock */
|
||||
#include "mem.h" /* read */
|
||||
#include "pool.h"
|
||||
#include "threading.h"
|
||||
#include "fastCover.h"
|
||||
#include "zstd_internal.h" /* includes zstd.h */
|
||||
#include "zdict.h"
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
|
||||
#define FASTCOVER_MAX_F 32
|
||||
#define DEFAULT_SPLITPOINT 1.0
|
||||
|
||||
/*-*************************************
|
||||
* Console display
|
||||
***************************************/
|
||||
static int g_displayLevel = 2;
|
||||
#define DISPLAY(...) \
|
||||
{ \
|
||||
fprintf(stderr, __VA_ARGS__); \
|
||||
fflush(stderr); \
|
||||
}
|
||||
#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
|
||||
if (displayLevel >= l) { \
|
||||
DISPLAY(__VA_ARGS__); \
|
||||
} /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
|
||||
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
|
||||
|
||||
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
|
||||
if (displayLevel >= l) { \
|
||||
if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
|
||||
g_time = clock(); \
|
||||
DISPLAY(__VA_ARGS__); \
|
||||
} \
|
||||
}
|
||||
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
|
||||
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
||||
static clock_t g_time = 0;
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Hash Functions
|
||||
***************************************/
|
||||
static const U64 prime6bytes = 227718039650203ULL;
|
||||
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
|
||||
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
|
||||
|
||||
static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
|
||||
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
|
||||
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
|
||||
|
||||
|
||||
/**
|
||||
* Hash the d-byte value pointed to by p and mod 2^f
|
||||
*/
|
||||
static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 h, unsigned d) {
|
||||
if (d == 6) {
|
||||
return ZSTD_hash6Ptr(p, h) & ((1 << h) - 1);
|
||||
}
|
||||
return ZSTD_hash8Ptr(p, h) & ((1 << h) - 1);
|
||||
}
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Context
|
||||
***************************************/
|
||||
typedef struct {
|
||||
const BYTE *samples;
|
||||
size_t *offsets;
|
||||
const size_t *samplesSizes;
|
||||
size_t nbSamples;
|
||||
size_t nbTrainSamples;
|
||||
size_t nbTestSamples;
|
||||
size_t nbDmers;
|
||||
U32 *freqs;
|
||||
U16 *segmentFreqs;
|
||||
unsigned d;
|
||||
} FASTCOVER_ctx_t;
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Helper functions
|
||||
***************************************/
|
||||
/**
|
||||
* Returns the sum of the sample sizes.
|
||||
*/
|
||||
static size_t FASTCOVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
|
||||
size_t sum = 0;
|
||||
unsigned i;
|
||||
for (i = 0; i < nbSamples; ++i) {
|
||||
sum += samplesSizes[i];
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* fast functions
|
||||
***************************************/
|
||||
/**
|
||||
* A segment is a range in the source as well as the score of the segment.
|
||||
*/
|
||||
typedef struct {
|
||||
U32 begin;
|
||||
U32 end;
|
||||
U32 score;
|
||||
} FASTCOVER_segment_t;
|
||||
|
||||
|
||||
/**
|
||||
* Selects the best segment in an epoch.
|
||||
* Segments of are scored according to the function:
|
||||
*
|
||||
* Let F(d) be the frequency of all dmers with hash value d.
|
||||
* Let S_i be hash value of the dmer at position i of segment S which has length k.
|
||||
*
|
||||
* Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
|
||||
*
|
||||
* Once the dmer with hash value d is in the dictionary we set F(d) = F(d)/2.
|
||||
*/
|
||||
static FASTCOVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
|
||||
U32 *freqs, U32 begin,U32 end,
|
||||
ZDICT_fastCover_params_t parameters) {
|
||||
/* Constants */
|
||||
const U32 k = parameters.k;
|
||||
const U32 d = parameters.d;
|
||||
const U32 dmersInK = k - d + 1;
|
||||
/* Try each segment (activeSegment) and save the best (bestSegment) */
|
||||
FASTCOVER_segment_t bestSegment = {0, 0, 0};
|
||||
FASTCOVER_segment_t activeSegment;
|
||||
/* Reset the activeDmers in the segment */
|
||||
/* The activeSegment starts at the beginning of the epoch. */
|
||||
activeSegment.begin = begin;
|
||||
activeSegment.end = begin;
|
||||
activeSegment.score = 0;
|
||||
{
|
||||
/* Slide the activeSegment through the whole epoch.
|
||||
* Save the best segment in bestSegment.
|
||||
*/
|
||||
while (activeSegment.end < end) {
|
||||
/* Get hash value of current dmer */
|
||||
const size_t index = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, parameters.f, ctx->d);
|
||||
/* Add frequency of this index to score if this is the first occurrence of index in active segment */
|
||||
if (ctx->segmentFreqs[index] == 0) {
|
||||
activeSegment.score += freqs[index];
|
||||
}
|
||||
ctx->segmentFreqs[index] += 1;
|
||||
/* Increment end of segment */
|
||||
activeSegment.end += 1;
|
||||
/* If the window is now too large, drop the first position */
|
||||
if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
|
||||
/* Get hash value of the dmer to be eliminated from active segment */
|
||||
const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, parameters.f, ctx->d);
|
||||
ctx->segmentFreqs[delIndex] -= 1;
|
||||
/* Subtract frequency of this index from score if this is the last occurrence of this index in active segment */
|
||||
if (ctx->segmentFreqs[delIndex] == 0) {
|
||||
activeSegment.score -= freqs[delIndex];
|
||||
}
|
||||
/* Increment start of segment */
|
||||
activeSegment.begin += 1;
|
||||
}
|
||||
/* If this segment is the best so far save it */
|
||||
if (activeSegment.score > bestSegment.score) {
|
||||
bestSegment = activeSegment;
|
||||
}
|
||||
}
|
||||
/* Zero out rest of segmentFreqs array */
|
||||
while (activeSegment.begin < end) {
|
||||
const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, parameters.f, ctx->d);
|
||||
ctx->segmentFreqs[delIndex] -= 1;
|
||||
activeSegment.begin += 1;
|
||||
}
|
||||
}
|
||||
{
|
||||
/* Trim off the zero frequency head and tail from the segment. */
|
||||
U32 newBegin = bestSegment.end;
|
||||
U32 newEnd = bestSegment.begin;
|
||||
U32 pos;
|
||||
for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
|
||||
const size_t index = FASTCOVER_hashPtrToIndex(ctx->samples + pos, parameters.f, ctx->d);
|
||||
U32 freq = freqs[index];
|
||||
if (freq != 0) {
|
||||
newBegin = MIN(newBegin, pos);
|
||||
newEnd = pos + 1;
|
||||
}
|
||||
}
|
||||
bestSegment.begin = newBegin;
|
||||
bestSegment.end = newEnd;
|
||||
}
|
||||
{
|
||||
/* Zero the frequency of hash value of each dmer covered by the chosen segment. */
|
||||
U32 pos;
|
||||
for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
|
||||
const size_t i = FASTCOVER_hashPtrToIndex(ctx->samples + pos, parameters.f, ctx->d);
|
||||
freqs[i] = 0;
|
||||
}
|
||||
}
|
||||
return bestSegment;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check the validity of the parameters.
|
||||
* Returns non-zero if the parameters are valid and 0 otherwise.
|
||||
*/
|
||||
static int FASTCOVER_checkParameters(ZDICT_fastCover_params_t parameters,
|
||||
size_t maxDictSize) {
|
||||
/* k, d, and f are required parameters */
|
||||
if (parameters.d == 0 || parameters.k == 0 || parameters.f == 0) {
|
||||
return 0;
|
||||
}
|
||||
/* d has to be 6 or 8 */
|
||||
if (parameters.d != 6 && parameters.d != 8) {
|
||||
return 0;
|
||||
}
|
||||
/* 0 < f <= FASTCOVER_MAX_F */
|
||||
if (parameters.f > FASTCOVER_MAX_F) {
|
||||
return 0;
|
||||
}
|
||||
/* k <= maxDictSize */
|
||||
if (parameters.k > maxDictSize) {
|
||||
return 0;
|
||||
}
|
||||
/* d <= k */
|
||||
if (parameters.d > parameters.k) {
|
||||
return 0;
|
||||
}
|
||||
/* 0 < splitPoint <= 1 */
|
||||
if (parameters.splitPoint <= 0 || parameters.splitPoint > 1) {
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Clean up a context initialized with `FASTCOVER_ctx_init()`.
|
||||
*/
|
||||
static void FASTCOVER_ctx_destroy(FASTCOVER_ctx_t *ctx) {
|
||||
if (!ctx) {
|
||||
return;
|
||||
}
|
||||
if (ctx->segmentFreqs) {
|
||||
free(ctx->segmentFreqs);
|
||||
ctx->segmentFreqs = NULL;
|
||||
}
|
||||
if (ctx->freqs) {
|
||||
free(ctx->freqs);
|
||||
ctx->freqs = NULL;
|
||||
}
|
||||
if (ctx->offsets) {
|
||||
free(ctx->offsets);
|
||||
ctx->offsets = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate for frequency of hash value of each dmer in ctx->samples
|
||||
*/
|
||||
static void FASTCOVER_computeFrequency(U32 *freqs, unsigned f, FASTCOVER_ctx_t *ctx){
|
||||
size_t start; /* start of current dmer */
|
||||
for (unsigned i = 0; i < ctx->nbTrainSamples; i++) {
|
||||
size_t currSampleStart = ctx->offsets[i];
|
||||
size_t currSampleEnd = ctx->offsets[i+1];
|
||||
start = currSampleStart;
|
||||
while (start + ctx->d <= currSampleEnd) {
|
||||
const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, ctx->d);
|
||||
freqs[dmerIndex]++;
|
||||
start++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare a context for dictionary building.
|
||||
* The context is only dependent on the parameter `d` and can used multiple
|
||||
* times.
|
||||
* Returns 1 on success or zero on error.
|
||||
* The context must be destroyed with `FASTCOVER_ctx_destroy()`.
|
||||
*/
|
||||
static int FASTCOVER_ctx_init(FASTCOVER_ctx_t *ctx, const void *samplesBuffer,
|
||||
const size_t *samplesSizes, unsigned nbSamples,
|
||||
unsigned d, double splitPoint, unsigned f) {
|
||||
const BYTE *const samples = (const BYTE *)samplesBuffer;
|
||||
const size_t totalSamplesSize = FASTCOVER_sum(samplesSizes, nbSamples);
|
||||
/* Split samples into testing and training sets */
|
||||
const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples;
|
||||
const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples;
|
||||
const size_t trainingSamplesSize = splitPoint < 1.0 ? FASTCOVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize;
|
||||
const size_t testSamplesSize = splitPoint < 1.0 ? FASTCOVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize;
|
||||
/* Checks */
|
||||
if (totalSamplesSize < MAX(d, sizeof(U64)) ||
|
||||
totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
|
||||
DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
|
||||
(U32)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
|
||||
return 0;
|
||||
}
|
||||
/* Check if there are at least 5 training samples */
|
||||
if (nbTrainSamples < 5) {
|
||||
DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
|
||||
return 0;
|
||||
}
|
||||
/* Check if there's testing sample */
|
||||
if (nbTestSamples < 1) {
|
||||
DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
|
||||
return 0;
|
||||
}
|
||||
/* Zero the context */
|
||||
memset(ctx, 0, sizeof(*ctx));
|
||||
DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
|
||||
(U32)trainingSamplesSize);
|
||||
DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
|
||||
(U32)testSamplesSize);
|
||||
|
||||
ctx->samples = samples;
|
||||
ctx->samplesSizes = samplesSizes;
|
||||
ctx->nbSamples = nbSamples;
|
||||
ctx->nbTrainSamples = nbTrainSamples;
|
||||
ctx->nbTestSamples = nbTestSamples;
|
||||
ctx->nbDmers = trainingSamplesSize - d + 1;
|
||||
ctx->d = d;
|
||||
|
||||
/* The offsets of each file */
|
||||
ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t));
|
||||
if (!ctx->offsets) {
|
||||
DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
|
||||
FASTCOVER_ctx_destroy(ctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Fill offsets from the samplesSizes */
|
||||
{
|
||||
U32 i;
|
||||
ctx->offsets[0] = 0;
|
||||
for (i = 1; i <= nbSamples; ++i) {
|
||||
ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1];
|
||||
}
|
||||
}
|
||||
|
||||
/* Initialize frequency array of size 2^f */
|
||||
ctx->freqs = (U32 *)calloc((1 << f), sizeof(U32));
|
||||
ctx->segmentFreqs = (U16 *)calloc((1 << f), sizeof(U16));
|
||||
DISPLAYLEVEL(2, "Computing frequencies\n");
|
||||
FASTCOVER_computeFrequency(ctx->freqs, f, ctx);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Given the prepared context build the dictionary.
|
||||
*/
|
||||
static size_t FASTCOVER_buildDictionary(const FASTCOVER_ctx_t *ctx, U32 *freqs,
|
||||
void *dictBuffer,
|
||||
size_t dictBufferCapacity,
|
||||
ZDICT_fastCover_params_t parameters){
|
||||
BYTE *const dict = (BYTE *)dictBuffer;
|
||||
size_t tail = dictBufferCapacity;
|
||||
/* Divide the data up into epochs of equal size.
|
||||
* We will select at least one segment from each epoch.
|
||||
*/
|
||||
const U32 epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k));
|
||||
const U32 epochSize = (U32)(ctx->nbDmers / epochs);
|
||||
size_t epoch;
|
||||
DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs,
|
||||
epochSize);
|
||||
/* Loop through the epochs until there are no more segments or the dictionary
|
||||
* is full.
|
||||
*/
|
||||
for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
|
||||
const U32 epochBegin = (U32)(epoch * epochSize);
|
||||
const U32 epochEnd = epochBegin + epochSize;
|
||||
size_t segmentSize;
|
||||
/* Select a segment */
|
||||
FASTCOVER_segment_t segment = FASTCOVER_selectSegment(
|
||||
ctx, freqs, epochBegin, epochEnd, parameters);
|
||||
|
||||
/* If the segment covers no dmers, then we are out of content */
|
||||
if (segment.score == 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* Trim the segment if necessary and if it is too small then we are done */
|
||||
segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
|
||||
if (segmentSize < parameters.d) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* We fill the dictionary from the back to allow the best segments to be
|
||||
* referenced with the smallest offsets.
|
||||
*/
|
||||
tail -= segmentSize;
|
||||
memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
|
||||
DISPLAYUPDATE(
|
||||
2, "\r%u%% ",
|
||||
(U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
|
||||
}
|
||||
DISPLAYLEVEL(2, "\r%79s\r", "");
|
||||
return tail;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* FASTCOVER_best_t is used for two purposes:
|
||||
* 1. Synchronizing threads.
|
||||
* 2. Saving the best parameters and dictionary.
|
||||
*
|
||||
* All of the methods except FASTCOVER_best_init() are thread safe if zstd is
|
||||
* compiled with multithreaded support.
|
||||
*/
|
||||
typedef struct fast_best_s {
|
||||
ZSTD_pthread_mutex_t mutex;
|
||||
ZSTD_pthread_cond_t cond;
|
||||
size_t liveJobs;
|
||||
void *dict;
|
||||
size_t dictSize;
|
||||
ZDICT_fastCover_params_t parameters;
|
||||
size_t compressedSize;
|
||||
} FASTCOVER_best_t;
|
||||
|
||||
/**
|
||||
* Initialize the `FASTCOVER_best_t`.
|
||||
*/
|
||||
static void FASTCOVER_best_init(FASTCOVER_best_t *best) {
|
||||
if (best==NULL) return; /* compatible with init on NULL */
|
||||
(void)ZSTD_pthread_mutex_init(&best->mutex, NULL);
|
||||
(void)ZSTD_pthread_cond_init(&best->cond, NULL);
|
||||
best->liveJobs = 0;
|
||||
best->dict = NULL;
|
||||
best->dictSize = 0;
|
||||
best->compressedSize = (size_t)-1;
|
||||
memset(&best->parameters, 0, sizeof(best->parameters));
|
||||
}
|
||||
|
||||
/**
|
||||
* Wait until liveJobs == 0.
|
||||
*/
|
||||
static void FASTCOVER_best_wait(FASTCOVER_best_t *best) {
|
||||
if (!best) {
|
||||
return;
|
||||
}
|
||||
ZSTD_pthread_mutex_lock(&best->mutex);
|
||||
while (best->liveJobs != 0) {
|
||||
ZSTD_pthread_cond_wait(&best->cond, &best->mutex);
|
||||
}
|
||||
ZSTD_pthread_mutex_unlock(&best->mutex);
|
||||
}
|
||||
|
||||
/**
|
||||
* Call FASTCOVER_best_wait() and then destroy the FASTCOVER_best_t.
|
||||
*/
|
||||
static void FASTCOVER_best_destroy(FASTCOVER_best_t *best) {
|
||||
if (!best) {
|
||||
return;
|
||||
}
|
||||
FASTCOVER_best_wait(best);
|
||||
if (best->dict) {
|
||||
free(best->dict);
|
||||
}
|
||||
ZSTD_pthread_mutex_destroy(&best->mutex);
|
||||
ZSTD_pthread_cond_destroy(&best->cond);
|
||||
}
|
||||
|
||||
/**
|
||||
* Called when a thread is about to be launched.
|
||||
* Increments liveJobs.
|
||||
*/
|
||||
static void FASTCOVER_best_start(FASTCOVER_best_t *best) {
|
||||
if (!best) {
|
||||
return;
|
||||
}
|
||||
ZSTD_pthread_mutex_lock(&best->mutex);
|
||||
++best->liveJobs;
|
||||
ZSTD_pthread_mutex_unlock(&best->mutex);
|
||||
}
|
||||
|
||||
/**
|
||||
* Called when a thread finishes executing, both on error or success.
|
||||
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
|
||||
* If this dictionary is the best so far save it and its parameters.
|
||||
*/
|
||||
static void FASTCOVER_best_finish(FASTCOVER_best_t *best, size_t compressedSize,
|
||||
ZDICT_fastCover_params_t parameters, void *dict,
|
||||
size_t dictSize) {
|
||||
if (!best) {
|
||||
return;
|
||||
}
|
||||
{
|
||||
size_t liveJobs;
|
||||
ZSTD_pthread_mutex_lock(&best->mutex);
|
||||
--best->liveJobs;
|
||||
liveJobs = best->liveJobs;
|
||||
/* If the new dictionary is better */
|
||||
if (compressedSize < best->compressedSize) {
|
||||
/* Allocate space if necessary */
|
||||
if (!best->dict || best->dictSize < dictSize) {
|
||||
if (best->dict) {
|
||||
free(best->dict);
|
||||
}
|
||||
best->dict = malloc(dictSize);
|
||||
if (!best->dict) {
|
||||
best->compressedSize = ERROR(GENERIC);
|
||||
best->dictSize = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
/* Save the dictionary, parameters, and size */
|
||||
memcpy(best->dict, dict, dictSize);
|
||||
best->dictSize = dictSize;
|
||||
best->parameters = parameters;
|
||||
best->compressedSize = compressedSize;
|
||||
}
|
||||
ZSTD_pthread_mutex_unlock(&best->mutex);
|
||||
if (liveJobs == 0) {
|
||||
ZSTD_pthread_cond_broadcast(&best->cond);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parameters for FASTCOVER_tryParameters().
|
||||
*/
|
||||
typedef struct FASTCOVER_tryParameters_data_s {
|
||||
const FASTCOVER_ctx_t *ctx;
|
||||
FASTCOVER_best_t *best;
|
||||
size_t dictBufferCapacity;
|
||||
ZDICT_fastCover_params_t parameters;
|
||||
} FASTCOVER_tryParameters_data_t;
|
||||
|
||||
/**
|
||||
* Tries a set of parameters and updates the FASTCOVER_best_t with the results.
|
||||
* This function is thread safe if zstd is compiled with multithreaded support.
|
||||
* It takes its parameters as an *OWNING* opaque pointer to support threading.
|
||||
*/
|
||||
static void FASTCOVER_tryParameters(void *opaque) {
|
||||
/* Save parameters as local variables */
|
||||
FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t *)opaque;
|
||||
const FASTCOVER_ctx_t *const ctx = data->ctx;
|
||||
const ZDICT_fastCover_params_t parameters = data->parameters;
|
||||
size_t dictBufferCapacity = data->dictBufferCapacity;
|
||||
size_t totalCompressedSize = ERROR(GENERIC);
|
||||
/* Allocate space for hash table, dict, and freqs */
|
||||
BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
|
||||
U32 *freqs = (U32*) malloc((1 << parameters.f) * sizeof(U32));
|
||||
if (!dict || !freqs) {
|
||||
DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
|
||||
goto _cleanup;
|
||||
}
|
||||
/* Copy the frequencies because we need to modify them */
|
||||
memcpy(freqs, ctx->freqs, (1 << parameters.f) * sizeof(U32));
|
||||
/* Build the dictionary */
|
||||
{
|
||||
const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict,
|
||||
dictBufferCapacity, parameters);
|
||||
|
||||
dictBufferCapacity = ZDICT_finalizeDictionary(
|
||||
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
|
||||
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples,
|
||||
parameters.zParams);
|
||||
if (ZDICT_isError(dictBufferCapacity)) {
|
||||
DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
|
||||
goto _cleanup;
|
||||
}
|
||||
}
|
||||
/* Check total compressed size */
|
||||
{
|
||||
/* Pointers */
|
||||
ZSTD_CCtx *cctx;
|
||||
ZSTD_CDict *cdict;
|
||||
void *dst;
|
||||
/* Local variables */
|
||||
size_t dstCapacity;
|
||||
size_t i;
|
||||
/* Allocate dst with enough space to compress the maximum sized sample */
|
||||
{
|
||||
size_t maxSampleSize = 0;
|
||||
i = parameters.splitPoint < 1.0 ? ctx->nbTrainSamples : 0;
|
||||
for (; i < ctx->nbSamples; ++i) {
|
||||
maxSampleSize = MAX(ctx->samplesSizes[i], maxSampleSize);
|
||||
}
|
||||
dstCapacity = ZSTD_compressBound(maxSampleSize);
|
||||
dst = malloc(dstCapacity);
|
||||
}
|
||||
/* Create the cctx and cdict */
|
||||
cctx = ZSTD_createCCtx();
|
||||
cdict = ZSTD_createCDict(dict, dictBufferCapacity,
|
||||
parameters.zParams.compressionLevel);
|
||||
if (!dst || !cctx || !cdict) {
|
||||
goto _compressCleanup;
|
||||
}
|
||||
/* Compress each sample and sum their sizes (or error) */
|
||||
totalCompressedSize = dictBufferCapacity;
|
||||
i = parameters.splitPoint < 1.0 ? ctx->nbTrainSamples : 0;
|
||||
for (; i < ctx->nbSamples; ++i) {
|
||||
const size_t size = ZSTD_compress_usingCDict(
|
||||
cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i],
|
||||
ctx->samplesSizes[i], cdict);
|
||||
if (ZSTD_isError(size)) {
|
||||
totalCompressedSize = ERROR(GENERIC);
|
||||
goto _compressCleanup;
|
||||
}
|
||||
totalCompressedSize += size;
|
||||
}
|
||||
_compressCleanup:
|
||||
ZSTD_freeCCtx(cctx);
|
||||
ZSTD_freeCDict(cdict);
|
||||
if (dst) {
|
||||
free(dst);
|
||||
}
|
||||
}
|
||||
|
||||
_cleanup:
|
||||
FASTCOVER_best_finish(data->best, totalCompressedSize, parameters, dict,
|
||||
dictBufferCapacity);
|
||||
free(data);
|
||||
if (dict) {
|
||||
free(dict);
|
||||
}
|
||||
if (freqs) {
|
||||
free(freqs);
|
||||
}
|
||||
}
|
||||
|
||||
ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(
|
||||
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
||||
const size_t *samplesSizes, unsigned nbSamples, ZDICT_fastCover_params_t parameters) {
|
||||
BYTE* const dict = (BYTE*)dictBuffer;
|
||||
FASTCOVER_ctx_t ctx;
|
||||
parameters.splitPoint = 1.0;
|
||||
/* Initialize global data */
|
||||
g_displayLevel = parameters.zParams.notificationLevel;
|
||||
/* Checks */
|
||||
if (!FASTCOVER_checkParameters(parameters, dictBufferCapacity)) {
|
||||
DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
|
||||
return ERROR(GENERIC);
|
||||
}
|
||||
if (nbSamples == 0) {
|
||||
DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
|
||||
return ERROR(GENERIC);
|
||||
}
|
||||
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
|
||||
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
|
||||
ZDICT_DICTSIZE_MIN);
|
||||
return ERROR(dstSize_tooSmall);
|
||||
}
|
||||
/* Initialize context */
|
||||
if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
|
||||
parameters.d, parameters.splitPoint, parameters.f)) {
|
||||
DISPLAYLEVEL(1, "Failed to initialize context\n");
|
||||
return ERROR(GENERIC);
|
||||
}
|
||||
/* Build the dictionary */
|
||||
DISPLAYLEVEL(2, "Building dictionary\n");
|
||||
{
|
||||
const size_t tail = FASTCOVER_buildDictionary(&ctx, ctx.freqs, dictBuffer,
|
||||
dictBufferCapacity, parameters);
|
||||
|
||||
const size_t dictionarySize = ZDICT_finalizeDictionary(
|
||||
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
|
||||
samplesBuffer, samplesSizes, (unsigned)ctx.nbTrainSamples,
|
||||
parameters.zParams);
|
||||
if (!ZSTD_isError(dictionarySize)) {
|
||||
DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
|
||||
(U32)dictionarySize);
|
||||
}
|
||||
FASTCOVER_ctx_destroy(&ctx);
|
||||
return dictionarySize;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(
|
||||
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
||||
const size_t *samplesSizes, unsigned nbSamples,
|
||||
ZDICT_fastCover_params_t *parameters) {
|
||||
/* constants */
|
||||
const unsigned nbThreads = parameters->nbThreads;
|
||||
const double splitPoint =
|
||||
parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
|
||||
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
|
||||
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
|
||||
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
|
||||
const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
|
||||
const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
|
||||
const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
|
||||
const unsigned kIterations =
|
||||
(1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
|
||||
const unsigned f = parameters->f == 0 ? 23 : parameters->f;
|
||||
|
||||
/* Local variables */
|
||||
const int displayLevel = parameters->zParams.notificationLevel;
|
||||
unsigned iteration = 1;
|
||||
unsigned d;
|
||||
unsigned k;
|
||||
FASTCOVER_best_t best;
|
||||
POOL_ctx *pool = NULL;
|
||||
|
||||
/* Checks */
|
||||
if (splitPoint <= 0 || splitPoint > 1) {
|
||||
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");
|
||||
return ERROR(GENERIC);
|
||||
}
|
||||
if (kMinK < kMaxD || kMaxK < kMinK) {
|
||||
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n");
|
||||
return ERROR(GENERIC);
|
||||
}
|
||||
if (nbSamples == 0) {
|
||||
DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
|
||||
return ERROR(GENERIC);
|
||||
}
|
||||
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
|
||||
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
|
||||
ZDICT_DICTSIZE_MIN);
|
||||
return ERROR(dstSize_tooSmall);
|
||||
}
|
||||
if (nbThreads > 1) {
|
||||
pool = POOL_create(nbThreads, 1);
|
||||
if (!pool) {
|
||||
return ERROR(memory_allocation);
|
||||
}
|
||||
}
|
||||
/* Initialization */
|
||||
FASTCOVER_best_init(&best);
|
||||
/* Turn down global display level to clean up display at level 2 and below */
|
||||
g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1;
|
||||
/* Loop through d first because each new value needs a new context */
|
||||
LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
|
||||
kIterations);
|
||||
for (d = kMinD; d <= kMaxD; d += 2) {
|
||||
/* Initialize the context for this value of d */
|
||||
FASTCOVER_ctx_t ctx;
|
||||
LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
|
||||
if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f)) {
|
||||
LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
|
||||
FASTCOVER_best_destroy(&best);
|
||||
POOL_free(pool);
|
||||
return ERROR(GENERIC);
|
||||
}
|
||||
/* Loop through k reusing the same context */
|
||||
for (k = kMinK; k <= kMaxK; k += kStepSize) {
|
||||
/* Prepare the arguments */
|
||||
FASTCOVER_tryParameters_data_t *data = (FASTCOVER_tryParameters_data_t *)malloc(
|
||||
sizeof(FASTCOVER_tryParameters_data_t));
|
||||
LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k);
|
||||
if (!data) {
|
||||
LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n");
|
||||
FASTCOVER_best_destroy(&best);
|
||||
FASTCOVER_ctx_destroy(&ctx);
|
||||
POOL_free(pool);
|
||||
return ERROR(GENERIC);
|
||||
}
|
||||
data->ctx = &ctx;
|
||||
data->best = &best;
|
||||
data->dictBufferCapacity = dictBufferCapacity;
|
||||
data->parameters = *parameters;
|
||||
data->parameters.k = k;
|
||||
data->parameters.d = d;
|
||||
data->parameters.f = f;
|
||||
data->parameters.splitPoint = splitPoint;
|
||||
data->parameters.steps = kSteps;
|
||||
data->parameters.zParams.notificationLevel = g_displayLevel;
|
||||
/* Check the parameters */
|
||||
if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity)) {
|
||||
DISPLAYLEVEL(1, "fastCover parameters incorrect\n");
|
||||
free(data);
|
||||
continue;
|
||||
}
|
||||
/* Call the function and pass ownership of data to it */
|
||||
FASTCOVER_best_start(&best);
|
||||
if (pool) {
|
||||
POOL_add(pool, &FASTCOVER_tryParameters, data);
|
||||
} else {
|
||||
FASTCOVER_tryParameters(data);
|
||||
}
|
||||
/* Print status */
|
||||
LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
|
||||
(U32)((iteration * 100) / kIterations));
|
||||
++iteration;
|
||||
}
|
||||
FASTCOVER_best_wait(&best);
|
||||
FASTCOVER_ctx_destroy(&ctx);
|
||||
}
|
||||
LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");
|
||||
/* Fill the output buffer and parameters with output of the best parameters */
|
||||
{
|
||||
const size_t dictSize = best.dictSize;
|
||||
if (ZSTD_isError(best.compressedSize)) {
|
||||
const size_t compressedSize = best.compressedSize;
|
||||
FASTCOVER_best_destroy(&best);
|
||||
POOL_free(pool);
|
||||
return compressedSize;
|
||||
}
|
||||
*parameters = best.parameters;
|
||||
memcpy(dictBuffer, best.dict, dictSize);
|
||||
FASTCOVER_best_destroy(&best);
|
||||
POOL_free(pool);
|
||||
return dictSize;
|
||||
}
|
||||
|
||||
}
|
57
contrib/experimental_dict_builders/fastCover/fastCover.h
Normal file
57
contrib/experimental_dict_builders/fastCover/fastCover.h
Normal file
@ -0,0 +1,57 @@
|
||||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* memset */
|
||||
#include <time.h> /* clock */
|
||||
#include "mem.h" /* read */
|
||||
#include "pool.h"
|
||||
#include "threading.h"
|
||||
#include "zstd_internal.h" /* includes zstd.h */
|
||||
#ifndef ZDICT_STATIC_LINKING_ONLY
|
||||
#define ZDICT_STATIC_LINKING_ONLY
|
||||
#endif
|
||||
#include "zdict.h"
|
||||
|
||||
|
||||
typedef struct {
|
||||
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
|
||||
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
|
||||
unsigned f; /* log of size of frequency array */
|
||||
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
|
||||
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
|
||||
double splitPoint; /* Percentage of samples used for training: the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */
|
||||
ZDICT_params_t zParams;
|
||||
} ZDICT_fastCover_params_t;
|
||||
|
||||
|
||||
/*! ZDICT_optimizeTrainFromBuffer_fastCover():
|
||||
* Train a dictionary from an array of samples using a modified version of the COVER algorithm.
|
||||
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
||||
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
||||
* The resulting dictionary will be saved into `dictBuffer`.
|
||||
* All of the parameters except for f are optional.
|
||||
* If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
|
||||
* if steps is zero it defaults to its default value.
|
||||
* If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
|
||||
*
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
* On success `*parameters` contains the parameters selected.
|
||||
*/
|
||||
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(
|
||||
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
||||
const size_t *samplesSizes, unsigned nbSamples,
|
||||
ZDICT_fastCover_params_t *parameters);
|
||||
|
||||
|
||||
/*! ZDICT_trainFromBuffer_fastCover():
|
||||
* Train a dictionary from an array of samples using a modified version of the COVER algorithm.
|
||||
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
||||
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
||||
* The resulting dictionary will be saved into `dictBuffer`.
|
||||
* d, k, and f are required.
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
*/
|
||||
ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(
|
||||
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
||||
const size_t *samplesSizes, unsigned nbSamples, ZDICT_fastCover_params_t parameters);
|
183
contrib/experimental_dict_builders/fastCover/main.c
Normal file
183
contrib/experimental_dict_builders/fastCover/main.c
Normal file
@ -0,0 +1,183 @@
|
||||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* strcmp, strlen */
|
||||
#include <errno.h> /* errno */
|
||||
#include <ctype.h>
|
||||
#include "fastCover.h"
|
||||
#include "io.h"
|
||||
#include "util.h"
|
||||
#include "zdict.h"
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Console display
|
||||
***************************************/
|
||||
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
|
||||
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
|
||||
|
||||
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
|
||||
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
|
||||
|
||||
#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
|
||||
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
|
||||
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
|
||||
if (displayLevel>=4) fflush(stderr); } } }
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Exceptions
|
||||
***************************************/
|
||||
#ifndef DEBUG
|
||||
# define DEBUG 0
|
||||
#endif
|
||||
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
|
||||
#define EXM_THROW(error, ...) \
|
||||
{ \
|
||||
DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
|
||||
DISPLAY("Error %i : ", error); \
|
||||
DISPLAY(__VA_ARGS__); \
|
||||
DISPLAY("\n"); \
|
||||
exit(error); \
|
||||
}
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
static const unsigned g_defaultMaxDictSize = 110 KB;
|
||||
#define DEFAULT_CLEVEL 3
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* FASTCOVER
|
||||
***************************************/
|
||||
int FASTCOVER_trainFromFiles(const char* dictFileName, sampleInfo *info,
|
||||
unsigned maxDictSize,
|
||||
ZDICT_fastCover_params_t *params) {
|
||||
unsigned const displayLevel = params->zParams.notificationLevel;
|
||||
void* const dictBuffer = malloc(maxDictSize);
|
||||
|
||||
int result = 0;
|
||||
|
||||
/* Checks */
|
||||
if (!dictBuffer)
|
||||
EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
|
||||
|
||||
{ size_t dictSize;
|
||||
/* Run the optimize version if either k or d is not provided */
|
||||
if (!params->d || !params->k) {
|
||||
dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, params);
|
||||
} else {
|
||||
dictSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, *params);
|
||||
}
|
||||
DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\n", params->k, params->d, params->f, params->steps, (unsigned)(params->splitPoint*100));
|
||||
if (ZDICT_isError(dictSize)) {
|
||||
DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
|
||||
result = 1;
|
||||
goto _done;
|
||||
}
|
||||
/* save dict */
|
||||
DISPLAYLEVEL(2, "Save dictionary of size %u into file %s \n", (U32)dictSize, dictFileName);
|
||||
saveDict(dictFileName, dictBuffer, dictSize);
|
||||
}
|
||||
|
||||
/* clean up */
|
||||
_done:
|
||||
free(dictBuffer);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int main(int argCount, const char* argv[])
|
||||
{
|
||||
int displayLevel = 2;
|
||||
const char* programName = argv[0];
|
||||
int operationResult = 0;
|
||||
|
||||
/* Initialize arguments to default values */
|
||||
unsigned k = 0;
|
||||
unsigned d = 0;
|
||||
unsigned f = 23;
|
||||
unsigned steps = 32;
|
||||
unsigned nbThreads = 1;
|
||||
unsigned split = 100;
|
||||
const char* outputFile = "fastCoverDict";
|
||||
unsigned dictID = 0;
|
||||
unsigned maxDictSize = g_defaultMaxDictSize;
|
||||
|
||||
/* Initialize table to store input files */
|
||||
const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));
|
||||
unsigned filenameIdx = 0;
|
||||
|
||||
char* fileNamesBuf = NULL;
|
||||
unsigned fileNamesNb = filenameIdx;
|
||||
int followLinks = 0; /* follow directory recursively */
|
||||
const char** extendedFileList = NULL;
|
||||
|
||||
/* Parse arguments */
|
||||
for (int i = 1; i < argCount; i++) {
|
||||
const char* argument = argv[i];
|
||||
if (longCommandWArg(&argument, "k=")) { k = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "d=")) { d = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "f=")) { f = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "steps=")) { steps = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "split=")) { split = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "dictID=")) { dictID = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "in=")) {
|
||||
filenameTable[filenameIdx] = argument;
|
||||
filenameIdx++;
|
||||
continue;
|
||||
}
|
||||
if (longCommandWArg(&argument, "out=")) {
|
||||
outputFile = argument;
|
||||
continue;
|
||||
}
|
||||
DISPLAYLEVEL(1, "Incorrect parameters\n");
|
||||
operationResult = 1;
|
||||
return operationResult;
|
||||
}
|
||||
|
||||
/* Get the list of all files recursively (because followLinks==0)*/
|
||||
extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
|
||||
&fileNamesNb, followLinks);
|
||||
if (extendedFileList) {
|
||||
unsigned u;
|
||||
for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
|
||||
free((void*)filenameTable);
|
||||
filenameTable = extendedFileList;
|
||||
filenameIdx = fileNamesNb;
|
||||
}
|
||||
|
||||
size_t blockSize = 0;
|
||||
|
||||
/* Set up zParams */
|
||||
ZDICT_params_t zParams;
|
||||
zParams.compressionLevel = DEFAULT_CLEVEL;
|
||||
zParams.notificationLevel = displayLevel;
|
||||
zParams.dictID = dictID;
|
||||
|
||||
/* Set up fastCover params */
|
||||
ZDICT_fastCover_params_t params;
|
||||
params.zParams = zParams;
|
||||
params.k = k;
|
||||
params.d = d;
|
||||
params.f = f;
|
||||
params.steps = steps;
|
||||
params.nbThreads = nbThreads;
|
||||
params.splitPoint = (double)split/100;
|
||||
|
||||
/* Build dictionary */
|
||||
sampleInfo* info = getSampleInfo(filenameTable,
|
||||
filenameIdx, blockSize, maxDictSize, zParams.notificationLevel);
|
||||
operationResult = FASTCOVER_trainFromFiles(outputFile, info, maxDictSize, ¶ms);
|
||||
|
||||
/* Free allocated memory */
|
||||
UTIL_freeFileList(extendedFileList, fileNamesBuf);
|
||||
freeSampleInfo(info);
|
||||
|
||||
return operationResult;
|
||||
}
|
15
contrib/experimental_dict_builders/fastCover/test.sh
Executable file
15
contrib/experimental_dict_builders/fastCover/test.sh
Executable file
@ -0,0 +1,15 @@
|
||||
echo "Building fastCover dictionary with in=../../lib/common f=20 out=dict1"
|
||||
./main in=../../../lib/common f=20 out=dict1
|
||||
zstd -be3 -D dict1 -r ../../../lib/common -q
|
||||
echo "Building fastCover dictionary with in=../../lib/common k=500 d=6 f=24 out=dict2 dictID=100 maxdict=140000"
|
||||
./main in=../../../lib/common k=500 d=6 f=24 out=dict2 dictID=100 maxdict=140000
|
||||
zstd -be3 -D dict2 -r ../../../lib/common -q
|
||||
echo "Building fastCover dictionary with 2 sample sources"
|
||||
./main in=../../../lib/common in=../../../lib/compress out=dict3
|
||||
zstd -be3 -D dict3 -r ../../../lib/common -q
|
||||
echo "Removing dict1 dict2 dict3"
|
||||
rm -f dict1 dict2 dict3
|
||||
|
||||
echo "Testing with invalid parameters, should fail"
|
||||
! ./main in=../../../lib/common r=10
|
||||
! ./main in=../../../lib/common d=10
|
@ -0,0 +1,52 @@
|
||||
ARG :=
|
||||
|
||||
CC ?= gcc
|
||||
CFLAGS ?= -O3
|
||||
INCLUDES := -I ../../../programs -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder
|
||||
|
||||
TEST_INPUT := ../../../lib
|
||||
TEST_OUTPUT := randomDict
|
||||
|
||||
all: main run clean
|
||||
|
||||
.PHONY: test
|
||||
test: main testrun testshell clean
|
||||
|
||||
.PHONY: run
|
||||
run:
|
||||
echo "Building a random dictionary with given arguments"
|
||||
./main $(ARG)
|
||||
|
||||
main: main.o io.o random.o libzstd.a
|
||||
$(CC) $(CFLAGS) main.o io.o random.o libzstd.a -o main
|
||||
|
||||
main.o: main.c
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c main.c
|
||||
|
||||
random.o: random.c
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c random.c
|
||||
|
||||
io.o: io.c
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c io.c
|
||||
|
||||
libzstd.a:
|
||||
$(MAKE) -C ../../../lib libzstd.a
|
||||
mv ../../../lib/libzstd.a .
|
||||
|
||||
.PHONY: testrun
|
||||
testrun: main
|
||||
echo "Run with $(TEST_INPUT) and $(TEST_OUTPUT) "
|
||||
./main in=$(TEST_INPUT) out=$(TEST_OUTPUT)
|
||||
zstd -be3 -D $(TEST_OUTPUT) -r $(TEST_INPUT) -q
|
||||
rm -f $(TEST_OUTPUT)
|
||||
|
||||
.PHONY: testshell
|
||||
testshell: test.sh
|
||||
sh test.sh
|
||||
echo "Finish running test.sh"
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
rm -f *.o main libzstd.a
|
||||
$(MAKE) -C ../../../lib clean
|
||||
echo "Cleaning is completed"
|
@ -0,0 +1,20 @@
|
||||
Random Dictionary Builder
|
||||
|
||||
### Permitted Arguments:
|
||||
Input File/Directory (in=fileName): required; file/directory used to build dictionary; if directory, will operate recursively for files inside directory; can include multiple files/directories, each following "in="
|
||||
Output Dictionary (out=dictName): if not provided, default to defaultDict
|
||||
Dictionary ID (dictID=#): nonnegative number; if not provided, default to 0
|
||||
Maximum Dictionary Size (maxdict=#): positive number; in bytes, if not provided, default to 110KB
|
||||
Size of Randomly Selected Segment (k=#): positive number; in bytes; if not provided, default to 200
|
||||
|
||||
###Running Test:
|
||||
make test
|
||||
|
||||
|
||||
###Usage:
|
||||
To build a random dictionary with the provided arguments: make ARG= followed by arguments
|
||||
|
||||
|
||||
### Examples:
|
||||
make ARG="in=../../../lib/dictBuilder out=dict100 dictID=520"
|
||||
make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
|
284
contrib/experimental_dict_builders/randomDictBuilder/io.c
Normal file
284
contrib/experimental_dict_builders/randomDictBuilder/io.c
Normal file
@ -0,0 +1,284 @@
|
||||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* strcmp, strlen */
|
||||
#include <errno.h> /* errno */
|
||||
#include <ctype.h>
|
||||
#include "io.h"
|
||||
#include "fileio.h" /* stdinmark, stdoutmark, ZSTD_EXTENSION */
|
||||
#include "platform.h" /* Large Files support */
|
||||
#include "util.h"
|
||||
#include "zdict.h"
|
||||
|
||||
/*-*************************************
|
||||
* Console display
|
||||
***************************************/
|
||||
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
|
||||
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
|
||||
|
||||
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
|
||||
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
|
||||
|
||||
#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
|
||||
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
|
||||
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
|
||||
if (displayLevel>=4) fflush(stderr); } } }
|
||||
|
||||
/*-*************************************
|
||||
* Exceptions
|
||||
***************************************/
|
||||
#ifndef DEBUG
|
||||
# define DEBUG 0
|
||||
#endif
|
||||
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
|
||||
#define EXM_THROW(error, ...) \
|
||||
{ \
|
||||
DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
|
||||
DISPLAY("Error %i : ", error); \
|
||||
DISPLAY(__VA_ARGS__); \
|
||||
DISPLAY("\n"); \
|
||||
exit(error); \
|
||||
}
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
|
||||
#define SAMPLESIZE_MAX (128 KB)
|
||||
#define RANDOM_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
|
||||
#define RANDOM_MEMMULT 9
|
||||
static const size_t g_maxMemory = (sizeof(size_t) == 4) ?
|
||||
(2 GB - 64 MB) : ((size_t)(512 MB) << sizeof(size_t));
|
||||
|
||||
#define NOISELENGTH 32
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Commandline related functions
|
||||
***************************************/
|
||||
unsigned readU32FromChar(const char** stringPtr){
|
||||
const char errorMsg[] = "error: numeric value too large";
|
||||
unsigned result = 0;
|
||||
while ((**stringPtr >='0') && (**stringPtr <='9')) {
|
||||
unsigned const max = (((unsigned)(-1)) / 10) - 1;
|
||||
if (result > max) exit(1);
|
||||
result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
|
||||
}
|
||||
if ((**stringPtr=='K') || (**stringPtr=='M')) {
|
||||
unsigned const maxK = ((unsigned)(-1)) >> 10;
|
||||
if (result > maxK) exit(1);
|
||||
result <<= 10;
|
||||
if (**stringPtr=='M') {
|
||||
if (result > maxK) exit(1);
|
||||
result <<= 10;
|
||||
}
|
||||
(*stringPtr)++; /* skip `K` or `M` */
|
||||
if (**stringPtr=='i') (*stringPtr)++;
|
||||
if (**stringPtr=='B') (*stringPtr)++;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
unsigned longCommandWArg(const char** stringPtr, const char* longCommand){
|
||||
size_t const comSize = strlen(longCommand);
|
||||
int const result = !strncmp(*stringPtr, longCommand, comSize);
|
||||
if (result) *stringPtr += comSize;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/* ********************************************************
|
||||
* File related operations
|
||||
**********************************************************/
|
||||
/** loadFiles() :
|
||||
* load samples from files listed in fileNamesTable into buffer.
|
||||
* works even if buffer is too small to load all samples.
|
||||
* Also provides the size of each sample into sampleSizes table
|
||||
* which must be sized correctly, using DiB_fileStats().
|
||||
* @return : nb of samples effectively loaded into `buffer`
|
||||
* *bufferSizePtr is modified, it provides the amount data loaded within buffer.
|
||||
* sampleSizes is filled with the size of each sample.
|
||||
*/
|
||||
static unsigned loadFiles(void* buffer, size_t* bufferSizePtr, size_t* sampleSizes,
|
||||
unsigned sstSize, const char** fileNamesTable, unsigned nbFiles,
|
||||
size_t targetChunkSize, unsigned displayLevel) {
|
||||
char* const buff = (char*)buffer;
|
||||
size_t pos = 0;
|
||||
unsigned nbLoadedChunks = 0, fileIndex;
|
||||
|
||||
for (fileIndex=0; fileIndex<nbFiles; fileIndex++) {
|
||||
const char* const fileName = fileNamesTable[fileIndex];
|
||||
unsigned long long const fs64 = UTIL_getFileSize(fileName);
|
||||
unsigned long long remainingToLoad = (fs64 == UTIL_FILESIZE_UNKNOWN) ? 0 : fs64;
|
||||
U32 const nbChunks = targetChunkSize ? (U32)((fs64 + (targetChunkSize-1)) / targetChunkSize) : 1;
|
||||
U64 const chunkSize = targetChunkSize ? MIN(targetChunkSize, fs64) : fs64;
|
||||
size_t const maxChunkSize = (size_t)MIN(chunkSize, SAMPLESIZE_MAX);
|
||||
U32 cnb;
|
||||
FILE* const f = fopen(fileName, "rb");
|
||||
if (f==NULL) EXM_THROW(10, "zstd: dictBuilder: %s %s ", fileName, strerror(errno));
|
||||
DISPLAYUPDATE(2, "Loading %s... \r", fileName);
|
||||
for (cnb=0; cnb<nbChunks; cnb++) {
|
||||
size_t const toLoad = (size_t)MIN(maxChunkSize, remainingToLoad);
|
||||
if (toLoad > *bufferSizePtr-pos) break;
|
||||
{ size_t const readSize = fread(buff+pos, 1, toLoad, f);
|
||||
if (readSize != toLoad) EXM_THROW(11, "Pb reading %s", fileName);
|
||||
pos += readSize;
|
||||
sampleSizes[nbLoadedChunks++] = toLoad;
|
||||
remainingToLoad -= targetChunkSize;
|
||||
if (nbLoadedChunks == sstSize) { /* no more space left in sampleSizes table */
|
||||
fileIndex = nbFiles; /* stop there */
|
||||
break;
|
||||
}
|
||||
if (toLoad < targetChunkSize) {
|
||||
fseek(f, (long)(targetChunkSize - toLoad), SEEK_CUR);
|
||||
} } }
|
||||
fclose(f);
|
||||
}
|
||||
DISPLAYLEVEL(2, "\r%79s\r", "");
|
||||
*bufferSizePtr = pos;
|
||||
DISPLAYLEVEL(4, "loaded : %u KB \n", (U32)(pos >> 10))
|
||||
return nbLoadedChunks;
|
||||
}
|
||||
|
||||
#define rotl32(x,r) ((x << r) | (x >> (32 - r)))
|
||||
static U32 getRand(U32* src)
|
||||
{
|
||||
static const U32 prime1 = 2654435761U;
|
||||
static const U32 prime2 = 2246822519U;
|
||||
U32 rand32 = *src;
|
||||
rand32 *= prime1;
|
||||
rand32 ^= prime2;
|
||||
rand32 = rotl32(rand32, 13);
|
||||
*src = rand32;
|
||||
return rand32 >> 5;
|
||||
}
|
||||
|
||||
/* shuffle() :
|
||||
* shuffle a table of file names in a semi-random way
|
||||
* It improves dictionary quality by reducing "locality" impact, so if sample set is very large,
|
||||
* it will load random elements from it, instead of just the first ones. */
|
||||
static void shuffle(const char** fileNamesTable, unsigned nbFiles) {
|
||||
U32 seed = 0xFD2FB528;
|
||||
unsigned i;
|
||||
for (i = nbFiles - 1; i > 0; --i) {
|
||||
unsigned const j = getRand(&seed) % (i + 1);
|
||||
const char* const tmp = fileNamesTable[j];
|
||||
fileNamesTable[j] = fileNamesTable[i];
|
||||
fileNamesTable[i] = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*-********************************************************
|
||||
* Dictionary training functions
|
||||
**********************************************************/
|
||||
size_t findMaxMem(unsigned long long requiredMem) {
|
||||
size_t const step = 8 MB;
|
||||
void* testmem = NULL;
|
||||
|
||||
requiredMem = (((requiredMem >> 23) + 1) << 23);
|
||||
requiredMem += step;
|
||||
if (requiredMem > g_maxMemory) requiredMem = g_maxMemory;
|
||||
|
||||
while (!testmem) {
|
||||
testmem = malloc((size_t)requiredMem);
|
||||
requiredMem -= step;
|
||||
}
|
||||
|
||||
free(testmem);
|
||||
return (size_t)requiredMem;
|
||||
}
|
||||
|
||||
void saveDict(const char* dictFileName,
|
||||
const void* buff, size_t buffSize) {
|
||||
FILE* const f = fopen(dictFileName, "wb");
|
||||
if (f==NULL) EXM_THROW(3, "cannot open %s ", dictFileName);
|
||||
|
||||
{ size_t const n = fwrite(buff, 1, buffSize, f);
|
||||
if (n!=buffSize) EXM_THROW(4, "%s : write error", dictFileName) }
|
||||
|
||||
{ size_t const n = (size_t)fclose(f);
|
||||
if (n!=0) EXM_THROW(5, "%s : flush error", dictFileName) }
|
||||
}
|
||||
|
||||
/*! getFileStats() :
|
||||
* Given a list of files, and a chunkSize (0 == no chunk, whole files)
|
||||
* provides the amount of data to be loaded and the resulting nb of samples.
|
||||
* This is useful primarily for allocation purpose => sample buffer, and sample sizes table.
|
||||
*/
|
||||
static fileStats getFileStats(const char** fileNamesTable, unsigned nbFiles,
|
||||
size_t chunkSize, unsigned displayLevel) {
|
||||
fileStats fs;
|
||||
unsigned n;
|
||||
memset(&fs, 0, sizeof(fs));
|
||||
for (n=0; n<nbFiles; n++) {
|
||||
U64 const fileSize = UTIL_getFileSize(fileNamesTable[n]);
|
||||
U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? 0 : fileSize;
|
||||
U32 const nbSamples = (U32)(chunkSize ? (srcSize + (chunkSize-1)) / chunkSize : 1);
|
||||
U64 const chunkToLoad = chunkSize ? MIN(chunkSize, srcSize) : srcSize;
|
||||
size_t const cappedChunkSize = (size_t)MIN(chunkToLoad, SAMPLESIZE_MAX);
|
||||
fs.totalSizeToLoad += cappedChunkSize * nbSamples;
|
||||
fs.oneSampleTooLarge |= (chunkSize > 2*SAMPLESIZE_MAX);
|
||||
fs.nbSamples += nbSamples;
|
||||
}
|
||||
DISPLAYLEVEL(4, "Preparing to load : %u KB \n", (U32)(fs.totalSizeToLoad >> 10));
|
||||
return fs;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
sampleInfo* getSampleInfo(const char** fileNamesTable, unsigned nbFiles, size_t chunkSize,
|
||||
unsigned maxDictSize, const unsigned displayLevel) {
|
||||
fileStats const fs = getFileStats(fileNamesTable, nbFiles, chunkSize, displayLevel);
|
||||
size_t* const sampleSizes = (size_t*)malloc(fs.nbSamples * sizeof(size_t));
|
||||
size_t const memMult = RANDOM_MEMMULT;
|
||||
size_t const maxMem = findMaxMem(fs.totalSizeToLoad * memMult) / memMult;
|
||||
size_t loadedSize = (size_t) MIN ((unsigned long long)maxMem, fs.totalSizeToLoad);
|
||||
void* const srcBuffer = malloc(loadedSize+NOISELENGTH);
|
||||
|
||||
/* Checks */
|
||||
if ((!sampleSizes) || (!srcBuffer))
|
||||
EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
|
||||
if (fs.oneSampleTooLarge) {
|
||||
DISPLAYLEVEL(2, "! Warning : some sample(s) are very large \n");
|
||||
DISPLAYLEVEL(2, "! Note that dictionary is only useful for small samples. \n");
|
||||
DISPLAYLEVEL(2, "! As a consequence, only the first %u bytes of each sample are loaded \n", SAMPLESIZE_MAX);
|
||||
}
|
||||
if (fs.nbSamples < 5) {
|
||||
DISPLAYLEVEL(2, "! Warning : nb of samples too low for proper processing ! \n");
|
||||
DISPLAYLEVEL(2, "! Please provide _one file per sample_. \n");
|
||||
DISPLAYLEVEL(2, "! Alternatively, split files into fixed-size blocks representative of samples, with -B# \n");
|
||||
EXM_THROW(14, "nb of samples too low"); /* we now clearly forbid this case */
|
||||
}
|
||||
if (fs.totalSizeToLoad < (unsigned long long)(8 * maxDictSize)) {
|
||||
DISPLAYLEVEL(2, "! Warning : data size of samples too small for target dictionary size \n");
|
||||
DISPLAYLEVEL(2, "! Samples should be about 100x larger than target dictionary size \n");
|
||||
}
|
||||
|
||||
/* init */
|
||||
if (loadedSize < fs.totalSizeToLoad)
|
||||
DISPLAYLEVEL(1, "Not enough memory; training on %u MB only...\n", (unsigned)(loadedSize >> 20));
|
||||
|
||||
/* Load input buffer */
|
||||
DISPLAYLEVEL(3, "Shuffling input files\n");
|
||||
shuffle(fileNamesTable, nbFiles);
|
||||
nbFiles = loadFiles(srcBuffer, &loadedSize, sampleSizes, fs.nbSamples,
|
||||
fileNamesTable, nbFiles, chunkSize, displayLevel);
|
||||
|
||||
sampleInfo *info = (sampleInfo *)malloc(sizeof(sampleInfo));
|
||||
|
||||
info->nbSamples = fs.nbSamples;
|
||||
info->samplesSizes = sampleSizes;
|
||||
info->srcBuffer = srcBuffer;
|
||||
|
||||
return info;
|
||||
}
|
||||
|
||||
|
||||
void freeSampleInfo(sampleInfo *info) {
|
||||
if (!info) return;
|
||||
if (info->samplesSizes) free((void*)(info->samplesSizes));
|
||||
if (info->srcBuffer) free((void*)(info->srcBuffer));
|
||||
free(info);
|
||||
}
|
60
contrib/experimental_dict_builders/randomDictBuilder/io.h
Normal file
60
contrib/experimental_dict_builders/randomDictBuilder/io.h
Normal file
@ -0,0 +1,60 @@
|
||||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* strcmp, strlen */
|
||||
#include <errno.h> /* errno */
|
||||
#include <ctype.h>
|
||||
#include "zstd_internal.h" /* includes zstd.h */
|
||||
#include "fileio.h" /* stdinmark, stdoutmark, ZSTD_EXTENSION */
|
||||
#include "platform.h" /* Large Files support */
|
||||
#include "util.h"
|
||||
#include "zdict.h"
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Structs
|
||||
***************************************/
|
||||
typedef struct {
|
||||
U64 totalSizeToLoad;
|
||||
unsigned oneSampleTooLarge;
|
||||
unsigned nbSamples;
|
||||
} fileStats;
|
||||
|
||||
typedef struct {
|
||||
const void* srcBuffer;
|
||||
const size_t *samplesSizes;
|
||||
size_t nbSamples;
|
||||
}sampleInfo;
|
||||
|
||||
|
||||
|
||||
/*! getSampleInfo():
|
||||
* Load from input files and add samples to buffer
|
||||
* @return: a sampleInfo struct containing infomation about buffer where samples are stored,
|
||||
* size of each sample, and total number of samples
|
||||
*/
|
||||
sampleInfo* getSampleInfo(const char** fileNamesTable, unsigned nbFiles, size_t chunkSize,
|
||||
unsigned maxDictSize, const unsigned displayLevel);
|
||||
|
||||
|
||||
|
||||
/*! freeSampleInfo():
|
||||
* Free memory allocated for info
|
||||
*/
|
||||
void freeSampleInfo(sampleInfo *info);
|
||||
|
||||
|
||||
|
||||
/*! saveDict():
|
||||
* Save data stored on buff to dictFileName
|
||||
*/
|
||||
void saveDict(const char* dictFileName, const void* buff, size_t buffSize);
|
||||
|
||||
|
||||
unsigned readU32FromChar(const char** stringPtr);
|
||||
|
||||
/** longCommandWArg() :
|
||||
* check if *stringPtr is the same as longCommand.
|
||||
* If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
|
||||
* @return 0 and doesn't modify *stringPtr otherwise.
|
||||
*/
|
||||
unsigned longCommandWArg(const char** stringPtr, const char* longCommand);
|
161
contrib/experimental_dict_builders/randomDictBuilder/main.c
Normal file
161
contrib/experimental_dict_builders/randomDictBuilder/main.c
Normal file
@ -0,0 +1,161 @@
|
||||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* strcmp, strlen */
|
||||
#include <errno.h> /* errno */
|
||||
#include <ctype.h>
|
||||
#include "random.h"
|
||||
#include "io.h"
|
||||
#include "util.h"
|
||||
#include "zdict.h"
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Console display
|
||||
***************************************/
|
||||
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
|
||||
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
|
||||
|
||||
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
|
||||
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
|
||||
|
||||
#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
|
||||
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
|
||||
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
|
||||
if (displayLevel>=4) fflush(stderr); } } }
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Exceptions
|
||||
***************************************/
|
||||
#ifndef DEBUG
|
||||
# define DEBUG 0
|
||||
#endif
|
||||
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
|
||||
#define EXM_THROW(error, ...) \
|
||||
{ \
|
||||
DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
|
||||
DISPLAY("Error %i : ", error); \
|
||||
DISPLAY(__VA_ARGS__); \
|
||||
DISPLAY("\n"); \
|
||||
exit(error); \
|
||||
}
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
static const unsigned g_defaultMaxDictSize = 110 KB;
|
||||
#define DEFAULT_CLEVEL 3
|
||||
#define DEFAULT_k 200
|
||||
#define DEFAULT_OUTPUTFILE "defaultDict"
|
||||
#define DEFAULT_DICTID 0
|
||||
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* RANDOM
|
||||
***************************************/
|
||||
int RANDOM_trainFromFiles(const char* dictFileName, sampleInfo *info,
|
||||
unsigned maxDictSize,
|
||||
ZDICT_random_params_t *params) {
|
||||
unsigned const displayLevel = params->zParams.notificationLevel;
|
||||
void* const dictBuffer = malloc(maxDictSize);
|
||||
|
||||
int result = 0;
|
||||
|
||||
/* Checks */
|
||||
if (!dictBuffer)
|
||||
EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
|
||||
|
||||
{ size_t dictSize;
|
||||
dictSize = ZDICT_trainFromBuffer_random(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, *params);
|
||||
DISPLAYLEVEL(2, "k=%u\n", params->k);
|
||||
if (ZDICT_isError(dictSize)) {
|
||||
DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
|
||||
result = 1;
|
||||
goto _done;
|
||||
}
|
||||
/* save dict */
|
||||
DISPLAYLEVEL(2, "Save dictionary of size %u into file %s \n", (U32)dictSize, dictFileName);
|
||||
saveDict(dictFileName, dictBuffer, dictSize);
|
||||
}
|
||||
|
||||
/* clean up */
|
||||
_done:
|
||||
free(dictBuffer);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int main(int argCount, const char* argv[])
|
||||
{
|
||||
int displayLevel = 2;
|
||||
const char* programName = argv[0];
|
||||
int operationResult = 0;
|
||||
|
||||
/* Initialize arguments to default values */
|
||||
unsigned k = DEFAULT_k;
|
||||
const char* outputFile = DEFAULT_OUTPUTFILE;
|
||||
unsigned dictID = DEFAULT_DICTID;
|
||||
unsigned maxDictSize = g_defaultMaxDictSize;
|
||||
|
||||
/* Initialize table to store input files */
|
||||
const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));
|
||||
unsigned filenameIdx = 0;
|
||||
|
||||
/* Parse arguments */
|
||||
for (int i = 1; i < argCount; i++) {
|
||||
const char* argument = argv[i];
|
||||
if (longCommandWArg(&argument, "k=")) { k = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "dictID=")) { dictID = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "in=")) {
|
||||
filenameTable[filenameIdx] = argument;
|
||||
filenameIdx++;
|
||||
continue;
|
||||
}
|
||||
if (longCommandWArg(&argument, "out=")) {
|
||||
outputFile = argument;
|
||||
continue;
|
||||
}
|
||||
DISPLAYLEVEL(1, "Incorrect parameters\n");
|
||||
operationResult = 1;
|
||||
return operationResult;
|
||||
}
|
||||
|
||||
char* fileNamesBuf = NULL;
|
||||
unsigned fileNamesNb = filenameIdx;
|
||||
int followLinks = 0; /* follow directory recursively */
|
||||
const char** extendedFileList = NULL;
|
||||
extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
|
||||
&fileNamesNb, followLinks);
|
||||
if (extendedFileList) {
|
||||
unsigned u;
|
||||
for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
|
||||
free((void*)filenameTable);
|
||||
filenameTable = extendedFileList;
|
||||
filenameIdx = fileNamesNb;
|
||||
}
|
||||
|
||||
size_t blockSize = 0;
|
||||
|
||||
ZDICT_random_params_t params;
|
||||
ZDICT_params_t zParams;
|
||||
zParams.compressionLevel = DEFAULT_CLEVEL;
|
||||
zParams.notificationLevel = displayLevel;
|
||||
zParams.dictID = dictID;
|
||||
params.zParams = zParams;
|
||||
params.k = k;
|
||||
|
||||
sampleInfo* info = getSampleInfo(filenameTable,
|
||||
filenameIdx, blockSize, maxDictSize, zParams.notificationLevel);
|
||||
operationResult = RANDOM_trainFromFiles(outputFile, info, maxDictSize, ¶ms);
|
||||
|
||||
/* Free allocated memory */
|
||||
UTIL_freeFileList(extendedFileList, fileNamesBuf);
|
||||
freeSampleInfo(info);
|
||||
|
||||
return operationResult;
|
||||
}
|
163
contrib/experimental_dict_builders/randomDictBuilder/random.c
Normal file
163
contrib/experimental_dict_builders/randomDictBuilder/random.c
Normal file
@ -0,0 +1,163 @@
|
||||
/*-*************************************
|
||||
* Dependencies
|
||||
***************************************/
|
||||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* memset */
|
||||
#include <time.h> /* clock */
|
||||
#include "random.h"
|
||||
#include "util.h" /* UTIL_getFileSize, UTIL_getTotalFileSize */
|
||||
#ifndef ZDICT_STATIC_LINKING_ONLY
|
||||
#define ZDICT_STATIC_LINKING_ONLY
|
||||
#endif
|
||||
#include "zdict.h"
|
||||
|
||||
/*-*************************************
|
||||
* Console display
|
||||
***************************************/
|
||||
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
|
||||
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
|
||||
|
||||
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
|
||||
if (displayLevel >= l) { \
|
||||
if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
|
||||
g_time = clock(); \
|
||||
DISPLAY(__VA_ARGS__); \
|
||||
} \
|
||||
}
|
||||
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(displayLevel, l, __VA_ARGS__)
|
||||
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
||||
static clock_t g_time = 0;
|
||||
|
||||
|
||||
|
||||
/* ********************************************************
|
||||
* Random Dictionary Builder
|
||||
**********************************************************/
|
||||
/**
|
||||
* Returns the sum of the sample sizes.
|
||||
*/
|
||||
static size_t RANDOM_sum(const size_t *samplesSizes, unsigned nbSamples) {
|
||||
size_t sum = 0;
|
||||
unsigned i;
|
||||
for (i = 0; i < nbSamples; ++i) {
|
||||
sum += samplesSizes[i];
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* A segment is an inclusive range in the source.
|
||||
*/
|
||||
typedef struct {
|
||||
U32 begin;
|
||||
U32 end;
|
||||
} RANDOM_segment_t;
|
||||
|
||||
|
||||
/**
|
||||
* Selects a random segment from totalSamplesSize - k + 1 possible segments
|
||||
*/
|
||||
static RANDOM_segment_t RANDOM_selectSegment(const size_t totalSamplesSize,
|
||||
ZDICT_random_params_t parameters) {
|
||||
const U32 k = parameters.k;
|
||||
RANDOM_segment_t segment;
|
||||
unsigned index;
|
||||
|
||||
/* Randomly generate a number from 0 to sampleSizes - k */
|
||||
index = rand()%(totalSamplesSize - k + 1);
|
||||
|
||||
/* inclusive */
|
||||
segment.begin = index;
|
||||
segment.end = index + k - 1;
|
||||
|
||||
return segment;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Check the validity of the parameters.
|
||||
* Returns non-zero if the parameters are valid and 0 otherwise.
|
||||
*/
|
||||
static int RANDOM_checkParameters(ZDICT_random_params_t parameters,
|
||||
size_t maxDictSize) {
|
||||
/* k is a required parameter */
|
||||
if (parameters.k == 0) {
|
||||
return 0;
|
||||
}
|
||||
/* k <= maxDictSize */
|
||||
if (parameters.k > maxDictSize) {
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Given the prepared context build the dictionary.
|
||||
*/
|
||||
static size_t RANDOM_buildDictionary(const size_t totalSamplesSize, const BYTE *samples,
|
||||
void *dictBuffer, size_t dictBufferCapacity,
|
||||
ZDICT_random_params_t parameters) {
|
||||
BYTE *const dict = (BYTE *)dictBuffer;
|
||||
size_t tail = dictBufferCapacity;
|
||||
const int displayLevel = parameters.zParams.notificationLevel;
|
||||
while (tail > 0) {
|
||||
|
||||
/* Select a segment */
|
||||
RANDOM_segment_t segment = RANDOM_selectSegment(totalSamplesSize, parameters);
|
||||
|
||||
size_t segmentSize;
|
||||
segmentSize = MIN(segment.end - segment.begin + 1, tail);
|
||||
|
||||
tail -= segmentSize;
|
||||
memcpy(dict + tail, samples + segment.begin, segmentSize);
|
||||
DISPLAYUPDATE(
|
||||
2, "\r%u%% ",
|
||||
(U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
|
||||
}
|
||||
|
||||
return tail;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
ZDICTLIB_API size_t ZDICT_trainFromBuffer_random(
|
||||
void *dictBuffer, size_t dictBufferCapacity,
|
||||
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
|
||||
ZDICT_random_params_t parameters) {
|
||||
const int displayLevel = parameters.zParams.notificationLevel;
|
||||
BYTE* const dict = (BYTE*)dictBuffer;
|
||||
/* Checks */
|
||||
if (!RANDOM_checkParameters(parameters, dictBufferCapacity)) {
|
||||
DISPLAYLEVEL(1, "k is incorrect\n");
|
||||
return ERROR(GENERIC);
|
||||
}
|
||||
if (nbSamples == 0) {
|
||||
DISPLAYLEVEL(1, "Random must have at least one input file\n");
|
||||
return ERROR(GENERIC);
|
||||
}
|
||||
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
|
||||
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
|
||||
ZDICT_DICTSIZE_MIN);
|
||||
return ERROR(dstSize_tooSmall);
|
||||
}
|
||||
const size_t totalSamplesSize = RANDOM_sum(samplesSizes, nbSamples);
|
||||
const BYTE *const samples = (const BYTE *)samplesBuffer;
|
||||
|
||||
DISPLAYLEVEL(2, "Building dictionary\n");
|
||||
{
|
||||
const size_t tail = RANDOM_buildDictionary(totalSamplesSize, samples,
|
||||
dictBuffer, dictBufferCapacity, parameters);
|
||||
const size_t dictSize = ZDICT_finalizeDictionary(
|
||||
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
|
||||
samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
|
||||
if (!ZSTD_isError(dictSize)) {
|
||||
DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
|
||||
(U32)dictSize);
|
||||
}
|
||||
return dictSize;
|
||||
}
|
||||
}
|
@ -0,0 +1,29 @@
|
||||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* memset */
|
||||
#include <time.h> /* clock */
|
||||
#include "zstd_internal.h" /* includes zstd.h */
|
||||
#ifndef ZDICT_STATIC_LINKING_ONLY
|
||||
#define ZDICT_STATIC_LINKING_ONLY
|
||||
#endif
|
||||
#include "zdict.h"
|
||||
|
||||
|
||||
|
||||
typedef struct {
|
||||
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+]; Default to 200 */
|
||||
ZDICT_params_t zParams;
|
||||
} ZDICT_random_params_t;
|
||||
|
||||
|
||||
/*! ZDICT_trainFromBuffer_random():
|
||||
* Train a dictionary from an array of samples.
|
||||
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
||||
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
||||
* The resulting dictionary will be saved into `dictBuffer`.
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
*/
|
||||
ZDICTLIB_API size_t ZDICT_trainFromBuffer_random( void *dictBuffer, size_t dictBufferCapacity,
|
||||
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
|
||||
ZDICT_random_params_t parameters);
|
14
contrib/experimental_dict_builders/randomDictBuilder/test.sh
Executable file
14
contrib/experimental_dict_builders/randomDictBuilder/test.sh
Executable file
@ -0,0 +1,14 @@
|
||||
echo "Building random dictionary with in=../../lib/common k=200 out=dict1"
|
||||
./main in=../../../lib/common k=200 out=dict1
|
||||
zstd -be3 -D dict1 -r ../../../lib/common -q
|
||||
echo "Building random dictionary with in=../../lib/common k=500 out=dict2 dictID=100 maxdict=140000"
|
||||
./main in=../../../lib/common k=500 out=dict2 dictID=100 maxdict=140000
|
||||
zstd -be3 -D dict2 -r ../../../lib/common -q
|
||||
echo "Building random dictionary with 2 sample sources"
|
||||
./main in=../../../lib/common in=../../../lib/compress out=dict3
|
||||
zstd -be3 -D dict3 -r ../../../lib/common -q
|
||||
echo "Removing dict1 dict2 dict3"
|
||||
rm -f dict1 dict2 dict3
|
||||
|
||||
echo "Testing with invalid parameters, should fail"
|
||||
! ./main r=10
|
58
contrib/largeNbDicts/Makefile
Normal file
58
contrib/largeNbDicts/Makefile
Normal file
@ -0,0 +1,58 @@
|
||||
# ################################################################
|
||||
# Copyright (c) 2018-present, Yann Collet, Facebook, Inc.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under both the BSD-style license (found in the
|
||||
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
# in the COPYING file in the root directory of this source tree).
|
||||
# ################################################################
|
||||
|
||||
PROGDIR = ../../programs
|
||||
LIBDIR = ../../lib
|
||||
|
||||
LIBZSTD = $(LIBDIR)/libzstd.a
|
||||
|
||||
CPPFLAGS+= -I$(LIBDIR) -I$(LIBDIR)/common -I$(LIBDIR)/dictBuilder -I$(PROGDIR)
|
||||
|
||||
CFLAGS ?= -O3
|
||||
CFLAGS += -std=gnu99
|
||||
DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
|
||||
-Wstrict-aliasing=1 -Wswitch-enum \
|
||||
-Wstrict-prototypes -Wundef -Wpointer-arith \
|
||||
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
|
||||
-Wredundant-decls
|
||||
CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)
|
||||
|
||||
|
||||
default: largeNbDicts
|
||||
|
||||
all : largeNbDicts
|
||||
|
||||
largeNbDicts: util.o timefn.o benchfn.o datagen.o xxhash.o largeNbDicts.c $(LIBZSTD)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
|
||||
|
||||
.PHONY: $(LIBZSTD)
|
||||
$(LIBZSTD):
|
||||
$(MAKE) -C $(LIBDIR) libzstd.a CFLAGS="$(CFLAGS)"
|
||||
|
||||
benchfn.o: $(PROGDIR)/benchfn.c
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
|
||||
|
||||
timefn.o: $(PROGDIR)/timefn.c
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
|
||||
|
||||
datagen.o: $(PROGDIR)/datagen.c
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
|
||||
|
||||
util.o: $(PROGDIR)/util.c
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
|
||||
|
||||
|
||||
xxhash.o : $(LIBDIR)/common/xxhash.c
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
|
||||
|
||||
|
||||
clean:
|
||||
$(RM) *.o
|
||||
$(MAKE) -C $(LIBDIR) clean > /dev/null
|
||||
$(RM) largeNbDicts
|
25
contrib/largeNbDicts/README.md
Normal file
25
contrib/largeNbDicts/README.md
Normal file
@ -0,0 +1,25 @@
|
||||
largeNbDicts
|
||||
=====================
|
||||
|
||||
`largeNbDicts` is a benchmark test tool
|
||||
dedicated to the specific scenario of
|
||||
dictionary decompression using a very large number of dictionaries.
|
||||
When dictionaries are constantly changing, they are always "cold",
|
||||
suffering from increased latency due to cache misses.
|
||||
|
||||
The tool is created in a bid to investigate performance for this scenario,
|
||||
and experiment mitigation techniques.
|
||||
|
||||
Command line :
|
||||
```
|
||||
largeNbDicts [Options] filename(s)
|
||||
|
||||
Options :
|
||||
-r : recursively load all files in subdirectories (default: off)
|
||||
-B# : split input into blocks of size # (default: no split)
|
||||
-# : use compression level # (default: 3)
|
||||
-D # : use # as a dictionary (default: create one)
|
||||
-i# : nb benchmark rounds (default: 6)
|
||||
--nbDicts=# : set nb of dictionaries to # (default: one per block)
|
||||
-h : help (this text)
|
||||
```
|
817
contrib/largeNbDicts/largeNbDicts.c
Normal file
817
contrib/largeNbDicts/largeNbDicts.c
Normal file
@ -0,0 +1,817 @@
|
||||
/*
|
||||
* Copyright (c) 2018-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
/* largeNbDicts
|
||||
* This is a benchmark test tool
|
||||
* dedicated to the specific case of dictionary decompression
|
||||
* using a very large nb of dictionaries
|
||||
* thus suffering latency from lots of cache misses.
|
||||
* It's created in a bid to investigate performance and find optimizations. */
|
||||
|
||||
|
||||
/*--- Dependencies ---*/
|
||||
|
||||
#include <stddef.h> /* size_t */
|
||||
#include <stdlib.h> /* malloc, free, abort */
|
||||
#include <stdio.h> /* fprintf */
|
||||
#include <limits.h> /* UINT_MAX */
|
||||
#include <assert.h> /* assert */
|
||||
|
||||
#include "util.h"
|
||||
#include "benchfn.h"
|
||||
#define ZSTD_STATIC_LINKING_ONLY
|
||||
#include "zstd.h"
|
||||
#include "zdict.h"
|
||||
|
||||
|
||||
/*--- Constants --- */
|
||||
|
||||
#define KB *(1<<10)
|
||||
#define MB *(1<<20)
|
||||
|
||||
#define BLOCKSIZE_DEFAULT 0 /* no slicing into blocks */
|
||||
#define DICTSIZE (4 KB)
|
||||
#define CLEVEL_DEFAULT 3
|
||||
|
||||
#define BENCH_TIME_DEFAULT_S 6
|
||||
#define RUN_TIME_DEFAULT_MS 1000
|
||||
#define BENCH_TIME_DEFAULT_MS (BENCH_TIME_DEFAULT_S * RUN_TIME_DEFAULT_MS)
|
||||
|
||||
#define DISPLAY_LEVEL_DEFAULT 3
|
||||
|
||||
#define BENCH_SIZE_MAX (1200 MB)
|
||||
|
||||
|
||||
/*--- Macros ---*/
|
||||
|
||||
#define CONTROL(c) { if (!(c)) abort(); }
|
||||
#undef MIN
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
|
||||
/*--- Display Macros ---*/
|
||||
|
||||
#define DISPLAY(...) fprintf(stdout, __VA_ARGS__)
|
||||
#define DISPLAYLEVEL(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } }
|
||||
static int g_displayLevel = DISPLAY_LEVEL_DEFAULT; /* 0 : no display, 1: errors, 2 : + result + interaction + warnings, 3 : + progression, 4 : + information */
|
||||
|
||||
|
||||
/*--- buffer_t ---*/
|
||||
|
||||
typedef struct {
|
||||
void* ptr;
|
||||
size_t size;
|
||||
size_t capacity;
|
||||
} buffer_t;
|
||||
|
||||
static const buffer_t kBuffNull = { NULL, 0, 0 };
|
||||
|
||||
/* @return : kBuffNull if any error */
|
||||
static buffer_t createBuffer(size_t capacity)
|
||||
{
|
||||
assert(capacity > 0);
|
||||
void* const ptr = malloc(capacity);
|
||||
if (ptr==NULL) return kBuffNull;
|
||||
|
||||
buffer_t buffer;
|
||||
buffer.ptr = ptr;
|
||||
buffer.capacity = capacity;
|
||||
buffer.size = 0;
|
||||
return buffer;
|
||||
}
|
||||
|
||||
static void freeBuffer(buffer_t buff)
|
||||
{
|
||||
free(buff.ptr);
|
||||
}
|
||||
|
||||
|
||||
static void fillBuffer_fromHandle(buffer_t* buff, FILE* f)
|
||||
{
|
||||
size_t const readSize = fread(buff->ptr, 1, buff->capacity, f);
|
||||
buff->size = readSize;
|
||||
}
|
||||
|
||||
|
||||
/* @return : kBuffNull if any error */
|
||||
static buffer_t createBuffer_fromFile(const char* fileName)
|
||||
{
|
||||
U64 const fileSize = UTIL_getFileSize(fileName);
|
||||
size_t const bufferSize = (size_t) fileSize;
|
||||
|
||||
if (fileSize == UTIL_FILESIZE_UNKNOWN) return kBuffNull;
|
||||
assert((U64)bufferSize == fileSize); /* check overflow */
|
||||
|
||||
{ FILE* const f = fopen(fileName, "rb");
|
||||
if (f == NULL) return kBuffNull;
|
||||
|
||||
buffer_t buff = createBuffer(bufferSize);
|
||||
CONTROL(buff.ptr != NULL);
|
||||
|
||||
fillBuffer_fromHandle(&buff, f);
|
||||
CONTROL(buff.size == buff.capacity);
|
||||
|
||||
fclose(f); /* do nothing specific if fclose() fails */
|
||||
return buff;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* @return : kBuffNull if any error */
|
||||
static buffer_t
|
||||
createDictionaryBuffer(const char* dictionaryName,
|
||||
const void* srcBuffer,
|
||||
const size_t* srcBlockSizes, size_t nbBlocks,
|
||||
size_t requestedDictSize)
|
||||
{
|
||||
if (dictionaryName) {
|
||||
DISPLAYLEVEL(3, "loading dictionary %s \n", dictionaryName);
|
||||
return createBuffer_fromFile(dictionaryName); /* note : result might be kBuffNull */
|
||||
|
||||
} else {
|
||||
|
||||
DISPLAYLEVEL(3, "creating dictionary, of target size %u bytes \n",
|
||||
(unsigned)requestedDictSize);
|
||||
void* const dictBuffer = malloc(requestedDictSize);
|
||||
CONTROL(dictBuffer != NULL);
|
||||
|
||||
assert(nbBlocks <= UINT_MAX);
|
||||
size_t const dictSize = ZDICT_trainFromBuffer(dictBuffer, requestedDictSize,
|
||||
srcBuffer,
|
||||
srcBlockSizes, (unsigned)nbBlocks);
|
||||
CONTROL(!ZSTD_isError(dictSize));
|
||||
|
||||
buffer_t result;
|
||||
result.ptr = dictBuffer;
|
||||
result.capacity = requestedDictSize;
|
||||
result.size = dictSize;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*! BMK_loadFiles() :
|
||||
* Loads `buffer`, with content from files listed within `fileNamesTable`.
|
||||
* Fills `buffer` entirely.
|
||||
* @return : 0 on success, !=0 on error */
|
||||
static int loadFiles(void* buffer, size_t bufferSize,
|
||||
size_t* fileSizes,
|
||||
const char* const * fileNamesTable, unsigned nbFiles)
|
||||
{
|
||||
size_t pos = 0, totalSize = 0;
|
||||
|
||||
for (unsigned n=0; n<nbFiles; n++) {
|
||||
U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
|
||||
if (UTIL_isDirectory(fileNamesTable[n])) {
|
||||
fileSizes[n] = 0;
|
||||
continue;
|
||||
}
|
||||
if (fileSize == UTIL_FILESIZE_UNKNOWN) {
|
||||
fileSizes[n] = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
FILE* const f = fopen(fileNamesTable[n], "rb");
|
||||
assert(f!=NULL);
|
||||
|
||||
assert(pos <= bufferSize);
|
||||
assert(fileSize <= bufferSize - pos);
|
||||
|
||||
{ size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
|
||||
assert(readSize == fileSize);
|
||||
pos += readSize;
|
||||
}
|
||||
fileSizes[n] = (size_t)fileSize;
|
||||
totalSize += (size_t)fileSize;
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
assert(totalSize == bufferSize);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*--- slice_collection_t ---*/
|
||||
|
||||
typedef struct {
|
||||
void** slicePtrs;
|
||||
size_t* capacities;
|
||||
size_t nbSlices;
|
||||
} slice_collection_t;
|
||||
|
||||
static const slice_collection_t kNullCollection = { NULL, NULL, 0 };
|
||||
|
||||
static void freeSliceCollection(slice_collection_t collection)
|
||||
{
|
||||
free(collection.slicePtrs);
|
||||
free(collection.capacities);
|
||||
}
|
||||
|
||||
/* shrinkSizes() :
|
||||
* downsizes sizes of slices within collection, according to `newSizes`.
|
||||
* every `newSizes` entry must be <= than its corresponding collection size */
|
||||
void shrinkSizes(slice_collection_t collection,
|
||||
const size_t* newSizes) /* presumed same size as collection */
|
||||
{
|
||||
size_t const nbSlices = collection.nbSlices;
|
||||
for (size_t blockNb = 0; blockNb < nbSlices; blockNb++) {
|
||||
assert(newSizes[blockNb] <= collection.capacities[blockNb]);
|
||||
collection.capacities[blockNb] = newSizes[blockNb];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* splitSlices() :
|
||||
* nbSlices : if == 0, nbSlices is automatically determined from srcSlices and blockSize.
|
||||
* otherwise, creates exactly nbSlices slices,
|
||||
* by either truncating input (when smaller)
|
||||
* or repeating input from beginning */
|
||||
static slice_collection_t
|
||||
splitSlices(slice_collection_t srcSlices, size_t blockSize, size_t nbSlices)
|
||||
{
|
||||
if (blockSize==0) blockSize = (size_t)(-1); /* means "do not cut" */
|
||||
size_t nbSrcBlocks = 0;
|
||||
for (size_t ssnb=0; ssnb < srcSlices.nbSlices; ssnb++) {
|
||||
size_t pos = 0;
|
||||
while (pos <= srcSlices.capacities[ssnb]) {
|
||||
nbSrcBlocks++;
|
||||
pos += blockSize;
|
||||
}
|
||||
}
|
||||
|
||||
if (nbSlices == 0) nbSlices = nbSrcBlocks;
|
||||
|
||||
void** const sliceTable = (void**)malloc(nbSlices * sizeof(*sliceTable));
|
||||
size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
|
||||
if (sliceTable == NULL || capacities == NULL) {
|
||||
free(sliceTable);
|
||||
free(capacities);
|
||||
return kNullCollection;
|
||||
}
|
||||
|
||||
size_t ssnb = 0;
|
||||
for (size_t sliceNb=0; sliceNb < nbSlices; ) {
|
||||
ssnb = (ssnb + 1) % srcSlices.nbSlices;
|
||||
size_t pos = 0;
|
||||
char* const ptr = (char*)srcSlices.slicePtrs[ssnb];
|
||||
while (pos < srcSlices.capacities[ssnb] && sliceNb < nbSlices) {
|
||||
size_t const size = MIN(blockSize, srcSlices.capacities[ssnb] - pos);
|
||||
sliceTable[sliceNb] = ptr + pos;
|
||||
capacities[sliceNb] = size;
|
||||
sliceNb++;
|
||||
pos += blockSize;
|
||||
}
|
||||
}
|
||||
|
||||
slice_collection_t result;
|
||||
result.nbSlices = nbSlices;
|
||||
result.slicePtrs = sliceTable;
|
||||
result.capacities = capacities;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
static size_t sliceCollection_totalCapacity(slice_collection_t sc)
|
||||
{
|
||||
size_t totalSize = 0;
|
||||
for (size_t n=0; n<sc.nbSlices; n++)
|
||||
totalSize += sc.capacities[n];
|
||||
return totalSize;
|
||||
}
|
||||
|
||||
|
||||
/* --- buffer collection --- */
|
||||
|
||||
typedef struct {
|
||||
buffer_t buffer;
|
||||
slice_collection_t slices;
|
||||
} buffer_collection_t;
|
||||
|
||||
|
||||
static void freeBufferCollection(buffer_collection_t bc)
|
||||
{
|
||||
freeBuffer(bc.buffer);
|
||||
freeSliceCollection(bc.slices);
|
||||
}
|
||||
|
||||
|
||||
static buffer_collection_t
|
||||
createBufferCollection_fromSliceCollectionSizes(slice_collection_t sc)
|
||||
{
|
||||
size_t const bufferSize = sliceCollection_totalCapacity(sc);
|
||||
|
||||
buffer_t buffer = createBuffer(bufferSize);
|
||||
CONTROL(buffer.ptr != NULL);
|
||||
|
||||
size_t const nbSlices = sc.nbSlices;
|
||||
void** const slices = (void**)malloc(nbSlices * sizeof(*slices));
|
||||
CONTROL(slices != NULL);
|
||||
|
||||
size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
|
||||
CONTROL(capacities != NULL);
|
||||
|
||||
char* const ptr = (char*)buffer.ptr;
|
||||
size_t pos = 0;
|
||||
for (size_t n=0; n < nbSlices; n++) {
|
||||
capacities[n] = sc.capacities[n];
|
||||
slices[n] = ptr + pos;
|
||||
pos += capacities[n];
|
||||
}
|
||||
|
||||
buffer_collection_t result;
|
||||
result.buffer = buffer;
|
||||
result.slices.nbSlices = nbSlices;
|
||||
result.slices.capacities = capacities;
|
||||
result.slices.slicePtrs = slices;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/* @return : kBuffNull if any error */
|
||||
static buffer_collection_t
|
||||
createBufferCollection_fromFiles(const char* const * fileNamesTable, unsigned nbFiles)
|
||||
{
|
||||
U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
|
||||
assert(totalSizeToLoad != UTIL_FILESIZE_UNKNOWN);
|
||||
assert(totalSizeToLoad <= BENCH_SIZE_MAX);
|
||||
size_t const loadedSize = (size_t)totalSizeToLoad;
|
||||
assert(loadedSize > 0);
|
||||
void* const srcBuffer = malloc(loadedSize);
|
||||
assert(srcBuffer != NULL);
|
||||
|
||||
assert(nbFiles > 0);
|
||||
size_t* const fileSizes = (size_t*)calloc(nbFiles, sizeof(*fileSizes));
|
||||
assert(fileSizes != NULL);
|
||||
|
||||
/* Load input buffer */
|
||||
int const errorCode = loadFiles(srcBuffer, loadedSize,
|
||||
fileSizes,
|
||||
fileNamesTable, nbFiles);
|
||||
assert(errorCode == 0);
|
||||
|
||||
void** sliceTable = (void**)malloc(nbFiles * sizeof(*sliceTable));
|
||||
assert(sliceTable != NULL);
|
||||
|
||||
char* const ptr = (char*)srcBuffer;
|
||||
size_t pos = 0;
|
||||
unsigned fileNb = 0;
|
||||
for ( ; (pos < loadedSize) && (fileNb < nbFiles); fileNb++) {
|
||||
sliceTable[fileNb] = ptr + pos;
|
||||
pos += fileSizes[fileNb];
|
||||
}
|
||||
assert(pos == loadedSize);
|
||||
assert(fileNb == nbFiles);
|
||||
|
||||
|
||||
buffer_t buffer;
|
||||
buffer.ptr = srcBuffer;
|
||||
buffer.capacity = loadedSize;
|
||||
buffer.size = loadedSize;
|
||||
|
||||
slice_collection_t slices;
|
||||
slices.slicePtrs = sliceTable;
|
||||
slices.capacities = fileSizes;
|
||||
slices.nbSlices = nbFiles;
|
||||
|
||||
buffer_collection_t bc;
|
||||
bc.buffer = buffer;
|
||||
bc.slices = slices;
|
||||
return bc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*--- ddict_collection_t ---*/
|
||||
|
||||
typedef struct {
|
||||
ZSTD_DDict** ddicts;
|
||||
size_t nbDDict;
|
||||
} ddict_collection_t;
|
||||
|
||||
static const ddict_collection_t kNullDDictCollection = { NULL, 0 };
|
||||
|
||||
static void freeDDictCollection(ddict_collection_t ddictc)
|
||||
{
|
||||
for (size_t dictNb=0; dictNb < ddictc.nbDDict; dictNb++) {
|
||||
ZSTD_freeDDict(ddictc.ddicts[dictNb]);
|
||||
}
|
||||
free(ddictc.ddicts);
|
||||
}
|
||||
|
||||
/* returns .buffers=NULL if operation fails */
|
||||
static ddict_collection_t createDDictCollection(const void* dictBuffer, size_t dictSize, size_t nbDDict)
|
||||
{
|
||||
ZSTD_DDict** const ddicts = malloc(nbDDict * sizeof(ZSTD_DDict*));
|
||||
assert(ddicts != NULL);
|
||||
if (ddicts==NULL) return kNullDDictCollection;
|
||||
for (size_t dictNb=0; dictNb < nbDDict; dictNb++) {
|
||||
ddicts[dictNb] = ZSTD_createDDict(dictBuffer, dictSize);
|
||||
assert(ddicts[dictNb] != NULL);
|
||||
}
|
||||
ddict_collection_t ddictc;
|
||||
ddictc.ddicts = ddicts;
|
||||
ddictc.nbDDict = nbDDict;
|
||||
return ddictc;
|
||||
}
|
||||
|
||||
|
||||
/* mess with addresses, so that linear scanning dictionaries != linear address scanning */
|
||||
void shuffleDictionaries(ddict_collection_t dicts)
|
||||
{
|
||||
size_t const nbDicts = dicts.nbDDict;
|
||||
for (size_t r=0; r<nbDicts; r++) {
|
||||
size_t const d = rand() % nbDicts;
|
||||
ZSTD_DDict* tmpd = dicts.ddicts[d];
|
||||
dicts.ddicts[d] = dicts.ddicts[r];
|
||||
dicts.ddicts[r] = tmpd;
|
||||
}
|
||||
for (size_t r=0; r<nbDicts; r++) {
|
||||
size_t const d1 = rand() % nbDicts;
|
||||
size_t const d2 = rand() % nbDicts;
|
||||
ZSTD_DDict* tmpd = dicts.ddicts[d1];
|
||||
dicts.ddicts[d1] = dicts.ddicts[d2];
|
||||
dicts.ddicts[d2] = tmpd;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* --- Compression --- */
|
||||
|
||||
/* compressBlocks() :
|
||||
* @return : total compressed size of all blocks,
|
||||
* or 0 if error.
|
||||
*/
|
||||
static size_t compressBlocks(size_t* cSizes, /* optional (can be NULL). If present, must contain at least nbBlocks fields */
|
||||
slice_collection_t dstBlockBuffers,
|
||||
slice_collection_t srcBlockBuffers,
|
||||
ZSTD_CDict* cdict, int cLevel)
|
||||
{
|
||||
size_t const nbBlocks = srcBlockBuffers.nbSlices;
|
||||
assert(dstBlockBuffers.nbSlices == srcBlockBuffers.nbSlices);
|
||||
|
||||
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
|
||||
assert(cctx != NULL);
|
||||
|
||||
size_t totalCSize = 0;
|
||||
for (size_t blockNb=0; blockNb < nbBlocks; blockNb++) {
|
||||
size_t cBlockSize;
|
||||
if (cdict == NULL) {
|
||||
cBlockSize = ZSTD_compressCCtx(cctx,
|
||||
dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
|
||||
srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
|
||||
cLevel);
|
||||
} else {
|
||||
cBlockSize = ZSTD_compress_usingCDict(cctx,
|
||||
dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
|
||||
srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
|
||||
cdict);
|
||||
}
|
||||
CONTROL(!ZSTD_isError(cBlockSize));
|
||||
if (cSizes) cSizes[blockNb] = cBlockSize;
|
||||
totalCSize += cBlockSize;
|
||||
}
|
||||
return totalCSize;
|
||||
}
|
||||
|
||||
|
||||
/* --- Benchmark --- */
|
||||
|
||||
typedef struct {
|
||||
ZSTD_DCtx* dctx;
|
||||
size_t nbDicts;
|
||||
size_t dictNb;
|
||||
ddict_collection_t dictionaries;
|
||||
} decompressInstructions;
|
||||
|
||||
decompressInstructions createDecompressInstructions(ddict_collection_t dictionaries)
|
||||
{
|
||||
decompressInstructions di;
|
||||
di.dctx = ZSTD_createDCtx();
|
||||
assert(di.dctx != NULL);
|
||||
di.nbDicts = dictionaries.nbDDict;
|
||||
di.dictNb = 0;
|
||||
di.dictionaries = dictionaries;
|
||||
return di;
|
||||
}
|
||||
|
||||
void freeDecompressInstructions(decompressInstructions di)
|
||||
{
|
||||
ZSTD_freeDCtx(di.dctx);
|
||||
}
|
||||
|
||||
/* benched function */
|
||||
size_t decompress(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* payload)
|
||||
{
|
||||
decompressInstructions* const di = (decompressInstructions*) payload;
|
||||
|
||||
size_t const result = ZSTD_decompress_usingDDict(di->dctx,
|
||||
dst, dstCapacity,
|
||||
src, srcSize,
|
||||
di->dictionaries.ddicts[di->dictNb]);
|
||||
|
||||
di->dictNb = di->dictNb + 1;
|
||||
if (di->dictNb >= di->nbDicts) di->dictNb = 0;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
static int benchMem(slice_collection_t dstBlocks,
|
||||
slice_collection_t srcBlocks,
|
||||
ddict_collection_t dictionaries,
|
||||
int nbRounds)
|
||||
{
|
||||
assert(dstBlocks.nbSlices == srcBlocks.nbSlices);
|
||||
|
||||
unsigned const ms_per_round = RUN_TIME_DEFAULT_MS;
|
||||
unsigned const total_time_ms = nbRounds * ms_per_round;
|
||||
|
||||
double bestSpeed = 0.;
|
||||
|
||||
BMK_timedFnState_t* const benchState =
|
||||
BMK_createTimedFnState(total_time_ms, ms_per_round);
|
||||
decompressInstructions di = createDecompressInstructions(dictionaries);
|
||||
BMK_benchParams_t const bp = {
|
||||
.benchFn = decompress,
|
||||
.benchPayload = &di,
|
||||
.initFn = NULL,
|
||||
.initPayload = NULL,
|
||||
.errorFn = ZSTD_isError,
|
||||
.blockCount = dstBlocks.nbSlices,
|
||||
.srcBuffers = (const void* const*) srcBlocks.slicePtrs,
|
||||
.srcSizes = srcBlocks.capacities,
|
||||
.dstBuffers = dstBlocks.slicePtrs,
|
||||
.dstCapacities = dstBlocks.capacities,
|
||||
.blockResults = NULL
|
||||
};
|
||||
|
||||
for (;;) {
|
||||
BMK_runOutcome_t const outcome = BMK_benchTimedFn(benchState, bp);
|
||||
CONTROL(BMK_isSuccessful_runOutcome(outcome));
|
||||
|
||||
BMK_runTime_t const result = BMK_extract_runTime(outcome);
|
||||
U64 const dTime_ns = result.nanoSecPerRun;
|
||||
double const dTime_sec = (double)dTime_ns / 1000000000;
|
||||
size_t const srcSize = result.sumOfReturn;
|
||||
double const dSpeed_MBps = (double)srcSize / dTime_sec / (1 MB);
|
||||
if (dSpeed_MBps > bestSpeed) bestSpeed = dSpeed_MBps;
|
||||
DISPLAY("Decompression Speed : %.1f MB/s \r", bestSpeed);
|
||||
fflush(stdout);
|
||||
if (BMK_isCompleted_TimedFn(benchState)) break;
|
||||
}
|
||||
DISPLAY("\n");
|
||||
|
||||
freeDecompressInstructions(di);
|
||||
BMK_freeTimedFnState(benchState);
|
||||
|
||||
return 0; /* success */
|
||||
}
|
||||
|
||||
|
||||
/*! bench() :
|
||||
* fileName : file to load for benchmarking purpose
|
||||
* dictionary : optional (can be NULL), file to load as dictionary,
|
||||
* if none provided : will be calculated on the fly by the program.
|
||||
* @return : 0 is success, 1+ otherwise */
|
||||
int bench(const char** fileNameTable, unsigned nbFiles,
|
||||
const char* dictionary,
|
||||
size_t blockSize, int clevel,
|
||||
unsigned nbDictMax, unsigned nbBlocks,
|
||||
int nbRounds)
|
||||
{
|
||||
int result = 0;
|
||||
|
||||
DISPLAYLEVEL(3, "loading %u files... \n", nbFiles);
|
||||
buffer_collection_t const srcs = createBufferCollection_fromFiles(fileNameTable, nbFiles);
|
||||
CONTROL(srcs.buffer.ptr != NULL);
|
||||
buffer_t srcBuffer = srcs.buffer;
|
||||
size_t const srcSize = srcBuffer.size;
|
||||
DISPLAYLEVEL(3, "created src buffer of size %.1f MB \n",
|
||||
(double)srcSize / (1 MB));
|
||||
|
||||
slice_collection_t const srcSlices = splitSlices(srcs.slices, blockSize, nbBlocks);
|
||||
nbBlocks = (unsigned)(srcSlices.nbSlices);
|
||||
DISPLAYLEVEL(3, "split input into %u blocks ", nbBlocks);
|
||||
if (blockSize)
|
||||
DISPLAYLEVEL(3, "of max size %u bytes ", (unsigned)blockSize);
|
||||
DISPLAYLEVEL(3, "\n");
|
||||
size_t const totalSrcSlicesSize = sliceCollection_totalCapacity(srcSlices);
|
||||
|
||||
|
||||
size_t* const dstCapacities = malloc(nbBlocks * sizeof(*dstCapacities));
|
||||
CONTROL(dstCapacities != NULL);
|
||||
size_t dstBufferCapacity = 0;
|
||||
for (size_t bnb=0; bnb<nbBlocks; bnb++) {
|
||||
dstCapacities[bnb] = ZSTD_compressBound(srcSlices.capacities[bnb]);
|
||||
dstBufferCapacity += dstCapacities[bnb];
|
||||
}
|
||||
|
||||
buffer_t dstBuffer = createBuffer(dstBufferCapacity);
|
||||
CONTROL(dstBuffer.ptr != NULL);
|
||||
|
||||
void** const sliceTable = malloc(nbBlocks * sizeof(*sliceTable));
|
||||
CONTROL(sliceTable != NULL);
|
||||
|
||||
{ char* const ptr = dstBuffer.ptr;
|
||||
size_t pos = 0;
|
||||
for (size_t snb=0; snb < nbBlocks; snb++) {
|
||||
sliceTable[snb] = ptr + pos;
|
||||
pos += dstCapacities[snb];
|
||||
} }
|
||||
|
||||
slice_collection_t dstSlices;
|
||||
dstSlices.capacities = dstCapacities;
|
||||
dstSlices.slicePtrs = sliceTable;
|
||||
dstSlices.nbSlices = nbBlocks;
|
||||
|
||||
|
||||
/* dictionary determination */
|
||||
buffer_t const dictBuffer = createDictionaryBuffer(dictionary,
|
||||
srcs.buffer.ptr,
|
||||
srcs.slices.capacities, srcs.slices.nbSlices,
|
||||
DICTSIZE);
|
||||
CONTROL(dictBuffer.ptr != NULL);
|
||||
|
||||
ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer.ptr, dictBuffer.size, clevel);
|
||||
CONTROL(cdict != NULL);
|
||||
|
||||
size_t const cTotalSizeNoDict = compressBlocks(NULL, dstSlices, srcSlices, NULL, clevel);
|
||||
CONTROL(cTotalSizeNoDict != 0);
|
||||
DISPLAYLEVEL(3, "compressing at level %u without dictionary : Ratio=%.2f (%u bytes) \n",
|
||||
clevel,
|
||||
(double)totalSrcSlicesSize / cTotalSizeNoDict, (unsigned)cTotalSizeNoDict);
|
||||
|
||||
size_t* const cSizes = malloc(nbBlocks * sizeof(size_t));
|
||||
CONTROL(cSizes != NULL);
|
||||
|
||||
size_t const cTotalSize = compressBlocks(cSizes, dstSlices, srcSlices, cdict, clevel);
|
||||
CONTROL(cTotalSize != 0);
|
||||
DISPLAYLEVEL(3, "compressed using a %u bytes dictionary : Ratio=%.2f (%u bytes) \n",
|
||||
(unsigned)dictBuffer.size,
|
||||
(double)totalSrcSlicesSize / cTotalSize, (unsigned)cTotalSize);
|
||||
|
||||
/* now dstSlices contain the real compressed size of each block, instead of the maximum capacity */
|
||||
shrinkSizes(dstSlices, cSizes);
|
||||
|
||||
size_t const dictMem = ZSTD_estimateDDictSize(dictBuffer.size, ZSTD_dlm_byCopy);
|
||||
unsigned const nbDicts = nbDictMax ? nbDictMax : nbBlocks;
|
||||
size_t const allDictMem = dictMem * nbDicts;
|
||||
DISPLAYLEVEL(3, "generating %u dictionaries, using %.1f MB of memory \n",
|
||||
nbDicts, (double)allDictMem / (1 MB));
|
||||
|
||||
ddict_collection_t const dictionaries = createDDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts);
|
||||
CONTROL(dictionaries.ddicts != NULL);
|
||||
|
||||
shuffleDictionaries(dictionaries);
|
||||
|
||||
buffer_collection_t resultCollection = createBufferCollection_fromSliceCollectionSizes(srcSlices);
|
||||
CONTROL(resultCollection.buffer.ptr != NULL);
|
||||
|
||||
result = benchMem(resultCollection.slices, dstSlices, dictionaries, nbRounds);
|
||||
|
||||
/* free all heap objects in reverse order */
|
||||
freeBufferCollection(resultCollection);
|
||||
freeDDictCollection(dictionaries);
|
||||
free(cSizes);
|
||||
ZSTD_freeCDict(cdict);
|
||||
freeBuffer(dictBuffer);
|
||||
freeSliceCollection(dstSlices);
|
||||
freeBuffer(dstBuffer);
|
||||
freeSliceCollection(srcSlices);
|
||||
freeBufferCollection(srcs);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* --- Command Line --- */
|
||||
|
||||
/*! readU32FromChar() :
|
||||
* @return : unsigned integer value read from input in `char` format.
|
||||
* allows and interprets K, KB, KiB, M, MB and MiB suffix.
|
||||
* Will also modify `*stringPtr`, advancing it to position where it stopped reading.
|
||||
* Note : function will exit() program if digit sequence overflows */
|
||||
static unsigned readU32FromChar(const char** stringPtr)
|
||||
{
|
||||
unsigned result = 0;
|
||||
while ((**stringPtr >='0') && (**stringPtr <='9')) {
|
||||
unsigned const max = (((unsigned)(-1)) / 10) - 1;
|
||||
assert(result <= max); /* check overflow */
|
||||
result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
|
||||
}
|
||||
if ((**stringPtr=='K') || (**stringPtr=='M')) {
|
||||
unsigned const maxK = ((unsigned)(-1)) >> 10;
|
||||
assert(result <= maxK); /* check overflow */
|
||||
result <<= 10;
|
||||
if (**stringPtr=='M') {
|
||||
assert(result <= maxK); /* check overflow */
|
||||
result <<= 10;
|
||||
}
|
||||
(*stringPtr)++; /* skip `K` or `M` */
|
||||
if (**stringPtr=='i') (*stringPtr)++;
|
||||
if (**stringPtr=='B') (*stringPtr)++;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/** longCommandWArg() :
|
||||
* check if *stringPtr is the same as longCommand.
|
||||
* If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
|
||||
* @return 0 and doesn't modify *stringPtr otherwise.
|
||||
*/
|
||||
static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
|
||||
{
|
||||
size_t const comSize = strlen(longCommand);
|
||||
int const result = !strncmp(*stringPtr, longCommand, comSize);
|
||||
if (result) *stringPtr += comSize;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
int usage(const char* exeName)
|
||||
{
|
||||
DISPLAY (" \n");
|
||||
DISPLAY (" %s [Options] filename(s) \n", exeName);
|
||||
DISPLAY (" \n");
|
||||
DISPLAY ("Options : \n");
|
||||
DISPLAY ("-r : recursively load all files in subdirectories (default: off) \n");
|
||||
DISPLAY ("-B# : split input into blocks of size # (default: no split) \n");
|
||||
DISPLAY ("-# : use compression level # (default: %u) \n", CLEVEL_DEFAULT);
|
||||
DISPLAY ("-D # : use # as a dictionary (default: create one) \n");
|
||||
DISPLAY ("-i# : nb benchmark rounds (default: %u) \n", BENCH_TIME_DEFAULT_S);
|
||||
DISPLAY ("--nbBlocks=#: use # blocks for bench (default: one per file) \n");
|
||||
DISPLAY ("--nbDicts=# : create # dictionaries for bench (default: one per block) \n");
|
||||
DISPLAY ("-h : help (this text) \n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bad_usage(const char* exeName)
|
||||
{
|
||||
DISPLAY (" bad usage : \n");
|
||||
usage(exeName);
|
||||
return 1;
|
||||
}
|
||||
|
||||
int main (int argc, const char** argv)
|
||||
{
|
||||
int recursiveMode = 0;
|
||||
int nbRounds = BENCH_TIME_DEFAULT_S;
|
||||
const char* const exeName = argv[0];
|
||||
|
||||
if (argc < 2) return bad_usage(exeName);
|
||||
|
||||
const char** nameTable = (const char**)malloc(argc * sizeof(const char*));
|
||||
assert(nameTable != NULL);
|
||||
unsigned nameIdx = 0;
|
||||
|
||||
const char* dictionary = NULL;
|
||||
int cLevel = CLEVEL_DEFAULT;
|
||||
size_t blockSize = BLOCKSIZE_DEFAULT;
|
||||
unsigned nbDicts = 0; /* determine nbDicts automatically: 1 dictionary per block */
|
||||
unsigned nbBlocks = 0; /* determine nbBlocks automatically, from source and blockSize */
|
||||
|
||||
for (int argNb = 1; argNb < argc ; argNb++) {
|
||||
const char* argument = argv[argNb];
|
||||
if (!strcmp(argument, "-h")) { free(nameTable); return usage(exeName); }
|
||||
if (!strcmp(argument, "-r")) { recursiveMode = 1; continue; }
|
||||
if (!strcmp(argument, "-D")) { argNb++; assert(argNb < argc); dictionary = argv[argNb]; continue; }
|
||||
if (longCommandWArg(&argument, "-i")) { nbRounds = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--dictionary=")) { dictionary = argument; continue; }
|
||||
if (longCommandWArg(&argument, "-B")) { blockSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--blockSize=")) { blockSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--nbDicts=")) { nbDicts = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--nbBlocks=")) { nbBlocks = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--clevel=")) { cLevel = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "-")) { cLevel = readU32FromChar(&argument); continue; }
|
||||
/* anything that's not a command is a filename */
|
||||
nameTable[nameIdx++] = argument;
|
||||
}
|
||||
|
||||
const char** filenameTable = nameTable;
|
||||
unsigned nbFiles = nameIdx;
|
||||
char* buffer_containing_filenames = NULL;
|
||||
|
||||
if (recursiveMode) {
|
||||
#ifndef UTIL_HAS_CREATEFILELIST
|
||||
assert(0); /* missing capability, do not run */
|
||||
#endif
|
||||
filenameTable = UTIL_createFileList(nameTable, nameIdx, &buffer_containing_filenames, &nbFiles, 1 /* follow_links */);
|
||||
}
|
||||
|
||||
int result = bench(filenameTable, nbFiles, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds);
|
||||
|
||||
free(buffer_containing_filenames);
|
||||
free(nameTable);
|
||||
|
||||
return result;
|
||||
}
|
6
contrib/premake/premake4.lua
Normal file
6
contrib/premake/premake4.lua
Normal file
@ -0,0 +1,6 @@
|
||||
-- Include zstd.lua in your GENie or premake4 file, which exposes a project_zstd function
|
||||
dofile('zstd.lua')
|
||||
|
||||
solution 'example'
|
||||
configurations { 'Debug', 'Release' }
|
||||
project_zstd('../../lib/')
|
80
contrib/premake/zstd.lua
Normal file
80
contrib/premake/zstd.lua
Normal file
@ -0,0 +1,80 @@
|
||||
-- This GENie/premake file copies the behavior of the Makefile in the lib folder.
|
||||
-- Basic usage: project_zstd(ZSTD_DIR)
|
||||
|
||||
function project_zstd(dir, compression, decompression, deprecated, dictbuilder, legacy)
|
||||
if compression == nil then compression = true end
|
||||
if decompression == nil then decompression = true end
|
||||
if deprecated == nil then deprecated = false end
|
||||
if dictbuilder == nil then dictbuilder = false end
|
||||
|
||||
if legacy == nil then legacy = 0 end
|
||||
|
||||
if not compression then
|
||||
dictbuilder = false
|
||||
deprecated = false
|
||||
end
|
||||
|
||||
if not decompression then
|
||||
legacy = 0
|
||||
deprecated = false
|
||||
end
|
||||
|
||||
project 'zstd'
|
||||
kind 'StaticLib'
|
||||
language 'C'
|
||||
|
||||
files {
|
||||
dir .. 'zstd.h',
|
||||
dir .. 'common/**.c',
|
||||
dir .. 'common/**.h'
|
||||
}
|
||||
|
||||
if compression then
|
||||
files {
|
||||
dir .. 'compress/**.c',
|
||||
dir .. 'compress/**.h'
|
||||
}
|
||||
end
|
||||
|
||||
if decompression then
|
||||
files {
|
||||
dir .. 'decompress/**.c',
|
||||
dir .. 'decompress/**.h'
|
||||
}
|
||||
end
|
||||
|
||||
if dictbuilder then
|
||||
files {
|
||||
dir .. 'dictBuilder/**.c',
|
||||
dir .. 'dictBuilder/**.h'
|
||||
}
|
||||
end
|
||||
|
||||
if deprecated then
|
||||
files {
|
||||
dir .. 'deprecated/**.c',
|
||||
dir .. 'deprecated/**.h'
|
||||
}
|
||||
end
|
||||
|
||||
if legacy ~= 0 then
|
||||
if legacy >= 8 then
|
||||
files {
|
||||
dir .. 'legacy/zstd_v0' .. (legacy - 7) .. '.*'
|
||||
}
|
||||
end
|
||||
includedirs {
|
||||
dir .. 'legacy'
|
||||
}
|
||||
end
|
||||
|
||||
includedirs {
|
||||
dir,
|
||||
dir .. 'common'
|
||||
}
|
||||
|
||||
defines {
|
||||
'XXH_NAMESPACE=ZSTD_',
|
||||
'ZSTD_LEGACY_SUPPORT=' .. legacy
|
||||
}
|
||||
end
|
@ -190,13 +190,15 @@ $(ZSTDDIR)/libzstd.a: $(ZSTD_FILES)
|
||||
CFLAGS="$(ALL_CFLAGS)" LDFLAGS="$(ALL_LDFLAGS)" $(MAKE) -C $(ZSTDDIR) libzstd.a
|
||||
|
||||
# Rules to build the tests
|
||||
test/RoundTripTest$(EXT): test/RoundTripTest.o $(PROGDIR)/datagen.o Options.o \
|
||||
test/RoundTripTest$(EXT): test/RoundTripTest.o $(PROGDIR)/datagen.o \
|
||||
$(PROGDIR)/util.o Options.o \
|
||||
Pzstd.o SkippableFrame.o $(ZSTDDIR)/libzstd.a
|
||||
$(LD_COMMAND)
|
||||
|
||||
test/%Test$(EXT): PZSTD_LDFLAGS += $(GTEST_LIB)
|
||||
test/%Test$(EXT): LIBS += -lgtest -lgtest_main
|
||||
test/%Test$(EXT): test/%Test.o $(PROGDIR)/datagen.o Options.o Pzstd.o \
|
||||
test/%Test$(EXT): test/%Test.o $(PROGDIR)/datagen.o \
|
||||
$(PROGDIR)/util.o Options.o Pzstd.o \
|
||||
SkippableFrame.o $(ZSTDDIR)/libzstd.a
|
||||
$(LD_COMMAND)
|
||||
|
||||
|
@ -55,7 +55,7 @@ static std::uint64_t handleOneInput(const Options &options,
|
||||
SharedState& state) {
|
||||
auto inputSize = fileSizeOrZero(inputFile);
|
||||
// WorkQueue outlives ThreadPool so in the case of error we are certain
|
||||
// we don't accidently try to call push() on it after it is destroyed
|
||||
// we don't accidentally try to call push() on it after it is destroyed
|
||||
WorkQueue<std::shared_ptr<BufferWorkQueue>> outs{options.numThreads + 1};
|
||||
std::uint64_t bytesRead;
|
||||
std::uint64_t bytesWritten;
|
||||
|
@ -9,7 +9,7 @@
|
||||
|
||||
/**
|
||||
* A subset of `folly/Range.h`.
|
||||
* All code copied verbatiam modulo formatting
|
||||
* All code copied verbatim modulo formatting
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
|
@ -54,7 +54,7 @@ class ResourcePool {
|
||||
|
||||
/**
|
||||
* @returns A unique pointer to a resource. The resource is null iff
|
||||
* there are no avaiable resources and `factory()` returns null.
|
||||
* there are no available resources and `factory()` returns null.
|
||||
*/
|
||||
UniquePtr get() {
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
|
28
contrib/snap/snapcraft.yaml
Normal file
28
contrib/snap/snapcraft.yaml
Normal file
@ -0,0 +1,28 @@
|
||||
name: zstd
|
||||
version: git
|
||||
summary: Zstandard - Fast real-time compression algorithm
|
||||
description: |
|
||||
Zstandard, or zstd as short version, is a fast lossless compression
|
||||
algorithm, targeting real-time compression scenarios at zlib-level and better
|
||||
compression ratios. It's backed by a very fast entropy stage, provided by
|
||||
Huff0 and FSE library
|
||||
|
||||
grade: devel # must be 'stable' to release into candidate/stable channels
|
||||
confinement: devmode # use 'strict' once you have the right plugs and slots
|
||||
|
||||
apps:
|
||||
zstd:
|
||||
command: usr/local/bin/zstd
|
||||
plugs: [home, removable-media]
|
||||
zstdgrep:
|
||||
command: usr/local/bin/zstdgrep
|
||||
plugs: [home, removable-media]
|
||||
zstdless:
|
||||
command: usr/local/bin/zstdless
|
||||
plugs: [home, removable-media]
|
||||
|
||||
parts:
|
||||
zstd:
|
||||
source: .
|
||||
plugin: make
|
||||
build-packages: [g++]
|
@ -12,8 +12,8 @@ __`zstd_compression_format.md`__ : This document defines the Zstandard compressi
|
||||
Compliant decoders must adhere to this document,
|
||||
and compliant encoders must generate data that follows it.
|
||||
|
||||
Should you look for ressources to develop your own port of Zstandard algorithm,
|
||||
you may find the following ressources useful :
|
||||
Should you look for resources to develop your own port of Zstandard algorithm,
|
||||
you may find the following resources useful :
|
||||
|
||||
__`educational_decoder`__ : This directory contains an implementation of a Zstandard decoder,
|
||||
compliant with the Zstandard compression format.
|
||||
|
@ -7,7 +7,7 @@ CPPFLAGS += -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
|
||||
CFLAGS ?= -O3
|
||||
CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
|
||||
-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
|
||||
-Wstrict-prototypes -Wundef -Wformat-security \
|
||||
-Wstrict-prototypes -Wundef \
|
||||
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
|
||||
-Wredundant-decls
|
||||
CFLAGS += $(DEBUGFLAGS)
|
||||
|
@ -358,7 +358,7 @@ static u32 copy_literals(const size_t seq, istream_t *litstream,
|
||||
ostream_t *const out);
|
||||
|
||||
// Given an offset code from a sequence command (either an actual offset value
|
||||
// or an index for previous offset), computes the correct offset and udpates
|
||||
// or an index for previous offset), computes the correct offset and updates
|
||||
// the offset history
|
||||
static size_t compute_offset(sequence_command_t seq, u64 *const offset_hist);
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
15
examples/.gitignore
vendored
Normal file
15
examples/.gitignore
vendored
Normal file
@ -0,0 +1,15 @@
|
||||
#build
|
||||
simple_compression
|
||||
simple_decompression
|
||||
multiple_simple_compression
|
||||
dictionary_compression
|
||||
dictionary_decompression
|
||||
streaming_compression
|
||||
streaming_decompression
|
||||
multiple_streaming_compression
|
||||
streaming_memory_usage
|
||||
|
||||
#test artefact
|
||||
tmp*
|
||||
test*
|
||||
*.zst
|
90
examples/Makefile
Normal file
90
examples/Makefile
Normal file
@ -0,0 +1,90 @@
|
||||
# ################################################################
|
||||
# Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under both the BSD-style license (found in the
|
||||
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
# in the COPYING file in the root directory of this source tree).
|
||||
# ################################################################
|
||||
|
||||
# This Makefile presumes libzstd is installed, using `sudo make install`
|
||||
|
||||
CPPFLAGS += -I../lib
|
||||
LIB = ../lib/libzstd.a
|
||||
|
||||
.PHONY: default all clean test
|
||||
|
||||
default: all
|
||||
|
||||
all: simple_compression simple_decompression \
|
||||
multiple_simple_compression\
|
||||
dictionary_compression dictionary_decompression \
|
||||
streaming_compression streaming_decompression \
|
||||
multiple_streaming_compression streaming_memory_usage
|
||||
|
||||
$(LIB) :
|
||||
$(MAKE) -C ../lib libzstd.a
|
||||
|
||||
simple_compression : simple_compression.c common.h $(LIB)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $< $(LIB) $(LDFLAGS) -o $@
|
||||
|
||||
simple_decompression : simple_decompression.c common.h $(LIB)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $< $(LIB) $(LDFLAGS) -o $@
|
||||
|
||||
multiple_simple_compression : multiple_simple_compression.c common.h $(LIB)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $< $(LIB) $(LDFLAGS) -o $@
|
||||
|
||||
dictionary_compression : dictionary_compression.c common.h $(LIB)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $< $(LIB) $(LDFLAGS) -o $@
|
||||
|
||||
dictionary_decompression : dictionary_decompression.c common.h $(LIB)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $< $(LIB) $(LDFLAGS) -o $@
|
||||
|
||||
streaming_compression : streaming_compression.c common.h $(LIB)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $< $(LIB) $(LDFLAGS) -o $@
|
||||
|
||||
multiple_streaming_compression : multiple_streaming_compression.c common.h $(LIB)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $< $(LIB) $(LDFLAGS) -o $@
|
||||
|
||||
streaming_decompression : streaming_decompression.c common.h $(LIB)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $< $(LIB) $(LDFLAGS) -o $@
|
||||
|
||||
streaming_memory_usage : streaming_memory_usage.c $(LIB)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $< $(LIB) $(LDFLAGS) -o $@
|
||||
|
||||
clean:
|
||||
@rm -f core *.o tmp* result* *.zst \
|
||||
simple_compression simple_decompression \
|
||||
multiple_simple_compression \
|
||||
dictionary_compression dictionary_decompression \
|
||||
streaming_compression streaming_decompression \
|
||||
multiple_streaming_compression streaming_memory_usage
|
||||
@echo Cleaning completed
|
||||
|
||||
test: all
|
||||
cp README.md tmp
|
||||
cp Makefile tmp2
|
||||
@echo -- Simple compression tests
|
||||
./simple_compression tmp
|
||||
./simple_decompression tmp.zst
|
||||
./multiple_simple_compression *.c
|
||||
./streaming_decompression tmp.zst > /dev/null
|
||||
@echo -- Streaming memory usage
|
||||
./streaming_memory_usage
|
||||
@echo -- Streaming compression tests
|
||||
./streaming_compression tmp
|
||||
./streaming_decompression tmp.zst > /dev/null
|
||||
@echo -- Edge cases detection
|
||||
! ./streaming_decompression tmp # invalid input, must fail
|
||||
! ./simple_decompression tmp # invalid input, must fail
|
||||
! ./simple_decompression tmp.zst # unknown input size, must fail
|
||||
touch tmpNull # create 0-size file
|
||||
./simple_compression tmpNull
|
||||
./simple_decompression tmpNull.zst # 0-size frame : must work
|
||||
@echo -- Multiple streaming tests
|
||||
./multiple_streaming_compression *.c
|
||||
@echo -- Dictionary compression tests
|
||||
./dictionary_compression tmp2 tmp README.md
|
||||
./dictionary_decompression tmp2.zst tmp.zst README.md
|
||||
$(RM) tmp* *.zst
|
||||
@echo tests completed
|
46
examples/README.md
Normal file
46
examples/README.md
Normal file
@ -0,0 +1,46 @@
|
||||
Zstandard library : usage examples
|
||||
==================================
|
||||
|
||||
- [Simple compression](simple_compression.c) :
|
||||
Compress a single file.
|
||||
Introduces usage of : `ZSTD_compress()`
|
||||
|
||||
- [Simple decompression](simple_decompression.c) :
|
||||
Decompress a single file.
|
||||
Only compatible with simple compression.
|
||||
Result remains in memory.
|
||||
Introduces usage of : `ZSTD_decompress()`
|
||||
|
||||
- [Multiple simple compression](multiple_simple_compression.c) :
|
||||
Compress multiple files (in simple mode) in a single command line.
|
||||
Demonstrates memory preservation technique that
|
||||
minimizes malloc()/free() calls by re-using existing resources.
|
||||
Introduces usage of : `ZSTD_compressCCtx()`
|
||||
|
||||
- [Streaming memory usage](streaming_memory_usage.c) :
|
||||
Provides amount of memory used by streaming context.
|
||||
Introduces usage of : `ZSTD_sizeof_CStream()`
|
||||
|
||||
- [Streaming compression](streaming_compression.c) :
|
||||
Compress a single file.
|
||||
Introduces usage of : `ZSTD_compressStream()`
|
||||
|
||||
- [Multiple Streaming compression](multiple_streaming_compression.c) :
|
||||
Compress multiple files (in streaming mode) in a single command line.
|
||||
Introduces memory usage preservation technique,
|
||||
reducing impact of malloc()/free() and memset() by re-using existing resources.
|
||||
|
||||
- [Streaming decompression](streaming_decompression.c) :
|
||||
Decompress a single file compressed by zstd.
|
||||
Compatible with both simple and streaming compression.
|
||||
Result is sent to stdout.
|
||||
Introduces usage of : `ZSTD_decompressStream()`
|
||||
|
||||
- [Dictionary compression](dictionary_compression.c) :
|
||||
Compress multiple files using the same dictionary.
|
||||
Introduces usage of : `ZSTD_createCDict()` and `ZSTD_compress_usingCDict()`
|
||||
|
||||
- [Dictionary decompression](dictionary_decompression.c) :
|
||||
Decompress multiple files using the same dictionary.
|
||||
Result remains in memory.
|
||||
Introduces usage of : `ZSTD_createDDict()` and `ZSTD_decompress_usingDDict()`
|
234
examples/common.h
Normal file
234
examples/common.h
Normal file
@ -0,0 +1,234 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This header file has common utility functions used in examples.
|
||||
*/
|
||||
#ifndef COMMON_H
|
||||
#define COMMON_H
|
||||
|
||||
#include <stdlib.h> // malloc, free, exit
|
||||
#include <stdio.h> // fprintf, perror, fopen, etc.
|
||||
#include <string.h> // strerror
|
||||
#include <errno.h> // errno
|
||||
#include <sys/stat.h> // stat
|
||||
#include <zstd.h>
|
||||
|
||||
/*
|
||||
* Define the returned error code from utility functions.
|
||||
*/
|
||||
typedef enum {
|
||||
ERROR_fsize = 1,
|
||||
ERROR_fopen = 2,
|
||||
ERROR_fclose = 3,
|
||||
ERROR_fread = 4,
|
||||
ERROR_fwrite = 5,
|
||||
ERROR_loadFile = 6,
|
||||
ERROR_saveFile = 7,
|
||||
ERROR_malloc = 8,
|
||||
ERROR_largeFile = 9,
|
||||
} COMMON_ErrorCode;
|
||||
|
||||
/*! CHECK
|
||||
* Check that the condition holds. If it doesn't print a message and die.
|
||||
*/
|
||||
#define CHECK(cond, ...) \
|
||||
do { \
|
||||
if (!(cond)) { \
|
||||
fprintf(stderr, \
|
||||
"%s:%d CHECK(%s) failed: ", \
|
||||
__FILE__, \
|
||||
__LINE__, \
|
||||
#cond); \
|
||||
fprintf(stderr, "" __VA_ARGS__); \
|
||||
fprintf(stderr, "\n"); \
|
||||
exit(1); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/*! CHECK_ZSTD
|
||||
* Check the zstd error code and die if an error occurred after printing a
|
||||
* message.
|
||||
*/
|
||||
#define CHECK_ZSTD(fn, ...) \
|
||||
do { \
|
||||
size_t const err = (fn); \
|
||||
CHECK(!ZSTD_isError(err), "%s", ZSTD_getErrorName(err)); \
|
||||
} while (0)
|
||||
|
||||
/*! fsize_orDie() :
|
||||
* Get the size of a given file path.
|
||||
*
|
||||
* @return The size of a given file path.
|
||||
*/
|
||||
static size_t fsize_orDie(const char *filename)
|
||||
{
|
||||
struct stat st;
|
||||
if (stat(filename, &st) != 0) {
|
||||
/* error */
|
||||
perror(filename);
|
||||
exit(ERROR_fsize);
|
||||
}
|
||||
|
||||
off_t const fileSize = st.st_size;
|
||||
size_t const size = (size_t)fileSize;
|
||||
/* 1. fileSize should be non-negative,
|
||||
* 2. if off_t -> size_t type conversion results in discrepancy,
|
||||
* the file size is too large for type size_t.
|
||||
*/
|
||||
if ((fileSize < 0) || (fileSize != (off_t)size)) {
|
||||
fprintf(stderr, "%s : filesize too large \n", filename);
|
||||
exit(ERROR_largeFile);
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
/*! fopen_orDie() :
|
||||
* Open a file using given file path and open option.
|
||||
*
|
||||
* @return If successful this function will return a FILE pointer to an
|
||||
* opened file otherwise it sends an error to stderr and exits.
|
||||
*/
|
||||
static FILE* fopen_orDie(const char *filename, const char *instruction)
|
||||
{
|
||||
FILE* const inFile = fopen(filename, instruction);
|
||||
if (inFile) return inFile;
|
||||
/* error */
|
||||
perror(filename);
|
||||
exit(ERROR_fopen);
|
||||
}
|
||||
|
||||
/*! fclose_orDie() :
|
||||
* Close an opened file using given FILE pointer.
|
||||
*/
|
||||
static void fclose_orDie(FILE* file)
|
||||
{
|
||||
if (!fclose(file)) { return; };
|
||||
/* error */
|
||||
perror("fclose");
|
||||
exit(ERROR_fclose);
|
||||
}
|
||||
|
||||
/*! fread_orDie() :
|
||||
*
|
||||
* Read sizeToRead bytes from a given file, storing them at the
|
||||
* location given by buffer.
|
||||
*
|
||||
* @return The number of bytes read.
|
||||
*/
|
||||
static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
|
||||
{
|
||||
size_t const readSize = fread(buffer, 1, sizeToRead, file);
|
||||
if (readSize == sizeToRead) return readSize; /* good */
|
||||
if (feof(file)) return readSize; /* good, reached end of file */
|
||||
/* error */
|
||||
perror("fread");
|
||||
exit(ERROR_fread);
|
||||
}
|
||||
|
||||
/*! fwrite_orDie() :
|
||||
*
|
||||
* Write sizeToWrite bytes to a file pointed to by file, obtaining
|
||||
* them from a location given by buffer.
|
||||
*
|
||||
* Note: This function will send an error to stderr and exit if it
|
||||
* cannot write data to the given file pointer.
|
||||
*
|
||||
* @return The number of bytes written.
|
||||
*/
|
||||
static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
|
||||
{
|
||||
size_t const writtenSize = fwrite(buffer, 1, sizeToWrite, file);
|
||||
if (writtenSize == sizeToWrite) return sizeToWrite; /* good */
|
||||
/* error */
|
||||
perror("fwrite");
|
||||
exit(ERROR_fwrite);
|
||||
}
|
||||
|
||||
/*! malloc_orDie() :
|
||||
* Allocate memory.
|
||||
*
|
||||
* @return If successful this function returns a pointer to allo-
|
||||
* cated memory. If there is an error, this function will send that
|
||||
* error to stderr and exit.
|
||||
*/
|
||||
static void* malloc_orDie(size_t size)
|
||||
{
|
||||
void* const buff = malloc(size);
|
||||
if (buff) return buff;
|
||||
/* error */
|
||||
perror("malloc");
|
||||
exit(ERROR_malloc);
|
||||
}
|
||||
|
||||
/*! loadFile_orDie() :
|
||||
* load file into buffer (memory).
|
||||
*
|
||||
* Note: This function will send an error to stderr and exit if it
|
||||
* cannot read data from the given file path.
|
||||
*
|
||||
* @return If successful this function will load file into buffer and
|
||||
* return file size, otherwise it will printout an error to stderr and exit.
|
||||
*/
|
||||
static size_t loadFile_orDie(const char* fileName, void* buffer, size_t bufferSize)
|
||||
{
|
||||
size_t const fileSize = fsize_orDie(fileName);
|
||||
CHECK(fileSize <= bufferSize, "File too large!");
|
||||
|
||||
FILE* const inFile = fopen_orDie(fileName, "rb");
|
||||
size_t const readSize = fread(buffer, 1, fileSize, inFile);
|
||||
if (readSize != (size_t)fileSize) {
|
||||
fprintf(stderr, "fread: %s : %s \n", fileName, strerror(errno));
|
||||
exit(ERROR_fread);
|
||||
}
|
||||
fclose(inFile); /* can't fail, read only */
|
||||
return fileSize;
|
||||
}
|
||||
|
||||
/*! mallocAndLoadFile_orDie() :
|
||||
* allocate memory buffer and then load file into it.
|
||||
*
|
||||
* Note: This function will send an error to stderr and exit if memory allocation
|
||||
* fails or it cannot read data from the given file path.
|
||||
*
|
||||
* @return If successful this function will return buffer and bufferSize(=fileSize),
|
||||
* otherwise it will printout an error to stderr and exit.
|
||||
*/
|
||||
static void* mallocAndLoadFile_orDie(const char* fileName, size_t* bufferSize) {
|
||||
size_t const fileSize = fsize_orDie(fileName);
|
||||
*bufferSize = fileSize;
|
||||
void* const buffer = malloc_orDie(*bufferSize);
|
||||
loadFile_orDie(fileName, buffer, *bufferSize);
|
||||
return buffer;
|
||||
}
|
||||
|
||||
/*! saveFile_orDie() :
|
||||
*
|
||||
* Save buffSize bytes to a given file path, obtaining them from a location pointed
|
||||
* to by buff.
|
||||
*
|
||||
* Note: This function will send an error to stderr and exit if it
|
||||
* cannot write to a given file.
|
||||
*/
|
||||
static void saveFile_orDie(const char* fileName, const void* buff, size_t buffSize)
|
||||
{
|
||||
FILE* const oFile = fopen_orDie(fileName, "wb");
|
||||
size_t const wSize = fwrite(buff, 1, buffSize, oFile);
|
||||
if (wSize != (size_t)buffSize) {
|
||||
fprintf(stderr, "fwrite: %s : %s \n", fileName, strerror(errno));
|
||||
exit(ERROR_fwrite);
|
||||
}
|
||||
if (fclose(oFile)) {
|
||||
perror(fileName);
|
||||
exit(ERROR_fclose);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
97
examples/dictionary_compression.c
Normal file
97
examples/dictionary_compression.c
Normal file
@ -0,0 +1,97 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
#include <stdio.h> // printf
|
||||
#include <stdlib.h> // free
|
||||
#include <string.h> // memset, strcat
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD()
|
||||
|
||||
/* createDict() :
|
||||
`dictFileName` is supposed to have been created using `zstd --train` */
|
||||
static ZSTD_CDict* createCDict_orDie(const char* dictFileName, int cLevel)
|
||||
{
|
||||
size_t dictSize;
|
||||
printf("loading dictionary %s \n", dictFileName);
|
||||
void* const dictBuffer = mallocAndLoadFile_orDie(dictFileName, &dictSize);
|
||||
ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictSize, cLevel);
|
||||
CHECK(cdict != NULL, "ZSTD_createCDict() failed!");
|
||||
free(dictBuffer);
|
||||
return cdict;
|
||||
}
|
||||
|
||||
|
||||
static void compress(const char* fname, const char* oname, const ZSTD_CDict* cdict)
|
||||
{
|
||||
size_t fSize;
|
||||
void* const fBuff = mallocAndLoadFile_orDie(fname, &fSize);
|
||||
size_t const cBuffSize = ZSTD_compressBound(fSize);
|
||||
void* const cBuff = malloc_orDie(cBuffSize);
|
||||
|
||||
/* Compress using the dictionary.
|
||||
* This function writes the dictionary id, and content size into the header.
|
||||
* But, it doesn't use a checksum. You can control these options using the
|
||||
* advanced API: ZSTD_CCtx_setParameter(), ZSTD_CCtx_refCDict(),
|
||||
* and ZSTD_compress2().
|
||||
*/
|
||||
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
|
||||
CHECK(cctx != NULL, "ZSTD_createCCtx() failed!");
|
||||
size_t const cSize = ZSTD_compress_usingCDict(cctx, cBuff, cBuffSize, fBuff, fSize, cdict);
|
||||
CHECK_ZSTD(cSize);
|
||||
|
||||
saveFile_orDie(oname, cBuff, cSize);
|
||||
|
||||
/* success */
|
||||
printf("%25s : %6u -> %7u - %s \n", fname, (unsigned)fSize, (unsigned)cSize, oname);
|
||||
|
||||
ZSTD_freeCCtx(cctx); /* never fails */
|
||||
free(fBuff);
|
||||
free(cBuff);
|
||||
}
|
||||
|
||||
|
||||
static char* createOutFilename_orDie(const char* filename)
|
||||
{
|
||||
size_t const inL = strlen(filename);
|
||||
size_t const outL = inL + 5;
|
||||
void* outSpace = malloc_orDie(outL);
|
||||
memset(outSpace, 0, outL);
|
||||
strcat(outSpace, filename);
|
||||
strcat(outSpace, ".zst");
|
||||
return (char*)outSpace;
|
||||
}
|
||||
|
||||
int main(int argc, const char** argv)
|
||||
{
|
||||
const char* const exeName = argv[0];
|
||||
int const cLevel = 3;
|
||||
|
||||
if (argc<3) {
|
||||
fprintf(stderr, "wrong arguments\n");
|
||||
fprintf(stderr, "usage:\n");
|
||||
fprintf(stderr, "%s [FILES] dictionary\n", exeName);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* load dictionary only once */
|
||||
const char* const dictName = argv[argc-1];
|
||||
ZSTD_CDict* const dictPtr = createCDict_orDie(dictName, cLevel);
|
||||
|
||||
int u;
|
||||
for (u=1; u<argc-1; u++) {
|
||||
const char* inFilename = argv[u];
|
||||
char* const outFilename = createOutFilename_orDie(inFilename);
|
||||
compress(inFilename, outFilename, dictPtr);
|
||||
free(outFilename);
|
||||
}
|
||||
|
||||
ZSTD_freeCDict(dictPtr);
|
||||
printf("All %u files compressed. \n", argc-2);
|
||||
return 0;
|
||||
}
|
99
examples/dictionary_decompression.c
Normal file
99
examples/dictionary_decompression.c
Normal file
@ -0,0 +1,99 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdio.h> // printf
|
||||
#include <stdlib.h> // free
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD()
|
||||
|
||||
/* createDict() :
|
||||
`dictFileName` is supposed to have been created using `zstd --train` */
|
||||
static ZSTD_DDict* createDict_orDie(const char* dictFileName)
|
||||
{
|
||||
size_t dictSize;
|
||||
printf("loading dictionary %s \n", dictFileName);
|
||||
void* const dictBuffer = mallocAndLoadFile_orDie(dictFileName, &dictSize);
|
||||
ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuffer, dictSize);
|
||||
CHECK(ddict != NULL, "ZSTD_createDDict() failed!");
|
||||
free(dictBuffer);
|
||||
return ddict;
|
||||
}
|
||||
|
||||
static void decompress(const char* fname, const ZSTD_DDict* ddict)
|
||||
{
|
||||
size_t cSize;
|
||||
void* const cBuff = mallocAndLoadFile_orDie(fname, &cSize);
|
||||
/* Read the content size from the frame header. For simplicity we require
|
||||
* that it is always present. By default, zstd will write the content size
|
||||
* in the header when it is known. If you can't guarantee that the frame
|
||||
* content size is always written into the header, either use streaming
|
||||
* decompression, or ZSTD_decompressBound().
|
||||
*/
|
||||
unsigned long long const rSize = ZSTD_getFrameContentSize(cBuff, cSize);
|
||||
CHECK(rSize != ZSTD_CONTENTSIZE_ERROR, "%s: not compressed by zstd!", fname);
|
||||
CHECK(rSize != ZSTD_CONTENTSIZE_UNKNOWN, "%s: original size unknown!", fname);
|
||||
void* const rBuff = malloc_orDie((size_t)rSize);
|
||||
|
||||
/* Check that the dictionary ID matches.
|
||||
* If a non-zstd dictionary is used, then both will be zero.
|
||||
* By default zstd always writes the dictionary ID into the frame.
|
||||
* Zstd will check if there is a dictionary ID mismatch as well.
|
||||
*/
|
||||
unsigned const expectedDictID = ZSTD_getDictID_fromDDict(ddict);
|
||||
unsigned const actualDictID = ZSTD_getDictID_fromFrame(cBuff, cSize);
|
||||
CHECK(actualDictID == expectedDictID,
|
||||
"DictID mismatch: expected %u got %u",
|
||||
expectedDictID,
|
||||
actualDictID);
|
||||
|
||||
/* Decompress using the dictionary.
|
||||
* If you need to control the decompression parameters, then use the
|
||||
* advanced API: ZSTD_DCtx_setParameter(), ZSTD_DCtx_refDDict(), and
|
||||
* ZSTD_decompressDCtx().
|
||||
*/
|
||||
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
|
||||
CHECK(dctx != NULL, "ZSTD_createDCtx() failed!");
|
||||
size_t const dSize = ZSTD_decompress_usingDDict(dctx, rBuff, rSize, cBuff, cSize, ddict);
|
||||
CHECK_ZSTD(dSize);
|
||||
/* When zstd knows the content size, it will error if it doesn't match. */
|
||||
CHECK(dSize == rSize, "Impossible because zstd will check this condition!");
|
||||
|
||||
/* success */
|
||||
printf("%25s : %6u -> %7u \n", fname, (unsigned)cSize, (unsigned)rSize);
|
||||
|
||||
ZSTD_freeDCtx(dctx);
|
||||
free(rBuff);
|
||||
free(cBuff);
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, const char** argv)
|
||||
{
|
||||
const char* const exeName = argv[0];
|
||||
|
||||
if (argc<3) {
|
||||
printf("wrong arguments\n");
|
||||
printf("usage:\n");
|
||||
printf("%s [FILES] dictionary\n", exeName);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* load dictionary only once */
|
||||
const char* const dictName = argv[argc-1];
|
||||
ZSTD_DDict* const dictPtr = createDict_orDie(dictName);
|
||||
|
||||
int u;
|
||||
for (u=1; u<argc-1; u++) decompress(argv[u], dictPtr);
|
||||
|
||||
ZSTD_freeDDict(dictPtr);
|
||||
printf("All %u files correctly decoded (in memory) \n", argc-2);
|
||||
return 0;
|
||||
}
|
116
examples/multiple_simple_compression.c
Normal file
116
examples/multiple_simple_compression.c
Normal file
@ -0,0 +1,116 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
#include <stdio.h> // printf
|
||||
#include <stdlib.h> // free
|
||||
#include <string.h> // memcpy, strlen
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD()
|
||||
|
||||
typedef struct {
|
||||
void* fBuffer;
|
||||
void* cBuffer;
|
||||
size_t fBufferSize;
|
||||
size_t cBufferSize;
|
||||
ZSTD_CCtx* cctx;
|
||||
} resources;
|
||||
|
||||
/*
|
||||
* allocate memory for buffers big enough to compress all files
|
||||
* as well as memory for output file name (ofn)
|
||||
*/
|
||||
static resources createResources_orDie(int argc, const char** argv, char **ofn, size_t* ofnBufferLen)
|
||||
{
|
||||
size_t maxFilenameLength=0;
|
||||
size_t maxFileSize = 0;
|
||||
|
||||
int argNb;
|
||||
for (argNb = 1; argNb < argc; argNb++) {
|
||||
const char* const filename = argv[argNb];
|
||||
size_t const filenameLength = strlen(filename);
|
||||
size_t const fileSize = fsize_orDie(filename);
|
||||
|
||||
if (filenameLength > maxFilenameLength) maxFilenameLength = filenameLength;
|
||||
if (fileSize > maxFileSize) maxFileSize = fileSize;
|
||||
}
|
||||
|
||||
resources ress;
|
||||
ress.fBufferSize = maxFileSize;
|
||||
ress.cBufferSize = ZSTD_compressBound(maxFileSize);
|
||||
|
||||
*ofnBufferLen = maxFilenameLength + 5;
|
||||
*ofn = (char*)malloc_orDie(*ofnBufferLen);
|
||||
ress.fBuffer = malloc_orDie(ress.fBufferSize);
|
||||
ress.cBuffer = malloc_orDie(ress.cBufferSize);
|
||||
ress.cctx = ZSTD_createCCtx();
|
||||
CHECK(ress.cctx != NULL, "ZSTD_createCCtx() failed!");
|
||||
return ress;
|
||||
}
|
||||
|
||||
static void freeResources(resources ress, char *outFilename)
|
||||
{
|
||||
free(ress.fBuffer);
|
||||
free(ress.cBuffer);
|
||||
ZSTD_freeCCtx(ress.cctx); /* never fails */
|
||||
free(outFilename);
|
||||
}
|
||||
|
||||
/* compress with pre-allocated context (ZSTD_CCtx) and input/output buffers*/
|
||||
static void compressFile_orDie(resources ress, const char* fname, const char* oname)
|
||||
{
|
||||
size_t fSize = loadFile_orDie(fname, ress.fBuffer, ress.fBufferSize);
|
||||
|
||||
/* Compress using the context.
|
||||
* If you need more control over parameters, use the advanced API:
|
||||
* ZSTD_CCtx_setParameter(), and ZSTD_compress2().
|
||||
*/
|
||||
size_t const cSize = ZSTD_compressCCtx(ress.cctx, ress.cBuffer, ress.cBufferSize, ress.fBuffer, fSize, 1);
|
||||
CHECK_ZSTD(cSize);
|
||||
|
||||
saveFile_orDie(oname, ress.cBuffer, cSize);
|
||||
|
||||
/* success */
|
||||
printf("%25s : %6u -> %7u - %s \n", fname, (unsigned)fSize, (unsigned)cSize, oname);
|
||||
}
|
||||
|
||||
int main(int argc, const char** argv)
|
||||
{
|
||||
const char* const exeName = argv[0];
|
||||
|
||||
if (argc<2) {
|
||||
printf("wrong arguments\n");
|
||||
printf("usage:\n");
|
||||
printf("%s FILE(s)\n", exeName);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* memory allocation for outFilename and resources */
|
||||
char* outFilename;
|
||||
size_t outFilenameBufferLen;
|
||||
resources const ress = createResources_orDie(argc, argv, &outFilename, &outFilenameBufferLen);
|
||||
|
||||
/* compress files with shared context, input and output buffers */
|
||||
int argNb;
|
||||
for (argNb = 1; argNb < argc; argNb++) {
|
||||
const char* const inFilename = argv[argNb];
|
||||
size_t const inFilenameLen = strlen(inFilename);
|
||||
CHECK(inFilenameLen + 5 <= outFilenameBufferLen, "File name too long!");
|
||||
memcpy(outFilename, inFilename, inFilenameLen);
|
||||
memcpy(outFilename+inFilenameLen, ".zst", 5);
|
||||
compressFile_orDie(ress, inFilename, outFilename);
|
||||
}
|
||||
|
||||
/* free memory */
|
||||
freeResources(ress,outFilename);
|
||||
|
||||
printf("compressed %i files \n", argc-1);
|
||||
|
||||
return 0;
|
||||
}
|
133
examples/multiple_streaming_compression.c
Normal file
133
examples/multiple_streaming_compression.c
Normal file
@ -0,0 +1,133 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
|
||||
/* The objective of this example is to show of to compress multiple successive files
|
||||
* while preserving memory management.
|
||||
* All structures and buffers will be created only once,
|
||||
* and shared across all compression operations */
|
||||
|
||||
#include <stdio.h> // printf
|
||||
#include <stdlib.h> // free
|
||||
#include <string.h> // memset, strcat
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD()
|
||||
|
||||
typedef struct {
|
||||
void* buffIn;
|
||||
void* buffOut;
|
||||
size_t buffInSize;
|
||||
size_t buffOutSize;
|
||||
ZSTD_CCtx* cctx;
|
||||
} resources;
|
||||
|
||||
static resources createResources_orDie(int cLevel)
|
||||
{
|
||||
resources ress;
|
||||
ress.buffInSize = ZSTD_CStreamInSize(); /* can always read one full block */
|
||||
ress.buffOutSize= ZSTD_CStreamOutSize(); /* can always flush a full block */
|
||||
ress.buffIn = malloc_orDie(ress.buffInSize);
|
||||
ress.buffOut= malloc_orDie(ress.buffOutSize);
|
||||
ress.cctx = ZSTD_createCCtx();
|
||||
CHECK(ress.cctx != NULL, "ZSTD_createCCtx() failed!");
|
||||
|
||||
/* Set any compression parameters you want here.
|
||||
* They will persist for every compression operation.
|
||||
* Here we set the compression level, and enable the checksum.
|
||||
*/
|
||||
CHECK_ZSTD( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) );
|
||||
CHECK_ZSTD( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, 1) );
|
||||
return ress;
|
||||
}
|
||||
|
||||
static void freeResources(resources ress)
|
||||
{
|
||||
ZSTD_freeCCtx(ress.cctx);
|
||||
free(ress.buffIn);
|
||||
free(ress.buffOut);
|
||||
}
|
||||
|
||||
static void compressFile_orDie(resources ress, const char* fname, const char* outName)
|
||||
{
|
||||
// Open the input and output files.
|
||||
FILE* const fin = fopen_orDie(fname, "rb");
|
||||
FILE* const fout = fopen_orDie(outName, "wb");
|
||||
|
||||
/* Reset the context to a clean state to start a new compression operation.
|
||||
* The parameters are sticky, so we keep the compression level and extra
|
||||
* parameters that we set in createResources_orDie().
|
||||
*/
|
||||
CHECK_ZSTD( ZSTD_CCtx_reset(ress.cctx, ZSTD_reset_session_only) );
|
||||
|
||||
size_t const toRead = ress.buffInSize;
|
||||
size_t read;
|
||||
while ( (read = fread_orDie(ress.buffIn, toRead, fin)) ) {
|
||||
/* This loop is the same as streaming_compression.c.
|
||||
* See that file for detailed comments.
|
||||
*/
|
||||
int const lastChunk = (read < toRead);
|
||||
ZSTD_EndDirective const mode = lastChunk ? ZSTD_e_end : ZSTD_e_continue;
|
||||
|
||||
ZSTD_inBuffer input = { ress.buffIn, read, 0 };
|
||||
int finished;
|
||||
do {
|
||||
ZSTD_outBuffer output = { ress.buffOut, ress.buffOutSize, 0 };
|
||||
size_t const remaining = ZSTD_compressStream2(ress.cctx, &output, &input, mode);
|
||||
CHECK_ZSTD(remaining);
|
||||
fwrite_orDie(ress.buffOut, output.pos, fout);
|
||||
finished = lastChunk ? (remaining == 0) : (input.pos == input.size);
|
||||
} while (!finished);
|
||||
CHECK(input.pos == input.size,
|
||||
"Impossible: zstd only returns 0 when the input is completely consumed!");
|
||||
}
|
||||
|
||||
fclose_orDie(fout);
|
||||
fclose_orDie(fin);
|
||||
}
|
||||
|
||||
int main(int argc, const char** argv)
|
||||
{
|
||||
const char* const exeName = argv[0];
|
||||
|
||||
if (argc<2) {
|
||||
printf("wrong arguments\n");
|
||||
printf("usage:\n");
|
||||
printf("%s FILE(s)\n", exeName);
|
||||
return 1;
|
||||
}
|
||||
|
||||
int const cLevel = 7;
|
||||
resources const ress = createResources_orDie(cLevel);
|
||||
void* ofnBuffer = NULL;
|
||||
size_t ofnbSize = 0;
|
||||
|
||||
int argNb;
|
||||
for (argNb = 1; argNb < argc; argNb++) {
|
||||
const char* const ifn = argv[argNb];
|
||||
size_t const ifnSize = strlen(ifn);
|
||||
size_t const ofnSize = ifnSize + 5;
|
||||
if (ofnbSize <= ofnSize) {
|
||||
ofnbSize = ofnSize + 16;
|
||||
free(ofnBuffer);
|
||||
ofnBuffer = malloc_orDie(ofnbSize);
|
||||
}
|
||||
memset(ofnBuffer, 0, ofnSize);
|
||||
strcat(ofnBuffer, ifn);
|
||||
strcat(ofnBuffer, ".zst");
|
||||
compressFile_orDie(ress, ifn, ofnBuffer);
|
||||
}
|
||||
|
||||
freeResources(ress);
|
||||
free(ofnBuffer);
|
||||
|
||||
printf("compressed %i files \n", argc-1);
|
||||
|
||||
return 0;
|
||||
}
|
68
examples/simple_compression.c
Normal file
68
examples/simple_compression.c
Normal file
@ -0,0 +1,68 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
#include <stdio.h> // printf
|
||||
#include <stdlib.h> // free
|
||||
#include <string.h> // strlen, strcat, memset
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD()
|
||||
|
||||
static void compress_orDie(const char* fname, const char* oname)
|
||||
{
|
||||
size_t fSize;
|
||||
void* const fBuff = mallocAndLoadFile_orDie(fname, &fSize);
|
||||
size_t const cBuffSize = ZSTD_compressBound(fSize);
|
||||
void* const cBuff = malloc_orDie(cBuffSize);
|
||||
|
||||
/* Compress.
|
||||
* If you are doing many compressions, you may want to reuse the context.
|
||||
* See the multiple_simple_compression.c example.
|
||||
*/
|
||||
size_t const cSize = ZSTD_compress(cBuff, cBuffSize, fBuff, fSize, 1);
|
||||
CHECK_ZSTD(cSize);
|
||||
|
||||
saveFile_orDie(oname, cBuff, cSize);
|
||||
|
||||
/* success */
|
||||
printf("%25s : %6u -> %7u - %s \n", fname, (unsigned)fSize, (unsigned)cSize, oname);
|
||||
|
||||
free(fBuff);
|
||||
free(cBuff);
|
||||
}
|
||||
|
||||
static char* createOutFilename_orDie(const char* filename)
|
||||
{
|
||||
size_t const inL = strlen(filename);
|
||||
size_t const outL = inL + 5;
|
||||
void* const outSpace = malloc_orDie(outL);
|
||||
memset(outSpace, 0, outL);
|
||||
strcat(outSpace, filename);
|
||||
strcat(outSpace, ".zst");
|
||||
return (char*)outSpace;
|
||||
}
|
||||
|
||||
int main(int argc, const char** argv)
|
||||
{
|
||||
const char* const exeName = argv[0];
|
||||
|
||||
if (argc!=2) {
|
||||
printf("wrong arguments\n");
|
||||
printf("usage:\n");
|
||||
printf("%s FILE\n", exeName);
|
||||
return 1;
|
||||
}
|
||||
|
||||
const char* const inFilename = argv[1];
|
||||
|
||||
char* const outFilename = createOutFilename_orDie(inFilename);
|
||||
compress_orDie(inFilename, outFilename);
|
||||
free(outFilename);
|
||||
return 0;
|
||||
}
|
65
examples/simple_decompression.c
Normal file
65
examples/simple_decompression.c
Normal file
@ -0,0 +1,65 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
#include <stdio.h> // printf
|
||||
#include <stdlib.h> // free
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD()
|
||||
|
||||
static void decompress(const char* fname)
|
||||
{
|
||||
size_t cSize;
|
||||
void* const cBuff = mallocAndLoadFile_orDie(fname, &cSize);
|
||||
/* Read the content size from the frame header. For simplicity we require
|
||||
* that it is always present. By default, zstd will write the content size
|
||||
* in the header when it is known. If you can't guarantee that the frame
|
||||
* content size is always written into the header, either use streaming
|
||||
* decompression, or ZSTD_decompressBound().
|
||||
*/
|
||||
unsigned long long const rSize = ZSTD_getFrameContentSize(cBuff, cSize);
|
||||
CHECK(rSize != ZSTD_CONTENTSIZE_ERROR, "%s: not compressed by zstd!", fname);
|
||||
CHECK(rSize != ZSTD_CONTENTSIZE_UNKNOWN, "%s: original size unknown!", fname);
|
||||
|
||||
void* const rBuff = malloc_orDie((size_t)rSize);
|
||||
|
||||
/* Decompress.
|
||||
* If you are doing many decompressions, you may want to reuse the context
|
||||
* and use ZSTD_decompressDCtx(). If you want to set advanced parameters,
|
||||
* use ZSTD_DCtx_setParameter().
|
||||
*/
|
||||
size_t const dSize = ZSTD_decompress(rBuff, rSize, cBuff, cSize);
|
||||
CHECK_ZSTD(dSize);
|
||||
/* When zstd knows the content size, it will error if it doesn't match. */
|
||||
CHECK(dSize == rSize, "Impossible because zstd will check this condition!");
|
||||
|
||||
/* success */
|
||||
printf("%25s : %6u -> %7u \n", fname, (unsigned)cSize, (unsigned)rSize);
|
||||
|
||||
free(rBuff);
|
||||
free(cBuff);
|
||||
}
|
||||
|
||||
int main(int argc, const char** argv)
|
||||
{
|
||||
const char* const exeName = argv[0];
|
||||
|
||||
if (argc!=2) {
|
||||
printf("wrong arguments\n");
|
||||
printf("usage:\n");
|
||||
printf("%s FILE\n", exeName);
|
||||
return 1;
|
||||
}
|
||||
|
||||
decompress(argv[1]);
|
||||
|
||||
printf("%s correctly decoded (in memory). \n", argv[1]);
|
||||
|
||||
return 0;
|
||||
}
|
119
examples/streaming_compression.c
Normal file
119
examples/streaming_compression.c
Normal file
@ -0,0 +1,119 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdio.h> // printf
|
||||
#include <stdlib.h> // free
|
||||
#include <string.h> // memset, strcat, strlen
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD()
|
||||
|
||||
|
||||
static void compressFile_orDie(const char* fname, const char* outName, int cLevel)
|
||||
{
|
||||
/* Open the input and output files. */
|
||||
FILE* const fin = fopen_orDie(fname, "rb");
|
||||
FILE* const fout = fopen_orDie(outName, "wb");
|
||||
/* Create the input and output buffers.
|
||||
* They may be any size, but we recommend using these functions to size them.
|
||||
* Performance will only suffer significantly for very tiny buffers.
|
||||
*/
|
||||
size_t const buffInSize = ZSTD_CStreamInSize();
|
||||
void* const buffIn = malloc_orDie(buffInSize);
|
||||
size_t const buffOutSize = ZSTD_CStreamOutSize();
|
||||
void* const buffOut = malloc_orDie(buffOutSize);
|
||||
|
||||
/* Create the context. */
|
||||
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
|
||||
CHECK(cctx != NULL, "ZSTD_createCCtx() failed!");
|
||||
|
||||
/* Set any parameters you want.
|
||||
* Here we set the compression level, and enable the checksum.
|
||||
*/
|
||||
CHECK_ZSTD( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel) );
|
||||
CHECK_ZSTD( ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1) );
|
||||
|
||||
/* This loop read from the input file, compresses that entire chunk,
|
||||
* and writes all output produced to the output file.
|
||||
*/
|
||||
size_t const toRead = buffInSize;
|
||||
size_t read;
|
||||
while ((read = fread_orDie(buffIn, toRead, fin))) {
|
||||
/* Select the flush mode.
|
||||
* If the read may not be finished (read == toRead) we use
|
||||
* ZSTD_e_continue. If this is the last chunk, we use ZSTD_e_end.
|
||||
* Zstd optimizes the case where the first flush mode is ZSTD_e_end,
|
||||
* since it knows it is compressing the entire source in one pass.
|
||||
*/
|
||||
int const lastChunk = (read < toRead);
|
||||
ZSTD_EndDirective const mode = lastChunk ? ZSTD_e_end : ZSTD_e_continue;
|
||||
/* Set the input buffer to what we just read.
|
||||
* We compress until the input buffer is empty, each time flushing the
|
||||
* output.
|
||||
*/
|
||||
ZSTD_inBuffer input = { buffIn, read, 0 };
|
||||
int finished;
|
||||
do {
|
||||
/* Compress into the output buffer and write all of the output to
|
||||
* the file so we can reuse the buffer next iteration.
|
||||
*/
|
||||
ZSTD_outBuffer output = { buffOut, buffOutSize, 0 };
|
||||
size_t const remaining = ZSTD_compressStream2(cctx, &output , &input, mode);
|
||||
CHECK_ZSTD(remaining);
|
||||
fwrite_orDie(buffOut, output.pos, fout);
|
||||
/* If we're on the last chunk we're finished when zstd returns 0,
|
||||
* which means its consumed all the input AND finished the frame.
|
||||
* Otherwise, we're finished when we've consumed all the input.
|
||||
*/
|
||||
finished = lastChunk ? (remaining == 0) : (input.pos == input.size);
|
||||
} while (!finished);
|
||||
CHECK(input.pos == input.size,
|
||||
"Impossible: zstd only returns 0 when the input is completely consumed!");
|
||||
}
|
||||
|
||||
ZSTD_freeCCtx(cctx);
|
||||
fclose_orDie(fout);
|
||||
fclose_orDie(fin);
|
||||
free(buffIn);
|
||||
free(buffOut);
|
||||
}
|
||||
|
||||
|
||||
static char* createOutFilename_orDie(const char* filename)
|
||||
{
|
||||
size_t const inL = strlen(filename);
|
||||
size_t const outL = inL + 5;
|
||||
void* const outSpace = malloc_orDie(outL);
|
||||
memset(outSpace, 0, outL);
|
||||
strcat(outSpace, filename);
|
||||
strcat(outSpace, ".zst");
|
||||
return (char*)outSpace;
|
||||
}
|
||||
|
||||
int main(int argc, const char** argv)
|
||||
{
|
||||
const char* const exeName = argv[0];
|
||||
|
||||
if (argc!=2) {
|
||||
printf("wrong arguments\n");
|
||||
printf("usage:\n");
|
||||
printf("%s FILE\n", exeName);
|
||||
return 1;
|
||||
}
|
||||
|
||||
const char* const inFilename = argv[1];
|
||||
|
||||
char* const outFilename = createOutFilename_orDie(inFilename);
|
||||
compressFile_orDie(inFilename, outFilename, 1);
|
||||
|
||||
free(outFilename); /* not strictly required, since program execution stops there,
|
||||
* but some static analyzer main complain otherwise */
|
||||
return 0;
|
||||
}
|
82
examples/streaming_decompression.c
Normal file
82
examples/streaming_decompression.c
Normal file
@ -0,0 +1,82 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdio.h> // fprintf
|
||||
#include <stdlib.h> // free
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD()
|
||||
|
||||
static void decompressFile_orDie(const char* fname)
|
||||
{
|
||||
FILE* const fin = fopen_orDie(fname, "rb");
|
||||
size_t const buffInSize = ZSTD_DStreamInSize();
|
||||
void* const buffIn = malloc_orDie(buffInSize);
|
||||
FILE* const fout = stdout;
|
||||
size_t const buffOutSize = ZSTD_DStreamOutSize(); /* Guarantee to successfully flush at least one complete compressed block in all circumstances. */
|
||||
void* const buffOut = malloc_orDie(buffOutSize);
|
||||
|
||||
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
|
||||
CHECK(dctx != NULL, "ZSTD_createDCtx() failed!");
|
||||
|
||||
/* This loop assumes that the input file is one or more concatenated zstd
|
||||
* streams. This example won't work if there is trailing non-zstd data at
|
||||
* the end, but streaming decompression in general handles this case.
|
||||
* ZSTD_decompressStream() returns 0 exactly when the frame is completed,
|
||||
* and doesn't consume input after the frame.
|
||||
*/
|
||||
size_t const toRead = buffInSize;
|
||||
size_t read;
|
||||
while ( (read = fread_orDie(buffIn, toRead, fin)) ) {
|
||||
ZSTD_inBuffer input = { buffIn, read, 0 };
|
||||
/* Given a valid frame, zstd won't consume the last byte of the frame
|
||||
* until it has flushed all of the decompressed data of the frame.
|
||||
* Therefore, instead of checking if the return code is 0, we can
|
||||
* decompress just check if input.pos < input.size.
|
||||
*/
|
||||
while (input.pos < input.size) {
|
||||
ZSTD_outBuffer output = { buffOut, buffOutSize, 0 };
|
||||
/* The return code is zero if the frame is complete, but there may
|
||||
* be multiple frames concatenated together. Zstd will automatically
|
||||
* reset the context when a frame is complete. Still, calling
|
||||
* ZSTD_DCtx_reset() can be useful to reset the context to a clean
|
||||
* state, for instance if the last decompression call returned an
|
||||
* error.
|
||||
*/
|
||||
size_t const ret = ZSTD_decompressStream(dctx, &output , &input);
|
||||
CHECK_ZSTD(ret);
|
||||
fwrite_orDie(buffOut, output.pos, fout);
|
||||
}
|
||||
}
|
||||
|
||||
ZSTD_freeDCtx(dctx);
|
||||
fclose_orDie(fin);
|
||||
fclose_orDie(fout);
|
||||
free(buffIn);
|
||||
free(buffOut);
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, const char** argv)
|
||||
{
|
||||
const char* const exeName = argv[0];
|
||||
|
||||
if (argc!=2) {
|
||||
fprintf(stderr, "wrong arguments\n");
|
||||
fprintf(stderr, "usage:\n");
|
||||
fprintf(stderr, "%s FILE\n", exeName);
|
||||
return 1;
|
||||
}
|
||||
|
||||
const char* const inFilename = argv[1];
|
||||
|
||||
decompressFile_orDie(inFilename);
|
||||
return 0;
|
||||
}
|
137
examples/streaming_memory_usage.c
Normal file
137
examples/streaming_memory_usage.c
Normal file
@ -0,0 +1,137 @@
|
||||
/*
|
||||
* Copyright (c) 2017-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
|
||||
/*=== Tuning parameter ===*/
|
||||
#ifndef MAX_TESTED_LEVEL
|
||||
#define MAX_TESTED_LEVEL 12
|
||||
#endif
|
||||
|
||||
|
||||
/*=== Dependencies ===*/
|
||||
#include <stdio.h> // printf
|
||||
#define ZSTD_STATIC_LINKING_ONLY
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD()
|
||||
|
||||
|
||||
/*=== functions ===*/
|
||||
|
||||
/*! readU32FromChar() :
|
||||
@return : unsigned integer value read from input in `char` format
|
||||
allows and interprets K, KB, KiB, M, MB and MiB suffix.
|
||||
Will also modify `*stringPtr`, advancing it to position where it stopped reading.
|
||||
Note : function result can overflow if digit string > MAX_UINT */
|
||||
static unsigned readU32FromChar(const char** stringPtr)
|
||||
{
|
||||
unsigned result = 0;
|
||||
while ((**stringPtr >='0') && (**stringPtr <='9'))
|
||||
result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
|
||||
if ((**stringPtr=='K') || (**stringPtr=='M')) {
|
||||
result <<= 10;
|
||||
if (**stringPtr=='M') result <<= 10;
|
||||
(*stringPtr)++ ;
|
||||
if (**stringPtr=='i') (*stringPtr)++;
|
||||
if (**stringPtr=='B') (*stringPtr)++;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char const *argv[]) {
|
||||
|
||||
printf("\n Zstandard (v%s) memory usage for streaming : \n\n", ZSTD_versionString());
|
||||
|
||||
unsigned wLog = 0;
|
||||
if (argc > 1) {
|
||||
const char* valStr = argv[1];
|
||||
wLog = readU32FromChar(&valStr);
|
||||
}
|
||||
|
||||
int compressionLevel;
|
||||
for (compressionLevel = 1; compressionLevel <= MAX_TESTED_LEVEL; compressionLevel++) {
|
||||
#define INPUT_SIZE 5
|
||||
#define COMPRESSED_SIZE 128
|
||||
char const dataToCompress[INPUT_SIZE] = "abcde";
|
||||
char compressedData[COMPRESSED_SIZE];
|
||||
char decompressedData[INPUT_SIZE];
|
||||
/* the ZSTD_CCtx_params structure is a way to save parameters and use
|
||||
* them across multiple contexts. We use them here so we can call the
|
||||
* function ZSTD_estimateCStreamSize_usingCCtxParams().
|
||||
*/
|
||||
ZSTD_CCtx_params* const cctxParams = ZSTD_createCCtxParams();
|
||||
CHECK(cctxParams != NULL, "ZSTD_createCCtxParams() failed!");
|
||||
|
||||
/* Set the compression level. */
|
||||
CHECK_ZSTD( ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_compressionLevel, compressionLevel) );
|
||||
/* Set the window log.
|
||||
* The value 0 means use the default window log, which is equivalent to
|
||||
* not setting it.
|
||||
*/
|
||||
CHECK_ZSTD( ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_windowLog, wLog) );
|
||||
|
||||
/* Force the compressor to allocate the maximum memory size for a given
|
||||
* level by not providing the pledged source size, or calling
|
||||
* ZSTD_compressStream2() with ZSTD_e_end.
|
||||
*/
|
||||
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
|
||||
CHECK(cctx != NULL, "ZSTD_createCCtx() failed!");
|
||||
CHECK_ZSTD( ZSTD_CCtx_setParametersUsingCCtxParams(cctx, cctxParams) );
|
||||
size_t compressedSize;
|
||||
{
|
||||
ZSTD_inBuffer inBuff = { dataToCompress, sizeof(dataToCompress), 0 };
|
||||
ZSTD_outBuffer outBuff = { compressedData, sizeof(compressedData), 0 };
|
||||
CHECK_ZSTD( ZSTD_compressStream(cctx, &outBuff, &inBuff) );
|
||||
size_t const remaining = ZSTD_endStream(cctx, &outBuff);
|
||||
CHECK_ZSTD(remaining);
|
||||
CHECK(remaining == 0, "Frame not flushed!");
|
||||
compressedSize = outBuff.pos;
|
||||
}
|
||||
|
||||
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
|
||||
CHECK(dctx != NULL, "ZSTD_createDCtx() failed!");
|
||||
/* Set the maximum allowed window log.
|
||||
* The value 0 means use the default window log, which is equivalent to
|
||||
* not setting it.
|
||||
*/
|
||||
CHECK_ZSTD( ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, wLog) );
|
||||
/* forces decompressor to use maximum memory size, since the
|
||||
* decompressed size is not stored in the frame header.
|
||||
*/
|
||||
{ ZSTD_inBuffer inBuff = { compressedData, compressedSize, 0 };
|
||||
ZSTD_outBuffer outBuff = { decompressedData, sizeof(decompressedData), 0 };
|
||||
size_t const remaining = ZSTD_decompressStream(dctx, &outBuff, &inBuff);
|
||||
CHECK_ZSTD(remaining);
|
||||
CHECK(remaining == 0, "Frame not complete!");
|
||||
CHECK(outBuff.pos == sizeof(dataToCompress), "Bad decompression!");
|
||||
}
|
||||
|
||||
size_t const cstreamSize = ZSTD_sizeof_CStream(cctx);
|
||||
size_t const cstreamEstimatedSize = ZSTD_estimateCStreamSize_usingCCtxParams(cctxParams);
|
||||
size_t const dstreamSize = ZSTD_sizeof_DStream(dctx);
|
||||
size_t const dstreamEstimatedSize = ZSTD_estimateDStreamSize_fromFrame(compressedData, compressedSize);
|
||||
|
||||
CHECK(cstreamSize <= cstreamEstimatedSize, "Compression mem (%u) > estimated (%u)",
|
||||
(unsigned)cstreamSize, (unsigned)cstreamEstimatedSize);
|
||||
CHECK(dstreamSize <= dstreamEstimatedSize, "Decompression mem (%u) > estimated (%u)",
|
||||
(unsigned)dstreamSize, (unsigned)dstreamEstimatedSize);
|
||||
|
||||
printf("Level %2i : Compression Mem = %5u KB (estimated : %5u KB) ; Decompression Mem = %4u KB (estimated : %5u KB)\n",
|
||||
compressionLevel,
|
||||
(unsigned)(cstreamSize>>10), (unsigned)(cstreamEstimatedSize>>10),
|
||||
(unsigned)(dstreamSize>>10), (unsigned)(dstreamEstimatedSize>>10));
|
||||
|
||||
ZSTD_freeDCtx(dctx);
|
||||
ZSTD_freeCCtx(cctx);
|
||||
ZSTD_freeCCtxParams(cctxParams);
|
||||
if (wLog) break; /* single test */
|
||||
}
|
||||
return 0;
|
||||
}
|
10
lib/Makefile
10
lib/Makefile
@ -25,7 +25,7 @@ endif
|
||||
CFLAGS ?= -O3
|
||||
DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
|
||||
-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
|
||||
-Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \
|
||||
-Wstrict-prototypes -Wundef -Wpointer-arith \
|
||||
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
|
||||
-Wredundant-decls -Wmissing-prototypes -Wc++-compat
|
||||
CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)
|
||||
@ -56,6 +56,7 @@ ZSTD_FORCE_DECOMPRESS_SHORT ?= 0
|
||||
ZSTD_FORCE_DECOMPRESS_LONG ?= 0
|
||||
ZSTD_NO_INLINE ?= 0
|
||||
ZSTD_STRIP_ERROR_STRINGS ?= 0
|
||||
ZSTD_LEGACY_MULTITHREADED_API ?= 0
|
||||
|
||||
ifeq ($(ZSTD_LIB_COMPRESSION), 0)
|
||||
ZSTD_LIB_DICTBUILDER = 0
|
||||
@ -107,6 +108,10 @@ ifneq ($(ZSTD_STRIP_ERROR_STRINGS), 0)
|
||||
CFLAGS += -DZSTD_STRIP_ERROR_STRINGS
|
||||
endif
|
||||
|
||||
ifneq ($(ZSTD_LEGACY_MULTITHREADED_API), 0)
|
||||
CFLAGS += -DZSTD_LEGACY_MULTITHREADED_API
|
||||
endif
|
||||
|
||||
ifneq ($(ZSTD_LEGACY_SUPPORT), 0)
|
||||
ifeq ($(shell test $(ZSTD_LEGACY_SUPPORT) -lt 8; echo $$?), 0)
|
||||
ZSTD_FILES += $(shell ls legacy/*.c | $(GREP) 'v0[$(ZSTD_LEGACY_SUPPORT)-7]')
|
||||
@ -151,8 +156,7 @@ ifneq (,$(filter Windows%,$(OS)))
|
||||
LIBZSTD = dll\libzstd.dll
|
||||
$(LIBZSTD): $(ZSTD_FILES)
|
||||
@echo compiling dynamic library $(LIBVER)
|
||||
@$(CC) $(FLAGS) -DZSTD_DLL_EXPORT=1 -shared $^ -o $@
|
||||
dlltool -D $@ -d dll\libzstd.def -l dll\libzstd.lib
|
||||
$(CC) $(FLAGS) -DZSTD_DLL_EXPORT=1 -Wl,--out-implib,dll\libzstd.lib -shared $^ -o $@
|
||||
|
||||
else
|
||||
|
||||
|
@ -31,8 +31,6 @@ note that it's necessary to request the `-pthread` flag during link stage.
|
||||
|
||||
Multithreading capabilities are exposed
|
||||
via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.3.8/lib/zstd.h#L592).
|
||||
This API is still labelled experimental,
|
||||
but is expected to become "stable" in the near future.
|
||||
|
||||
|
||||
#### API
|
||||
@ -110,6 +108,10 @@ The file structure is designed to make this selection manually achievable for an
|
||||
which removes the error messages that are otherwise returned by
|
||||
`ZSTD_getErrorName`.
|
||||
|
||||
- While invoking `make libzstd`, the build macro `ZSTD_LEGACY_MULTITHREADED_API=1`
|
||||
will expose the deprecated `ZSTDMT` API exposed by `zstdmt_compress.h` in
|
||||
the shared library, which is now hidden by default.
|
||||
|
||||
|
||||
#### Windows : using MinGW+MSYS to create DLL
|
||||
|
||||
|
@ -40,7 +40,7 @@
|
||||
|
||||
/**
|
||||
* FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
|
||||
* parameters. They must be inlined for the compiler to elimininate the constant
|
||||
* parameters. They must be inlined for the compiler to eliminate the constant
|
||||
* branches.
|
||||
*/
|
||||
#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
|
||||
|
@ -358,7 +358,7 @@ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size
|
||||
typedef enum {
|
||||
FSE_repeat_none, /**< Cannot use the previous table */
|
||||
FSE_repeat_check, /**< Can use the previous table but it must be checked */
|
||||
FSE_repeat_valid /**< Can use the previous table and it is asumed to be valid */
|
||||
FSE_repeat_valid /**< Can use the previous table and it is assumed to be valid */
|
||||
} FSE_repeat;
|
||||
|
||||
/* *****************************************
|
||||
|
@ -14,8 +14,8 @@
|
||||
* This file will hold wrapper for systems, which do not support pthreads
|
||||
*/
|
||||
|
||||
/* create fake symbol to avoid empty trnaslation unit warning */
|
||||
int g_ZSTD_threading_useles_symbol;
|
||||
/* create fake symbol to avoid empty translation unit warning */
|
||||
int g_ZSTD_threading_useless_symbol;
|
||||
|
||||
#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
|
||||
|
||||
|
@ -66,10 +66,10 @@
|
||||
/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
|
||||
|
||||
/*!XXH_FORCE_NATIVE_FORMAT :
|
||||
* By default, xxHash library provides endian-independant Hash values, based on little-endian convention.
|
||||
* By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
|
||||
* Results are therefore identical for little-endian and big-endian CPU.
|
||||
* This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
|
||||
* Should endian-independance be of no importance for your application, you may set the #define below to 1,
|
||||
* Should endian-independence be of no importance for your application, you may set the #define below to 1,
|
||||
* to improve speed for Big-endian CPU.
|
||||
* This option has no impact on Little_Endian CPU.
|
||||
*/
|
||||
|
@ -53,8 +53,50 @@ extern "C" {
|
||||
#undef MAX
|
||||
#define MIN(a,b) ((a)<(b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a)>(b) ? (a) : (b))
|
||||
#define CHECK_F(f) { size_t const errcod = f; if (ERR_isError(errcod)) return errcod; } /* check and Forward error code */
|
||||
#define CHECK_E(f, e) { size_t const errcod = f; if (ERR_isError(errcod)) return ERROR(e); } /* check and send Error code */
|
||||
|
||||
/**
|
||||
* Return the specified error if the condition evaluates to true.
|
||||
*
|
||||
* In debug modes, prints additional information. In order to do that
|
||||
* (particularly, printing the conditional that failed), this can't just wrap
|
||||
* RETURN_ERROR().
|
||||
*/
|
||||
#define RETURN_ERROR_IF(cond, err, ...) \
|
||||
if (cond) { \
|
||||
RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
|
||||
RAWLOG(3, ": " __VA_ARGS__); \
|
||||
RAWLOG(3, "\n"); \
|
||||
return ERROR(err); \
|
||||
}
|
||||
|
||||
/**
|
||||
* Unconditionally return the specified error.
|
||||
*
|
||||
* In debug modes, prints additional information.
|
||||
*/
|
||||
#define RETURN_ERROR(err, ...) \
|
||||
do { \
|
||||
RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
|
||||
RAWLOG(3, ": " __VA_ARGS__); \
|
||||
RAWLOG(3, "\n"); \
|
||||
return ERROR(err); \
|
||||
} while(0);
|
||||
|
||||
/**
|
||||
* If the provided expression evaluates to an error code, returns that error code.
|
||||
*
|
||||
* In debug modes, prints additional information.
|
||||
*/
|
||||
#define FORWARD_IF_ERROR(err, ...) \
|
||||
do { \
|
||||
size_t const err_code = (err); \
|
||||
if (ERR_isError(err_code)) { \
|
||||
RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
|
||||
RAWLOG(3, ": " __VA_ARGS__); \
|
||||
RAWLOG(3, "\n"); \
|
||||
return err_code; \
|
||||
} \
|
||||
} while(0);
|
||||
|
||||
|
||||
/*-*************************************
|
||||
@ -200,6 +242,17 @@ typedef struct {
|
||||
U32 longLengthPos;
|
||||
} seqStore_t;
|
||||
|
||||
/**
|
||||
* Contains the compressed frame size and an upper-bound for the decompressed frame size.
|
||||
* Note: before using `compressedSize`, check for errors using ZSTD_isError().
|
||||
* similarly, before using `decompressedBound`, check for errors using:
|
||||
* `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
|
||||
*/
|
||||
typedef struct {
|
||||
size_t compressedSize;
|
||||
unsigned long long decompressedBound;
|
||||
} ZSTD_frameSizeInfo; /* decompress & legacy */
|
||||
|
||||
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */
|
||||
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
|
||||
|
||||
|
@ -129,9 +129,9 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
||||
{ U32 position = 0;
|
||||
U32 symbol;
|
||||
for (symbol=0; symbol<=maxSymbolValue; symbol++) {
|
||||
int nbOccurences;
|
||||
int nbOccurrences;
|
||||
int const freq = normalizedCounter[symbol];
|
||||
for (nbOccurences=0; nbOccurences<freq; nbOccurences++) {
|
||||
for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
|
||||
tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
|
||||
position = (position + step) & tableMask;
|
||||
while (position > highThreshold)
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -36,9 +36,9 @@ extern "C" {
|
||||
#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 now means "unsorted".
|
||||
It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
|
||||
It's not a big deal though : candidate will just be sorted again.
|
||||
Additionnally, candidate position 1 will be lost.
|
||||
Additionally, candidate position 1 will be lost.
|
||||
But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
|
||||
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be misdhandled after table re-use with a different strategy
|
||||
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy
|
||||
Constant required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
|
||||
|
||||
|
||||
@ -54,6 +54,14 @@ typedef struct ZSTD_prefixDict_s {
|
||||
ZSTD_dictContentType_e dictContentType;
|
||||
} ZSTD_prefixDict;
|
||||
|
||||
typedef struct {
|
||||
void* dictBuffer;
|
||||
void const* dict;
|
||||
size_t dictSize;
|
||||
ZSTD_dictContentType_e dictContentType;
|
||||
ZSTD_CDict* cdict;
|
||||
} ZSTD_localDict;
|
||||
|
||||
typedef struct {
|
||||
U32 CTable[HUF_CTABLE_SIZE_U32(255)];
|
||||
HUF_repeat repeatMode;
|
||||
@ -107,6 +115,7 @@ typedef struct {
|
||||
U32 offCodeSumBasePrice; /* to compare to log2(offreq) */
|
||||
ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */
|
||||
const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */
|
||||
ZSTD_literalCompressionMode_e literalCompressionMode;
|
||||
} optState_t;
|
||||
|
||||
typedef struct {
|
||||
@ -188,6 +197,7 @@ struct ZSTD_CCtx_params_s {
|
||||
* 1<<wLog, even for dictionary */
|
||||
|
||||
ZSTD_dictAttachPref_e attachDictPref;
|
||||
ZSTD_literalCompressionMode_e literalCompressionMode;
|
||||
|
||||
/* Multithreading: used to pass parameters to mtctx */
|
||||
int nbWorkers;
|
||||
@ -243,7 +253,7 @@ struct ZSTD_CCtx_s {
|
||||
U32 frameEnded;
|
||||
|
||||
/* Dictionary */
|
||||
ZSTD_CDict* cdictLocal;
|
||||
ZSTD_localDict localDict;
|
||||
const ZSTD_CDict* cdict;
|
||||
ZSTD_prefixDict prefixDict; /* single-usage dictionary */
|
||||
|
||||
@ -806,13 +816,6 @@ size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
|
||||
|
||||
void ZSTD_resetSeqStore(seqStore_t* ssPtr);
|
||||
|
||||
/*! ZSTD_compressStream_generic() :
|
||||
* Private use only. To be called from zstdmt_compress.c in single-thread mode. */
|
||||
size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
||||
ZSTD_outBuffer* output,
|
||||
ZSTD_inBuffer* input,
|
||||
ZSTD_EndDirective const flushMode);
|
||||
|
||||
/*! ZSTD_getCParamsFromCDict() :
|
||||
* as the name implies */
|
||||
ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
|
||||
@ -839,7 +842,7 @@ size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
|
||||
/* ZSTD_writeLastEmptyBlock() :
|
||||
* output an empty Block with end-of-frame mark to complete a frame
|
||||
* @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
|
||||
* or an error code if `dstCapcity` is too small (<ZSTD_blockHeaderSize)
|
||||
* or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
|
||||
*/
|
||||
size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
|
||||
|
||||
|
@ -45,7 +45,155 @@ FORCE_INLINE_TEMPLATE
|
||||
size_t ZSTD_compressBlock_fast_generic(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize,
|
||||
U32 const mls, ZSTD_dictMode_e const dictMode)
|
||||
U32 const mls)
|
||||
{
|
||||
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
||||
U32* const hashTable = ms->hashTable;
|
||||
U32 const hlog = cParams->hashLog;
|
||||
/* support stepSize of 0 */
|
||||
size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
|
||||
const BYTE* const base = ms->window.base;
|
||||
const BYTE* const istart = (const BYTE*)src;
|
||||
/* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */
|
||||
const BYTE* ip0 = istart;
|
||||
const BYTE* ip1;
|
||||
const BYTE* anchor = istart;
|
||||
const U32 prefixStartIndex = ms->window.dictLimit;
|
||||
const BYTE* const prefixStart = base + prefixStartIndex;
|
||||
const BYTE* const iend = istart + srcSize;
|
||||
const BYTE* const ilimit = iend - HASH_READ_SIZE;
|
||||
U32 offset_1=rep[0], offset_2=rep[1];
|
||||
U32 offsetSaved = 0;
|
||||
|
||||
/* init */
|
||||
ip0 += (ip0 == prefixStart);
|
||||
ip1 = ip0 + 1;
|
||||
{
|
||||
U32 const maxRep = (U32)(ip0 - prefixStart);
|
||||
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
|
||||
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
|
||||
}
|
||||
|
||||
/* Main Search Loop */
|
||||
while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */
|
||||
size_t mLength;
|
||||
BYTE const* ip2 = ip0 + 2;
|
||||
size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls);
|
||||
U32 const val0 = MEM_read32(ip0);
|
||||
size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls);
|
||||
U32 const val1 = MEM_read32(ip1);
|
||||
U32 const current0 = (U32)(ip0-base);
|
||||
U32 const current1 = (U32)(ip1-base);
|
||||
U32 const matchIndex0 = hashTable[h0];
|
||||
U32 const matchIndex1 = hashTable[h1];
|
||||
BYTE const* repMatch = ip2-offset_1;
|
||||
const BYTE* match0 = base + matchIndex0;
|
||||
const BYTE* match1 = base + matchIndex1;
|
||||
U32 offcode;
|
||||
hashTable[h0] = current0; /* update hash table */
|
||||
hashTable[h1] = current1; /* update hash table */
|
||||
|
||||
assert(ip0 + 1 == ip1);
|
||||
|
||||
if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
|
||||
mLength = ip2[-1] == repMatch[-1] ? 1 : 0;
|
||||
ip0 = ip2 - mLength;
|
||||
match0 = repMatch - mLength;
|
||||
offcode = 0;
|
||||
goto _match;
|
||||
}
|
||||
if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) {
|
||||
/* found a regular match */
|
||||
goto _offset;
|
||||
}
|
||||
if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) {
|
||||
/* found a regular match after one literal */
|
||||
ip0 = ip1;
|
||||
match0 = match1;
|
||||
goto _offset;
|
||||
}
|
||||
{
|
||||
size_t const step = ((ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
|
||||
assert(step >= 2);
|
||||
ip0 += step;
|
||||
ip1 += step;
|
||||
continue;
|
||||
}
|
||||
_offset: /* Requires: ip0, match0 */
|
||||
/* Compute the offset code */
|
||||
offset_2 = offset_1;
|
||||
offset_1 = (U32)(ip0-match0);
|
||||
offcode = offset_1 + ZSTD_REP_MOVE;
|
||||
mLength = 0;
|
||||
/* Count the backwards match length */
|
||||
while (((ip0>anchor) & (match0>prefixStart))
|
||||
&& (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */
|
||||
|
||||
_match: /* Requires: ip0, match0, offcode */
|
||||
/* Count the forward length */
|
||||
mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4;
|
||||
ZSTD_storeSeq(seqStore, ip0-anchor, anchor, offcode, mLength-MINMATCH);
|
||||
/* match found */
|
||||
ip0 += mLength;
|
||||
anchor = ip0;
|
||||
ip1 = ip0 + 1;
|
||||
|
||||
if (ip0 <= ilimit) {
|
||||
/* Fill Table */
|
||||
assert(base+current0+2 > istart); /* check base overflow */
|
||||
hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
|
||||
hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
|
||||
|
||||
while ( (ip0 <= ilimit)
|
||||
&& ( (offset_2>0)
|
||||
& (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) )) {
|
||||
/* store sequence */
|
||||
size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
|
||||
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
|
||||
hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
|
||||
ip0 += rLength;
|
||||
ip1 = ip0 + 1;
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
|
||||
anchor = ip0;
|
||||
continue; /* faster when present (confirmed on gcc-8) ... (?) */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* save reps for next block */
|
||||
rep[0] = offset_1 ? offset_1 : offsetSaved;
|
||||
rep[1] = offset_2 ? offset_2 : offsetSaved;
|
||||
|
||||
/* Return the last literals size */
|
||||
return iend - anchor;
|
||||
}
|
||||
|
||||
|
||||
size_t ZSTD_compressBlock_fast(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize)
|
||||
{
|
||||
ZSTD_compressionParameters const* cParams = &ms->cParams;
|
||||
U32 const mls = cParams->minMatch;
|
||||
assert(ms->dictMatchState == NULL);
|
||||
switch(mls)
|
||||
{
|
||||
default: /* includes case 3 */
|
||||
case 4 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4);
|
||||
case 5 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5);
|
||||
case 6 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6);
|
||||
case 7 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7);
|
||||
}
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE
|
||||
size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize, U32 const mls)
|
||||
{
|
||||
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
||||
U32* const hashTable = ms->hashTable;
|
||||
@ -64,46 +212,26 @@ size_t ZSTD_compressBlock_fast_generic(
|
||||
U32 offsetSaved = 0;
|
||||
|
||||
const ZSTD_matchState_t* const dms = ms->dictMatchState;
|
||||
const ZSTD_compressionParameters* const dictCParams =
|
||||
dictMode == ZSTD_dictMatchState ?
|
||||
&dms->cParams : NULL;
|
||||
const U32* const dictHashTable = dictMode == ZSTD_dictMatchState ?
|
||||
dms->hashTable : NULL;
|
||||
const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ?
|
||||
dms->window.dictLimit : 0;
|
||||
const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
|
||||
dms->window.base : NULL;
|
||||
const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ?
|
||||
dictBase + dictStartIndex : NULL;
|
||||
const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
|
||||
dms->window.nextSrc : NULL;
|
||||
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
|
||||
prefixStartIndex - (U32)(dictEnd - dictBase) :
|
||||
0;
|
||||
const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
|
||||
const U32* const dictHashTable = dms->hashTable;
|
||||
const U32 dictStartIndex = dms->window.dictLimit;
|
||||
const BYTE* const dictBase = dms->window.base;
|
||||
const BYTE* const dictStart = dictBase + dictStartIndex;
|
||||
const BYTE* const dictEnd = dms->window.nextSrc;
|
||||
const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase);
|
||||
const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart);
|
||||
const U32 dictHLog = dictMode == ZSTD_dictMatchState ?
|
||||
dictCParams->hashLog : hlog;
|
||||
|
||||
assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
|
||||
const U32 dictHLog = dictCParams->hashLog;
|
||||
|
||||
/* otherwise, we would get index underflow when translating a dict index
|
||||
* into a local index */
|
||||
assert(dictMode != ZSTD_dictMatchState
|
||||
|| prefixStartIndex >= (U32)(dictEnd - dictBase));
|
||||
assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
|
||||
|
||||
/* init */
|
||||
ip += (dictAndPrefixLength == 0);
|
||||
if (dictMode == ZSTD_noDict) {
|
||||
U32 const maxRep = (U32)(ip - prefixStart);
|
||||
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
|
||||
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
|
||||
}
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
/* dictMatchState repCode checks don't currently handle repCode == 0
|
||||
* disabling. */
|
||||
assert(offset_1 <= dictAndPrefixLength);
|
||||
assert(offset_2 <= dictAndPrefixLength);
|
||||
}
|
||||
/* dictMatchState repCode checks don't currently handle repCode == 0
|
||||
* disabling. */
|
||||
assert(offset_1 <= dictAndPrefixLength);
|
||||
assert(offset_2 <= dictAndPrefixLength);
|
||||
|
||||
/* Main Search Loop */
|
||||
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
|
||||
@ -113,50 +241,37 @@ size_t ZSTD_compressBlock_fast_generic(
|
||||
U32 const matchIndex = hashTable[h];
|
||||
const BYTE* match = base + matchIndex;
|
||||
const U32 repIndex = current + 1 - offset_1;
|
||||
const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
|
||||
&& repIndex < prefixStartIndex) ?
|
||||
const BYTE* repMatch = (repIndex < prefixStartIndex) ?
|
||||
dictBase + (repIndex - dictIndexDelta) :
|
||||
base + repIndex;
|
||||
hashTable[h] = current; /* update hash table */
|
||||
|
||||
if ( (dictMode == ZSTD_dictMatchState)
|
||||
&& ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
|
||||
if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
|
||||
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
|
||||
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
|
||||
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
|
||||
ip++;
|
||||
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
|
||||
} else if ( dictMode == ZSTD_noDict
|
||||
&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
|
||||
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
|
||||
ip++;
|
||||
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
|
||||
} else if ( (matchIndex <= prefixStartIndex) ) {
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
|
||||
U32 const dictMatchIndex = dictHashTable[dictHash];
|
||||
const BYTE* dictMatch = dictBase + dictMatchIndex;
|
||||
if (dictMatchIndex <= dictStartIndex ||
|
||||
MEM_read32(dictMatch) != MEM_read32(ip)) {
|
||||
assert(stepSize >= 1);
|
||||
ip += ((ip-anchor) >> kSearchStrength) + stepSize;
|
||||
continue;
|
||||
} else {
|
||||
/* found a dict match */
|
||||
U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
|
||||
mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
|
||||
while (((ip>anchor) & (dictMatch>dictStart))
|
||||
&& (ip[-1] == dictMatch[-1])) {
|
||||
ip--; dictMatch--; mLength++;
|
||||
} /* catch up */
|
||||
offset_2 = offset_1;
|
||||
offset_1 = offset;
|
||||
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
||||
}
|
||||
} else {
|
||||
size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
|
||||
U32 const dictMatchIndex = dictHashTable[dictHash];
|
||||
const BYTE* dictMatch = dictBase + dictMatchIndex;
|
||||
if (dictMatchIndex <= dictStartIndex ||
|
||||
MEM_read32(dictMatch) != MEM_read32(ip)) {
|
||||
assert(stepSize >= 1);
|
||||
ip += ((ip-anchor) >> kSearchStrength) + stepSize;
|
||||
continue;
|
||||
} else {
|
||||
/* found a dict match */
|
||||
U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
|
||||
mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
|
||||
while (((ip>anchor) & (dictMatch>dictStart))
|
||||
&& (ip[-1] == dictMatch[-1])) {
|
||||
ip--; dictMatch--; mLength++;
|
||||
} /* catch up */
|
||||
offset_2 = offset_1;
|
||||
offset_1 = offset;
|
||||
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
||||
}
|
||||
} else if (MEM_read32(match) != MEM_read32(ip)) {
|
||||
/* it's not a match, and we're not going to check the dictionary */
|
||||
@ -185,41 +300,27 @@ size_t ZSTD_compressBlock_fast_generic(
|
||||
hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
|
||||
|
||||
/* check immediate repcode */
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
while (ip <= ilimit) {
|
||||
U32 const current2 = (U32)(ip-base);
|
||||
U32 const repIndex2 = current2 - offset_2;
|
||||
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
|
||||
dictBase - dictIndexDelta + repIndex2 :
|
||||
base + repIndex2;
|
||||
if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
|
||||
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
||||
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
|
||||
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
|
||||
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
|
||||
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
|
||||
ip += repLength2;
|
||||
anchor = ip;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (dictMode == ZSTD_noDict) {
|
||||
while ( (ip <= ilimit)
|
||||
&& ( (offset_2>0)
|
||||
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
|
||||
/* store sequence */
|
||||
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
|
||||
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
|
||||
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base);
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
|
||||
ip += rLength;
|
||||
while (ip <= ilimit) {
|
||||
U32 const current2 = (U32)(ip-base);
|
||||
U32 const repIndex2 = current2 - offset_2;
|
||||
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
|
||||
dictBase - dictIndexDelta + repIndex2 :
|
||||
base + repIndex2;
|
||||
if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
|
||||
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
||||
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
|
||||
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
|
||||
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
|
||||
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
|
||||
ip += repLength2;
|
||||
anchor = ip;
|
||||
continue; /* faster when present ... (?) */
|
||||
} } } }
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* save reps for next block */
|
||||
rep[0] = offset_1 ? offset_1 : offsetSaved;
|
||||
@ -229,28 +330,6 @@ size_t ZSTD_compressBlock_fast_generic(
|
||||
return iend - anchor;
|
||||
}
|
||||
|
||||
|
||||
size_t ZSTD_compressBlock_fast(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize)
|
||||
{
|
||||
ZSTD_compressionParameters const* cParams = &ms->cParams;
|
||||
U32 const mls = cParams->minMatch;
|
||||
assert(ms->dictMatchState == NULL);
|
||||
switch(mls)
|
||||
{
|
||||
default: /* includes case 3 */
|
||||
case 4 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
|
||||
case 5 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
|
||||
case 6 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
|
||||
case 7 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
|
||||
}
|
||||
}
|
||||
|
||||
size_t ZSTD_compressBlock_fast_dictMatchState(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize)
|
||||
@ -262,13 +341,13 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
|
||||
{
|
||||
default: /* includes case 3 */
|
||||
case 4 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
|
||||
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4);
|
||||
case 5 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
|
||||
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5);
|
||||
case 6 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
|
||||
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6);
|
||||
case 7 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
|
||||
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -19,7 +19,7 @@ extern "C" {
|
||||
|
||||
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
|
||||
|
||||
void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). pre-emptively increase value of ZSTD_DUBT_UNSORTED_MARK */
|
||||
void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
|
||||
|
||||
size_t ZSTD_compressBlock_btlazy2(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
|
@ -429,7 +429,7 @@ size_t ZSTD_ldm_generateSequences(
|
||||
*/
|
||||
assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
|
||||
/* The input could be very large (in zstdmt), so it must be broken up into
|
||||
* chunks to enforce the maximmum distance and handle overflow correction.
|
||||
* chunks to enforce the maximum distance and handle overflow correction.
|
||||
*/
|
||||
assert(sequences->pos <= sequences->size);
|
||||
assert(sequences->size <= sequences->capacity);
|
||||
|
@ -64,9 +64,15 @@ MEM_STATIC double ZSTD_fCost(U32 price)
|
||||
}
|
||||
#endif
|
||||
|
||||
static int ZSTD_compressedLiterals(optState_t const* const optPtr)
|
||||
{
|
||||
return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed;
|
||||
}
|
||||
|
||||
static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
|
||||
{
|
||||
optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
|
||||
if (ZSTD_compressedLiterals(optPtr))
|
||||
optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
|
||||
optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
|
||||
optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
|
||||
optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
|
||||
@ -99,6 +105,7 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
||||
const BYTE* const src, size_t const srcSize,
|
||||
int const optLevel)
|
||||
{
|
||||
int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
|
||||
DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
|
||||
optPtr->priceType = zop_dynamic;
|
||||
|
||||
@ -113,9 +120,10 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
||||
/* huffman table presumed generated by dictionary */
|
||||
optPtr->priceType = zop_dynamic;
|
||||
|
||||
assert(optPtr->litFreq != NULL);
|
||||
optPtr->litSum = 0;
|
||||
{ unsigned lit;
|
||||
if (compressedLiterals) {
|
||||
unsigned lit;
|
||||
assert(optPtr->litFreq != NULL);
|
||||
optPtr->litSum = 0;
|
||||
for (lit=0; lit<=MaxLit; lit++) {
|
||||
U32 const scaleLog = 11; /* scale to 2K */
|
||||
U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
|
||||
@ -163,10 +171,11 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
||||
} else { /* not a dictionary */
|
||||
|
||||
assert(optPtr->litFreq != NULL);
|
||||
{ unsigned lit = MaxLit;
|
||||
if (compressedLiterals) {
|
||||
unsigned lit = MaxLit;
|
||||
HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
|
||||
optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
|
||||
}
|
||||
optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
|
||||
|
||||
{ unsigned ll;
|
||||
for (ll=0; ll<=MaxLL; ll++)
|
||||
@ -190,7 +199,8 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
||||
|
||||
} else { /* new block : re-use previous statistics, scaled down */
|
||||
|
||||
optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
|
||||
if (compressedLiterals)
|
||||
optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
|
||||
optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
|
||||
optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
|
||||
optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
|
||||
@ -207,6 +217,10 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
|
||||
int optLevel)
|
||||
{
|
||||
if (litLength == 0) return 0;
|
||||
|
||||
if (!ZSTD_compressedLiterals(optPtr))
|
||||
return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bytes per literal. */
|
||||
|
||||
if (optPtr->priceType == zop_predef)
|
||||
return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */
|
||||
|
||||
@ -310,7 +324,8 @@ static void ZSTD_updateStats(optState_t* const optPtr,
|
||||
U32 offsetCode, U32 matchLength)
|
||||
{
|
||||
/* literals */
|
||||
{ U32 u;
|
||||
if (ZSTD_compressedLiterals(optPtr)) {
|
||||
U32 u;
|
||||
for (u=0; u < litLength; u++)
|
||||
optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
|
||||
optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
|
||||
@ -870,7 +885,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
||||
/* large match -> immediate encoding */
|
||||
{ U32 const maxML = matches[nbMatches-1].len;
|
||||
U32 const maxOffset = matches[nbMatches-1].off;
|
||||
DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new serie",
|
||||
DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
|
||||
nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));
|
||||
|
||||
if (maxML > sufficient_len) {
|
||||
@ -1108,7 +1123,8 @@ static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
|
||||
/* used in 2-pass strategy */
|
||||
MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
|
||||
{
|
||||
optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
|
||||
if (ZSTD_compressedLiterals(optPtr))
|
||||
optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
|
||||
optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
|
||||
optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
|
||||
optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
|
||||
@ -1117,7 +1133,7 @@ MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
|
||||
/* ZSTD_initStats_ultra():
|
||||
* make a first compression pass, just to seed stats with more accurate starting values.
|
||||
* only works on first block, with no dictionary and no ldm.
|
||||
* this function cannot error, hence its constract must be respected.
|
||||
* this function cannot error, hence its contract must be respected.
|
||||
*/
|
||||
static void
|
||||
ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
|
||||
|
@ -22,6 +22,7 @@
|
||||
/* ====== Dependencies ====== */
|
||||
#include <string.h> /* memcpy, memset */
|
||||
#include <limits.h> /* INT_MAX, UINT_MAX */
|
||||
#include "mem.h" /* MEM_STATIC */
|
||||
#include "pool.h" /* threadpool */
|
||||
#include "threading.h" /* mutex */
|
||||
#include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
|
||||
@ -456,7 +457,7 @@ typedef struct {
|
||||
* Must be acquired after the main mutex when acquiring both.
|
||||
*/
|
||||
ZSTD_pthread_mutex_t ldmWindowMutex;
|
||||
ZSTD_pthread_cond_t ldmWindowCond; /* Signaled when ldmWindow is udpated */
|
||||
ZSTD_pthread_cond_t ldmWindowCond; /* Signaled when ldmWindow is updated */
|
||||
ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */
|
||||
} serialState_t;
|
||||
|
||||
@ -647,7 +648,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
|
||||
buffer_t dstBuff = job->dstBuff;
|
||||
size_t lastCBlockSize = 0;
|
||||
|
||||
/* ressources */
|
||||
/* resources */
|
||||
if (cctx==NULL) JOB_ERROR(ERROR(memory_allocation));
|
||||
if (dstBuff.start == NULL) { /* streaming job : doesn't provide a dstBuffer */
|
||||
dstBuff = ZSTDMT_getBuffer(job->bufPool);
|
||||
@ -672,7 +673,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
|
||||
if (ZSTD_isError(initError)) JOB_ERROR(initError);
|
||||
} else { /* srcStart points at reloaded section */
|
||||
U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size;
|
||||
{ size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
|
||||
{ size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
|
||||
if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
|
||||
}
|
||||
{ size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
|
||||
@ -864,14 +865,10 @@ static size_t ZSTDMT_expandJobsTable (ZSTDMT_CCtx* mtctx, U32 nbWorkers) {
|
||||
* Internal use only */
|
||||
size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers)
|
||||
{
|
||||
if (nbWorkers > ZSTDMT_NBWORKERS_MAX) nbWorkers = ZSTDMT_NBWORKERS_MAX;
|
||||
params->nbWorkers = nbWorkers;
|
||||
params->overlapLog = ZSTDMT_OVERLAPLOG_DEFAULT;
|
||||
params->jobSize = 0;
|
||||
return nbWorkers;
|
||||
return ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, (int)nbWorkers);
|
||||
}
|
||||
|
||||
ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem)
|
||||
MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers, ZSTD_customMem cMem)
|
||||
{
|
||||
ZSTDMT_CCtx* mtctx;
|
||||
U32 nbJobs = nbWorkers + 2;
|
||||
@ -906,6 +903,17 @@ ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem)
|
||||
return mtctx;
|
||||
}
|
||||
|
||||
ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem)
|
||||
{
|
||||
#ifdef ZSTD_MULTITHREAD
|
||||
return ZSTDMT_createCCtx_advanced_internal(nbWorkers, cMem);
|
||||
#else
|
||||
(void)nbWorkers;
|
||||
(void)cMem;
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers)
|
||||
{
|
||||
return ZSTDMT_createCCtx_advanced(nbWorkers, ZSTD_defaultCMem);
|
||||
@ -986,26 +994,13 @@ ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
|
||||
{
|
||||
case ZSTDMT_p_jobSize :
|
||||
DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %i", value);
|
||||
if ( value != 0 /* default */
|
||||
&& value < ZSTDMT_JOBSIZE_MIN)
|
||||
value = ZSTDMT_JOBSIZE_MIN;
|
||||
assert(value >= 0);
|
||||
if (value > ZSTDMT_JOBSIZE_MAX) value = ZSTDMT_JOBSIZE_MAX;
|
||||
params->jobSize = value;
|
||||
return value;
|
||||
|
||||
return ZSTD_CCtxParams_setParameter(params, ZSTD_c_jobSize, value);
|
||||
case ZSTDMT_p_overlapLog :
|
||||
DEBUGLOG(4, "ZSTDMT_p_overlapLog : %i", value);
|
||||
if (value < ZSTD_OVERLAPLOG_MIN) value = ZSTD_OVERLAPLOG_MIN;
|
||||
if (value > ZSTD_OVERLAPLOG_MAX) value = ZSTD_OVERLAPLOG_MAX;
|
||||
params->overlapLog = value;
|
||||
return value;
|
||||
|
||||
return ZSTD_CCtxParams_setParameter(params, ZSTD_c_overlapLog, value);
|
||||
case ZSTDMT_p_rsyncable :
|
||||
value = (value != 0);
|
||||
params->rsyncable = value;
|
||||
return value;
|
||||
|
||||
DEBUGLOG(4, "ZSTD_p_rsyncable : %i", value);
|
||||
return ZSTD_CCtxParams_setParameter(params, ZSTD_c_rsyncable, value);
|
||||
default :
|
||||
return ERROR(parameter_unsupported);
|
||||
}
|
||||
@ -1021,32 +1016,29 @@ size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,
|
||||
{
|
||||
switch (parameter) {
|
||||
case ZSTDMT_p_jobSize:
|
||||
assert(mtctx->params.jobSize <= INT_MAX);
|
||||
*value = (int)(mtctx->params.jobSize);
|
||||
break;
|
||||
return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_jobSize, value);
|
||||
case ZSTDMT_p_overlapLog:
|
||||
*value = mtctx->params.overlapLog;
|
||||
break;
|
||||
return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_overlapLog, value);
|
||||
case ZSTDMT_p_rsyncable:
|
||||
*value = mtctx->params.rsyncable;
|
||||
break;
|
||||
return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_rsyncable, value);
|
||||
default:
|
||||
return ERROR(parameter_unsupported);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Sets parameters relevant to the compression job,
|
||||
* initializing others to default values. */
|
||||
static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
|
||||
{
|
||||
ZSTD_CCtx_params jobParams;
|
||||
memset(&jobParams, 0, sizeof(jobParams));
|
||||
|
||||
jobParams.cParams = params.cParams;
|
||||
jobParams.fParams = params.fParams;
|
||||
jobParams.compressionLevel = params.compressionLevel;
|
||||
|
||||
ZSTD_CCtx_params jobParams = params;
|
||||
/* Clear parameters related to multithreading */
|
||||
jobParams.forceWindow = 0;
|
||||
jobParams.nbWorkers = 0;
|
||||
jobParams.jobSize = 0;
|
||||
jobParams.overlapLog = 0;
|
||||
jobParams.rsyncable = 0;
|
||||
memset(&jobParams.ldmParams, 0, sizeof(ldmParams_t));
|
||||
memset(&jobParams.customMem, 0, sizeof(ZSTD_customMem));
|
||||
return jobParams;
|
||||
}
|
||||
|
||||
@ -1056,7 +1048,7 @@ static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
|
||||
static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
|
||||
{
|
||||
if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
|
||||
CHECK_F( ZSTDMT_expandJobsTable(mtctx, nbWorkers) );
|
||||
FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) );
|
||||
mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
|
||||
if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
|
||||
mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
|
||||
@ -1263,7 +1255,7 @@ static size_t ZSTDMT_compress_advanced_internal(
|
||||
if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize))
|
||||
return ERROR(memory_allocation);
|
||||
|
||||
CHECK_F( ZSTDMT_expandJobsTable(mtctx, nbJobs) ); /* only expands if necessary */
|
||||
FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) ); /* only expands if necessary */
|
||||
|
||||
{ unsigned u;
|
||||
for (u=0; u<nbJobs; u++) {
|
||||
@ -1396,7 +1388,7 @@ size_t ZSTDMT_initCStream_internal(
|
||||
|
||||
/* init */
|
||||
if (params.nbWorkers != mtctx->params.nbWorkers)
|
||||
CHECK_F( ZSTDMT_resize(mtctx, params.nbWorkers) );
|
||||
FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) );
|
||||
|
||||
if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
|
||||
if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
|
||||
@ -1547,7 +1539,7 @@ size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel) {
|
||||
/* ZSTDMT_writeLastEmptyBlock()
|
||||
* Write a single empty block with an end-of-frame to finish a frame.
|
||||
* Job must be created from streaming variant.
|
||||
* This function is always successfull if expected conditions are fulfilled.
|
||||
* This function is always successful if expected conditions are fulfilled.
|
||||
*/
|
||||
static void ZSTDMT_writeLastEmptyBlock(ZSTDMT_jobDescription* job)
|
||||
{
|
||||
@ -1987,7 +1979,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
||||
assert(input->pos <= input->size);
|
||||
|
||||
if (mtctx->singleBlockingThread) { /* delegate to single-thread (synchronous) */
|
||||
return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp);
|
||||
return ZSTD_compressStream2(mtctx->cctxPool->cctx[0], output, input, endOp);
|
||||
}
|
||||
|
||||
if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) {
|
||||
@ -2051,7 +2043,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
||||
|| ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) { /* must finish the frame with a zero-size block */
|
||||
size_t const jobSize = mtctx->inBuff.filled;
|
||||
assert(mtctx->inBuff.filled <= mtctx->targetSectionSize);
|
||||
CHECK_F( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) );
|
||||
FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) );
|
||||
}
|
||||
|
||||
/* check for potential compressed data ready to be flushed */
|
||||
@ -2065,7 +2057,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
||||
|
||||
size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
|
||||
{
|
||||
CHECK_F( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) );
|
||||
FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) );
|
||||
|
||||
/* recommended next input size : fill current input buffer */
|
||||
return mtctx->targetSectionSize - mtctx->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */
|
||||
@ -2082,7 +2074,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* ou
|
||||
|| ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) { /* need a last 0-size block to end frame */
|
||||
DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)",
|
||||
(U32)srcSize, (U32)endFrame);
|
||||
CHECK_F( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) );
|
||||
FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) );
|
||||
}
|
||||
|
||||
/* check if there is any data available to flush */
|
||||
|
@ -17,10 +17,25 @@
|
||||
|
||||
|
||||
/* Note : This is an internal API.
|
||||
* Some methods are still exposed (ZSTDLIB_API),
|
||||
* These APIs used to be exposed with ZSTDLIB_API,
|
||||
* because it used to be the only way to invoke MT compression.
|
||||
* Now, it's recommended to use ZSTD_compress_generic() instead.
|
||||
* These methods will stop being exposed in a future version */
|
||||
* Now, it's recommended to use ZSTD_compress2 and ZSTD_compressStream2()
|
||||
* instead.
|
||||
*
|
||||
* If you depend on these APIs and can't switch, then define
|
||||
* ZSTD_LEGACY_MULTITHREADED_API when making the dynamic library.
|
||||
* However, we may completely remove these functions in a future
|
||||
* release, so please switch soon.
|
||||
*
|
||||
* This API requires ZSTD_MULTITHREAD to be defined during compilation,
|
||||
* otherwise ZSTDMT_createCCtx*() will fail.
|
||||
*/
|
||||
|
||||
#ifdef ZSTD_LEGACY_MULTITHREADED_API
|
||||
# define ZSTDMT_API ZSTDLIB_API
|
||||
#else
|
||||
# define ZSTDMT_API
|
||||
#endif
|
||||
|
||||
/* === Dependencies === */
|
||||
#include <stddef.h> /* size_t */
|
||||
@ -40,17 +55,19 @@
|
||||
|
||||
/* === Memory management === */
|
||||
typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
|
||||
ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
|
||||
ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
|
||||
/* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
|
||||
ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
|
||||
/* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
|
||||
ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
|
||||
ZSTD_customMem cMem);
|
||||
ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
|
||||
ZSTDMT_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
|
||||
|
||||
ZSTDLIB_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
|
||||
ZSTDMT_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
|
||||
|
||||
|
||||
/* === Simple one-pass compression function === */
|
||||
|
||||
ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
||||
ZSTDMT_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
int compressionLevel);
|
||||
@ -59,31 +76,31 @@ ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
||||
|
||||
/* === Streaming functions === */
|
||||
|
||||
ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
|
||||
ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */
|
||||
ZSTDMT_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
|
||||
ZSTDMT_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */
|
||||
|
||||
ZSTDLIB_API size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
|
||||
ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
|
||||
ZSTDMT_API size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
|
||||
ZSTDMT_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
|
||||
|
||||
ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
|
||||
ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
|
||||
ZSTDMT_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
|
||||
ZSTDMT_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
|
||||
|
||||
|
||||
/* === Advanced functions and parameters === */
|
||||
|
||||
ZSTDLIB_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
const ZSTD_CDict* cdict,
|
||||
ZSTD_parameters params,
|
||||
int overlapLog);
|
||||
ZSTDMT_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
const ZSTD_CDict* cdict,
|
||||
ZSTD_parameters params,
|
||||
int overlapLog);
|
||||
|
||||
ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
|
||||
ZSTDMT_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
|
||||
const void* dict, size_t dictSize, /* dict can be released after init, a local copy is preserved within zcs */
|
||||
ZSTD_parameters params,
|
||||
unsigned long long pledgedSrcSize); /* pledgedSrcSize is optional and can be zero == unknown */
|
||||
|
||||
ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
|
||||
ZSTDMT_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
|
||||
const ZSTD_CDict* cdict,
|
||||
ZSTD_frameParameters fparams,
|
||||
unsigned long long pledgedSrcSize); /* note : zero means empty */
|
||||
@ -92,7 +109,7 @@ ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
|
||||
* List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
|
||||
typedef enum {
|
||||
ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
|
||||
ZSTDMT_p_overlapLog, /* Each job may reload a part of previous job to enhance compressionr ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
|
||||
ZSTDMT_p_overlapLog, /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
|
||||
ZSTDMT_p_rsyncable /* Enables rsyncable mode. */
|
||||
} ZSTDMT_parameter;
|
||||
|
||||
@ -101,12 +118,12 @@ typedef enum {
|
||||
* The function must be called typically after ZSTD_createCCtx() but __before ZSTDMT_init*() !__
|
||||
* Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
|
||||
* @return : 0, or an error code (which can be tested using ZSTD_isError()) */
|
||||
ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value);
|
||||
ZSTDMT_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value);
|
||||
|
||||
/* ZSTDMT_getMTCtxParameter() :
|
||||
* Query the ZSTDMT_CCtx for a parameter value.
|
||||
* @return : 0, or an error code (which can be tested using ZSTD_isError()) */
|
||||
ZSTDLIB_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value);
|
||||
ZSTDMT_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value);
|
||||
|
||||
|
||||
/*! ZSTDMT_compressStream_generic() :
|
||||
@ -116,7 +133,7 @@ ZSTDLIB_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter
|
||||
* 0 if fully flushed
|
||||
* or an error code
|
||||
* note : needs to be init using any ZSTD_initCStream*() variant */
|
||||
ZSTDLIB_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
||||
ZSTDMT_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
||||
ZSTD_outBuffer* output,
|
||||
ZSTD_inBuffer* input,
|
||||
ZSTD_EndDirective endOp);
|
||||
|
@ -105,9 +105,9 @@ ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
|
||||
ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
|
||||
|
||||
/* load entropy tables */
|
||||
CHECK_E( ZSTD_loadDEntropy(&ddict->entropy,
|
||||
ddict->dictContent, ddict->dictSize),
|
||||
dictionary_corrupted );
|
||||
RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
|
||||
&ddict->entropy, ddict->dictContent, ddict->dictSize)),
|
||||
dictionary_corrupted);
|
||||
ddict->entropyPresent = 1;
|
||||
return 0;
|
||||
}
|
||||
@ -133,7 +133,7 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
|
||||
ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
|
||||
|
||||
/* parse dictionary content */
|
||||
CHECK_F( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) );
|
||||
FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) );
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -106,6 +106,7 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
|
||||
dctx->ddictLocal = NULL;
|
||||
dctx->dictEnd = NULL;
|
||||
dctx->ddictIsCold = 0;
|
||||
dctx->dictUses = ZSTD_dont_use;
|
||||
dctx->inBuff = NULL;
|
||||
dctx->inBuffSize = 0;
|
||||
dctx->outBuffSize = 0;
|
||||
@ -147,13 +148,20 @@ ZSTD_DCtx* ZSTD_createDCtx(void)
|
||||
return ZSTD_createDCtx_advanced(ZSTD_defaultCMem);
|
||||
}
|
||||
|
||||
static void ZSTD_clearDict(ZSTD_DCtx* dctx)
|
||||
{
|
||||
ZSTD_freeDDict(dctx->ddictLocal);
|
||||
dctx->ddictLocal = NULL;
|
||||
dctx->ddict = NULL;
|
||||
dctx->dictUses = ZSTD_dont_use;
|
||||
}
|
||||
|
||||
size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
|
||||
{
|
||||
if (dctx==NULL) return 0; /* support free on NULL */
|
||||
if (dctx->staticSize) return ERROR(memory_allocation); /* not compatible with static DCtx */
|
||||
RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx");
|
||||
{ ZSTD_customMem const cMem = dctx->customMem;
|
||||
ZSTD_freeDDict(dctx->ddictLocal);
|
||||
dctx->ddictLocal = NULL;
|
||||
ZSTD_clearDict(dctx);
|
||||
ZSTD_free(dctx->inBuff, cMem);
|
||||
dctx->inBuff = NULL;
|
||||
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
|
||||
@ -203,7 +211,7 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size)
|
||||
static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format)
|
||||
{
|
||||
size_t const minInputSize = ZSTD_startingInputLength(format);
|
||||
if (srcSize < minInputSize) return ERROR(srcSize_wrong);
|
||||
RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong);
|
||||
|
||||
{ BYTE const fhd = ((const BYTE*)src)[minInputSize-1];
|
||||
U32 const dictID= fhd & 3;
|
||||
@ -238,7 +246,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s
|
||||
|
||||
memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */
|
||||
if (srcSize < minInputSize) return minInputSize;
|
||||
if (src==NULL) return ERROR(GENERIC); /* invalid parameter */
|
||||
RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter");
|
||||
|
||||
if ( (format != ZSTD_f_zstd1_magicless)
|
||||
&& (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) {
|
||||
@ -251,7 +259,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s
|
||||
zfhPtr->frameType = ZSTD_skippableFrame;
|
||||
return 0;
|
||||
}
|
||||
return ERROR(prefix_unknown);
|
||||
RETURN_ERROR(prefix_unknown);
|
||||
}
|
||||
|
||||
/* ensure there is enough `srcSize` to fully read/decode frame header */
|
||||
@ -269,14 +277,13 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s
|
||||
U64 windowSize = 0;
|
||||
U32 dictID = 0;
|
||||
U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN;
|
||||
if ((fhdByte & 0x08) != 0)
|
||||
return ERROR(frameParameter_unsupported); /* reserved bits, must be zero */
|
||||
RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported,
|
||||
"reserved bits, must be zero");
|
||||
|
||||
if (!singleSegment) {
|
||||
BYTE const wlByte = ip[pos++];
|
||||
U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
|
||||
if (windowLog > ZSTD_WINDOWLOG_MAX)
|
||||
return ERROR(frameParameter_windowTooLarge);
|
||||
RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge);
|
||||
windowSize = (1ULL << windowLog);
|
||||
windowSize += (windowSize >> 3) * (wlByte&7);
|
||||
}
|
||||
@ -348,12 +355,11 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize)
|
||||
size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE;
|
||||
U32 sizeU32;
|
||||
|
||||
if (srcSize < ZSTD_SKIPPABLEHEADERSIZE)
|
||||
return ERROR(srcSize_wrong);
|
||||
RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong);
|
||||
|
||||
sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
|
||||
if ((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32)
|
||||
return ERROR(frameParameter_unsupported);
|
||||
RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
|
||||
frameParameter_unsupported);
|
||||
|
||||
return skippableHeaderSize + sizeU32;
|
||||
}
|
||||
@ -428,13 +434,89 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he
|
||||
{
|
||||
size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format);
|
||||
if (ZSTD_isError(result)) return result; /* invalid header */
|
||||
if (result>0) return ERROR(srcSize_wrong); /* headerSize too small */
|
||||
if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID))
|
||||
return ERROR(dictionary_wrong);
|
||||
RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small");
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
/* Skip the dictID check in fuzzing mode, because it makes the search
|
||||
* harder.
|
||||
*/
|
||||
RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID),
|
||||
dictionary_wrong);
|
||||
#endif
|
||||
if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret)
|
||||
{
|
||||
ZSTD_frameSizeInfo frameSizeInfo;
|
||||
frameSizeInfo.compressedSize = ret;
|
||||
frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
|
||||
return frameSizeInfo;
|
||||
}
|
||||
|
||||
static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize)
|
||||
{
|
||||
ZSTD_frameSizeInfo frameSizeInfo;
|
||||
memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo));
|
||||
|
||||
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
|
||||
if (ZSTD_isLegacy(src, srcSize))
|
||||
return ZSTD_findFrameSizeInfoLegacy(src, srcSize);
|
||||
#endif
|
||||
|
||||
if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
|
||||
&& (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
|
||||
frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
|
||||
return frameSizeInfo;
|
||||
} else {
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
const BYTE* const ipstart = ip;
|
||||
size_t remainingSize = srcSize;
|
||||
size_t nbBlocks = 0;
|
||||
ZSTD_frameHeader zfh;
|
||||
|
||||
/* Extract Frame Header */
|
||||
{ size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize);
|
||||
if (ZSTD_isError(ret))
|
||||
return ZSTD_errorFrameSizeInfo(ret);
|
||||
if (ret > 0)
|
||||
return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
|
||||
}
|
||||
|
||||
ip += zfh.headerSize;
|
||||
remainingSize -= zfh.headerSize;
|
||||
|
||||
/* Iterate over each block */
|
||||
while (1) {
|
||||
blockProperties_t blockProperties;
|
||||
size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
|
||||
if (ZSTD_isError(cBlockSize))
|
||||
return ZSTD_errorFrameSizeInfo(cBlockSize);
|
||||
|
||||
if (ZSTD_blockHeaderSize + cBlockSize > remainingSize)
|
||||
return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
|
||||
|
||||
ip += ZSTD_blockHeaderSize + cBlockSize;
|
||||
remainingSize -= ZSTD_blockHeaderSize + cBlockSize;
|
||||
nbBlocks++;
|
||||
|
||||
if (blockProperties.lastBlock) break;
|
||||
}
|
||||
|
||||
/* Final frame content checksum */
|
||||
if (zfh.checksumFlag) {
|
||||
if (remainingSize < 4)
|
||||
return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
|
||||
ip += 4;
|
||||
}
|
||||
|
||||
frameSizeInfo.compressedSize = ip - ipstart;
|
||||
frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN)
|
||||
? zfh.frameContentSize
|
||||
: nbBlocks * zfh.blockSizeMax;
|
||||
return frameSizeInfo;
|
||||
}
|
||||
}
|
||||
|
||||
/** ZSTD_findFrameCompressedSize() :
|
||||
* compatible with legacy mode
|
||||
@ -443,53 +525,34 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he
|
||||
* @return : the compressed size of the frame starting at `src` */
|
||||
size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
|
||||
{
|
||||
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
|
||||
if (ZSTD_isLegacy(src, srcSize))
|
||||
return ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
|
||||
#endif
|
||||
if ( (srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
|
||||
&& (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START ) {
|
||||
return readSkippableFrameSize(src, srcSize);
|
||||
} else {
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
const BYTE* const ipstart = ip;
|
||||
size_t remainingSize = srcSize;
|
||||
ZSTD_frameHeader zfh;
|
||||
|
||||
/* Extract Frame Header */
|
||||
{ size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize);
|
||||
if (ZSTD_isError(ret)) return ret;
|
||||
if (ret > 0) return ERROR(srcSize_wrong);
|
||||
}
|
||||
|
||||
ip += zfh.headerSize;
|
||||
remainingSize -= zfh.headerSize;
|
||||
|
||||
/* Loop on each block */
|
||||
while (1) {
|
||||
blockProperties_t blockProperties;
|
||||
size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
|
||||
if (ZSTD_isError(cBlockSize)) return cBlockSize;
|
||||
|
||||
if (ZSTD_blockHeaderSize + cBlockSize > remainingSize)
|
||||
return ERROR(srcSize_wrong);
|
||||
|
||||
ip += ZSTD_blockHeaderSize + cBlockSize;
|
||||
remainingSize -= ZSTD_blockHeaderSize + cBlockSize;
|
||||
|
||||
if (blockProperties.lastBlock) break;
|
||||
}
|
||||
|
||||
if (zfh.checksumFlag) { /* Final frame content checksum */
|
||||
if (remainingSize < 4) return ERROR(srcSize_wrong);
|
||||
ip += 4;
|
||||
}
|
||||
|
||||
return ip - ipstart;
|
||||
}
|
||||
ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
|
||||
return frameSizeInfo.compressedSize;
|
||||
}
|
||||
|
||||
|
||||
/** ZSTD_decompressBound() :
|
||||
* compatible with legacy mode
|
||||
* `src` must point to the start of a ZSTD frame or a skippeable frame
|
||||
* `srcSize` must be at least as large as the frame contained
|
||||
* @return : the maximum decompressed size of the compressed source
|
||||
*/
|
||||
unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
|
||||
{
|
||||
unsigned long long bound = 0;
|
||||
/* Iterate over each frame */
|
||||
while (srcSize > 0) {
|
||||
ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
|
||||
size_t const compressedSize = frameSizeInfo.compressedSize;
|
||||
unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
|
||||
if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
|
||||
return ZSTD_CONTENTSIZE_ERROR;
|
||||
src = (const BYTE*)src + compressedSize;
|
||||
srcSize -= compressedSize;
|
||||
bound += decompressedBound;
|
||||
}
|
||||
return bound;
|
||||
}
|
||||
|
||||
|
||||
/*-*************************************************************
|
||||
* Frame decoding
|
||||
@ -522,9 +585,9 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
|
||||
DEBUGLOG(5, "ZSTD_copyRawBlock");
|
||||
if (dst == NULL) {
|
||||
if (srcSize == 0) return 0;
|
||||
return ERROR(dstBuffer_null);
|
||||
RETURN_ERROR(dstBuffer_null);
|
||||
}
|
||||
if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall);
|
||||
RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall);
|
||||
memcpy(dst, src, srcSize);
|
||||
return srcSize;
|
||||
}
|
||||
@ -535,9 +598,9 @@ static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity,
|
||||
{
|
||||
if (dst == NULL) {
|
||||
if (regenSize == 0) return 0;
|
||||
return ERROR(dstBuffer_null);
|
||||
RETURN_ERROR(dstBuffer_null);
|
||||
}
|
||||
if (regenSize > dstCapacity) return ERROR(dstSize_tooSmall);
|
||||
RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall);
|
||||
memset(dst, b, regenSize);
|
||||
return regenSize;
|
||||
}
|
||||
@ -560,15 +623,16 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
|
||||
DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr);
|
||||
|
||||
/* check */
|
||||
if (remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize)
|
||||
return ERROR(srcSize_wrong);
|
||||
RETURN_ERROR_IF(
|
||||
remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize,
|
||||
srcSize_wrong);
|
||||
|
||||
/* Frame Header */
|
||||
{ size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_FRAMEHEADERSIZE_PREFIX);
|
||||
if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
|
||||
if (remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize)
|
||||
return ERROR(srcSize_wrong);
|
||||
CHECK_F( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) );
|
||||
RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize,
|
||||
srcSize_wrong);
|
||||
FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) );
|
||||
ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize;
|
||||
}
|
||||
|
||||
@ -581,7 +645,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
|
||||
|
||||
ip += ZSTD_blockHeaderSize;
|
||||
remainingSrcSize -= ZSTD_blockHeaderSize;
|
||||
if (cBlockSize > remainingSrcSize) return ERROR(srcSize_wrong);
|
||||
RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong);
|
||||
|
||||
switch(blockProperties.blockType)
|
||||
{
|
||||
@ -596,7 +660,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
|
||||
break;
|
||||
case bt_reserved :
|
||||
default:
|
||||
return ERROR(corruption_detected);
|
||||
RETURN_ERROR(corruption_detected);
|
||||
}
|
||||
|
||||
if (ZSTD_isError(decodedSize)) return decodedSize;
|
||||
@ -609,15 +673,15 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
|
||||
}
|
||||
|
||||
if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {
|
||||
if ((U64)(op-ostart) != dctx->fParams.frameContentSize) {
|
||||
return ERROR(corruption_detected);
|
||||
} }
|
||||
RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize,
|
||||
corruption_detected);
|
||||
}
|
||||
if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */
|
||||
U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState);
|
||||
U32 checkRead;
|
||||
if (remainingSrcSize<4) return ERROR(checksum_wrong);
|
||||
RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong);
|
||||
checkRead = MEM_readLE32(ip);
|
||||
if (checkRead != checkCalc) return ERROR(checksum_wrong);
|
||||
RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong);
|
||||
ip += 4;
|
||||
remainingSrcSize -= 4;
|
||||
}
|
||||
@ -652,8 +716,8 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
|
||||
size_t decodedSize;
|
||||
size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
|
||||
if (ZSTD_isError(frameSize)) return frameSize;
|
||||
/* legacy support is not compatible with static dctx */
|
||||
if (dctx->staticSize) return ERROR(memory_allocation);
|
||||
RETURN_ERROR_IF(dctx->staticSize, memory_allocation,
|
||||
"legacy support is not compatible with static dctx");
|
||||
|
||||
decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize);
|
||||
if (ZSTD_isError(decodedSize)) return decodedSize;
|
||||
@ -676,7 +740,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
|
||||
size_t const skippableSize = readSkippableFrameSize(src, srcSize);
|
||||
if (ZSTD_isError(skippableSize))
|
||||
return skippableSize;
|
||||
if (srcSize < skippableSize) return ERROR(srcSize_wrong);
|
||||
RETURN_ERROR_IF(srcSize < skippableSize, srcSize_wrong);
|
||||
|
||||
src = (const BYTE *)src + skippableSize;
|
||||
srcSize -= skippableSize;
|
||||
@ -685,29 +749,29 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
|
||||
|
||||
if (ddict) {
|
||||
/* we were called from ZSTD_decompress_usingDDict */
|
||||
CHECK_F(ZSTD_decompressBegin_usingDDict(dctx, ddict));
|
||||
FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict));
|
||||
} else {
|
||||
/* this will initialize correctly with no dict if dict == NULL, so
|
||||
* use this in all cases but ddict */
|
||||
CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize));
|
||||
FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize));
|
||||
}
|
||||
ZSTD_checkContinuity(dctx, dst);
|
||||
|
||||
{ const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
|
||||
&src, &srcSize);
|
||||
if ( (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown)
|
||||
&& (moreThan1Frame==1) ) {
|
||||
/* at least one frame successfully completed,
|
||||
* but following bytes are garbage :
|
||||
* it's more likely to be a srcSize error,
|
||||
* specifying more bytes than compressed size of frame(s).
|
||||
* This error message replaces ERROR(prefix_unknown),
|
||||
* which would be confusing, as the first header is actually correct.
|
||||
* Note that one could be unlucky, it might be a corruption error instead,
|
||||
* happening right at the place where we expect zstd magic bytes.
|
||||
* But this is _much_ less likely than a srcSize field error. */
|
||||
return ERROR(srcSize_wrong);
|
||||
}
|
||||
RETURN_ERROR_IF(
|
||||
(ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown)
|
||||
&& (moreThan1Frame==1),
|
||||
srcSize_wrong,
|
||||
"at least one frame successfully completed, but following "
|
||||
"bytes are garbage: it's more likely to be a srcSize error, "
|
||||
"specifying more bytes than compressed size of frame(s). This "
|
||||
"error message replaces ERROR(prefix_unknown), which would be "
|
||||
"confusing, as the first header is actually correct. Note that "
|
||||
"one could be unlucky, it might be a corruption error instead, "
|
||||
"happening right at the place where we expect zstd magic "
|
||||
"bytes. But this is _much_ less likely than a srcSize field "
|
||||
"error.");
|
||||
if (ZSTD_isError(res)) return res;
|
||||
assert(res <= dstCapacity);
|
||||
dst = (BYTE*)dst + res;
|
||||
@ -716,7 +780,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
|
||||
moreThan1Frame = 1;
|
||||
} /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */
|
||||
|
||||
if (srcSize) return ERROR(srcSize_wrong); /* input not entirely consumed */
|
||||
RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed");
|
||||
|
||||
return (BYTE*)dst - (BYTE*)dststart;
|
||||
}
|
||||
@ -730,9 +794,26 @@ size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
|
||||
}
|
||||
|
||||
|
||||
static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx)
|
||||
{
|
||||
switch (dctx->dictUses) {
|
||||
default:
|
||||
assert(0 /* Impossible */);
|
||||
/* fall-through */
|
||||
case ZSTD_dont_use:
|
||||
ZSTD_clearDict(dctx);
|
||||
return NULL;
|
||||
case ZSTD_use_indefinitely:
|
||||
return dctx->ddict;
|
||||
case ZSTD_use_once:
|
||||
dctx->dictUses = ZSTD_dont_use;
|
||||
return dctx->ddict;
|
||||
}
|
||||
}
|
||||
|
||||
size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
||||
{
|
||||
return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0);
|
||||
return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx));
|
||||
}
|
||||
|
||||
|
||||
@ -741,7 +822,7 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr
|
||||
#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1)
|
||||
size_t regenSize;
|
||||
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
|
||||
if (dctx==NULL) return ERROR(memory_allocation);
|
||||
RETURN_ERROR_IF(dctx==NULL, memory_allocation);
|
||||
regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
|
||||
ZSTD_freeDCtx(dctx);
|
||||
return regenSize;
|
||||
@ -791,8 +872,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
|
||||
{
|
||||
DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize);
|
||||
/* Sanity check */
|
||||
if (srcSize != dctx->expected)
|
||||
return ERROR(srcSize_wrong); /* not allowed */
|
||||
RETURN_ERROR_IF(srcSize != dctx->expected, srcSize_wrong, "not allowed");
|
||||
if (dstCapacity) ZSTD_checkContinuity(dctx, dst);
|
||||
|
||||
switch (dctx->stage)
|
||||
@ -817,7 +897,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
|
||||
case ZSTDds_decodeFrameHeader:
|
||||
assert(src != NULL);
|
||||
memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize);
|
||||
CHECK_F(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize));
|
||||
FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize));
|
||||
dctx->expected = ZSTD_blockHeaderSize;
|
||||
dctx->stage = ZSTDds_decodeBlockHeader;
|
||||
return 0;
|
||||
@ -867,7 +947,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
|
||||
break;
|
||||
case bt_reserved : /* should never happen */
|
||||
default:
|
||||
return ERROR(corruption_detected);
|
||||
RETURN_ERROR(corruption_detected);
|
||||
}
|
||||
if (ZSTD_isError(rSize)) return rSize;
|
||||
DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize);
|
||||
@ -876,10 +956,10 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
|
||||
|
||||
if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */
|
||||
DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize);
|
||||
if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {
|
||||
if (dctx->decodedSize != dctx->fParams.frameContentSize) {
|
||||
return ERROR(corruption_detected);
|
||||
} }
|
||||
RETURN_ERROR_IF(
|
||||
dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
|
||||
&& dctx->decodedSize != dctx->fParams.frameContentSize,
|
||||
corruption_detected);
|
||||
if (dctx->fParams.checksumFlag) { /* another round for frame checksum */
|
||||
dctx->expected = 4;
|
||||
dctx->stage = ZSTDds_checkChecksum;
|
||||
@ -900,7 +980,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
|
||||
{ U32 const h32 = (U32)XXH64_digest(&dctx->xxhState);
|
||||
U32 const check32 = MEM_readLE32(src);
|
||||
DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32);
|
||||
if (check32 != h32) return ERROR(checksum_wrong);
|
||||
RETURN_ERROR_IF(check32 != h32, checksum_wrong);
|
||||
dctx->expected = 0;
|
||||
dctx->stage = ZSTDds_getFrameHeaderSize;
|
||||
return 0;
|
||||
@ -921,7 +1001,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
|
||||
|
||||
default:
|
||||
assert(0); /* impossible */
|
||||
return ERROR(GENERIC); /* some compiler require default to do something */
|
||||
RETURN_ERROR(GENERIC); /* some compiler require default to do something */
|
||||
}
|
||||
}
|
||||
|
||||
@ -945,7 +1025,7 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
||||
const BYTE* dictPtr = (const BYTE*)dict;
|
||||
const BYTE* const dictEnd = dictPtr + dictSize;
|
||||
|
||||
if (dictSize <= 8) return ERROR(dictionary_corrupted);
|
||||
RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted);
|
||||
assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */
|
||||
dictPtr += 8; /* skip header = magic + dictID */
|
||||
|
||||
@ -964,16 +1044,16 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
||||
dictPtr, dictEnd - dictPtr,
|
||||
workspace, workspaceSize);
|
||||
#endif
|
||||
if (HUF_isError(hSize)) return ERROR(dictionary_corrupted);
|
||||
RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted);
|
||||
dictPtr += hSize;
|
||||
}
|
||||
|
||||
{ short offcodeNCount[MaxOff+1];
|
||||
unsigned offcodeMaxValue = MaxOff, offcodeLog;
|
||||
size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
|
||||
if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
|
||||
if (offcodeMaxValue > MaxOff) return ERROR(dictionary_corrupted);
|
||||
if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
|
||||
RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted);
|
||||
RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted);
|
||||
RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted);
|
||||
ZSTD_buildFSETable( entropy->OFTable,
|
||||
offcodeNCount, offcodeMaxValue,
|
||||
OF_base, OF_bits,
|
||||
@ -984,9 +1064,9 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
||||
{ short matchlengthNCount[MaxML+1];
|
||||
unsigned matchlengthMaxValue = MaxML, matchlengthLog;
|
||||
size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
|
||||
if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
|
||||
if (matchlengthMaxValue > MaxML) return ERROR(dictionary_corrupted);
|
||||
if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
|
||||
RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted);
|
||||
RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted);
|
||||
RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted);
|
||||
ZSTD_buildFSETable( entropy->MLTable,
|
||||
matchlengthNCount, matchlengthMaxValue,
|
||||
ML_base, ML_bits,
|
||||
@ -997,9 +1077,9 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
||||
{ short litlengthNCount[MaxLL+1];
|
||||
unsigned litlengthMaxValue = MaxLL, litlengthLog;
|
||||
size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
|
||||
if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
|
||||
if (litlengthMaxValue > MaxLL) return ERROR(dictionary_corrupted);
|
||||
if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
|
||||
RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted);
|
||||
RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted);
|
||||
RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted);
|
||||
ZSTD_buildFSETable( entropy->LLTable,
|
||||
litlengthNCount, litlengthMaxValue,
|
||||
LL_base, LL_bits,
|
||||
@ -1007,12 +1087,13 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
||||
dictPtr += litlengthHeaderSize;
|
||||
}
|
||||
|
||||
if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
|
||||
RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted);
|
||||
{ int i;
|
||||
size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
|
||||
for (i=0; i<3; i++) {
|
||||
U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
|
||||
if (rep==0 || rep >= dictContentSize) return ERROR(dictionary_corrupted);
|
||||
RETURN_ERROR_IF(rep==0 || rep >= dictContentSize,
|
||||
dictionary_corrupted);
|
||||
entropy->rep[i] = rep;
|
||||
} }
|
||||
|
||||
@ -1030,7 +1111,7 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict
|
||||
|
||||
/* load entropy tables */
|
||||
{ size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize);
|
||||
if (ZSTD_isError(eSize)) return ERROR(dictionary_corrupted);
|
||||
RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted);
|
||||
dict = (const char*)dict + eSize;
|
||||
dictSize -= eSize;
|
||||
}
|
||||
@ -1064,9 +1145,11 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
|
||||
|
||||
size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
|
||||
{
|
||||
CHECK_F( ZSTD_decompressBegin(dctx) );
|
||||
FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) );
|
||||
if (dict && dictSize)
|
||||
CHECK_E(ZSTD_decompress_insertDictionary(dctx, dict, dictSize), dictionary_corrupted);
|
||||
RETURN_ERROR_IF(
|
||||
ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)),
|
||||
dictionary_corrupted);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1085,7 +1168,7 @@ size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
|
||||
DEBUGLOG(4, "DDict is %s",
|
||||
dctx->ddictIsCold ? "~cold~" : "hot!");
|
||||
}
|
||||
CHECK_F( ZSTD_decompressBegin(dctx) );
|
||||
FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) );
|
||||
if (ddict) { /* NULL ddict is equivalent to no dictionary */
|
||||
ZSTD_copyDDictParameters(dctx, ddict);
|
||||
}
|
||||
@ -1104,7 +1187,7 @@ unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
|
||||
}
|
||||
|
||||
/*! ZSTD_getDictID_fromFrame() :
|
||||
* Provides the dictID required to decompresse frame stored within `src`.
|
||||
* Provides the dictID required to decompress frame stored within `src`.
|
||||
* If @return == 0, the dictID could not be decoded.
|
||||
* This could for one of the following reasons :
|
||||
* - The frame does not require a dictionary (most common case).
|
||||
@ -1176,15 +1259,14 @@ size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
|
||||
ZSTD_dictLoadMethod_e dictLoadMethod,
|
||||
ZSTD_dictContentType_e dictContentType)
|
||||
{
|
||||
if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
|
||||
ZSTD_freeDDict(dctx->ddictLocal);
|
||||
RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
|
||||
ZSTD_clearDict(dctx);
|
||||
if (dict && dictSize >= 8) {
|
||||
dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
|
||||
if (dctx->ddictLocal == NULL) return ERROR(memory_allocation);
|
||||
} else {
|
||||
dctx->ddictLocal = NULL;
|
||||
RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation);
|
||||
dctx->ddict = dctx->ddictLocal;
|
||||
dctx->dictUses = ZSTD_use_indefinitely;
|
||||
}
|
||||
dctx->ddict = dctx->ddictLocal;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1200,7 +1282,9 @@ size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSi
|
||||
|
||||
size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
|
||||
{
|
||||
return ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType);
|
||||
FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType));
|
||||
dctx->dictUses = ZSTD_use_once;
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize)
|
||||
@ -1215,9 +1299,8 @@ size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSiz
|
||||
size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
|
||||
{
|
||||
DEBUGLOG(4, "ZSTD_initDStream_usingDict");
|
||||
zds->streamStage = zdss_init;
|
||||
zds->noForwardProgress = 0;
|
||||
CHECK_F( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
|
||||
FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) );
|
||||
FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
|
||||
return ZSTD_FRAMEHEADERSIZE_PREFIX;
|
||||
}
|
||||
|
||||
@ -1225,7 +1308,7 @@ size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t di
|
||||
size_t ZSTD_initDStream(ZSTD_DStream* zds)
|
||||
{
|
||||
DEBUGLOG(4, "ZSTD_initDStream");
|
||||
return ZSTD_initDStream_usingDict(zds, NULL, 0);
|
||||
return ZSTD_initDStream_usingDDict(zds, NULL);
|
||||
}
|
||||
|
||||
/* ZSTD_initDStream_usingDDict() :
|
||||
@ -1233,9 +1316,9 @@ size_t ZSTD_initDStream(ZSTD_DStream* zds)
|
||||
* this function cannot fail */
|
||||
size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
|
||||
{
|
||||
size_t const initResult = ZSTD_initDStream(dctx);
|
||||
dctx->ddict = ddict;
|
||||
return initResult;
|
||||
FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) );
|
||||
FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) );
|
||||
return ZSTD_FRAMEHEADERSIZE_PREFIX;
|
||||
}
|
||||
|
||||
/* ZSTD_resetDStream() :
|
||||
@ -1243,19 +1326,19 @@ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
|
||||
* this function cannot fail */
|
||||
size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
|
||||
{
|
||||
DEBUGLOG(4, "ZSTD_resetDStream");
|
||||
dctx->streamStage = zdss_loadHeader;
|
||||
dctx->lhSize = dctx->inPos = dctx->outStart = dctx->outEnd = 0;
|
||||
dctx->legacyVersion = 0;
|
||||
dctx->hostageByte = 0;
|
||||
FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only));
|
||||
return ZSTD_FRAMEHEADERSIZE_PREFIX;
|
||||
}
|
||||
|
||||
|
||||
size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
|
||||
{
|
||||
if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
|
||||
dctx->ddict = ddict;
|
||||
RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
|
||||
ZSTD_clearDict(dctx);
|
||||
if (ddict) {
|
||||
dctx->ddict = ddict;
|
||||
dctx->dictUses = ZSTD_use_indefinitely;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1267,9 +1350,9 @@ size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
|
||||
ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax);
|
||||
size_t const min = (size_t)1 << bounds.lowerBound;
|
||||
size_t const max = (size_t)1 << bounds.upperBound;
|
||||
if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
|
||||
if (maxWindowSize < min) return ERROR(parameter_outOfBound);
|
||||
if (maxWindowSize > max) return ERROR(parameter_outOfBound);
|
||||
RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
|
||||
RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound);
|
||||
RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound);
|
||||
dctx->maxWindowSize = maxWindowSize;
|
||||
return 0;
|
||||
}
|
||||
@ -1311,15 +1394,15 @@ static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value)
|
||||
}
|
||||
|
||||
#define CHECK_DBOUNDS(p,v) { \
|
||||
if (!ZSTD_dParam_withinBounds(p, v)) \
|
||||
return ERROR(parameter_outOfBound); \
|
||||
RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound); \
|
||||
}
|
||||
|
||||
size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value)
|
||||
{
|
||||
if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
|
||||
RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
|
||||
switch(dParam) {
|
||||
case ZSTD_d_windowLogMax:
|
||||
if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT;
|
||||
CHECK_DBOUNDS(ZSTD_d_windowLogMax, value);
|
||||
dctx->maxWindowSize = ((size_t)1) << value;
|
||||
return 0;
|
||||
@ -1329,19 +1412,20 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value
|
||||
return 0;
|
||||
default:;
|
||||
}
|
||||
return ERROR(parameter_unsupported);
|
||||
RETURN_ERROR(parameter_unsupported);
|
||||
}
|
||||
|
||||
size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset)
|
||||
{
|
||||
if ( (reset == ZSTD_reset_session_only)
|
||||
|| (reset == ZSTD_reset_session_and_parameters) ) {
|
||||
(void)ZSTD_initDStream(dctx);
|
||||
dctx->streamStage = zdss_init;
|
||||
dctx->noForwardProgress = 0;
|
||||
}
|
||||
if ( (reset == ZSTD_reset_parameters)
|
||||
|| (reset == ZSTD_reset_session_and_parameters) ) {
|
||||
if (dctx->streamStage != zdss_init)
|
||||
return ERROR(stage_wrong);
|
||||
RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
|
||||
ZSTD_clearDict(dctx);
|
||||
dctx->format = ZSTD_f_zstd1;
|
||||
dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
|
||||
}
|
||||
@ -1360,7 +1444,8 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
|
||||
unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2);
|
||||
unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
|
||||
size_t const minRBSize = (size_t) neededSize;
|
||||
if ((unsigned long long)minRBSize != neededSize) return ERROR(frameParameter_windowTooLarge);
|
||||
RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,
|
||||
frameParameter_windowTooLarge);
|
||||
return minRBSize;
|
||||
}
|
||||
|
||||
@ -1378,9 +1463,9 @@ size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize)
|
||||
ZSTD_frameHeader zfh;
|
||||
size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize);
|
||||
if (ZSTD_isError(err)) return err;
|
||||
if (err>0) return ERROR(srcSize_wrong);
|
||||
if (zfh.windowSize > windowSizeMax)
|
||||
return ERROR(frameParameter_windowTooLarge);
|
||||
RETURN_ERROR_IF(err>0, srcSize_wrong);
|
||||
RETURN_ERROR_IF(zfh.windowSize > windowSizeMax,
|
||||
frameParameter_windowTooLarge);
|
||||
return ZSTD_estimateDStreamSize((size_t)zfh.windowSize);
|
||||
}
|
||||
|
||||
@ -1406,16 +1491,16 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
U32 someMoreWork = 1;
|
||||
|
||||
DEBUGLOG(5, "ZSTD_decompressStream");
|
||||
if (input->pos > input->size) { /* forbidden */
|
||||
DEBUGLOG(5, "in: pos: %u vs size: %u",
|
||||
(U32)input->pos, (U32)input->size);
|
||||
return ERROR(srcSize_wrong);
|
||||
}
|
||||
if (output->pos > output->size) { /* forbidden */
|
||||
DEBUGLOG(5, "out: pos: %u vs size: %u",
|
||||
(U32)output->pos, (U32)output->size);
|
||||
return ERROR(dstSize_tooSmall);
|
||||
}
|
||||
RETURN_ERROR_IF(
|
||||
input->pos > input->size,
|
||||
srcSize_wrong,
|
||||
"forbidden. in: pos: %u vs size: %u",
|
||||
(U32)input->pos, (U32)input->size);
|
||||
RETURN_ERROR_IF(
|
||||
output->pos > output->size,
|
||||
dstSize_tooSmall,
|
||||
"forbidden. out: pos: %u vs size: %u",
|
||||
(U32)output->pos, (U32)output->size);
|
||||
DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos));
|
||||
|
||||
while (someMoreWork) {
|
||||
@ -1423,15 +1508,18 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
{
|
||||
case zdss_init :
|
||||
DEBUGLOG(5, "stage zdss_init => transparent reset ");
|
||||
ZSTD_resetDStream(zds); /* transparent reset on starting decoding a new frame */
|
||||
zds->streamStage = zdss_loadHeader;
|
||||
zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
|
||||
zds->legacyVersion = 0;
|
||||
zds->hostageByte = 0;
|
||||
/* fall-through */
|
||||
|
||||
case zdss_loadHeader :
|
||||
DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip));
|
||||
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
|
||||
if (zds->legacyVersion) {
|
||||
/* legacy support is incompatible with static dctx */
|
||||
if (zds->staticSize) return ERROR(memory_allocation);
|
||||
RETURN_ERROR_IF(zds->staticSize, memory_allocation,
|
||||
"legacy support is incompatible with static dctx");
|
||||
{ size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input);
|
||||
if (hint==0) zds->streamStage = zdss_init;
|
||||
return hint;
|
||||
@ -1443,12 +1531,13 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
|
||||
U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart);
|
||||
if (legacyVersion) {
|
||||
const void* const dict = zds->ddict ? ZSTD_DDict_dictContent(zds->ddict) : NULL;
|
||||
size_t const dictSize = zds->ddict ? ZSTD_DDict_dictSize(zds->ddict) : 0;
|
||||
ZSTD_DDict const* const ddict = ZSTD_getDDict(zds);
|
||||
const void* const dict = ddict ? ZSTD_DDict_dictContent(ddict) : NULL;
|
||||
size_t const dictSize = ddict ? ZSTD_DDict_dictSize(ddict) : 0;
|
||||
DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion);
|
||||
/* legacy support is incompatible with static dctx */
|
||||
if (zds->staticSize) return ERROR(memory_allocation);
|
||||
CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext,
|
||||
RETURN_ERROR_IF(zds->staticSize, memory_allocation,
|
||||
"legacy support is incompatible with static dctx");
|
||||
FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext,
|
||||
zds->previousLegacyVersion, legacyVersion,
|
||||
dict, dictSize));
|
||||
zds->legacyVersion = zds->previousLegacyVersion = legacyVersion;
|
||||
@ -1482,7 +1571,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart);
|
||||
if (cSize <= (size_t)(iend-istart)) {
|
||||
/* shortcut : using single-pass mode */
|
||||
size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, zds->ddict);
|
||||
size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, ZSTD_getDDict(zds));
|
||||
if (ZSTD_isError(decompressedSize)) return decompressedSize;
|
||||
DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()")
|
||||
ip = istart + cSize;
|
||||
@ -1495,13 +1584,13 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
|
||||
/* Consume header (see ZSTDds_decodeFrameHeader) */
|
||||
DEBUGLOG(4, "Consume header");
|
||||
CHECK_F(ZSTD_decompressBegin_usingDDict(zds, zds->ddict));
|
||||
FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)));
|
||||
|
||||
if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
|
||||
zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
|
||||
zds->stage = ZSTDds_skipFrame;
|
||||
} else {
|
||||
CHECK_F(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize));
|
||||
FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize));
|
||||
zds->expected = ZSTD_blockHeaderSize;
|
||||
zds->stage = ZSTDds_decodeBlockHeader;
|
||||
}
|
||||
@ -1511,7 +1600,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
(U32)(zds->fParams.windowSize >>10),
|
||||
(U32)(zds->maxWindowSize >> 10) );
|
||||
zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
|
||||
if (zds->fParams.windowSize > zds->maxWindowSize) return ERROR(frameParameter_windowTooLarge);
|
||||
RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize,
|
||||
frameParameter_windowTooLarge);
|
||||
|
||||
/* Adapt buffer sizes to frame header instructions */
|
||||
{ size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */);
|
||||
@ -1525,14 +1615,15 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
if (zds->staticSize) { /* static DCtx */
|
||||
DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize);
|
||||
assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */
|
||||
if (bufferSize > zds->staticSize - sizeof(ZSTD_DCtx))
|
||||
return ERROR(memory_allocation);
|
||||
RETURN_ERROR_IF(
|
||||
bufferSize > zds->staticSize - sizeof(ZSTD_DCtx),
|
||||
memory_allocation);
|
||||
} else {
|
||||
ZSTD_free(zds->inBuff, zds->customMem);
|
||||
zds->inBuffSize = 0;
|
||||
zds->outBuffSize = 0;
|
||||
zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem);
|
||||
if (zds->inBuff == NULL) return ERROR(memory_allocation);
|
||||
RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation);
|
||||
}
|
||||
zds->inBuffSize = neededInBuffSize;
|
||||
zds->outBuff = zds->inBuff + zds->inBuffSize;
|
||||
@ -1574,7 +1665,9 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
if (isSkipFrame) {
|
||||
loadedSize = MIN(toLoad, (size_t)(iend-ip));
|
||||
} else {
|
||||
if (toLoad > zds->inBuffSize - zds->inPos) return ERROR(corruption_detected); /* should never happen */
|
||||
RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos,
|
||||
corruption_detected,
|
||||
"should never happen");
|
||||
loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip);
|
||||
}
|
||||
ip += loadedSize;
|
||||
@ -1615,7 +1708,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
|
||||
default:
|
||||
assert(0); /* impossible */
|
||||
return ERROR(GENERIC); /* some compiler require default to do something */
|
||||
RETURN_ERROR(GENERIC); /* some compiler require default to do something */
|
||||
} }
|
||||
|
||||
/* result */
|
||||
@ -1624,8 +1717,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
if ((ip==istart) && (op==ostart)) { /* no forward progress */
|
||||
zds->noForwardProgress ++;
|
||||
if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) {
|
||||
if (op==oend) return ERROR(dstSize_tooSmall);
|
||||
if (ip==iend) return ERROR(srcSize_wrong);
|
||||
RETURN_ERROR_IF(op==oend, dstSize_tooSmall);
|
||||
RETURN_ERROR_IF(ip==iend, srcSize_wrong);
|
||||
assert(0);
|
||||
}
|
||||
} else {
|
||||
|
@ -56,14 +56,15 @@ static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
|
||||
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
|
||||
blockProperties_t* bpPtr)
|
||||
{
|
||||
if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
|
||||
RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong);
|
||||
|
||||
{ U32 const cBlockHeader = MEM_readLE24(src);
|
||||
U32 const cSize = cBlockHeader >> 3;
|
||||
bpPtr->lastBlock = cBlockHeader & 1;
|
||||
bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
|
||||
bpPtr->origSize = cSize; /* only useful for RLE */
|
||||
if (bpPtr->blockType == bt_rle) return 1;
|
||||
if (bpPtr->blockType == bt_reserved) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected);
|
||||
return cSize;
|
||||
}
|
||||
}
|
||||
@ -78,7 +79,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
||||
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
||||
const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
|
||||
{
|
||||
if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected);
|
||||
|
||||
{ const BYTE* const istart = (const BYTE*) src;
|
||||
symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
|
||||
@ -86,11 +87,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
||||
switch(litEncType)
|
||||
{
|
||||
case set_repeat:
|
||||
if (dctx->litEntropy==0) return ERROR(dictionary_corrupted);
|
||||
RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted);
|
||||
/* fall-through */
|
||||
|
||||
case set_compressed:
|
||||
if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
|
||||
RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
|
||||
{ size_t lhSize, litSize, litCSize;
|
||||
U32 singleStream=0;
|
||||
U32 const lhlCode = (istart[0] >> 2) & 3;
|
||||
@ -118,8 +119,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
||||
litCSize = (lhc >> 22) + (istart[4] << 10);
|
||||
break;
|
||||
}
|
||||
if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
|
||||
if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
|
||||
RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected);
|
||||
|
||||
/* prefetch huffman table if cold */
|
||||
if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
|
||||
@ -157,7 +158,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
||||
}
|
||||
}
|
||||
|
||||
if (HUF_isError(hufSuccess)) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected);
|
||||
|
||||
dctx->litPtr = dctx->litBuffer;
|
||||
dctx->litSize = litSize;
|
||||
@ -187,7 +188,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
||||
}
|
||||
|
||||
if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
|
||||
if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected);
|
||||
memcpy(dctx->litBuffer, istart+lhSize, litSize);
|
||||
dctx->litPtr = dctx->litBuffer;
|
||||
dctx->litSize = litSize;
|
||||
@ -216,17 +217,17 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
||||
case 3:
|
||||
lhSize = 3;
|
||||
litSize = MEM_readLE24(istart) >> 4;
|
||||
if (srcSize<4) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
|
||||
RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
|
||||
break;
|
||||
}
|
||||
if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
|
||||
memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
|
||||
dctx->litPtr = dctx->litBuffer;
|
||||
dctx->litSize = litSize;
|
||||
return lhSize+1;
|
||||
}
|
||||
default:
|
||||
return ERROR(corruption_detected); /* impossible */
|
||||
RETURN_ERROR(corruption_detected, "impossible");
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -436,8 +437,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
||||
switch(type)
|
||||
{
|
||||
case set_rle :
|
||||
if (!srcSize) return ERROR(srcSize_wrong);
|
||||
if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(!srcSize, srcSize_wrong);
|
||||
RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected);
|
||||
{ U32 const symbol = *(const BYTE*)src;
|
||||
U32 const baseline = baseValue[symbol];
|
||||
U32 const nbBits = nbAdditionalBits[symbol];
|
||||
@ -449,7 +450,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
||||
*DTablePtr = defaultTable;
|
||||
return 0;
|
||||
case set_repeat:
|
||||
if (!flagRepeatTable) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(!flagRepeatTable, corruption_detected);
|
||||
/* prefetch FSE table if used */
|
||||
if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
|
||||
const void* const pStart = *DTablePtr;
|
||||
@ -461,15 +462,15 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
||||
{ unsigned tableLog;
|
||||
S16 norm[MaxSeq+1];
|
||||
size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
|
||||
if (FSE_isError(headerSize)) return ERROR(corruption_detected);
|
||||
if (tableLog > maxLog) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected);
|
||||
RETURN_ERROR_IF(tableLog > maxLog, corruption_detected);
|
||||
ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
|
||||
*DTablePtr = DTableSpace;
|
||||
return headerSize;
|
||||
}
|
||||
default : /* impossible */
|
||||
default :
|
||||
assert(0);
|
||||
return ERROR(GENERIC);
|
||||
RETURN_ERROR(GENERIC, "impossible");
|
||||
}
|
||||
}
|
||||
|
||||
@ -483,28 +484,28 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
||||
DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
|
||||
|
||||
/* check */
|
||||
if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
|
||||
RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong);
|
||||
|
||||
/* SeqHead */
|
||||
nbSeq = *ip++;
|
||||
if (!nbSeq) {
|
||||
*nbSeqPtr=0;
|
||||
if (srcSize != 1) return ERROR(srcSize_wrong);
|
||||
RETURN_ERROR_IF(srcSize != 1, srcSize_wrong);
|
||||
return 1;
|
||||
}
|
||||
if (nbSeq > 0x7F) {
|
||||
if (nbSeq == 0xFF) {
|
||||
if (ip+2 > iend) return ERROR(srcSize_wrong);
|
||||
RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong);
|
||||
nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
|
||||
} else {
|
||||
if (ip >= iend) return ERROR(srcSize_wrong);
|
||||
RETURN_ERROR_IF(ip >= iend, srcSize_wrong);
|
||||
nbSeq = ((nbSeq-0x80)<<8) + *ip++;
|
||||
}
|
||||
}
|
||||
*nbSeqPtr = nbSeq;
|
||||
|
||||
/* FSE table descriptors */
|
||||
if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */
|
||||
RETURN_ERROR_IF(ip+4 > iend, srcSize_wrong); /* minimum possible size */
|
||||
{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
|
||||
symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
|
||||
symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
|
||||
@ -517,7 +518,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
||||
LL_base, LL_bits,
|
||||
LL_defaultDTable, dctx->fseEntropy,
|
||||
dctx->ddictIsCold, nbSeq);
|
||||
if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected);
|
||||
ip += llhSize;
|
||||
}
|
||||
|
||||
@ -527,7 +528,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
||||
OF_base, OF_bits,
|
||||
OF_defaultDTable, dctx->fseEntropy,
|
||||
dctx->ddictIsCold, nbSeq);
|
||||
if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected);
|
||||
ip += ofhSize;
|
||||
}
|
||||
|
||||
@ -537,7 +538,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
||||
ML_base, ML_bits,
|
||||
ML_defaultDTable, dctx->fseEntropy,
|
||||
dctx->ddictIsCold, nbSeq);
|
||||
if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected);
|
||||
ip += mlhSize;
|
||||
}
|
||||
}
|
||||
@ -590,8 +591,8 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
|
||||
const BYTE* match = oLitEnd - sequence.offset;
|
||||
|
||||
/* check */
|
||||
if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must fit within dstBuffer */
|
||||
if (iLitEnd > litLimit) return ERROR(corruption_detected); /* try to read beyond literal buffer */
|
||||
RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must fit within dstBuffer");
|
||||
RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer");
|
||||
|
||||
/* copy literals */
|
||||
while (op < oLitEnd) *op++ = *(*litPtr)++;
|
||||
@ -599,7 +600,7 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
|
||||
/* copy Match */
|
||||
if (sequence.offset > (size_t)(oLitEnd - base)) {
|
||||
/* offset beyond prefix */
|
||||
if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - vBase),corruption_detected);
|
||||
match = dictEnd - (base-match);
|
||||
if (match + sequence.matchLength <= dictEnd) {
|
||||
memmove(oLitEnd, match, sequence.matchLength);
|
||||
@ -631,8 +632,8 @@ size_t ZSTD_execSequence(BYTE* op,
|
||||
const BYTE* match = oLitEnd - sequence.offset;
|
||||
|
||||
/* check */
|
||||
if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
|
||||
if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
|
||||
RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
|
||||
RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
|
||||
if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
|
||||
|
||||
/* copy Literals */
|
||||
@ -645,8 +646,7 @@ size_t ZSTD_execSequence(BYTE* op,
|
||||
/* copy Match */
|
||||
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
||||
/* offset beyond prefix -> go into extDict */
|
||||
if (sequence.offset > (size_t)(oLitEnd - virtualStart))
|
||||
return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
|
||||
match = dictEnd + (match - prefixStart);
|
||||
if (match + sequence.matchLength <= dictEnd) {
|
||||
memmove(oLitEnd, match, sequence.matchLength);
|
||||
@ -712,8 +712,8 @@ size_t ZSTD_execSequenceLong(BYTE* op,
|
||||
const BYTE* match = sequence.match;
|
||||
|
||||
/* check */
|
||||
if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
|
||||
if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
|
||||
RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
|
||||
RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
|
||||
if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
|
||||
|
||||
/* copy Literals */
|
||||
@ -726,7 +726,7 @@ size_t ZSTD_execSequenceLong(BYTE* op,
|
||||
/* copy Match */
|
||||
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
||||
/* offset beyond prefix */
|
||||
if (sequence.offset > (size_t)(oLitEnd - dictStart)) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - dictStart), corruption_detected);
|
||||
if (match + sequence.matchLength <= dictEnd) {
|
||||
memmove(oLitEnd, match, sequence.matchLength);
|
||||
return sequenceLength;
|
||||
@ -801,7 +801,7 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
|
||||
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
|
||||
* offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
|
||||
* bits before reloading. This value is the maximum number of bytes we read
|
||||
* after reloading when we are decoding long offets.
|
||||
* after reloading when we are decoding long offsets.
|
||||
*/
|
||||
#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
|
||||
(ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
|
||||
@ -911,7 +911,9 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
||||
seqState_t seqState;
|
||||
dctx->fseEntropy = 1;
|
||||
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
|
||||
CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
|
||||
RETURN_ERROR_IF(
|
||||
ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
|
||||
corruption_detected);
|
||||
ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
|
||||
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
|
||||
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
|
||||
@ -927,14 +929,14 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
||||
|
||||
/* check if reached exact end */
|
||||
DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
|
||||
if (nbSeq) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(nbSeq, corruption_detected);
|
||||
/* save reps for next block */
|
||||
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
|
||||
}
|
||||
|
||||
/* last literal segment */
|
||||
{ size_t const lastLLSize = litEnd - litPtr;
|
||||
if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
|
||||
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall);
|
||||
memcpy(op, litPtr, lastLLSize);
|
||||
op += lastLLSize;
|
||||
}
|
||||
@ -1066,7 +1068,9 @@ ZSTD_decompressSequencesLong_body(
|
||||
seqState.pos = (size_t)(op-prefixStart);
|
||||
seqState.dictEnd = dictEnd;
|
||||
assert(iend >= ip);
|
||||
CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
|
||||
RETURN_ERROR_IF(
|
||||
ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
|
||||
corruption_detected);
|
||||
ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
|
||||
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
|
||||
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
|
||||
@ -1076,7 +1080,7 @@ ZSTD_decompressSequencesLong_body(
|
||||
sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
|
||||
PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
|
||||
}
|
||||
if (seqNb<seqAdvance) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected);
|
||||
|
||||
/* decode and decompress */
|
||||
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
|
||||
@ -1087,7 +1091,7 @@ ZSTD_decompressSequencesLong_body(
|
||||
sequences[seqNb & STORED_SEQS_MASK] = sequence;
|
||||
op += oneSeqSize;
|
||||
}
|
||||
if (seqNb<nbSeq) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected);
|
||||
|
||||
/* finish queue */
|
||||
seqNb -= seqAdvance;
|
||||
@ -1103,7 +1107,7 @@ ZSTD_decompressSequencesLong_body(
|
||||
|
||||
/* last literal segment */
|
||||
{ size_t const lastLLSize = litEnd - litPtr;
|
||||
if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
|
||||
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall);
|
||||
memcpy(op, litPtr, lastLLSize);
|
||||
op += lastLLSize;
|
||||
}
|
||||
@ -1176,7 +1180,7 @@ ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
|
||||
/* ZSTD_decompressSequencesLong() :
|
||||
* decompression function triggered when a minimum share of offsets is considered "long",
|
||||
* aka out of cache.
|
||||
* note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes mearning "farther than memory cache distance".
|
||||
* note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
|
||||
* This function will try to mitigate main memory latency through the use of prefetching */
|
||||
static size_t
|
||||
ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
|
||||
@ -1240,7 +1244,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
||||
ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
|
||||
DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
|
||||
|
||||
if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
|
||||
RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong);
|
||||
|
||||
/* Decode literals section */
|
||||
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
|
||||
|
@ -89,6 +89,12 @@ typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
|
||||
typedef enum { zdss_init=0, zdss_loadHeader,
|
||||
zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
|
||||
|
||||
typedef enum {
|
||||
ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */
|
||||
ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */
|
||||
ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
|
||||
} ZSTD_dictUses_e;
|
||||
|
||||
struct ZSTD_DCtx_s
|
||||
{
|
||||
const ZSTD_seqSymbol* LLTptr;
|
||||
@ -123,6 +129,7 @@ struct ZSTD_DCtx_s
|
||||
const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
|
||||
U32 dictID;
|
||||
int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
|
||||
ZSTD_dictUses_e dictUses;
|
||||
|
||||
/* streaming */
|
||||
ZSTD_dStreamStage streamStage;
|
||||
|
@ -391,7 +391,7 @@ static void COVER_group(COVER_ctx_t *ctx, const void *group,
|
||||
*
|
||||
* Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
|
||||
*
|
||||
* Once the dmer d is in the dictionay we set F(d) = 0.
|
||||
* Once the dmer d is in the dictionary we set F(d) = 0.
|
||||
*/
|
||||
static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
|
||||
COVER_map_t *activeDmers, U32 begin,
|
||||
@ -435,7 +435,7 @@ static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
|
||||
U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer);
|
||||
activeSegment.begin += 1;
|
||||
*delDmerOcc -= 1;
|
||||
/* If this is the last occurence of the dmer, subtract its score */
|
||||
/* If this is the last occurrence of the dmer, subtract its score */
|
||||
if (*delDmerOcc == 0) {
|
||||
COVER_map_remove(activeDmers, delDmer);
|
||||
activeSegment.score -= freqs[delDmer];
|
||||
@ -627,6 +627,39 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
||||
return 1;
|
||||
}
|
||||
|
||||
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
|
||||
{
|
||||
const double ratio = (double)nbDmers / maxDictSize;
|
||||
if (ratio >= 10) {
|
||||
return;
|
||||
}
|
||||
LOCALDISPLAYLEVEL(displayLevel, 1,
|
||||
"WARNING: The maximum dictionary size %u is too large "
|
||||
"compared to the source size %u! "
|
||||
"size(source)/size(dictionary) = %f, but it should be >= "
|
||||
"10! This may lead to a subpar dictionary! We recommend "
|
||||
"training on sources at least 10x, and up to 100x the "
|
||||
"size of the dictionary!\n", (U32)maxDictSize,
|
||||
(U32)nbDmers, ratio);
|
||||
}
|
||||
|
||||
COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize,
|
||||
U32 nbDmers, U32 k, U32 passes)
|
||||
{
|
||||
const U32 minEpochSize = k * 10;
|
||||
COVER_epoch_info_t epochs;
|
||||
epochs.num = MAX(1, maxDictSize / k / passes);
|
||||
epochs.size = nbDmers / epochs.num;
|
||||
if (epochs.size >= minEpochSize) {
|
||||
assert(epochs.size * epochs.num <= nbDmers);
|
||||
return epochs;
|
||||
}
|
||||
epochs.size = MIN(minEpochSize, nbDmers);
|
||||
epochs.num = nbDmers / epochs.size;
|
||||
assert(epochs.size * epochs.num <= nbDmers);
|
||||
return epochs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Given the prepared context build the dictionary.
|
||||
*/
|
||||
@ -636,28 +669,34 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
|
||||
ZDICT_cover_params_t parameters) {
|
||||
BYTE *const dict = (BYTE *)dictBuffer;
|
||||
size_t tail = dictBufferCapacity;
|
||||
/* Divide the data up into epochs of equal size.
|
||||
* We will select at least one segment from each epoch.
|
||||
*/
|
||||
const unsigned epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k / 4));
|
||||
const unsigned epochSize = (U32)(ctx->suffixSize / epochs);
|
||||
/* Divide the data into epochs. We will select one segment from each epoch. */
|
||||
const COVER_epoch_info_t epochs = COVER_computeEpochs(
|
||||
(U32)dictBufferCapacity, (U32)ctx->suffixSize, parameters.k, 4);
|
||||
const size_t maxZeroScoreRun = MAX(10, MIN(100, epochs.num >> 3));
|
||||
size_t zeroScoreRun = 0;
|
||||
size_t epoch;
|
||||
DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
|
||||
epochs, epochSize);
|
||||
(U32)epochs.num, (U32)epochs.size);
|
||||
/* Loop through the epochs until there are no more segments or the dictionary
|
||||
* is full.
|
||||
*/
|
||||
for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
|
||||
const U32 epochBegin = (U32)(epoch * epochSize);
|
||||
const U32 epochEnd = epochBegin + epochSize;
|
||||
for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) {
|
||||
const U32 epochBegin = (U32)(epoch * epochs.size);
|
||||
const U32 epochEnd = epochBegin + epochs.size;
|
||||
size_t segmentSize;
|
||||
/* Select a segment */
|
||||
COVER_segment_t segment = COVER_selectSegment(
|
||||
ctx, freqs, activeDmers, epochBegin, epochEnd, parameters);
|
||||
/* If the segment covers no dmers, then we are out of content */
|
||||
/* If the segment covers no dmers, then we are out of content.
|
||||
* There may be new content in other epochs, for continue for some time.
|
||||
*/
|
||||
if (segment.score == 0) {
|
||||
break;
|
||||
if (++zeroScoreRun >= maxZeroScoreRun) {
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
zeroScoreRun = 0;
|
||||
/* Trim the segment if necessary and if it is too small then we are done */
|
||||
segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
|
||||
if (segmentSize < parameters.d) {
|
||||
@ -706,6 +745,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
||||
parameters.d, parameters.splitPoint)) {
|
||||
return ERROR(GENERIC);
|
||||
}
|
||||
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel);
|
||||
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
|
||||
DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
|
||||
COVER_ctx_destroy(&ctx);
|
||||
@ -977,6 +1017,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
||||
unsigned k;
|
||||
COVER_best_t best;
|
||||
POOL_ctx *pool = NULL;
|
||||
int warned = 0;
|
||||
|
||||
/* Checks */
|
||||
if (splitPoint <= 0 || splitPoint > 1) {
|
||||
@ -1019,6 +1060,10 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
||||
POOL_free(pool);
|
||||
return ERROR(GENERIC);
|
||||
}
|
||||
if (!warned) {
|
||||
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel);
|
||||
warned = 1;
|
||||
}
|
||||
/* Loop through k reusing the same context */
|
||||
for (k = kMinK; k <= kMaxK; k += kStepSize) {
|
||||
/* Prepare the arguments */
|
||||
|
@ -38,6 +38,35 @@ typedef struct {
|
||||
U32 score;
|
||||
} COVER_segment_t;
|
||||
|
||||
/**
|
||||
*Number of epochs and size of each epoch.
|
||||
*/
|
||||
typedef struct {
|
||||
U32 num;
|
||||
U32 size;
|
||||
} COVER_epoch_info_t;
|
||||
|
||||
/**
|
||||
* Computes the number of epochs and the size of each epoch.
|
||||
* We will make sure that each epoch gets at least 10 * k bytes.
|
||||
*
|
||||
* The COVER algorithms divide the data up into epochs of equal size and
|
||||
* select one segment from each epoch.
|
||||
*
|
||||
* @param maxDictSize The maximum allowed dictionary size.
|
||||
* @param nbDmers The number of dmers we are training on.
|
||||
* @param k The parameter k (segment size).
|
||||
* @param passes The target number of passes over the dmer corpus.
|
||||
* More passes means a better dictionary.
|
||||
*/
|
||||
COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, U32 nbDmers,
|
||||
U32 k, U32 passes);
|
||||
|
||||
/**
|
||||
* Warns the user when their corpus is too small.
|
||||
*/
|
||||
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel);
|
||||
|
||||
/**
|
||||
* Checks total compressed size of a dictionary
|
||||
*/
|
||||
|
@ -132,7 +132,7 @@ typedef struct {
|
||||
*
|
||||
* Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
|
||||
*
|
||||
* Once the dmer with hash value d is in the dictionay we set F(d) = 0.
|
||||
* Once the dmer with hash value d is in the dictionary we set F(d) = 0.
|
||||
*/
|
||||
static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
|
||||
U32 *freqs, U32 begin, U32 end,
|
||||
@ -161,7 +161,7 @@ static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
|
||||
/* Get hash value of current dmer */
|
||||
const size_t idx = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, f, d);
|
||||
|
||||
/* Add frequency of this index to score if this is the first occurence of index in active segment */
|
||||
/* Add frequency of this index to score if this is the first occurrence of index in active segment */
|
||||
if (segmentFreqs[idx] == 0) {
|
||||
activeSegment.score += freqs[idx];
|
||||
}
|
||||
@ -386,29 +386,35 @@ FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx,
|
||||
{
|
||||
BYTE *const dict = (BYTE *)dictBuffer;
|
||||
size_t tail = dictBufferCapacity;
|
||||
/* Divide the data up into epochs of equal size.
|
||||
* We will select at least one segment from each epoch.
|
||||
*/
|
||||
const unsigned epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k));
|
||||
const unsigned epochSize = (U32)(ctx->nbDmers / epochs);
|
||||
/* Divide the data into epochs. We will select one segment from each epoch. */
|
||||
const COVER_epoch_info_t epochs = COVER_computeEpochs(
|
||||
(U32)dictBufferCapacity, (U32)ctx->nbDmers, parameters.k, 1);
|
||||
const size_t maxZeroScoreRun = 10;
|
||||
size_t zeroScoreRun = 0;
|
||||
size_t epoch;
|
||||
DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
|
||||
epochs, epochSize);
|
||||
(U32)epochs.num, (U32)epochs.size);
|
||||
/* Loop through the epochs until there are no more segments or the dictionary
|
||||
* is full.
|
||||
*/
|
||||
for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
|
||||
const U32 epochBegin = (U32)(epoch * epochSize);
|
||||
const U32 epochEnd = epochBegin + epochSize;
|
||||
for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) {
|
||||
const U32 epochBegin = (U32)(epoch * epochs.size);
|
||||
const U32 epochEnd = epochBegin + epochs.size;
|
||||
size_t segmentSize;
|
||||
/* Select a segment */
|
||||
COVER_segment_t segment = FASTCOVER_selectSegment(
|
||||
ctx, freqs, epochBegin, epochEnd, parameters, segmentFreqs);
|
||||
|
||||
/* If the segment covers no dmers, then we are out of content */
|
||||
/* If the segment covers no dmers, then we are out of content.
|
||||
* There may be new content in other epochs, for continue for some time.
|
||||
*/
|
||||
if (segment.score == 0) {
|
||||
break;
|
||||
if (++zeroScoreRun >= maxZeroScoreRun) {
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
zeroScoreRun = 0;
|
||||
|
||||
/* Trim the segment if necessary and if it is too small then we are done */
|
||||
segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
|
||||
@ -564,6 +570,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
|
||||
DISPLAYLEVEL(1, "Failed to initialize context\n");
|
||||
return ERROR(GENERIC);
|
||||
}
|
||||
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel);
|
||||
/* Build the dictionary */
|
||||
DISPLAYLEVEL(2, "Building dictionary\n");
|
||||
{
|
||||
@ -616,6 +623,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
||||
unsigned k;
|
||||
COVER_best_t best;
|
||||
POOL_ctx *pool = NULL;
|
||||
int warned = 0;
|
||||
/* Checks */
|
||||
if (splitPoint <= 0 || splitPoint > 1) {
|
||||
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");
|
||||
@ -664,6 +672,10 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
||||
POOL_free(pool);
|
||||
return ERROR(GENERIC);
|
||||
}
|
||||
if (!warned) {
|
||||
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel);
|
||||
warned = 1;
|
||||
}
|
||||
/* Loop through k reusing the same context */
|
||||
for (k = kMinK; k <= kMaxK; k += kStepSize) {
|
||||
/* Prepare the arguments */
|
||||
|
@ -46,7 +46,12 @@ extern "C" {
|
||||
* The resulting dictionary will be saved into `dictBuffer`.
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
* Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte.
|
||||
* Note: Dictionary training will fail if there are not enough samples to construct a
|
||||
* dictionary, or if most of the samples are too small (< 8 bytes being the lower limit).
|
||||
* If dictionary training fails, you should use zstd without a dictionary, as the dictionary
|
||||
* would've been ineffective anyways. If you believe your samples would benefit from a dictionary
|
||||
* please open an issue with details, and we can look into it.
|
||||
* Note: ZDICT_trainFromBuffer()'s memory usage is about 6 MB.
|
||||
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
||||
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
|
||||
* In general, it's recommended to provide a few thousands samples, though this can vary a lot.
|
||||
@ -110,6 +115,7 @@ typedef struct {
|
||||
* The resulting dictionary will be saved into `dictBuffer`.
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
* See ZDICT_trainFromBuffer() for details on failure modes.
|
||||
* Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
|
||||
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
||||
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
|
||||
@ -133,8 +139,9 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
||||
* If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000].
|
||||
*
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
* On success `*parameters` contains the parameters selected.
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
* On success `*parameters` contains the parameters selected.
|
||||
* See ZDICT_trainFromBuffer() for details on failure modes.
|
||||
* Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
|
||||
*/
|
||||
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
||||
@ -151,7 +158,8 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
||||
* The resulting dictionary will be saved into `dictBuffer`.
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
* Note: ZDICT_trainFromBuffer_fastCover() requires about 1 bytes of memory for each input byte and additionally another 6 * 2^f bytes of memory .
|
||||
* See ZDICT_trainFromBuffer() for details on failure modes.
|
||||
* Note: ZDICT_trainFromBuffer_fastCover() requires 6 * 2^f bytes of memory.
|
||||
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
||||
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
|
||||
* In general, it's recommended to provide a few thousands samples, though this can vary a lot.
|
||||
@ -175,9 +183,10 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
|
||||
* If accel is zero, default value of 1 is used.
|
||||
*
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
* On success `*parameters` contains the parameters selected.
|
||||
* Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 1 byte of memory for each input byte and additionally another 6 * 2^f bytes of memory for each thread.
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
* On success `*parameters` contains the parameters selected.
|
||||
* See ZDICT_trainFromBuffer() for details on failure modes.
|
||||
* Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread.
|
||||
*/
|
||||
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
|
||||
size_t dictBufferCapacity, const void* samplesBuffer,
|
||||
@ -195,7 +204,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
|
||||
* maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
|
||||
*
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
|
||||
* or an error code, which can be tested by ZDICT_isError().
|
||||
* or an error code, which can be tested by ZDICT_isError().
|
||||
* Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
|
||||
* Note 2: dictBuffer and dictContent can overlap
|
||||
*/
|
||||
@ -219,6 +228,7 @@ typedef struct {
|
||||
* `parameters` is optional and can be provided with values set to 0 to mean "default".
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
* See ZDICT_trainFromBuffer() for details on failure modes.
|
||||
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
||||
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
|
||||
* In general, it's recommended to provide a few thousands samples, though this can vary a lot.
|
||||
|
@ -20,7 +20,7 @@ extern "C" {
|
||||
***************************************/
|
||||
#include "mem.h" /* MEM_STATIC */
|
||||
#include "error_private.h" /* ERROR */
|
||||
#include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer */
|
||||
#include "zstd_internal.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTD_frameSizeInfo */
|
||||
|
||||
#if !defined (ZSTD_LEGACY_SUPPORT) || (ZSTD_LEGACY_SUPPORT == 0)
|
||||
# undef ZSTD_LEGACY_SUPPORT
|
||||
@ -178,43 +178,73 @@ MEM_STATIC size_t ZSTD_decompressLegacy(
|
||||
}
|
||||
}
|
||||
|
||||
MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src,
|
||||
size_t compressedSize)
|
||||
MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size_t srcSize)
|
||||
{
|
||||
U32 const version = ZSTD_isLegacy(src, compressedSize);
|
||||
ZSTD_frameSizeInfo frameSizeInfo;
|
||||
U32 const version = ZSTD_isLegacy(src, srcSize);
|
||||
switch(version)
|
||||
{
|
||||
#if (ZSTD_LEGACY_SUPPORT <= 1)
|
||||
case 1 :
|
||||
return ZSTDv01_findFrameCompressedSize(src, compressedSize);
|
||||
ZSTDv01_findFrameSizeInfoLegacy(src, srcSize,
|
||||
&frameSizeInfo.compressedSize,
|
||||
&frameSizeInfo.decompressedBound);
|
||||
break;
|
||||
#endif
|
||||
#if (ZSTD_LEGACY_SUPPORT <= 2)
|
||||
case 2 :
|
||||
return ZSTDv02_findFrameCompressedSize(src, compressedSize);
|
||||
ZSTDv02_findFrameSizeInfoLegacy(src, srcSize,
|
||||
&frameSizeInfo.compressedSize,
|
||||
&frameSizeInfo.decompressedBound);
|
||||
break;
|
||||
#endif
|
||||
#if (ZSTD_LEGACY_SUPPORT <= 3)
|
||||
case 3 :
|
||||
return ZSTDv03_findFrameCompressedSize(src, compressedSize);
|
||||
ZSTDv03_findFrameSizeInfoLegacy(src, srcSize,
|
||||
&frameSizeInfo.compressedSize,
|
||||
&frameSizeInfo.decompressedBound);
|
||||
break;
|
||||
#endif
|
||||
#if (ZSTD_LEGACY_SUPPORT <= 4)
|
||||
case 4 :
|
||||
return ZSTDv04_findFrameCompressedSize(src, compressedSize);
|
||||
ZSTDv04_findFrameSizeInfoLegacy(src, srcSize,
|
||||
&frameSizeInfo.compressedSize,
|
||||
&frameSizeInfo.decompressedBound);
|
||||
break;
|
||||
#endif
|
||||
#if (ZSTD_LEGACY_SUPPORT <= 5)
|
||||
case 5 :
|
||||
return ZSTDv05_findFrameCompressedSize(src, compressedSize);
|
||||
ZSTDv05_findFrameSizeInfoLegacy(src, srcSize,
|
||||
&frameSizeInfo.compressedSize,
|
||||
&frameSizeInfo.decompressedBound);
|
||||
break;
|
||||
#endif
|
||||
#if (ZSTD_LEGACY_SUPPORT <= 6)
|
||||
case 6 :
|
||||
return ZSTDv06_findFrameCompressedSize(src, compressedSize);
|
||||
ZSTDv06_findFrameSizeInfoLegacy(src, srcSize,
|
||||
&frameSizeInfo.compressedSize,
|
||||
&frameSizeInfo.decompressedBound);
|
||||
break;
|
||||
#endif
|
||||
#if (ZSTD_LEGACY_SUPPORT <= 7)
|
||||
case 7 :
|
||||
return ZSTDv07_findFrameCompressedSize(src, compressedSize);
|
||||
ZSTDv07_findFrameSizeInfoLegacy(src, srcSize,
|
||||
&frameSizeInfo.compressedSize,
|
||||
&frameSizeInfo.decompressedBound);
|
||||
break;
|
||||
#endif
|
||||
default :
|
||||
return ERROR(prefix_unknown);
|
||||
frameSizeInfo.compressedSize = ERROR(prefix_unknown);
|
||||
frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
|
||||
break;
|
||||
}
|
||||
return frameSizeInfo;
|
||||
}
|
||||
|
||||
MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src, size_t srcSize)
|
||||
{
|
||||
ZSTD_frameSizeInfo frameSizeInfo = ZSTD_findFrameSizeInfoLegacy(src, srcSize);
|
||||
return frameSizeInfo.compressedSize;
|
||||
}
|
||||
|
||||
MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version)
|
||||
|
@ -1336,6 +1336,8 @@ static const U32 ZSTD_magicNumber = 0xFD2FB51E; /* 3rd version : seqNb header
|
||||
#define LITERAL_NOENTROPY 63
|
||||
#define COMMAND_NOENTROPY 7 /* to remove */
|
||||
|
||||
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
|
||||
|
||||
static const size_t ZSTD_blockHeaderSize = 3;
|
||||
static const size_t ZSTD_frameHeaderSize = 4;
|
||||
|
||||
@ -1757,7 +1759,7 @@ static size_t ZSTD_execSequence(BYTE* op,
|
||||
BYTE* const base, BYTE* const oend)
|
||||
{
|
||||
static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
|
||||
static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* substracted */
|
||||
static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */
|
||||
const BYTE* const ostart = op;
|
||||
const size_t litLength = sequence.litLength;
|
||||
BYTE* const endMatch = op + litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
|
||||
@ -1999,36 +2001,59 @@ size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t
|
||||
return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
|
||||
}
|
||||
|
||||
size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t srcSize)
|
||||
/* ZSTD_errorFrameSizeInfoLegacy() :
|
||||
assumes `cSize` and `dBound` are _not_ NULL */
|
||||
static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
|
||||
{
|
||||
*cSize = ret;
|
||||
*dBound = ZSTD_CONTENTSIZE_ERROR;
|
||||
}
|
||||
|
||||
void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
|
||||
{
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
size_t remainingSize = srcSize;
|
||||
size_t nbBlocks = 0;
|
||||
U32 magicNumber;
|
||||
blockProperties_t blockProperties;
|
||||
|
||||
/* Frame Header */
|
||||
if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
|
||||
if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
magicNumber = ZSTD_readBE32(src);
|
||||
if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
|
||||
if (magicNumber != ZSTD_magicNumber) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
|
||||
return;
|
||||
}
|
||||
ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
|
||||
|
||||
/* Loop on each block */
|
||||
while (1)
|
||||
{
|
||||
size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties);
|
||||
if (ZSTDv01_isError(blockSize)) return blockSize;
|
||||
if (ZSTDv01_isError(blockSize)) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, blockSize);
|
||||
return;
|
||||
}
|
||||
|
||||
ip += ZSTD_blockHeaderSize;
|
||||
remainingSize -= ZSTD_blockHeaderSize;
|
||||
if (blockSize > remainingSize) return ERROR(srcSize_wrong);
|
||||
if (blockSize > remainingSize) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
|
||||
if (blockSize == 0) break; /* bt_end */
|
||||
|
||||
ip += blockSize;
|
||||
remainingSize -= blockSize;
|
||||
nbBlocks++;
|
||||
}
|
||||
|
||||
return ip - (const BYTE*)src;
|
||||
*cSize = ip - (const BYTE*)src;
|
||||
*dBound = nbBlocks * BLOCKSIZE;
|
||||
}
|
||||
|
||||
/*******************************
|
||||
|
@ -35,13 +35,18 @@ ZSTDv01_decompress() : decompress ZSTD frames compliant with v0.1.x format
|
||||
size_t ZSTDv01_decompress( void* dst, size_t maxOriginalSize,
|
||||
const void* src, size_t compressedSize);
|
||||
|
||||
/**
|
||||
ZSTDv01_getFrameSrcSize() : get the source length of a ZSTD frame compliant with v0.1.x format
|
||||
compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
return : the number of bytes that would be read to decompress this frame
|
||||
or an errorCode if it fails (which can be tested using ZSTDv01_isError())
|
||||
*/
|
||||
size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t compressedSize);
|
||||
/**
|
||||
ZSTDv01_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.1.x format
|
||||
srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
cSize (output parameter) : the number of bytes that would be read to decompress this frame
|
||||
or an error code if it fails (which can be tested using ZSTDv01_isError())
|
||||
dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
|
||||
or ZSTD_CONTENTSIZE_ERROR if an error occurs
|
||||
|
||||
note : assumes `cSize` and `dBound` are _not_ NULL.
|
||||
*/
|
||||
void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
|
||||
size_t* cSize, unsigned long long* dBound);
|
||||
|
||||
/**
|
||||
ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error
|
||||
|
@ -2728,6 +2728,8 @@ static size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_
|
||||
#define LITERAL_NOENTROPY 63
|
||||
#define COMMAND_NOENTROPY 7 /* to remove */
|
||||
|
||||
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
|
||||
|
||||
static const size_t ZSTD_blockHeaderSize = 3;
|
||||
static const size_t ZSTD_frameHeaderSize = 4;
|
||||
|
||||
@ -3096,7 +3098,7 @@ static size_t ZSTD_execSequence(BYTE* op,
|
||||
BYTE* const base, BYTE* const oend)
|
||||
{
|
||||
static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
|
||||
static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* substracted */
|
||||
static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */
|
||||
const BYTE* const ostart = op;
|
||||
BYTE* const oLitEnd = op + sequence.litLength;
|
||||
BYTE* const oMatchEnd = op + sequence.litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
|
||||
@ -3312,37 +3314,59 @@ static size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, siz
|
||||
return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
|
||||
}
|
||||
|
||||
static size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
|
||||
/* ZSTD_errorFrameSizeInfoLegacy() :
|
||||
assumes `cSize` and `dBound` are _not_ NULL */
|
||||
static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
|
||||
{
|
||||
*cSize = ret;
|
||||
*dBound = ZSTD_CONTENTSIZE_ERROR;
|
||||
}
|
||||
|
||||
void ZSTDv02_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
|
||||
{
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
size_t remainingSize = srcSize;
|
||||
size_t nbBlocks = 0;
|
||||
U32 magicNumber;
|
||||
blockProperties_t blockProperties;
|
||||
|
||||
/* Frame Header */
|
||||
if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
|
||||
if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
magicNumber = MEM_readLE32(src);
|
||||
if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
|
||||
if (magicNumber != ZSTD_magicNumber) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
|
||||
return;
|
||||
}
|
||||
ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
|
||||
|
||||
/* Loop on each block */
|
||||
while (1)
|
||||
{
|
||||
size_t cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
|
||||
if (ZSTD_isError(cBlockSize)) return cBlockSize;
|
||||
if (ZSTD_isError(cBlockSize)) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize);
|
||||
return;
|
||||
}
|
||||
|
||||
ip += ZSTD_blockHeaderSize;
|
||||
remainingSize -= ZSTD_blockHeaderSize;
|
||||
if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
|
||||
if (cBlockSize > remainingSize) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
|
||||
if (cBlockSize == 0) break; /* bt_end */
|
||||
|
||||
ip += cBlockSize;
|
||||
remainingSize -= cBlockSize;
|
||||
nbBlocks++;
|
||||
}
|
||||
|
||||
return ip - (const BYTE*)src;
|
||||
*cSize = ip - (const BYTE*)src;
|
||||
*dBound = nbBlocks * BLOCKSIZE;
|
||||
}
|
||||
|
||||
/*******************************
|
||||
@ -3458,11 +3482,6 @@ size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
|
||||
return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
|
||||
}
|
||||
|
||||
size_t ZSTDv02_findFrameCompressedSize(const void *src, size_t compressedSize)
|
||||
{
|
||||
return ZSTD_findFrameCompressedSize(src, compressedSize);
|
||||
}
|
||||
|
||||
ZSTDv02_Dctx* ZSTDv02_createDCtx(void)
|
||||
{
|
||||
return (ZSTDv02_Dctx*)ZSTD_createDCtx();
|
||||
|
@ -35,13 +35,18 @@ ZSTDv02_decompress() : decompress ZSTD frames compliant with v0.2.x format
|
||||
size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
|
||||
const void* src, size_t compressedSize);
|
||||
|
||||
/**
|
||||
ZSTDv02_getFrameSrcSize() : get the source length of a ZSTD frame compliant with v0.2.x format
|
||||
compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
return : the number of bytes that would be read to decompress this frame
|
||||
or an errorCode if it fails (which can be tested using ZSTDv02_isError())
|
||||
*/
|
||||
size_t ZSTDv02_findFrameCompressedSize(const void* src, size_t compressedSize);
|
||||
/**
|
||||
ZSTDv02_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.2.x format
|
||||
srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
cSize (output parameter) : the number of bytes that would be read to decompress this frame
|
||||
or an error code if it fails (which can be tested using ZSTDv01_isError())
|
||||
dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
|
||||
or ZSTD_CONTENTSIZE_ERROR if an error occurs
|
||||
|
||||
note : assumes `cSize` and `dBound` are _not_ NULL.
|
||||
*/
|
||||
void ZSTDv02_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
|
||||
size_t* cSize, unsigned long long* dBound);
|
||||
|
||||
/**
|
||||
ZSTDv02_isError() : tells if the result of ZSTDv02_decompress() is an error
|
||||
|
@ -2369,6 +2369,8 @@ static size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_
|
||||
#define LITERAL_NOENTROPY 63
|
||||
#define COMMAND_NOENTROPY 7 /* to remove */
|
||||
|
||||
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
|
||||
|
||||
static const size_t ZSTD_blockHeaderSize = 3;
|
||||
static const size_t ZSTD_frameHeaderSize = 4;
|
||||
|
||||
@ -2737,7 +2739,7 @@ static size_t ZSTD_execSequence(BYTE* op,
|
||||
BYTE* const base, BYTE* const oend)
|
||||
{
|
||||
static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
|
||||
static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* substracted */
|
||||
static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */
|
||||
const BYTE* const ostart = op;
|
||||
BYTE* const oLitEnd = op + sequence.litLength;
|
||||
BYTE* const oMatchEnd = op + sequence.litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
|
||||
@ -2953,36 +2955,59 @@ static size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, siz
|
||||
return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
|
||||
}
|
||||
|
||||
static size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize)
|
||||
/* ZSTD_errorFrameSizeInfoLegacy() :
|
||||
assumes `cSize` and `dBound` are _not_ NULL */
|
||||
MEM_STATIC void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
|
||||
{
|
||||
*cSize = ret;
|
||||
*dBound = ZSTD_CONTENTSIZE_ERROR;
|
||||
}
|
||||
|
||||
void ZSTDv03_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
|
||||
{
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
size_t remainingSize = srcSize;
|
||||
size_t nbBlocks = 0;
|
||||
U32 magicNumber;
|
||||
blockProperties_t blockProperties;
|
||||
|
||||
/* Frame Header */
|
||||
if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
|
||||
if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
magicNumber = MEM_readLE32(src);
|
||||
if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
|
||||
if (magicNumber != ZSTD_magicNumber) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
|
||||
return;
|
||||
}
|
||||
ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
|
||||
|
||||
/* Loop on each block */
|
||||
while (1)
|
||||
{
|
||||
size_t cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
|
||||
if (ZSTD_isError(cBlockSize)) return cBlockSize;
|
||||
if (ZSTD_isError(cBlockSize)) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize);
|
||||
return;
|
||||
}
|
||||
|
||||
ip += ZSTD_blockHeaderSize;
|
||||
remainingSize -= ZSTD_blockHeaderSize;
|
||||
if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
|
||||
if (cBlockSize > remainingSize) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
|
||||
if (cBlockSize == 0) break; /* bt_end */
|
||||
|
||||
ip += cBlockSize;
|
||||
remainingSize -= cBlockSize;
|
||||
nbBlocks++;
|
||||
}
|
||||
|
||||
return ip - (const BYTE*)src;
|
||||
*cSize = ip - (const BYTE*)src;
|
||||
*dBound = nbBlocks * BLOCKSIZE;
|
||||
}
|
||||
|
||||
|
||||
@ -3099,11 +3124,6 @@ size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize,
|
||||
return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
|
||||
}
|
||||
|
||||
size_t ZSTDv03_findFrameCompressedSize(const void* src, size_t srcSize)
|
||||
{
|
||||
return ZSTD_findFrameCompressedSize(src, srcSize);
|
||||
}
|
||||
|
||||
ZSTDv03_Dctx* ZSTDv03_createDCtx(void)
|
||||
{
|
||||
return (ZSTDv03_Dctx*)ZSTD_createDCtx();
|
||||
|
@ -35,13 +35,18 @@ ZSTDv03_decompress() : decompress ZSTD frames compliant with v0.3.x format
|
||||
size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize,
|
||||
const void* src, size_t compressedSize);
|
||||
|
||||
/**
|
||||
ZSTDv03_getFrameSrcSize() : get the source length of a ZSTD frame compliant with v0.3.x format
|
||||
compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
return : the number of bytes that would be read to decompress this frame
|
||||
or an errorCode if it fails (which can be tested using ZSTDv03_isError())
|
||||
*/
|
||||
size_t ZSTDv03_findFrameCompressedSize(const void* src, size_t compressedSize);
|
||||
/**
|
||||
ZSTDv03_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.3.x format
|
||||
srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
cSize (output parameter) : the number of bytes that would be read to decompress this frame
|
||||
or an error code if it fails (which can be tested using ZSTDv01_isError())
|
||||
dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
|
||||
or ZSTD_CONTENTSIZE_ERROR if an error occurs
|
||||
|
||||
note : assumes `cSize` and `dBound` are _not_ NULL.
|
||||
*/
|
||||
void ZSTDv03_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
|
||||
size_t* cSize, unsigned long long* dBound);
|
||||
|
||||
/**
|
||||
ZSTDv03_isError() : tells if the result of ZSTDv03_decompress() is an error
|
||||
|
@ -373,6 +373,8 @@ static const size_t ZSTD_frameHeaderSize_min = 5;
|
||||
#define MIN_SEQUENCES_SIZE (2 /*seqNb*/ + 2 /*dumps*/ + 3 /*seqTables*/ + 1 /*bitStream*/)
|
||||
#define MIN_CBLOCK_SIZE (3 /*litCSize*/ + MIN_SEQUENCES_SIZE)
|
||||
|
||||
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
|
||||
|
||||
typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
|
||||
|
||||
|
||||
@ -2860,7 +2862,7 @@ static size_t ZSTD_execSequence(BYTE* op,
|
||||
const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
|
||||
{
|
||||
static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
|
||||
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */
|
||||
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
|
||||
BYTE* const oLitEnd = op + sequence.litLength;
|
||||
const size_t sequenceLength = sequence.litLength + sequence.matchLength;
|
||||
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
|
||||
@ -3119,34 +3121,57 @@ static size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx,
|
||||
return op-ostart;
|
||||
}
|
||||
|
||||
static size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize)
|
||||
/* ZSTD_errorFrameSizeInfoLegacy() :
|
||||
assumes `cSize` and `dBound` are _not_ NULL */
|
||||
static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
|
||||
{
|
||||
*cSize = ret;
|
||||
*dBound = ZSTD_CONTENTSIZE_ERROR;
|
||||
}
|
||||
|
||||
void ZSTDv04_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
|
||||
{
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
size_t remainingSize = srcSize;
|
||||
size_t nbBlocks = 0;
|
||||
blockProperties_t blockProperties;
|
||||
|
||||
/* Frame Header */
|
||||
if (srcSize < ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong);
|
||||
if (MEM_readLE32(src) != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
|
||||
if (srcSize < ZSTD_frameHeaderSize_min) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
if (MEM_readLE32(src) != ZSTD_MAGICNUMBER) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
|
||||
return;
|
||||
}
|
||||
ip += ZSTD_frameHeaderSize_min; remainingSize -= ZSTD_frameHeaderSize_min;
|
||||
|
||||
/* Loop on each block */
|
||||
while (1)
|
||||
{
|
||||
size_t cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
|
||||
if (ZSTD_isError(cBlockSize)) return cBlockSize;
|
||||
if (ZSTD_isError(cBlockSize)) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize);
|
||||
return;
|
||||
}
|
||||
|
||||
ip += ZSTD_blockHeaderSize;
|
||||
remainingSize -= ZSTD_blockHeaderSize;
|
||||
if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
|
||||
if (cBlockSize > remainingSize) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
|
||||
if (cBlockSize == 0) break; /* bt_end */
|
||||
|
||||
ip += cBlockSize;
|
||||
remainingSize -= cBlockSize;
|
||||
nbBlocks++;
|
||||
}
|
||||
|
||||
return ip - (const BYTE*)src;
|
||||
*cSize = ip - (const BYTE*)src;
|
||||
*dBound = nbBlocks * BLOCKSIZE;
|
||||
}
|
||||
|
||||
/* ******************************
|
||||
@ -3578,11 +3603,6 @@ size_t ZSTDv04_decompress(void* dst, size_t maxDstSize, const void* src, size_t
|
||||
#endif
|
||||
}
|
||||
|
||||
size_t ZSTDv04_findFrameCompressedSize(const void* src, size_t srcSize)
|
||||
{
|
||||
return ZSTD_findFrameCompressedSize(src, srcSize);
|
||||
}
|
||||
|
||||
size_t ZSTDv04_resetDCtx(ZSTDv04_Dctx* dctx) { return ZSTD_resetDCtx(dctx); }
|
||||
|
||||
size_t ZSTDv04_nextSrcSizeToDecompress(ZSTDv04_Dctx* dctx)
|
||||
|
@ -35,13 +35,18 @@ ZSTDv04_decompress() : decompress ZSTD frames compliant with v0.4.x format
|
||||
size_t ZSTDv04_decompress( void* dst, size_t maxOriginalSize,
|
||||
const void* src, size_t compressedSize);
|
||||
|
||||
/**
|
||||
ZSTDv04_getFrameSrcSize() : get the source length of a ZSTD frame compliant with v0.4.x format
|
||||
compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
return : the number of bytes that would be read to decompress this frame
|
||||
or an errorCode if it fails (which can be tested using ZSTDv04_isError())
|
||||
*/
|
||||
size_t ZSTDv04_findFrameCompressedSize(const void* src, size_t compressedSize);
|
||||
/**
|
||||
ZSTDv04_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.4.x format
|
||||
srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
cSize (output parameter) : the number of bytes that would be read to decompress this frame
|
||||
or an error code if it fails (which can be tested using ZSTDv01_isError())
|
||||
dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
|
||||
or ZSTD_CONTENTSIZE_ERROR if an error occurs
|
||||
|
||||
note : assumes `cSize` and `dBound` are _not_ NULL.
|
||||
*/
|
||||
void ZSTDv04_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
|
||||
size_t* cSize, unsigned long long* dBound);
|
||||
|
||||
/**
|
||||
ZSTDv04_isError() : tells if the result of ZSTDv04_decompress() is an error
|
||||
|
@ -491,6 +491,8 @@ static const size_t ZSTDv05_frameHeaderSize_min = 5;
|
||||
|
||||
#define WILDCOPY_OVERLENGTH 8
|
||||
|
||||
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
|
||||
|
||||
typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
|
||||
|
||||
|
||||
@ -3217,7 +3219,7 @@ static size_t ZSTDv05_execSequence(BYTE* op,
|
||||
const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
|
||||
{
|
||||
static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
|
||||
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */
|
||||
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
|
||||
BYTE* const oLitEnd = op + sequence.litLength;
|
||||
const size_t sequenceLength = sequence.litLength + sequence.matchLength;
|
||||
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
|
||||
@ -3508,34 +3510,57 @@ size_t ZSTDv05_decompress(void* dst, size_t maxDstSize, const void* src, size_t
|
||||
#endif
|
||||
}
|
||||
|
||||
size_t ZSTDv05_findFrameCompressedSize(const void *src, size_t srcSize)
|
||||
/* ZSTD_errorFrameSizeInfoLegacy() :
|
||||
assumes `cSize` and `dBound` are _not_ NULL */
|
||||
static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
|
||||
{
|
||||
*cSize = ret;
|
||||
*dBound = ZSTD_CONTENTSIZE_ERROR;
|
||||
}
|
||||
|
||||
void ZSTDv05_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
|
||||
{
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
size_t remainingSize = srcSize;
|
||||
size_t nbBlocks = 0;
|
||||
blockProperties_t blockProperties;
|
||||
|
||||
/* Frame Header */
|
||||
if (srcSize < ZSTDv05_frameHeaderSize_min) return ERROR(srcSize_wrong);
|
||||
if (MEM_readLE32(src) != ZSTDv05_MAGICNUMBER) return ERROR(prefix_unknown);
|
||||
if (srcSize < ZSTDv05_frameHeaderSize_min) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
if (MEM_readLE32(src) != ZSTDv05_MAGICNUMBER) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
|
||||
return;
|
||||
}
|
||||
ip += ZSTDv05_frameHeaderSize_min; remainingSize -= ZSTDv05_frameHeaderSize_min;
|
||||
|
||||
/* Loop on each block */
|
||||
while (1)
|
||||
{
|
||||
size_t cBlockSize = ZSTDv05_getcBlockSize(ip, remainingSize, &blockProperties);
|
||||
if (ZSTDv05_isError(cBlockSize)) return cBlockSize;
|
||||
if (ZSTDv05_isError(cBlockSize)) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize);
|
||||
return;
|
||||
}
|
||||
|
||||
ip += ZSTDv05_blockHeaderSize;
|
||||
remainingSize -= ZSTDv05_blockHeaderSize;
|
||||
if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
|
||||
if (cBlockSize > remainingSize) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
|
||||
if (cBlockSize == 0) break; /* bt_end */
|
||||
|
||||
ip += cBlockSize;
|
||||
remainingSize -= cBlockSize;
|
||||
nbBlocks++;
|
||||
}
|
||||
|
||||
return ip - (const BYTE*)src;
|
||||
*cSize = ip - (const BYTE*)src;
|
||||
*dBound = nbBlocks * BLOCKSIZE;
|
||||
}
|
||||
|
||||
/* ******************************
|
||||
|
@ -33,13 +33,18 @@ extern "C" {
|
||||
size_t ZSTDv05_decompress( void* dst, size_t dstCapacity,
|
||||
const void* src, size_t compressedSize);
|
||||
|
||||
/**
|
||||
ZSTDv05_getFrameSrcSize() : get the source length of a ZSTD frame
|
||||
compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
return : the number of bytes that would be read to decompress this frame
|
||||
or an errorCode if it fails (which can be tested using ZSTDv05_isError())
|
||||
*/
|
||||
size_t ZSTDv05_findFrameCompressedSize(const void* src, size_t compressedSize);
|
||||
/**
|
||||
ZSTDv05_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.5.x format
|
||||
srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
cSize (output parameter) : the number of bytes that would be read to decompress this frame
|
||||
or an error code if it fails (which can be tested using ZSTDv01_isError())
|
||||
dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
|
||||
or ZSTD_CONTENTSIZE_ERROR if an error occurs
|
||||
|
||||
note : assumes `cSize` and `dBound` are _not_ NULL.
|
||||
*/
|
||||
void ZSTDv05_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
|
||||
size_t* cSize, unsigned long long* dBound);
|
||||
|
||||
/* *************************************
|
||||
* Helper functions
|
||||
|
@ -506,6 +506,8 @@ typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
|
||||
#define FSEv06_ENCODING_STATIC 2
|
||||
#define FSEv06_ENCODING_DYNAMIC 3
|
||||
|
||||
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
|
||||
|
||||
static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9,10,11,12,
|
||||
13,14,15,16 };
|
||||
@ -3406,7 +3408,7 @@ static size_t ZSTDv06_execSequence(BYTE* op,
|
||||
if (sequence.offset < 8) {
|
||||
/* close range match, overlap */
|
||||
static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
|
||||
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */
|
||||
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
|
||||
int const sub2 = dec64table[sequence.offset];
|
||||
op[0] = match[0];
|
||||
op[1] = match[1];
|
||||
@ -3654,36 +3656,62 @@ size_t ZSTDv06_decompress(void* dst, size_t dstCapacity, const void* src, size_t
|
||||
#endif
|
||||
}
|
||||
|
||||
size_t ZSTDv06_findFrameCompressedSize(const void* src, size_t srcSize)
|
||||
/* ZSTD_errorFrameSizeInfoLegacy() :
|
||||
assumes `cSize` and `dBound` are _not_ NULL */
|
||||
static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
|
||||
{
|
||||
*cSize = ret;
|
||||
*dBound = ZSTD_CONTENTSIZE_ERROR;
|
||||
}
|
||||
|
||||
void ZSTDv06_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
|
||||
{
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
size_t remainingSize = srcSize;
|
||||
size_t nbBlocks = 0;
|
||||
blockProperties_t blockProperties = { bt_compressed, 0 };
|
||||
|
||||
/* Frame Header */
|
||||
{ size_t const frameHeaderSize = ZSTDv06_frameHeaderSize(src, ZSTDv06_frameHeaderSize_min);
|
||||
if (ZSTDv06_isError(frameHeaderSize)) return frameHeaderSize;
|
||||
if (MEM_readLE32(src) != ZSTDv06_MAGICNUMBER) return ERROR(prefix_unknown);
|
||||
if (srcSize < frameHeaderSize+ZSTDv06_blockHeaderSize) return ERROR(srcSize_wrong);
|
||||
if (ZSTDv06_isError(frameHeaderSize)) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, frameHeaderSize);
|
||||
return;
|
||||
}
|
||||
if (MEM_readLE32(src) != ZSTDv06_MAGICNUMBER) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
|
||||
return;
|
||||
}
|
||||
if (srcSize < frameHeaderSize+ZSTDv06_blockHeaderSize) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
ip += frameHeaderSize; remainingSize -= frameHeaderSize;
|
||||
}
|
||||
|
||||
/* Loop on each block */
|
||||
while (1) {
|
||||
size_t const cBlockSize = ZSTDv06_getcBlockSize(ip, remainingSize, &blockProperties);
|
||||
if (ZSTDv06_isError(cBlockSize)) return cBlockSize;
|
||||
if (ZSTDv06_isError(cBlockSize)) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize);
|
||||
return;
|
||||
}
|
||||
|
||||
ip += ZSTDv06_blockHeaderSize;
|
||||
remainingSize -= ZSTDv06_blockHeaderSize;
|
||||
if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
|
||||
if (cBlockSize > remainingSize) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
|
||||
if (cBlockSize == 0) break; /* bt_end */
|
||||
|
||||
ip += cBlockSize;
|
||||
remainingSize -= cBlockSize;
|
||||
nbBlocks++;
|
||||
}
|
||||
|
||||
return ip - (const BYTE*)src;
|
||||
*cSize = ip - (const BYTE*)src;
|
||||
*dBound = nbBlocks * ZSTDv06_BLOCKSIZE_MAX;
|
||||
}
|
||||
|
||||
/*_******************************
|
||||
|
@ -43,12 +43,17 @@ ZSTDLIBv06_API size_t ZSTDv06_decompress( void* dst, size_t dstCapacity,
|
||||
const void* src, size_t compressedSize);
|
||||
|
||||
/**
|
||||
ZSTDv06_getFrameSrcSize() : get the source length of a ZSTD frame
|
||||
compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
return : the number of bytes that would be read to decompress this frame
|
||||
or an errorCode if it fails (which can be tested using ZSTDv06_isError())
|
||||
ZSTDv06_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.6.x format
|
||||
srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
cSize (output parameter) : the number of bytes that would be read to decompress this frame
|
||||
or an error code if it fails (which can be tested using ZSTDv01_isError())
|
||||
dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
|
||||
or ZSTD_CONTENTSIZE_ERROR if an error occurs
|
||||
|
||||
note : assumes `cSize` and `dBound` are _not_ NULL.
|
||||
*/
|
||||
size_t ZSTDv06_findFrameCompressedSize(const void* src, size_t compressedSize);
|
||||
void ZSTDv06_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
|
||||
size_t* cSize, unsigned long long* dBound);
|
||||
|
||||
/* *************************************
|
||||
* Helper functions
|
||||
|
@ -2740,6 +2740,8 @@ typedef enum { lbt_huffman, lbt_repeat, lbt_raw, lbt_rle } litBlockType_t;
|
||||
#define FSEv07_ENCODING_STATIC 2
|
||||
#define FSEv07_ENCODING_DYNAMIC 3
|
||||
|
||||
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
|
||||
|
||||
static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9,10,11,12,
|
||||
13,14,15,16 };
|
||||
@ -3631,7 +3633,7 @@ size_t ZSTDv07_execSequence(BYTE* op,
|
||||
if (sequence.offset < 8) {
|
||||
/* close range match, overlap */
|
||||
static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
|
||||
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */
|
||||
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
|
||||
int const sub2 = dec64table[sequence.offset];
|
||||
op[0] = match[0];
|
||||
op[1] = match[1];
|
||||
@ -3895,19 +3897,40 @@ size_t ZSTDv07_decompress(void* dst, size_t dstCapacity, const void* src, size_t
|
||||
#endif
|
||||
}
|
||||
|
||||
size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t srcSize)
|
||||
/* ZSTD_errorFrameSizeInfoLegacy() :
|
||||
assumes `cSize` and `dBound` are _not_ NULL */
|
||||
static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
|
||||
{
|
||||
*cSize = ret;
|
||||
*dBound = ZSTD_CONTENTSIZE_ERROR;
|
||||
}
|
||||
|
||||
void ZSTDv07_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
|
||||
{
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
size_t remainingSize = srcSize;
|
||||
size_t nbBlocks = 0;
|
||||
|
||||
/* check */
|
||||
if (srcSize < ZSTDv07_frameHeaderSize_min+ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
|
||||
if (srcSize < ZSTDv07_frameHeaderSize_min+ZSTDv07_blockHeaderSize) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
|
||||
/* Frame Header */
|
||||
{ size_t const frameHeaderSize = ZSTDv07_frameHeaderSize(src, ZSTDv07_frameHeaderSize_min);
|
||||
if (ZSTDv07_isError(frameHeaderSize)) return frameHeaderSize;
|
||||
if (MEM_readLE32(src) != ZSTDv07_MAGICNUMBER) return ERROR(prefix_unknown);
|
||||
if (srcSize < frameHeaderSize+ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
|
||||
if (ZSTDv07_isError(frameHeaderSize)) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, frameHeaderSize);
|
||||
return;
|
||||
}
|
||||
if (MEM_readLE32(src) != ZSTDv07_MAGICNUMBER) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
|
||||
return;
|
||||
}
|
||||
if (srcSize < frameHeaderSize+ZSTDv07_blockHeaderSize) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
ip += frameHeaderSize; remainingSize -= frameHeaderSize;
|
||||
}
|
||||
|
||||
@ -3915,20 +3938,28 @@ size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t srcSize)
|
||||
while (1) {
|
||||
blockProperties_t blockProperties;
|
||||
size_t const cBlockSize = ZSTDv07_getcBlockSize(ip, remainingSize, &blockProperties);
|
||||
if (ZSTDv07_isError(cBlockSize)) return cBlockSize;
|
||||
if (ZSTDv07_isError(cBlockSize)) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize);
|
||||
return;
|
||||
}
|
||||
|
||||
ip += ZSTDv07_blockHeaderSize;
|
||||
remainingSize -= ZSTDv07_blockHeaderSize;
|
||||
|
||||
if (blockProperties.blockType == bt_end) break;
|
||||
|
||||
if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
|
||||
if (cBlockSize > remainingSize) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
|
||||
ip += cBlockSize;
|
||||
remainingSize -= cBlockSize;
|
||||
nbBlocks++;
|
||||
}
|
||||
|
||||
return ip - (const BYTE*)src;
|
||||
*cSize = ip - (const BYTE*)src;
|
||||
*dBound = nbBlocks * ZSTDv07_BLOCKSIZE_ABSOLUTEMAX;
|
||||
}
|
||||
|
||||
/*_******************************
|
||||
|
@ -50,12 +50,17 @@ ZSTDLIBv07_API size_t ZSTDv07_decompress( void* dst, size_t dstCapacity,
|
||||
const void* src, size_t compressedSize);
|
||||
|
||||
/**
|
||||
ZSTDv07_getFrameSrcSize() : get the source length of a ZSTD frame
|
||||
compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
return : the number of bytes that would be read to decompress this frame
|
||||
or an errorCode if it fails (which can be tested using ZSTDv07_isError())
|
||||
ZSTDv07_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.7.x format
|
||||
srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
cSize (output parameter) : the number of bytes that would be read to decompress this frame
|
||||
or an error code if it fails (which can be tested using ZSTDv01_isError())
|
||||
dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
|
||||
or ZSTD_CONTENTSIZE_ERROR if an error occurs
|
||||
|
||||
note : assumes `cSize` and `dBound` are _not_ NULL.
|
||||
*/
|
||||
size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t compressedSize);
|
||||
void ZSTDv07_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
|
||||
size_t* cSize, unsigned long long* dBound);
|
||||
|
||||
/*====== Helper functions ======*/
|
||||
ZSTDLIBv07_API unsigned ZSTDv07_isError(size_t code); /*!< tells if a `size_t` function result is an error code */
|
||||
|
1085
lib/zstd.h
1085
lib/zstd.h
File diff suppressed because it is too large
Load Diff
@ -51,7 +51,7 @@ endif
|
||||
CFLAGS ?= -O3
|
||||
DEBUGFLAGS+=-Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
|
||||
-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
|
||||
-Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \
|
||||
-Wstrict-prototypes -Wundef -Wpointer-arith \
|
||||
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
|
||||
-Wredundant-decls -Wmissing-prototypes -Wc++-compat
|
||||
CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)
|
||||
@ -165,7 +165,7 @@ $(ZSTDDECOMP_O): CFLAGS += $(ALIGN_LOOP)
|
||||
zstd : CPPFLAGS += $(THREAD_CPP) $(ZLIBCPP) $(LZMACPP) $(LZ4CPP)
|
||||
zstd : LDFLAGS += $(THREAD_LD) $(ZLIBLD) $(LZMALD) $(LZ4LD) $(DEBUGFLAGS_LD)
|
||||
zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
|
||||
zstd : $(ZSTDLIB_FILES) zstdcli.o util.o fileio.o benchfn.o benchzstd.o datagen.o dibio.o
|
||||
zstd : $(ZSTDLIB_FILES) zstdcli.o util.o timefn.o fileio.o benchfn.o benchzstd.o datagen.o dibio.o
|
||||
@echo "$(THREAD_MSG)"
|
||||
@echo "$(ZLIB_MSG)"
|
||||
@echo "$(LZMA_MSG)"
|
||||
@ -183,13 +183,13 @@ zstd-release: zstd
|
||||
zstd32 : CPPFLAGS += $(THREAD_CPP)
|
||||
zstd32 : LDFLAGS += $(THREAD_LD)
|
||||
zstd32 : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
|
||||
zstd32 : $(ZSTDLIB_FILES) zstdcli.c util.c fileio.c benchfn.c benchzstd.c datagen.c dibio.c
|
||||
zstd32 : $(ZSTDLIB_FILES) zstdcli.c util.c timefn.c fileio.c benchfn.c benchzstd.c datagen.c dibio.c
|
||||
ifneq (,$(filter Windows%,$(OS)))
|
||||
windres/generate_res.bat
|
||||
endif
|
||||
$(CC) -m32 $(FLAGS) $^ $(RES32_FILE) -o $@$(EXT)
|
||||
|
||||
zstd-nolegacy : $(ZSTD_FILES) $(ZDICT_FILES) zstdcli.o util.o fileio.c benchfn.o benchzstd.o datagen.o dibio.o
|
||||
zstd-nolegacy : $(ZSTD_FILES) $(ZDICT_FILES) zstdcli.o util.o fileio.c benchfn.o benchzstd.o timefn.o datagen.o dibio.o
|
||||
$(CC) $(FLAGS) $^ -o $@$(EXT) $(LDFLAGS)
|
||||
|
||||
zstd-nomt : THREAD_CPP :=
|
||||
@ -222,13 +222,13 @@ zstd-pgo :
|
||||
|
||||
# minimal target, with only zstd compression and decompression. no bench. no legacy.
|
||||
zstd-small: CFLAGS = -Os -s
|
||||
zstd-frugal zstd-small: $(ZSTD_FILES) zstdcli.c util.c fileio.c
|
||||
zstd-frugal zstd-small: $(ZSTD_FILES) zstdcli.c util.c timefn.c fileio.c
|
||||
$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT $^ -o $@$(EXT)
|
||||
|
||||
zstd-decompress: $(ZSTDCOMMON_FILES) $(ZSTDDECOMP_FILES) zstdcli.c util.c fileio.c
|
||||
zstd-decompress: $(ZSTDCOMMON_FILES) $(ZSTDDECOMP_FILES) zstdcli.c util.c timefn.c fileio.c
|
||||
$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NOCOMPRESS $^ -o $@$(EXT)
|
||||
|
||||
zstd-compress: $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) zstdcli.c util.c fileio.c
|
||||
zstd-compress: $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) zstdcli.c util.c timefn.c fileio.c
|
||||
$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NODECOMPRESS $^ -o $@$(EXT)
|
||||
|
||||
zstdmt: zstd
|
||||
@ -265,9 +265,9 @@ man: zstd.1 zstdgrep.1 zstdless.1
|
||||
|
||||
.PHONY: clean-man
|
||||
clean-man:
|
||||
rm zstd.1
|
||||
rm zstdgrep.1
|
||||
rm zstdless.1
|
||||
$(RM) zstd.1
|
||||
$(RM) zstdgrep.1
|
||||
$(RM) zstdless.1
|
||||
|
||||
.PHONY: preview-man
|
||||
preview-man: clean-man man
|
||||
|
@ -13,25 +13,20 @@
|
||||
/* *************************************
|
||||
* Includes
|
||||
***************************************/
|
||||
#include "platform.h" /* Large Files support */
|
||||
#include "util.h" /* UTIL_getFileSize, UTIL_sleep */
|
||||
#include <stdlib.h> /* malloc, free */
|
||||
#include <string.h> /* memset */
|
||||
#include <stdio.h> /* fprintf, fopen */
|
||||
#undef NDEBUG /* assert must not be disabled */
|
||||
#include <assert.h> /* assert */
|
||||
|
||||
#include "mem.h"
|
||||
#include "timefn.h" /* UTIL_time_t, UTIL_getTime */
|
||||
#include "benchfn.h"
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
#define TIMELOOP_MICROSEC (1*1000000ULL) /* 1 second */
|
||||
#define TIMELOOP_MICROSEC SEC_TO_MICRO /* 1 second */
|
||||
#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */
|
||||
#define ACTIVEPERIOD_MICROSEC (70*TIMELOOP_MICROSEC) /* 70 seconds */
|
||||
#define COOLPERIOD_SEC 10
|
||||
|
||||
#define KB *(1 <<10)
|
||||
#define MB *(1 <<20)
|
||||
@ -39,14 +34,16 @@
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Errors
|
||||
* Debug errors
|
||||
***************************************/
|
||||
#ifndef DEBUG
|
||||
# define DEBUG 0
|
||||
#if defined(DEBUG) && (DEBUG >= 1)
|
||||
# include <stdio.h> /* fprintf */
|
||||
# define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
|
||||
# define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }
|
||||
#else
|
||||
# define DEBUGOUTPUT(...)
|
||||
#endif
|
||||
|
||||
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
|
||||
#define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }
|
||||
|
||||
/* error without displaying */
|
||||
#define RETURN_QUIET_ERROR(retValue, ...) { \
|
||||
@ -116,15 +113,7 @@ BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
|
||||
{ size_t i;
|
||||
for(i = 0; i < p.blockCount; i++) {
|
||||
memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]); /* warm up and erase result buffer */
|
||||
}
|
||||
#if 0
|
||||
/* based on testing these seem to lower accuracy of multiple calls of 1 nbLoops vs 1 call of multiple nbLoops
|
||||
* (Makes former slower)
|
||||
*/
|
||||
UTIL_sleepMilli(5); /* give processor time to other processes */
|
||||
UTIL_waitForNextTick();
|
||||
#endif
|
||||
}
|
||||
} }
|
||||
|
||||
/* benchmark */
|
||||
{ UTIL_time_t const clockStart = UTIL_getTime();
|
||||
@ -146,9 +135,9 @@ BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
|
||||
} }
|
||||
} /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */
|
||||
|
||||
{ U64 const totalTime = UTIL_clockSpanNano(clockStart);
|
||||
{ PTime const totalTime = UTIL_clockSpanNano(clockStart);
|
||||
BMK_runTime_t rt;
|
||||
rt.nanoSecPerRun = totalTime / nbLoops;
|
||||
rt.nanoSecPerRun = (double)totalTime / nbLoops;
|
||||
rt.sumOfReturn = dstSize;
|
||||
return BMK_setValid_runTime(rt);
|
||||
} }
|
||||
@ -158,9 +147,9 @@ BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
|
||||
/* ==== Benchmarking any function, providing intermediate results ==== */
|
||||
|
||||
struct BMK_timedFnState_s {
|
||||
U64 timeSpent_ns;
|
||||
U64 timeBudget_ns;
|
||||
U64 runBudget_ns;
|
||||
PTime timeSpent_ns;
|
||||
PTime timeBudget_ns;
|
||||
PTime runBudget_ns;
|
||||
BMK_runTime_t fastestRun;
|
||||
unsigned nbLoops;
|
||||
UTIL_time_t coolTime;
|
||||
@ -174,8 +163,20 @@ BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
|
||||
return r;
|
||||
}
|
||||
|
||||
void BMK_freeTimedFnState(BMK_timedFnState_t* state) {
|
||||
free(state);
|
||||
void BMK_freeTimedFnState(BMK_timedFnState_t* state) { free(state); }
|
||||
|
||||
BMK_timedFnState_t*
|
||||
BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms)
|
||||
{
|
||||
typedef char check_size[ 2 * (sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s)) - 1]; /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */
|
||||
typedef struct { check_size c; BMK_timedFnState_t tfs; } tfs_align; /* force tfs to be aligned at its next best position */
|
||||
size_t const tfs_alignment = offsetof(tfs_align, tfs); /* provides the minimal alignment restriction for BMK_timedFnState_t */
|
||||
BMK_timedFnState_t* const r = (BMK_timedFnState_t*)buffer;
|
||||
if (buffer == NULL) return NULL;
|
||||
if (size < sizeof(struct BMK_timedFnState_s)) return NULL;
|
||||
if ((size_t)buffer % tfs_alignment) return NULL; /* buffer must be properly aligned */
|
||||
BMK_resetTimedFnState(r, total_ms, run_ms);
|
||||
return r;
|
||||
}
|
||||
|
||||
void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
|
||||
@ -184,9 +185,9 @@ void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms,
|
||||
if (!run_ms) run_ms = 1;
|
||||
if (run_ms > total_ms) run_ms = total_ms;
|
||||
timedFnState->timeSpent_ns = 0;
|
||||
timedFnState->timeBudget_ns = (U64)total_ms * TIMELOOP_NANOSEC / 1000;
|
||||
timedFnState->runBudget_ns = (U64)run_ms * TIMELOOP_NANOSEC / 1000;
|
||||
timedFnState->fastestRun.nanoSecPerRun = (U64)(-1LL);
|
||||
timedFnState->timeBudget_ns = (PTime)total_ms * TIMELOOP_NANOSEC / 1000;
|
||||
timedFnState->runBudget_ns = (PTime)run_ms * TIMELOOP_NANOSEC / 1000;
|
||||
timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000; /* hopefully large enough : must be larger than any potential measurement */
|
||||
timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);
|
||||
timedFnState->nbLoops = 1;
|
||||
timedFnState->coolTime = UTIL_getTime();
|
||||
@ -208,37 +209,27 @@ int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)
|
||||
BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont,
|
||||
BMK_benchParams_t p)
|
||||
{
|
||||
U64 const runBudget_ns = cont->runBudget_ns;
|
||||
U64 const runTimeMin_ns = runBudget_ns / 2;
|
||||
PTime const runBudget_ns = cont->runBudget_ns;
|
||||
PTime const runTimeMin_ns = runBudget_ns / 2;
|
||||
int completed = 0;
|
||||
BMK_runTime_t bestRunTime = cont->fastestRun;
|
||||
|
||||
while (!completed) {
|
||||
BMK_runOutcome_t runResult;
|
||||
|
||||
/* Overheat protection */
|
||||
if (UTIL_clockSpanMicro(cont->coolTime) > ACTIVEPERIOD_MICROSEC) {
|
||||
DEBUGOUTPUT("\rcooling down ... \r");
|
||||
UTIL_sleep(COOLPERIOD_SEC);
|
||||
cont->coolTime = UTIL_getTime();
|
||||
}
|
||||
|
||||
/* reinitialize capacity */
|
||||
runResult = BMK_benchFunction(p, cont->nbLoops);
|
||||
BMK_runOutcome_t const runResult = BMK_benchFunction(p, cont->nbLoops);
|
||||
|
||||
if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */
|
||||
return runResult;
|
||||
}
|
||||
|
||||
{ BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult);
|
||||
U64 const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;
|
||||
double const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;
|
||||
|
||||
cont->timeSpent_ns += loopDuration_ns;
|
||||
cont->timeSpent_ns += (unsigned long long)loopDuration_ns;
|
||||
|
||||
/* estimate nbLoops for next run to last approximately 1 second */
|
||||
if (loopDuration_ns > (runBudget_ns / 50)) {
|
||||
U64 const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
|
||||
cont->nbLoops = (U32)(runBudget_ns / fastestRun_ns) + 1;
|
||||
double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
|
||||
cont->nbLoops = (unsigned)(runBudget_ns / fastestRun_ns) + 1;
|
||||
} else {
|
||||
/* previous run was too short : blindly increase workload by x multiplier */
|
||||
const unsigned multiplier = 10;
|
||||
|
@ -31,7 +31,7 @@ extern "C" {
|
||||
/* BMK_runTime_t: valid result return type */
|
||||
|
||||
typedef struct {
|
||||
unsigned long long nanoSecPerRun; /* time per iteration (over all blocks) */
|
||||
double nanoSecPerRun; /* time per iteration (over all blocks) */
|
||||
size_t sumOfReturn; /* sum of return values */
|
||||
} BMK_runTime_t;
|
||||
|
||||
@ -58,30 +58,31 @@ typedef size_t (*BMK_initFn_t)(void* initPayload);
|
||||
typedef unsigned (*BMK_errorFn_t)(size_t);
|
||||
|
||||
|
||||
/* BMK_benchFunction() parameters are provided through following structure.
|
||||
* This is preferable for readability,
|
||||
* as the number of parameters required is pretty large.
|
||||
/* BMK_benchFunction() parameters are provided via the following structure.
|
||||
* A structure is preferable for readability,
|
||||
* as the number of parameters required is fairly large.
|
||||
* No initializer is provided, because it doesn't make sense to provide some "default" :
|
||||
* all parameters should be specified by the caller */
|
||||
* all parameters must be specified by the caller.
|
||||
* optional parameters are labelled explicitly, and accept value NULL when not used */
|
||||
typedef struct {
|
||||
BMK_benchFn_t benchFn; /* the function to benchmark, over the set of blocks */
|
||||
void* benchPayload; /* pass custom parameters to benchFn :
|
||||
* (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload) */
|
||||
BMK_initFn_t initFn; /* (*initFn)(initPayload) is run once per run, at the beginning. */
|
||||
void* initPayload; /* Both arguments can be NULL, in which case nothing is run. */
|
||||
BMK_errorFn_t errorFn; /* errorFn will check each return value of benchFn over each block, to determine if it failed or not.
|
||||
* errorFn can be NULL, in which case no check is performed.
|
||||
* errorFn must return 0 when benchFn was successful, and >= 1 if it detects an error.
|
||||
* Execution is stopped as soon as an error is detected.
|
||||
* the triggering return value can be retrieved using BMK_extract_errorResult(). */
|
||||
size_t blockCount; /* number of blocks to operate benchFn on.
|
||||
* It's also the size of all array parameters :
|
||||
* srcBuffers, srcSizes, dstBuffers, dstCapacities, blockResults */
|
||||
const void *const * srcBuffers; /* array of buffers to be operated on by benchFn */
|
||||
const size_t* srcSizes; /* array of the sizes of srcBuffers buffers */
|
||||
void *const * dstBuffers;/* array of buffers to be written into by benchFn */
|
||||
const size_t* dstCapacities; /* array of the capacities of dstBuffers buffers */
|
||||
size_t* blockResults; /* Optional: store the return value of benchFn for each block. Use NULL if this result is not requested. */
|
||||
BMK_benchFn_t benchFn; /* the function to benchmark, over the set of blocks */
|
||||
void* benchPayload; /* pass custom parameters to benchFn :
|
||||
* (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload) */
|
||||
BMK_initFn_t initFn; /* (*initFn)(initPayload) is run once per run, at the beginning. */
|
||||
void* initPayload; /* Both arguments can be NULL, in which case nothing is run. */
|
||||
BMK_errorFn_t errorFn; /* errorFn will check each return value of benchFn over each block, to determine if it failed or not.
|
||||
* errorFn can be NULL, in which case no check is performed.
|
||||
* errorFn must return 0 when benchFn was successful, and >= 1 if it detects an error.
|
||||
* Execution is stopped as soon as an error is detected.
|
||||
* the triggering return value can be retrieved using BMK_extract_errorResult(). */
|
||||
size_t blockCount; /* number of blocks to operate benchFn on.
|
||||
* It's also the size of all array parameters :
|
||||
* srcBuffers, srcSizes, dstBuffers, dstCapacities, blockResults */
|
||||
const void *const * srcBuffers; /* read-only array of buffers to be operated on by benchFn */
|
||||
const size_t* srcSizes; /* read-only array containing sizes of srcBuffers */
|
||||
void *const * dstBuffers; /* array of buffers to be written into by benchFn. This array is not optional, it must be provided even if unused by benchfn. */
|
||||
const size_t* dstCapacities; /* read-only array containing capacities of dstBuffers. This array must be present. */
|
||||
size_t* blockResults; /* Optional: store the return value of benchFn for each block. Use NULL if this result is not requested. */
|
||||
} BMK_benchParams_t;
|
||||
|
||||
|
||||
@ -159,6 +160,21 @@ void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms,
|
||||
void BMK_freeTimedFnState(BMK_timedFnState_t* state);
|
||||
|
||||
|
||||
/* BMK_timedFnState_shell and BMK_initStatic_timedFnState() :
|
||||
* Makes it possible to statically allocate a BMK_timedFnState_t on stack.
|
||||
* BMK_timedFnState_shell is only there to allocate space,
|
||||
* never ever access its members.
|
||||
* BMK_timedFnState_t() actually accepts any buffer.
|
||||
* It will check if provided buffer is large enough and is correctly aligned,
|
||||
* and will return NULL if conditions are not respected.
|
||||
*/
|
||||
#define BMK_TIMEDFNSTATE_SIZE 64
|
||||
typedef union {
|
||||
char never_access_space[BMK_TIMEDFNSTATE_SIZE];
|
||||
long long alignment_enforcer; /* must be aligned on 8-bytes boundaries */
|
||||
} BMK_timedFnState_shell;
|
||||
BMK_timedFnState_t* BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms);
|
||||
|
||||
|
||||
#endif /* BENCH_FN_H_23876 */
|
||||
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include <errno.h>
|
||||
#include <assert.h> /* assert */
|
||||
|
||||
#include "timefn.h" /* UTIL_time_t */
|
||||
#include "benchfn.h"
|
||||
#include "mem.h"
|
||||
#define ZSTD_STATIC_LINKING_ONLY
|
||||
@ -135,7 +136,8 @@ BMK_advancedParams_t BMK_initAdvancedParams(void) {
|
||||
0, /* ldmMinMatch */
|
||||
0, /* ldmHashLog */
|
||||
0, /* ldmBuckSizeLog */
|
||||
0 /* ldmHashRateLog */
|
||||
0, /* ldmHashRateLog */
|
||||
ZSTD_lcm_auto /* literalCompressionMode */
|
||||
};
|
||||
return res;
|
||||
}
|
||||
@ -159,9 +161,13 @@ typedef struct {
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
static void BMK_initCCtx(ZSTD_CCtx* ctx,
|
||||
const void* dictBuffer, size_t dictBufferSize, int cLevel,
|
||||
const ZSTD_compressionParameters* comprParams, const BMK_advancedParams_t* adv) {
|
||||
static void
|
||||
BMK_initCCtx(ZSTD_CCtx* ctx,
|
||||
const void* dictBuffer, size_t dictBufferSize,
|
||||
int cLevel,
|
||||
const ZSTD_compressionParameters* comprParams,
|
||||
const BMK_advancedParams_t* adv)
|
||||
{
|
||||
ZSTD_CCtx_reset(ctx, ZSTD_reset_session_and_parameters);
|
||||
if (adv->nbWorkers==1) {
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, 0));
|
||||
@ -174,12 +180,13 @@ static void BMK_initCCtx(ZSTD_CCtx* ctx,
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashLog, adv->ldmHashLog));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmBucketSizeLog, adv->ldmBucketSizeLog));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashRateLog, adv->ldmHashRateLog));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_windowLog, comprParams->windowLog));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_hashLog, comprParams->hashLog));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_chainLog, comprParams->chainLog));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_searchLog, comprParams->searchLog));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_minMatch, comprParams->minMatch));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_targetLength, comprParams->targetLength));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_windowLog, (int)comprParams->windowLog));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_hashLog, (int)comprParams->hashLog));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_chainLog, (int)comprParams->chainLog));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_searchLog, (int)comprParams->searchLog));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_minMatch, (int)comprParams->minMatch));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_targetLength, (int)comprParams->targetLength));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_literalCompressionMode, (int)adv->literalCompressionMode));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_strategy, comprParams->strategy));
|
||||
CHECK_Z(ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize));
|
||||
}
|
||||
@ -376,7 +383,7 @@ BMK_benchMemAdvancedNoAlloc(
|
||||
}
|
||||
}
|
||||
|
||||
/* warmimg up `compressedBuffer` */
|
||||
/* warming up `compressedBuffer` */
|
||||
if (adv->mode == BMK_decodeOnly) {
|
||||
memcpy(compressedBuffer, srcBuffer, loadedCompressedSize);
|
||||
} else {
|
||||
@ -444,7 +451,7 @@ BMK_benchMemAdvancedNoAlloc(
|
||||
cSize = cResult.sumOfReturn;
|
||||
ratio = (double)srcSize / cSize;
|
||||
{ BMK_benchResult_t newResult;
|
||||
newResult.cSpeed = ((U64)srcSize * TIMELOOP_NANOSEC / cResult.nanoSecPerRun);
|
||||
newResult.cSpeed = (U64)((double)srcSize * TIMELOOP_NANOSEC / cResult.nanoSecPerRun);
|
||||
benchResult.cSize = cSize;
|
||||
if (newResult.cSpeed > benchResult.cSpeed)
|
||||
benchResult.cSpeed = newResult.cSpeed;
|
||||
@ -468,7 +475,7 @@ BMK_benchMemAdvancedNoAlloc(
|
||||
}
|
||||
|
||||
{ BMK_runTime_t const dResult = BMK_extract_runTime(dOutcome);
|
||||
U64 const newDSpeed = (srcSize * TIMELOOP_NANOSEC / dResult.nanoSecPerRun);
|
||||
U64 const newDSpeed = (U64)((double)srcSize * TIMELOOP_NANOSEC / dResult.nanoSecPerRun);
|
||||
if (newDSpeed > benchResult.dSpeed)
|
||||
benchResult.dSpeed = newDSpeed;
|
||||
}
|
||||
@ -505,17 +512,21 @@ BMK_benchMemAdvancedNoAlloc(
|
||||
pos = (U32)(u - bacc);
|
||||
bNb = pos / (128 KB);
|
||||
DISPLAY("(sample %u, block %u, pos %u) \n", segNb, bNb, pos);
|
||||
if (u>5) {
|
||||
int n;
|
||||
{ size_t const lowest = (u>5) ? 5 : u;
|
||||
size_t n;
|
||||
DISPLAY("origin: ");
|
||||
for (n=-5; n<0; n++) DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]);
|
||||
for (n=lowest; n>0; n--)
|
||||
DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u-n]);
|
||||
DISPLAY(" :%02X: ", ((const BYTE*)srcBuffer)[u]);
|
||||
for (n=1; n<3; n++) DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]);
|
||||
for (n=1; n<3; n++)
|
||||
DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]);
|
||||
DISPLAY(" \n");
|
||||
DISPLAY("decode: ");
|
||||
for (n=-5; n<0; n++) DISPLAY("%02X ", resultBuffer[u+n]);
|
||||
for (n=lowest; n>0; n++)
|
||||
DISPLAY("%02X ", resultBuffer[u-n]);
|
||||
DISPLAY(" :%02X: ", resultBuffer[u]);
|
||||
for (n=1; n<3; n++) DISPLAY("%02X ", resultBuffer[u+n]);
|
||||
for (n=1; n<3; n++)
|
||||
DISPLAY("%02X ", resultBuffer[u+n]);
|
||||
DISPLAY(" \n");
|
||||
}
|
||||
break;
|
||||
|
@ -105,17 +105,18 @@ typedef enum {
|
||||
} BMK_mode_t;
|
||||
|
||||
typedef struct {
|
||||
BMK_mode_t mode; /* 0: all, 1: compress only 2: decode only */
|
||||
unsigned nbSeconds; /* default timing is in nbSeconds */
|
||||
size_t blockSize; /* Maximum size of each block*/
|
||||
unsigned nbWorkers; /* multithreading */
|
||||
unsigned realTime; /* real time priority */
|
||||
int additionalParam; /* used by python speed benchmark */
|
||||
unsigned ldmFlag; /* enables long distance matching */
|
||||
unsigned ldmMinMatch; /* below: parameters for long distance matching, see zstd.1.md */
|
||||
unsigned ldmHashLog;
|
||||
unsigned ldmBucketSizeLog;
|
||||
unsigned ldmHashRateLog;
|
||||
BMK_mode_t mode; /* 0: all, 1: compress only 2: decode only */
|
||||
unsigned nbSeconds; /* default timing is in nbSeconds */
|
||||
size_t blockSize; /* Maximum size of each block*/
|
||||
int nbWorkers; /* multithreading */
|
||||
unsigned realTime; /* real time priority */
|
||||
int additionalParam; /* used by python speed benchmark */
|
||||
int ldmFlag; /* enables long distance matching */
|
||||
int ldmMinMatch; /* below: parameters for long distance matching, see zstd.1.md */
|
||||
int ldmHashLog;
|
||||
int ldmBucketSizeLog;
|
||||
int ldmHashRateLog;
|
||||
ZSTD_literalCompressionMode_e literalCompressionMode;
|
||||
} BMK_advancedParams_t;
|
||||
|
||||
/* returns default parameters used by nonAdvanced functions */
|
||||
@ -169,7 +170,7 @@ BMK_benchOutcome_t BMK_syntheticTest(
|
||||
* comprParams - basic compression parameters
|
||||
* dictBuffer - a dictionary if used, null otherwise
|
||||
* dictBufferSize - size of dictBuffer, 0 otherwise
|
||||
* diplayLevel - see BMK_benchFiles
|
||||
* displayLevel - see BMK_benchFiles
|
||||
* displayName - name used by display
|
||||
* @return:
|
||||
* a variant, which expresses either an error, or a valid result.
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include <errno.h> /* errno */
|
||||
#include <assert.h>
|
||||
|
||||
#include "timefn.h" /* UTIL_time_t, UTIL_clockSpanMicro, UTIL_getTime */
|
||||
#include "mem.h" /* read */
|
||||
#include "error_private.h"
|
||||
#include "dibio.h"
|
||||
|
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user