import zstd 1.4.0
This commit is contained in:
parent
70d64c5852
commit
5df09a9215
461
CHANGELOG
Normal file
461
CHANGELOG
Normal file
@ -0,0 +1,461 @@
|
||||
v1.4.0
|
||||
perf: Improve level 1 compression speed in most scenarios by 6% by @gbtucker and @terrelln
|
||||
api: Move the advanced API, including all functions in the staging section, to the stable section
|
||||
api: Make ZSTD_e_flush and ZSTD_e_end block for maximum forward progress
|
||||
api: Rename ZSTD_CCtxParam_getParameter to ZSTD_CCtxParams_getParameter
|
||||
api: Rename ZSTD_CCtxParam_setParameter to ZSTD_CCtxParams_setParameter
|
||||
api: Don't export ZSTDMT functions from the shared library by default
|
||||
api: Require ZSTD_MULTITHREAD to be defined to use ZSTDMT
|
||||
api: Add ZSTD_decompressBound() to provide an upper bound on decompressed size by @shakeelrao
|
||||
api: Fix ZSTD_decompressDCtx() corner cases with a dictionary
|
||||
api: Move ZSTD_getDictID_*() functions to the stable section
|
||||
api: Add ZSTD_c_literalCompressionMode flag to enable or disable literal compression by @terrelln
|
||||
api: Allow compression parameters to be set when a dictionary is used
|
||||
api: Allow setting parameters before or after ZSTD_CCtx_loadDictionary() is called
|
||||
api: Fix ZSTD_estimateCStreamSize_usingCCtxParams()
|
||||
api: Setting ZSTD_d_maxWindowLog to 0 means use the default
|
||||
cli: Ensure that a dictionary is not used to compress itself by @shakeelrao
|
||||
cli: Add --[no-]compress-literals flag to enable or disable literal compression
|
||||
doc: Update the examples to use the advanced API
|
||||
doc: Explain how to transition from old streaming functions to the advanced API in the header
|
||||
build: Improve the Windows release packages
|
||||
build: Improve CMake build by @hjmjohnson
|
||||
build: Build fixes for FreeBSD by @lwhsu
|
||||
build: Remove redundant warnings by @thatsafunnyname
|
||||
build: Fix tests on OpenBSD by @bket
|
||||
build: Extend fuzzer build system to work with the new clang engine
|
||||
build: CMake now creates the libzstd.so.1 symlink
|
||||
build: Improve Meson build by @lzutao
|
||||
misc: Fix symbolic link detection on FreeBSD
|
||||
misc: Use physical core count for -T0 on FreeBSD by @cemeyer
|
||||
misc: Fix zstd --list on truncated files by @kostmo
|
||||
misc: Improve logging in debug mode by @felixhandte
|
||||
misc: Add CirrusCI tests by @lwhsu
|
||||
misc: Optimize dictionary memory usage in corner cases
|
||||
misc: Improve the dictionary builder on small or homogeneous data
|
||||
misc: Fix spelling across the repo by @jsoref
|
||||
|
||||
v1.3.8
|
||||
perf: better decompression speed on large files (+7%) and cold dictionaries (+15%)
|
||||
perf: slightly better compression ratio at high compression modes
|
||||
api : finalized advanced API, last stage before "stable" status
|
||||
api : new --rsyncable mode, by @terrelln
|
||||
api : support decompression of empty frames into NULL (used to be an error) (#1385)
|
||||
build: new set of macros to build a minimal size decoder, by @felixhandte
|
||||
build: fix compilation on MIPS32, reported by @clbr (#1441)
|
||||
build: fix compilation with multiple -arch flags, by @ryandesign
|
||||
build: highly upgraded meson build, by @lzutao
|
||||
build: improved buck support, by @obelisk
|
||||
build: fix cmake script : can create debug build, by @pitrou
|
||||
build: Makefile : grep works on both colored consoles and systems without color support
|
||||
build: fixed zstd-pgo, by @bmwiedemann
|
||||
cli : support ZSTD_CLEVEL environment variable, by @yijinfb (#1423)
|
||||
cli : --no-progress flag, preserving final summary (#1371), by @terrelln
|
||||
cli : ensure destination file is not source file (#1422)
|
||||
cli : clearer error messages, especially when input file not present
|
||||
doc : clarified zstd_compression_format.md, by @ulikunitz
|
||||
misc: fixed zstdgrep, returns 1 on failure, by @lzutao
|
||||
misc: NEWS renamed as CHANGELOG, in accordance with fboss
|
||||
|
||||
v1.3.7
|
||||
perf: slightly better decompression speed on clang (depending on hardware target)
|
||||
fix : performance of dictionary compression for small input < 4 KB at levels 9 and 10
|
||||
build: no longer build backtrace by default in release mode; restrict further automatic mode
|
||||
build: control backtrace support through build macro BACKTRACE
|
||||
misc: added man pages for zstdless and zstdgrep, by @samrussell
|
||||
|
||||
v1.3.6
|
||||
perf: much faster dictionary builder, by @jenniferliu
|
||||
perf: faster dictionary compression on small data when using multiple contexts, by @felixhandte
|
||||
perf: faster dictionary decompression when using a very large number of dictionaries simultaneously
|
||||
cli : fix : no longer overwrites destination when source does not exist (#1082)
|
||||
cli : new command --adapt, for automatic compression level adaptation
|
||||
api : fix : block api can be streamed with > 4 GB, reported by @catid
|
||||
api : reduced ZSTD_DDict size by 2 KB
|
||||
api : minimum negative compression level is defined, and can be queried using ZSTD_minCLevel().
|
||||
build: support Haiku target, by @korli
|
||||
build: Read Legacy format is limited to v0.5+ by default. Can be changed at compile time with macro ZSTD_LEGACY_SUPPORT.
|
||||
doc : zstd_compression_format.md updated to match wording in IETF RFC 8478
|
||||
misc: tests/paramgrill, a parameter optimizer, by @GeorgeLu97
|
||||
|
||||
v1.3.5
|
||||
perf: much faster dictionary compression, by @felixhandte
|
||||
perf: small quality improvement for dictionary generation, by @terrelln
|
||||
perf: slightly improved high compression levels (notably level 19)
|
||||
mem : automatic memory release for long duration contexts
|
||||
cli : fix : overlapLog can be manually set
|
||||
cli : fix : decoding invalid lz4 frames
|
||||
api : fix : performance degradation for dictionary compression when using advanced API, by @terrelln
|
||||
api : change : clarify ZSTD_CCtx_reset() vs ZSTD_CCtx_resetParameters(), by @terrelln
|
||||
build: select custom libzstd scope through control macros, by @GeorgeLu97
|
||||
build: OpenBSD patch, by @bket
|
||||
build: make and make all are compatible with -j
|
||||
doc : clarify zstd_compression_format.md, updated for IETF RFC process
|
||||
misc: pzstd compatible with reproducible compilation, by @lamby
|
||||
|
||||
v1.3.4
|
||||
perf: faster speed (especially decoding speed) on recent cpus (haswell+)
|
||||
perf: much better performance associating --long with multi-threading, by @terrelln
|
||||
perf: better compression at levels 13-15
|
||||
cli : asynchronous compression by default, for faster experience (use --single-thread for former behavior)
|
||||
cli : smoother status report in multi-threading mode
|
||||
cli : added command --fast=#, for faster compression modes
|
||||
cli : fix crash when not overwriting existing files, by Pádraig Brady (@pixelb)
|
||||
api : `nbThreads` becomes `nbWorkers` : 1 triggers asynchronous mode
|
||||
api : compression levels can be negative, for even more speed
|
||||
api : ZSTD_getFrameProgression() : get precise progress status of ZSTDMT anytime
|
||||
api : ZSTDMT can accept new compression parameters during compression
|
||||
api : implemented all advanced dictionary decompression prototypes
|
||||
build: improved meson recipe, by Shawn Landden (@shawnl)
|
||||
build: VS2017 scripts, by @HaydnTrigg
|
||||
misc: all /contrib projects fixed
|
||||
misc: added /contrib/docker script by @gyscos
|
||||
|
||||
v1.3.3
|
||||
perf: faster zstd_opt strategy (levels 16-19)
|
||||
fix : bug #944 : multithreading with shared dictionary and large data, reported by @gsliepen
|
||||
cli : fix : content size written in header by default
|
||||
cli : fix : improved LZ4 format support, by @felixhandte
|
||||
cli : new : hidden command `-S`, to benchmark multiple files while generating one result per file
|
||||
api : fix : support large skippable frames, by @terrelln
|
||||
api : fix : streaming interface was adding a useless 3-bytes null block to small frames
|
||||
api : change : when setting `pledgedSrcSize`, use `ZSTD_CONTENTSIZE_UNKNOWN` macro value to mean "unknown"
|
||||
build: fix : compilation under rhel6 and centos6, reported by @pixelb
|
||||
build: added `check` target
|
||||
|
||||
v1.3.2
|
||||
new : long range mode, using --long command, by Stella Lau (@stellamplau)
|
||||
new : ability to generate and decode magicless frames (#591)
|
||||
changed : maximum nb of threads reduced to 200, to avoid address space exhaustion in 32-bits mode
|
||||
fix : multi-threading compression works with custom allocators
|
||||
fix : ZSTD_sizeof_CStream() was over-evaluating memory usage
|
||||
fix : a rare compression bug when compression generates very large distances and bunch of other conditions (only possible at --ultra -22)
|
||||
fix : 32-bits build can now decode large offsets (levels 21+)
|
||||
cli : added LZ4 frame support by default, by Felix Handte (@felixhandte)
|
||||
cli : improved --list output
|
||||
cli : new : can split input file for dictionary training, using command -B#
|
||||
cli : new : clean operation artefact on Ctrl-C interruption
|
||||
cli : fix : do not change /dev/null permissions when using command -t with root access, reported by @mike155 (#851)
|
||||
cli : fix : write file size in header in multiple-files mode
|
||||
api : added macro ZSTD_COMPRESSBOUND() for static allocation
|
||||
api : experimental : new advanced decompression API
|
||||
api : fix : sizeof_CCtx() used to over-estimate
|
||||
build: fix : no-multithread variant compiles without pool.c dependency, reported by Mitchell Blank Jr (@mitchblank) (#819)
|
||||
build: better compatibility with reproducible builds, by Bernhard M. Wiedemann (@bmwiedemann) (#818)
|
||||
example : added streaming_memory_usage
|
||||
license : changed /examples license to BSD + GPLv2
|
||||
license : fix a few header files to reflect new license (#825)
|
||||
|
||||
v1.3.1
|
||||
New license : BSD + GPLv2
|
||||
perf: substantially decreased memory usage in Multi-threading mode, thanks to reports by Tino Reichardt (@mcmilk)
|
||||
perf: Multi-threading supports up to 256 threads. Cap at 256 when more are requested (#760)
|
||||
cli : improved and fixed --list command, by @ib (#772)
|
||||
cli : command -vV to list supported formats, by @ib (#771)
|
||||
build : fixed binary variants, reported by @svenha (#788)
|
||||
build : fix Visual compilation for non x86/x64 targets, reported by Greg Slazinski (@GregSlazinski) (#718)
|
||||
API exp : breaking change : ZSTD_getframeHeader() provides more information
|
||||
API exp : breaking change : pinned down values of error codes
|
||||
doc : fixed huffman example, by Ulrich Kunitz (@ulikunitz)
|
||||
new : contrib/adaptive-compression, I/O driven compression strength, by Paul Cruz (@paulcruz74)
|
||||
new : contrib/long_distance_matching, statistics by Stella Lau (@stellamplau)
|
||||
updated : contrib/linux-kernel, by Nick Terrell (@terrelln)
|
||||
|
||||
v1.3.0
|
||||
cli : new : `--list` command, by Paul Cruz
|
||||
cli : changed : xz/lzma support enabled by default
|
||||
cli : changed : `-t *` continue processing list after a decompression error
|
||||
API : added : ZSTD_versionString()
|
||||
API : promoted to stable status : ZSTD_getFrameContentSize(), by Sean Purcell
|
||||
API exp : new advanced API : ZSTD_compress_generic(), ZSTD_CCtx_setParameter()
|
||||
API exp : new : API for static or external allocation : ZSTD_initStatic?Ctx()
|
||||
API exp : added : ZSTD_decompressBegin_usingDDict(), requested by Guy Riddle (#700)
|
||||
API exp : clarified memory estimation / measurement functions.
|
||||
API exp : changed : strongest strategy renamed ZSTD_btultra, fastest strategy ZSTD_fast set to 1
|
||||
tools : decodecorpus can generate random dictionary-compressed samples, by Paul Cruz
|
||||
new : contrib/seekable_format, demo and API, by Sean Purcell
|
||||
changed : contrib/linux-kernel, updated version and license, by Nick Terrell
|
||||
|
||||
v1.2.0
|
||||
cli : changed : Multithreading enabled by default (use target zstd-nomt or HAVE_THREAD=0 to disable)
|
||||
cli : new : command -T0 means "detect and use nb of cores", by Sean Purcell
|
||||
cli : new : zstdmt symlink hardwired to `zstd -T0`
|
||||
cli : new : command --threads=# (#671)
|
||||
cli : changed : cover dictionary builder by default, for improved quality, by Nick Terrell
|
||||
cli : new : commands --train-cover and --train-legacy, to select dictionary algorithm and parameters
|
||||
cli : experimental targets `zstd4` and `xzstd4`, with support for lz4 format, by Sean Purcell
|
||||
cli : fix : does not output compressed data on console
|
||||
cli : fix : ignore symbolic links unless --force specified
|
||||
API : breaking change : ZSTD_createCDict_advanced(), only use compressionParameters as argument
|
||||
API : added : prototypes ZSTD_*_usingCDict_advanced(), for direct control over frameParameters.
|
||||
API : improved: ZSTDMT_compressCCtx() reduced memory usage
|
||||
API : fix : ZSTDMT_compressCCtx() now provides srcSize in header (#634)
|
||||
API : fix : src size stored in frame header is controlled at end of frame
|
||||
API : fix : enforced consistent rules for pledgedSrcSize==0 (#641)
|
||||
API : fix : error code "GENERIC" replaced by "dstSizeTooSmall" when appropriate
|
||||
build: improved cmake script, by @Majlen
|
||||
build: enabled Multi-threading support for *BSD, by Baptiste Daroussin
|
||||
tools: updated Paramgrill. Command -O# provides best parameters for sample and speed target.
|
||||
new : contrib/linux-kernel version, by Nick Terrell
|
||||
|
||||
v1.1.4
|
||||
cli : new : can compress in *.gz format, using --format=gzip command, by Przemyslaw Skibinski
|
||||
cli : new : advanced benchmark command --priority=rt
|
||||
cli : fix : write on sparse-enabled file systems in 32-bits mode, by @ds77
|
||||
cli : fix : --rm remains silent when input is stdin
|
||||
cli : experimental : xzstd, with support for xz/lzma decoding, by Przemyslaw Skibinski
|
||||
speed : improved decompression speed in streaming mode for single shot scenarios (+5%)
|
||||
memory: DDict (decompression dictionary) memory usage down from 150 KB to 20 KB
|
||||
arch: 32-bits variant able to generate and decode very long matches (>32 MB), by Sean Purcell
|
||||
API : new : ZSTD_findFrameCompressedSize(), ZSTD_getFrameContentSize(), ZSTD_findDecompressedSize()
|
||||
API : changed : dropped support of legacy versions <= v0.3 (can be changed by modifying ZSTD_LEGACY_SUPPORT value)
|
||||
build : new: meson build system in contrib/meson, by Dima Krasner
|
||||
build : improved cmake script, by @Majlen
|
||||
build : added -Wformat-security flag, as recommended by Padraig Brady
|
||||
doc : new : educational decoder, by Sean Purcell
|
||||
|
||||
v1.1.3
|
||||
cli : zstd can decompress .gz files (can be disabled with `make zstd-nogz` or `make HAVE_ZLIB=0`)
|
||||
cli : new : experimental target `make zstdmt`, with multi-threading support
|
||||
cli : new : improved dictionary builder "cover" (experimental), by Nick Terrell, based on prior work by Giuseppe Ottaviano.
|
||||
cli : new : advanced commands for detailed parameters, by Przemyslaw Skibinski
|
||||
cli : fix zstdless on Mac OS-X, by Andrew Janke
|
||||
cli : fix #232 "compress non-files"
|
||||
dictBuilder : improved dictionary generation quality, thanks to Nick Terrell
|
||||
API : new : lib/compress/ZSTDMT_compress.h multithreading API (experimental)
|
||||
API : new : ZSTD_create?Dict_byReference(), requested by Bartosz Taudul
|
||||
API : new : ZDICT_finalizeDictionary()
|
||||
API : fix : ZSTD_initCStream_usingCDict() properly writes dictID into frame header, by Gregory Szorc (#511)
|
||||
API : fix : all symbols properly exposed in libzstd, by Nick Terrell
|
||||
build : support for Solaris target, by Przemyslaw Skibinski
|
||||
doc : clarified specification, by Sean Purcell
|
||||
|
||||
v1.1.2
|
||||
API : streaming : decompression : changed : automatic implicit reset when chain-decoding new frames without init
|
||||
API : experimental : added : dictID retrieval functions, and ZSTD_initCStream_srcSize()
|
||||
API : zbuff : changed : prototypes now generate deprecation warnings
|
||||
lib : improved : faster decompression speed at ultra compression settings and 32-bits mode
|
||||
lib : changed : only public ZSTD_ symbols are now exposed
|
||||
lib : changed : reduced usage of stack memory
|
||||
lib : fixed : several corner case bugs, by Nick Terrell
|
||||
cli : new : gzstd, experimental version able to decode .gz files, by Przemyslaw Skibinski
|
||||
cli : new : preserve file attributes
|
||||
cli : new : added zstdless and zstdgrep tools
|
||||
cli : fixed : status displays total amount decoded, even for file consisting of multiple frames (like pzstd)
|
||||
cli : fixed : zstdcat
|
||||
zlib_wrapper : added support for gz* functions, by Przemyslaw Skibinski
|
||||
install : better compatibility with FreeBSD, by Dimitry Andric
|
||||
source tree : changed : zbuff source files moved to lib/deprecated
|
||||
|
||||
v1.1.1
|
||||
New : command -M#, --memory=, --memlimit=, --memlimit-decompress= to limit allowed memory consumption
|
||||
New : doc/zstd_manual.html, by Przemyslaw Skibinski
|
||||
Improved : slightly better compression ratio at --ultra levels (>= 20)
|
||||
Improved : better memory usage when using streaming compression API, thanks to @Rogier-5 report
|
||||
Added : API : ZSTD_initCStream_usingCDict(), ZSTD_initDStream_usingDDict() (experimental section)
|
||||
Added : example/multiple_streaming_compression.c
|
||||
Changed : zstd_errors.h is now installed within /include (and replaces errors_public.h)
|
||||
Updated man page
|
||||
Fixed : zstd-small, zstd-compress and zstd-decompress compilation targets
|
||||
|
||||
v1.1.0
|
||||
New : contrib/pzstd, parallel version of zstd, by Nick Terrell
|
||||
added : NetBSD install target (#338)
|
||||
Improved : speed for batches of small files
|
||||
Improved : speed of zlib wrapper, by Przemyslaw Skibinski
|
||||
Changed : libzstd on Windows supports legacy formats, by Christophe Chevalier
|
||||
Fixed : CLI -d output to stdout by default when input is stdin (#322)
|
||||
Fixed : CLI correctly detects console on Mac OS-X
|
||||
Fixed : CLI supports recursive mode `-r` on Mac OS-X
|
||||
Fixed : Legacy decoders use unified error codes, reported by benrg (#341), fixed by Przemyslaw Skibinski
|
||||
Fixed : compatibility with OpenBSD, reported by Juan Francisco Cantero Hurtado (#319)
|
||||
Fixed : compatibility with Hurd, by Przemyslaw Skibinski (#365)
|
||||
Fixed : zstd-pgo, reported by octoploid (#329)
|
||||
|
||||
v1.0.0
|
||||
Change Licensing, all project is now BSD, Copyright Facebook
|
||||
Small decompression speed improvement
|
||||
API : Streaming API supports legacy format
|
||||
API : ZDICT_getDictID(), ZSTD_sizeof_{CCtx, DCtx, CStream, DStream}(), ZSTD_setDStreamParameter()
|
||||
CLI supports legacy formats v0.4+
|
||||
Fixed : compression fails on certain huge files, reported by Jesse McGrew
|
||||
Enhanced documentation, by Przemyslaw Skibinski
|
||||
|
||||
v0.8.1
|
||||
New streaming API
|
||||
Changed : --ultra now enables levels beyond 19
|
||||
Changed : -i# now selects benchmark time in seconds
|
||||
Fixed : ZSTD_compress* can now compress > 4 GB in a single pass, reported by Nick Terrell
|
||||
Fixed : speed regression on specific patterns (#272)
|
||||
Fixed : support for Z_SYNC_FLUSH, by Dmitry Krot (#291)
|
||||
Fixed : ICC compilation, by Przemyslaw Skibinski
|
||||
|
||||
v0.8.0
|
||||
Improved : better speed on clang and gcc -O2, thanks to Eric Biggers
|
||||
New : Build on FreeBSD and DragonFly, thanks to JrMarino
|
||||
Changed : modified API : ZSTD_compressEnd()
|
||||
Fixed : legacy mode with ZSTD_HEAPMODE=0, by Christopher Bergqvist
|
||||
Fixed : premature end of frame when zero-sized raw block, reported by Eric Biggers
|
||||
Fixed : large dictionaries (> 384 KB), reported by Ilona Papava
|
||||
Fixed : checksum correctly checked in single-pass mode
|
||||
Fixed : combined --test and --rm, reported by Andreas M. Nilsson
|
||||
Modified : minor compression level adaptations
|
||||
Updated : compression format specification to v0.2.0
|
||||
changed : zstd.h moved to /lib directory
|
||||
|
||||
v0.7.5
|
||||
Transition version, supporting decoding of v0.8.x
|
||||
|
||||
v0.7.4
|
||||
Added : homebrew for Mac, by Daniel Cade
|
||||
Added : more examples
|
||||
Fixed : segfault when using small dictionaries, reported by Felix Handte
|
||||
Modified : default compression level for CLI is now 3
|
||||
Updated : specification, to v0.1.1
|
||||
|
||||
v0.7.3
|
||||
New : compression format specification
|
||||
New : `--` separator, stating that all following arguments are file names. Suggested by Chip Turner.
|
||||
New : `ZSTD_getDecompressedSize()`
|
||||
New : OpenBSD target, by Juan Francisco Cantero Hurtado
|
||||
New : `examples` directory
|
||||
fixed : dictBuilder using HC levels, reported by Bartosz Taudul
|
||||
fixed : legacy support from ZSTD_decompress_usingDDict(), reported by Felix Handte
|
||||
fixed : multi-blocks decoding with intermediate uncompressed blocks, reported by Greg Slazinski
|
||||
modified : removed "mem.h" and "error_public.h" dependencies from "zstd.h" (experimental section)
|
||||
modified : legacy functions no longer need magic number
|
||||
|
||||
v0.7.2
|
||||
fixed : ZSTD_decompressBlock() using multiple consecutive blocks. Reported by Greg Slazinski.
|
||||
fixed : potential segfault on very large files (many gigabytes). Reported by Chip Turner.
|
||||
fixed : CLI displays system error message when destination file cannot be created (#231). Reported by Chip Turner.
|
||||
|
||||
v0.7.1
|
||||
fixed : ZBUFF_compressEnd() called multiple times with too small `dst` buffer, reported by Christophe Chevalier
|
||||
fixed : dictBuilder fails if first sample is too small, reported by Руслан Ковалёв
|
||||
fixed : corruption issue, reported by cj
|
||||
modified : checksum enabled by default in command line mode
|
||||
|
||||
v0.7.0
|
||||
New : Support for directory compression, using `-r`, thanks to Przemyslaw Skibinski
|
||||
New : Command `--rm`, to remove source file after successful de/compression
|
||||
New : Visual build scripts, by Christophe Chevalier
|
||||
New : Support for Sparse File-systems (do not use space for zero-filled sectors)
|
||||
New : Frame checksum support
|
||||
New : Support pass-through mode (when using `-df`)
|
||||
API : more efficient Dictionary API : `ZSTD_compress_usingCDict()`, `ZSTD_decompress_usingDDict()`
|
||||
API : create dictionary files from custom content, by Giuseppe Ottaviano
|
||||
API : support for custom malloc/free functions
|
||||
New : controllable Dictionary ID
|
||||
New : Support for skippable frames
|
||||
|
||||
v0.6.1
|
||||
New : zlib wrapper API, thanks to Przemyslaw Skibinski
|
||||
New : Ability to compile compressor / decompressor separately
|
||||
Changed : new lib directory structure
|
||||
Fixed : Legacy codec v0.5 compatible with dictionary decompression
|
||||
Fixed : Decoder corruption error (#173)
|
||||
Fixed : null-string roundtrip (#176)
|
||||
New : benchmark mode can select directory as input
|
||||
Experimental : midipix support, VMS support
|
||||
|
||||
v0.6.0
|
||||
Stronger high compression modes, thanks to Przemyslaw Skibinski
|
||||
API : ZSTD_getFrameParams() provides size of decompressed content
|
||||
New : highest compression modes require `--ultra` command to fully unleash their capacity
|
||||
Fixed : zstd cli return error code > 0 and removes dst file artifact when decompression fails, thanks to Chip Turner
|
||||
|
||||
v0.5.1
|
||||
New : Optimal parsing => Very high compression modes, thanks to Przemyslaw Skibinski
|
||||
Changed : Dictionary builder integrated into libzstd and zstd cli
|
||||
Changed (!) : zstd cli now uses "multiple input files" as default mode. See `zstd -h`.
|
||||
Fix : high compression modes for big-endian platforms
|
||||
New : zstd cli : `-t` | `--test` command
|
||||
|
||||
v0.5.0
|
||||
New : dictionary builder utility
|
||||
Changed : streaming & dictionary API
|
||||
Improved : better compression of small data
|
||||
|
||||
v0.4.7
|
||||
Improved : small compression speed improvement in HC mode
|
||||
Changed : `zstd_decompress.c` has ZSTD_LEGACY_SUPPORT to 0 by default
|
||||
fix : bt search bug
|
||||
|
||||
v0.4.6
|
||||
fix : fast compression mode on Windows
|
||||
New : cmake configuration file, thanks to Artyom Dymchenko
|
||||
Improved : high compression mode on repetitive data
|
||||
New : block-level API
|
||||
New : ZSTD_duplicateCCtx()
|
||||
|
||||
v0.4.5
|
||||
new : -m/--multiple : compress/decompress multiple files
|
||||
|
||||
v0.4.4
|
||||
Fixed : high compression modes for Windows 32 bits
|
||||
new : external dictionary API extended to buffered mode and accessible through command line
|
||||
new : windows DLL project, thanks to Christophe Chevalier
|
||||
|
||||
v0.4.3 :
|
||||
new : external dictionary API
|
||||
new : zstd-frugal
|
||||
|
||||
v0.4.2 :
|
||||
Generic minor improvements for small blocks
|
||||
Fixed : big-endian compatibility, by Peter Harris (#85)
|
||||
|
||||
v0.4.1
|
||||
Fixed : ZSTD_LEGACY_SUPPORT=0 build mode (reported by Luben)
|
||||
removed `zstd.c`
|
||||
|
||||
v0.4.0
|
||||
Command line utility compatible with high compression levels
|
||||
Removed zstdhc => merged into zstd
|
||||
Added : ZBUFF API (see zstd_buffered.h)
|
||||
Rolling buffer support
|
||||
|
||||
v0.3.6
|
||||
small blocks params
|
||||
|
||||
v0.3.5
|
||||
minor generic compression improvements
|
||||
|
||||
v0.3.4
|
||||
Faster fast cLevels
|
||||
|
||||
v0.3.3
|
||||
Small compression ratio improvement
|
||||
|
||||
v0.3.2
|
||||
Fixed Visual Studio
|
||||
|
||||
v0.3.1 :
|
||||
Small compression ratio improvement
|
||||
|
||||
v0.3
|
||||
HC mode : compression levels 2-26
|
||||
|
||||
v0.2.2
|
||||
Fix : Visual Studio 2013 & 2015 release compilation, by Christophe Chevalier
|
||||
|
||||
v0.2.1
|
||||
Fix : Read errors, advanced fuzzer tests, by Hanno Böck
|
||||
|
||||
v0.2.0
|
||||
**Breaking format change**
|
||||
Faster decompression speed
|
||||
Can still decode v0.1 format
|
||||
|
||||
v0.1.3
|
||||
fix uninitialization warning, reported by Evan Nemerson
|
||||
|
||||
v0.1.2
|
||||
frame concatenation support
|
||||
|
||||
v0.1.1
|
||||
fix compression bug
|
||||
detects write-flush errors
|
||||
|
||||
v0.1.0
|
||||
first release
|
8
Makefile
8
Makefile
@ -156,7 +156,7 @@ list:
|
||||
done \
|
||||
} | column -t -s $$'\t'
|
||||
|
||||
.PHONY: install clangtest armtest usan asan uasan
|
||||
.PHONY: install armtest usan asan uasan
|
||||
install:
|
||||
@$(MAKE) -C $(ZSTDDIR) $@
|
||||
@$(MAKE) -C $(PRGDIR) $@
|
||||
@ -188,7 +188,7 @@ gcc7build: clean
|
||||
.PHONY: clangbuild
|
||||
clangbuild: clean
|
||||
clang -v
|
||||
CXX=clang++ CC=clang $(MAKE) all MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion -Wdocumentation"
|
||||
CXX=clang++ CC=clang CFLAGS="-Werror -Wconversion -Wno-sign-conversion -Wdocumentation" $(MAKE) all
|
||||
|
||||
m32build: clean
|
||||
gcc -v
|
||||
@ -232,10 +232,6 @@ gcc6test: clean
|
||||
gcc-6 -v
|
||||
$(MAKE) all CC=gcc-6 MOREFLAGS="-Werror"
|
||||
|
||||
clangtest: clean
|
||||
clang -v
|
||||
$(MAKE) all CXX=clang++ CC=clang MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion -Wdocumentation"
|
||||
|
||||
armtest: clean
|
||||
$(MAKE) -C $(TESTDIR) datagen # use native, faster
|
||||
$(MAKE) -C $(TESTDIR) test CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static ZSTDRTTEST= MOREFLAGS="-Werror -static" FUZZER_FLAGS=--no-big-tests
|
||||
|
25
README.md
25
README.md
@ -14,6 +14,7 @@ a list of known ports and bindings is provided on [Zstandard homepage](http://ww
|
||||
[![Build Status][travisDevBadge]][travisLink]
|
||||
[![Build status][AppveyorDevBadge]][AppveyorLink]
|
||||
[![Build status][CircleDevBadge]][CircleLink]
|
||||
[![Build status][CirrusDevBadge]][CirrusLink]
|
||||
|
||||
[travisDevBadge]: https://travis-ci.org/facebook/zstd.svg?branch=dev "Continuous Integration test suite"
|
||||
[travisLink]: https://travis-ci.org/facebook/zstd
|
||||
@ -21,14 +22,16 @@ a list of known ports and bindings is provided on [Zstandard homepage](http://ww
|
||||
[AppveyorLink]: https://ci.appveyor.com/project/YannCollet/zstd-p0yf0
|
||||
[CircleDevBadge]: https://circleci.com/gh/facebook/zstd/tree/dev.svg?style=shield "Short test suite"
|
||||
[CircleLink]: https://circleci.com/gh/facebook/zstd
|
||||
[CirrusDevBadge]: https://api.cirrus-ci.com/github/facebook/zstd.svg?branch=dev
|
||||
[CirrusLink]: https://cirrus-ci.com/github/facebook/zstd
|
||||
|
||||
## Benchmarks
|
||||
|
||||
For reference, several fast compression algorithms were tested and compared
|
||||
on a server running Linux Debian (`Linux version 4.14.0-3-amd64`),
|
||||
with a Core i7-6700K CPU @ 4.0GHz,
|
||||
on a server running Arch Linux (`Linux version 5.0.5-arch1-1`),
|
||||
with a Core i9-9900K CPU @ 5.0GHz,
|
||||
using [lzbench], an open-source in-memory benchmark by @inikep
|
||||
compiled with [gcc] 7.3.0,
|
||||
compiled with [gcc] 8.2.1,
|
||||
on the [Silesia compression corpus].
|
||||
|
||||
[lzbench]: https://github.com/inikep/lzbench
|
||||
@ -37,14 +40,14 @@ on the [Silesia compression corpus].
|
||||
|
||||
| Compressor name | Ratio | Compression| Decompress.|
|
||||
| --------------- | ------| -----------| ---------- |
|
||||
| **zstd 1.3.4 -1** | 2.877 | 470 MB/s | 1380 MB/s |
|
||||
| zlib 1.2.11 -1 | 2.743 | 110 MB/s | 400 MB/s |
|
||||
| brotli 1.0.2 -0 | 2.701 | 410 MB/s | 430 MB/s |
|
||||
| quicklz 1.5.0 -1 | 2.238 | 550 MB/s | 710 MB/s |
|
||||
| lzo1x 2.09 -1 | 2.108 | 650 MB/s | 830 MB/s |
|
||||
| lz4 1.8.1 | 2.101 | 750 MB/s | 3700 MB/s |
|
||||
| snappy 1.1.4 | 2.091 | 530 MB/s | 1800 MB/s |
|
||||
| lzf 3.6 -1 | 2.077 | 400 MB/s | 860 MB/s |
|
||||
| **zstd 1.4.0 -1** | 2.884 | 530 MB/s | 1360 MB/s |
|
||||
| zlib 1.2.11 -1 | 2.743 | 110 MB/s | 440 MB/s |
|
||||
| brotli 1.0.7 -0 | 2.701 | 430 MB/s | 470 MB/s |
|
||||
| quicklz 1.5.0 -1 | 2.238 | 600 MB/s | 800 MB/s |
|
||||
| lzo1x 2.09 -1 | 2.106 | 680 MB/s | 950 MB/s |
|
||||
| lz4 1.8.3 | 2.101 | 800 MB/s | 4220 MB/s |
|
||||
| snappy 1.1.4 | 2.073 | 580 MB/s | 2020 MB/s |
|
||||
| lzf 3.6 -1 | 2.077 | 440 MB/s | 930 MB/s |
|
||||
|
||||
[zlib]: http://www.zlib.net/
|
||||
[LZ4]: http://www.lz4.org/
|
||||
|
@ -13,7 +13,7 @@ CPPFLAGS += -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
|
||||
CFLAGS ?= -O3
|
||||
CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
|
||||
-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
|
||||
-Wstrict-prototypes -Wundef -Wformat-security \
|
||||
-Wstrict-prototypes -Wundef \
|
||||
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
|
||||
-Wredundant-decls
|
||||
CFLAGS += $(DEBUGFLAGS)
|
||||
@ -22,10 +22,10 @@ FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MULTITHREAD_LDFLAGS)
|
||||
|
||||
all: adapt datagen
|
||||
|
||||
adapt: $(ZSTD_FILES) $(PRGDIR)/util.c adapt.c
|
||||
adapt: $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c adapt.c
|
||||
$(CC) $(FLAGS) $^ -o $@
|
||||
|
||||
adapt-debug: $(ZSTD_FILES) $(PRGDIR)/util.c adapt.c
|
||||
adapt-debug: $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c adapt.c
|
||||
$(CC) $(FLAGS) -DDEBUG_MODE=2 $^ -o adapt
|
||||
|
||||
datagen : $(PRGDIR)/datagen.c datagencli.c
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include <string.h> /* memset */
|
||||
#include "zstd_internal.h"
|
||||
#include "util.h"
|
||||
#include "timefn.h" /* UTIL_time_t, UTIL_getTime, UTIL_getSpanTimeMicro */
|
||||
|
||||
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
|
||||
#define PRINT(...) fprintf(stdout, __VA_ARGS__)
|
||||
|
20
contrib/docker/Dockerfile
Normal file
20
contrib/docker/Dockerfile
Normal file
@ -0,0 +1,20 @@
|
||||
# Dockerfile
|
||||
# First image to build the binary
|
||||
FROM alpine as builder
|
||||
|
||||
RUN apk --no-cache add make gcc libc-dev
|
||||
COPY . /src
|
||||
RUN mkdir /pkg && cd /src && make && make DESTDIR=/pkg install
|
||||
|
||||
# Second minimal image to only keep the built binary
|
||||
FROM alpine
|
||||
|
||||
# Copy the built files
|
||||
COPY --from=builder /pkg /
|
||||
|
||||
# Copy the license as well
|
||||
RUN mkdir -p /usr/local/share/licenses/zstd
|
||||
COPY --from=builder /src/LICENSE /usr/local/share/licenses/zstd/
|
||||
|
||||
# Just run `zstd` if no other command is given
|
||||
CMD ["/usr/local/bin/zstd"]
|
20
contrib/docker/README.md
Normal file
20
contrib/docker/README.md
Normal file
@ -0,0 +1,20 @@
|
||||
|
||||
## Requirement
|
||||
|
||||
The `Dockerfile` script requires a version of `docker` >= 17.05
|
||||
|
||||
## Installing docker
|
||||
|
||||
The official docker install docs use a ppa with a modern version available:
|
||||
https://docs.docker.com/install/linux/docker-ce/ubuntu/
|
||||
|
||||
## How to run
|
||||
|
||||
`docker build -t zstd .`
|
||||
|
||||
## Test
|
||||
|
||||
```
|
||||
echo foo | docker run -i --rm zstd | docker run -i --rm zstd zstdcat
|
||||
foo
|
||||
```
|
@ -0,0 +1,44 @@
|
||||
ARG :=
|
||||
|
||||
CC ?= gcc
|
||||
CFLAGS ?= -O3
|
||||
INCLUDES := -I ../randomDictBuilder -I ../../../programs -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder
|
||||
|
||||
RANDOM_FILE := ../randomDictBuilder/random.c
|
||||
IO_FILE := ../randomDictBuilder/io.c
|
||||
|
||||
all: run clean
|
||||
|
||||
.PHONY: run
|
||||
run: benchmark
|
||||
echo "Benchmarking with $(ARG)"
|
||||
./benchmark $(ARG)
|
||||
|
||||
.PHONY: test
|
||||
test: benchmarkTest clean
|
||||
|
||||
.PHONY: benchmarkTest
|
||||
benchmarkTest: benchmark test.sh
|
||||
sh test.sh
|
||||
|
||||
benchmark: benchmark.o io.o random.o libzstd.a
|
||||
$(CC) $(CFLAGS) benchmark.o io.o random.o libzstd.a -o benchmark
|
||||
|
||||
benchmark.o: benchmark.c
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c benchmark.c
|
||||
|
||||
random.o: $(RANDOM_FILE)
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c $(RANDOM_FILE)
|
||||
|
||||
io.o: $(IO_FILE)
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c $(IO_FILE)
|
||||
|
||||
libzstd.a:
|
||||
$(MAKE) -C ../../../lib libzstd.a
|
||||
mv ../../../lib/libzstd.a .
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
rm -f *.o benchmark libzstd.a
|
||||
$(MAKE) -C ../../../lib clean
|
||||
echo "Cleaning is completed"
|
@ -0,0 +1,849 @@
|
||||
Benchmarking Dictionary Builder
|
||||
|
||||
### Permitted Argument:
|
||||
Input File/Directory (in=fileName): required; file/directory used to build dictionary; if directory, will operate recursively for files inside directory; can include multiple files/directories, each following "in="
|
||||
|
||||
### Running Test:
|
||||
make test
|
||||
|
||||
### Usage:
|
||||
Benchmark the given input files: `make ARG=` followed by the permitted arguments
|
||||
|
||||
### Examples:
|
||||
make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
|
||||
|
||||
### Benchmarking Result:
|
||||
- The first COVER row is the optimizing cover run; the second COVER row reuses the optimized d and k found by the first.
|
||||
- For every f value of fastCover, the first row is the optimizing fastCover run and the second row reuses the optimized d and k found by the first. This is repeated for accel values from 1 to 10.
|
||||
- The fourth column is the chosen d and the fifth column is the chosen k.
|
||||
|
||||
github:
|
||||
NODICT 0.000004 2.999642
|
||||
RANDOM 0.024560 8.791189
|
||||
LEGACY 0.727109 8.173529
|
||||
COVER 40.565676 10.652243 8 1298
|
||||
COVER 3.608284 10.652243 8 1298
|
||||
FAST f=15 a=1 4.181024 10.570882 8 1154
|
||||
FAST f=15 a=1 0.040788 10.570882 8 1154
|
||||
FAST f=15 a=2 3.548352 10.574287 6 1970
|
||||
FAST f=15 a=2 0.035535 10.574287 6 1970
|
||||
FAST f=15 a=3 3.287364 10.613950 6 1010
|
||||
FAST f=15 a=3 0.032182 10.613950 6 1010
|
||||
FAST f=15 a=4 3.184976 10.573883 6 1058
|
||||
FAST f=15 a=4 0.029878 10.573883 6 1058
|
||||
FAST f=15 a=5 3.045513 10.580640 8 1154
|
||||
FAST f=15 a=5 0.022162 10.580640 8 1154
|
||||
FAST f=15 a=6 3.003296 10.583677 6 1010
|
||||
FAST f=15 a=6 0.028091 10.583677 6 1010
|
||||
FAST f=15 a=7 2.952655 10.622551 6 1106
|
||||
FAST f=15 a=7 0.02724 10.622551 6 1106
|
||||
FAST f=15 a=8 2.945674 10.614657 6 1010
|
||||
FAST f=15 a=8 0.027264 10.614657 6 1010
|
||||
FAST f=15 a=9 3.153439 10.564018 8 1154
|
||||
FAST f=15 a=9 0.020635 10.564018 8 1154
|
||||
FAST f=15 a=10 2.950416 10.511454 6 1010
|
||||
FAST f=15 a=10 0.026606 10.511454 6 1010
|
||||
FAST f=16 a=1 3.970029 10.681035 8 1154
|
||||
FAST f=16 a=1 0.038188 10.681035 8 1154
|
||||
FAST f=16 a=2 3.422892 10.484978 6 1874
|
||||
FAST f=16 a=2 0.034702 10.484978 6 1874
|
||||
FAST f=16 a=3 3.215836 10.632631 8 1154
|
||||
FAST f=16 a=3 0.026084 10.632631 8 1154
|
||||
FAST f=16 a=4 3.081353 10.626533 6 1106
|
||||
FAST f=16 a=4 0.030032 10.626533 6 1106
|
||||
FAST f=16 a=5 3.041241 10.545027 8 1922
|
||||
FAST f=16 a=5 0.022882 10.545027 8 1922
|
||||
FAST f=16 a=6 2.989390 10.638284 6 1874
|
||||
FAST f=16 a=6 0.028308 10.638284 6 1874
|
||||
FAST f=16 a=7 3.001581 10.797136 6 1106
|
||||
FAST f=16 a=7 0.027479 10.797136 6 1106
|
||||
FAST f=16 a=8 2.984107 10.658356 8 1058
|
||||
FAST f=16 a=8 0.021099 10.658356 8 1058
|
||||
FAST f=16 a=9 2.925788 10.523869 6 1010
|
||||
FAST f=16 a=9 0.026905 10.523869 6 1010
|
||||
FAST f=16 a=10 2.889605 10.745841 6 1874
|
||||
FAST f=16 a=10 0.026846 10.745841 6 1874
|
||||
FAST f=17 a=1 4.031953 10.672080 8 1202
|
||||
FAST f=17 a=1 0.040658 10.672080 8 1202
|
||||
FAST f=17 a=2 3.458107 10.589352 8 1106
|
||||
FAST f=17 a=2 0.02926 10.589352 8 1106
|
||||
FAST f=17 a=3 3.291189 10.662714 8 1154
|
||||
FAST f=17 a=3 0.026531 10.662714 8 1154
|
||||
FAST f=17 a=4 3.154950 10.549456 8 1346
|
||||
FAST f=17 a=4 0.024991 10.549456 8 1346
|
||||
FAST f=17 a=5 3.092271 10.541670 6 1202
|
||||
FAST f=17 a=5 0.038285 10.541670 6 1202
|
||||
FAST f=17 a=6 3.166146 10.729112 6 1874
|
||||
FAST f=17 a=6 0.038217 10.729112 6 1874
|
||||
FAST f=17 a=7 3.035467 10.810485 6 1106
|
||||
FAST f=17 a=7 0.036655 10.810485 6 1106
|
||||
FAST f=17 a=8 3.035668 10.530532 6 1058
|
||||
FAST f=17 a=8 0.037715 10.530532 6 1058
|
||||
FAST f=17 a=9 2.987917 10.589802 8 1922
|
||||
FAST f=17 a=9 0.02217 10.589802 8 1922
|
||||
FAST f=17 a=10 2.981647 10.722579 8 1106
|
||||
FAST f=17 a=10 0.021948 10.722579 8 1106
|
||||
FAST f=18 a=1 4.067144 10.634943 8 1154
|
||||
FAST f=18 a=1 0.041386 10.634943 8 1154
|
||||
FAST f=18 a=2 3.507377 10.546230 6 1970
|
||||
FAST f=18 a=2 0.037572 10.546230 6 1970
|
||||
FAST f=18 a=3 3.323015 10.648061 8 1154
|
||||
FAST f=18 a=3 0.028306 10.648061 8 1154
|
||||
FAST f=18 a=4 3.216735 10.705402 6 1010
|
||||
FAST f=18 a=4 0.030755 10.705402 6 1010
|
||||
FAST f=18 a=5 3.175794 10.588154 8 1874
|
||||
FAST f=18 a=5 0.025315 10.588154 8 1874
|
||||
FAST f=18 a=6 3.127459 10.751104 8 1106
|
||||
FAST f=18 a=6 0.023897 10.751104 8 1106
|
||||
FAST f=18 a=7 3.083017 10.780402 6 1106
|
||||
FAST f=18 a=7 0.029158 10.780402 6 1106
|
||||
FAST f=18 a=8 3.069700 10.547226 8 1346
|
||||
FAST f=18 a=8 0.024046 10.547226 8 1346
|
||||
FAST f=18 a=9 3.056591 10.674759 6 1010
|
||||
FAST f=18 a=9 0.028496 10.674759 6 1010
|
||||
FAST f=18 a=10 3.063588 10.737578 8 1106
|
||||
FAST f=18 a=10 0.023033 10.737578 8 1106
|
||||
FAST f=19 a=1 4.164041 10.650333 8 1154
|
||||
FAST f=19 a=1 0.042906 10.650333 8 1154
|
||||
FAST f=19 a=2 3.585409 10.577066 6 1058
|
||||
FAST f=19 a=2 0.038994 10.577066 6 1058
|
||||
FAST f=19 a=3 3.439643 10.639403 8 1154
|
||||
FAST f=19 a=3 0.028427 10.639403 8 1154
|
||||
FAST f=19 a=4 3.268869 10.554410 8 1298
|
||||
FAST f=19 a=4 0.026866 10.554410 8 1298
|
||||
FAST f=19 a=5 3.238225 10.615109 6 1010
|
||||
FAST f=19 a=5 0.03078 10.615109 6 1010
|
||||
FAST f=19 a=6 3.199558 10.609782 6 1874
|
||||
FAST f=19 a=6 0.030099 10.609782 6 1874
|
||||
FAST f=19 a=7 3.132395 10.794753 6 1106
|
||||
FAST f=19 a=7 0.028964 10.794753 6 1106
|
||||
FAST f=19 a=8 3.148446 10.554842 8 1298
|
||||
FAST f=19 a=8 0.024277 10.554842 8 1298
|
||||
FAST f=19 a=9 3.108324 10.668763 6 1010
|
||||
FAST f=19 a=9 0.02896 10.668763 6 1010
|
||||
FAST f=19 a=10 3.159863 10.757347 8 1106
|
||||
FAST f=19 a=10 0.023351 10.757347 8 1106
|
||||
FAST f=20 a=1 4.462698 10.661788 8 1154
|
||||
FAST f=20 a=1 0.047174 10.661788 8 1154
|
||||
FAST f=20 a=2 3.820269 10.678612 6 1106
|
||||
FAST f=20 a=2 0.040807 10.678612 6 1106
|
||||
FAST f=20 a=3 3.644955 10.648424 8 1154
|
||||
FAST f=20 a=3 0.031398 10.648424 8 1154
|
||||
FAST f=20 a=4 3.546257 10.559756 8 1298
|
||||
FAST f=20 a=4 0.029856 10.559756 8 1298
|
||||
FAST f=20 a=5 3.485248 10.646637 6 1010
|
||||
FAST f=20 a=5 0.033756 10.646637 6 1010
|
||||
FAST f=20 a=6 3.490438 10.775824 8 1106
|
||||
FAST f=20 a=6 0.028338 10.775824 8 1106
|
||||
FAST f=20 a=7 3.631289 10.801795 6 1106
|
||||
FAST f=20 a=7 0.035228 10.801795 6 1106
|
||||
FAST f=20 a=8 3.758936 10.545116 8 1346
|
||||
FAST f=20 a=8 0.027495 10.545116 8 1346
|
||||
FAST f=20 a=9 3.707024 10.677454 6 1010
|
||||
FAST f=20 a=9 0.031326 10.677454 6 1010
|
||||
FAST f=20 a=10 3.586593 10.756017 8 1106
|
||||
FAST f=20 a=10 0.027122 10.756017 8 1106
|
||||
FAST f=21 a=1 5.701396 10.655398 8 1154
|
||||
FAST f=21 a=1 0.067744 10.655398 8 1154
|
||||
FAST f=21 a=2 5.270542 10.650743 6 1106
|
||||
FAST f=21 a=2 0.052999 10.650743 6 1106
|
||||
FAST f=21 a=3 4.945294 10.652380 8 1154
|
||||
FAST f=21 a=3 0.052678 10.652380 8 1154
|
||||
FAST f=21 a=4 4.894079 10.543185 8 1298
|
||||
FAST f=21 a=4 0.04997 10.543185 8 1298
|
||||
FAST f=21 a=5 4.785417 10.630321 6 1010
|
||||
FAST f=21 a=5 0.045294 10.630321 6 1010
|
||||
FAST f=21 a=6 4.789381 10.664477 6 1874
|
||||
FAST f=21 a=6 0.046578 10.664477 6 1874
|
||||
FAST f=21 a=7 4.302955 10.805179 6 1106
|
||||
FAST f=21 a=7 0.041205 10.805179 6 1106
|
||||
FAST f=21 a=8 4.034630 10.551211 8 1298
|
||||
FAST f=21 a=8 0.040121 10.551211 8 1298
|
||||
FAST f=21 a=9 4.523868 10.799114 6 1010
|
||||
FAST f=21 a=9 0.043592 10.799114 6 1010
|
||||
FAST f=21 a=10 4.760736 10.750255 8 1106
|
||||
FAST f=21 a=10 0.043483 10.750255 8 1106
|
||||
FAST f=22 a=1 6.743064 10.640537 8 1154
|
||||
FAST f=22 a=1 0.086967 10.640537 8 1154
|
||||
FAST f=22 a=2 6.121739 10.626638 6 1970
|
||||
FAST f=22 a=2 0.066337 10.626638 6 1970
|
||||
FAST f=22 a=3 5.248851 10.640688 8 1154
|
||||
FAST f=22 a=3 0.054935 10.640688 8 1154
|
||||
FAST f=22 a=4 5.436579 10.588333 8 1298
|
||||
FAST f=22 a=4 0.064113 10.588333 8 1298
|
||||
FAST f=22 a=5 5.812815 10.652653 6 1010
|
||||
FAST f=22 a=5 0.058189 10.652653 6 1010
|
||||
FAST f=22 a=6 5.745472 10.666437 6 1874
|
||||
FAST f=22 a=6 0.057188 10.666437 6 1874
|
||||
FAST f=22 a=7 5.716393 10.806911 6 1106
|
||||
FAST f=22 a=7 0.056 10.806911 6 1106
|
||||
FAST f=22 a=8 5.698799 10.530784 8 1298
|
||||
FAST f=22 a=8 0.0583 10.530784 8 1298
|
||||
FAST f=22 a=9 5.710533 10.777391 6 1010
|
||||
FAST f=22 a=9 0.054945 10.777391 6 1010
|
||||
FAST f=22 a=10 5.685395 10.745023 8 1106
|
||||
FAST f=22 a=10 0.056526 10.745023 8 1106
|
||||
FAST f=23 a=1 7.836923 10.638828 8 1154
|
||||
FAST f=23 a=1 0.099522 10.638828 8 1154
|
||||
FAST f=23 a=2 6.627834 10.631061 6 1970
|
||||
FAST f=23 a=2 0.066769 10.631061 6 1970
|
||||
FAST f=23 a=3 5.602533 10.647288 8 1154
|
||||
FAST f=23 a=3 0.064513 10.647288 8 1154
|
||||
FAST f=23 a=4 6.005580 10.568747 8 1298
|
||||
FAST f=23 a=4 0.062022 10.568747 8 1298
|
||||
FAST f=23 a=5 5.481816 10.676921 6 1010
|
||||
FAST f=23 a=5 0.058959 10.676921 6 1010
|
||||
FAST f=23 a=6 5.460444 10.666194 6 1874
|
||||
FAST f=23 a=6 0.057687 10.666194 6 1874
|
||||
FAST f=23 a=7 5.659822 10.800377 6 1106
|
||||
FAST f=23 a=7 0.06783 10.800377 6 1106
|
||||
FAST f=23 a=8 6.826940 10.522167 8 1298
|
||||
FAST f=23 a=8 0.070533 10.522167 8 1298
|
||||
FAST f=23 a=9 6.804757 10.577799 8 1682
|
||||
FAST f=23 a=9 0.069949 10.577799 8 1682
|
||||
FAST f=23 a=10 6.774933 10.742093 8 1106
|
||||
FAST f=23 a=10 0.068395 10.742093 8 1106
|
||||
FAST f=24 a=1 8.444110 10.632783 8 1154
|
||||
FAST f=24 a=1 0.094357 10.632783 8 1154
|
||||
FAST f=24 a=2 7.289578 10.631061 6 1970
|
||||
FAST f=24 a=2 0.098515 10.631061 6 1970
|
||||
FAST f=24 a=3 8.619780 10.646289 8 1154
|
||||
FAST f=24 a=3 0.098041 10.646289 8 1154
|
||||
FAST f=24 a=4 8.508455 10.555199 8 1298
|
||||
FAST f=24 a=4 0.093885 10.555199 8 1298
|
||||
FAST f=24 a=5 8.471145 10.674363 6 1010
|
||||
FAST f=24 a=5 0.088676 10.674363 6 1010
|
||||
FAST f=24 a=6 8.426727 10.667228 6 1874
|
||||
FAST f=24 a=6 0.087247 10.667228 6 1874
|
||||
FAST f=24 a=7 8.356826 10.803027 6 1106
|
||||
FAST f=24 a=7 0.085835 10.803027 6 1106
|
||||
FAST f=24 a=8 6.756811 10.522049 8 1298
|
||||
FAST f=24 a=8 0.07107 10.522049 8 1298
|
||||
FAST f=24 a=9 6.548169 10.571882 8 1682
|
||||
FAST f=24 a=9 0.0713 10.571882 8 1682
|
||||
FAST f=24 a=10 8.238079 10.736453 8 1106
|
||||
FAST f=24 a=10 0.07004 10.736453 8 1106
|
||||
|
||||
|
||||
hg-commands:
|
||||
NODICT 0.000005 2.425276
|
||||
RANDOM 0.046332 3.490331
|
||||
LEGACY 0.720351 3.911682
|
||||
COVER 45.507731 4.132653 8 386
|
||||
COVER 1.868810 4.132653 8 386
|
||||
FAST f=15 a=1 4.561427 3.866894 8 1202
|
||||
FAST f=15 a=1 0.048946 3.866894 8 1202
|
||||
FAST f=15 a=2 3.574462 3.892119 8 1538
|
||||
FAST f=15 a=2 0.033677 3.892119 8 1538
|
||||
FAST f=15 a=3 3.230227 3.888791 6 1346
|
||||
FAST f=15 a=3 0.034312 3.888791 6 1346
|
||||
FAST f=15 a=4 3.042388 3.899739 8 1010
|
||||
FAST f=15 a=4 0.024307 3.899739 8 1010
|
||||
FAST f=15 a=5 2.800148 3.896220 8 818
|
||||
FAST f=15 a=5 0.022331 3.896220 8 818
|
||||
FAST f=15 a=6 2.706518 3.882039 8 578
|
||||
FAST f=15 a=6 0.020955 3.882039 8 578
|
||||
FAST f=15 a=7 2.701820 3.885430 6 866
|
||||
FAST f=15 a=7 0.026074 3.885430 6 866
|
||||
FAST f=15 a=8 2.604445 3.906932 8 1826
|
||||
FAST f=15 a=8 0.021789 3.906932 8 1826
|
||||
FAST f=15 a=9 2.598568 3.870324 6 1682
|
||||
FAST f=15 a=9 0.026004 3.870324 6 1682
|
||||
FAST f=15 a=10 2.575920 3.920783 8 1442
|
||||
FAST f=15 a=10 0.020228 3.920783 8 1442
|
||||
FAST f=16 a=1 4.630623 4.001430 8 770
|
||||
FAST f=16 a=1 0.047497 4.001430 8 770
|
||||
FAST f=16 a=2 3.674721 3.974431 8 1874
|
||||
FAST f=16 a=2 0.035761 3.974431 8 1874
|
||||
FAST f=16 a=3 3.338384 3.978703 8 1010
|
||||
FAST f=16 a=3 0.029436 3.978703 8 1010
|
||||
FAST f=16 a=4 3.004412 3.983035 8 1010
|
||||
FAST f=16 a=4 0.025744 3.983035 8 1010
|
||||
FAST f=16 a=5 2.881892 3.987710 8 770
|
||||
FAST f=16 a=5 0.023211 3.987710 8 770
|
||||
FAST f=16 a=6 2.807410 3.952717 8 1298
|
||||
FAST f=16 a=6 0.023199 3.952717 8 1298
|
||||
FAST f=16 a=7 2.819623 3.994627 8 770
|
||||
FAST f=16 a=7 0.021806 3.994627 8 770
|
||||
FAST f=16 a=8 2.740092 3.954032 8 1826
|
||||
FAST f=16 a=8 0.0226 3.954032 8 1826
|
||||
FAST f=16 a=9 2.682564 3.969879 6 1442
|
||||
FAST f=16 a=9 0.026324 3.969879 6 1442
|
||||
FAST f=16 a=10 2.657959 3.969755 8 674
|
||||
FAST f=16 a=10 0.020413 3.969755 8 674
|
||||
FAST f=17 a=1 4.729228 4.046000 8 530
|
||||
FAST f=17 a=1 0.049703 4.046000 8 530
|
||||
FAST f=17 a=2 3.764510 3.991519 8 1970
|
||||
FAST f=17 a=2 0.038195 3.991519 8 1970
|
||||
FAST f=17 a=3 3.416992 4.006296 6 914
|
||||
FAST f=17 a=3 0.036244 4.006296 6 914
|
||||
FAST f=17 a=4 3.145626 3.979182 8 1970
|
||||
FAST f=17 a=4 0.028676 3.979182 8 1970
|
||||
FAST f=17 a=5 2.995070 4.050070 8 770
|
||||
FAST f=17 a=5 0.025707 4.050070 8 770
|
||||
FAST f=17 a=6 2.911833 4.040024 8 770
|
||||
FAST f=17 a=6 0.02453 4.040024 8 770
|
||||
FAST f=17 a=7 2.894796 4.015884 8 818
|
||||
FAST f=17 a=7 0.023956 4.015884 8 818
|
||||
FAST f=17 a=8 2.789962 4.039303 8 530
|
||||
FAST f=17 a=8 0.023219 4.039303 8 530
|
||||
FAST f=17 a=9 2.787625 3.996762 8 1634
|
||||
FAST f=17 a=9 0.023651 3.996762 8 1634
|
||||
FAST f=17 a=10 2.754796 4.005059 8 1058
|
||||
FAST f=17 a=10 0.022537 4.005059 8 1058
|
||||
FAST f=18 a=1 4.779117 4.038214 8 242
|
||||
FAST f=18 a=1 0.048814 4.038214 8 242
|
||||
FAST f=18 a=2 3.829753 4.045768 8 722
|
||||
FAST f=18 a=2 0.036541 4.045768 8 722
|
||||
FAST f=18 a=3 3.495053 4.021497 8 770
|
||||
FAST f=18 a=3 0.032648 4.021497 8 770
|
||||
FAST f=18 a=4 3.221395 4.039623 8 770
|
||||
FAST f=18 a=4 0.027818 4.039623 8 770
|
||||
FAST f=18 a=5 3.059369 4.050414 8 530
|
||||
FAST f=18 a=5 0.026296 4.050414 8 530
|
||||
FAST f=18 a=6 3.019292 4.010714 6 962
|
||||
FAST f=18 a=6 0.031104 4.010714 6 962
|
||||
FAST f=18 a=7 2.949322 4.031439 6 770
|
||||
FAST f=18 a=7 0.030745 4.031439 6 770
|
||||
FAST f=18 a=8 2.876425 4.032088 6 386
|
||||
FAST f=18 a=8 0.027407 4.032088 6 386
|
||||
FAST f=18 a=9 2.850958 4.053372 8 674
|
||||
FAST f=18 a=9 0.023799 4.053372 8 674
|
||||
FAST f=18 a=10 2.884352 4.020148 8 1730
|
||||
FAST f=18 a=10 0.024401 4.020148 8 1730
|
||||
FAST f=19 a=1 4.815669 4.061203 8 674
|
||||
FAST f=19 a=1 0.051425 4.061203 8 674
|
||||
FAST f=19 a=2 3.951356 4.013822 8 1442
|
||||
FAST f=19 a=2 0.039968 4.013822 8 1442
|
||||
FAST f=19 a=3 3.554682 4.050425 8 722
|
||||
FAST f=19 a=3 0.032725 4.050425 8 722
|
||||
FAST f=19 a=4 3.242585 4.054677 8 722
|
||||
FAST f=19 a=4 0.028194 4.054677 8 722
|
||||
FAST f=19 a=5 3.105909 4.064524 8 818
|
||||
FAST f=19 a=5 0.02675 4.064524 8 818
|
||||
FAST f=19 a=6 3.059901 4.036857 8 1250
|
||||
FAST f=19 a=6 0.026396 4.036857 8 1250
|
||||
FAST f=19 a=7 3.016151 4.068234 6 770
|
||||
FAST f=19 a=7 0.031501 4.068234 6 770
|
||||
FAST f=19 a=8 2.962902 4.077509 8 530
|
||||
FAST f=19 a=8 0.023333 4.077509 8 530
|
||||
FAST f=19 a=9 2.899607 4.067328 8 530
|
||||
FAST f=19 a=9 0.024553 4.067328 8 530
|
||||
FAST f=19 a=10 2.950978 4.059901 8 434
|
||||
FAST f=19 a=10 0.023852 4.059901 8 434
|
||||
FAST f=20 a=1 5.259834 4.027579 8 1634
|
||||
FAST f=20 a=1 0.061123 4.027579 8 1634
|
||||
FAST f=20 a=2 4.382150 4.025093 8 1634
|
||||
FAST f=20 a=2 0.048009 4.025093 8 1634
|
||||
FAST f=20 a=3 4.104323 4.060842 8 530
|
||||
FAST f=20 a=3 0.040965 4.060842 8 530
|
||||
FAST f=20 a=4 3.853340 4.023504 6 914
|
||||
FAST f=20 a=4 0.041072 4.023504 6 914
|
||||
FAST f=20 a=5 3.728841 4.018089 6 1634
|
||||
FAST f=20 a=5 0.037469 4.018089 6 1634
|
||||
FAST f=20 a=6 3.683045 4.069138 8 578
|
||||
FAST f=20 a=6 0.028011 4.069138 8 578
|
||||
FAST f=20 a=7 3.726973 4.063160 8 722
|
||||
FAST f=20 a=7 0.028437 4.063160 8 722
|
||||
FAST f=20 a=8 3.555073 4.057690 8 386
|
||||
FAST f=20 a=8 0.027588 4.057690 8 386
|
||||
FAST f=20 a=9 3.551095 4.067253 8 482
|
||||
FAST f=20 a=9 0.025976 4.067253 8 482
|
||||
FAST f=20 a=10 3.490127 4.068518 8 530
|
||||
FAST f=20 a=10 0.025971 4.068518 8 530
|
||||
FAST f=21 a=1 7.343816 4.064945 8 770
|
||||
FAST f=21 a=1 0.085035 4.064945 8 770
|
||||
FAST f=21 a=2 5.930894 4.048206 8 386
|
||||
FAST f=21 a=2 0.067349 4.048206 8 386
|
||||
FAST f=21 a=3 6.770775 4.063417 8 578
|
||||
FAST f=21 a=3 0.077104 4.063417 8 578
|
||||
FAST f=21 a=4 6.889409 4.066761 8 626
|
||||
FAST f=21 a=4 0.0717 4.066761 8 626
|
||||
FAST f=21 a=5 6.714896 4.051813 8 914
|
||||
FAST f=21 a=5 0.071026 4.051813 8 914
|
||||
FAST f=21 a=6 6.539890 4.047263 8 1922
|
||||
FAST f=21 a=6 0.07127 4.047263 8 1922
|
||||
FAST f=21 a=7 6.511052 4.068373 8 482
|
||||
FAST f=21 a=7 0.065467 4.068373 8 482
|
||||
FAST f=21 a=8 6.458788 4.071597 8 482
|
||||
FAST f=21 a=8 0.063817 4.071597 8 482
|
||||
FAST f=21 a=9 6.377591 4.052905 8 434
|
||||
FAST f=21 a=9 0.063112 4.052905 8 434
|
||||
FAST f=21 a=10 6.360752 4.047773 8 530
|
||||
FAST f=21 a=10 0.063606 4.047773 8 530
|
||||
FAST f=22 a=1 10.523471 4.040812 8 962
|
||||
FAST f=22 a=1 0.14214 4.040812 8 962
|
||||
FAST f=22 a=2 9.454758 4.059396 8 914
|
||||
FAST f=22 a=2 0.118343 4.059396 8 914
|
||||
FAST f=22 a=3 9.043197 4.043019 8 1922
|
||||
FAST f=22 a=3 0.109798 4.043019 8 1922
|
||||
FAST f=22 a=4 8.716261 4.044819 8 770
|
||||
FAST f=22 a=4 0.099687 4.044819 8 770
|
||||
FAST f=22 a=5 8.529472 4.070576 8 530
|
||||
FAST f=22 a=5 0.093127 4.070576 8 530
|
||||
FAST f=22 a=6 8.424241 4.070565 8 722
|
||||
FAST f=22 a=6 0.093703 4.070565 8 722
|
||||
FAST f=22 a=7 8.403391 4.070591 8 578
|
||||
FAST f=22 a=7 0.089763 4.070591 8 578
|
||||
FAST f=22 a=8 8.285221 4.089171 8 530
|
||||
FAST f=22 a=8 0.087716 4.089171 8 530
|
||||
FAST f=22 a=9 8.282506 4.047470 8 722
|
||||
FAST f=22 a=9 0.089773 4.047470 8 722
|
||||
FAST f=22 a=10 8.241809 4.064151 8 818
|
||||
FAST f=22 a=10 0.090413 4.064151 8 818
|
||||
FAST f=23 a=1 12.389208 4.051635 6 530
|
||||
FAST f=23 a=1 0.147796 4.051635 6 530
|
||||
FAST f=23 a=2 11.300910 4.042835 6 914
|
||||
FAST f=23 a=2 0.133178 4.042835 6 914
|
||||
FAST f=23 a=3 10.879455 4.047415 8 626
|
||||
FAST f=23 a=3 0.129571 4.047415 8 626
|
||||
FAST f=23 a=4 10.522718 4.038269 6 914
|
||||
FAST f=23 a=4 0.118121 4.038269 6 914
|
||||
FAST f=23 a=5 10.348043 4.066884 8 434
|
||||
FAST f=23 a=5 0.112098 4.066884 8 434
|
||||
FAST f=23 a=6 10.238630 4.048635 8 1010
|
||||
FAST f=23 a=6 0.120281 4.048635 8 1010
|
||||
FAST f=23 a=7 10.213255 4.061809 8 530
|
||||
FAST f=23 a=7 0.1121 4.061809 8 530
|
||||
FAST f=23 a=8 10.107879 4.074104 8 818
|
||||
FAST f=23 a=8 0.116544 4.074104 8 818
|
||||
FAST f=23 a=9 10.063424 4.064811 8 674
|
||||
FAST f=23 a=9 0.109045 4.064811 8 674
|
||||
FAST f=23 a=10 10.035801 4.054918 8 530
|
||||
FAST f=23 a=10 0.108735 4.054918 8 530
|
||||
FAST f=24 a=1 14.963878 4.073490 8 722
|
||||
FAST f=24 a=1 0.206344 4.073490 8 722
|
||||
FAST f=24 a=2 13.833472 4.036100 8 962
|
||||
FAST f=24 a=2 0.17486 4.036100 8 962
|
||||
FAST f=24 a=3 13.404631 4.026281 6 1106
|
||||
FAST f=24 a=3 0.153961 4.026281 6 1106
|
||||
FAST f=24 a=4 13.041164 4.065448 8 674
|
||||
FAST f=24 a=4 0.155509 4.065448 8 674
|
||||
FAST f=24 a=5 12.879412 4.054636 8 674
|
||||
FAST f=24 a=5 0.148282 4.054636 8 674
|
||||
FAST f=24 a=6 12.773736 4.081376 8 530
|
||||
FAST f=24 a=6 0.142563 4.081376 8 530
|
||||
FAST f=24 a=7 12.711310 4.059834 8 770
|
||||
FAST f=24 a=7 0.149321 4.059834 8 770
|
||||
FAST f=24 a=8 12.635459 4.052050 8 1298
|
||||
FAST f=24 a=8 0.15095 4.052050 8 1298
|
||||
FAST f=24 a=9 12.558104 4.076516 8 722
|
||||
FAST f=24 a=9 0.144361 4.076516 8 722
|
||||
FAST f=24 a=10 10.661348 4.062137 8 818
|
||||
FAST f=24 a=10 0.108232 4.062137 8 818
|
||||
|
||||
|
||||
hg-changelog:
|
||||
NODICT 0.000017 1.377590
|
||||
RANDOM 0.186171 2.097487
|
||||
LEGACY 1.670867 2.058907
|
||||
COVER 173.561948 2.189685 8 98
|
||||
COVER 4.811180 2.189685 8 98
|
||||
FAST f=15 a=1 18.685906 2.129682 8 434
|
||||
FAST f=15 a=1 0.173376 2.129682 8 434
|
||||
FAST f=15 a=2 12.928259 2.131890 8 482
|
||||
FAST f=15 a=2 0.102582 2.131890 8 482
|
||||
FAST f=15 a=3 11.132343 2.128027 8 386
|
||||
FAST f=15 a=3 0.077122 2.128027 8 386
|
||||
FAST f=15 a=4 10.120683 2.125797 8 434
|
||||
FAST f=15 a=4 0.065175 2.125797 8 434
|
||||
FAST f=15 a=5 9.479092 2.127697 8 386
|
||||
FAST f=15 a=5 0.057905 2.127697 8 386
|
||||
FAST f=15 a=6 9.159523 2.127132 8 1682
|
||||
FAST f=15 a=6 0.058604 2.127132 8 1682
|
||||
FAST f=15 a=7 8.724003 2.129914 8 434
|
||||
FAST f=15 a=7 0.0493 2.129914 8 434
|
||||
FAST f=15 a=8 8.595001 2.127137 8 338
|
||||
FAST f=15 a=8 0.0474 2.127137 8 338
|
||||
FAST f=15 a=9 8.356405 2.125512 8 482
|
||||
FAST f=15 a=9 0.046126 2.125512 8 482
|
||||
FAST f=15 a=10 8.207111 2.126066 8 338
|
||||
FAST f=15 a=10 0.043292 2.126066 8 338
|
||||
FAST f=16 a=1 18.464436 2.144040 8 242
|
||||
FAST f=16 a=1 0.172156 2.144040 8 242
|
||||
FAST f=16 a=2 12.844825 2.148171 8 194
|
||||
FAST f=16 a=2 0.099619 2.148171 8 194
|
||||
FAST f=16 a=3 11.082568 2.140837 8 290
|
||||
FAST f=16 a=3 0.079165 2.140837 8 290
|
||||
FAST f=16 a=4 10.066749 2.144405 8 386
|
||||
FAST f=16 a=4 0.068411 2.144405 8 386
|
||||
FAST f=16 a=5 9.501121 2.140720 8 386
|
||||
FAST f=16 a=5 0.061316 2.140720 8 386
|
||||
FAST f=16 a=6 9.179332 2.139478 8 386
|
||||
FAST f=16 a=6 0.056322 2.139478 8 386
|
||||
FAST f=16 a=7 8.849438 2.142412 8 194
|
||||
FAST f=16 a=7 0.050493 2.142412 8 194
|
||||
FAST f=16 a=8 8.810919 2.143454 8 434
|
||||
FAST f=16 a=8 0.051304 2.143454 8 434
|
||||
FAST f=16 a=9 8.553900 2.140339 8 194
|
||||
FAST f=16 a=9 0.047285 2.140339 8 194
|
||||
FAST f=16 a=10 8.398027 2.143130 8 386
|
||||
FAST f=16 a=10 0.046386 2.143130 8 386
|
||||
FAST f=17 a=1 18.644657 2.157192 8 98
|
||||
FAST f=17 a=1 0.173884 2.157192 8 98
|
||||
FAST f=17 a=2 13.071242 2.159830 8 146
|
||||
FAST f=17 a=2 0.10388 2.159830 8 146
|
||||
FAST f=17 a=3 11.332366 2.153654 6 194
|
||||
FAST f=17 a=3 0.08983 2.153654 6 194
|
||||
FAST f=17 a=4 10.362413 2.156813 8 242
|
||||
FAST f=17 a=4 0.070389 2.156813 8 242
|
||||
FAST f=17 a=5 9.808159 2.155098 6 338
|
||||
FAST f=17 a=5 0.072661 2.155098 6 338
|
||||
FAST f=17 a=6 9.451165 2.153845 6 146
|
||||
FAST f=17 a=6 0.064959 2.153845 6 146
|
||||
FAST f=17 a=7 9.163097 2.155424 6 242
|
||||
FAST f=17 a=7 0.064323 2.155424 6 242
|
||||
FAST f=17 a=8 9.047276 2.156640 8 242
|
||||
FAST f=17 a=8 0.053382 2.156640 8 242
|
||||
FAST f=17 a=9 8.807671 2.152396 8 146
|
||||
FAST f=17 a=9 0.049617 2.152396 8 146
|
||||
FAST f=17 a=10 8.649827 2.152370 8 146
|
||||
FAST f=17 a=10 0.047849 2.152370 8 146
|
||||
FAST f=18 a=1 18.809502 2.168116 8 98
|
||||
FAST f=18 a=1 0.175226 2.168116 8 98
|
||||
FAST f=18 a=2 13.756502 2.170870 6 242
|
||||
FAST f=18 a=2 0.119507 2.170870 6 242
|
||||
FAST f=18 a=3 12.059748 2.163094 6 98
|
||||
FAST f=18 a=3 0.093912 2.163094 6 98
|
||||
FAST f=18 a=4 11.410294 2.172372 8 98
|
||||
FAST f=18 a=4 0.073048 2.172372 8 98
|
||||
FAST f=18 a=5 10.560297 2.166388 8 98
|
||||
FAST f=18 a=5 0.065136 2.166388 8 98
|
||||
FAST f=18 a=6 10.071390 2.162672 8 98
|
||||
FAST f=18 a=6 0.059402 2.162672 8 98
|
||||
FAST f=18 a=7 10.084214 2.166624 6 194
|
||||
FAST f=18 a=7 0.073276 2.166624 6 194
|
||||
FAST f=18 a=8 9.953226 2.167454 8 98
|
||||
FAST f=18 a=8 0.053659 2.167454 8 98
|
||||
FAST f=18 a=9 8.982461 2.161593 6 146
|
||||
FAST f=18 a=9 0.05955 2.161593 6 146
|
||||
FAST f=18 a=10 8.986092 2.164373 6 242
|
||||
FAST f=18 a=10 0.059135 2.164373 6 242
|
||||
FAST f=19 a=1 18.908277 2.176021 8 98
|
||||
FAST f=19 a=1 0.177316 2.176021 8 98
|
||||
FAST f=19 a=2 13.471313 2.176103 8 98
|
||||
FAST f=19 a=2 0.106344 2.176103 8 98
|
||||
FAST f=19 a=3 11.571406 2.172812 8 98
|
||||
FAST f=19 a=3 0.083293 2.172812 8 98
|
||||
FAST f=19 a=4 10.632775 2.177770 6 146
|
||||
FAST f=19 a=4 0.079864 2.177770 6 146
|
||||
FAST f=19 a=5 10.030190 2.175574 6 146
|
||||
FAST f=19 a=5 0.07223 2.175574 6 146
|
||||
FAST f=19 a=6 9.717818 2.169997 8 98
|
||||
FAST f=19 a=6 0.060049 2.169997 8 98
|
||||
FAST f=19 a=7 9.397531 2.172770 8 146
|
||||
FAST f=19 a=7 0.057188 2.172770 8 146
|
||||
FAST f=19 a=8 9.281061 2.175822 8 98
|
||||
FAST f=19 a=8 0.053711 2.175822 8 98
|
||||
FAST f=19 a=9 9.165242 2.169849 6 146
|
||||
FAST f=19 a=9 0.059898 2.169849 6 146
|
||||
FAST f=19 a=10 9.048763 2.173394 8 98
|
||||
FAST f=19 a=10 0.049757 2.173394 8 98
|
||||
FAST f=20 a=1 21.166917 2.183923 6 98
|
||||
FAST f=20 a=1 0.205425 2.183923 6 98
|
||||
FAST f=20 a=2 15.642753 2.182349 6 98
|
||||
FAST f=20 a=2 0.135957 2.182349 6 98
|
||||
FAST f=20 a=3 14.053730 2.173544 6 98
|
||||
FAST f=20 a=3 0.11266 2.173544 6 98
|
||||
FAST f=20 a=4 15.270019 2.183656 8 98
|
||||
FAST f=20 a=4 0.107892 2.183656 8 98
|
||||
FAST f=20 a=5 15.497927 2.174661 6 98
|
||||
FAST f=20 a=5 0.100305 2.174661 6 98
|
||||
FAST f=20 a=6 13.973505 2.172391 8 98
|
||||
FAST f=20 a=6 0.087565 2.172391 8 98
|
||||
FAST f=20 a=7 14.083296 2.172443 8 98
|
||||
FAST f=20 a=7 0.078062 2.172443 8 98
|
||||
FAST f=20 a=8 12.560048 2.175581 8 98
|
||||
FAST f=20 a=8 0.070282 2.175581 8 98
|
||||
FAST f=20 a=9 13.078645 2.173975 6 146
|
||||
FAST f=20 a=9 0.081041 2.173975 6 146
|
||||
FAST f=20 a=10 12.823328 2.177778 8 98
|
||||
FAST f=20 a=10 0.074522 2.177778 8 98
|
||||
FAST f=21 a=1 29.825370 2.183057 6 98
|
||||
FAST f=21 a=1 0.334453 2.183057 6 98
|
||||
FAST f=21 a=2 29.476474 2.182752 8 98
|
||||
FAST f=21 a=2 0.286602 2.182752 8 98
|
||||
FAST f=21 a=3 25.937186 2.175867 8 98
|
||||
FAST f=21 a=3 0.17626 2.175867 8 98
|
||||
FAST f=21 a=4 20.413865 2.179780 8 98
|
||||
FAST f=21 a=4 0.206085 2.179780 8 98
|
||||
FAST f=21 a=5 20.541889 2.178328 6 146
|
||||
FAST f=21 a=5 0.199157 2.178328 6 146
|
||||
FAST f=21 a=6 21.090670 2.174443 6 146
|
||||
FAST f=21 a=6 0.190645 2.174443 6 146
|
||||
FAST f=21 a=7 20.221569 2.177384 6 146
|
||||
FAST f=21 a=7 0.184278 2.177384 6 146
|
||||
FAST f=21 a=8 20.322357 2.179456 6 98
|
||||
FAST f=21 a=8 0.178458 2.179456 6 98
|
||||
FAST f=21 a=9 20.683912 2.174396 6 146
|
||||
FAST f=21 a=9 0.190829 2.174396 6 146
|
||||
FAST f=21 a=10 20.840865 2.174905 8 98
|
||||
FAST f=21 a=10 0.172515 2.174905 8 98
|
||||
FAST f=22 a=1 36.822827 2.181612 6 98
|
||||
FAST f=22 a=1 0.437389 2.181612 6 98
|
||||
FAST f=22 a=2 30.616902 2.183142 8 98
|
||||
FAST f=22 a=2 0.324284 2.183142 8 98
|
||||
FAST f=22 a=3 28.472482 2.178130 8 98
|
||||
FAST f=22 a=3 0.236538 2.178130 8 98
|
||||
FAST f=22 a=4 25.847028 2.181878 8 98
|
||||
FAST f=22 a=4 0.263744 2.181878 8 98
|
||||
FAST f=22 a=5 27.095881 2.180775 8 98
|
||||
FAST f=22 a=5 0.24988 2.180775 8 98
|
||||
FAST f=22 a=6 25.939172 2.170916 8 98
|
||||
FAST f=22 a=6 0.240033 2.170916 8 98
|
||||
FAST f=22 a=7 27.064194 2.177849 8 98
|
||||
FAST f=22 a=7 0.242383 2.177849 8 98
|
||||
FAST f=22 a=8 25.140221 2.178216 8 98
|
||||
FAST f=22 a=8 0.237601 2.178216 8 98
|
||||
FAST f=22 a=9 25.505283 2.177455 6 146
|
||||
FAST f=22 a=9 0.223217 2.177455 6 146
|
||||
FAST f=22 a=10 24.529362 2.176705 6 98
|
||||
FAST f=22 a=10 0.222876 2.176705 6 98
|
||||
FAST f=23 a=1 39.127310 2.183006 6 98
|
||||
FAST f=23 a=1 0.417338 2.183006 6 98
|
||||
FAST f=23 a=2 32.468161 2.183524 6 98
|
||||
FAST f=23 a=2 0.351645 2.183524 6 98
|
||||
FAST f=23 a=3 31.577620 2.172604 6 98
|
||||
FAST f=23 a=3 0.319659 2.172604 6 98
|
||||
FAST f=23 a=4 30.129247 2.183932 6 98
|
||||
FAST f=23 a=4 0.307239 2.183932 6 98
|
||||
FAST f=23 a=5 29.103376 2.183529 6 146
|
||||
FAST f=23 a=5 0.285533 2.183529 6 146
|
||||
FAST f=23 a=6 29.776045 2.174367 8 98
|
||||
FAST f=23 a=6 0.276846 2.174367 8 98
|
||||
FAST f=23 a=7 28.940407 2.178022 6 146
|
||||
FAST f=23 a=7 0.274082 2.178022 6 146
|
||||
FAST f=23 a=8 29.256009 2.179462 6 98
|
||||
FAST f=23 a=8 0.26949 2.179462 6 98
|
||||
FAST f=23 a=9 29.347312 2.170407 8 98
|
||||
FAST f=23 a=9 0.265034 2.170407 8 98
|
||||
FAST f=23 a=10 29.140081 2.171762 8 98
|
||||
FAST f=23 a=10 0.259183 2.171762 8 98
|
||||
FAST f=24 a=1 44.871179 2.182115 6 98
|
||||
FAST f=24 a=1 0.509433 2.182115 6 98
|
||||
FAST f=24 a=2 38.694867 2.180549 8 98
|
||||
FAST f=24 a=2 0.406695 2.180549 8 98
|
||||
FAST f=24 a=3 38.363769 2.172821 8 98
|
||||
FAST f=24 a=3 0.359581 2.172821 8 98
|
||||
FAST f=24 a=4 36.580797 2.184142 8 98
|
||||
FAST f=24 a=4 0.340614 2.184142 8 98
|
||||
FAST f=24 a=5 33.125701 2.183301 8 98
|
||||
FAST f=24 a=5 0.324874 2.183301 8 98
|
||||
FAST f=24 a=6 34.776068 2.173019 6 146
|
||||
FAST f=24 a=6 0.340397 2.173019 6 146
|
||||
FAST f=24 a=7 34.417625 2.176561 6 146
|
||||
FAST f=24 a=7 0.308223 2.176561 6 146
|
||||
FAST f=24 a=8 35.470291 2.182161 6 98
|
||||
FAST f=24 a=8 0.307724 2.182161 6 98
|
||||
FAST f=24 a=9 34.927252 2.172682 6 146
|
||||
FAST f=24 a=9 0.300598 2.172682 6 146
|
||||
FAST f=24 a=10 33.238355 2.173395 6 98
|
||||
FAST f=24 a=10 0.249916 2.173395 6 98
|
||||
|
||||
|
||||
hg-manifest:
|
||||
NODICT 0.000004 1.866377
|
||||
RANDOM 0.696346 2.309436
|
||||
LEGACY 7.064527 2.506977
|
||||
COVER 876.312865 2.582528 8 434
|
||||
COVER 35.684533 2.582528 8 434
|
||||
FAST f=15 a=1 76.618201 2.404013 8 1202
|
||||
FAST f=15 a=1 0.700722 2.404013 8 1202
|
||||
FAST f=15 a=2 49.213058 2.409248 6 1826
|
||||
FAST f=15 a=2 0.473393 2.409248 6 1826
|
||||
FAST f=15 a=3 41.753197 2.409677 8 1490
|
||||
FAST f=15 a=3 0.336848 2.409677 8 1490
|
||||
FAST f=15 a=4 38.648295 2.407996 8 1538
|
||||
FAST f=15 a=4 0.283952 2.407996 8 1538
|
||||
FAST f=15 a=5 36.144936 2.402895 8 1874
|
||||
FAST f=15 a=5 0.270128 2.402895 8 1874
|
||||
FAST f=15 a=6 35.484675 2.394873 8 1586
|
||||
FAST f=15 a=6 0.251637 2.394873 8 1586
|
||||
FAST f=15 a=7 34.280599 2.397311 8 1778
|
||||
FAST f=15 a=7 0.23984 2.397311 8 1778
|
||||
FAST f=15 a=8 32.122572 2.396089 6 1490
|
||||
FAST f=15 a=8 0.251508 2.396089 6 1490
|
||||
FAST f=15 a=9 29.909842 2.390092 6 1970
|
||||
FAST f=15 a=9 0.251233 2.390092 6 1970
|
||||
FAST f=15 a=10 30.102938 2.400086 6 1682
|
||||
FAST f=15 a=10 0.23688 2.400086 6 1682
|
||||
FAST f=16 a=1 67.750401 2.475460 6 1346
|
||||
FAST f=16 a=1 0.796035 2.475460 6 1346
|
||||
FAST f=16 a=2 52.812027 2.480860 6 1730
|
||||
FAST f=16 a=2 0.480384 2.480860 6 1730
|
||||
FAST f=16 a=3 44.179259 2.469304 8 1970
|
||||
FAST f=16 a=3 0.332657 2.469304 8 1970
|
||||
FAST f=16 a=4 37.612728 2.478208 6 1970
|
||||
FAST f=16 a=4 0.32498 2.478208 6 1970
|
||||
FAST f=16 a=5 35.056222 2.475568 6 1298
|
||||
FAST f=16 a=5 0.302824 2.475568 6 1298
|
||||
FAST f=16 a=6 34.713012 2.486079 8 1730
|
||||
FAST f=16 a=6 0.24755 2.486079 8 1730
|
||||
FAST f=16 a=7 33.713687 2.477180 6 1682
|
||||
FAST f=16 a=7 0.280358 2.477180 6 1682
|
||||
FAST f=16 a=8 31.571412 2.475418 8 1538
|
||||
FAST f=16 a=8 0.241241 2.475418 8 1538
|
||||
FAST f=16 a=9 31.608069 2.478263 8 1922
|
||||
FAST f=16 a=9 0.241764 2.478263 8 1922
|
||||
FAST f=16 a=10 31.358002 2.472263 8 1442
|
||||
FAST f=16 a=10 0.221661 2.472263 8 1442
|
||||
FAST f=17 a=1 66.185775 2.536085 6 1346
|
||||
FAST f=17 a=1 0.713549 2.536085 6 1346
|
||||
FAST f=17 a=2 50.365000 2.546105 8 1298
|
||||
FAST f=17 a=2 0.467846 2.546105 8 1298
|
||||
FAST f=17 a=3 42.712843 2.536250 8 1298
|
||||
FAST f=17 a=3 0.34047 2.536250 8 1298
|
||||
FAST f=17 a=4 39.514227 2.535555 8 1442
|
||||
FAST f=17 a=4 0.302989 2.535555 8 1442
|
||||
FAST f=17 a=5 35.189292 2.524925 8 1202
|
||||
FAST f=17 a=5 0.273451 2.524925 8 1202
|
||||
FAST f=17 a=6 35.791683 2.523466 8 1202
|
||||
FAST f=17 a=6 0.268261 2.523466 8 1202
|
||||
FAST f=17 a=7 37.416136 2.526625 6 1010
|
||||
FAST f=17 a=7 0.277558 2.526625 6 1010
|
||||
FAST f=17 a=8 37.084707 2.533274 6 1250
|
||||
FAST f=17 a=8 0.285104 2.533274 6 1250
|
||||
FAST f=17 a=9 34.183814 2.532765 8 1298
|
||||
FAST f=17 a=9 0.235133 2.532765 8 1298
|
||||
FAST f=17 a=10 31.149235 2.528722 8 1346
|
||||
FAST f=17 a=10 0.232679 2.528722 8 1346
|
||||
FAST f=18 a=1 72.942176 2.559857 6 386
|
||||
FAST f=18 a=1 0.718618 2.559857 6 386
|
||||
FAST f=18 a=2 51.690440 2.559572 8 290
|
||||
FAST f=18 a=2 0.403978 2.559572 8 290
|
||||
FAST f=18 a=3 45.344908 2.561040 8 962
|
||||
FAST f=18 a=3 0.357205 2.561040 8 962
|
||||
FAST f=18 a=4 39.804522 2.558446 8 1010
|
||||
FAST f=18 a=4 0.310526 2.558446 8 1010
|
||||
FAST f=18 a=5 38.134888 2.561811 8 626
|
||||
FAST f=18 a=5 0.273743 2.561811 8 626
|
||||
FAST f=18 a=6 35.091890 2.555518 8 722
|
||||
FAST f=18 a=6 0.260135 2.555518 8 722
|
||||
FAST f=18 a=7 34.639523 2.562938 8 290
|
||||
FAST f=18 a=7 0.234294 2.562938 8 290
|
||||
FAST f=18 a=8 36.076431 2.563567 8 1586
|
||||
FAST f=18 a=8 0.274075 2.563567 8 1586
|
||||
FAST f=18 a=9 36.376433 2.560950 8 722
|
||||
FAST f=18 a=9 0.240106 2.560950 8 722
|
||||
FAST f=18 a=10 32.624790 2.559340 8 578
|
||||
FAST f=18 a=10 0.234704 2.559340 8 578
|
||||
FAST f=19 a=1 70.513761 2.572441 8 194
|
||||
FAST f=19 a=1 0.726112 2.572441 8 194
|
||||
FAST f=19 a=2 59.263032 2.574560 8 482
|
||||
FAST f=19 a=2 0.451554 2.574560 8 482
|
||||
FAST f=19 a=3 51.509594 2.571546 6 194
|
||||
FAST f=19 a=3 0.393014 2.571546 6 194
|
||||
FAST f=19 a=4 55.393906 2.573386 8 482
|
||||
FAST f=19 a=4 0.38819 2.573386 8 482
|
||||
FAST f=19 a=5 43.201736 2.567589 8 674
|
||||
FAST f=19 a=5 0.292155 2.567589 8 674
|
||||
FAST f=19 a=6 42.911687 2.572666 6 434
|
||||
FAST f=19 a=6 0.303988 2.572666 6 434
|
||||
FAST f=19 a=7 44.687591 2.573613 6 290
|
||||
FAST f=19 a=7 0.308721 2.573613 6 290
|
||||
FAST f=19 a=8 37.372868 2.571039 6 194
|
||||
FAST f=19 a=8 0.287137 2.571039 6 194
|
||||
FAST f=19 a=9 36.074230 2.566473 6 482
|
||||
FAST f=19 a=9 0.280721 2.566473 6 482
|
||||
FAST f=19 a=10 33.731720 2.570306 8 194
|
||||
FAST f=19 a=10 0.224073 2.570306 8 194
|
||||
FAST f=20 a=1 79.670634 2.581146 6 290
|
||||
FAST f=20 a=1 0.899986 2.581146 6 290
|
||||
FAST f=20 a=2 58.827141 2.579782 8 386
|
||||
FAST f=20 a=2 0.602288 2.579782 8 386
|
||||
FAST f=20 a=3 51.289004 2.579627 8 722
|
||||
FAST f=20 a=3 0.446091 2.579627 8 722
|
||||
FAST f=20 a=4 47.711068 2.581508 8 722
|
||||
FAST f=20 a=4 0.473007 2.581508 8 722
|
||||
FAST f=20 a=5 47.402929 2.578062 6 434
|
||||
FAST f=20 a=5 0.497131 2.578062 6 434
|
||||
FAST f=20 a=6 54.797102 2.577365 8 482
|
||||
FAST f=20 a=6 0.515061 2.577365 8 482
|
||||
FAST f=20 a=7 51.370877 2.583050 8 386
|
||||
FAST f=20 a=7 0.402878 2.583050 8 386
|
||||
FAST f=20 a=8 51.437931 2.574875 6 242
|
||||
FAST f=20 a=8 0.453094 2.574875 6 242
|
||||
FAST f=20 a=9 44.105456 2.576700 6 242
|
||||
FAST f=20 a=9 0.456633 2.576700 6 242
|
||||
FAST f=20 a=10 44.447580 2.578305 8 338
|
||||
FAST f=20 a=10 0.409121 2.578305 8 338
|
||||
FAST f=21 a=1 113.031686 2.582449 6 242
|
||||
FAST f=21 a=1 1.456971 2.582449 6 242
|
||||
FAST f=21 a=2 97.700932 2.582124 8 194
|
||||
FAST f=21 a=2 1.072078 2.582124 8 194
|
||||
FAST f=21 a=3 96.563648 2.585479 8 434
|
||||
FAST f=21 a=3 0.949528 2.585479 8 434
|
||||
FAST f=21 a=4 90.597813 2.582366 6 386
|
||||
FAST f=21 a=4 0.76944 2.582366 6 386
|
||||
FAST f=21 a=5 86.815980 2.579043 8 434
|
||||
FAST f=21 a=5 0.858167 2.579043 8 434
|
||||
FAST f=21 a=6 91.235820 2.578378 8 530
|
||||
FAST f=21 a=6 0.684274 2.578378 8 530
|
||||
FAST f=21 a=7 84.392788 2.581243 8 386
|
||||
FAST f=21 a=7 0.814386 2.581243 8 386
|
||||
FAST f=21 a=8 82.052310 2.582547 8 338
|
||||
FAST f=21 a=8 0.822633 2.582547 8 338
|
||||
FAST f=21 a=9 74.696074 2.579319 8 194
|
||||
FAST f=21 a=9 0.811028 2.579319 8 194
|
||||
FAST f=21 a=10 76.211170 2.578766 8 290
|
||||
FAST f=21 a=10 0.809715 2.578766 8 290
|
||||
FAST f=22 a=1 138.976871 2.580478 8 194
|
||||
FAST f=22 a=1 1.748932 2.580478 8 194
|
||||
FAST f=22 a=2 120.164097 2.583633 8 386
|
||||
FAST f=22 a=2 1.333239 2.583633 8 386
|
||||
FAST f=22 a=3 111.986474 2.582566 6 194
|
||||
FAST f=22 a=3 1.305734 2.582566 6 194
|
||||
FAST f=22 a=4 108.548148 2.583068 6 194
|
||||
FAST f=22 a=4 1.314026 2.583068 6 194
|
||||
FAST f=22 a=5 103.173017 2.583495 6 290
|
||||
FAST f=22 a=5 1.228664 2.583495 6 290
|
||||
FAST f=22 a=6 108.421262 2.582349 8 530
|
||||
FAST f=22 a=6 1.076773 2.582349 8 530
|
||||
FAST f=22 a=7 103.284127 2.581022 8 386
|
||||
FAST f=22 a=7 1.112117 2.581022 8 386
|
||||
FAST f=22 a=8 96.330279 2.581073 8 290
|
||||
FAST f=22 a=8 1.109303 2.581073 8 290
|
||||
FAST f=22 a=9 97.651348 2.580075 6 194
|
||||
FAST f=22 a=9 0.933032 2.580075 6 194
|
||||
FAST f=22 a=10 101.660621 2.584886 8 194
|
||||
FAST f=22 a=10 0.796823 2.584886 8 194
|
||||
FAST f=23 a=1 159.322978 2.581474 6 242
|
||||
FAST f=23 a=1 2.015878 2.581474 6 242
|
||||
FAST f=23 a=2 134.331775 2.581619 8 194
|
||||
FAST f=23 a=2 1.545845 2.581619 8 194
|
||||
FAST f=23 a=3 127.724552 2.579888 6 338
|
||||
FAST f=23 a=3 1.444496 2.579888 6 338
|
||||
FAST f=23 a=4 126.077675 2.578137 6 242
|
||||
FAST f=23 a=4 1.364394 2.578137 6 242
|
||||
FAST f=23 a=5 124.914027 2.580843 8 338
|
||||
FAST f=23 a=5 1.116059 2.580843 8 338
|
||||
FAST f=23 a=6 122.874153 2.577637 6 338
|
||||
FAST f=23 a=6 1.164584 2.577637 6 338
|
||||
FAST f=23 a=7 123.099257 2.582715 6 386
|
||||
FAST f=23 a=7 1.354042 2.582715 6 386
|
||||
FAST f=23 a=8 122.026753 2.577681 8 194
|
||||
FAST f=23 a=8 1.210966 2.577681 8 194
|
||||
FAST f=23 a=9 121.164312 2.584599 6 290
|
||||
FAST f=23 a=9 1.174859 2.584599 6 290
|
||||
FAST f=23 a=10 117.462222 2.580358 8 194
|
||||
FAST f=23 a=10 1.075258 2.580358 8 194
|
||||
FAST f=24 a=1 169.539659 2.581642 6 194
|
||||
FAST f=24 a=1 1.916804 2.581642 6 194
|
||||
FAST f=24 a=2 160.539270 2.580421 6 290
|
||||
FAST f=24 a=2 1.71087 2.580421 6 290
|
||||
FAST f=24 a=3 155.455874 2.580449 6 242
|
||||
FAST f=24 a=3 1.60307 2.580449 6 242
|
||||
FAST f=24 a=4 147.630320 2.582953 6 338
|
||||
FAST f=24 a=4 1.396364 2.582953 6 338
|
||||
FAST f=24 a=5 133.767428 2.580589 6 290
|
||||
FAST f=24 a=5 1.19933 2.580589 6 290
|
||||
FAST f=24 a=6 146.437535 2.579453 8 194
|
||||
FAST f=24 a=6 1.385405 2.579453 8 194
|
||||
FAST f=24 a=7 147.227507 2.584155 8 386
|
||||
FAST f=24 a=7 1.48942 2.584155 8 386
|
||||
FAST f=24 a=8 138.005773 2.584115 8 194
|
||||
FAST f=24 a=8 1.352 2.584115 8 194
|
||||
FAST f=24 a=9 141.442625 2.582902 8 290
|
||||
FAST f=24 a=9 1.39647 2.582902 8 290
|
||||
FAST f=24 a=10 142.157446 2.582701 8 434
|
||||
FAST f=24 a=10 1.498889 2.582701 8 434
|
@ -0,0 +1,442 @@
|
||||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* strcmp, strlen */
|
||||
#include <errno.h> /* errno */
|
||||
#include <ctype.h>
|
||||
#include <time.h>
|
||||
#include "random.h"
|
||||
#include "dictBuilder.h"
|
||||
#include "zstd_internal.h" /* includes zstd.h */
|
||||
#include "io.h"
|
||||
#include "util.h"
|
||||
#include "zdict.h"
|
||||
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Console display
|
||||
***************************************/
|
||||
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
|
||||
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
|
||||
|
||||
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
|
||||
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
|
||||
|
||||
#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
|
||||
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
|
||||
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
|
||||
if (displayLevel>=4) fflush(stderr); } } }
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Exceptions
|
||||
***************************************/
|
||||
#ifndef DEBUG
|
||||
# define DEBUG 0
|
||||
#endif
|
||||
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
|
||||
#define EXM_THROW(error, ...) \
|
||||
{ \
|
||||
DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
|
||||
DISPLAY("Error %i : ", error); \
|
||||
DISPLAY(__VA_ARGS__); \
|
||||
DISPLAY("\n"); \
|
||||
exit(error); \
|
||||
}
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
static const unsigned g_defaultMaxDictSize = 110 KB;
|
||||
#define DEFAULT_CLEVEL 3
|
||||
#define DEFAULT_DISPLAYLEVEL 2
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Struct
|
||||
***************************************/
|
||||
typedef struct {
|
||||
const void* dictBuffer;
|
||||
size_t dictSize;
|
||||
} dictInfo;
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Dictionary related operations
|
||||
***************************************/
|
||||
/** createDictFromFiles() :
|
||||
* Based on type of param given, train dictionary using the corresponding algorithm
|
||||
* @return dictInfo containing dictionary buffer and dictionary size
|
||||
*/
|
||||
dictInfo* createDictFromFiles(sampleInfo *info, unsigned maxDictSize,
|
||||
ZDICT_random_params_t *randomParams, ZDICT_cover_params_t *coverParams,
|
||||
ZDICT_legacy_params_t *legacyParams, ZDICT_fastCover_params_t *fastParams) {
|
||||
unsigned const displayLevel = randomParams ? randomParams->zParams.notificationLevel :
|
||||
coverParams ? coverParams->zParams.notificationLevel :
|
||||
legacyParams ? legacyParams->zParams.notificationLevel :
|
||||
fastParams ? fastParams->zParams.notificationLevel :
|
||||
DEFAULT_DISPLAYLEVEL; /* no dict */
|
||||
void* const dictBuffer = malloc(maxDictSize);
|
||||
|
||||
dictInfo* dInfo = NULL;
|
||||
|
||||
/* Checks */
|
||||
if (!dictBuffer)
|
||||
EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
|
||||
|
||||
{ size_t dictSize;
|
||||
if(randomParams) {
|
||||
dictSize = ZDICT_trainFromBuffer_random(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, *randomParams);
|
||||
}else if(coverParams) {
|
||||
/* Run the optimize version if either k or d is not provided */
|
||||
if (!coverParams->d || !coverParams->k){
|
||||
dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, coverParams);
|
||||
} else {
|
||||
dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, *coverParams);
|
||||
}
|
||||
} else if(legacyParams) {
|
||||
dictSize = ZDICT_trainFromBuffer_legacy(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, *legacyParams);
|
||||
} else if(fastParams) {
|
||||
/* Run the optimize version if either k or d is not provided */
|
||||
if (!fastParams->d || !fastParams->k) {
|
||||
dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, fastParams);
|
||||
} else {
|
||||
dictSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, *fastParams);
|
||||
}
|
||||
} else {
|
||||
dictSize = 0;
|
||||
}
|
||||
if (ZDICT_isError(dictSize)) {
|
||||
DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
|
||||
free(dictBuffer);
|
||||
return dInfo;
|
||||
}
|
||||
dInfo = (dictInfo *)malloc(sizeof(dictInfo));
|
||||
dInfo->dictBuffer = dictBuffer;
|
||||
dInfo->dictSize = dictSize;
|
||||
}
|
||||
return dInfo;
|
||||
}
|
||||
|
||||
|
||||
/** compressWithDict() :
|
||||
* Compress samples from sample buffer given dictionary stored on dictionary buffer and compression level
|
||||
* @return compression ratio
|
||||
*/
|
||||
double compressWithDict(sampleInfo *srcInfo, dictInfo* dInfo, int compressionLevel, int displayLevel) {
|
||||
/* Local variables */
|
||||
size_t totalCompressedSize = 0;
|
||||
size_t totalOriginalSize = 0;
|
||||
const unsigned hasDict = dInfo->dictSize > 0 ? 1 : 0;
|
||||
double cRatio;
|
||||
size_t dstCapacity;
|
||||
int i;
|
||||
|
||||
/* Pointers */
|
||||
ZSTD_CDict *cdict = NULL;
|
||||
ZSTD_CCtx* cctx = NULL;
|
||||
size_t *offsets = NULL;
|
||||
void* dst = NULL;
|
||||
|
||||
/* Allocate dst with enough space to compress the maximum sized sample */
|
||||
{
|
||||
size_t maxSampleSize = 0;
|
||||
for (i = 0; i < srcInfo->nbSamples; i++) {
|
||||
maxSampleSize = MAX(srcInfo->samplesSizes[i], maxSampleSize);
|
||||
}
|
||||
dstCapacity = ZSTD_compressBound(maxSampleSize);
|
||||
dst = malloc(dstCapacity);
|
||||
}
|
||||
|
||||
/* Calculate offset for each sample */
|
||||
offsets = (size_t *)malloc((srcInfo->nbSamples + 1) * sizeof(size_t));
|
||||
offsets[0] = 0;
|
||||
for (i = 1; i <= srcInfo->nbSamples; i++) {
|
||||
offsets[i] = offsets[i - 1] + srcInfo->samplesSizes[i - 1];
|
||||
}
|
||||
|
||||
/* Create the cctx */
|
||||
cctx = ZSTD_createCCtx();
|
||||
if(!cctx || !dst) {
|
||||
cRatio = -1;
|
||||
goto _cleanup;
|
||||
}
|
||||
|
||||
/* Create CDict if there's a dictionary stored on buffer */
|
||||
if (hasDict) {
|
||||
cdict = ZSTD_createCDict(dInfo->dictBuffer, dInfo->dictSize, compressionLevel);
|
||||
if(!cdict) {
|
||||
cRatio = -1;
|
||||
goto _cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
/* Compress each sample and sum their sizes*/
|
||||
const BYTE *const samples = (const BYTE *)srcInfo->srcBuffer;
|
||||
for (i = 0; i < srcInfo->nbSamples; i++) {
|
||||
size_t compressedSize;
|
||||
if(hasDict) {
|
||||
compressedSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity, samples + offsets[i], srcInfo->samplesSizes[i], cdict);
|
||||
} else {
|
||||
compressedSize = ZSTD_compressCCtx(cctx, dst, dstCapacity,samples + offsets[i], srcInfo->samplesSizes[i], compressionLevel);
|
||||
}
|
||||
if (ZSTD_isError(compressedSize)) {
|
||||
cRatio = -1;
|
||||
goto _cleanup;
|
||||
}
|
||||
totalCompressedSize += compressedSize;
|
||||
}
|
||||
|
||||
/* Sum original sizes */
|
||||
for (i = 0; i<srcInfo->nbSamples; i++) {
|
||||
totalOriginalSize += srcInfo->samplesSizes[i];
|
||||
}
|
||||
|
||||
/* Calculate compression ratio */
|
||||
DISPLAYLEVEL(2, "original size is %lu\n", totalOriginalSize);
|
||||
DISPLAYLEVEL(2, "compressed size is %lu\n", totalCompressedSize);
|
||||
cRatio = (double)totalOriginalSize/(double)totalCompressedSize;
|
||||
|
||||
_cleanup:
|
||||
free(dst);
|
||||
free(offsets);
|
||||
ZSTD_freeCCtx(cctx);
|
||||
ZSTD_freeCDict(cdict);
|
||||
return cRatio;
|
||||
}
|
||||
|
||||
|
||||
/** FreeDictInfo() :
|
||||
* Free memory allocated for dictInfo
|
||||
*/
|
||||
void freeDictInfo(dictInfo* info) {
|
||||
if (!info) return;
|
||||
if (info->dictBuffer) free((void*)(info->dictBuffer));
|
||||
free(info);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*-********************************************************
|
||||
* Benchmarking functions
|
||||
**********************************************************/
|
||||
/** benchmarkDictBuilder() :
|
||||
* Measure how long a dictionary builder takes and compression ratio with the dictionary built
|
||||
* @return 0 if benchmark successfully, 1 otherwise
|
||||
*/
|
||||
int benchmarkDictBuilder(sampleInfo *srcInfo, unsigned maxDictSize, ZDICT_random_params_t *randomParam,
|
||||
ZDICT_cover_params_t *coverParam, ZDICT_legacy_params_t *legacyParam,
|
||||
ZDICT_fastCover_params_t *fastParam) {
|
||||
/* Local variables */
|
||||
const unsigned displayLevel = randomParam ? randomParam->zParams.notificationLevel :
|
||||
coverParam ? coverParam->zParams.notificationLevel :
|
||||
legacyParam ? legacyParam->zParams.notificationLevel :
|
||||
fastParam ? fastParam->zParams.notificationLevel:
|
||||
DEFAULT_DISPLAYLEVEL; /* no dict */
|
||||
const char* name = randomParam ? "RANDOM" :
|
||||
coverParam ? "COVER" :
|
||||
legacyParam ? "LEGACY" :
|
||||
fastParam ? "FAST":
|
||||
"NODICT"; /* no dict */
|
||||
const unsigned cLevel = randomParam ? randomParam->zParams.compressionLevel :
|
||||
coverParam ? coverParam->zParams.compressionLevel :
|
||||
legacyParam ? legacyParam->zParams.compressionLevel :
|
||||
fastParam ? fastParam->zParams.compressionLevel:
|
||||
DEFAULT_CLEVEL; /* no dict */
|
||||
int result = 0;
|
||||
|
||||
/* Calculate speed */
|
||||
const UTIL_time_t begin = UTIL_getTime();
|
||||
dictInfo* dInfo = createDictFromFiles(srcInfo, maxDictSize, randomParam, coverParam, legacyParam, fastParam);
|
||||
const U64 timeMicro = UTIL_clockSpanMicro(begin);
|
||||
const double timeSec = timeMicro / (double)SEC_TO_MICRO;
|
||||
if (!dInfo) {
|
||||
DISPLAYLEVEL(1, "%s does not train successfully\n", name);
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
}
|
||||
DISPLAYLEVEL(1, "%s took %f seconds to execute \n", name, timeSec);
|
||||
|
||||
/* Calculate compression ratio */
|
||||
const double cRatio = compressWithDict(srcInfo, dInfo, cLevel, displayLevel);
|
||||
if (cRatio < 0) {
|
||||
DISPLAYLEVEL(1, "Compressing with %s dictionary does not work\n", name);
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
|
||||
}
|
||||
DISPLAYLEVEL(1, "Compression ratio with %s dictionary is %f\n", name, cRatio);
|
||||
|
||||
_cleanup:
|
||||
freeDictInfo(dInfo);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int main(int argCount, const char* argv[])
|
||||
{
|
||||
const int displayLevel = DEFAULT_DISPLAYLEVEL;
|
||||
const char* programName = argv[0];
|
||||
int result = 0;
|
||||
|
||||
/* Initialize arguments to default values */
|
||||
unsigned k = 200;
|
||||
unsigned d = 8;
|
||||
unsigned f;
|
||||
unsigned accel;
|
||||
unsigned i;
|
||||
const unsigned cLevel = DEFAULT_CLEVEL;
|
||||
const unsigned dictID = 0;
|
||||
const unsigned maxDictSize = g_defaultMaxDictSize;
|
||||
|
||||
/* Initialize table to store input files */
|
||||
const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));
|
||||
unsigned filenameIdx = 0;
|
||||
|
||||
char* fileNamesBuf = NULL;
|
||||
unsigned fileNamesNb = filenameIdx;
|
||||
const int followLinks = 0;
|
||||
const char** extendedFileList = NULL;
|
||||
|
||||
/* Parse arguments */
|
||||
for (i = 1; i < argCount; i++) {
|
||||
const char* argument = argv[i];
|
||||
if (longCommandWArg(&argument, "in=")) {
|
||||
filenameTable[filenameIdx] = argument;
|
||||
filenameIdx++;
|
||||
continue;
|
||||
}
|
||||
DISPLAYLEVEL(1, "benchmark: Incorrect parameters\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Get the list of all files recursively (because followLinks==0)*/
|
||||
extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
|
||||
&fileNamesNb, followLinks);
|
||||
if (extendedFileList) {
|
||||
unsigned u;
|
||||
for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
|
||||
free((void*)filenameTable);
|
||||
filenameTable = extendedFileList;
|
||||
filenameIdx = fileNamesNb;
|
||||
}
|
||||
|
||||
/* get sampleInfo */
|
||||
size_t blockSize = 0;
|
||||
sampleInfo* srcInfo= getSampleInfo(filenameTable,
|
||||
filenameIdx, blockSize, maxDictSize, displayLevel);
|
||||
|
||||
/* set up zParams */
|
||||
ZDICT_params_t zParams;
|
||||
zParams.compressionLevel = cLevel;
|
||||
zParams.notificationLevel = displayLevel;
|
||||
zParams.dictID = dictID;
|
||||
|
||||
/* with no dict */
|
||||
{
|
||||
const int noDictResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, NULL);
|
||||
if(noDictResult) {
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
/* for random */
|
||||
{
|
||||
ZDICT_random_params_t randomParam;
|
||||
randomParam.zParams = zParams;
|
||||
randomParam.k = k;
|
||||
const int randomResult = benchmarkDictBuilder(srcInfo, maxDictSize, &randomParam, NULL, NULL, NULL);
|
||||
DISPLAYLEVEL(2, "k=%u\n", randomParam.k);
|
||||
if(randomResult) {
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
/* for legacy */
|
||||
{
|
||||
ZDICT_legacy_params_t legacyParam;
|
||||
legacyParam.zParams = zParams;
|
||||
legacyParam.selectivityLevel = 9;
|
||||
const int legacyResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, &legacyParam, NULL);
|
||||
DISPLAYLEVEL(2, "selectivityLevel=%u\n", legacyParam.selectivityLevel);
|
||||
if(legacyResult) {
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
/* for cover */
|
||||
{
|
||||
/* for cover (optimizing k and d) */
|
||||
ZDICT_cover_params_t coverParam;
|
||||
memset(&coverParam, 0, sizeof(coverParam));
|
||||
coverParam.zParams = zParams;
|
||||
coverParam.splitPoint = 1.0;
|
||||
coverParam.steps = 40;
|
||||
coverParam.nbThreads = 1;
|
||||
const int coverOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL, NULL);
|
||||
DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", coverParam.k, coverParam.d, coverParam.steps, (unsigned)(coverParam.splitPoint * 100));
|
||||
if(coverOptResult) {
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
}
|
||||
|
||||
/* for cover (with k and d provided) */
|
||||
const int coverResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL, NULL);
|
||||
DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", coverParam.k, coverParam.d, coverParam.steps, (unsigned)(coverParam.splitPoint * 100));
|
||||
if(coverResult) {
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* for fastCover */
|
||||
for (f = 15; f < 25; f++){
|
||||
DISPLAYLEVEL(2, "current f is %u\n", f);
|
||||
for (accel = 1; accel < 11; accel++) {
|
||||
DISPLAYLEVEL(2, "current accel is %u\n", accel);
|
||||
/* for fastCover (optimizing k and d) */
|
||||
ZDICT_fastCover_params_t fastParam;
|
||||
memset(&fastParam, 0, sizeof(fastParam));
|
||||
fastParam.zParams = zParams;
|
||||
fastParam.f = f;
|
||||
fastParam.steps = 40;
|
||||
fastParam.nbThreads = 1;
|
||||
fastParam.accel = accel;
|
||||
const int fastOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam);
|
||||
DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100), fastParam.accel);
|
||||
if(fastOptResult) {
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
}
|
||||
|
||||
/* for fastCover (with k and d provided) */
|
||||
for (i = 0; i < 5; i++) {
|
||||
const int fastResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam);
|
||||
DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100), fastParam.accel);
|
||||
if(fastResult) {
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Free allocated memory */
|
||||
_cleanup:
|
||||
UTIL_freeFileList(extendedFileList, fileNamesBuf);
|
||||
freeSampleInfo(srcInfo);
|
||||
return result;
|
||||
}
|
@ -0,0 +1,6 @@
|
||||
/* ZDICT_trainFromBuffer_legacy() :
|
||||
 * issue : samplesBuffer needs to be followed by a noisy guard band.
|
||||
* work around : duplicate the buffer, and add the noise */
|
||||
size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
|
||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
||||
ZDICT_legacy_params_t params);
|
2
contrib/experimental_dict_builders/benchmarkDictBuilder/test.sh
Executable file
2
contrib/experimental_dict_builders/benchmarkDictBuilder/test.sh
Executable file
@ -0,0 +1,2 @@
|
||||
# Run the benchmark on the zstd common sources; the announced path matches the one actually used.
echo "Benchmark with in=../../../lib/common"
./benchmark in=../../../lib/common
|
54
contrib/experimental_dict_builders/fastCover/Makefile
Normal file
54
contrib/experimental_dict_builders/fastCover/Makefile
Normal file
@ -0,0 +1,54 @@
|
||||
ARG :=
|
||||
|
||||
CC ?= gcc
|
||||
CFLAGS ?= -O3 -g
|
||||
INCLUDES := -I ../../../programs -I ../randomDictBuilder -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder
|
||||
|
||||
IO_FILE := ../randomDictBuilder/io.c
|
||||
|
||||
TEST_INPUT := ../../../lib
|
||||
TEST_OUTPUT := fastCoverDict
|
||||
|
||||
all: main run clean
|
||||
|
||||
.PHONY: test
|
||||
test: main testrun testshell clean
|
||||
|
||||
.PHONY: run
|
||||
run:
|
||||
echo "Building a fastCover dictionary with given arguments"
|
||||
./main $(ARG)
|
||||
|
||||
main: main.o io.o fastCover.o libzstd.a
|
||||
$(CC) $(CFLAGS) main.o io.o fastCover.o libzstd.a -o main
|
||||
|
||||
main.o: main.c
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c main.c
|
||||
|
||||
fastCover.o: fastCover.c
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c fastCover.c
|
||||
|
||||
io.o: $(IO_FILE)
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c $(IO_FILE)
|
||||
|
||||
libzstd.a:
|
||||
$(MAKE) MOREFLAGS=-g -C ../../../lib libzstd.a
|
||||
mv ../../../lib/libzstd.a .
|
||||
|
||||
.PHONY: testrun
|
||||
testrun: main
|
||||
echo "Run with $(TEST_INPUT) and $(TEST_OUTPUT) "
|
||||
./main in=$(TEST_INPUT) out=$(TEST_OUTPUT)
|
||||
zstd -be3 -D $(TEST_OUTPUT) -r $(TEST_INPUT) -q
|
||||
rm -f $(TEST_OUTPUT)
|
||||
|
||||
.PHONY: testshell
|
||||
testshell: test.sh
|
||||
sh test.sh
|
||||
echo "Finish running test.sh"
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
rm -f *.o main libzstd.a
|
||||
$(MAKE) -C ../../../lib clean
|
||||
echo "Cleaning is completed"
|
24
contrib/experimental_dict_builders/fastCover/README.md
Normal file
24
contrib/experimental_dict_builders/fastCover/README.md
Normal file
@ -0,0 +1,24 @@
|
||||
FastCover Dictionary Builder
|
||||
|
||||
### Permitted Arguments:
|
||||
Input File/Directory (in=fileName): required; file/directory used to build dictionary; if directory, will operate recursively for files inside directory; can include multiple files/directories, each following "in="
|
||||
Output Dictionary (out=dictName): if not provided, default to fastCoverDict
|
||||
Dictionary ID (dictID=#): nonnegative number; if not provided, default to 0
|
||||
Maximum Dictionary Size (maxdict=#): positive number; in bytes, if not provided, default to 110KB
|
||||
Size of Selected Segment (k=#): positive number; in bytes; if not provided, default to 200
|
||||
Size of Dmer (d=#): either 6 or 8; if not provided, default to 8
|
||||
Number of steps (steps=#): positive number, if not provided, default to 32
|
||||
Percentage of samples used for training(split=#): positive number; if not provided, default to 100
|
||||
|
||||
|
||||
### Running Test:
|
||||
make test
|
||||
|
||||
|
||||
### Usage:
|
||||
To build a FASTCOVER dictionary with the provided arguments: make ARG= followed by arguments
|
||||
If k or d is not provided, the optimized version of FASTCOVER is run.
|
||||
|
||||
### Examples:
|
||||
make ARG="in=../../../lib/dictBuilder out=dict100 dictID=520"
|
||||
make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
|
809
contrib/experimental_dict_builders/fastCover/fastCover.c
Normal file
809
contrib/experimental_dict_builders/fastCover/fastCover.c
Normal file
@ -0,0 +1,809 @@
|
||||
/*-*************************************
|
||||
* Dependencies
|
||||
***************************************/
|
||||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* memset */
|
||||
#include <time.h> /* clock */
|
||||
#include "mem.h" /* read */
|
||||
#include "pool.h"
|
||||
#include "threading.h"
|
||||
#include "fastCover.h"
|
||||
#include "zstd_internal.h" /* includes zstd.h */
|
||||
#include "zdict.h"
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
|
||||
#define FASTCOVER_MAX_F 32
|
||||
#define DEFAULT_SPLITPOINT 1.0
|
||||
|
||||
/*-*************************************
|
||||
* Console display
|
||||
***************************************/
|
||||
static int g_displayLevel = 2;
|
||||
#define DISPLAY(...) \
|
||||
{ \
|
||||
fprintf(stderr, __VA_ARGS__); \
|
||||
fflush(stderr); \
|
||||
}
|
||||
#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
|
||||
if (displayLevel >= l) { \
|
||||
DISPLAY(__VA_ARGS__); \
|
||||
} /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
|
||||
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
|
||||
|
||||
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
|
||||
if (displayLevel >= l) { \
|
||||
if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
|
||||
g_time = clock(); \
|
||||
DISPLAY(__VA_ARGS__); \
|
||||
} \
|
||||
}
|
||||
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
|
||||
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
||||
static clock_t g_time = 0;
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Hash Functions
|
||||
***************************************/
|
||||
static const U64 prime6bytes = 227718039650203ULL;
|
||||
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
|
||||
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
|
||||
|
||||
static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
|
||||
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
|
||||
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
|
||||
|
||||
|
||||
/**
|
||||
* Hash the d-byte value pointed to by p and mod 2^f
|
||||
*/
|
||||
static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 h, unsigned d) {
|
||||
if (d == 6) {
|
||||
return ZSTD_hash6Ptr(p, h) & ((1 << h) - 1);
|
||||
}
|
||||
return ZSTD_hash8Ptr(p, h) & ((1 << h) - 1);
|
||||
}
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Context
|
||||
***************************************/
|
||||
typedef struct {
|
||||
const BYTE *samples;
|
||||
size_t *offsets;
|
||||
const size_t *samplesSizes;
|
||||
size_t nbSamples;
|
||||
size_t nbTrainSamples;
|
||||
size_t nbTestSamples;
|
||||
size_t nbDmers;
|
||||
U32 *freqs;
|
||||
U16 *segmentFreqs;
|
||||
unsigned d;
|
||||
} FASTCOVER_ctx_t;
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Helper functions
|
||||
***************************************/
|
||||
/**
 * Adds up the first `nbSamples` entries of `samplesSizes`.
 * Returns 0 when `nbSamples` is 0.
 */
static size_t FASTCOVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
  size_t total = 0;
  unsigned remaining = nbSamples;
  /* Walk the array from the back; unsigned addition is order independent. */
  while (remaining-- > 0) {
    total += samplesSizes[remaining];
  }
  return total;
}
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* fast functions
|
||||
***************************************/
|
||||
/**
|
||||
* A segment is a range in the source as well as the score of the segment.
|
||||
*/
|
||||
typedef struct {
|
||||
U32 begin;
|
||||
U32 end;
|
||||
U32 score;
|
||||
} FASTCOVER_segment_t;
|
||||
|
||||
|
||||
/**
|
||||
* Selects the best segment in an epoch.
|
||||
* Segments of are scored according to the function:
|
||||
*
|
||||
* Let F(d) be the frequency of all dmers with hash value d.
|
||||
* Let S_i be hash value of the dmer at position i of segment S which has length k.
|
||||
*
|
||||
* Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
|
||||
*
|
||||
* Once the dmer with hash value d is in the dictionary we set F(d) = F(d)/2.
|
||||
*/
|
||||
static FASTCOVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
|
||||
U32 *freqs, U32 begin,U32 end,
|
||||
ZDICT_fastCover_params_t parameters) {
|
||||
/* Constants */
|
||||
const U32 k = parameters.k;
|
||||
const U32 d = parameters.d;
|
||||
const U32 dmersInK = k - d + 1;
|
||||
/* Try each segment (activeSegment) and save the best (bestSegment) */
|
||||
FASTCOVER_segment_t bestSegment = {0, 0, 0};
|
||||
FASTCOVER_segment_t activeSegment;
|
||||
/* Reset the activeDmers in the segment */
|
||||
/* The activeSegment starts at the beginning of the epoch. */
|
||||
activeSegment.begin = begin;
|
||||
activeSegment.end = begin;
|
||||
activeSegment.score = 0;
|
||||
{
|
||||
/* Slide the activeSegment through the whole epoch.
|
||||
* Save the best segment in bestSegment.
|
||||
*/
|
||||
while (activeSegment.end < end) {
|
||||
/* Get hash value of current dmer */
|
||||
const size_t index = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, parameters.f, ctx->d);
|
||||
/* Add frequency of this index to score if this is the first occurrence of index in active segment */
|
||||
if (ctx->segmentFreqs[index] == 0) {
|
||||
activeSegment.score += freqs[index];
|
||||
}
|
||||
ctx->segmentFreqs[index] += 1;
|
||||
/* Increment end of segment */
|
||||
activeSegment.end += 1;
|
||||
/* If the window is now too large, drop the first position */
|
||||
if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
|
||||
/* Get hash value of the dmer to be eliminated from active segment */
|
||||
const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, parameters.f, ctx->d);
|
||||
ctx->segmentFreqs[delIndex] -= 1;
|
||||
/* Subtract frequency of this index from score if this is the last occurrence of this index in active segment */
|
||||
if (ctx->segmentFreqs[delIndex] == 0) {
|
||||
activeSegment.score -= freqs[delIndex];
|
||||
}
|
||||
/* Increment start of segment */
|
||||
activeSegment.begin += 1;
|
||||
}
|
||||
/* If this segment is the best so far save it */
|
||||
if (activeSegment.score > bestSegment.score) {
|
||||
bestSegment = activeSegment;
|
||||
}
|
||||
}
|
||||
/* Zero out rest of segmentFreqs array */
|
||||
while (activeSegment.begin < end) {
|
||||
const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, parameters.f, ctx->d);
|
||||
ctx->segmentFreqs[delIndex] -= 1;
|
||||
activeSegment.begin += 1;
|
||||
}
|
||||
}
|
||||
{
|
||||
/* Trim off the zero frequency head and tail from the segment. */
|
||||
U32 newBegin = bestSegment.end;
|
||||
U32 newEnd = bestSegment.begin;
|
||||
U32 pos;
|
||||
for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
|
||||
const size_t index = FASTCOVER_hashPtrToIndex(ctx->samples + pos, parameters.f, ctx->d);
|
||||
U32 freq = freqs[index];
|
||||
if (freq != 0) {
|
||||
newBegin = MIN(newBegin, pos);
|
||||
newEnd = pos + 1;
|
||||
}
|
||||
}
|
||||
bestSegment.begin = newBegin;
|
||||
bestSegment.end = newEnd;
|
||||
}
|
||||
{
|
||||
/* Zero the frequency of hash value of each dmer covered by the chosen segment. */
|
||||
U32 pos;
|
||||
for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
|
||||
const size_t i = FASTCOVER_hashPtrToIndex(ctx->samples + pos, parameters.f, ctx->d);
|
||||
freqs[i] = 0;
|
||||
}
|
||||
}
|
||||
return bestSegment;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check the validity of the parameters.
|
||||
* Returns non-zero if the parameters are valid and 0 otherwise.
|
||||
*/
|
||||
static int FASTCOVER_checkParameters(ZDICT_fastCover_params_t parameters,
|
||||
size_t maxDictSize) {
|
||||
/* k, d, and f are required parameters */
|
||||
if (parameters.d == 0 || parameters.k == 0 || parameters.f == 0) {
|
||||
return 0;
|
||||
}
|
||||
/* d has to be 6 or 8 */
|
||||
if (parameters.d != 6 && parameters.d != 8) {
|
||||
return 0;
|
||||
}
|
||||
/* 0 < f <= FASTCOVER_MAX_F */
|
||||
if (parameters.f > FASTCOVER_MAX_F) {
|
||||
return 0;
|
||||
}
|
||||
/* k <= maxDictSize */
|
||||
if (parameters.k > maxDictSize) {
|
||||
return 0;
|
||||
}
|
||||
/* d <= k */
|
||||
if (parameters.d > parameters.k) {
|
||||
return 0;
|
||||
}
|
||||
/* 0 < splitPoint <= 1 */
|
||||
if (parameters.splitPoint <= 0 || parameters.splitPoint > 1) {
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Clean up a context initialized with `FASTCOVER_ctx_init()`.
|
||||
*/
|
||||
static void FASTCOVER_ctx_destroy(FASTCOVER_ctx_t *ctx) {
|
||||
if (!ctx) {
|
||||
return;
|
||||
}
|
||||
if (ctx->segmentFreqs) {
|
||||
free(ctx->segmentFreqs);
|
||||
ctx->segmentFreqs = NULL;
|
||||
}
|
||||
if (ctx->freqs) {
|
||||
free(ctx->freqs);
|
||||
ctx->freqs = NULL;
|
||||
}
|
||||
if (ctx->offsets) {
|
||||
free(ctx->offsets);
|
||||
ctx->offsets = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate for frequency of hash value of each dmer in ctx->samples
|
||||
*/
|
||||
static void FASTCOVER_computeFrequency(U32 *freqs, unsigned f, FASTCOVER_ctx_t *ctx){
|
||||
size_t start; /* start of current dmer */
|
||||
for (unsigned i = 0; i < ctx->nbTrainSamples; i++) {
|
||||
size_t currSampleStart = ctx->offsets[i];
|
||||
size_t currSampleEnd = ctx->offsets[i+1];
|
||||
start = currSampleStart;
|
||||
while (start + ctx->d <= currSampleEnd) {
|
||||
const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, ctx->d);
|
||||
freqs[dmerIndex]++;
|
||||
start++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare a context for dictionary building.
|
||||
* The context is only dependent on the parameter `d` and can used multiple
|
||||
* times.
|
||||
* Returns 1 on success or zero on error.
|
||||
* The context must be destroyed with `FASTCOVER_ctx_destroy()`.
|
||||
*/
|
||||
static int FASTCOVER_ctx_init(FASTCOVER_ctx_t *ctx, const void *samplesBuffer,
|
||||
const size_t *samplesSizes, unsigned nbSamples,
|
||||
unsigned d, double splitPoint, unsigned f) {
|
||||
const BYTE *const samples = (const BYTE *)samplesBuffer;
|
||||
const size_t totalSamplesSize = FASTCOVER_sum(samplesSizes, nbSamples);
|
||||
/* Split samples into testing and training sets */
|
||||
const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples;
|
||||
const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples;
|
||||
const size_t trainingSamplesSize = splitPoint < 1.0 ? FASTCOVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize;
|
||||
const size_t testSamplesSize = splitPoint < 1.0 ? FASTCOVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize;
|
||||
/* Checks */
|
||||
if (totalSamplesSize < MAX(d, sizeof(U64)) ||
|
||||
totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
|
||||
DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
|
||||
(U32)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
|
||||
return 0;
|
||||
}
|
||||
/* Check if there are at least 5 training samples */
|
||||
if (nbTrainSamples < 5) {
|
||||
DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
|
||||
return 0;
|
||||
}
|
||||
/* Check if there's testing sample */
|
||||
if (nbTestSamples < 1) {
|
||||
DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
|
||||
return 0;
|
||||
}
|
||||
/* Zero the context */
|
||||
memset(ctx, 0, sizeof(*ctx));
|
||||
DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
|
||||
(U32)trainingSamplesSize);
|
||||
DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
|
||||
(U32)testSamplesSize);
|
||||
|
||||
ctx->samples = samples;
|
||||
ctx->samplesSizes = samplesSizes;
|
||||
ctx->nbSamples = nbSamples;
|
||||
ctx->nbTrainSamples = nbTrainSamples;
|
||||
ctx->nbTestSamples = nbTestSamples;
|
||||
ctx->nbDmers = trainingSamplesSize - d + 1;
|
||||
ctx->d = d;
|
||||
|
||||
/* The offsets of each file */
|
||||
ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t));
|
||||
if (!ctx->offsets) {
|
||||
DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
|
||||
FASTCOVER_ctx_destroy(ctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Fill offsets from the samplesSizes */
|
||||
{
|
||||
U32 i;
|
||||
ctx->offsets[0] = 0;
|
||||
for (i = 1; i <= nbSamples; ++i) {
|
||||
ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1];
|
||||
}
|
||||
}
|
||||
|
||||
/* Initialize frequency array of size 2^f */
|
||||
ctx->freqs = (U32 *)calloc((1 << f), sizeof(U32));
|
||||
ctx->segmentFreqs = (U16 *)calloc((1 << f), sizeof(U16));
|
||||
DISPLAYLEVEL(2, "Computing frequencies\n");
|
||||
FASTCOVER_computeFrequency(ctx->freqs, f, ctx);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Given the prepared context build the dictionary.
|
||||
*/
|
||||
static size_t FASTCOVER_buildDictionary(const FASTCOVER_ctx_t *ctx, U32 *freqs,
|
||||
void *dictBuffer,
|
||||
size_t dictBufferCapacity,
|
||||
ZDICT_fastCover_params_t parameters){
|
||||
BYTE *const dict = (BYTE *)dictBuffer;
|
||||
size_t tail = dictBufferCapacity;
|
||||
/* Divide the data up into epochs of equal size.
|
||||
* We will select at least one segment from each epoch.
|
||||
*/
|
||||
const U32 epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k));
|
||||
const U32 epochSize = (U32)(ctx->nbDmers / epochs);
|
||||
size_t epoch;
|
||||
DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs,
|
||||
epochSize);
|
||||
/* Loop through the epochs until there are no more segments or the dictionary
|
||||
* is full.
|
||||
*/
|
||||
for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
|
||||
const U32 epochBegin = (U32)(epoch * epochSize);
|
||||
const U32 epochEnd = epochBegin + epochSize;
|
||||
size_t segmentSize;
|
||||
/* Select a segment */
|
||||
FASTCOVER_segment_t segment = FASTCOVER_selectSegment(
|
||||
ctx, freqs, epochBegin, epochEnd, parameters);
|
||||
|
||||
/* If the segment covers no dmers, then we are out of content */
|
||||
if (segment.score == 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* Trim the segment if necessary and if it is too small then we are done */
|
||||
segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
|
||||
if (segmentSize < parameters.d) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* We fill the dictionary from the back to allow the best segments to be
|
||||
* referenced with the smallest offsets.
|
||||
*/
|
||||
tail -= segmentSize;
|
||||
memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
|
||||
DISPLAYUPDATE(
|
||||
2, "\r%u%% ",
|
||||
(U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
|
||||
}
|
||||
DISPLAYLEVEL(2, "\r%79s\r", "");
|
||||
return tail;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* FASTCOVER_best_t is used for two purposes:
|
||||
* 1. Synchronizing threads.
|
||||
* 2. Saving the best parameters and dictionary.
|
||||
*
|
||||
* All of the methods except FASTCOVER_best_init() are thread safe if zstd is
|
||||
* compiled with multithreaded support.
|
||||
*/
|
||||
typedef struct fast_best_s {
|
||||
ZSTD_pthread_mutex_t mutex;
|
||||
ZSTD_pthread_cond_t cond;
|
||||
size_t liveJobs;
|
||||
void *dict;
|
||||
size_t dictSize;
|
||||
ZDICT_fastCover_params_t parameters;
|
||||
size_t compressedSize;
|
||||
} FASTCOVER_best_t;
|
||||
|
||||
/**
|
||||
* Initialize the `FASTCOVER_best_t`.
|
||||
*/
|
||||
static void FASTCOVER_best_init(FASTCOVER_best_t *best) {
|
||||
if (best==NULL) return; /* compatible with init on NULL */
|
||||
(void)ZSTD_pthread_mutex_init(&best->mutex, NULL);
|
||||
(void)ZSTD_pthread_cond_init(&best->cond, NULL);
|
||||
best->liveJobs = 0;
|
||||
best->dict = NULL;
|
||||
best->dictSize = 0;
|
||||
best->compressedSize = (size_t)-1;
|
||||
memset(&best->parameters, 0, sizeof(best->parameters));
|
||||
}
|
||||
|
||||
/**
|
||||
* Wait until liveJobs == 0.
|
||||
*/
|
||||
static void FASTCOVER_best_wait(FASTCOVER_best_t *best) {
|
||||
if (!best) {
|
||||
return;
|
||||
}
|
||||
ZSTD_pthread_mutex_lock(&best->mutex);
|
||||
while (best->liveJobs != 0) {
|
||||
ZSTD_pthread_cond_wait(&best->cond, &best->mutex);
|
||||
}
|
||||
ZSTD_pthread_mutex_unlock(&best->mutex);
|
||||
}
|
||||
|
||||
/**
|
||||
* Call FASTCOVER_best_wait() and then destroy the FASTCOVER_best_t.
|
||||
*/
|
||||
static void FASTCOVER_best_destroy(FASTCOVER_best_t *best) {
|
||||
if (!best) {
|
||||
return;
|
||||
}
|
||||
FASTCOVER_best_wait(best);
|
||||
if (best->dict) {
|
||||
free(best->dict);
|
||||
}
|
||||
ZSTD_pthread_mutex_destroy(&best->mutex);
|
||||
ZSTD_pthread_cond_destroy(&best->cond);
|
||||
}
|
||||
|
||||
/**
|
||||
* Called when a thread is about to be launched.
|
||||
* Increments liveJobs.
|
||||
*/
|
||||
static void FASTCOVER_best_start(FASTCOVER_best_t *best) {
|
||||
if (!best) {
|
||||
return;
|
||||
}
|
||||
ZSTD_pthread_mutex_lock(&best->mutex);
|
||||
++best->liveJobs;
|
||||
ZSTD_pthread_mutex_unlock(&best->mutex);
|
||||
}
|
||||
|
||||
/**
|
||||
* Called when a thread finishes executing, both on error or success.
|
||||
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
|
||||
* If this dictionary is the best so far save it and its parameters.
|
||||
*/
|
||||
static void FASTCOVER_best_finish(FASTCOVER_best_t *best, size_t compressedSize,
|
||||
ZDICT_fastCover_params_t parameters, void *dict,
|
||||
size_t dictSize) {
|
||||
if (!best) {
|
||||
return;
|
||||
}
|
||||
{
|
||||
size_t liveJobs;
|
||||
ZSTD_pthread_mutex_lock(&best->mutex);
|
||||
--best->liveJobs;
|
||||
liveJobs = best->liveJobs;
|
||||
/* If the new dictionary is better */
|
||||
if (compressedSize < best->compressedSize) {
|
||||
/* Allocate space if necessary */
|
||||
if (!best->dict || best->dictSize < dictSize) {
|
||||
if (best->dict) {
|
||||
free(best->dict);
|
||||
}
|
||||
best->dict = malloc(dictSize);
|
||||
if (!best->dict) {
|
||||
best->compressedSize = ERROR(GENERIC);
|
||||
best->dictSize = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
/* Save the dictionary, parameters, and size */
|
||||
memcpy(best->dict, dict, dictSize);
|
||||
best->dictSize = dictSize;
|
||||
best->parameters = parameters;
|
||||
best->compressedSize = compressedSize;
|
||||
}
|
||||
ZSTD_pthread_mutex_unlock(&best->mutex);
|
||||
if (liveJobs == 0) {
|
||||
ZSTD_pthread_cond_broadcast(&best->cond);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parameters for FASTCOVER_tryParameters().
|
||||
*/
|
||||
typedef struct FASTCOVER_tryParameters_data_s {
|
||||
const FASTCOVER_ctx_t *ctx;
|
||||
FASTCOVER_best_t *best;
|
||||
size_t dictBufferCapacity;
|
||||
ZDICT_fastCover_params_t parameters;
|
||||
} FASTCOVER_tryParameters_data_t;
|
||||
|
||||
/**
|
||||
* Tries a set of parameters and updates the FASTCOVER_best_t with the results.
|
||||
* This function is thread safe if zstd is compiled with multithreaded support.
|
||||
* It takes its parameters as an *OWNING* opaque pointer to support threading.
|
||||
*/
|
||||
static void FASTCOVER_tryParameters(void *opaque) {
|
||||
/* Save parameters as local variables */
|
||||
FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t *)opaque;
|
||||
const FASTCOVER_ctx_t *const ctx = data->ctx;
|
||||
const ZDICT_fastCover_params_t parameters = data->parameters;
|
||||
size_t dictBufferCapacity = data->dictBufferCapacity;
|
||||
size_t totalCompressedSize = ERROR(GENERIC);
|
||||
/* Allocate space for hash table, dict, and freqs */
|
||||
BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
|
||||
U32 *freqs = (U32*) malloc((1 << parameters.f) * sizeof(U32));
|
||||
if (!dict || !freqs) {
|
||||
DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
|
||||
goto _cleanup;
|
||||
}
|
||||
/* Copy the frequencies because we need to modify them */
|
||||
memcpy(freqs, ctx->freqs, (1 << parameters.f) * sizeof(U32));
|
||||
/* Build the dictionary */
|
||||
{
|
||||
const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict,
|
||||
dictBufferCapacity, parameters);
|
||||
|
||||
dictBufferCapacity = ZDICT_finalizeDictionary(
|
||||
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
|
||||
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples,
|
||||
parameters.zParams);
|
||||
if (ZDICT_isError(dictBufferCapacity)) {
|
||||
DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
|
||||
goto _cleanup;
|
||||
}
|
||||
}
|
||||
/* Check total compressed size */
|
||||
{
|
||||
/* Pointers */
|
||||
ZSTD_CCtx *cctx;
|
||||
ZSTD_CDict *cdict;
|
||||
void *dst;
|
||||
/* Local variables */
|
||||
size_t dstCapacity;
|
||||
size_t i;
|
||||
/* Allocate dst with enough space to compress the maximum sized sample */
|
||||
{
|
||||
size_t maxSampleSize = 0;
|
||||
i = parameters.splitPoint < 1.0 ? ctx->nbTrainSamples : 0;
|
||||
for (; i < ctx->nbSamples; ++i) {
|
||||
maxSampleSize = MAX(ctx->samplesSizes[i], maxSampleSize);
|
||||
}
|
||||
dstCapacity = ZSTD_compressBound(maxSampleSize);
|
||||
dst = malloc(dstCapacity);
|
||||
}
|
||||
/* Create the cctx and cdict */
|
||||
cctx = ZSTD_createCCtx();
|
||||
cdict = ZSTD_createCDict(dict, dictBufferCapacity,
|
||||
parameters.zParams.compressionLevel);
|
||||
if (!dst || !cctx || !cdict) {
|
||||
goto _compressCleanup;
|
||||
}
|
||||
/* Compress each sample and sum their sizes (or error) */
|
||||
totalCompressedSize = dictBufferCapacity;
|
||||
i = parameters.splitPoint < 1.0 ? ctx->nbTrainSamples : 0;
|
||||
for (; i < ctx->nbSamples; ++i) {
|
||||
const size_t size = ZSTD_compress_usingCDict(
|
||||
cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i],
|
||||
ctx->samplesSizes[i], cdict);
|
||||
if (ZSTD_isError(size)) {
|
||||
totalCompressedSize = ERROR(GENERIC);
|
||||
goto _compressCleanup;
|
||||
}
|
||||
totalCompressedSize += size;
|
||||
}
|
||||
_compressCleanup:
|
||||
ZSTD_freeCCtx(cctx);
|
||||
ZSTD_freeCDict(cdict);
|
||||
if (dst) {
|
||||
free(dst);
|
||||
}
|
||||
}
|
||||
|
||||
_cleanup:
|
||||
FASTCOVER_best_finish(data->best, totalCompressedSize, parameters, dict,
|
||||
dictBufferCapacity);
|
||||
free(data);
|
||||
if (dict) {
|
||||
free(dict);
|
||||
}
|
||||
if (freqs) {
|
||||
free(freqs);
|
||||
}
|
||||
}
|
||||
|
||||
/* Trains a dictionary with the exact parameters given (d, k and f required).
 * The split point is forced to 1.0, so every sample is used for training.
 * Returns the size of the dictionary written to `dictBuffer`, or an error code.
 */
ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(
    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
    const size_t *samplesSizes, unsigned nbSamples, ZDICT_fastCover_params_t parameters) {
  BYTE* const dictStart = (BYTE*)dictBuffer;
  FASTCOVER_ctx_t ctx;
  size_t contentOffset;
  size_t result;

  parameters.splitPoint = 1.0;
  /* Initialize global data */
  g_displayLevel = parameters.zParams.notificationLevel;

  /* Checks */
  if (!FASTCOVER_checkParameters(parameters, dictBufferCapacity)) {
    DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
    return ERROR(GENERIC);
  }
  if (nbSamples == 0) {
    DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
    return ERROR(GENERIC);
  }
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
    DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
                 ZDICT_DICTSIZE_MIN);
    return ERROR(dstSize_tooSmall);
  }

  /* Initialize context */
  if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
                          parameters.d, parameters.splitPoint, parameters.f)) {
    DISPLAYLEVEL(1, "Failed to initialize context\n");
    return ERROR(GENERIC);
  }

  /* Select the content (written at the back of the buffer), then let zdict
   * add the header and entropy tables */
  DISPLAYLEVEL(2, "Building dictionary\n");
  contentOffset = FASTCOVER_buildDictionary(&ctx, ctx.freqs, dictBuffer,
                                            dictBufferCapacity, parameters);
  result = ZDICT_finalizeDictionary(
      dictStart, dictBufferCapacity, dictStart + contentOffset,
      dictBufferCapacity - contentOffset, samplesBuffer, samplesSizes,
      (unsigned)ctx.nbTrainSamples, parameters.zParams);
  if (!ZSTD_isError(result)) {
    DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
                 (U32)result);
  }
  FASTCOVER_ctx_destroy(&ctx);
  return result;
}
|
||||
|
||||
|
||||
|
||||
/* Searches for the best (d, k) pair and returns the dictionary built with it.
 *
 * Zero-valued parameters select a default: d is tried over {6, 8}, k over
 * [50, 2000] in `steps` steps (default 40), f defaults to 23 and a
 * splitPoint <= 0 to DEFAULT_SPLITPOINT. One context is built per value of d;
 * each value of k is evaluated by FASTCOVER_tryParameters(), on a thread pool
 * when nbThreads > 1.
 *
 * Returns the size of the dictionary written to `dictBuffer`, or an error
 * code. On success `*parameters` holds the selected parameters.
 */
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(
    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
    const size_t *samplesSizes, unsigned nbSamples,
    ZDICT_fastCover_params_t *parameters) {
  /* constants */
  const unsigned nbThreads = parameters->nbThreads;
  const double splitPoint =
      parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
  const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
  const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
  const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
  const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
  const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
  const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
  const unsigned kIterations =
      (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
  const unsigned f = parameters->f == 0 ? 23 : parameters->f;

  /* Local variables */
  const int displayLevel = parameters->zParams.notificationLevel;
  unsigned iteration = 1;
  unsigned d;
  unsigned k;
  FASTCOVER_best_t best;
  POOL_ctx *pool = NULL;

  /* Checks */
  if (splitPoint <= 0 || splitPoint > 1) {
    LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");
    return ERROR(GENERIC);
  }
  if (kMinK < kMaxD || kMaxK < kMinK) {
    LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n");
    return ERROR(GENERIC);
  }
  /* f sizes the tables allocated by FASTCOVER_ctx_init(), which runs before
   * the per-iteration parameter check: validate it before any allocation */
  if (f > FASTCOVER_MAX_F) {
    LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect f\n");
    return ERROR(GENERIC);
  }
  if (nbSamples == 0) {
    DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
    return ERROR(GENERIC);
  }
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
    DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
                 ZDICT_DICTSIZE_MIN);
    return ERROR(dstSize_tooSmall);
  }
  if (nbThreads > 1) {
    pool = POOL_create(nbThreads, 1);
    if (!pool) {
      return ERROR(memory_allocation);
    }
  }
  /* Initialization */
  FASTCOVER_best_init(&best);
  /* Turn down global display level to clean up display at level 2 and below */
  g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1;
  /* Loop through d first because each new value needs a new context */
  LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
                    kIterations);
  for (d = kMinD; d <= kMaxD; d += 2) {
    /* Initialize the context for this value of d */
    FASTCOVER_ctx_t ctx;
    LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
    if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f)) {
      LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
      FASTCOVER_best_destroy(&best);
      POOL_free(pool);
      return ERROR(GENERIC);
    }
    /* Loop through k reusing the same context */
    for (k = kMinK; k <= kMaxK; k += kStepSize) {
      /* Prepare the arguments */
      FASTCOVER_tryParameters_data_t *data = (FASTCOVER_tryParameters_data_t *)malloc(
          sizeof(FASTCOVER_tryParameters_data_t));
      LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k);
      if (!data) {
        LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n");
        /* best_destroy waits for the running jobs, which still use ctx */
        FASTCOVER_best_destroy(&best);
        FASTCOVER_ctx_destroy(&ctx);
        POOL_free(pool);
        return ERROR(GENERIC);
      }
      data->ctx = &ctx;
      data->best = &best;
      data->dictBufferCapacity = dictBufferCapacity;
      data->parameters = *parameters;
      data->parameters.k = k;
      data->parameters.d = d;
      data->parameters.f = f;
      data->parameters.splitPoint = splitPoint;
      data->parameters.steps = kSteps;
      data->parameters.zParams.notificationLevel = g_displayLevel;
      /* Check the parameters */
      if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity)) {
        DISPLAYLEVEL(1, "fastCover parameters incorrect\n");
        free(data);
        continue;
      }
      /* Call the function and pass ownership of data to it */
      FASTCOVER_best_start(&best);
      if (pool) {
        POOL_add(pool, &FASTCOVER_tryParameters, data);
      } else {
        FASTCOVER_tryParameters(data);
      }
      /* Print status */
      LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
                         (U32)((iteration * 100) / kIterations));
      ++iteration;
    }
    /* Every job must be done before the context it reads is destroyed */
    FASTCOVER_best_wait(&best);
    FASTCOVER_ctx_destroy(&ctx);
  }
  LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");
  /* Fill the output buffer and parameters with output of the best parameters */
  {
    const size_t dictSize = best.dictSize;
    if (ZSTD_isError(best.compressedSize)) {
      const size_t compressedSize = best.compressedSize;
      FASTCOVER_best_destroy(&best);
      POOL_free(pool);
      return compressedSize;
    }
    *parameters = best.parameters;
    memcpy(dictBuffer, best.dict, dictSize);
    FASTCOVER_best_destroy(&best);
    POOL_free(pool);
    return dictSize;
  }

}
|
57
contrib/experimental_dict_builders/fastCover/fastCover.h
Normal file
57
contrib/experimental_dict_builders/fastCover/fastCover.h
Normal file
@ -0,0 +1,57 @@
|
||||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* memset */
|
||||
#include <time.h> /* clock */
|
||||
#include "mem.h" /* read */
|
||||
#include "pool.h"
|
||||
#include "threading.h"
|
||||
#include "zstd_internal.h" /* includes zstd.h */
|
||||
#ifndef ZDICT_STATIC_LINKING_ONLY
|
||||
#define ZDICT_STATIC_LINKING_ONLY
|
||||
#endif
|
||||
#include "zdict.h"
|
||||
|
||||
|
||||
typedef struct {
|
||||
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
|
||||
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
|
||||
unsigned f; /* log of size of frequency array */
|
||||
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
|
||||
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
|
||||
double splitPoint; /* Percentage of samples used for training: the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */
|
||||
ZDICT_params_t zParams;
|
||||
} ZDICT_fastCover_params_t;
|
||||
|
||||
|
||||
/*! ZDICT_optimizeTrainFromBuffer_fastCover():
|
||||
* Train a dictionary from an array of samples using a modified version of the COVER algorithm.
|
||||
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
||||
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
||||
* The resulting dictionary will be saved into `dictBuffer`.
|
||||
* All of the parameters are optional: a zero value selects the default (f defaults to 23).
|
||||
* If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
|
||||
* if steps is zero it defaults to its default value.
|
||||
* If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000].
|
||||
*
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
* On success `*parameters` contains the parameters selected.
|
||||
*/
|
||||
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(
|
||||
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
||||
const size_t *samplesSizes, unsigned nbSamples,
|
||||
ZDICT_fastCover_params_t *parameters);
|
||||
|
||||
|
||||
/*! ZDICT_trainFromBuffer_fastCover():
|
||||
* Train a dictionary from an array of samples using a modified version of the COVER algorithm.
|
||||
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
||||
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
||||
* The resulting dictionary will be saved into `dictBuffer`.
|
||||
* d, k, and f are required.
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
*/
|
||||
ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(
|
||||
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
||||
const size_t *samplesSizes, unsigned nbSamples, ZDICT_fastCover_params_t parameters);
|
183
contrib/experimental_dict_builders/fastCover/main.c
Normal file
183
contrib/experimental_dict_builders/fastCover/main.c
Normal file
@ -0,0 +1,183 @@
|
||||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* strcmp, strlen */
|
||||
#include <errno.h> /* errno */
|
||||
#include <ctype.h>
|
||||
#include "fastCover.h"
|
||||
#include "io.h"
|
||||
#include "util.h"
|
||||
#include "zdict.h"
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Console display
|
||||
***************************************/
|
||||
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
|
||||
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
|
||||
|
||||
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
|
||||
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
|
||||
|
||||
#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
|
||||
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
|
||||
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
|
||||
if (displayLevel>=4) fflush(stderr); } } }
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Exceptions
|
||||
***************************************/
|
||||
#ifndef DEBUG
|
||||
# define DEBUG 0
|
||||
#endif
|
||||
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
|
||||
#define EXM_THROW(error, ...) \
|
||||
{ \
|
||||
DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
|
||||
DISPLAY("Error %i : ", error); \
|
||||
DISPLAY(__VA_ARGS__); \
|
||||
DISPLAY("\n"); \
|
||||
exit(error); \
|
||||
}
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
static const unsigned g_defaultMaxDictSize = 110 KB;
|
||||
#define DEFAULT_CLEVEL 3
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* FASTCOVER
|
||||
***************************************/
|
||||
int FASTCOVER_trainFromFiles(const char* dictFileName, sampleInfo *info,
|
||||
unsigned maxDictSize,
|
||||
ZDICT_fastCover_params_t *params) {
|
||||
unsigned const displayLevel = params->zParams.notificationLevel;
|
||||
void* const dictBuffer = malloc(maxDictSize);
|
||||
|
||||
int result = 0;
|
||||
|
||||
/* Checks */
|
||||
if (!dictBuffer)
|
||||
EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
|
||||
|
||||
{ size_t dictSize;
|
||||
/* Run the optimize version if either k or d is not provided */
|
||||
if (!params->d || !params->k) {
|
||||
dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, params);
|
||||
} else {
|
||||
dictSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, *params);
|
||||
}
|
||||
DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\n", params->k, params->d, params->f, params->steps, (unsigned)(params->splitPoint*100));
|
||||
if (ZDICT_isError(dictSize)) {
|
||||
DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
|
||||
result = 1;
|
||||
goto _done;
|
||||
}
|
||||
/* save dict */
|
||||
DISPLAYLEVEL(2, "Save dictionary of size %u into file %s \n", (U32)dictSize, dictFileName);
|
||||
saveDict(dictFileName, dictBuffer, dictSize);
|
||||
}
|
||||
|
||||
/* clean up */
|
||||
_done:
|
||||
free(dictBuffer);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int main(int argCount, const char* argv[])
|
||||
{
|
||||
int displayLevel = 2;
|
||||
const char* programName = argv[0];
|
||||
int operationResult = 0;
|
||||
|
||||
/* Initialize arguments to default values */
|
||||
unsigned k = 0;
|
||||
unsigned d = 0;
|
||||
unsigned f = 23;
|
||||
unsigned steps = 32;
|
||||
unsigned nbThreads = 1;
|
||||
unsigned split = 100;
|
||||
const char* outputFile = "fastCoverDict";
|
||||
unsigned dictID = 0;
|
||||
unsigned maxDictSize = g_defaultMaxDictSize;
|
||||
|
||||
/* Initialize table to store input files */
|
||||
const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));
|
||||
unsigned filenameIdx = 0;
|
||||
|
||||
char* fileNamesBuf = NULL;
|
||||
unsigned fileNamesNb = filenameIdx;
|
||||
int followLinks = 0; /* follow directory recursively */
|
||||
const char** extendedFileList = NULL;
|
||||
|
||||
/* Parse arguments */
|
||||
for (int i = 1; i < argCount; i++) {
|
||||
const char* argument = argv[i];
|
||||
if (longCommandWArg(&argument, "k=")) { k = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "d=")) { d = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "f=")) { f = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "steps=")) { steps = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "split=")) { split = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "dictID=")) { dictID = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "in=")) {
|
||||
filenameTable[filenameIdx] = argument;
|
||||
filenameIdx++;
|
||||
continue;
|
||||
}
|
||||
if (longCommandWArg(&argument, "out=")) {
|
||||
outputFile = argument;
|
||||
continue;
|
||||
}
|
||||
DISPLAYLEVEL(1, "Incorrect parameters\n");
|
||||
operationResult = 1;
|
||||
return operationResult;
|
||||
}
|
||||
|
||||
/* Get the list of all files recursively (because followLinks==0)*/
|
||||
extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
|
||||
&fileNamesNb, followLinks);
|
||||
if (extendedFileList) {
|
||||
unsigned u;
|
||||
for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
|
||||
free((void*)filenameTable);
|
||||
filenameTable = extendedFileList;
|
||||
filenameIdx = fileNamesNb;
|
||||
}
|
||||
|
||||
size_t blockSize = 0;
|
||||
|
||||
/* Set up zParams */
|
||||
ZDICT_params_t zParams;
|
||||
zParams.compressionLevel = DEFAULT_CLEVEL;
|
||||
zParams.notificationLevel = displayLevel;
|
||||
zParams.dictID = dictID;
|
||||
|
||||
/* Set up fastCover params */
|
||||
ZDICT_fastCover_params_t params;
|
||||
params.zParams = zParams;
|
||||
params.k = k;
|
||||
params.d = d;
|
||||
params.f = f;
|
||||
params.steps = steps;
|
||||
params.nbThreads = nbThreads;
|
||||
params.splitPoint = (double)split/100;
|
||||
|
||||
/* Build dictionary */
|
||||
sampleInfo* info = getSampleInfo(filenameTable,
|
||||
filenameIdx, blockSize, maxDictSize, zParams.notificationLevel);
|
||||
operationResult = FASTCOVER_trainFromFiles(outputFile, info, maxDictSize, ¶ms);
|
||||
|
||||
/* Free allocated memory */
|
||||
UTIL_freeFileList(extendedFileList, fileNamesBuf);
|
||||
freeSampleInfo(info);
|
||||
|
||||
return operationResult;
|
||||
}
|
15
contrib/experimental_dict_builders/fastCover/test.sh
Executable file
15
contrib/experimental_dict_builders/fastCover/test.sh
Executable file
@ -0,0 +1,15 @@
|
||||
#!/bin/sh
# Smoke tests for the fastCover dictionary builder.
# Each case builds a dictionary with ./main, then benchmarks it with zstd.

echo "Building fastCover dictionary with in=../../../lib/common f=20 out=dict1"
./main in=../../../lib/common f=20 out=dict1
zstd -be3 -D dict1 -r ../../../lib/common -q

echo "Building fastCover dictionary with in=../../../lib/common k=500 d=6 f=24 out=dict2 dictID=100 maxdict=140000"
./main in=../../../lib/common k=500 d=6 f=24 out=dict2 dictID=100 maxdict=140000
zstd -be3 -D dict2 -r ../../../lib/common -q

echo "Building fastCover dictionary with 2 sample sources"
./main in=../../../lib/common in=../../../lib/compress out=dict3
zstd -be3 -D dict3 -r ../../../lib/common -q

echo "Removing dict1 dict2 dict3"
rm -f dict1 dict2 dict3

# Both invocations below are expected to be rejected by ./main.
echo "Testing with invalid parameters, should fail"
! ./main in=../../../lib/common r=10
! ./main in=../../../lib/common d=10
|
@ -0,0 +1,52 @@
|
||||
# Build, run and test driver for the random dictionary builder.
# ARG holds the arguments forwarded to ./main by the `run` target.
ARG :=

CC ?= gcc
CFLAGS ?= -O3
INCLUDES := -I ../../../programs -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder

TEST_INPUT := ../../../lib
TEST_OUTPUT := randomDict

.PHONY: all
all: main run clean

.PHONY: test
test: main testrun testshell clean

# `run` needs the binary, so depend on it explicitly.
.PHONY: run
run: main
	echo "Building a random dictionary with given arguments"
	./main $(ARG)

main: main.o io.o random.o libzstd.a
	$(CC) $(CFLAGS) main.o io.o random.o libzstd.a -o main

main.o: main.c
	$(CC) $(CFLAGS) $(INCLUDES) -c main.c

random.o: random.c
	$(CC) $(CFLAGS) $(INCLUDES) -c random.c

io.o: io.c
	$(CC) $(CFLAGS) $(INCLUDES) -c io.c

# Build the static library in the main lib directory, then move it here.
libzstd.a:
	$(MAKE) -C ../../../lib libzstd.a
	mv ../../../lib/libzstd.a .

.PHONY: testrun
testrun: main
	echo "Run with $(TEST_INPUT) and $(TEST_OUTPUT) "
	./main in=$(TEST_INPUT) out=$(TEST_OUTPUT)
	zstd -be3 -D $(TEST_OUTPUT) -r $(TEST_INPUT) -q
	rm -f $(TEST_OUTPUT)

.PHONY: testshell
testshell: test.sh
	sh test.sh
	echo "Finish running test.sh"

.PHONY: clean
clean:
	rm -f *.o main libzstd.a
	$(MAKE) -C ../../../lib clean
	echo "Cleaning is completed"
|
@ -0,0 +1,20 @@
|
||||
Random Dictionary Builder
|
||||
|
||||
### Permitted Arguments:
|
||||
Input File/Directory (in=fileName): required; file/directory used to build dictionary; if directory, will operate recursively for files inside directory; can include multiple files/directories, each following "in="
|
||||
Output Dictionary (out=dictName): if not provided, default to defaultDict
|
||||
Dictionary ID (dictID=#): nonnegative number; if not provided, default to 0
|
||||
Maximum Dictionary Size (maxdict=#): positive number; in bytes, if not provided, default to 110KB
|
||||
Size of Randomly Selected Segment (k=#): positive number; in bytes; if not provided, default to 200
|
||||
|
||||
### Running Test:
|
||||
make test
|
||||
|
||||
|
||||
### Usage:
|
||||
To build a random dictionary with the provided arguments: make ARG= followed by arguments
|
||||
|
||||
|
||||
### Examples:
|
||||
make ARG="in=../../../lib/dictBuilder out=dict100 dictID=520"
|
||||
make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
|
284
contrib/experimental_dict_builders/randomDictBuilder/io.c
Normal file
284
contrib/experimental_dict_builders/randomDictBuilder/io.c
Normal file
@ -0,0 +1,284 @@
|
||||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* strcmp, strlen */
|
||||
#include <errno.h> /* errno */
|
||||
#include <ctype.h>
|
||||
#include "io.h"
|
||||
#include "fileio.h" /* stdinmark, stdoutmark, ZSTD_EXTENSION */
|
||||
#include "platform.h" /* Large Files support */
|
||||
#include "util.h"
|
||||
#include "zdict.h"
|
||||
|
||||
/*-*************************************
|
||||
* Console display
|
||||
***************************************/
|
||||
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
|
||||
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
|
||||
|
||||
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
|
||||
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
|
||||
|
||||
#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
|
||||
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
|
||||
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
|
||||
if (displayLevel>=4) fflush(stderr); } } }
|
||||
|
||||
/*-*************************************
|
||||
* Exceptions
|
||||
***************************************/
|
||||
#ifndef DEBUG
|
||||
# define DEBUG 0
|
||||
#endif
|
||||
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
|
||||
#define EXM_THROW(error, ...) \
|
||||
{ \
|
||||
DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
|
||||
DISPLAY("Error %i : ", error); \
|
||||
DISPLAY(__VA_ARGS__); \
|
||||
DISPLAY("\n"); \
|
||||
exit(error); \
|
||||
}
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
|
||||
#define SAMPLESIZE_MAX (128 KB)
|
||||
#define RANDOM_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
|
||||
#define RANDOM_MEMMULT 9
|
||||
static const size_t g_maxMemory = (sizeof(size_t) == 4) ?
|
||||
(2 GB - 64 MB) : ((size_t)(512 MB) << sizeof(size_t));
|
||||
|
||||
#define NOISELENGTH 32
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Commandline related functions
|
||||
***************************************/
|
||||
/*! readU32FromChar() :
 *  Parse an unsigned decimal number at the start of `*stringPtr`.
 *  An optional `K` or `M` suffix multiplies the value by 2^10 or 2^20,
 *  and may itself be followed by `i` and/or `B` (as in `KiB`, `MB`).
 *  `*stringPtr` is advanced past every character consumed.
 * @return : the parsed value; 0 when the string does not start with a digit.
 *  On overflow, an error message is printed on stderr and the process exits with code 1.
 */
unsigned readU32FromChar(const char** stringPtr){
    const char errorMsg[] = "error: numeric value too large";
    unsigned result = 0;

    while ((**stringPtr >='0') && (**stringPtr <='9')) {
        /* largest value that can still take one more decimal digit */
        unsigned const max = (((unsigned)(-1)) / 10) - 1;
        if (result > max) { fprintf(stderr, "%s\n", errorMsg); exit(1); }
        result *= 10;
        result += (unsigned)(**stringPtr - '0');
        (*stringPtr)++;
    }

    if ((**stringPtr=='K') || (**stringPtr=='M')) {
        /* largest value that can still be multiplied by 1024 */
        unsigned const maxK = ((unsigned)(-1)) >> 10;
        if (result > maxK) { fprintf(stderr, "%s\n", errorMsg); exit(1); }
        result <<= 10;
        if (**stringPtr=='M') {
            if (result > maxK) { fprintf(stderr, "%s\n", errorMsg); exit(1); }
            result <<= 10;
        }
        (*stringPtr)++;  /* skip `K` or `M` */
        if (**stringPtr=='i') (*stringPtr)++;
        if (**stringPtr=='B') (*stringPtr)++;
    }
    return result;
}
|
||||
|
||||
/*! longCommandWArg() :
 *  Test whether `*stringPtr` begins with `longCommand`.
 *  On a match, `*stringPtr` is moved just past the prefix and 1 is returned;
 *  otherwise `*stringPtr` is left untouched and 0 is returned.
 */
unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
{
    size_t const prefixLength = strlen(longCommand);

    if (strncmp(*stringPtr, longCommand, prefixLength) != 0) {
        return 0;
    }
    *stringPtr += prefixLength;
    return 1;
}
|
||||
|
||||
|
||||
/* ********************************************************
|
||||
* File related operations
|
||||
**********************************************************/
|
||||
/** loadFiles() :
|
||||
* load samples from files listed in fileNamesTable into buffer.
|
||||
* works even if buffer is too small to load all samples.
|
||||
* Also provides the size of each sample into sampleSizes table
|
||||
* which must be sized correctly, using DiB_fileStats().
|
||||
* @return : nb of samples effectively loaded into `buffer`
|
||||
* *bufferSizePtr is modified, it provides the amount data loaded within buffer.
|
||||
* sampleSizes is filled with the size of each sample.
|
||||
*/
|
||||
static unsigned loadFiles(void* buffer, size_t* bufferSizePtr, size_t* sampleSizes,
|
||||
unsigned sstSize, const char** fileNamesTable, unsigned nbFiles,
|
||||
size_t targetChunkSize, unsigned displayLevel) {
|
||||
char* const buff = (char*)buffer;
|
||||
size_t pos = 0;
|
||||
unsigned nbLoadedChunks = 0, fileIndex;
|
||||
|
||||
for (fileIndex=0; fileIndex<nbFiles; fileIndex++) {
|
||||
const char* const fileName = fileNamesTable[fileIndex];
|
||||
unsigned long long const fs64 = UTIL_getFileSize(fileName);
|
||||
unsigned long long remainingToLoad = (fs64 == UTIL_FILESIZE_UNKNOWN) ? 0 : fs64;
|
||||
U32 const nbChunks = targetChunkSize ? (U32)((fs64 + (targetChunkSize-1)) / targetChunkSize) : 1;
|
||||
U64 const chunkSize = targetChunkSize ? MIN(targetChunkSize, fs64) : fs64;
|
||||
size_t const maxChunkSize = (size_t)MIN(chunkSize, SAMPLESIZE_MAX);
|
||||
U32 cnb;
|
||||
FILE* const f = fopen(fileName, "rb");
|
||||
if (f==NULL) EXM_THROW(10, "zstd: dictBuilder: %s %s ", fileName, strerror(errno));
|
||||
DISPLAYUPDATE(2, "Loading %s... \r", fileName);
|
||||
for (cnb=0; cnb<nbChunks; cnb++) {
|
||||
size_t const toLoad = (size_t)MIN(maxChunkSize, remainingToLoad);
|
||||
if (toLoad > *bufferSizePtr-pos) break;
|
||||
{ size_t const readSize = fread(buff+pos, 1, toLoad, f);
|
||||
if (readSize != toLoad) EXM_THROW(11, "Pb reading %s", fileName);
|
||||
pos += readSize;
|
||||
sampleSizes[nbLoadedChunks++] = toLoad;
|
||||
remainingToLoad -= targetChunkSize;
|
||||
if (nbLoadedChunks == sstSize) { /* no more space left in sampleSizes table */
|
||||
fileIndex = nbFiles; /* stop there */
|
||||
break;
|
||||
}
|
||||
if (toLoad < targetChunkSize) {
|
||||
fseek(f, (long)(targetChunkSize - toLoad), SEEK_CUR);
|
||||
} } }
|
||||
fclose(f);
|
||||
}
|
||||
DISPLAYLEVEL(2, "\r%79s\r", "");
|
||||
*bufferSizePtr = pos;
|
||||
DISPLAYLEVEL(4, "loaded : %u KB \n", (U32)(pos >> 10))
|
||||
return nbLoadedChunks;
|
||||
}
|
||||
|
||||
#define rotl32(x,r) ((x << r) | (x >> (32 - r)))
|
||||
static U32 getRand(U32* src)
|
||||
{
|
||||
static const U32 prime1 = 2654435761U;
|
||||
static const U32 prime2 = 2246822519U;
|
||||
U32 rand32 = *src;
|
||||
rand32 *= prime1;
|
||||
rand32 ^= prime2;
|
||||
rand32 = rotl32(rand32, 13);
|
||||
*src = rand32;
|
||||
return rand32 >> 5;
|
||||
}
|
||||
|
||||
/* shuffle() :
|
||||
* shuffle a table of file names in a semi-random way
|
||||
* It improves dictionary quality by reducing "locality" impact, so if sample set is very large,
|
||||
* it will load random elements from it, instead of just the first ones. */
|
||||
static void shuffle(const char** fileNamesTable, unsigned nbFiles) {
|
||||
U32 seed = 0xFD2FB528;
|
||||
unsigned i;
|
||||
for (i = nbFiles - 1; i > 0; --i) {
|
||||
unsigned const j = getRand(&seed) % (i + 1);
|
||||
const char* const tmp = fileNamesTable[j];
|
||||
fileNamesTable[j] = fileNamesTable[i];
|
||||
fileNamesTable[i] = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*-********************************************************
|
||||
* Dictionary training functions
|
||||
**********************************************************/
|
||||
size_t findMaxMem(unsigned long long requiredMem) {
|
||||
size_t const step = 8 MB;
|
||||
void* testmem = NULL;
|
||||
|
||||
requiredMem = (((requiredMem >> 23) + 1) << 23);
|
||||
requiredMem += step;
|
||||
if (requiredMem > g_maxMemory) requiredMem = g_maxMemory;
|
||||
|
||||
while (!testmem) {
|
||||
testmem = malloc((size_t)requiredMem);
|
||||
requiredMem -= step;
|
||||
}
|
||||
|
||||
free(testmem);
|
||||
return (size_t)requiredMem;
|
||||
}
|
||||
|
||||
void saveDict(const char* dictFileName,
|
||||
const void* buff, size_t buffSize) {
|
||||
FILE* const f = fopen(dictFileName, "wb");
|
||||
if (f==NULL) EXM_THROW(3, "cannot open %s ", dictFileName);
|
||||
|
||||
{ size_t const n = fwrite(buff, 1, buffSize, f);
|
||||
if (n!=buffSize) EXM_THROW(4, "%s : write error", dictFileName) }
|
||||
|
||||
{ size_t const n = (size_t)fclose(f);
|
||||
if (n!=0) EXM_THROW(5, "%s : flush error", dictFileName) }
|
||||
}
|
||||
|
||||
/*! getFileStats() :
|
||||
* Given a list of files, and a chunkSize (0 == no chunk, whole files)
|
||||
* provides the amount of data to be loaded and the resulting nb of samples.
|
||||
* This is useful primarily for allocation purpose => sample buffer, and sample sizes table.
|
||||
*/
|
||||
static fileStats getFileStats(const char** fileNamesTable, unsigned nbFiles,
|
||||
size_t chunkSize, unsigned displayLevel) {
|
||||
fileStats fs;
|
||||
unsigned n;
|
||||
memset(&fs, 0, sizeof(fs));
|
||||
for (n=0; n<nbFiles; n++) {
|
||||
U64 const fileSize = UTIL_getFileSize(fileNamesTable[n]);
|
||||
U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? 0 : fileSize;
|
||||
U32 const nbSamples = (U32)(chunkSize ? (srcSize + (chunkSize-1)) / chunkSize : 1);
|
||||
U64 const chunkToLoad = chunkSize ? MIN(chunkSize, srcSize) : srcSize;
|
||||
size_t const cappedChunkSize = (size_t)MIN(chunkToLoad, SAMPLESIZE_MAX);
|
||||
fs.totalSizeToLoad += cappedChunkSize * nbSamples;
|
||||
fs.oneSampleTooLarge |= (chunkSize > 2*SAMPLESIZE_MAX);
|
||||
fs.nbSamples += nbSamples;
|
||||
}
|
||||
DISPLAYLEVEL(4, "Preparing to load : %u KB \n", (U32)(fs.totalSizeToLoad >> 10));
|
||||
return fs;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
sampleInfo* getSampleInfo(const char** fileNamesTable, unsigned nbFiles, size_t chunkSize,
|
||||
unsigned maxDictSize, const unsigned displayLevel) {
|
||||
fileStats const fs = getFileStats(fileNamesTable, nbFiles, chunkSize, displayLevel);
|
||||
size_t* const sampleSizes = (size_t*)malloc(fs.nbSamples * sizeof(size_t));
|
||||
size_t const memMult = RANDOM_MEMMULT;
|
||||
size_t const maxMem = findMaxMem(fs.totalSizeToLoad * memMult) / memMult;
|
||||
size_t loadedSize = (size_t) MIN ((unsigned long long)maxMem, fs.totalSizeToLoad);
|
||||
void* const srcBuffer = malloc(loadedSize+NOISELENGTH);
|
||||
|
||||
/* Checks */
|
||||
if ((!sampleSizes) || (!srcBuffer))
|
||||
EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
|
||||
if (fs.oneSampleTooLarge) {
|
||||
DISPLAYLEVEL(2, "! Warning : some sample(s) are very large \n");
|
||||
DISPLAYLEVEL(2, "! Note that dictionary is only useful for small samples. \n");
|
||||
DISPLAYLEVEL(2, "! As a consequence, only the first %u bytes of each sample are loaded \n", SAMPLESIZE_MAX);
|
||||
}
|
||||
if (fs.nbSamples < 5) {
|
||||
DISPLAYLEVEL(2, "! Warning : nb of samples too low for proper processing ! \n");
|
||||
DISPLAYLEVEL(2, "! Please provide _one file per sample_. \n");
|
||||
DISPLAYLEVEL(2, "! Alternatively, split files into fixed-size blocks representative of samples, with -B# \n");
|
||||
EXM_THROW(14, "nb of samples too low"); /* we now clearly forbid this case */
|
||||
}
|
||||
if (fs.totalSizeToLoad < (unsigned long long)(8 * maxDictSize)) {
|
||||
DISPLAYLEVEL(2, "! Warning : data size of samples too small for target dictionary size \n");
|
||||
DISPLAYLEVEL(2, "! Samples should be about 100x larger than target dictionary size \n");
|
||||
}
|
||||
|
||||
/* init */
|
||||
if (loadedSize < fs.totalSizeToLoad)
|
||||
DISPLAYLEVEL(1, "Not enough memory; training on %u MB only...\n", (unsigned)(loadedSize >> 20));
|
||||
|
||||
/* Load input buffer */
|
||||
DISPLAYLEVEL(3, "Shuffling input files\n");
|
||||
shuffle(fileNamesTable, nbFiles);
|
||||
nbFiles = loadFiles(srcBuffer, &loadedSize, sampleSizes, fs.nbSamples,
|
||||
fileNamesTable, nbFiles, chunkSize, displayLevel);
|
||||
|
||||
sampleInfo *info = (sampleInfo *)malloc(sizeof(sampleInfo));
|
||||
|
||||
info->nbSamples = fs.nbSamples;
|
||||
info->samplesSizes = sampleSizes;
|
||||
info->srcBuffer = srcBuffer;
|
||||
|
||||
return info;
|
||||
}
|
||||
|
||||
|
||||
/*! freeSampleInfo() :
 *  Release a sampleInfo together with the sample sizes table and
 *  the sample buffer it points to. A NULL `info` is accepted.
 */
void freeSampleInfo(sampleInfo *info)
{
    if (info == NULL) return;

    /* free() ignores NULL, so the members need no individual test */
    free((void*)(info->samplesSizes));
    free((void*)(info->srcBuffer));
    free(info);
}
|
60
contrib/experimental_dict_builders/randomDictBuilder/io.h
Normal file
60
contrib/experimental_dict_builders/randomDictBuilder/io.h
Normal file
@ -0,0 +1,60 @@
|
||||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* strcmp, strlen */
|
||||
#include <errno.h> /* errno */
|
||||
#include <ctype.h>
|
||||
#include "zstd_internal.h" /* includes zstd.h */
|
||||
#include "fileio.h" /* stdinmark, stdoutmark, ZSTD_EXTENSION */
|
||||
#include "platform.h" /* Large Files support */
|
||||
#include "util.h"
|
||||
#include "zdict.h"
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Structs
|
||||
***************************************/
|
||||
typedef struct {
|
||||
U64 totalSizeToLoad;
|
||||
unsigned oneSampleTooLarge;
|
||||
unsigned nbSamples;
|
||||
} fileStats;
|
||||
|
||||
typedef struct {
|
||||
const void* srcBuffer;
|
||||
const size_t *samplesSizes;
|
||||
size_t nbSamples;
|
||||
}sampleInfo;
|
||||
|
||||
|
||||
|
||||
/*! getSampleInfo():
|
||||
* Load from input files and add samples to buffer
|
||||
* @return: a sampleInfo struct containing information about the buffer where samples are stored,
|
||||
* size of each sample, and total number of samples
|
||||
*/
|
||||
sampleInfo* getSampleInfo(const char** fileNamesTable, unsigned nbFiles, size_t chunkSize,
|
||||
unsigned maxDictSize, const unsigned displayLevel);
|
||||
|
||||
|
||||
|
||||
/*! freeSampleInfo():
|
||||
* Free memory allocated for info
|
||||
*/
|
||||
void freeSampleInfo(sampleInfo *info);
|
||||
|
||||
|
||||
|
||||
/*! saveDict():
|
||||
* Save data stored on buff to dictFileName
|
||||
*/
|
||||
void saveDict(const char* dictFileName, const void* buff, size_t buffSize);
|
||||
|
||||
|
||||
unsigned readU32FromChar(const char** stringPtr);
|
||||
|
||||
/** longCommandWArg() :
|
||||
* check if *stringPtr is the same as longCommand.
|
||||
* If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
|
||||
* @return 0 and doesn't modify *stringPtr otherwise.
|
||||
*/
|
||||
unsigned longCommandWArg(const char** stringPtr, const char* longCommand);
|
161
contrib/experimental_dict_builders/randomDictBuilder/main.c
Normal file
161
contrib/experimental_dict_builders/randomDictBuilder/main.c
Normal file
@ -0,0 +1,161 @@
|
||||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* strcmp, strlen */
|
||||
#include <errno.h> /* errno */
|
||||
#include <ctype.h>
|
||||
#include "random.h"
|
||||
#include "io.h"
|
||||
#include "util.h"
|
||||
#include "zdict.h"
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Console display
|
||||
***************************************/
|
||||
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
|
||||
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
|
||||
|
||||
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
|
||||
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
|
||||
|
||||
#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
|
||||
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
|
||||
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
|
||||
if (displayLevel>=4) fflush(stderr); } } }
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Exceptions
|
||||
***************************************/
|
||||
#ifndef DEBUG
|
||||
# define DEBUG 0
|
||||
#endif
|
||||
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
|
||||
#define EXM_THROW(error, ...) \
|
||||
{ \
|
||||
DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
|
||||
DISPLAY("Error %i : ", error); \
|
||||
DISPLAY(__VA_ARGS__); \
|
||||
DISPLAY("\n"); \
|
||||
exit(error); \
|
||||
}
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
static const unsigned g_defaultMaxDictSize = 110 KB;
|
||||
#define DEFAULT_CLEVEL 3
|
||||
#define DEFAULT_k 200
|
||||
#define DEFAULT_OUTPUTFILE "defaultDict"
|
||||
#define DEFAULT_DICTID 0
|
||||
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* RANDOM
|
||||
***************************************/
|
||||
int RANDOM_trainFromFiles(const char* dictFileName, sampleInfo *info,
|
||||
unsigned maxDictSize,
|
||||
ZDICT_random_params_t *params) {
|
||||
unsigned const displayLevel = params->zParams.notificationLevel;
|
||||
void* const dictBuffer = malloc(maxDictSize);
|
||||
|
||||
int result = 0;
|
||||
|
||||
/* Checks */
|
||||
if (!dictBuffer)
|
||||
EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
|
||||
|
||||
{ size_t dictSize;
|
||||
dictSize = ZDICT_trainFromBuffer_random(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, *params);
|
||||
DISPLAYLEVEL(2, "k=%u\n", params->k);
|
||||
if (ZDICT_isError(dictSize)) {
|
||||
DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
|
||||
result = 1;
|
||||
goto _done;
|
||||
}
|
||||
/* save dict */
|
||||
DISPLAYLEVEL(2, "Save dictionary of size %u into file %s \n", (U32)dictSize, dictFileName);
|
||||
saveDict(dictFileName, dictBuffer, dictSize);
|
||||
}
|
||||
|
||||
/* clean up */
|
||||
_done:
|
||||
free(dictBuffer);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int main(int argCount, const char* argv[])
|
||||
{
|
||||
int displayLevel = 2;
|
||||
const char* programName = argv[0];
|
||||
int operationResult = 0;
|
||||
|
||||
/* Initialize arguments to default values */
|
||||
unsigned k = DEFAULT_k;
|
||||
const char* outputFile = DEFAULT_OUTPUTFILE;
|
||||
unsigned dictID = DEFAULT_DICTID;
|
||||
unsigned maxDictSize = g_defaultMaxDictSize;
|
||||
|
||||
/* Initialize table to store input files */
|
||||
const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));
|
||||
unsigned filenameIdx = 0;
|
||||
|
||||
/* Parse arguments */
|
||||
for (int i = 1; i < argCount; i++) {
|
||||
const char* argument = argv[i];
|
||||
if (longCommandWArg(&argument, "k=")) { k = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "dictID=")) { dictID = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "in=")) {
|
||||
filenameTable[filenameIdx] = argument;
|
||||
filenameIdx++;
|
||||
continue;
|
||||
}
|
||||
if (longCommandWArg(&argument, "out=")) {
|
||||
outputFile = argument;
|
||||
continue;
|
||||
}
|
||||
DISPLAYLEVEL(1, "Incorrect parameters\n");
|
||||
operationResult = 1;
|
||||
return operationResult;
|
||||
}
|
||||
|
||||
char* fileNamesBuf = NULL;
|
||||
unsigned fileNamesNb = filenameIdx;
|
||||
int followLinks = 0; /* follow directory recursively */
|
||||
const char** extendedFileList = NULL;
|
||||
extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
|
||||
&fileNamesNb, followLinks);
|
||||
if (extendedFileList) {
|
||||
unsigned u;
|
||||
for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
|
||||
free((void*)filenameTable);
|
||||
filenameTable = extendedFileList;
|
||||
filenameIdx = fileNamesNb;
|
||||
}
|
||||
|
||||
size_t blockSize = 0;
|
||||
|
||||
ZDICT_random_params_t params;
|
||||
ZDICT_params_t zParams;
|
||||
zParams.compressionLevel = DEFAULT_CLEVEL;
|
||||
zParams.notificationLevel = displayLevel;
|
||||
zParams.dictID = dictID;
|
||||
params.zParams = zParams;
|
||||
params.k = k;
|
||||
|
||||
sampleInfo* info = getSampleInfo(filenameTable,
|
||||
filenameIdx, blockSize, maxDictSize, zParams.notificationLevel);
|
||||
operationResult = RANDOM_trainFromFiles(outputFile, info, maxDictSize, ¶ms);
|
||||
|
||||
/* Free allocated memory */
|
||||
UTIL_freeFileList(extendedFileList, fileNamesBuf);
|
||||
freeSampleInfo(info);
|
||||
|
||||
return operationResult;
|
||||
}
|
163
contrib/experimental_dict_builders/randomDictBuilder/random.c
Normal file
163
contrib/experimental_dict_builders/randomDictBuilder/random.c
Normal file
@ -0,0 +1,163 @@
|
||||
/*-*************************************
|
||||
* Dependencies
|
||||
***************************************/
|
||||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* memset */
|
||||
#include <time.h> /* clock */
|
||||
#include "random.h"
|
||||
#include "util.h" /* UTIL_getFileSize, UTIL_getTotalFileSize */
|
||||
#ifndef ZDICT_STATIC_LINKING_ONLY
|
||||
#define ZDICT_STATIC_LINKING_ONLY
|
||||
#endif
|
||||
#include "zdict.h"
|
||||
|
||||
/*-*************************************
|
||||
* Console display
|
||||
***************************************/
|
||||
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
|
||||
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
|
||||
|
||||
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
|
||||
if (displayLevel >= l) { \
|
||||
if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
|
||||
g_time = clock(); \
|
||||
DISPLAY(__VA_ARGS__); \
|
||||
} \
|
||||
}
|
||||
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(displayLevel, l, __VA_ARGS__)
|
||||
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
||||
static clock_t g_time = 0;
|
||||
|
||||
|
||||
|
||||
/* ********************************************************
|
||||
* Random Dictionary Builder
|
||||
**********************************************************/
|
||||
/**
 * Adds up the first `nbSamples` entries of `samplesSizes`
 * and returns the total (0 when `nbSamples` is 0).
 */
static size_t RANDOM_sum(const size_t *samplesSizes, unsigned nbSamples) {
    size_t total = 0;
    unsigned remaining = nbSamples;

    /* walk the table from its last entry down to the first */
    while (remaining > 0) {
        remaining--;
        total += samplesSizes[remaining];
    }
    return total;
}
|
||||
|
||||
|
||||
/**
|
||||
* A segment is an inclusive range in the source.
|
||||
*/
|
||||
typedef struct {
|
||||
U32 begin;
|
||||
U32 end;
|
||||
} RANDOM_segment_t;
|
||||
|
||||
|
||||
/**
|
||||
* Selects a random segment from totalSamplesSize - k + 1 possible segments
|
||||
*/
|
||||
static RANDOM_segment_t RANDOM_selectSegment(const size_t totalSamplesSize,
|
||||
ZDICT_random_params_t parameters) {
|
||||
const U32 k = parameters.k;
|
||||
RANDOM_segment_t segment;
|
||||
unsigned index;
|
||||
|
||||
/* Randomly generate a number from 0 to sampleSizes - k */
|
||||
index = rand()%(totalSamplesSize - k + 1);
|
||||
|
||||
/* inclusive */
|
||||
segment.begin = index;
|
||||
segment.end = index + k - 1;
|
||||
|
||||
return segment;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Check the validity of the parameters.
|
||||
* Returns non-zero if the parameters are valid and 0 otherwise.
|
||||
*/
|
||||
static int RANDOM_checkParameters(ZDICT_random_params_t parameters,
|
||||
size_t maxDictSize) {
|
||||
/* k is a required parameter */
|
||||
if (parameters.k == 0) {
|
||||
return 0;
|
||||
}
|
||||
/* k <= maxDictSize */
|
||||
if (parameters.k > maxDictSize) {
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Given the prepared context build the dictionary.
|
||||
*/
|
||||
static size_t RANDOM_buildDictionary(const size_t totalSamplesSize, const BYTE *samples,
|
||||
void *dictBuffer, size_t dictBufferCapacity,
|
||||
ZDICT_random_params_t parameters) {
|
||||
BYTE *const dict = (BYTE *)dictBuffer;
|
||||
size_t tail = dictBufferCapacity;
|
||||
const int displayLevel = parameters.zParams.notificationLevel;
|
||||
while (tail > 0) {
|
||||
|
||||
/* Select a segment */
|
||||
RANDOM_segment_t segment = RANDOM_selectSegment(totalSamplesSize, parameters);
|
||||
|
||||
size_t segmentSize;
|
||||
segmentSize = MIN(segment.end - segment.begin + 1, tail);
|
||||
|
||||
tail -= segmentSize;
|
||||
memcpy(dict + tail, samples + segment.begin, segmentSize);
|
||||
DISPLAYUPDATE(
|
||||
2, "\r%u%% ",
|
||||
(U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
|
||||
}
|
||||
|
||||
return tail;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
 * Builds a dictionary made of randomly selected k-byte segments of the samples,
 * then finalizes it with ZDICT_finalizeDictionary().
 *
 * Fails with an error code when :
 *  - parameters.k is 0 or larger than dictBufferCapacity,
 *  - nbSamples is 0,
 *  - dictBufferCapacity is smaller than ZDICT_DICTSIZE_MIN,
 *  - the samples hold fewer than k bytes in total (no segment can be selected).
 *
 * @return : size of the dictionary stored into `dictBuffer`, or an error code.
 */
ZDICTLIB_API size_t ZDICT_trainFromBuffer_random(
    void *dictBuffer, size_t dictBufferCapacity,
    const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
    ZDICT_random_params_t parameters) {
  const int displayLevel = parameters.zParams.notificationLevel;
  BYTE* const dict = (BYTE*)dictBuffer;
  /* Checks */
  if (!RANDOM_checkParameters(parameters, dictBufferCapacity)) {
    DISPLAYLEVEL(1, "k is incorrect\n");
    return ERROR(GENERIC);
  }
  if (nbSamples == 0) {
    DISPLAYLEVEL(1, "Random must have at least one input file\n");
    return ERROR(GENERIC);
  }
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
    DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
                 ZDICT_DICTSIZE_MIN);
    return ERROR(dstSize_tooSmall);
  }
  {
    const size_t totalSamplesSize = RANDOM_sum(samplesSizes, nbSamples);
    const BYTE *const samples = (const BYTE *)samplesBuffer;

    /* Segment selection computes rand() % (totalSamplesSize - k + 1) :
     * with fewer than k bytes of input that expression wraps around (or is 0),
     * and the subsequent copy would read outside of the samples. */
    if (totalSamplesSize < parameters.k) {
      DISPLAYLEVEL(1, "Total size of samples must be at least k\n");
      return ERROR(srcSize_wrong);
    }

    DISPLAYLEVEL(2, "Building dictionary\n");
    {
      const size_t tail = RANDOM_buildDictionary(totalSamplesSize, samples,
                                  dictBuffer, dictBufferCapacity, parameters);
      const size_t dictSize = ZDICT_finalizeDictionary(
          dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
          samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
      if (!ZSTD_isError(dictSize)) {
        DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
                     (U32)dictSize);
      }
      return dictSize;
    }
  }
}
|
@ -0,0 +1,29 @@
|
||||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* memset */
|
||||
#include <time.h> /* clock */
|
||||
#include "zstd_internal.h" /* includes zstd.h */
|
||||
#ifndef ZDICT_STATIC_LINKING_ONLY
|
||||
#define ZDICT_STATIC_LINKING_ONLY
|
||||
#endif
|
||||
#include "zdict.h"
|
||||
|
||||
|
||||
|
||||
typedef struct {
|
||||
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+]; Default to 200 */
|
||||
ZDICT_params_t zParams;
|
||||
} ZDICT_random_params_t;
|
||||
|
||||
|
||||
/*! ZDICT_trainFromBuffer_random():
|
||||
* Train a dictionary from an array of samples.
|
||||
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
||||
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
||||
* The resulting dictionary will be saved into `dictBuffer`.
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
*/
|
||||
ZDICTLIB_API size_t ZDICT_trainFromBuffer_random( void *dictBuffer, size_t dictBufferCapacity,
|
||||
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
|
||||
ZDICT_random_params_t parameters);
|
14
contrib/experimental_dict_builders/randomDictBuilder/test.sh
Executable file
14
contrib/experimental_dict_builders/randomDictBuilder/test.sh
Executable file
@ -0,0 +1,14 @@
|
||||
#!/bin/sh
# Smoke test for the random dictionary builder :
# builds three dictionaries with ./main, benchmarks each one with zstd,
# then checks that an invalid parameter is rejected.

# Default dictionary size, explicit segment size
echo "Building random dictionary with in=../../../lib/common k=200 out=dict1"
./main in=../../../lib/common k=200 out=dict1
zstd -be3 -D dict1 -r ../../../lib/common -q

# Explicit dictionary ID and maximum dictionary size
echo "Building random dictionary with in=../../../lib/common k=500 out=dict2 dictID=100 maxdict=140000"
./main in=../../../lib/common k=500 out=dict2 dictID=100 maxdict=140000
zstd -be3 -D dict2 -r ../../../lib/common -q

# Several input directories
echo "Building random dictionary with 2 sample sources"
./main in=../../../lib/common in=../../../lib/compress out=dict3
zstd -be3 -D dict3 -r ../../../lib/common -q

echo "Removing dict1 dict2 dict3"
rm -f dict1 dict2 dict3

# `!` inverts the exit status : the script succeeds only if ./main fails here
echo "Testing with invalid parameters, should fail"
! ./main r=10
|
58
contrib/largeNbDicts/Makefile
Normal file
58
contrib/largeNbDicts/Makefile
Normal file
@ -0,0 +1,58 @@
|
||||
# ################################################################
|
||||
# Copyright (c) 2018-present, Yann Collet, Facebook, Inc.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under both the BSD-style license (found in the
|
||||
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
# in the COPYING file in the root directory of this source tree).
|
||||
# ################################################################
|
||||
|
||||
PROGDIR = ../../programs
LIBDIR  = ../../lib

LIBZSTD = $(LIBDIR)/libzstd.a

CPPFLAGS+= -I$(LIBDIR) -I$(LIBDIR)/common -I$(LIBDIR)/dictBuilder -I$(PROGDIR)

CFLAGS  ?= -O3
CFLAGS  += -std=gnu99
DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
            -Wstrict-aliasing=1 -Wswitch-enum \
            -Wstrict-prototypes -Wundef -Wpointer-arith \
            -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
            -Wredundant-decls
CFLAGS  += $(DEBUGFLAGS) $(MOREFLAGS)


# These targets name actions, not files : declaring them phony prevents
# a stray file called `default`, `all` or `clean` from masking them.
.PHONY: default all clean

default: largeNbDicts

all : largeNbDicts

largeNbDicts: util.o timefn.o benchfn.o datagen.o xxhash.o largeNbDicts.c $(LIBZSTD)
	$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@

# The library is always handed to its own Makefile, which decides what to rebuild
.PHONY: $(LIBZSTD)
$(LIBZSTD):
	$(MAKE) -C $(LIBDIR) libzstd.a CFLAGS="$(CFLAGS)"

benchfn.o: $(PROGDIR)/benchfn.c
	$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c

timefn.o: $(PROGDIR)/timefn.c
	$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c

datagen.o: $(PROGDIR)/datagen.c
	$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c

util.o: $(PROGDIR)/util.c
	$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c


xxhash.o : $(LIBDIR)/common/xxhash.c
	$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c


clean:
	$(RM) *.o
	$(MAKE) -C $(LIBDIR) clean > /dev/null
	$(RM) largeNbDicts
|
25
contrib/largeNbDicts/README.md
Normal file
25
contrib/largeNbDicts/README.md
Normal file
@ -0,0 +1,25 @@
|
||||
largeNbDicts
|
||||
=====================
|
||||
|
||||
`largeNbDicts` is a benchmark test tool
|
||||
dedicated to the specific scenario of
|
||||
dictionary decompression using a very large number of dictionaries.
|
||||
When dictionaries are constantly changing, they are always "cold",
|
||||
suffering from increased latency due to cache misses.
|
||||
|
||||
The tool is created in a bid to investigate performance for this scenario,
|
||||
and to experiment with mitigation techniques.
|
||||
|
||||
Command line :
|
||||
```
|
||||
largeNbDicts [Options] filename(s)
|
||||
|
||||
Options :
|
||||
-r : recursively load all files in subdirectories (default: off)
|
||||
-B# : split input into blocks of size # (default: no split)
|
||||
-# : use compression level # (default: 3)
|
||||
-D # : use # as a dictionary (default: create one)
|
||||
-i# : nb benchmark rounds (default: 6)
|
||||
--nbDicts=# : set nb of dictionaries to # (default: one per block)
|
||||
-h : help (this text)
|
||||
```
|
817
contrib/largeNbDicts/largeNbDicts.c
Normal file
817
contrib/largeNbDicts/largeNbDicts.c
Normal file
@ -0,0 +1,817 @@
|
||||
/*
|
||||
* Copyright (c) 2018-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
/* largeNbDicts
|
||||
* This is a benchmark test tool
|
||||
* dedicated to the specific case of dictionary decompression
|
||||
* using a very large nb of dictionaries
|
||||
* thus suffering latency from lots of cache misses.
|
||||
* It's created in a bid to investigate performance and find optimizations. */
|
||||
|
||||
|
||||
/*--- Dependencies ---*/
|
||||
|
||||
#include <stddef.h> /* size_t */
|
||||
#include <stdlib.h> /* malloc, free, abort */
|
||||
#include <stdio.h> /* fprintf */
|
||||
#include <limits.h> /* UINT_MAX */
|
||||
#include <assert.h> /* assert */
|
||||
|
||||
#include "util.h"
|
||||
#include "benchfn.h"
|
||||
#define ZSTD_STATIC_LINKING_ONLY
|
||||
#include "zstd.h"
|
||||
#include "zdict.h"
|
||||
|
||||
|
||||
/*--- Constants --- */
|
||||
|
||||
#define KB *(1<<10)
|
||||
#define MB *(1<<20)
|
||||
|
||||
#define BLOCKSIZE_DEFAULT 0 /* no slicing into blocks */
|
||||
#define DICTSIZE (4 KB)
|
||||
#define CLEVEL_DEFAULT 3
|
||||
|
||||
#define BENCH_TIME_DEFAULT_S 6
|
||||
#define RUN_TIME_DEFAULT_MS 1000
|
||||
#define BENCH_TIME_DEFAULT_MS (BENCH_TIME_DEFAULT_S * RUN_TIME_DEFAULT_MS)
|
||||
|
||||
#define DISPLAY_LEVEL_DEFAULT 3
|
||||
|
||||
#define BENCH_SIZE_MAX (1200 MB)
|
||||
|
||||
|
||||
/*--- Macros ---*/
|
||||
|
||||
#define CONTROL(c) { if (!(c)) abort(); }
|
||||
#undef MIN
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
|
||||
/*--- Display Macros ---*/
|
||||
|
||||
#define DISPLAY(...) fprintf(stdout, __VA_ARGS__)
|
||||
#define DISPLAYLEVEL(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } }
|
||||
static int g_displayLevel = DISPLAY_LEVEL_DEFAULT; /* 0 : no display, 1: errors, 2 : + result + interaction + warnings, 3 : + progression, 4 : + information */
|
||||
|
||||
|
||||
/*--- buffer_t ---*/
|
||||
|
||||
/* A heap buffer : `size` bytes are in use, out of `capacity` allocated */
typedef struct {
    void*  ptr;        /* start of the allocation */
    size_t size;       /* nb of bytes currently holding data */
    size_t capacity;   /* nb of bytes allocated at `ptr` */
} buffer_t;

/* value returned by buffer constructors to signal an error */
static const buffer_t kBuffNull = { NULL, 0, 0 };
|
||||
|
||||
/* @return : kBuffNull if any error */
|
||||
static buffer_t createBuffer(size_t capacity)
|
||||
{
|
||||
assert(capacity > 0);
|
||||
void* const ptr = malloc(capacity);
|
||||
if (ptr==NULL) return kBuffNull;
|
||||
|
||||
buffer_t buffer;
|
||||
buffer.ptr = ptr;
|
||||
buffer.capacity = capacity;
|
||||
buffer.size = 0;
|
||||
return buffer;
|
||||
}
|
||||
|
||||
/* freeBuffer() :
 * releases the memory owned by `buff` */
static void freeBuffer(buffer_t buff)
{
    void* const owned = buff.ptr;
    free(owned);
}
|
||||
|
||||
|
||||
/* fillBuffer_fromHandle() :
 * reads up to `buff->capacity` bytes from `f` into the buffer,
 * and records the nb of bytes actually read into `buff->size` */
static void fillBuffer_fromHandle(buffer_t* buff, FILE* f)
{
    buff->size = fread(buff->ptr, 1, buff->capacity, f);
}
|
||||
|
||||
|
||||
/* @return : kBuffNull if any error */
|
||||
static buffer_t createBuffer_fromFile(const char* fileName)
|
||||
{
|
||||
U64 const fileSize = UTIL_getFileSize(fileName);
|
||||
size_t const bufferSize = (size_t) fileSize;
|
||||
|
||||
if (fileSize == UTIL_FILESIZE_UNKNOWN) return kBuffNull;
|
||||
assert((U64)bufferSize == fileSize); /* check overflow */
|
||||
|
||||
{ FILE* const f = fopen(fileName, "rb");
|
||||
if (f == NULL) return kBuffNull;
|
||||
|
||||
buffer_t buff = createBuffer(bufferSize);
|
||||
CONTROL(buff.ptr != NULL);
|
||||
|
||||
fillBuffer_fromHandle(&buff, f);
|
||||
CONTROL(buff.size == buff.capacity);
|
||||
|
||||
fclose(f); /* do nothing specific if fclose() fails */
|
||||
return buff;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* @return : kBuffNull if any error */
|
||||
static buffer_t
|
||||
createDictionaryBuffer(const char* dictionaryName,
|
||||
const void* srcBuffer,
|
||||
const size_t* srcBlockSizes, size_t nbBlocks,
|
||||
size_t requestedDictSize)
|
||||
{
|
||||
if (dictionaryName) {
|
||||
DISPLAYLEVEL(3, "loading dictionary %s \n", dictionaryName);
|
||||
return createBuffer_fromFile(dictionaryName); /* note : result might be kBuffNull */
|
||||
|
||||
} else {
|
||||
|
||||
DISPLAYLEVEL(3, "creating dictionary, of target size %u bytes \n",
|
||||
(unsigned)requestedDictSize);
|
||||
void* const dictBuffer = malloc(requestedDictSize);
|
||||
CONTROL(dictBuffer != NULL);
|
||||
|
||||
assert(nbBlocks <= UINT_MAX);
|
||||
size_t const dictSize = ZDICT_trainFromBuffer(dictBuffer, requestedDictSize,
|
||||
srcBuffer,
|
||||
srcBlockSizes, (unsigned)nbBlocks);
|
||||
CONTROL(!ZSTD_isError(dictSize));
|
||||
|
||||
buffer_t result;
|
||||
result.ptr = dictBuffer;
|
||||
result.capacity = requestedDictSize;
|
||||
result.size = dictSize;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*! BMK_loadFiles() :
|
||||
* Loads `buffer`, with content from files listed within `fileNamesTable`.
|
||||
* Fills `buffer` entirely.
|
||||
* @return : 0 on success, !=0 on error */
|
||||
static int loadFiles(void* buffer, size_t bufferSize,
|
||||
size_t* fileSizes,
|
||||
const char* const * fileNamesTable, unsigned nbFiles)
|
||||
{
|
||||
size_t pos = 0, totalSize = 0;
|
||||
|
||||
for (unsigned n=0; n<nbFiles; n++) {
|
||||
U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
|
||||
if (UTIL_isDirectory(fileNamesTable[n])) {
|
||||
fileSizes[n] = 0;
|
||||
continue;
|
||||
}
|
||||
if (fileSize == UTIL_FILESIZE_UNKNOWN) {
|
||||
fileSizes[n] = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
FILE* const f = fopen(fileNamesTable[n], "rb");
|
||||
assert(f!=NULL);
|
||||
|
||||
assert(pos <= bufferSize);
|
||||
assert(fileSize <= bufferSize - pos);
|
||||
|
||||
{ size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
|
||||
assert(readSize == fileSize);
|
||||
pos += readSize;
|
||||
}
|
||||
fileSizes[n] = (size_t)fileSize;
|
||||
totalSize += (size_t)fileSize;
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
assert(totalSize == bufferSize);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*--- slice_collection_t ---*/
|
||||
|
||||
/* A set of `nbSlices` memory areas :
 * slice n starts at slicePtrs[n] and spans capacities[n] bytes */
typedef struct {
    void**  slicePtrs;    /* table of nbSlices start addresses */
    size_t* capacities;   /* table of nbSlices sizes, in bytes */
    size_t  nbSlices;
} slice_collection_t;

/* value returned to signal an error */
static const slice_collection_t kNullCollection = { NULL, NULL, 0 };
|
||||
|
||||
/* freeSliceCollection() :
 * releases the two tables of the collection.
 * The memory referenced by the slices themselves is left untouched. */
static void freeSliceCollection(slice_collection_t collection)
{
    void** const ptrTable = collection.slicePtrs;
    size_t* const sizeTable = collection.capacities;
    free(ptrTable);
    free(sizeTable);
}
|
||||
|
||||
/* shrinkSizes() :
|
||||
* downsizes sizes of slices within collection, according to `newSizes`.
|
||||
* every `newSizes` entry must be <= than its corresponding collection size */
|
||||
void shrinkSizes(slice_collection_t collection,
|
||||
const size_t* newSizes) /* presumed same size as collection */
|
||||
{
|
||||
size_t const nbSlices = collection.nbSlices;
|
||||
for (size_t blockNb = 0; blockNb < nbSlices; blockNb++) {
|
||||
assert(newSizes[blockNb] <= collection.capacities[blockNb]);
|
||||
collection.capacities[blockNb] = newSizes[blockNb];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* splitSlices() :
|
||||
* nbSlices : if == 0, nbSlices is automatically determined from srcSlices and blockSize.
|
||||
* otherwise, creates exactly nbSlices slices,
|
||||
* by either truncating input (when smaller)
|
||||
* or repeating input from beginning */
|
||||
static slice_collection_t
|
||||
splitSlices(slice_collection_t srcSlices, size_t blockSize, size_t nbSlices)
|
||||
{
|
||||
if (blockSize==0) blockSize = (size_t)(-1); /* means "do not cut" */
|
||||
size_t nbSrcBlocks = 0;
|
||||
for (size_t ssnb=0; ssnb < srcSlices.nbSlices; ssnb++) {
|
||||
size_t pos = 0;
|
||||
while (pos <= srcSlices.capacities[ssnb]) {
|
||||
nbSrcBlocks++;
|
||||
pos += blockSize;
|
||||
}
|
||||
}
|
||||
|
||||
if (nbSlices == 0) nbSlices = nbSrcBlocks;
|
||||
|
||||
void** const sliceTable = (void**)malloc(nbSlices * sizeof(*sliceTable));
|
||||
size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
|
||||
if (sliceTable == NULL || capacities == NULL) {
|
||||
free(sliceTable);
|
||||
free(capacities);
|
||||
return kNullCollection;
|
||||
}
|
||||
|
||||
size_t ssnb = 0;
|
||||
for (size_t sliceNb=0; sliceNb < nbSlices; ) {
|
||||
ssnb = (ssnb + 1) % srcSlices.nbSlices;
|
||||
size_t pos = 0;
|
||||
char* const ptr = (char*)srcSlices.slicePtrs[ssnb];
|
||||
while (pos < srcSlices.capacities[ssnb] && sliceNb < nbSlices) {
|
||||
size_t const size = MIN(blockSize, srcSlices.capacities[ssnb] - pos);
|
||||
sliceTable[sliceNb] = ptr + pos;
|
||||
capacities[sliceNb] = size;
|
||||
sliceNb++;
|
||||
pos += blockSize;
|
||||
}
|
||||
}
|
||||
|
||||
slice_collection_t result;
|
||||
result.nbSlices = nbSlices;
|
||||
result.slicePtrs = sliceTable;
|
||||
result.capacities = capacities;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/* sliceCollection_totalCapacity() :
 * @return : sum of the capacities of all slices of `sc` */
static size_t sliceCollection_totalCapacity(slice_collection_t sc)
{
    const size_t* const caps = sc.capacities;
    size_t sum = 0;
    size_t n = 0;
    while (n < sc.nbSlices) {
        sum += caps[n];
        n++;
    }
    return sum;
}
|
||||
|
||||
|
||||
/* --- buffer collection --- */
|
||||
|
||||
typedef struct {
|
||||
buffer_t buffer;
|
||||
slice_collection_t slices;
|
||||
} buffer_collection_t;
|
||||
|
||||
|
||||
/* freeBufferCollection() :
 * releases both the buffer and the slice tables of `bc` */
static void freeBufferCollection(buffer_collection_t bc)
{
    buffer_t const memory = bc.buffer;
    slice_collection_t const tables = bc.slices;
    freeBuffer(memory);
    freeSliceCollection(tables);
}
|
||||
|
||||
|
||||
static buffer_collection_t
|
||||
createBufferCollection_fromSliceCollectionSizes(slice_collection_t sc)
|
||||
{
|
||||
size_t const bufferSize = sliceCollection_totalCapacity(sc);
|
||||
|
||||
buffer_t buffer = createBuffer(bufferSize);
|
||||
CONTROL(buffer.ptr != NULL);
|
||||
|
||||
size_t const nbSlices = sc.nbSlices;
|
||||
void** const slices = (void**)malloc(nbSlices * sizeof(*slices));
|
||||
CONTROL(slices != NULL);
|
||||
|
||||
size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
|
||||
CONTROL(capacities != NULL);
|
||||
|
||||
char* const ptr = (char*)buffer.ptr;
|
||||
size_t pos = 0;
|
||||
for (size_t n=0; n < nbSlices; n++) {
|
||||
capacities[n] = sc.capacities[n];
|
||||
slices[n] = ptr + pos;
|
||||
pos += capacities[n];
|
||||
}
|
||||
|
||||
buffer_collection_t result;
|
||||
result.buffer = buffer;
|
||||
result.slices.nbSlices = nbSlices;
|
||||
result.slices.capacities = capacities;
|
||||
result.slices.slicePtrs = slices;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/* @return : kBuffNull if any error */
|
||||
static buffer_collection_t
|
||||
createBufferCollection_fromFiles(const char* const * fileNamesTable, unsigned nbFiles)
|
||||
{
|
||||
U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
|
||||
assert(totalSizeToLoad != UTIL_FILESIZE_UNKNOWN);
|
||||
assert(totalSizeToLoad <= BENCH_SIZE_MAX);
|
||||
size_t const loadedSize = (size_t)totalSizeToLoad;
|
||||
assert(loadedSize > 0);
|
||||
void* const srcBuffer = malloc(loadedSize);
|
||||
assert(srcBuffer != NULL);
|
||||
|
||||
assert(nbFiles > 0);
|
||||
size_t* const fileSizes = (size_t*)calloc(nbFiles, sizeof(*fileSizes));
|
||||
assert(fileSizes != NULL);
|
||||
|
||||
/* Load input buffer */
|
||||
int const errorCode = loadFiles(srcBuffer, loadedSize,
|
||||
fileSizes,
|
||||
fileNamesTable, nbFiles);
|
||||
assert(errorCode == 0);
|
||||
|
||||
void** sliceTable = (void**)malloc(nbFiles * sizeof(*sliceTable));
|
||||
assert(sliceTable != NULL);
|
||||
|
||||
char* const ptr = (char*)srcBuffer;
|
||||
size_t pos = 0;
|
||||
unsigned fileNb = 0;
|
||||
for ( ; (pos < loadedSize) && (fileNb < nbFiles); fileNb++) {
|
||||
sliceTable[fileNb] = ptr + pos;
|
||||
pos += fileSizes[fileNb];
|
||||
}
|
||||
assert(pos == loadedSize);
|
||||
assert(fileNb == nbFiles);
|
||||
|
||||
|
||||
buffer_t buffer;
|
||||
buffer.ptr = srcBuffer;
|
||||
buffer.capacity = loadedSize;
|
||||
buffer.size = loadedSize;
|
||||
|
||||
slice_collection_t slices;
|
||||
slices.slicePtrs = sliceTable;
|
||||
slices.capacities = fileSizes;
|
||||
slices.nbSlices = nbFiles;
|
||||
|
||||
buffer_collection_t bc;
|
||||
bc.buffer = buffer;
|
||||
bc.slices = slices;
|
||||
return bc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*--- ddict_collection_t ---*/
|
||||
|
||||
typedef struct {
|
||||
ZSTD_DDict** ddicts;
|
||||
size_t nbDDict;
|
||||
} ddict_collection_t;
|
||||
|
||||
static const ddict_collection_t kNullDDictCollection = { NULL, 0 };
|
||||
|
||||
/* freeDDictCollection() :
 * releases every dictionary of the collection, then the table itself */
static void freeDDictCollection(ddict_collection_t ddictc)
{
    ZSTD_DDict** const table = ddictc.ddicts;
    size_t const count = ddictc.nbDDict;
    size_t n;
    for (n = 0; n < count; n++) {
        ZSTD_freeDDict(table[n]);
    }
    free(table);
}
|
||||
|
||||
/* returns .buffers=NULL if operation fails */
|
||||
static ddict_collection_t createDDictCollection(const void* dictBuffer, size_t dictSize, size_t nbDDict)
|
||||
{
|
||||
ZSTD_DDict** const ddicts = malloc(nbDDict * sizeof(ZSTD_DDict*));
|
||||
assert(ddicts != NULL);
|
||||
if (ddicts==NULL) return kNullDDictCollection;
|
||||
for (size_t dictNb=0; dictNb < nbDDict; dictNb++) {
|
||||
ddicts[dictNb] = ZSTD_createDDict(dictBuffer, dictSize);
|
||||
assert(ddicts[dictNb] != NULL);
|
||||
}
|
||||
ddict_collection_t ddictc;
|
||||
ddictc.ddicts = ddicts;
|
||||
ddictc.nbDDict = nbDDict;
|
||||
return ddictc;
|
||||
}
|
||||
|
||||
|
||||
/* mess with addresses, so that linear scanning dictionaries != linear address scanning */
|
||||
void shuffleDictionaries(ddict_collection_t dicts)
|
||||
{
|
||||
size_t const nbDicts = dicts.nbDDict;
|
||||
for (size_t r=0; r<nbDicts; r++) {
|
||||
size_t const d = rand() % nbDicts;
|
||||
ZSTD_DDict* tmpd = dicts.ddicts[d];
|
||||
dicts.ddicts[d] = dicts.ddicts[r];
|
||||
dicts.ddicts[r] = tmpd;
|
||||
}
|
||||
for (size_t r=0; r<nbDicts; r++) {
|
||||
size_t const d1 = rand() % nbDicts;
|
||||
size_t const d2 = rand() % nbDicts;
|
||||
ZSTD_DDict* tmpd = dicts.ddicts[d1];
|
||||
dicts.ddicts[d1] = dicts.ddicts[d2];
|
||||
dicts.ddicts[d2] = tmpd;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* --- Compression --- */
|
||||
|
||||
/* compressBlocks() :
|
||||
* @return : total compressed size of all blocks,
|
||||
* or 0 if error.
|
||||
*/
|
||||
static size_t compressBlocks(size_t* cSizes, /* optional (can be NULL). If present, must contain at least nbBlocks fields */
|
||||
slice_collection_t dstBlockBuffers,
|
||||
slice_collection_t srcBlockBuffers,
|
||||
ZSTD_CDict* cdict, int cLevel)
|
||||
{
|
||||
size_t const nbBlocks = srcBlockBuffers.nbSlices;
|
||||
assert(dstBlockBuffers.nbSlices == srcBlockBuffers.nbSlices);
|
||||
|
||||
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
|
||||
assert(cctx != NULL);
|
||||
|
||||
size_t totalCSize = 0;
|
||||
for (size_t blockNb=0; blockNb < nbBlocks; blockNb++) {
|
||||
size_t cBlockSize;
|
||||
if (cdict == NULL) {
|
||||
cBlockSize = ZSTD_compressCCtx(cctx,
|
||||
dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
|
||||
srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
|
||||
cLevel);
|
||||
} else {
|
||||
cBlockSize = ZSTD_compress_usingCDict(cctx,
|
||||
dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
|
||||
srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
|
||||
cdict);
|
||||
}
|
||||
CONTROL(!ZSTD_isError(cBlockSize));
|
||||
if (cSizes) cSizes[blockNb] = cBlockSize;
|
||||
totalCSize += cBlockSize;
|
||||
}
|
||||
return totalCSize;
|
||||
}
|
||||
|
||||
|
||||
/* --- Benchmark --- */
|
||||
|
||||
typedef struct {
|
||||
ZSTD_DCtx* dctx;
|
||||
size_t nbDicts;
|
||||
size_t dictNb;
|
||||
ddict_collection_t dictionaries;
|
||||
} decompressInstructions;
|
||||
|
||||
/* createDecompressInstructions() :
 * creates a decompression context,
 * and prepares to go through `dictionaries`, starting from the first one.
 * `dictionaries` is referenced, not duplicated. */
decompressInstructions createDecompressInstructions(ddict_collection_t dictionaries)
{
    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
    assert(dctx != NULL);

    decompressInstructions di;
    di.dctx = dctx;
    di.dictionaries = dictionaries;
    di.nbDicts = dictionaries.nbDDict;
    di.dictNb = 0;
    return di;
}
|
||||
|
||||
/* freeDecompressInstructions() :
 * releases the decompression context.
 * The dictionaries are not released here. */
void freeDecompressInstructions(decompressInstructions di)
{
    ZSTD_DCtx* const dctx = di.dctx;
    ZSTD_freeDCtx(dctx);
}
|
||||
|
||||
/* benched function */
|
||||
size_t decompress(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* payload)
|
||||
{
|
||||
decompressInstructions* const di = (decompressInstructions*) payload;
|
||||
|
||||
size_t const result = ZSTD_decompress_usingDDict(di->dctx,
|
||||
dst, dstCapacity,
|
||||
src, srcSize,
|
||||
di->dictionaries.ddicts[di->dictNb]);
|
||||
|
||||
di->dictNb = di->dictNb + 1;
|
||||
if (di->dictNb >= di->nbDicts) di->dictNb = 0;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
static int benchMem(slice_collection_t dstBlocks,
|
||||
slice_collection_t srcBlocks,
|
||||
ddict_collection_t dictionaries,
|
||||
int nbRounds)
|
||||
{
|
||||
assert(dstBlocks.nbSlices == srcBlocks.nbSlices);
|
||||
|
||||
unsigned const ms_per_round = RUN_TIME_DEFAULT_MS;
|
||||
unsigned const total_time_ms = nbRounds * ms_per_round;
|
||||
|
||||
double bestSpeed = 0.;
|
||||
|
||||
BMK_timedFnState_t* const benchState =
|
||||
BMK_createTimedFnState(total_time_ms, ms_per_round);
|
||||
decompressInstructions di = createDecompressInstructions(dictionaries);
|
||||
BMK_benchParams_t const bp = {
|
||||
.benchFn = decompress,
|
||||
.benchPayload = &di,
|
||||
.initFn = NULL,
|
||||
.initPayload = NULL,
|
||||
.errorFn = ZSTD_isError,
|
||||
.blockCount = dstBlocks.nbSlices,
|
||||
.srcBuffers = (const void* const*) srcBlocks.slicePtrs,
|
||||
.srcSizes = srcBlocks.capacities,
|
||||
.dstBuffers = dstBlocks.slicePtrs,
|
||||
.dstCapacities = dstBlocks.capacities,
|
||||
.blockResults = NULL
|
||||
};
|
||||
|
||||
for (;;) {
|
||||
BMK_runOutcome_t const outcome = BMK_benchTimedFn(benchState, bp);
|
||||
CONTROL(BMK_isSuccessful_runOutcome(outcome));
|
||||
|
||||
BMK_runTime_t const result = BMK_extract_runTime(outcome);
|
||||
U64 const dTime_ns = result.nanoSecPerRun;
|
||||
double const dTime_sec = (double)dTime_ns / 1000000000;
|
||||
size_t const srcSize = result.sumOfReturn;
|
||||
double const dSpeed_MBps = (double)srcSize / dTime_sec / (1 MB);
|
||||
if (dSpeed_MBps > bestSpeed) bestSpeed = dSpeed_MBps;
|
||||
DISPLAY("Decompression Speed : %.1f MB/s \r", bestSpeed);
|
||||
fflush(stdout);
|
||||
if (BMK_isCompleted_TimedFn(benchState)) break;
|
||||
}
|
||||
DISPLAY("\n");
|
||||
|
||||
freeDecompressInstructions(di);
|
||||
BMK_freeTimedFnState(benchState);
|
||||
|
||||
return 0; /* success */
|
||||
}
|
||||
|
||||
|
||||
/*! bench() :
|
||||
* fileName : file to load for benchmarking purpose
|
||||
* dictionary : optional (can be NULL), file to load as dictionary,
|
||||
* if none provided : will be calculated on the fly by the program.
|
||||
* @return : 0 is success, 1+ otherwise */
|
||||
int bench(const char** fileNameTable, unsigned nbFiles,
|
||||
const char* dictionary,
|
||||
size_t blockSize, int clevel,
|
||||
unsigned nbDictMax, unsigned nbBlocks,
|
||||
int nbRounds)
|
||||
{
|
||||
int result = 0;
|
||||
|
||||
DISPLAYLEVEL(3, "loading %u files... \n", nbFiles);
|
||||
buffer_collection_t const srcs = createBufferCollection_fromFiles(fileNameTable, nbFiles);
|
||||
CONTROL(srcs.buffer.ptr != NULL);
|
||||
buffer_t srcBuffer = srcs.buffer;
|
||||
size_t const srcSize = srcBuffer.size;
|
||||
DISPLAYLEVEL(3, "created src buffer of size %.1f MB \n",
|
||||
(double)srcSize / (1 MB));
|
||||
|
||||
slice_collection_t const srcSlices = splitSlices(srcs.slices, blockSize, nbBlocks);
|
||||
nbBlocks = (unsigned)(srcSlices.nbSlices);
|
||||
DISPLAYLEVEL(3, "split input into %u blocks ", nbBlocks);
|
||||
if (blockSize)
|
||||
DISPLAYLEVEL(3, "of max size %u bytes ", (unsigned)blockSize);
|
||||
DISPLAYLEVEL(3, "\n");
|
||||
size_t const totalSrcSlicesSize = sliceCollection_totalCapacity(srcSlices);
|
||||
|
||||
|
||||
size_t* const dstCapacities = malloc(nbBlocks * sizeof(*dstCapacities));
|
||||
CONTROL(dstCapacities != NULL);
|
||||
size_t dstBufferCapacity = 0;
|
||||
for (size_t bnb=0; bnb<nbBlocks; bnb++) {
|
||||
dstCapacities[bnb] = ZSTD_compressBound(srcSlices.capacities[bnb]);
|
||||
dstBufferCapacity += dstCapacities[bnb];
|
||||
}
|
||||
|
||||
buffer_t dstBuffer = createBuffer(dstBufferCapacity);
|
||||
CONTROL(dstBuffer.ptr != NULL);
|
||||
|
||||
void** const sliceTable = malloc(nbBlocks * sizeof(*sliceTable));
|
||||
CONTROL(sliceTable != NULL);
|
||||
|
||||
{ char* const ptr = dstBuffer.ptr;
|
||||
size_t pos = 0;
|
||||
for (size_t snb=0; snb < nbBlocks; snb++) {
|
||||
sliceTable[snb] = ptr + pos;
|
||||
pos += dstCapacities[snb];
|
||||
} }
|
||||
|
||||
slice_collection_t dstSlices;
|
||||
dstSlices.capacities = dstCapacities;
|
||||
dstSlices.slicePtrs = sliceTable;
|
||||
dstSlices.nbSlices = nbBlocks;
|
||||
|
||||
|
||||
/* dictionary determination */
|
||||
buffer_t const dictBuffer = createDictionaryBuffer(dictionary,
|
||||
srcs.buffer.ptr,
|
||||
srcs.slices.capacities, srcs.slices.nbSlices,
|
||||
DICTSIZE);
|
||||
CONTROL(dictBuffer.ptr != NULL);
|
||||
|
||||
ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer.ptr, dictBuffer.size, clevel);
|
||||
CONTROL(cdict != NULL);
|
||||
|
||||
size_t const cTotalSizeNoDict = compressBlocks(NULL, dstSlices, srcSlices, NULL, clevel);
|
||||
CONTROL(cTotalSizeNoDict != 0);
|
||||
DISPLAYLEVEL(3, "compressing at level %u without dictionary : Ratio=%.2f (%u bytes) \n",
|
||||
clevel,
|
||||
(double)totalSrcSlicesSize / cTotalSizeNoDict, (unsigned)cTotalSizeNoDict);
|
||||
|
||||
size_t* const cSizes = malloc(nbBlocks * sizeof(size_t));
|
||||
CONTROL(cSizes != NULL);
|
||||
|
||||
size_t const cTotalSize = compressBlocks(cSizes, dstSlices, srcSlices, cdict, clevel);
|
||||
CONTROL(cTotalSize != 0);
|
||||
DISPLAYLEVEL(3, "compressed using a %u bytes dictionary : Ratio=%.2f (%u bytes) \n",
|
||||
(unsigned)dictBuffer.size,
|
||||
(double)totalSrcSlicesSize / cTotalSize, (unsigned)cTotalSize);
|
||||
|
||||
/* now dstSlices contain the real compressed size of each block, instead of the maximum capacity */
|
||||
shrinkSizes(dstSlices, cSizes);
|
||||
|
||||
size_t const dictMem = ZSTD_estimateDDictSize(dictBuffer.size, ZSTD_dlm_byCopy);
|
||||
unsigned const nbDicts = nbDictMax ? nbDictMax : nbBlocks;
|
||||
size_t const allDictMem = dictMem * nbDicts;
|
||||
DISPLAYLEVEL(3, "generating %u dictionaries, using %.1f MB of memory \n",
|
||||
nbDicts, (double)allDictMem / (1 MB));
|
||||
|
||||
ddict_collection_t const dictionaries = createDDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts);
|
||||
CONTROL(dictionaries.ddicts != NULL);
|
||||
|
||||
shuffleDictionaries(dictionaries);
|
||||
|
||||
buffer_collection_t resultCollection = createBufferCollection_fromSliceCollectionSizes(srcSlices);
|
||||
CONTROL(resultCollection.buffer.ptr != NULL);
|
||||
|
||||
result = benchMem(resultCollection.slices, dstSlices, dictionaries, nbRounds);
|
||||
|
||||
/* free all heap objects in reverse order */
|
||||
freeBufferCollection(resultCollection);
|
||||
freeDDictCollection(dictionaries);
|
||||
free(cSizes);
|
||||
ZSTD_freeCDict(cdict);
|
||||
freeBuffer(dictBuffer);
|
||||
freeSliceCollection(dstSlices);
|
||||
freeBuffer(dstBuffer);
|
||||
freeSliceCollection(srcSlices);
|
||||
freeBufferCollection(srcs);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* --- Command Line --- */
|
||||
|
||||
/*! readU32FromChar() :
 * Parses the unsigned decimal number found at `*stringPtr`,
 * optionally followed by a size suffix :
 * K, KB, KiB multiply by 1024 ; M, MB, MiB multiply by 1024*1024.
 * `*stringPtr` is advanced past everything that was consumed.
 * @return : the parsed value (0 when no digit is present).
 * Note : overflow is only guarded by assert(),
 *        so it is detected in builds where assertions are enabled. */
static unsigned readU32FromChar(const char** stringPtr)
{
    unsigned const maxBeforeDigit = (((unsigned)(-1)) / 10) - 1;
    unsigned const maxBeforeShift = ((unsigned)(-1)) >> 10;
    const char* cursor = *stringPtr;
    unsigned value = 0;

    /* decimal digits */
    for ( ; (*cursor >= '0') && (*cursor <= '9'); cursor++) {
        assert(value <= maxBeforeDigit);   /* check overflow */
        value = (value * 10) + (unsigned)(*cursor - '0');
    }

    /* optional size suffix */
    if ((*cursor == 'K') || (*cursor == 'M')) {
        int const isMega = (*cursor == 'M');
        assert(value <= maxBeforeShift);   /* check overflow */
        value <<= 10;
        if (isMega) {
            assert(value <= maxBeforeShift);   /* check overflow */
            value <<= 10;
        }
        cursor++;   /* skip `K` or `M` */
        if (*cursor == 'i') cursor++;
        if (*cursor == 'B') cursor++;
    }

    *stringPtr = cursor;
    return value;
}
|
||||
|
||||
/** longCommandWArg() :
 *  Tests whether the text at *stringPtr starts with `longCommand`.
 *  On a match, *stringPtr is moved to the first character following
 *  `longCommand` and the function returns 1.
 *  Otherwise *stringPtr is left untouched and the function returns 0.
 */
static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
{
    size_t const prefixLen = strlen(longCommand);
    if (strncmp(*stringPtr, longCommand, prefixLen) != 0) return 0;
    *stringPtr += prefixLen;
    return 1;
}
|
||||
|
||||
|
||||
/* usage() :
 * Prints the command-line help text through DISPLAY(),
 * using `exeName` as the program name in the synopsis line.
 * @return : 0, so that callers can return it directly as an exit code. */
int usage(const char* exeName)
|
||||
{
|
||||
DISPLAY (" \n");
|
||||
DISPLAY (" %s [Options] filename(s) \n", exeName);
|
||||
DISPLAY (" \n");
|
||||
DISPLAY ("Options : \n");
|
||||
DISPLAY ("-r : recursively load all files in subdirectories (default: off) \n");
|
||||
DISPLAY ("-B# : split input into blocks of size # (default: no split) \n");
|
||||
DISPLAY ("-# : use compression level # (default: %u) \n", CLEVEL_DEFAULT);
|
||||
DISPLAY ("-D # : use # as a dictionary (default: create one) \n");
|
||||
DISPLAY ("-i# : nb benchmark rounds (default: %u) \n", BENCH_TIME_DEFAULT_S);
|
||||
DISPLAY ("--nbBlocks=#: use # blocks for bench (default: one per file) \n");
|
||||
DISPLAY ("--nbDicts=# : create # dictionaries for bench (default: one per block) \n");
|
||||
DISPLAY ("-h : help (this text) \n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* bad_usage() :
 * Prints a "bad usage" banner followed by the help text of usage().
 * @return : 1, so that callers can return it directly as a failure exit code. */
int bad_usage(const char* exeName)
|
||||
{
|
||||
DISPLAY (" bad usage : \n");
|
||||
usage(exeName);
|
||||
return 1;
|
||||
}
|
||||
|
||||
int main (int argc, const char** argv)
|
||||
{
|
||||
int recursiveMode = 0;
|
||||
int nbRounds = BENCH_TIME_DEFAULT_S;
|
||||
const char* const exeName = argv[0];
|
||||
|
||||
if (argc < 2) return bad_usage(exeName);
|
||||
|
||||
const char** nameTable = (const char**)malloc(argc * sizeof(const char*));
|
||||
assert(nameTable != NULL);
|
||||
unsigned nameIdx = 0;
|
||||
|
||||
const char* dictionary = NULL;
|
||||
int cLevel = CLEVEL_DEFAULT;
|
||||
size_t blockSize = BLOCKSIZE_DEFAULT;
|
||||
unsigned nbDicts = 0; /* determine nbDicts automatically: 1 dictionary per block */
|
||||
unsigned nbBlocks = 0; /* determine nbBlocks automatically, from source and blockSize */
|
||||
|
||||
for (int argNb = 1; argNb < argc ; argNb++) {
|
||||
const char* argument = argv[argNb];
|
||||
if (!strcmp(argument, "-h")) { free(nameTable); return usage(exeName); }
|
||||
if (!strcmp(argument, "-r")) { recursiveMode = 1; continue; }
|
||||
if (!strcmp(argument, "-D")) { argNb++; assert(argNb < argc); dictionary = argv[argNb]; continue; }
|
||||
if (longCommandWArg(&argument, "-i")) { nbRounds = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--dictionary=")) { dictionary = argument; continue; }
|
||||
if (longCommandWArg(&argument, "-B")) { blockSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--blockSize=")) { blockSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--nbDicts=")) { nbDicts = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--nbBlocks=")) { nbBlocks = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--clevel=")) { cLevel = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "-")) { cLevel = readU32FromChar(&argument); continue; }
|
||||
/* anything that's not a command is a filename */
|
||||
nameTable[nameIdx++] = argument;
|
||||
}
|
||||
|
||||
const char** filenameTable = nameTable;
|
||||
unsigned nbFiles = nameIdx;
|
||||
char* buffer_containing_filenames = NULL;
|
||||
|
||||
if (recursiveMode) {
|
||||
#ifndef UTIL_HAS_CREATEFILELIST
|
||||
assert(0); /* missing capability, do not run */
|
||||
#endif
|
||||
filenameTable = UTIL_createFileList(nameTable, nameIdx, &buffer_containing_filenames, &nbFiles, 1 /* follow_links */);
|
||||
}
|
||||
|
||||
int result = bench(filenameTable, nbFiles, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds);
|
||||
|
||||
free(buffer_containing_filenames);
|
||||
free(nameTable);
|
||||
|
||||
return result;
|
||||
}
|
6
contrib/premake/premake4.lua
Normal file
6
contrib/premake/premake4.lua
Normal file
@ -0,0 +1,6 @@
|
||||
-- Include zstd.lua in your GENie or premake4 file, which exposes a project_zstd function
|
||||
dofile('zstd.lua')
|
||||
|
||||
-- Example solution with Debug and Release configurations;
-- it adds the zstd project using the library sources found in ../../lib/
solution 'example'
|
||||
configurations { 'Debug', 'Release' }
|
||||
project_zstd('../../lib/')
|
80
contrib/premake/zstd.lua
Normal file
80
contrib/premake/zstd.lua
Normal file
@ -0,0 +1,80 @@
|
||||
-- This GENie/premake file copies the behavior of the Makefile in the lib folder.
-- Basic usage: project_zstd(ZSTD_DIR)
--
-- Parameters (all but `dir` are optional):
--   dir           : path of the zstd lib folder; it is concatenated directly with
--                   file names, so it must end with a path separator
--   compression   : add the compression sources (default: true)
--   decompression : add the decompression sources (default: true)
--   deprecated    : add the deprecated sources (default: false)
--   dictbuilder   : add the dictionary builder sources (default: false)
--   legacy        : value given to ZSTD_LEGACY_SUPPORT (default: 0, no legacy support)

function project_zstd(dir, compression, decompression, deprecated, dictbuilder, legacy)
	if compression == nil then compression = true end
	if decompression == nil then decompression = true end
	if deprecated == nil then deprecated = false end
	if dictbuilder == nil then dictbuilder = false end

	if legacy == nil then legacy = 0 end

	-- dictbuilder and deprecated are only kept when compression is enabled
	if not compression then
		dictbuilder = false
		deprecated = false
	end

	-- legacy and deprecated are only kept when decompression is enabled
	if not decompression then
		legacy = 0
		deprecated = false
	end

	project 'zstd'
		kind 'StaticLib'
		language 'C'

		files {
			dir .. 'zstd.h',
			dir .. 'common/**.c',
			dir .. 'common/**.h'
		}

		if compression then
			files {
				dir .. 'compress/**.c',
				dir .. 'compress/**.h'
			}
		end

		if decompression then
			files {
				dir .. 'decompress/**.c',
				dir .. 'decompress/**.h'
			}
		end

		if dictbuilder then
			files {
				dir .. 'dictBuilder/**.c',
				dir .. 'dictBuilder/**.h'
			}
		end

		if deprecated then
			files {
				dir .. 'deprecated/**.c',
				dir .. 'deprecated/**.h'
			}
		end

		if legacy ~= 0 then
			-- ZSTD_LEGACY_SUPPORT=N with N in 1..7 enables the decoders of formats
			-- v0.N up to v0.7, so their sources must be part of the build;
			-- N >= 8 needs no legacy source file at all.
			-- NOTE(review): mirrors the legacy file selection of lib/Makefile - confirm.
			if legacy < 8 then
				for version = legacy, 7 do
					files {
						dir .. 'legacy/zstd_v0' .. version .. '.*'
					}
				end
			end
			includedirs {
				dir .. 'legacy'
			}
		end

		includedirs {
			dir,
			dir .. 'common'
		}

		defines {
			'XXH_NAMESPACE=ZSTD_',
			'ZSTD_LEGACY_SUPPORT=' .. legacy
		}
end
|
@ -190,13 +190,15 @@ $(ZSTDDIR)/libzstd.a: $(ZSTD_FILES)
|
||||
CFLAGS="$(ALL_CFLAGS)" LDFLAGS="$(ALL_LDFLAGS)" $(MAKE) -C $(ZSTDDIR) libzstd.a
|
||||
|
||||
# Rules to build the tests
|
||||
test/RoundTripTest$(EXT): test/RoundTripTest.o $(PROGDIR)/datagen.o Options.o \
|
||||
test/RoundTripTest$(EXT): test/RoundTripTest.o $(PROGDIR)/datagen.o \
|
||||
$(PROGDIR)/util.o Options.o \
|
||||
Pzstd.o SkippableFrame.o $(ZSTDDIR)/libzstd.a
|
||||
$(LD_COMMAND)
|
||||
|
||||
test/%Test$(EXT): PZSTD_LDFLAGS += $(GTEST_LIB)
|
||||
test/%Test$(EXT): LIBS += -lgtest -lgtest_main
|
||||
test/%Test$(EXT): test/%Test.o $(PROGDIR)/datagen.o Options.o Pzstd.o \
|
||||
test/%Test$(EXT): test/%Test.o $(PROGDIR)/datagen.o \
|
||||
$(PROGDIR)/util.o Options.o Pzstd.o \
|
||||
SkippableFrame.o $(ZSTDDIR)/libzstd.a
|
||||
$(LD_COMMAND)
|
||||
|
||||
|
@ -55,7 +55,7 @@ static std::uint64_t handleOneInput(const Options &options,
|
||||
SharedState& state) {
|
||||
auto inputSize = fileSizeOrZero(inputFile);
|
||||
// WorkQueue outlives ThreadPool so in the case of error we are certain
|
||||
// we don't accidently try to call push() on it after it is destroyed
|
||||
// we don't accidentally try to call push() on it after it is destroyed
|
||||
WorkQueue<std::shared_ptr<BufferWorkQueue>> outs{options.numThreads + 1};
|
||||
std::uint64_t bytesRead;
|
||||
std::uint64_t bytesWritten;
|
||||
|
@ -9,7 +9,7 @@
|
||||
|
||||
/**
|
||||
* A subset of `folly/Range.h`.
|
||||
* All code copied verbatiam modulo formatting
|
||||
* All code copied verbatim modulo formatting
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
|
@ -54,7 +54,7 @@ class ResourcePool {
|
||||
|
||||
/**
|
||||
* @returns A unique pointer to a resource. The resource is null iff
|
||||
* there are no avaiable resources and `factory()` returns null.
|
||||
* there are no available resources and `factory()` returns null.
|
||||
*/
|
||||
UniquePtr get() {
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
|
28
contrib/snap/snapcraft.yaml
Normal file
28
contrib/snap/snapcraft.yaml
Normal file
@ -0,0 +1,28 @@
|
||||
name: zstd
|
||||
version: git
|
||||
summary: Zstandard - Fast real-time compression algorithm
|
||||
description: |
|
||||
Zstandard, or zstd as short version, is a fast lossless compression
|
||||
algorithm, targeting real-time compression scenarios at zlib-level and better
|
||||
compression ratios. It's backed by a very fast entropy stage, provided by
|
||||
Huff0 and FSE library
|
||||
|
||||
grade: devel # must be 'stable' to release into candidate/stable channels
|
||||
confinement: devmode # use 'strict' once you have the right plugs and slots
|
||||
|
||||
apps:
|
||||
zstd:
|
||||
command: usr/local/bin/zstd
|
||||
plugs: [home, removable-media]
|
||||
zstdgrep:
|
||||
command: usr/local/bin/zstdgrep
|
||||
plugs: [home, removable-media]
|
||||
zstdless:
|
||||
command: usr/local/bin/zstdless
|
||||
plugs: [home, removable-media]
|
||||
|
||||
parts:
|
||||
zstd:
|
||||
source: .
|
||||
plugin: make
|
||||
build-packages: [g++]
|
@ -12,8 +12,8 @@ __`zstd_compression_format.md`__ : This document defines the Zstandard compressi
|
||||
Compliant decoders must adhere to this document,
|
||||
and compliant encoders must generate data that follows it.
|
||||
|
||||
Should you look for ressources to develop your own port of Zstandard algorithm,
|
||||
you may find the following ressources useful :
|
||||
Should you look for resources to develop your own port of Zstandard algorithm,
|
||||
you may find the following resources useful :
|
||||
|
||||
__`educational_decoder`__ : This directory contains an implementation of a Zstandard decoder,
|
||||
compliant with the Zstandard compression format.
|
||||
|
@ -7,7 +7,7 @@ CPPFLAGS += -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
|
||||
CFLAGS ?= -O3
|
||||
CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
|
||||
-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
|
||||
-Wstrict-prototypes -Wundef -Wformat-security \
|
||||
-Wstrict-prototypes -Wundef \
|
||||
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
|
||||
-Wredundant-decls
|
||||
CFLAGS += $(DEBUGFLAGS)
|
||||
|
@ -358,7 +358,7 @@ static u32 copy_literals(const size_t seq, istream_t *litstream,
|
||||
ostream_t *const out);
|
||||
|
||||
// Given an offset code from a sequence command (either an actual offset value
|
||||
// or an index for previous offset), computes the correct offset and udpates
|
||||
// or an index for previous offset), computes the correct offset and updates
|
||||
// the offset history
|
||||
static size_t compute_offset(sequence_command_t seq, u64 *const offset_hist);
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
15
examples/.gitignore
vendored
Normal file
15
examples/.gitignore
vendored
Normal file
@ -0,0 +1,15 @@
|
||||
#build
|
||||
simple_compression
|
||||
simple_decompression
|
||||
multiple_simple_compression
|
||||
dictionary_compression
|
||||
dictionary_decompression
|
||||
streaming_compression
|
||||
streaming_decompression
|
||||
multiple_streaming_compression
|
||||
streaming_memory_usage
|
||||
|
||||
#test artefact
|
||||
tmp*
|
||||
test*
|
||||
*.zst
|
90
examples/Makefile
Normal file
90
examples/Makefile
Normal file
@ -0,0 +1,90 @@
|
||||
# ################################################################
|
||||
# Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under both the BSD-style license (found in the
|
||||
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
# in the COPYING file in the root directory of this source tree).
|
||||
# ################################################################
|
||||
|
||||
# This Makefile builds the examples against the library in ../lib (libzstd.a is built on demand)
|
||||
|
||||
CPPFLAGS += -I../lib
|
||||
LIB = ../lib/libzstd.a
|
||||
|
||||
.PHONY: default all clean test
|
||||
|
||||
default: all
|
||||
|
||||
all: simple_compression simple_decompression \
|
||||
multiple_simple_compression\
|
||||
dictionary_compression dictionary_decompression \
|
||||
streaming_compression streaming_decompression \
|
||||
multiple_streaming_compression streaming_memory_usage
|
||||
|
||||
$(LIB) :
|
||||
$(MAKE) -C ../lib libzstd.a
|
||||
|
||||
simple_compression : simple_compression.c common.h $(LIB)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $< $(LIB) $(LDFLAGS) -o $@
|
||||
|
||||
simple_decompression : simple_decompression.c common.h $(LIB)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $< $(LIB) $(LDFLAGS) -o $@
|
||||
|
||||
multiple_simple_compression : multiple_simple_compression.c common.h $(LIB)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $< $(LIB) $(LDFLAGS) -o $@
|
||||
|
||||
dictionary_compression : dictionary_compression.c common.h $(LIB)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $< $(LIB) $(LDFLAGS) -o $@
|
||||
|
||||
dictionary_decompression : dictionary_decompression.c common.h $(LIB)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $< $(LIB) $(LDFLAGS) -o $@
|
||||
|
||||
streaming_compression : streaming_compression.c common.h $(LIB)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $< $(LIB) $(LDFLAGS) -o $@
|
||||
|
||||
multiple_streaming_compression : multiple_streaming_compression.c common.h $(LIB)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $< $(LIB) $(LDFLAGS) -o $@
|
||||
|
||||
streaming_decompression : streaming_decompression.c common.h $(LIB)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $< $(LIB) $(LDFLAGS) -o $@
|
||||
|
||||
streaming_memory_usage : streaming_memory_usage.c $(LIB)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $< $(LIB) $(LDFLAGS) -o $@
|
||||
|
||||
clean:
|
||||
@rm -f core *.o tmp* result* *.zst \
|
||||
simple_compression simple_decompression \
|
||||
multiple_simple_compression \
|
||||
dictionary_compression dictionary_decompression \
|
||||
streaming_compression streaming_decompression \
|
||||
multiple_streaming_compression streaming_memory_usage
|
||||
@echo Cleaning completed
|
||||
|
||||
test: all
|
||||
cp README.md tmp
|
||||
cp Makefile tmp2
|
||||
@echo -- Simple compression tests
|
||||
./simple_compression tmp
|
||||
./simple_decompression tmp.zst
|
||||
./multiple_simple_compression *.c
|
||||
./streaming_decompression tmp.zst > /dev/null
|
||||
@echo -- Streaming memory usage
|
||||
./streaming_memory_usage
|
||||
@echo -- Streaming compression tests
|
||||
./streaming_compression tmp
|
||||
./streaming_decompression tmp.zst > /dev/null
|
||||
@echo -- Edge cases detection
|
||||
! ./streaming_decompression tmp # invalid input, must fail
|
||||
! ./simple_decompression tmp # invalid input, must fail
|
||||
! ./simple_decompression tmp.zst # unknown input size, must fail
|
||||
touch tmpNull # create 0-size file
|
||||
./simple_compression tmpNull
|
||||
./simple_decompression tmpNull.zst # 0-size frame : must work
|
||||
@echo -- Multiple streaming tests
|
||||
./multiple_streaming_compression *.c
|
||||
@echo -- Dictionary compression tests
|
||||
./dictionary_compression tmp2 tmp README.md
|
||||
./dictionary_decompression tmp2.zst tmp.zst README.md
|
||||
$(RM) tmp* *.zst
|
||||
@echo tests completed
|
46
examples/README.md
Normal file
46
examples/README.md
Normal file
@ -0,0 +1,46 @@
|
||||
Zstandard library : usage examples
|
||||
==================================
|
||||
|
||||
- [Simple compression](simple_compression.c) :
|
||||
Compress a single file.
|
||||
Introduces usage of : `ZSTD_compress()`
|
||||
|
||||
- [Simple decompression](simple_decompression.c) :
|
||||
Decompress a single file.
|
||||
Only compatible with simple compression.
|
||||
Result remains in memory.
|
||||
Introduces usage of : `ZSTD_decompress()`
|
||||
|
||||
- [Multiple simple compression](multiple_simple_compression.c) :
|
||||
Compress multiple files (in simple mode) in a single command line.
|
||||
Demonstrates memory preservation technique that
|
||||
minimizes malloc()/free() calls by re-using existing resources.
|
||||
Introduces usage of : `ZSTD_compressCCtx()`
|
||||
|
||||
- [Streaming memory usage](streaming_memory_usage.c) :
|
||||
Provides amount of memory used by streaming context.
|
||||
Introduces usage of : `ZSTD_sizeof_CStream()`
|
||||
|
||||
- [Streaming compression](streaming_compression.c) :
|
||||
Compress a single file.
|
||||
Introduces usage of : `ZSTD_compressStream()`
|
||||
|
||||
- [Multiple Streaming compression](multiple_streaming_compression.c) :
|
||||
Compress multiple files (in streaming mode) in a single command line.
|
||||
Introduces memory usage preservation technique,
|
||||
reducing impact of malloc()/free() and memset() by re-using existing resources.
|
||||
|
||||
- [Streaming decompression](streaming_decompression.c) :
|
||||
Decompress a single file compressed by zstd.
|
||||
Compatible with both simple and streaming compression.
|
||||
Result is sent to stdout.
|
||||
Introduces usage of : `ZSTD_decompressStream()`
|
||||
|
||||
- [Dictionary compression](dictionary_compression.c) :
|
||||
Compress multiple files using the same dictionary.
|
||||
Introduces usage of : `ZSTD_createCDict()` and `ZSTD_compress_usingCDict()`
|
||||
|
||||
- [Dictionary decompression](dictionary_decompression.c) :
|
||||
Decompress multiple files using the same dictionary.
|
||||
Result remains in memory.
|
||||
Introduces usage of : `ZSTD_createDDict()` and `ZSTD_decompress_usingDDict()`
|
234
examples/common.h
Normal file
234
examples/common.h
Normal file
@ -0,0 +1,234 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This header file has common utility functions used in examples.
|
||||
*/
|
||||
#ifndef COMMON_H
|
||||
#define COMMON_H
|
||||
|
||||
#include <stdlib.h> // malloc, free, exit
|
||||
#include <stdio.h> // fprintf, perror, fopen, etc.
|
||||
#include <string.h> // strerror
|
||||
#include <errno.h> // errno
|
||||
#include <sys/stat.h> // stat
|
||||
#include <zstd.h>
|
||||
|
||||
/*
 * Exit codes used by the utility functions of this header
 * when they terminate the program. Values start at 1 and are consecutive.
 */
typedef enum {
    ERROR_fsize = 1,
    ERROR_fopen,       /* 2 */
    ERROR_fclose,      /* 3 */
    ERROR_fread,       /* 4 */
    ERROR_fwrite,      /* 5 */
    ERROR_loadFile,    /* 6 */
    ERROR_saveFile,    /* 7 */
    ERROR_malloc,      /* 8 */
    ERROR_largeFile    /* 9 */
} COMMON_ErrorCode;
|
||||
|
||||
/*! CHECK
 * Verify that `cond` is true. When it is not, write
 * "<file>:<line> CHECK(<cond>) failed: " to stderr, followed by the optional
 * printf-style message and a newline, then exit with status 1.
 */
#define CHECK(cond, ...)                                      \
    do {                                                      \
        if (!(cond)) {                                        \
            fprintf(stderr, "%s:%d CHECK(%s) failed: ",       \
                    __FILE__, __LINE__, #cond);               \
            fprintf(stderr, "" __VA_ARGS__);                  \
            fputc('\n', stderr);                              \
            exit(1);                                          \
        }                                                     \
    } while (0)
|
||||
|
||||
/*! CHECK_ZSTD
 * Evaluate `fn` once and store its result as a size_t zstd return code.
 * When ZSTD_isError() reports an error for that code, CHECK() prints the
 * text given by ZSTD_getErrorName() and exits.
 * Arguments after `fn` are accepted but not used.
 */
|
||||
#define CHECK_ZSTD(fn, ...) \
|
||||
do { \
|
||||
size_t const err = (fn); \
|
||||
CHECK(!ZSTD_isError(err), "%s", ZSTD_getErrorName(err)); \
|
||||
} while (0)
|
||||
|
||||
/*! fsize_orDie() :
|
||||
* Get the size of a given file path.
|
||||
*
|
||||
* @return The size of a given file path.
|
||||
*/
|
||||
static size_t fsize_orDie(const char *filename)
|
||||
{
|
||||
struct stat st;
|
||||
if (stat(filename, &st) != 0) {
|
||||
/* error */
|
||||
perror(filename);
|
||||
exit(ERROR_fsize);
|
||||
}
|
||||
|
||||
off_t const fileSize = st.st_size;
|
||||
size_t const size = (size_t)fileSize;
|
||||
/* 1. fileSize should be non-negative,
|
||||
* 2. if off_t -> size_t type conversion results in discrepancy,
|
||||
* the file size is too large for type size_t.
|
||||
*/
|
||||
if ((fileSize < 0) || (fileSize != (off_t)size)) {
|
||||
fprintf(stderr, "%s : filesize too large \n", filename);
|
||||
exit(ERROR_largeFile);
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
/*! fopen_orDie() :
|
||||
* Open a file using given file path and open option.
|
||||
*
|
||||
* @return If successful this function will return a FILE pointer to an
|
||||
* opened file otherwise it sends an error to stderr and exits.
|
||||
*/
|
||||
static FILE* fopen_orDie(const char *filename, const char *instruction)
|
||||
{
|
||||
FILE* const inFile = fopen(filename, instruction);
|
||||
if (inFile) return inFile;
|
||||
/* error */
|
||||
perror(filename);
|
||||
exit(ERROR_fopen);
|
||||
}
|
||||
|
||||
/*! fclose_orDie() :
|
||||
* Close an opened file using given FILE pointer.
|
||||
*/
|
||||
static void fclose_orDie(FILE* file)
|
||||
{
|
||||
if (!fclose(file)) { return; };
|
||||
/* error */
|
||||
perror("fclose");
|
||||
exit(ERROR_fclose);
|
||||
}
|
||||
|
||||
/*! fread_orDie() :
|
||||
*
|
||||
* Read sizeToRead bytes from a given file, storing them at the
|
||||
* location given by buffer.
|
||||
*
|
||||
* @return The number of bytes read.
|
||||
*/
|
||||
static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
|
||||
{
|
||||
size_t const readSize = fread(buffer, 1, sizeToRead, file);
|
||||
if (readSize == sizeToRead) return readSize; /* good */
|
||||
if (feof(file)) return readSize; /* good, reached end of file */
|
||||
/* error */
|
||||
perror("fread");
|
||||
exit(ERROR_fread);
|
||||
}
|
||||
|
||||
/*! fwrite_orDie() :
|
||||
*
|
||||
* Write sizeToWrite bytes to a file pointed to by file, obtaining
|
||||
* them from a location given by buffer.
|
||||
*
|
||||
* Note: This function will send an error to stderr and exit if it
|
||||
* cannot write data to the given file pointer.
|
||||
*
|
||||
* @return The number of bytes written.
|
||||
*/
|
||||
static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
|
||||
{
|
||||
size_t const writtenSize = fwrite(buffer, 1, sizeToWrite, file);
|
||||
if (writtenSize == sizeToWrite) return sizeToWrite; /* good */
|
||||
/* error */
|
||||
perror("fwrite");
|
||||
exit(ERROR_fwrite);
|
||||
}
|
||||
|
||||
/*! malloc_orDie() :
|
||||
* Allocate memory.
|
||||
*
|
||||
* @return If successful this function returns a pointer to allo-
|
||||
* cated memory. If there is an error, this function will send that
|
||||
* error to stderr and exit.
|
||||
*/
|
||||
static void* malloc_orDie(size_t size)
|
||||
{
|
||||
void* const buff = malloc(size);
|
||||
if (buff) return buff;
|
||||
/* error */
|
||||
perror("malloc");
|
||||
exit(ERROR_malloc);
|
||||
}
|
||||
|
||||
/*! loadFile_orDie() :
|
||||
* load file into buffer (memory).
|
||||
*
|
||||
* Note: This function will send an error to stderr and exit if it
|
||||
* cannot read data from the given file path.
|
||||
*
|
||||
* @return If successful this function will load file into buffer and
|
||||
* return file size, otherwise it will printout an error to stderr and exit.
|
||||
*/
|
||||
static size_t loadFile_orDie(const char* fileName, void* buffer, size_t bufferSize)
|
||||
{
|
||||
size_t const fileSize = fsize_orDie(fileName);
|
||||
CHECK(fileSize <= bufferSize, "File too large!");
|
||||
|
||||
FILE* const inFile = fopen_orDie(fileName, "rb");
|
||||
size_t const readSize = fread(buffer, 1, fileSize, inFile);
|
||||
if (readSize != (size_t)fileSize) {
|
||||
fprintf(stderr, "fread: %s : %s \n", fileName, strerror(errno));
|
||||
exit(ERROR_fread);
|
||||
}
|
||||
fclose(inFile); /* can't fail, read only */
|
||||
return fileSize;
|
||||
}
|
||||
|
||||
/*! mallocAndLoadFile_orDie() :
 * Allocate a buffer exactly as large as `fileName`, then load the file into it.
 *
 * Note: the program exits with an error message on stderr when the
 * allocation fails or the file cannot be read.
 *
 * @return The allocated buffer, to be released with free();
 * `*bufferSize` receives its size, equal to the size of the file.
 */
static void* mallocAndLoadFile_orDie(const char* fileName, size_t* bufferSize) {
    size_t const capacity = fsize_orDie(fileName);
    void* const storage = malloc_orDie(capacity);
    *bufferSize = capacity;
    loadFile_orDie(fileName, storage, capacity);
    return storage;
}
|
||||
|
||||
/*! saveFile_orDie() :
|
||||
*
|
||||
* Save buffSize bytes to a given file path, obtaining them from a location pointed
|
||||
* to by buff.
|
||||
*
|
||||
* Note: This function will send an error to stderr and exit if it
|
||||
* cannot write to a given file.
|
||||
*/
|
||||
static void saveFile_orDie(const char* fileName, const void* buff, size_t buffSize)
|
||||
{
|
||||
FILE* const oFile = fopen_orDie(fileName, "wb");
|
||||
size_t const wSize = fwrite(buff, 1, buffSize, oFile);
|
||||
if (wSize != (size_t)buffSize) {
|
||||
fprintf(stderr, "fwrite: %s : %s \n", fileName, strerror(errno));
|
||||
exit(ERROR_fwrite);
|
||||
}
|
||||
if (fclose(oFile)) {
|
||||
perror(fileName);
|
||||
exit(ERROR_fclose);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
97
examples/dictionary_compression.c
Normal file
97
examples/dictionary_compression.c
Normal file
@ -0,0 +1,97 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
#include <stdio.h> // printf
|
||||
#include <stdlib.h> // free
|
||||
#include <string.h> // memset, strcat
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD()
|
||||
|
||||
/* createDict() :
|
||||
`dictFileName` is supposed to have been created using `zstd --train` */
|
||||
static ZSTD_CDict* createCDict_orDie(const char* dictFileName, int cLevel)
|
||||
{
|
||||
size_t dictSize;
|
||||
printf("loading dictionary %s \n", dictFileName);
|
||||
void* const dictBuffer = mallocAndLoadFile_orDie(dictFileName, &dictSize);
|
||||
ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictSize, cLevel);
|
||||
CHECK(cdict != NULL, "ZSTD_createCDict() failed!");
|
||||
free(dictBuffer);
|
||||
return cdict;
|
||||
}
|
||||
|
||||
|
||||
static void compress(const char* fname, const char* oname, const ZSTD_CDict* cdict)
|
||||
{
|
||||
size_t fSize;
|
||||
void* const fBuff = mallocAndLoadFile_orDie(fname, &fSize);
|
||||
size_t const cBuffSize = ZSTD_compressBound(fSize);
|
||||
void* const cBuff = malloc_orDie(cBuffSize);
|
||||
|
||||
/* Compress using the dictionary.
|
||||
* This function writes the dictionary id, and content size into the header.
|
||||
* But, it doesn't use a checksum. You can control these options using the
|
||||
* advanced API: ZSTD_CCtx_setParameter(), ZSTD_CCtx_refCDict(),
|
||||
* and ZSTD_compress2().
|
||||
*/
|
||||
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
|
||||
CHECK(cctx != NULL, "ZSTD_createCCtx() failed!");
|
||||
size_t const cSize = ZSTD_compress_usingCDict(cctx, cBuff, cBuffSize, fBuff, fSize, cdict);
|
||||
CHECK_ZSTD(cSize);
|
||||
|
||||
saveFile_orDie(oname, cBuff, cSize);
|
||||
|
||||
/* success */
|
||||
printf("%25s : %6u -> %7u - %s \n", fname, (unsigned)fSize, (unsigned)cSize, oname);
|
||||
|
||||
ZSTD_freeCCtx(cctx); /* never fails */
|
||||
free(fBuff);
|
||||
free(cBuff);
|
||||
}
|
||||
|
||||
|
||||
/* createOutFilename_orDie() :
 * Builds the output name "<filename>.zst" in a newly allocated string.
 * @return : the new string, which the caller must free(). */
static char* createOutFilename_orDie(const char* filename)
{
    static const char suffix[] = ".zst";   /* sizeof(suffix) counts the final '\0' */
    size_t const nameLen = strlen(filename);
    char* const outName = (char*)malloc_orDie(nameLen + sizeof(suffix));
    memcpy(outName, filename, nameLen);
    memcpy(outName + nameLen, suffix, sizeof(suffix));
    return outName;
}
|
||||
|
||||
int main(int argc, const char** argv)
|
||||
{
|
||||
const char* const exeName = argv[0];
|
||||
int const cLevel = 3;
|
||||
|
||||
if (argc<3) {
|
||||
fprintf(stderr, "wrong arguments\n");
|
||||
fprintf(stderr, "usage:\n");
|
||||
fprintf(stderr, "%s [FILES] dictionary\n", exeName);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* load dictionary only once */
|
||||
const char* const dictName = argv[argc-1];
|
||||
ZSTD_CDict* const dictPtr = createCDict_orDie(dictName, cLevel);
|
||||
|
||||
int u;
|
||||
for (u=1; u<argc-1; u++) {
|
||||
const char* inFilename = argv[u];
|
||||
char* const outFilename = createOutFilename_orDie(inFilename);
|
||||
compress(inFilename, outFilename, dictPtr);
|
||||
free(outFilename);
|
||||
}
|
||||
|
||||
ZSTD_freeCDict(dictPtr);
|
||||
printf("All %u files compressed. \n", argc-2);
|
||||
return 0;
|
||||
}
|
99
examples/dictionary_decompression.c
Normal file
99
examples/dictionary_decompression.c
Normal file
@ -0,0 +1,99 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdio.h> // printf
|
||||
#include <stdlib.h> // free
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD()
|
||||
|
||||
/* createDict() :
|
||||
`dictFileName` is supposed to have been created using `zstd --train` */
|
||||
static ZSTD_DDict* createDict_orDie(const char* dictFileName)
|
||||
{
|
||||
size_t dictSize;
|
||||
printf("loading dictionary %s \n", dictFileName);
|
||||
void* const dictBuffer = mallocAndLoadFile_orDie(dictFileName, &dictSize);
|
||||
ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuffer, dictSize);
|
||||
CHECK(ddict != NULL, "ZSTD_createDDict() failed!");
|
||||
free(dictBuffer);
|
||||
return ddict;
|
||||
}
|
||||
|
||||
static void decompress(const char* fname, const ZSTD_DDict* ddict)
|
||||
{
|
||||
size_t cSize;
|
||||
void* const cBuff = mallocAndLoadFile_orDie(fname, &cSize);
|
||||
/* Read the content size from the frame header. For simplicity we require
|
||||
* that it is always present. By default, zstd will write the content size
|
||||
* in the header when it is known. If you can't guarantee that the frame
|
||||
* content size is always written into the header, either use streaming
|
||||
* decompression, or ZSTD_decompressBound().
|
||||
*/
|
||||
unsigned long long const rSize = ZSTD_getFrameContentSize(cBuff, cSize);
|
||||
CHECK(rSize != ZSTD_CONTENTSIZE_ERROR, "%s: not compressed by zstd!", fname);
|
||||
CHECK(rSize != ZSTD_CONTENTSIZE_UNKNOWN, "%s: original size unknown!", fname);
|
||||
void* const rBuff = malloc_orDie((size_t)rSize);
|
||||
|
||||
/* Check that the dictionary ID matches.
|
||||
* If a non-zstd dictionary is used, then both will be zero.
|
||||
* By default zstd always writes the dictionary ID into the frame.
|
||||
* Zstd will check if there is a dictionary ID mismatch as well.
|
||||
*/
|
||||
unsigned const expectedDictID = ZSTD_getDictID_fromDDict(ddict);
|
||||
unsigned const actualDictID = ZSTD_getDictID_fromFrame(cBuff, cSize);
|
||||
CHECK(actualDictID == expectedDictID,
|
||||
"DictID mismatch: expected %u got %u",
|
||||
expectedDictID,
|
||||
actualDictID);
|
||||
|
||||
/* Decompress using the dictionary.
|
||||
* If you need to control the decompression parameters, then use the
|
||||
* advanced API: ZSTD_DCtx_setParameter(), ZSTD_DCtx_refDDict(), and
|
||||
* ZSTD_decompressDCtx().
|
||||
*/
|
||||
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
|
||||
CHECK(dctx != NULL, "ZSTD_createDCtx() failed!");
|
||||
size_t const dSize = ZSTD_decompress_usingDDict(dctx, rBuff, rSize, cBuff, cSize, ddict);
|
||||
CHECK_ZSTD(dSize);
|
||||
/* When zstd knows the content size, it will error if it doesn't match. */
|
||||
CHECK(dSize == rSize, "Impossible because zstd will check this condition!");
|
||||
|
||||
/* success */
|
||||
printf("%25s : %6u -> %7u \n", fname, (unsigned)cSize, (unsigned)rSize);
|
||||
|
||||
ZSTD_freeDCtx(dctx);
|
||||
free(rBuff);
|
||||
free(cBuff);
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, const char** argv)
|
||||
{
|
||||
const char* const exeName = argv[0];
|
||||
|
||||
if (argc<3) {
|
||||
printf("wrong arguments\n");
|
||||
printf("usage:\n");
|
||||
printf("%s [FILES] dictionary\n", exeName);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* load dictionary only once */
|
||||
const char* const dictName = argv[argc-1];
|
||||
ZSTD_DDict* const dictPtr = createDict_orDie(dictName);
|
||||
|
||||
int u;
|
||||
for (u=1; u<argc-1; u++) decompress(argv[u], dictPtr);
|
||||
|
||||
ZSTD_freeDDict(dictPtr);
|
||||
printf("All %u files correctly decoded (in memory) \n", argc-2);
|
||||
return 0;
|
||||
}
|
116
examples/multiple_simple_compression.c
Normal file
116
examples/multiple_simple_compression.c
Normal file
@ -0,0 +1,116 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
#include <stdio.h> // printf
|
||||
#include <stdlib.h> // free
|
||||
#include <string.h> // memcpy, strlen
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD()
|
||||
|
||||
/* Buffers and compression context allocated once and reused for every file. */
typedef struct {
|
||||
void* fBuffer;   /* receives the content of the file being compressed */
|
||||
void* cBuffer;   /* receives the compressed output */
|
||||
size_t fBufferSize;   /* capacity of fBuffer, in bytes */
|
||||
size_t cBufferSize;   /* capacity of cBuffer, in bytes */
|
||||
ZSTD_CCtx* cctx;   /* compression context shared by all compressions */
|
||||
} resources;
|
||||
|
||||
/*
|
||||
* allocate memory for buffers big enough to compress all files
|
||||
* as well as memory for output file name (ofn)
|
||||
*/
|
||||
static resources createResources_orDie(int argc, const char** argv, char **ofn, size_t* ofnBufferLen)
|
||||
{
|
||||
size_t maxFilenameLength=0;
|
||||
size_t maxFileSize = 0;
|
||||
|
||||
int argNb;
|
||||
for (argNb = 1; argNb < argc; argNb++) {
|
||||
const char* const filename = argv[argNb];
|
||||
size_t const filenameLength = strlen(filename);
|
||||
size_t const fileSize = fsize_orDie(filename);
|
||||
|
||||
if (filenameLength > maxFilenameLength) maxFilenameLength = filenameLength;
|
||||
if (fileSize > maxFileSize) maxFileSize = fileSize;
|
||||
}
|
||||
|
||||
resources ress;
|
||||
ress.fBufferSize = maxFileSize;
|
||||
ress.cBufferSize = ZSTD_compressBound(maxFileSize);
|
||||
|
||||
*ofnBufferLen = maxFilenameLength + 5;
|
||||
*ofn = (char*)malloc_orDie(*ofnBufferLen);
|
||||
ress.fBuffer = malloc_orDie(ress.fBufferSize);
|
||||
ress.cBuffer = malloc_orDie(ress.cBufferSize);
|
||||
ress.cctx = ZSTD_createCCtx();
|
||||
CHECK(ress.cctx != NULL, "ZSTD_createCCtx() failed!");
|
||||
return ress;
|
||||
}
|
||||
|
||||
/* freeResources() :
 * Releases everything allocated by createResources_orDie() :
 * the compression context, both data buffers, and the output file name buffer.
 */
static void freeResources(resources ress, char *outFilename)
{
    ZSTD_freeCCtx(ress.cctx);   /* never fails */
    free(ress.cBuffer);
    free(ress.fBuffer);
    free(outFilename);
}
|
||||
|
||||
/* compress with pre-allocated context (ZSTD_CCtx) and input/output buffers*/
|
||||
static void compressFile_orDie(resources ress, const char* fname, const char* oname)
|
||||
{
|
||||
size_t fSize = loadFile_orDie(fname, ress.fBuffer, ress.fBufferSize);
|
||||
|
||||
/* Compress using the context.
|
||||
* If you need more control over parameters, use the advanced API:
|
||||
* ZSTD_CCtx_setParameter(), and ZSTD_compress2().
|
||||
*/
|
||||
size_t const cSize = ZSTD_compressCCtx(ress.cctx, ress.cBuffer, ress.cBufferSize, ress.fBuffer, fSize, 1);
|
||||
CHECK_ZSTD(cSize);
|
||||
|
||||
saveFile_orDie(oname, ress.cBuffer, cSize);
|
||||
|
||||
/* success */
|
||||
printf("%25s : %6u -> %7u - %s \n", fname, (unsigned)fSize, (unsigned)cSize, oname);
|
||||
}
|
||||
|
||||
int main(int argc, const char** argv)
|
||||
{
|
||||
const char* const exeName = argv[0];
|
||||
|
||||
if (argc<2) {
|
||||
printf("wrong arguments\n");
|
||||
printf("usage:\n");
|
||||
printf("%s FILE(s)\n", exeName);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* memory allocation for outFilename and resources */
|
||||
char* outFilename;
|
||||
size_t outFilenameBufferLen;
|
||||
resources const ress = createResources_orDie(argc, argv, &outFilename, &outFilenameBufferLen);
|
||||
|
||||
/* compress files with shared context, input and output buffers */
|
||||
int argNb;
|
||||
for (argNb = 1; argNb < argc; argNb++) {
|
||||
const char* const inFilename = argv[argNb];
|
||||
size_t const inFilenameLen = strlen(inFilename);
|
||||
CHECK(inFilenameLen + 5 <= outFilenameBufferLen, "File name too long!");
|
||||
memcpy(outFilename, inFilename, inFilenameLen);
|
||||
memcpy(outFilename+inFilenameLen, ".zst", 5);
|
||||
compressFile_orDie(ress, inFilename, outFilename);
|
||||
}
|
||||
|
||||
/* free memory */
|
||||
freeResources(ress,outFilename);
|
||||
|
||||
printf("compressed %i files \n", argc-1);
|
||||
|
||||
return 0;
|
||||
}
|
133
examples/multiple_streaming_compression.c
Normal file
133
examples/multiple_streaming_compression.c
Normal file
@ -0,0 +1,133 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
|
||||
/* The objective of this example is to show how to compress multiple successive files
|
||||
* while preserving memory management.
|
||||
* All structures and buffers will be created only once,
|
||||
* and shared across all compression operations */
|
||||
|
||||
#include <stdio.h> // printf
|
||||
#include <stdlib.h> // free
|
||||
#include <string.h> // memset, strcat
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD()
|
||||
|
||||
/* Streaming buffers and compression context created once and shared by all files. */
typedef struct {
|
||||
void* buffIn;   /* input buffer, filled from the source file */
|
||||
void* buffOut;   /* output buffer, receives compressed data before it is written */
|
||||
size_t buffInSize;   /* capacity of buffIn, set from ZSTD_CStreamInSize() */
|
||||
size_t buffOutSize;   /* capacity of buffOut, set from ZSTD_CStreamOutSize() */
|
||||
ZSTD_CCtx* cctx;   /* compression context, carrying the sticky parameters */
|
||||
} resources;
|
||||
|
||||
static resources createResources_orDie(int cLevel)
|
||||
{
|
||||
resources ress;
|
||||
ress.buffInSize = ZSTD_CStreamInSize(); /* can always read one full block */
|
||||
ress.buffOutSize= ZSTD_CStreamOutSize(); /* can always flush a full block */
|
||||
ress.buffIn = malloc_orDie(ress.buffInSize);
|
||||
ress.buffOut= malloc_orDie(ress.buffOutSize);
|
||||
ress.cctx = ZSTD_createCCtx();
|
||||
CHECK(ress.cctx != NULL, "ZSTD_createCCtx() failed!");
|
||||
|
||||
/* Set any compression parameters you want here.
|
||||
* They will persist for every compression operation.
|
||||
* Here we set the compression level, and enable the checksum.
|
||||
*/
|
||||
CHECK_ZSTD( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) );
|
||||
CHECK_ZSTD( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, 1) );
|
||||
return ress;
|
||||
}
|
||||
|
||||
/* freeResources() :
 * Releases the buffers and the context created by createResources_orDie().
 */
static void freeResources(resources ress)
{
    free(ress.buffOut);
    free(ress.buffIn);
    ZSTD_freeCCtx(ress.cctx);
}
|
||||
|
||||
/* compressFile_orDie() :
 * Stream-compresses `fname` into `outName`, reusing the context and buffers
 * of `ress`. Any failure is reported through the *_orDie helpers and
 * CHECK() / CHECK_ZSTD().
 *
 * The frame is always terminated with ZSTD_e_end, including when the input is
 * empty or when its size is an exact multiple of the input buffer size :
 * in both cases the final (short, possibly 0-byte) read is still compressed
 * as the last chunk.
 */
static void compressFile_orDie(resources ress, const char* fname, const char* outName)
{
    /* Open the input and output files. */
    FILE* const fin  = fopen_orDie(fname, "rb");
    FILE* const fout = fopen_orDie(outName, "wb");

    /* Reset the context to a clean state to start a new compression operation.
     * The parameters are sticky, so we keep the compression level and extra
     * parameters that we set in createResources_orDie().
     */
    CHECK_ZSTD( ZSTD_CCtx_reset(ress.cctx, ZSTD_reset_session_only) );

    size_t const toRead = ress.buffInSize;
    for (;;) {
        size_t const read = fread_orDie(ress.buffIn, toRead, fin);
        /* A short read (including 0 bytes) means end of input :
         * this chunk must close the frame.
         */
        int const lastChunk = (read < toRead);
        ZSTD_EndDirective const mode = lastChunk ? ZSTD_e_end : ZSTD_e_continue;

        ZSTD_inBuffer input = { ress.buffIn, read, 0 };
        int finished;
        do {
            ZSTD_outBuffer output = { ress.buffOut, ress.buffOutSize, 0 };
            size_t const remaining = ZSTD_compressStream2(ress.cctx, &output, &input, mode);
            CHECK_ZSTD(remaining);
            fwrite_orDie(ress.buffOut, output.pos, fout);
            /* Last chunk : done when zstd reports nothing left to flush.
             * Otherwise : done when the whole input chunk is consumed.
             */
            finished = lastChunk ? (remaining == 0) : (input.pos == input.size);
        } while (!finished);
        CHECK(input.pos == input.size,
              "Impossible: zstd only returns 0 when the input is completely consumed!");

        if (lastChunk) break;
    }

    fclose_orDie(fout);
    fclose_orDie(fin);
}
|
||||
|
||||
int main(int argc, const char** argv)
|
||||
{
|
||||
const char* const exeName = argv[0];
|
||||
|
||||
if (argc<2) {
|
||||
printf("wrong arguments\n");
|
||||
printf("usage:\n");
|
||||
printf("%s FILE(s)\n", exeName);
|
||||
return 1;
|
||||
}
|
||||
|
||||
int const cLevel = 7;
|
||||
resources const ress = createResources_orDie(cLevel);
|
||||
void* ofnBuffer = NULL;
|
||||
size_t ofnbSize = 0;
|
||||
|
||||
int argNb;
|
||||
for (argNb = 1; argNb < argc; argNb++) {
|
||||
const char* const ifn = argv[argNb];
|
||||
size_t const ifnSize = strlen(ifn);
|
||||
size_t const ofnSize = ifnSize + 5;
|
||||
if (ofnbSize <= ofnSize) {
|
||||
ofnbSize = ofnSize + 16;
|
||||
free(ofnBuffer);
|
||||
ofnBuffer = malloc_orDie(ofnbSize);
|
||||
}
|
||||
memset(ofnBuffer, 0, ofnSize);
|
||||
strcat(ofnBuffer, ifn);
|
||||
strcat(ofnBuffer, ".zst");
|
||||
compressFile_orDie(ress, ifn, ofnBuffer);
|
||||
}
|
||||
|
||||
freeResources(ress);
|
||||
free(ofnBuffer);
|
||||
|
||||
printf("compressed %i files \n", argc-1);
|
||||
|
||||
return 0;
|
||||
}
|
68
examples/simple_compression.c
Normal file
68
examples/simple_compression.c
Normal file
@ -0,0 +1,68 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
#include <stdio.h> // printf
|
||||
#include <stdlib.h> // free
|
||||
#include <string.h> // strlen, strcat, memset
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD()
|
||||
|
||||
/* compress_orDie() :
 * Loads `fname` in memory, compresses it in one pass at level 1,
 * and saves the result into `oname`.
 */
static void compress_orDie(const char* fname, const char* oname)
{
    size_t srcSize;
    void* const src = mallocAndLoadFile_orDie(fname, &srcSize);
    size_t const dstCapacity = ZSTD_compressBound(srcSize);
    void* const dst = malloc_orDie(dstCapacity);

    /* One-shot compression.
     * When compressing many inputs, prefer reusing a context :
     * see the multiple_simple_compression.c example.
     */
    size_t const cSize = ZSTD_compress(dst, dstCapacity, src, srcSize, 1);
    CHECK_ZSTD(cSize);

    saveFile_orDie(oname, dst, cSize);

    /* success */
    printf("%25s : %6u -> %7u - %s \n", fname, (unsigned)srcSize, (unsigned)cSize, oname);

    free(dst);
    free(src);
}
|
||||
|
||||
/* createOutFilename_orDie() :
 * Returns a newly allocated "<filename>.zst" string.
 * The caller is responsible for free()-ing it.
 */
static char* createOutFilename_orDie(const char* filename)
{
    size_t const stemLen = strlen(filename);
    char* const outName = (char*)malloc_orDie(stemLen + 5);   /* 5 : ".zst" + '\0' */

    strcpy(outName, filename);
    strcpy(outName + stemLen, ".zst");
    return outName;
}
|
||||
|
||||
/* main() :
 * usage : exe FILE
 * Compresses FILE into "FILE.zst".
 * @return : 0 on success, 1 on wrong arguments.
 */
int main(int argc, const char** argv)
{
    char* dstName;

    if (argc!=2) {
        printf("wrong arguments\n");
        printf("usage:\n");
        printf("%s FILE\n", argv[0]);
        return 1;
    }

    dstName = createOutFilename_orDie(argv[1]);
    compress_orDie(argv[1], dstName);
    free(dstName);
    return 0;
}
|
65
examples/simple_decompression.c
Normal file
65
examples/simple_decompression.c
Normal file
@ -0,0 +1,65 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
#include <stdio.h> // printf
|
||||
#include <stdlib.h> // free
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD()
|
||||
|
||||
static void decompress(const char* fname)
|
||||
{
|
||||
size_t cSize;
|
||||
void* const cBuff = mallocAndLoadFile_orDie(fname, &cSize);
|
||||
/* Read the content size from the frame header. For simplicity we require
|
||||
* that it is always present. By default, zstd will write the content size
|
||||
* in the header when it is known. If you can't guarantee that the frame
|
||||
* content size is always written into the header, either use streaming
|
||||
* decompression, or ZSTD_decompressBound().
|
||||
*/
|
||||
unsigned long long const rSize = ZSTD_getFrameContentSize(cBuff, cSize);
|
||||
CHECK(rSize != ZSTD_CONTENTSIZE_ERROR, "%s: not compressed by zstd!", fname);
|
||||
CHECK(rSize != ZSTD_CONTENTSIZE_UNKNOWN, "%s: original size unknown!", fname);
|
||||
|
||||
void* const rBuff = malloc_orDie((size_t)rSize);
|
||||
|
||||
/* Decompress.
|
||||
* If you are doing many decompressions, you may want to reuse the context
|
||||
* and use ZSTD_decompressDCtx(). If you want to set advanced parameters,
|
||||
* use ZSTD_DCtx_setParameter().
|
||||
*/
|
||||
size_t const dSize = ZSTD_decompress(rBuff, rSize, cBuff, cSize);
|
||||
CHECK_ZSTD(dSize);
|
||||
/* When zstd knows the content size, it will error if it doesn't match. */
|
||||
CHECK(dSize == rSize, "Impossible because zstd will check this condition!");
|
||||
|
||||
/* success */
|
||||
printf("%25s : %6u -> %7u \n", fname, (unsigned)cSize, (unsigned)rSize);
|
||||
|
||||
free(rBuff);
|
||||
free(cBuff);
|
||||
}
|
||||
|
||||
/* main() :
 * usage : exe FILE
 * Decompresses FILE in memory and reports success.
 * @return : 0 on success, 1 on wrong arguments.
 */
int main(int argc, const char** argv)
{
    if (argc!=2) {
        printf("wrong arguments\n");
        printf("usage:\n");
        printf("%s FILE\n", argv[0]);
        return 1;
    }

    const char* const srcName = argv[1];
    decompress(srcName);

    printf("%s correctly decoded (in memory). \n", srcName);

    return 0;
}
|
119
examples/streaming_compression.c
Normal file
119
examples/streaming_compression.c
Normal file
@ -0,0 +1,119 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdio.h> // printf
|
||||
#include <stdlib.h> // free
|
||||
#include <string.h> // memset, strcat, strlen
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD()
|
||||
|
||||
|
||||
static void compressFile_orDie(const char* fname, const char* outName, int cLevel)
|
||||
{
|
||||
/* Open the input and output files. */
|
||||
FILE* const fin = fopen_orDie(fname, "rb");
|
||||
FILE* const fout = fopen_orDie(outName, "wb");
|
||||
/* Create the input and output buffers.
|
||||
* They may be any size, but we recommend using these functions to size them.
|
||||
* Performance will only suffer significantly for very tiny buffers.
|
||||
*/
|
||||
size_t const buffInSize = ZSTD_CStreamInSize();
|
||||
void* const buffIn = malloc_orDie(buffInSize);
|
||||
size_t const buffOutSize = ZSTD_CStreamOutSize();
|
||||
void* const buffOut = malloc_orDie(buffOutSize);
|
||||
|
||||
/* Create the context. */
|
||||
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
|
||||
CHECK(cctx != NULL, "ZSTD_createCCtx() failed!");
|
||||
|
||||
/* Set any parameters you want.
|
||||
* Here we set the compression level, and enable the checksum.
|
||||
*/
|
||||
CHECK_ZSTD( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel) );
|
||||
CHECK_ZSTD( ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1) );
|
||||
|
||||
/* This loop read from the input file, compresses that entire chunk,
|
||||
* and writes all output produced to the output file.
|
||||
*/
|
||||
size_t const toRead = buffInSize;
|
||||
size_t read;
|
||||
while ((read = fread_orDie(buffIn, toRead, fin))) {
|
||||
/* Select the flush mode.
|
||||
* If the read may not be finished (read == toRead) we use
|
||||
* ZSTD_e_continue. If this is the last chunk, we use ZSTD_e_end.
|
||||
* Zstd optimizes the case where the first flush mode is ZSTD_e_end,
|
||||
* since it knows it is compressing the entire source in one pass.
|
||||
*/
|
||||
int const lastChunk = (read < toRead);
|
||||
ZSTD_EndDirective const mode = lastChunk ? ZSTD_e_end : ZSTD_e_continue;
|
||||
/* Set the input buffer to what we just read.
|
||||
* We compress until the input buffer is empty, each time flushing the
|
||||
* output.
|
||||
*/
|
||||
ZSTD_inBuffer input = { buffIn, read, 0 };
|
||||
int finished;
|
||||
do {
|
||||
/* Compress into the output buffer and write all of the output to
|
||||
* the file so we can reuse the buffer next iteration.
|
||||
*/
|
||||
ZSTD_outBuffer output = { buffOut, buffOutSize, 0 };
|
||||
size_t const remaining = ZSTD_compressStream2(cctx, &output , &input, mode);
|
||||
CHECK_ZSTD(remaining);
|
||||
fwrite_orDie(buffOut, output.pos, fout);
|
||||
/* If we're on the last chunk we're finished when zstd returns 0,
|
||||
* which means its consumed all the input AND finished the frame.
|
||||
* Otherwise, we're finished when we've consumed all the input.
|
||||
*/
|
||||
finished = lastChunk ? (remaining == 0) : (input.pos == input.size);
|
||||
} while (!finished);
|
||||
CHECK(input.pos == input.size,
|
||||
"Impossible: zstd only returns 0 when the input is completely consumed!");
|
||||
}
|
||||
|
||||
ZSTD_freeCCtx(cctx);
|
||||
fclose_orDie(fout);
|
||||
fclose_orDie(fin);
|
||||
free(buffIn);
|
||||
free(buffOut);
|
||||
}
|
||||
|
||||
|
||||
/* createOutFilename_orDie() :
 * Allocates and returns the output name for `filename` : the same name with
 * ".zst" appended. Ownership of the buffer passes to the caller (free()).
 */
static char* createOutFilename_orDie(const char* filename)
{
    size_t const baseLen = strlen(filename);
    size_t const total = baseLen + 5;            /* ".zst" (4) + '\0' (1) */
    char* const name = (char*)malloc_orDie(total);

    memcpy(name, filename, baseLen);
    memcpy(name + baseLen, ".zst", total - baseLen);   /* 5 bytes, terminator included */
    return name;
}
|
||||
|
||||
/* main() :
 * usage : exe FILE
 * Stream-compresses FILE into "FILE.zst" at level 1.
 * @return : 0 on success, 1 on wrong arguments.
 */
int main(int argc, const char** argv)
{
    char* dstName;

    if (argc!=2) {
        printf("wrong arguments\n");
        printf("usage:\n");
        printf("%s FILE\n", argv[0]);
        return 1;
    }

    dstName = createOutFilename_orDie(argv[1]);
    compressFile_orDie(argv[1], dstName, 1);

    /* Not strictly required, since the program ends right after,
     * but some static analyzers may complain otherwise. */
    free(dstName);
    return 0;
}
|
82
examples/streaming_decompression.c
Normal file
82
examples/streaming_decompression.c
Normal file
@ -0,0 +1,82 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdio.h> // fprintf
|
||||
#include <stdlib.h> // free
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD()
|
||||
|
||||
static void decompressFile_orDie(const char* fname)
|
||||
{
|
||||
FILE* const fin = fopen_orDie(fname, "rb");
|
||||
size_t const buffInSize = ZSTD_DStreamInSize();
|
||||
void* const buffIn = malloc_orDie(buffInSize);
|
||||
FILE* const fout = stdout;
|
||||
size_t const buffOutSize = ZSTD_DStreamOutSize(); /* Guarantee to successfully flush at least one complete compressed block in all circumstances. */
|
||||
void* const buffOut = malloc_orDie(buffOutSize);
|
||||
|
||||
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
|
||||
CHECK(dctx != NULL, "ZSTD_createDCtx() failed!");
|
||||
|
||||
/* This loop assumes that the input file is one or more concatenated zstd
|
||||
* streams. This example won't work if there is trailing non-zstd data at
|
||||
* the end, but streaming decompression in general handles this case.
|
||||
* ZSTD_decompressStream() returns 0 exactly when the frame is completed,
|
||||
* and doesn't consume input after the frame.
|
||||
*/
|
||||
size_t const toRead = buffInSize;
|
||||
size_t read;
|
||||
while ( (read = fread_orDie(buffIn, toRead, fin)) ) {
|
||||
ZSTD_inBuffer input = { buffIn, read, 0 };
|
||||
/* Given a valid frame, zstd won't consume the last byte of the frame
|
||||
* until it has flushed all of the decompressed data of the frame.
|
||||
* Therefore, instead of checking if the return code is 0, we can
|
||||
* decompress just check if input.pos < input.size.
|
||||
*/
|
||||
while (input.pos < input.size) {
|
||||
ZSTD_outBuffer output = { buffOut, buffOutSize, 0 };
|
||||
/* The return code is zero if the frame is complete, but there may
|
||||
* be multiple frames concatenated together. Zstd will automatically
|
||||
* reset the context when a frame is complete. Still, calling
|
||||
* ZSTD_DCtx_reset() can be useful to reset the context to a clean
|
||||
* state, for instance if the last decompression call returned an
|
||||
* error.
|
||||
*/
|
||||
size_t const ret = ZSTD_decompressStream(dctx, &output , &input);
|
||||
CHECK_ZSTD(ret);
|
||||
fwrite_orDie(buffOut, output.pos, fout);
|
||||
}
|
||||
}
|
||||
|
||||
ZSTD_freeDCtx(dctx);
|
||||
fclose_orDie(fin);
|
||||
fclose_orDie(fout);
|
||||
free(buffIn);
|
||||
free(buffOut);
|
||||
}
|
||||
|
||||
|
||||
/* main() :
 * usage : exe FILE
 * Stream-decompresses FILE; usage errors are reported on stderr.
 * @return : 0 on success, 1 on wrong arguments.
 */
int main(int argc, const char** argv)
{
    if (argc!=2) {
        fprintf(stderr, "wrong arguments\n");
        fprintf(stderr, "usage:\n");
        fprintf(stderr, "%s FILE\n", argv[0]);
        return 1;
    }

    decompressFile_orDie(argv[1]);
    return 0;
}
|
137
examples/streaming_memory_usage.c
Normal file
137
examples/streaming_memory_usage.c
Normal file
@ -0,0 +1,137 @@
|
||||
/*
|
||||
* Copyright (c) 2017-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
|
||||
/*=== Tuning parameter ===*/
|
||||
#ifndef MAX_TESTED_LEVEL
|
||||
#define MAX_TESTED_LEVEL 12
|
||||
#endif
|
||||
|
||||
|
||||
/*=== Dependencies ===*/
|
||||
#include <stdio.h> // printf
|
||||
#define ZSTD_STATIC_LINKING_ONLY
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD()
|
||||
|
||||
|
||||
/*=== functions ===*/
|
||||
|
||||
/*! readU32FromChar() :
 *  Parses a run of decimal digits at `*stringPtr`, optionally followed by a
 *  size suffix : K, KB, KiB (x1024) or M, MB, MiB (x1024x1024).
 *  `*stringPtr` is advanced to the first character that was not consumed.
 * @return : the parsed value; 0 when no digit is present.
 *  Note : the result silently wraps around if the value exceeds the range of `unsigned`.
 */
static unsigned readU32FromChar(const char** stringPtr)
{
    const char* cursor = *stringPtr;
    unsigned value = 0;

    /* decimal digits */
    for ( ; (*cursor >= '0') && (*cursor <= '9'); cursor++) {
        value = value * 10 + (unsigned)(*cursor - '0');
    }

    /* optional suffix : K or M, then optional 'i', then optional 'B' */
    if ((*cursor == 'K') || (*cursor == 'M')) {
        value <<= 10;
        if (*cursor == 'M') {
            value <<= 10;
        }
        cursor++;
        if (*cursor == 'i') cursor++;
        if (*cursor == 'B') cursor++;
    }

    *stringPtr = cursor;
    return value;
}
|
||||
|
||||
|
||||
int main(int argc, char const *argv[]) {
|
||||
|
||||
printf("\n Zstandard (v%s) memory usage for streaming : \n\n", ZSTD_versionString());
|
||||
|
||||
unsigned wLog = 0;
|
||||
if (argc > 1) {
|
||||
const char* valStr = argv[1];
|
||||
wLog = readU32FromChar(&valStr);
|
||||
}
|
||||
|
||||
int compressionLevel;
|
||||
for (compressionLevel = 1; compressionLevel <= MAX_TESTED_LEVEL; compressionLevel++) {
|
||||
#define INPUT_SIZE 5
|
||||
#define COMPRESSED_SIZE 128
|
||||
char const dataToCompress[INPUT_SIZE] = "abcde";
|
||||
char compressedData[COMPRESSED_SIZE];
|
||||
char decompressedData[INPUT_SIZE];
|
||||
/* the ZSTD_CCtx_params structure is a way to save parameters and use
|
||||
* them across multiple contexts. We use them here so we can call the
|
||||
* function ZSTD_estimateCStreamSize_usingCCtxParams().
|
||||
*/
|
||||
ZSTD_CCtx_params* const cctxParams = ZSTD_createCCtxParams();
|
||||
CHECK(cctxParams != NULL, "ZSTD_createCCtxParams() failed!");
|
||||
|
||||
/* Set the compression level. */
|
||||
CHECK_ZSTD( ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_compressionLevel, compressionLevel) );
|
||||
/* Set the window log.
|
||||
* The value 0 means use the default window log, which is equivalent to
|
||||
* not setting it.
|
||||
*/
|
||||
CHECK_ZSTD( ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_windowLog, wLog) );
|
||||
|
||||
/* Force the compressor to allocate the maximum memory size for a given
|
||||
* level by not providing the pledged source size, or calling
|
||||
* ZSTD_compressStream2() with ZSTD_e_end.
|
||||
*/
|
||||
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
|
||||
CHECK(cctx != NULL, "ZSTD_createCCtx() failed!");
|
||||
CHECK_ZSTD( ZSTD_CCtx_setParametersUsingCCtxParams(cctx, cctxParams) );
|
||||
size_t compressedSize;
|
||||
{
|
||||
ZSTD_inBuffer inBuff = { dataToCompress, sizeof(dataToCompress), 0 };
|
||||
ZSTD_outBuffer outBuff = { compressedData, sizeof(compressedData), 0 };
|
||||
CHECK_ZSTD( ZSTD_compressStream(cctx, &outBuff, &inBuff) );
|
||||
size_t const remaining = ZSTD_endStream(cctx, &outBuff);
|
||||
CHECK_ZSTD(remaining);
|
||||
CHECK(remaining == 0, "Frame not flushed!");
|
||||
compressedSize = outBuff.pos;
|
||||
}
|
||||
|
||||
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
|
||||
CHECK(dctx != NULL, "ZSTD_createDCtx() failed!");
|
||||
/* Set the maximum allowed window log.
|
||||
* The value 0 means use the default window log, which is equivalent to
|
||||
* not setting it.
|
||||
*/
|
||||
CHECK_ZSTD( ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, wLog) );
|
||||
/* forces decompressor to use maximum memory size, since the
|
||||
* decompressed size is not stored in the frame header.
|
||||
*/
|
||||
{ ZSTD_inBuffer inBuff = { compressedData, compressedSize, 0 };
|
||||
ZSTD_outBuffer outBuff = { decompressedData, sizeof(decompressedData), 0 };
|
||||
size_t const remaining = ZSTD_decompressStream(dctx, &outBuff, &inBuff);
|
||||
CHECK_ZSTD(remaining);
|
||||
CHECK(remaining == 0, "Frame not complete!");
|
||||
CHECK(outBuff.pos == sizeof(dataToCompress), "Bad decompression!");
|
||||
}
|
||||
|
||||
size_t const cstreamSize = ZSTD_sizeof_CStream(cctx);
|
||||
size_t const cstreamEstimatedSize = ZSTD_estimateCStreamSize_usingCCtxParams(cctxParams);
|
||||
size_t const dstreamSize = ZSTD_sizeof_DStream(dctx);
|
||||
size_t const dstreamEstimatedSize = ZSTD_estimateDStreamSize_fromFrame(compressedData, compressedSize);
|
||||
|
||||
CHECK(cstreamSize <= cstreamEstimatedSize, "Compression mem (%u) > estimated (%u)",
|
||||
(unsigned)cstreamSize, (unsigned)cstreamEstimatedSize);
|
||||
CHECK(dstreamSize <= dstreamEstimatedSize, "Decompression mem (%u) > estimated (%u)",
|
||||
(unsigned)dstreamSize, (unsigned)dstreamEstimatedSize);
|
||||
|
||||
printf("Level %2i : Compression Mem = %5u KB (estimated : %5u KB) ; Decompression Mem = %4u KB (estimated : %5u KB)\n",
|
||||
compressionLevel,
|
||||
(unsigned)(cstreamSize>>10), (unsigned)(cstreamEstimatedSize>>10),
|
||||
(unsigned)(dstreamSize>>10), (unsigned)(dstreamEstimatedSize>>10));
|
||||
|
||||
ZSTD_freeDCtx(dctx);
|
||||
ZSTD_freeCCtx(cctx);
|
||||
ZSTD_freeCCtxParams(cctxParams);
|
||||
if (wLog) break; /* single test */
|
||||
}
|
||||
return 0;
|
||||
}
|
10
lib/Makefile
10
lib/Makefile
@ -25,7 +25,7 @@ endif
|
||||
CFLAGS ?= -O3
|
||||
DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
|
||||
-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
|
||||
-Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \
|
||||
-Wstrict-prototypes -Wundef -Wpointer-arith \
|
||||
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
|
||||
-Wredundant-decls -Wmissing-prototypes -Wc++-compat
|
||||
CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)
|
||||
@ -56,6 +56,7 @@ ZSTD_FORCE_DECOMPRESS_SHORT ?= 0
|
||||
ZSTD_FORCE_DECOMPRESS_LONG ?= 0
|
||||
ZSTD_NO_INLINE ?= 0
|
||||
ZSTD_STRIP_ERROR_STRINGS ?= 0
|
||||
ZSTD_LEGACY_MULTITHREADED_API ?= 0
|
||||
|
||||
ifeq ($(ZSTD_LIB_COMPRESSION), 0)
|
||||
ZSTD_LIB_DICTBUILDER = 0
|
||||
@ -107,6 +108,10 @@ ifneq ($(ZSTD_STRIP_ERROR_STRINGS), 0)
|
||||
CFLAGS += -DZSTD_STRIP_ERROR_STRINGS
|
||||
endif
|
||||
|
||||
ifneq ($(ZSTD_LEGACY_MULTITHREADED_API), 0)
|
||||
CFLAGS += -DZSTD_LEGACY_MULTITHREADED_API
|
||||
endif
|
||||
|
||||
ifneq ($(ZSTD_LEGACY_SUPPORT), 0)
|
||||
ifeq ($(shell test $(ZSTD_LEGACY_SUPPORT) -lt 8; echo $$?), 0)
|
||||
ZSTD_FILES += $(shell ls legacy/*.c | $(GREP) 'v0[$(ZSTD_LEGACY_SUPPORT)-7]')
|
||||
@ -151,8 +156,7 @@ ifneq (,$(filter Windows%,$(OS)))
|
||||
LIBZSTD = dll\libzstd.dll
|
||||
$(LIBZSTD): $(ZSTD_FILES)
|
||||
@echo compiling dynamic library $(LIBVER)
|
||||
@$(CC) $(FLAGS) -DZSTD_DLL_EXPORT=1 -shared $^ -o $@
|
||||
dlltool -D $@ -d dll\libzstd.def -l dll\libzstd.lib
|
||||
$(CC) $(FLAGS) -DZSTD_DLL_EXPORT=1 -Wl,--out-implib,dll\libzstd.lib -shared $^ -o $@
|
||||
|
||||
else
|
||||
|
||||
|
@ -31,8 +31,6 @@ note that it's necessary to request the `-pthread` flag during link stage.
|
||||
|
||||
Multithreading capabilities are exposed
|
||||
via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.3.8/lib/zstd.h#L592).
|
||||
This API is still labelled experimental,
|
||||
but is expected to become "stable" in the near future.
|
||||
|
||||
|
||||
#### API
|
||||
@ -110,6 +108,10 @@ The file structure is designed to make this selection manually achievable for an
|
||||
which removes the error messages that are otherwise returned by
|
||||
`ZSTD_getErrorName`.
|
||||
|
||||
- While invoking `make libzstd`, the build macro `ZSTD_LEGACY_MULTITHREADED_API=1`
|
||||
will expose the deprecated `ZSTDMT` API declared in `zstdmt_compress.h` in
|
||||
the shared library, which is now hidden by default.
|
||||
|
||||
|
||||
#### Windows : using MinGW+MSYS to create DLL
|
||||
|
||||
|
@ -40,7 +40,7 @@
|
||||
|
||||
/**
|
||||
* FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
|
||||
* parameters. They must be inlined for the compiler to elimininate the constant
|
||||
* parameters. They must be inlined for the compiler to eliminate the constant
|
||||
* branches.
|
||||
*/
|
||||
#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
|
||||
|
@ -358,7 +358,7 @@ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size
|
||||
typedef enum {
|
||||
FSE_repeat_none, /**< Cannot use the previous table */
|
||||
FSE_repeat_check, /**< Can use the previous table but it must be checked */
|
||||
FSE_repeat_valid /**< Can use the previous table and it is asumed to be valid */
|
||||
FSE_repeat_valid /**< Can use the previous table and it is assumed to be valid */
|
||||
} FSE_repeat;
|
||||
|
||||
/* *****************************************
|
||||
|
@ -14,8 +14,8 @@
|
||||
* This file will hold wrapper for systems, which do not support pthreads
|
||||
*/
|
||||
|
||||
/* create fake symbol to avoid empty trnaslation unit warning */
|
||||
int g_ZSTD_threading_useles_symbol;
|
||||
/* create fake symbol to avoid empty translation unit warning */
|
||||
int g_ZSTD_threading_useless_symbol;
|
||||
|
||||
#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
|
||||
|
||||
|
@ -66,10 +66,10 @@
|
||||
/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
|
||||
|
||||
/*!XXH_FORCE_NATIVE_FORMAT :
|
||||
* By default, xxHash library provides endian-independant Hash values, based on little-endian convention.
|
||||
* By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
|
||||
* Results are therefore identical for little-endian and big-endian CPU.
|
||||
* This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
|
||||
* Should endian-independance be of no importance for your application, you may set the #define below to 1,
|
||||
* Should endian-independence be of no importance for your application, you may set the #define below to 1,
|
||||
* to improve speed for Big-endian CPU.
|
||||
* This option has no impact on Little_Endian CPU.
|
||||
*/
|
||||
|
@ -53,8 +53,50 @@ extern "C" {
|
||||
#undef MAX
|
||||
#define MIN(a,b) ((a)<(b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a)>(b) ? (a) : (b))
|
||||
#define CHECK_F(f) { size_t const errcod = f; if (ERR_isError(errcod)) return errcod; } /* check and Forward error code */
|
||||
#define CHECK_E(f, e) { size_t const errcod = f; if (ERR_isError(errcod)) return ERROR(e); } /* check and send Error code */
|
||||
|
||||
/**
 * Return the specified error if the condition evaluates to true.
 *
 * In debug modes, prints additional information. In order to do that
 * (particularly, printing the conditional that failed), this can't just wrap
 * RETURN_ERROR().
 *
 * The expansion is wrapped in `do { ... } while(0);` so that an `else` written
 * after the macro can never silently attach to the macro's internal `if`.
 * The trailing `;` is kept on purpose (RETURN_ERROR() and FORWARD_IF_ERROR()
 * end the same way), so call sites written with or without their own `;`
 * keep compiling.
 *
 * `cond` is evaluated exactly once; `err` is only pasted into ERROR() and
 * stringified for the log line.
 */
#define RETURN_ERROR_IF(cond, err, ...) \
  do { \
    if (cond) { \
      RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
      RAWLOG(3, ": " __VA_ARGS__); \
      RAWLOG(3, "\n"); \
      return ERROR(err); \
    } \
  } while(0);
|
||||
|
||||
/**
 * Unconditionally return the specified error.
 *
 * In debug modes, prints additional information.
 *
 * Note: the expansion itself ends with `;`. A call site that adds its own `;`
 * therefore produces an extra empty statement, which means the macro cannot be
 * the un-braced body of an `if` that is followed by `else`.
 */
#define RETURN_ERROR(err, ...) \
  do { \
    RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
    RAWLOG(3, ": " __VA_ARGS__); \
    RAWLOG(3, "\n"); \
    return ERROR(err); \
  } while(0);
|
||||
|
||||
/**
 * If the provided expression evaluates to an error code, returns that error code.
 *
 * In debug modes, prints additional information.
 *
 * The expression `err` is evaluated exactly once: its value is captured in a
 * local `err_code`, which is what gets tested with ERR_isError() and returned.
 * (`err` also appears inside ZSTD_QUOTE(), but only to be stringified for the
 * log line.)
 *
 * Note: the expansion itself ends with `;`, so a call site that adds its own
 * `;` produces an extra empty statement.
 */
#define FORWARD_IF_ERROR(err, ...) \
  do { \
    size_t const err_code = (err); \
    if (ERR_isError(err_code)) { \
      RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
      RAWLOG(3, ": " __VA_ARGS__); \
      RAWLOG(3, "\n"); \
      return err_code; \
    } \
  } while(0);
|
||||
|
||||
|
||||
/*-*************************************
|
||||
@ -200,6 +242,17 @@ typedef struct {
|
||||
U32 longLengthPos;
|
||||
} seqStore_t;
|
||||
|
||||
/**
 * Contains the compressed frame size and an upper-bound for the decompressed frame size.
 * Note: before using `compressedSize`, check for errors using ZSTD_isError().
 *       similarly, before using `decompressedBound`, check for errors using:
 *          `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
 */
typedef struct {
    size_t compressedSize;                  /* compressed frame size; may carry an error code, test with ZSTD_isError() */
    unsigned long long decompressedBound;   /* upper bound of the decompressed frame size, or ZSTD_CONTENTSIZE_ERROR */
} ZSTD_frameSizeInfo;   /* decompress & legacy */
|
||||
|
||||
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */
|
||||
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
|
||||
|
||||
|
@ -129,9 +129,9 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
||||
{ U32 position = 0;
|
||||
U32 symbol;
|
||||
for (symbol=0; symbol<=maxSymbolValue; symbol++) {
|
||||
int nbOccurences;
|
||||
int nbOccurrences;
|
||||
int const freq = normalizedCounter[symbol];
|
||||
for (nbOccurences=0; nbOccurences<freq; nbOccurences++) {
|
||||
for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
|
||||
tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
|
||||
position = (position + step) & tableMask;
|
||||
while (position > highThreshold)
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -36,9 +36,9 @@ extern "C" {
|
||||
#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 now means "unsorted".
|
||||
It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
|
||||
It's not a big deal though : candidate will just be sorted again.
|
||||
Additionnally, candidate position 1 will be lost.
|
||||
Additionally, candidate position 1 will be lost.
|
||||
But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
|
||||
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be misdhandled after table re-use with a different strategy
|
||||
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy
|
||||
Constant required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
|
||||
|
||||
|
||||
@ -54,6 +54,14 @@ typedef struct ZSTD_prefixDict_s {
|
||||
ZSTD_dictContentType_e dictContentType;
|
||||
} ZSTD_prefixDict;
|
||||
|
||||
typedef struct {
|
||||
void* dictBuffer;
|
||||
void const* dict;
|
||||
size_t dictSize;
|
||||
ZSTD_dictContentType_e dictContentType;
|
||||
ZSTD_CDict* cdict;
|
||||
} ZSTD_localDict;
|
||||
|
||||
typedef struct {
|
||||
U32 CTable[HUF_CTABLE_SIZE_U32(255)];
|
||||
HUF_repeat repeatMode;
|
||||
@ -107,6 +115,7 @@ typedef struct {
|
||||
U32 offCodeSumBasePrice; /* to compare to log2(offreq) */
|
||||
ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */
|
||||
const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */
|
||||
ZSTD_literalCompressionMode_e literalCompressionMode;
|
||||
} optState_t;
|
||||
|
||||
typedef struct {
|
||||
@ -188,6 +197,7 @@ struct ZSTD_CCtx_params_s {
|
||||
* 1<<wLog, even for dictionary */
|
||||
|
||||
ZSTD_dictAttachPref_e attachDictPref;
|
||||
ZSTD_literalCompressionMode_e literalCompressionMode;
|
||||
|
||||
/* Multithreading: used to pass parameters to mtctx */
|
||||
int nbWorkers;
|
||||
@ -243,7 +253,7 @@ struct ZSTD_CCtx_s {
|
||||
U32 frameEnded;
|
||||
|
||||
/* Dictionary */
|
||||
ZSTD_CDict* cdictLocal;
|
||||
ZSTD_localDict localDict;
|
||||
const ZSTD_CDict* cdict;
|
||||
ZSTD_prefixDict prefixDict; /* single-usage dictionary */
|
||||
|
||||
@ -806,13 +816,6 @@ size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
|
||||
|
||||
void ZSTD_resetSeqStore(seqStore_t* ssPtr);
|
||||
|
||||
/*! ZSTD_compressStream_generic() :
|
||||
* Private use only. To be called from zstdmt_compress.c in single-thread mode. */
|
||||
size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
||||
ZSTD_outBuffer* output,
|
||||
ZSTD_inBuffer* input,
|
||||
ZSTD_EndDirective const flushMode);
|
||||
|
||||
/*! ZSTD_getCParamsFromCDict() :
|
||||
* as the name implies */
|
||||
ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
|
||||
@ -839,7 +842,7 @@ size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
|
||||
/* ZSTD_writeLastEmptyBlock() :
|
||||
* output an empty Block with end-of-frame mark to complete a frame
|
||||
* @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
|
||||
* or an error code if `dstCapcity` is too small (<ZSTD_blockHeaderSize)
|
||||
* or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
|
||||
*/
|
||||
size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
|
||||
|
||||
|
@ -45,7 +45,155 @@ FORCE_INLINE_TEMPLATE
|
||||
size_t ZSTD_compressBlock_fast_generic(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize,
|
||||
U32 const mls, ZSTD_dictMode_e const dictMode)
|
||||
U32 const mls)
|
||||
{
|
||||
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
||||
U32* const hashTable = ms->hashTable;
|
||||
U32 const hlog = cParams->hashLog;
|
||||
/* support stepSize of 0 */
|
||||
size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
|
||||
const BYTE* const base = ms->window.base;
|
||||
const BYTE* const istart = (const BYTE*)src;
|
||||
/* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */
|
||||
const BYTE* ip0 = istart;
|
||||
const BYTE* ip1;
|
||||
const BYTE* anchor = istart;
|
||||
const U32 prefixStartIndex = ms->window.dictLimit;
|
||||
const BYTE* const prefixStart = base + prefixStartIndex;
|
||||
const BYTE* const iend = istart + srcSize;
|
||||
const BYTE* const ilimit = iend - HASH_READ_SIZE;
|
||||
U32 offset_1=rep[0], offset_2=rep[1];
|
||||
U32 offsetSaved = 0;
|
||||
|
||||
/* init */
|
||||
ip0 += (ip0 == prefixStart);
|
||||
ip1 = ip0 + 1;
|
||||
{
|
||||
U32 const maxRep = (U32)(ip0 - prefixStart);
|
||||
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
|
||||
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
|
||||
}
|
||||
|
||||
/* Main Search Loop */
|
||||
while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */
|
||||
size_t mLength;
|
||||
BYTE const* ip2 = ip0 + 2;
|
||||
size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls);
|
||||
U32 const val0 = MEM_read32(ip0);
|
||||
size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls);
|
||||
U32 const val1 = MEM_read32(ip1);
|
||||
U32 const current0 = (U32)(ip0-base);
|
||||
U32 const current1 = (U32)(ip1-base);
|
||||
U32 const matchIndex0 = hashTable[h0];
|
||||
U32 const matchIndex1 = hashTable[h1];
|
||||
BYTE const* repMatch = ip2-offset_1;
|
||||
const BYTE* match0 = base + matchIndex0;
|
||||
const BYTE* match1 = base + matchIndex1;
|
||||
U32 offcode;
|
||||
hashTable[h0] = current0; /* update hash table */
|
||||
hashTable[h1] = current1; /* update hash table */
|
||||
|
||||
assert(ip0 + 1 == ip1);
|
||||
|
||||
if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
|
||||
mLength = ip2[-1] == repMatch[-1] ? 1 : 0;
|
||||
ip0 = ip2 - mLength;
|
||||
match0 = repMatch - mLength;
|
||||
offcode = 0;
|
||||
goto _match;
|
||||
}
|
||||
if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) {
|
||||
/* found a regular match */
|
||||
goto _offset;
|
||||
}
|
||||
if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) {
|
||||
/* found a regular match after one literal */
|
||||
ip0 = ip1;
|
||||
match0 = match1;
|
||||
goto _offset;
|
||||
}
|
||||
{
|
||||
size_t const step = ((ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
|
||||
assert(step >= 2);
|
||||
ip0 += step;
|
||||
ip1 += step;
|
||||
continue;
|
||||
}
|
||||
_offset: /* Requires: ip0, match0 */
|
||||
/* Compute the offset code */
|
||||
offset_2 = offset_1;
|
||||
offset_1 = (U32)(ip0-match0);
|
||||
offcode = offset_1 + ZSTD_REP_MOVE;
|
||||
mLength = 0;
|
||||
/* Count the backwards match length */
|
||||
while (((ip0>anchor) & (match0>prefixStart))
|
||||
&& (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */
|
||||
|
||||
_match: /* Requires: ip0, match0, offcode */
|
||||
/* Count the forward length */
|
||||
mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4;
|
||||
ZSTD_storeSeq(seqStore, ip0-anchor, anchor, offcode, mLength-MINMATCH);
|
||||
/* match found */
|
||||
ip0 += mLength;
|
||||
anchor = ip0;
|
||||
ip1 = ip0 + 1;
|
||||
|
||||
if (ip0 <= ilimit) {
|
||||
/* Fill Table */
|
||||
assert(base+current0+2 > istart); /* check base overflow */
|
||||
hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
|
||||
hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
|
||||
|
||||
while ( (ip0 <= ilimit)
|
||||
&& ( (offset_2>0)
|
||||
& (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) )) {
|
||||
/* store sequence */
|
||||
size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
|
||||
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
|
||||
hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
|
||||
ip0 += rLength;
|
||||
ip1 = ip0 + 1;
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
|
||||
anchor = ip0;
|
||||
continue; /* faster when present (confirmed on gcc-8) ... (?) */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* save reps for next block */
|
||||
rep[0] = offset_1 ? offset_1 : offsetSaved;
|
||||
rep[1] = offset_2 ? offset_2 : offsetSaved;
|
||||
|
||||
/* Return the last literals size */
|
||||
return iend - anchor;
|
||||
}
|
||||
|
||||
|
||||
size_t ZSTD_compressBlock_fast(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize)
|
||||
{
|
||||
ZSTD_compressionParameters const* cParams = &ms->cParams;
|
||||
U32 const mls = cParams->minMatch;
|
||||
assert(ms->dictMatchState == NULL);
|
||||
switch(mls)
|
||||
{
|
||||
default: /* includes case 3 */
|
||||
case 4 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4);
|
||||
case 5 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5);
|
||||
case 6 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6);
|
||||
case 7 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7);
|
||||
}
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE
|
||||
size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize, U32 const mls)
|
||||
{
|
||||
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
||||
U32* const hashTable = ms->hashTable;
|
||||
@ -64,46 +212,26 @@ size_t ZSTD_compressBlock_fast_generic(
|
||||
U32 offsetSaved = 0;
|
||||
|
||||
const ZSTD_matchState_t* const dms = ms->dictMatchState;
|
||||
const ZSTD_compressionParameters* const dictCParams =
|
||||
dictMode == ZSTD_dictMatchState ?
|
||||
&dms->cParams : NULL;
|
||||
const U32* const dictHashTable = dictMode == ZSTD_dictMatchState ?
|
||||
dms->hashTable : NULL;
|
||||
const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ?
|
||||
dms->window.dictLimit : 0;
|
||||
const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
|
||||
dms->window.base : NULL;
|
||||
const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ?
|
||||
dictBase + dictStartIndex : NULL;
|
||||
const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
|
||||
dms->window.nextSrc : NULL;
|
||||
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
|
||||
prefixStartIndex - (U32)(dictEnd - dictBase) :
|
||||
0;
|
||||
const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
|
||||
const U32* const dictHashTable = dms->hashTable;
|
||||
const U32 dictStartIndex = dms->window.dictLimit;
|
||||
const BYTE* const dictBase = dms->window.base;
|
||||
const BYTE* const dictStart = dictBase + dictStartIndex;
|
||||
const BYTE* const dictEnd = dms->window.nextSrc;
|
||||
const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase);
|
||||
const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart);
|
||||
const U32 dictHLog = dictMode == ZSTD_dictMatchState ?
|
||||
dictCParams->hashLog : hlog;
|
||||
|
||||
assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
|
||||
const U32 dictHLog = dictCParams->hashLog;
|
||||
|
||||
/* otherwise, we would get index underflow when translating a dict index
|
||||
* into a local index */
|
||||
assert(dictMode != ZSTD_dictMatchState
|
||||
|| prefixStartIndex >= (U32)(dictEnd - dictBase));
|
||||
assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
|
||||
|
||||
/* init */
|
||||
ip += (dictAndPrefixLength == 0);
|
||||
if (dictMode == ZSTD_noDict) {
|
||||
U32 const maxRep = (U32)(ip - prefixStart);
|
||||
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
|
||||
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
|
||||
}
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
/* dictMatchState repCode checks don't currently handle repCode == 0
|
||||
* disabling. */
|
||||
assert(offset_1 <= dictAndPrefixLength);
|
||||
assert(offset_2 <= dictAndPrefixLength);
|
||||
}
|
||||
/* dictMatchState repCode checks don't currently handle repCode == 0
|
||||
* disabling. */
|
||||
assert(offset_1 <= dictAndPrefixLength);
|
||||
assert(offset_2 <= dictAndPrefixLength);
|
||||
|
||||
/* Main Search Loop */
|
||||
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
|
||||
@ -113,50 +241,37 @@ size_t ZSTD_compressBlock_fast_generic(
|
||||
U32 const matchIndex = hashTable[h];
|
||||
const BYTE* match = base + matchIndex;
|
||||
const U32 repIndex = current + 1 - offset_1;
|
||||
const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
|
||||
&& repIndex < prefixStartIndex) ?
|
||||
const BYTE* repMatch = (repIndex < prefixStartIndex) ?
|
||||
dictBase + (repIndex - dictIndexDelta) :
|
||||
base + repIndex;
|
||||
hashTable[h] = current; /* update hash table */
|
||||
|
||||
if ( (dictMode == ZSTD_dictMatchState)
|
||||
&& ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
|
||||
if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
|
||||
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
|
||||
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
|
||||
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
|
||||
ip++;
|
||||
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
|
||||
} else if ( dictMode == ZSTD_noDict
|
||||
&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
|
||||
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
|
||||
ip++;
|
||||
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
|
||||
} else if ( (matchIndex <= prefixStartIndex) ) {
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
|
||||
U32 const dictMatchIndex = dictHashTable[dictHash];
|
||||
const BYTE* dictMatch = dictBase + dictMatchIndex;
|
||||
if (dictMatchIndex <= dictStartIndex ||
|
||||
MEM_read32(dictMatch) != MEM_read32(ip)) {
|
||||
assert(stepSize >= 1);
|
||||
ip += ((ip-anchor) >> kSearchStrength) + stepSize;
|
||||
continue;
|
||||
} else {
|
||||
/* found a dict match */
|
||||
U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
|
||||
mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
|
||||
while (((ip>anchor) & (dictMatch>dictStart))
|
||||
&& (ip[-1] == dictMatch[-1])) {
|
||||
ip--; dictMatch--; mLength++;
|
||||
} /* catch up */
|
||||
offset_2 = offset_1;
|
||||
offset_1 = offset;
|
||||
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
||||
}
|
||||
} else {
|
||||
size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
|
||||
U32 const dictMatchIndex = dictHashTable[dictHash];
|
||||
const BYTE* dictMatch = dictBase + dictMatchIndex;
|
||||
if (dictMatchIndex <= dictStartIndex ||
|
||||
MEM_read32(dictMatch) != MEM_read32(ip)) {
|
||||
assert(stepSize >= 1);
|
||||
ip += ((ip-anchor) >> kSearchStrength) + stepSize;
|
||||
continue;
|
||||
} else {
|
||||
/* found a dict match */
|
||||
U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
|
||||
mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
|
||||
while (((ip>anchor) & (dictMatch>dictStart))
|
||||
&& (ip[-1] == dictMatch[-1])) {
|
||||
ip--; dictMatch--; mLength++;
|
||||
} /* catch up */
|
||||
offset_2 = offset_1;
|
||||
offset_1 = offset;
|
||||
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
||||
}
|
||||
} else if (MEM_read32(match) != MEM_read32(ip)) {
|
||||
/* it's not a match, and we're not going to check the dictionary */
|
||||
@ -185,41 +300,27 @@ size_t ZSTD_compressBlock_fast_generic(
|
||||
hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
|
||||
|
||||
/* check immediate repcode */
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
while (ip <= ilimit) {
|
||||
U32 const current2 = (U32)(ip-base);
|
||||
U32 const repIndex2 = current2 - offset_2;
|
||||
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
|
||||
dictBase - dictIndexDelta + repIndex2 :
|
||||
base + repIndex2;
|
||||
if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
|
||||
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
||||
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
|
||||
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
|
||||
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
|
||||
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
|
||||
ip += repLength2;
|
||||
anchor = ip;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (dictMode == ZSTD_noDict) {
|
||||
while ( (ip <= ilimit)
|
||||
&& ( (offset_2>0)
|
||||
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
|
||||
/* store sequence */
|
||||
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
|
||||
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
|
||||
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base);
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
|
||||
ip += rLength;
|
||||
while (ip <= ilimit) {
|
||||
U32 const current2 = (U32)(ip-base);
|
||||
U32 const repIndex2 = current2 - offset_2;
|
||||
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
|
||||
dictBase - dictIndexDelta + repIndex2 :
|
||||
base + repIndex2;
|
||||
if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
|
||||
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
||||
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
|
||||
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
|
||||
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
|
||||
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
|
||||
ip += repLength2;
|
||||
anchor = ip;
|
||||
continue; /* faster when present ... (?) */
|
||||
} } } }
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* save reps for next block */
|
||||
rep[0] = offset_1 ? offset_1 : offsetSaved;
|
||||
@ -229,28 +330,6 @@ size_t ZSTD_compressBlock_fast_generic(
|
||||
return iend - anchor;
|
||||
}
|
||||
|
||||
|
||||
size_t ZSTD_compressBlock_fast(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize)
|
||||
{
|
||||
ZSTD_compressionParameters const* cParams = &ms->cParams;
|
||||
U32 const mls = cParams->minMatch;
|
||||
assert(ms->dictMatchState == NULL);
|
||||
switch(mls)
|
||||
{
|
||||
default: /* includes case 3 */
|
||||
case 4 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
|
||||
case 5 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
|
||||
case 6 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
|
||||
case 7 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
|
||||
}
|
||||
}
|
||||
|
||||
size_t ZSTD_compressBlock_fast_dictMatchState(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize)
|
||||
@ -262,13 +341,13 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
|
||||
{
|
||||
default: /* includes case 3 */
|
||||
case 4 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
|
||||
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4);
|
||||
case 5 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
|
||||
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5);
|
||||
case 6 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
|
||||
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6);
|
||||
case 7 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
|
||||
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -19,7 +19,7 @@ extern "C" {
|
||||
|
||||
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
|
||||
|
||||
void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). pre-emptively increase value of ZSTD_DUBT_UNSORTED_MARK */
|
||||
void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
|
||||
|
||||
size_t ZSTD_compressBlock_btlazy2(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
|
@ -429,7 +429,7 @@ size_t ZSTD_ldm_generateSequences(
|
||||
*/
|
||||
assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
|
||||
/* The input could be very large (in zstdmt), so it must be broken up into
|
||||
* chunks to enforce the maximmum distance and handle overflow correction.
|
||||
* chunks to enforce the maximum distance and handle overflow correction.
|
||||
*/
|
||||
assert(sequences->pos <= sequences->size);
|
||||
assert(sequences->size <= sequences->capacity);
|
||||
|
@ -64,9 +64,15 @@ MEM_STATIC double ZSTD_fCost(U32 price)
|
||||
}
|
||||
#endif
|
||||
|
||||
/* ZSTD_compressedLiterals() :
 * @return : 1 when optPtr->literalCompressionMode is anything other than
 *           ZSTD_lcm_uncompressed, 0 when it is ZSTD_lcm_uncompressed. */
static int ZSTD_compressedLiterals(optState_t const* const optPtr)
{
    return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed;
}
|
||||
|
||||
static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
|
||||
{
|
||||
optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
|
||||
if (ZSTD_compressedLiterals(optPtr))
|
||||
optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
|
||||
optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
|
||||
optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
|
||||
optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
|
||||
@ -99,6 +105,7 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
||||
const BYTE* const src, size_t const srcSize,
|
||||
int const optLevel)
|
||||
{
|
||||
int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
|
||||
DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
|
||||
optPtr->priceType = zop_dynamic;
|
||||
|
||||
@ -113,9 +120,10 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
||||
/* huffman table presumed generated by dictionary */
|
||||
optPtr->priceType = zop_dynamic;
|
||||
|
||||
assert(optPtr->litFreq != NULL);
|
||||
optPtr->litSum = 0;
|
||||
{ unsigned lit;
|
||||
if (compressedLiterals) {
|
||||
unsigned lit;
|
||||
assert(optPtr->litFreq != NULL);
|
||||
optPtr->litSum = 0;
|
||||
for (lit=0; lit<=MaxLit; lit++) {
|
||||
U32 const scaleLog = 11; /* scale to 2K */
|
||||
U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
|
||||
@ -163,10 +171,11 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
||||
} else { /* not a dictionary */
|
||||
|
||||
assert(optPtr->litFreq != NULL);
|
||||
{ unsigned lit = MaxLit;
|
||||
if (compressedLiterals) {
|
||||
unsigned lit = MaxLit;
|
||||
HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
|
||||
optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
|
||||
}
|
||||
optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
|
||||
|
||||
{ unsigned ll;
|
||||
for (ll=0; ll<=MaxLL; ll++)
|
||||
@ -190,7 +199,8 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
||||
|
||||
} else { /* new block : re-use previous statistics, scaled down */
|
||||
|
||||
optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
|
||||
if (compressedLiterals)
|
||||
optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
|
||||
optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
|
||||
optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
|
||||
optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
|
||||
@ -207,6 +217,10 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
|
||||
int optLevel)
|
||||
{
|
||||
if (litLength == 0) return 0;
|
||||
|
||||
if (!ZSTD_compressedLiterals(optPtr))
|
||||
return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bytes per literal. */
|
||||
|
||||
if (optPtr->priceType == zop_predef)
|
||||
return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */
|
||||
|
||||
@ -310,7 +324,8 @@ static void ZSTD_updateStats(optState_t* const optPtr,
|
||||
U32 offsetCode, U32 matchLength)
|
||||
{
|
||||
/* literals */
|
||||
{ U32 u;
|
||||
if (ZSTD_compressedLiterals(optPtr)) {
|
||||
U32 u;
|
||||
for (u=0; u < litLength; u++)
|
||||
optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
|
||||
optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
|
||||
@ -870,7 +885,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
||||
/* large match -> immediate encoding */
|
||||
{ U32 const maxML = matches[nbMatches-1].len;
|
||||
U32 const maxOffset = matches[nbMatches-1].off;
|
||||
DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new serie",
|
||||
DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
|
||||
nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));
|
||||
|
||||
if (maxML > sufficient_len) {
|
||||
@ -1108,7 +1123,8 @@ static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
|
||||
/* used in 2-pass strategy */
|
||||
MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
|
||||
{
|
||||
optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
|
||||
if (ZSTD_compressedLiterals(optPtr))
|
||||
optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
|
||||
optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
|
||||
optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
|
||||
optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
|
||||
@ -1117,7 +1133,7 @@ MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
|
||||
/* ZSTD_initStats_ultra():
|
||||
* make a first compression pass, just to seed stats with more accurate starting values.
|
||||
* only works on first block, with no dictionary and no ldm.
|
||||
* this function cannot error, hence its constract must be respected.
|
||||
* this function cannot error, hence its contract must be respected.
|
||||
*/
|
||||
static void
|
||||
ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
|
||||
|
@ -22,6 +22,7 @@
|
||||
/* ====== Dependencies ====== */
|
||||
#include <string.h> /* memcpy, memset */
|
||||
#include <limits.h> /* INT_MAX, UINT_MAX */
|
||||
#include "mem.h" /* MEM_STATIC */
|
||||
#include "pool.h" /* threadpool */
|
||||
#include "threading.h" /* mutex */
|
||||
#include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
|
||||
@ -456,7 +457,7 @@ typedef struct {
|
||||
* Must be acquired after the main mutex when acquiring both.
|
||||
*/
|
||||
ZSTD_pthread_mutex_t ldmWindowMutex;
|
||||
ZSTD_pthread_cond_t ldmWindowCond; /* Signaled when ldmWindow is udpated */
|
||||
ZSTD_pthread_cond_t ldmWindowCond; /* Signaled when ldmWindow is updated */
|
||||
ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */
|
||||
} serialState_t;
|
||||
|
||||
@ -647,7 +648,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
|
||||
buffer_t dstBuff = job->dstBuff;
|
||||
size_t lastCBlockSize = 0;
|
||||
|
||||
/* ressources */
|
||||
/* resources */
|
||||
if (cctx==NULL) JOB_ERROR(ERROR(memory_allocation));
|
||||
if (dstBuff.start == NULL) { /* streaming job : doesn't provide a dstBuffer */
|
||||
dstBuff = ZSTDMT_getBuffer(job->bufPool);
|
||||
@ -672,7 +673,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
|
||||
if (ZSTD_isError(initError)) JOB_ERROR(initError);
|
||||
} else { /* srcStart points at reloaded section */
|
||||
U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size;
|
||||
{ size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
|
||||
{ size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
|
||||
if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
|
||||
}
|
||||
{ size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
|
||||
@ -864,14 +865,10 @@ static size_t ZSTDMT_expandJobsTable (ZSTDMT_CCtx* mtctx, U32 nbWorkers) {
|
||||
* Internal use only */
|
||||
size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers)
|
||||
{
|
||||
if (nbWorkers > ZSTDMT_NBWORKERS_MAX) nbWorkers = ZSTDMT_NBWORKERS_MAX;
|
||||
params->nbWorkers = nbWorkers;
|
||||
params->overlapLog = ZSTDMT_OVERLAPLOG_DEFAULT;
|
||||
params->jobSize = 0;
|
||||
return nbWorkers;
|
||||
return ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, (int)nbWorkers);
|
||||
}
|
||||
|
||||
ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem)
|
||||
MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers, ZSTD_customMem cMem)
|
||||
{
|
||||
ZSTDMT_CCtx* mtctx;
|
||||
U32 nbJobs = nbWorkers + 2;
|
||||
@ -906,6 +903,17 @@ ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem)
|
||||
return mtctx;
|
||||
}
|
||||
|
||||
/* ZSTDMT_createCCtx_advanced() :
 * Public wrapper around ZSTDMT_createCCtx_advanced_internal().
 * When ZSTD_MULTITHREAD is defined at compile time, forwards both arguments
 * to the internal constructor and returns its result.
 * Otherwise the arguments are ignored and the function returns NULL. */
ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem)
|
||||
{
|
||||
#ifdef ZSTD_MULTITHREAD
|
||||
return ZSTDMT_createCCtx_advanced_internal(nbWorkers, cMem);
|
||||
#else
|
||||
(void)nbWorkers;   /* parameter unused in this configuration */
|
||||
(void)cMem;   /* parameter unused in this configuration */
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers)
|
||||
{
|
||||
return ZSTDMT_createCCtx_advanced(nbWorkers, ZSTD_defaultCMem);
|
||||
@ -986,26 +994,13 @@ ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
|
||||
{
|
||||
case ZSTDMT_p_jobSize :
|
||||
DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %i", value);
|
||||
if ( value != 0 /* default */
|
||||
&& value < ZSTDMT_JOBSIZE_MIN)
|
||||
value = ZSTDMT_JOBSIZE_MIN;
|
||||
assert(value >= 0);
|
||||
if (value > ZSTDMT_JOBSIZE_MAX) value = ZSTDMT_JOBSIZE_MAX;
|
||||
params->jobSize = value;
|
||||
return value;
|
||||
|
||||
return ZSTD_CCtxParams_setParameter(params, ZSTD_c_jobSize, value);
|
||||
case ZSTDMT_p_overlapLog :
|
||||
DEBUGLOG(4, "ZSTDMT_p_overlapLog : %i", value);
|
||||
if (value < ZSTD_OVERLAPLOG_MIN) value = ZSTD_OVERLAPLOG_MIN;
|
||||
if (value > ZSTD_OVERLAPLOG_MAX) value = ZSTD_OVERLAPLOG_MAX;
|
||||
params->overlapLog = value;
|
||||
return value;
|
||||
|
||||
return ZSTD_CCtxParams_setParameter(params, ZSTD_c_overlapLog, value);
|
||||
case ZSTDMT_p_rsyncable :
|
||||
value = (value != 0);
|
||||
params->rsyncable = value;
|
||||
return value;
|
||||
|
||||
DEBUGLOG(4, "ZSTD_p_rsyncable : %i", value);
|
||||
return ZSTD_CCtxParams_setParameter(params, ZSTD_c_rsyncable, value);
|
||||
default :
|
||||
return ERROR(parameter_unsupported);
|
||||
}
|
||||
@ -1021,32 +1016,29 @@ size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,
|
||||
{
|
||||
switch (parameter) {
|
||||
case ZSTDMT_p_jobSize:
|
||||
assert(mtctx->params.jobSize <= INT_MAX);
|
||||
*value = (int)(mtctx->params.jobSize);
|
||||
break;
|
||||
return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_jobSize, value);
|
||||
case ZSTDMT_p_overlapLog:
|
||||
*value = mtctx->params.overlapLog;
|
||||
break;
|
||||
return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_overlapLog, value);
|
||||
case ZSTDMT_p_rsyncable:
|
||||
*value = mtctx->params.rsyncable;
|
||||
break;
|
||||
return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_rsyncable, value);
|
||||
default:
|
||||
return ERROR(parameter_unsupported);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Sets parameters relevant to the compression job,
|
||||
* initializing others to default values. */
|
||||
static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
|
||||
{
|
||||
ZSTD_CCtx_params jobParams;
|
||||
memset(&jobParams, 0, sizeof(jobParams));
|
||||
|
||||
jobParams.cParams = params.cParams;
|
||||
jobParams.fParams = params.fParams;
|
||||
jobParams.compressionLevel = params.compressionLevel;
|
||||
|
||||
ZSTD_CCtx_params jobParams = params;
|
||||
/* Clear parameters related to multithreading */
|
||||
jobParams.forceWindow = 0;
|
||||
jobParams.nbWorkers = 0;
|
||||
jobParams.jobSize = 0;
|
||||
jobParams.overlapLog = 0;
|
||||
jobParams.rsyncable = 0;
|
||||
memset(&jobParams.ldmParams, 0, sizeof(ldmParams_t));
|
||||
memset(&jobParams.customMem, 0, sizeof(ZSTD_customMem));
|
||||
return jobParams;
|
||||
}
|
||||
|
||||
@ -1056,7 +1048,7 @@ static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
|
||||
static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
|
||||
{
|
||||
if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
|
||||
CHECK_F( ZSTDMT_expandJobsTable(mtctx, nbWorkers) );
|
||||
FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) );
|
||||
mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
|
||||
if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
|
||||
mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
|
||||
@ -1263,7 +1255,7 @@ static size_t ZSTDMT_compress_advanced_internal(
|
||||
if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize))
|
||||
return ERROR(memory_allocation);
|
||||
|
||||
CHECK_F( ZSTDMT_expandJobsTable(mtctx, nbJobs) ); /* only expands if necessary */
|
||||
FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) ); /* only expands if necessary */
|
||||
|
||||
{ unsigned u;
|
||||
for (u=0; u<nbJobs; u++) {
|
||||
@ -1396,7 +1388,7 @@ size_t ZSTDMT_initCStream_internal(
|
||||
|
||||
/* init */
|
||||
if (params.nbWorkers != mtctx->params.nbWorkers)
|
||||
CHECK_F( ZSTDMT_resize(mtctx, params.nbWorkers) );
|
||||
FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) );
|
||||
|
||||
if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
|
||||
if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
|
||||
@ -1547,7 +1539,7 @@ size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel) {
|
||||
/* ZSTDMT_writeLastEmptyBlock()
|
||||
* Write a single empty block with an end-of-frame to finish a frame.
|
||||
* Job must be created from streaming variant.
|
||||
* This function is always successfull if expected conditions are fulfilled.
|
||||
* This function is always successful if expected conditions are fulfilled.
|
||||
*/
|
||||
static void ZSTDMT_writeLastEmptyBlock(ZSTDMT_jobDescription* job)
|
||||
{
|
||||
@ -1987,7 +1979,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
||||
assert(input->pos <= input->size);
|
||||
|
||||
if (mtctx->singleBlockingThread) { /* delegate to single-thread (synchronous) */
|
||||
return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp);
|
||||
return ZSTD_compressStream2(mtctx->cctxPool->cctx[0], output, input, endOp);
|
||||
}
|
||||
|
||||
if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) {
|
||||
@ -2051,7 +2043,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
||||
|| ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) { /* must finish the frame with a zero-size block */
|
||||
size_t const jobSize = mtctx->inBuff.filled;
|
||||
assert(mtctx->inBuff.filled <= mtctx->targetSectionSize);
|
||||
CHECK_F( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) );
|
||||
FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) );
|
||||
}
|
||||
|
||||
/* check for potential compressed data ready to be flushed */
|
||||
@ -2065,7 +2057,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
||||
|
||||
size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
|
||||
{
|
||||
CHECK_F( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) );
|
||||
FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) );
|
||||
|
||||
/* recommended next input size : fill current input buffer */
|
||||
return mtctx->targetSectionSize - mtctx->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */
|
||||
@ -2082,7 +2074,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* ou
|
||||
|| ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) { /* need a last 0-size block to end frame */
|
||||
DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)",
|
||||
(U32)srcSize, (U32)endFrame);
|
||||
CHECK_F( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) );
|
||||
FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) );
|
||||
}
|
||||
|
||||
/* check if there is any data available to flush */
|
||||
|
@ -17,10 +17,25 @@
|
||||
|
||||
|
||||
/* Note : This is an internal API.
|
||||
* Some methods are still exposed (ZSTDLIB_API),
|
||||
* These APIs used to be exposed with ZSTDLIB_API,
|
||||
* because it used to be the only way to invoke MT compression.
|
||||
* Now, it's recommended to use ZSTD_compress_generic() instead.
|
||||
* These methods will stop being exposed in a future version */
|
||||
* Now, it's recommended to use ZSTD_compress2 and ZSTD_compressStream2()
|
||||
* instead.
|
||||
*
|
||||
* If you depend on these APIs and can't switch, then define
|
||||
* ZSTD_LEGACY_MULTITHREADED_API when making the dynamic library.
|
||||
* However, we may completely remove these functions in a future
|
||||
* release, so please switch soon.
|
||||
*
|
||||
* This API requires ZSTD_MULTITHREAD to be defined during compilation,
|
||||
* otherwise ZSTDMT_createCCtx*() will fail.
|
||||
*/
|
||||
|
||||
#ifdef ZSTD_LEGACY_MULTITHREADED_API
|
||||
# define ZSTDMT_API ZSTDLIB_API
|
||||
#else
|
||||
# define ZSTDMT_API
|
||||
#endif
|
||||
|
||||
/* === Dependencies === */
|
||||
#include <stddef.h> /* size_t */
|
||||
@ -40,17 +55,19 @@
|
||||
|
||||
/* === Memory management === */
|
||||
typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
|
||||
ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
|
||||
ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
|
||||
/* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
|
||||
ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
|
||||
/* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
|
||||
ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
|
||||
ZSTD_customMem cMem);
|
||||
ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
|
||||
ZSTDMT_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
|
||||
|
||||
ZSTDLIB_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
|
||||
ZSTDMT_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
|
||||
|
||||
|
||||
/* === Simple one-pass compression function === */
|
||||
|
||||
ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
||||
ZSTDMT_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
int compressionLevel);
|
||||
@ -59,31 +76,31 @@ ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
||||
|
||||
/* === Streaming functions === */
|
||||
|
||||
ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
|
||||
ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */
|
||||
ZSTDMT_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
|
||||
ZSTDMT_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */
|
||||
|
||||
ZSTDLIB_API size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
|
||||
ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
|
||||
ZSTDMT_API size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
|
||||
ZSTDMT_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
|
||||
|
||||
ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
|
||||
ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
|
||||
ZSTDMT_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
|
||||
ZSTDMT_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
|
||||
|
||||
|
||||
/* === Advanced functions and parameters === */
|
||||
|
||||
ZSTDLIB_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
const ZSTD_CDict* cdict,
|
||||
ZSTD_parameters params,
|
||||
int overlapLog);
|
||||
ZSTDMT_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
const ZSTD_CDict* cdict,
|
||||
ZSTD_parameters params,
|
||||
int overlapLog);
|
||||
|
||||
ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
|
||||
ZSTDMT_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
|
||||
const void* dict, size_t dictSize, /* dict can be released after init, a local copy is preserved within zcs */
|
||||
ZSTD_parameters params,
|
||||
unsigned long long pledgedSrcSize); /* pledgedSrcSize is optional and can be zero == unknown */
|
||||
|
||||
ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
|
||||
ZSTDMT_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
|
||||
const ZSTD_CDict* cdict,
|
||||
ZSTD_frameParameters fparams,
|
||||
unsigned long long pledgedSrcSize); /* note : zero means empty */
|
||||
@ -92,7 +109,7 @@ ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
|
||||
* List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
|
||||
typedef enum {
|
||||
ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
|
||||
ZSTDMT_p_overlapLog, /* Each job may reload a part of previous job to enhance compressionr ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
|
||||
ZSTDMT_p_overlapLog, /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
|
||||
ZSTDMT_p_rsyncable /* Enables rsyncable mode. */
|
||||
} ZSTDMT_parameter;
|
||||
|
||||
@ -101,12 +118,12 @@ typedef enum {
|
||||
* The function must be called typically after ZSTD_createCCtx() but __before ZSTDMT_init*() !__
|
||||
* Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
|
||||
* @return : 0, or an error code (which can be tested using ZSTD_isError()) */
|
||||
ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value);
|
||||
ZSTDMT_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value);
|
||||
|
||||
/* ZSTDMT_getMTCtxParameter() :
|
||||
* Query the ZSTDMT_CCtx for a parameter value.
|
||||
* @return : 0, or an error code (which can be tested using ZSTD_isError()) */
|
||||
ZSTDLIB_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value);
|
||||
ZSTDMT_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value);
|
||||
|
||||
|
||||
/*! ZSTDMT_compressStream_generic() :
|
||||
@ -116,7 +133,7 @@ ZSTDLIB_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter
|
||||
* 0 if fully flushed
|
||||
* or an error code
|
||||
* note : needs to be init using any ZSTD_initCStream*() variant */
|
||||
ZSTDLIB_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
||||
ZSTDMT_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
||||
ZSTD_outBuffer* output,
|
||||
ZSTD_inBuffer* input,
|
||||
ZSTD_EndDirective endOp);
|
||||
|
@ -105,9 +105,9 @@ ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
|
||||
ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
|
||||
|
||||
/* load entropy tables */
|
||||
CHECK_E( ZSTD_loadDEntropy(&ddict->entropy,
|
||||
ddict->dictContent, ddict->dictSize),
|
||||
dictionary_corrupted );
|
||||
RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
|
||||
&ddict->entropy, ddict->dictContent, ddict->dictSize)),
|
||||
dictionary_corrupted);
|
||||
ddict->entropyPresent = 1;
|
||||
return 0;
|
||||
}
|
||||
@ -133,7 +133,7 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
|
||||
ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
|
||||
|
||||
/* parse dictionary content */
|
||||
CHECK_F( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) );
|
||||
FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) );
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -106,6 +106,7 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
|
||||
dctx->ddictLocal = NULL;
|
||||
dctx->dictEnd = NULL;
|
||||
dctx->ddictIsCold = 0;
|
||||
dctx->dictUses = ZSTD_dont_use;
|
||||
dctx->inBuff = NULL;
|
||||
dctx->inBuffSize = 0;
|
||||
dctx->outBuffSize = 0;
|
||||
@ -147,13 +148,20 @@ ZSTD_DCtx* ZSTD_createDCtx(void)
|
||||
return ZSTD_createDCtx_advanced(ZSTD_defaultCMem);
|
||||
}
|
||||
|
||||
/* ZSTD_clearDict() :
 * Drops every dictionary reference held by `dctx` :
 * - passes dctx->ddictLocal to ZSTD_freeDDict(), then sets it to NULL
 * - sets dctx->ddict to NULL (that pointer is only cleared here, not freed)
 * - sets dctx->dictUses to ZSTD_dont_use */
static void ZSTD_clearDict(ZSTD_DCtx* dctx)
|
||||
{
|
||||
ZSTD_freeDDict(dctx->ddictLocal);
|
||||
dctx->ddictLocal = NULL;
|
||||
dctx->ddict = NULL;
|
||||
dctx->dictUses = ZSTD_dont_use;
|
||||
}
|
||||
|
||||
size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
|
||||
{
|
||||
if (dctx==NULL) return 0; /* support free on NULL */
|
||||
if (dctx->staticSize) return ERROR(memory_allocation); /* not compatible with static DCtx */
|
||||
RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx");
|
||||
{ ZSTD_customMem const cMem = dctx->customMem;
|
||||
ZSTD_freeDDict(dctx->ddictLocal);
|
||||
dctx->ddictLocal = NULL;
|
||||
ZSTD_clearDict(dctx);
|
||||
ZSTD_free(dctx->inBuff, cMem);
|
||||
dctx->inBuff = NULL;
|
||||
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
|
||||
@ -203,7 +211,7 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size)
|
||||
static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format)
|
||||
{
|
||||
size_t const minInputSize = ZSTD_startingInputLength(format);
|
||||
if (srcSize < minInputSize) return ERROR(srcSize_wrong);
|
||||
RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong);
|
||||
|
||||
{ BYTE const fhd = ((const BYTE*)src)[minInputSize-1];
|
||||
U32 const dictID= fhd & 3;
|
||||
@ -238,7 +246,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s
|
||||
|
||||
memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */
|
||||
if (srcSize < minInputSize) return minInputSize;
|
||||
if (src==NULL) return ERROR(GENERIC); /* invalid parameter */
|
||||
RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter");
|
||||
|
||||
if ( (format != ZSTD_f_zstd1_magicless)
|
||||
&& (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) {
|
||||
@ -251,7 +259,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s
|
||||
zfhPtr->frameType = ZSTD_skippableFrame;
|
||||
return 0;
|
||||
}
|
||||
return ERROR(prefix_unknown);
|
||||
RETURN_ERROR(prefix_unknown);
|
||||
}
|
||||
|
||||
/* ensure there is enough `srcSize` to fully read/decode frame header */
|
||||
@ -269,14 +277,13 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s
|
||||
U64 windowSize = 0;
|
||||
U32 dictID = 0;
|
||||
U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN;
|
||||
if ((fhdByte & 0x08) != 0)
|
||||
return ERROR(frameParameter_unsupported); /* reserved bits, must be zero */
|
||||
RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported,
|
||||
"reserved bits, must be zero");
|
||||
|
||||
if (!singleSegment) {
|
||||
BYTE const wlByte = ip[pos++];
|
||||
U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
|
||||
if (windowLog > ZSTD_WINDOWLOG_MAX)
|
||||
return ERROR(frameParameter_windowTooLarge);
|
||||
RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge);
|
||||
windowSize = (1ULL << windowLog);
|
||||
windowSize += (windowSize >> 3) * (wlByte&7);
|
||||
}
|
||||
@ -348,12 +355,11 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize)
|
||||
size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE;
|
||||
U32 sizeU32;
|
||||
|
||||
if (srcSize < ZSTD_SKIPPABLEHEADERSIZE)
|
||||
return ERROR(srcSize_wrong);
|
||||
RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong);
|
||||
|
||||
sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
|
||||
if ((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32)
|
||||
return ERROR(frameParameter_unsupported);
|
||||
RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
|
||||
frameParameter_unsupported);
|
||||
|
||||
return skippableHeaderSize + sizeU32;
|
||||
}
|
||||
@ -428,13 +434,89 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he
|
||||
{
|
||||
size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format);
|
||||
if (ZSTD_isError(result)) return result; /* invalid header */
|
||||
if (result>0) return ERROR(srcSize_wrong); /* headerSize too small */
|
||||
if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID))
|
||||
return ERROR(dictionary_wrong);
|
||||
RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small");
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
/* Skip the dictID check in fuzzing mode, because it makes the search
|
||||
* harder.
|
||||
*/
|
||||
RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID),
|
||||
dictionary_wrong);
|
||||
#endif
|
||||
if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ZSTD_errorFrameSizeInfo() :
 * Builds a ZSTD_frameSizeInfo describing a failure :
 * - compressedSize receives `ret` unchanged (callers in this file pass an error code)
 * - decompressedBound is set to ZSTD_CONTENTSIZE_ERROR
 * @return : the populated structure, by value. */
static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret)
|
||||
{
|
||||
ZSTD_frameSizeInfo frameSizeInfo;
|
||||
frameSizeInfo.compressedSize = ret;
|
||||
frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
|
||||
return frameSizeInfo;
|
||||
}
|
||||
|
||||
/* ZSTD_findFrameSizeInfo() :
 * Measures the single frame starting at `src` without decompressing it.
 * - legacy frame (only when ZSTD_LEGACY_SUPPORT >= 1) : delegated to
 *   ZSTD_findFrameSizeInfoLegacy()
 * - skippable frame : compressedSize comes from readSkippableFrameSize(),
 *   decompressedBound keeps the 0 written by the initial memset
 * - otherwise : the frame header is parsed, then block headers are walked
 *   until the block flagged lastBlock, plus 4 bytes when checksumFlag is set
 * @return : compressedSize = number of bytes spanned by the frame;
 *           decompressedBound = zfh.frameContentSize when it is known,
 *           else nbBlocks * zfh.blockSizeMax.
 *           On failure, the result of ZSTD_errorFrameSizeInfo() :
 *           compressedSize holds the error code and
 *           decompressedBound == ZSTD_CONTENTSIZE_ERROR. */
static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize)
|
||||
{
|
||||
ZSTD_frameSizeInfo frameSizeInfo;
|
||||
memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo));
|
||||
|
||||
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
|
||||
if (ZSTD_isLegacy(src, srcSize))
|
||||
return ZSTD_findFrameSizeInfoLegacy(src, srcSize);
|
||||
#endif
|
||||
|
||||
if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
|
||||
&& (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
|
||||
frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);   /* value is stored as returned, not checked here */
|
||||
return frameSizeInfo;
|
||||
} else {
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
const BYTE* const ipstart = ip;
|
||||
size_t remainingSize = srcSize;
|
||||
size_t nbBlocks = 0;
|
||||
ZSTD_frameHeader zfh;
|
||||
|
||||
/* Extract Frame Header */
|
||||
{ size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize);
|
||||
if (ZSTD_isError(ret))
|
||||
return ZSTD_errorFrameSizeInfo(ret);
|
||||
if (ret > 0)   /* NOTE(review): a positive return presumably means srcSize is too small for the header - confirm against ZSTD_getFrameHeader() */
|
||||
return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
|
||||
}
|
||||
|
||||
ip += zfh.headerSize;
|
||||
remainingSize -= zfh.headerSize;
|
||||
|
||||
/* Iterate over each block */
|
||||
while (1) {
|
||||
blockProperties_t blockProperties;
|
||||
size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
|
||||
if (ZSTD_isError(cBlockSize))
|
||||
return ZSTD_errorFrameSizeInfo(cBlockSize);
|
||||
|
||||
if (ZSTD_blockHeaderSize + cBlockSize > remainingSize)   /* block header + block content must fit in the remaining input */
|
||||
return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
|
||||
|
||||
ip += ZSTD_blockHeaderSize + cBlockSize;
|
||||
remainingSize -= ZSTD_blockHeaderSize + cBlockSize;
|
||||
nbBlocks++;   /* block count feeds the decompressedBound fallback below */
|
||||
|
||||
if (blockProperties.lastBlock) break;
|
||||
}
|
||||
|
||||
/* Final frame content checksum */
|
||||
if (zfh.checksumFlag) {
|
||||
if (remainingSize < 4)
|
||||
return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
|
||||
ip += 4;
|
||||
}
|
||||
|
||||
frameSizeInfo.compressedSize = ip - ipstart;
|
||||
frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN)
|
||||
? zfh.frameContentSize
|
||||
: nbBlocks * zfh.blockSizeMax;
|
||||
return frameSizeInfo;
|
||||
}
|
||||
}
|
||||
|
||||
/** ZSTD_findFrameCompressedSize() :
|
||||
* compatible with legacy mode
|
||||
@ -443,53 +525,34 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he
|
||||
* @return : the compressed size of the frame starting at `src` */
|
||||
size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
|
||||
{
|
||||
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
|
||||
if (ZSTD_isLegacy(src, srcSize))
|
||||
return ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
|
||||
#endif
|
||||
if ( (srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
|
||||
&& (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START ) {
|
||||
return readSkippableFrameSize(src, srcSize);
|
||||
} else {
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
const BYTE* const ipstart = ip;
|
||||
size_t remainingSize = srcSize;
|
||||
ZSTD_frameHeader zfh;
|
||||
|
||||
/* Extract Frame Header */
|
||||
{ size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize);
|
||||
if (ZSTD_isError(ret)) return ret;
|
||||
if (ret > 0) return ERROR(srcSize_wrong);
|
||||
}
|
||||
|
||||
ip += zfh.headerSize;
|
||||
remainingSize -= zfh.headerSize;
|
||||
|
||||
/* Loop on each block */
|
||||
while (1) {
|
||||
blockProperties_t blockProperties;
|
||||
size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
|
||||
if (ZSTD_isError(cBlockSize)) return cBlockSize;
|
||||
|
||||
if (ZSTD_blockHeaderSize + cBlockSize > remainingSize)
|
||||
return ERROR(srcSize_wrong);
|
||||
|
||||
ip += ZSTD_blockHeaderSize + cBlockSize;
|
||||
remainingSize -= ZSTD_blockHeaderSize + cBlockSize;
|
||||
|
||||
if (blockProperties.lastBlock) break;
|
||||
}
|
||||
|
||||
if (zfh.checksumFlag) { /* Final frame content checksum */
|
||||
if (remainingSize < 4) return ERROR(srcSize_wrong);
|
||||
ip += 4;
|
||||
}
|
||||
|
||||
return ip - ipstart;
|
||||
}
|
||||
ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
|
||||
return frameSizeInfo.compressedSize;
|
||||
}
|
||||
|
||||
|
||||
/** ZSTD_decompressBound() :
|
||||
* compatible with legacy mode
|
||||
* `src` must point to the start of a ZSTD frame or a skippable frame
|
||||
* `srcSize` must be at least as large as the frame contained
|
||||
* @return : the maximum decompressed size of the compressed source
|
||||
*/
|
||||
unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
|
||||
{
|
||||
unsigned long long bound = 0;
|
||||
/* Iterate over each frame */
|
||||
while (srcSize > 0) {
|
||||
ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
|
||||
size_t const compressedSize = frameSizeInfo.compressedSize;
|
||||
unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
|
||||
if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
|
||||
return ZSTD_CONTENTSIZE_ERROR;
|
||||
src = (const BYTE*)src + compressedSize;
|
||||
srcSize -= compressedSize;
|
||||
bound += decompressedBound;
|
||||
}
|
||||
return bound;
|
||||
}
|
||||
|
||||
|
||||
/*-*************************************************************
|
||||
* Frame decoding
|
||||
@ -522,9 +585,9 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
|
||||
DEBUGLOG(5, "ZSTD_copyRawBlock");
|
||||
if (dst == NULL) {
|
||||
if (srcSize == 0) return 0;
|
||||
return ERROR(dstBuffer_null);
|
||||
RETURN_ERROR(dstBuffer_null);
|
||||
}
|
||||
if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall);
|
||||
RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall);
|
||||
memcpy(dst, src, srcSize);
|
||||
return srcSize;
|
||||
}
|
||||
@ -535,9 +598,9 @@ static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity,
|
||||
{
|
||||
if (dst == NULL) {
|
||||
if (regenSize == 0) return 0;
|
||||
return ERROR(dstBuffer_null);
|
||||
RETURN_ERROR(dstBuffer_null);
|
||||
}
|
||||
if (regenSize > dstCapacity) return ERROR(dstSize_tooSmall);
|
||||
RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall);
|
||||
memset(dst, b, regenSize);
|
||||
return regenSize;
|
||||
}
|
||||
@ -560,15 +623,16 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
|
||||
DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr);
|
||||
|
||||
/* check */
|
||||
if (remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize)
|
||||
return ERROR(srcSize_wrong);
|
||||
RETURN_ERROR_IF(
|
||||
remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize,
|
||||
srcSize_wrong);
|
||||
|
||||
/* Frame Header */
|
||||
{ size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_FRAMEHEADERSIZE_PREFIX);
|
||||
if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
|
||||
if (remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize)
|
||||
return ERROR(srcSize_wrong);
|
||||
CHECK_F( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) );
|
||||
RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize,
|
||||
srcSize_wrong);
|
||||
FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) );
|
||||
ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize;
|
||||
}
|
||||
|
||||
@ -581,7 +645,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
|
||||
|
||||
ip += ZSTD_blockHeaderSize;
|
||||
remainingSrcSize -= ZSTD_blockHeaderSize;
|
||||
if (cBlockSize > remainingSrcSize) return ERROR(srcSize_wrong);
|
||||
RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong);
|
||||
|
||||
switch(blockProperties.blockType)
|
||||
{
|
||||
@ -596,7 +660,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
|
||||
break;
|
||||
case bt_reserved :
|
||||
default:
|
||||
return ERROR(corruption_detected);
|
||||
RETURN_ERROR(corruption_detected);
|
||||
}
|
||||
|
||||
if (ZSTD_isError(decodedSize)) return decodedSize;
|
||||
@ -609,15 +673,15 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
|
||||
}
|
||||
|
||||
if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {
|
||||
if ((U64)(op-ostart) != dctx->fParams.frameContentSize) {
|
||||
return ERROR(corruption_detected);
|
||||
} }
|
||||
RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize,
|
||||
corruption_detected);
|
||||
}
|
||||
if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */
|
||||
U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState);
|
||||
U32 checkRead;
|
||||
if (remainingSrcSize<4) return ERROR(checksum_wrong);
|
||||
RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong);
|
||||
checkRead = MEM_readLE32(ip);
|
||||
if (checkRead != checkCalc) return ERROR(checksum_wrong);
|
||||
RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong);
|
||||
ip += 4;
|
||||
remainingSrcSize -= 4;
|
||||
}
|
||||
@ -652,8 +716,8 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
|
||||
size_t decodedSize;
|
||||
size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
|
||||
if (ZSTD_isError(frameSize)) return frameSize;
|
||||
/* legacy support is not compatible with static dctx */
|
||||
if (dctx->staticSize) return ERROR(memory_allocation);
|
||||
RETURN_ERROR_IF(dctx->staticSize, memory_allocation,
|
||||
"legacy support is not compatible with static dctx");
|
||||
|
||||
decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize);
|
||||
if (ZSTD_isError(decodedSize)) return decodedSize;
|
||||
@ -676,7 +740,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
|
||||
size_t const skippableSize = readSkippableFrameSize(src, srcSize);
|
||||
if (ZSTD_isError(skippableSize))
|
||||
return skippableSize;
|
||||
if (srcSize < skippableSize) return ERROR(srcSize_wrong);
|
||||
RETURN_ERROR_IF(srcSize < skippableSize, srcSize_wrong);
|
||||
|
||||
src = (const BYTE *)src + skippableSize;
|
||||
srcSize -= skippableSize;
|
||||
@ -685,29 +749,29 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
|
||||
|
||||
if (ddict) {
|
||||
/* we were called from ZSTD_decompress_usingDDict */
|
||||
CHECK_F(ZSTD_decompressBegin_usingDDict(dctx, ddict));
|
||||
FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict));
|
||||
} else {
|
||||
/* this will initialize correctly with no dict if dict == NULL, so
|
||||
* use this in all cases but ddict */
|
||||
CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize));
|
||||
FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize));
|
||||
}
|
||||
ZSTD_checkContinuity(dctx, dst);
|
||||
|
||||
{ const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
|
||||
&src, &srcSize);
|
||||
if ( (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown)
|
||||
&& (moreThan1Frame==1) ) {
|
||||
/* at least one frame successfully completed,
|
||||
* but following bytes are garbage :
|
||||
* it's more likely to be a srcSize error,
|
||||
* specifying more bytes than compressed size of frame(s).
|
||||
* This error message replaces ERROR(prefix_unknown),
|
||||
* which would be confusing, as the first header is actually correct.
|
||||
* Note that one could be unlucky, it might be a corruption error instead,
|
||||
* happening right at the place where we expect zstd magic bytes.
|
||||
* But this is _much_ less likely than a srcSize field error. */
|
||||
return ERROR(srcSize_wrong);
|
||||
}
|
||||
RETURN_ERROR_IF(
|
||||
(ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown)
|
||||
&& (moreThan1Frame==1),
|
||||
srcSize_wrong,
|
||||
"at least one frame successfully completed, but following "
|
||||
"bytes are garbage: it's more likely to be a srcSize error, "
|
||||
"specifying more bytes than compressed size of frame(s). This "
|
||||
"error message replaces ERROR(prefix_unknown), which would be "
|
||||
"confusing, as the first header is actually correct. Note that "
|
||||
"one could be unlucky, it might be a corruption error instead, "
|
||||
"happening right at the place where we expect zstd magic "
|
||||
"bytes. But this is _much_ less likely than a srcSize field "
|
||||
"error.");
|
||||
if (ZSTD_isError(res)) return res;
|
||||
assert(res <= dstCapacity);
|
||||
dst = (BYTE*)dst + res;
|
||||
@ -716,7 +780,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
|
||||
moreThan1Frame = 1;
|
||||
} /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */
|
||||
|
||||
if (srcSize) return ERROR(srcSize_wrong); /* input not entirely consumed */
|
||||
RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed");
|
||||
|
||||
return (BYTE*)dst - (BYTE*)dststart;
|
||||
}
|
||||
@ -730,9 +794,26 @@ size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
|
||||
}
|
||||
|
||||
|
||||
static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx)
|
||||
{
|
||||
switch (dctx->dictUses) {
|
||||
default:
|
||||
assert(0 /* Impossible */);
|
||||
/* fall-through */
|
||||
case ZSTD_dont_use:
|
||||
ZSTD_clearDict(dctx);
|
||||
return NULL;
|
||||
case ZSTD_use_indefinitely:
|
||||
return dctx->ddict;
|
||||
case ZSTD_use_once:
|
||||
dctx->dictUses = ZSTD_dont_use;
|
||||
return dctx->ddict;
|
||||
}
|
||||
}
|
||||
|
||||
size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
||||
{
|
||||
return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0);
|
||||
return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx));
|
||||
}
|
||||
|
||||
|
||||
@ -741,7 +822,7 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr
|
||||
#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1)
|
||||
size_t regenSize;
|
||||
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
|
||||
if (dctx==NULL) return ERROR(memory_allocation);
|
||||
RETURN_ERROR_IF(dctx==NULL, memory_allocation);
|
||||
regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
|
||||
ZSTD_freeDCtx(dctx);
|
||||
return regenSize;
|
||||
@ -791,8 +872,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
|
||||
{
|
||||
DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize);
|
||||
/* Sanity check */
|
||||
if (srcSize != dctx->expected)
|
||||
return ERROR(srcSize_wrong); /* not allowed */
|
||||
RETURN_ERROR_IF(srcSize != dctx->expected, srcSize_wrong, "not allowed");
|
||||
if (dstCapacity) ZSTD_checkContinuity(dctx, dst);
|
||||
|
||||
switch (dctx->stage)
|
||||
@ -817,7 +897,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
|
||||
case ZSTDds_decodeFrameHeader:
|
||||
assert(src != NULL);
|
||||
memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize);
|
||||
CHECK_F(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize));
|
||||
FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize));
|
||||
dctx->expected = ZSTD_blockHeaderSize;
|
||||
dctx->stage = ZSTDds_decodeBlockHeader;
|
||||
return 0;
|
||||
@ -867,7 +947,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
|
||||
break;
|
||||
case bt_reserved : /* should never happen */
|
||||
default:
|
||||
return ERROR(corruption_detected);
|
||||
RETURN_ERROR(corruption_detected);
|
||||
}
|
||||
if (ZSTD_isError(rSize)) return rSize;
|
||||
DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize);
|
||||
@ -876,10 +956,10 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
|
||||
|
||||
if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */
|
||||
DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize);
|
||||
if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {
|
||||
if (dctx->decodedSize != dctx->fParams.frameContentSize) {
|
||||
return ERROR(corruption_detected);
|
||||
} }
|
||||
RETURN_ERROR_IF(
|
||||
dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
|
||||
&& dctx->decodedSize != dctx->fParams.frameContentSize,
|
||||
corruption_detected);
|
||||
if (dctx->fParams.checksumFlag) { /* another round for frame checksum */
|
||||
dctx->expected = 4;
|
||||
dctx->stage = ZSTDds_checkChecksum;
|
||||
@ -900,7 +980,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
|
||||
{ U32 const h32 = (U32)XXH64_digest(&dctx->xxhState);
|
||||
U32 const check32 = MEM_readLE32(src);
|
||||
DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32);
|
||||
if (check32 != h32) return ERROR(checksum_wrong);
|
||||
RETURN_ERROR_IF(check32 != h32, checksum_wrong);
|
||||
dctx->expected = 0;
|
||||
dctx->stage = ZSTDds_getFrameHeaderSize;
|
||||
return 0;
|
||||
@ -921,7 +1001,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
|
||||
|
||||
default:
|
||||
assert(0); /* impossible */
|
||||
return ERROR(GENERIC); /* some compiler require default to do something */
|
||||
RETURN_ERROR(GENERIC); /* some compiler require default to do something */
|
||||
}
|
||||
}
|
||||
|
||||
@ -945,7 +1025,7 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
||||
const BYTE* dictPtr = (const BYTE*)dict;
|
||||
const BYTE* const dictEnd = dictPtr + dictSize;
|
||||
|
||||
if (dictSize <= 8) return ERROR(dictionary_corrupted);
|
||||
RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted);
|
||||
assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */
|
||||
dictPtr += 8; /* skip header = magic + dictID */
|
||||
|
||||
@ -964,16 +1044,16 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
||||
dictPtr, dictEnd - dictPtr,
|
||||
workspace, workspaceSize);
|
||||
#endif
|
||||
if (HUF_isError(hSize)) return ERROR(dictionary_corrupted);
|
||||
RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted);
|
||||
dictPtr += hSize;
|
||||
}
|
||||
|
||||
{ short offcodeNCount[MaxOff+1];
|
||||
unsigned offcodeMaxValue = MaxOff, offcodeLog;
|
||||
size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
|
||||
if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
|
||||
if (offcodeMaxValue > MaxOff) return ERROR(dictionary_corrupted);
|
||||
if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
|
||||
RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted);
|
||||
RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted);
|
||||
RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted);
|
||||
ZSTD_buildFSETable( entropy->OFTable,
|
||||
offcodeNCount, offcodeMaxValue,
|
||||
OF_base, OF_bits,
|
||||
@ -984,9 +1064,9 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
||||
{ short matchlengthNCount[MaxML+1];
|
||||
unsigned matchlengthMaxValue = MaxML, matchlengthLog;
|
||||
size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
|
||||
if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
|
||||
if (matchlengthMaxValue > MaxML) return ERROR(dictionary_corrupted);
|
||||
if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
|
||||
RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted);
|
||||
RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted);
|
||||
RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted);
|
||||
ZSTD_buildFSETable( entropy->MLTable,
|
||||
matchlengthNCount, matchlengthMaxValue,
|
||||
ML_base, ML_bits,
|
||||
@ -997,9 +1077,9 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
||||
{ short litlengthNCount[MaxLL+1];
|
||||
unsigned litlengthMaxValue = MaxLL, litlengthLog;
|
||||
size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
|
||||
if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
|
||||
if (litlengthMaxValue > MaxLL) return ERROR(dictionary_corrupted);
|
||||
if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
|
||||
RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted);
|
||||
RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted);
|
||||
RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted);
|
||||
ZSTD_buildFSETable( entropy->LLTable,
|
||||
litlengthNCount, litlengthMaxValue,
|
||||
LL_base, LL_bits,
|
||||
@ -1007,12 +1087,13 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
||||
dictPtr += litlengthHeaderSize;
|
||||
}
|
||||
|
||||
if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
|
||||
RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted);
|
||||
{ int i;
|
||||
size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
|
||||
for (i=0; i<3; i++) {
|
||||
U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
|
||||
if (rep==0 || rep >= dictContentSize) return ERROR(dictionary_corrupted);
|
||||
RETURN_ERROR_IF(rep==0 || rep >= dictContentSize,
|
||||
dictionary_corrupted);
|
||||
entropy->rep[i] = rep;
|
||||
} }
|
||||
|
||||
@ -1030,7 +1111,7 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict
|
||||
|
||||
/* load entropy tables */
|
||||
{ size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize);
|
||||
if (ZSTD_isError(eSize)) return ERROR(dictionary_corrupted);
|
||||
RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted);
|
||||
dict = (const char*)dict + eSize;
|
||||
dictSize -= eSize;
|
||||
}
|
||||
@ -1064,9 +1145,11 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
|
||||
|
||||
size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
|
||||
{
|
||||
CHECK_F( ZSTD_decompressBegin(dctx) );
|
||||
FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) );
|
||||
if (dict && dictSize)
|
||||
CHECK_E(ZSTD_decompress_insertDictionary(dctx, dict, dictSize), dictionary_corrupted);
|
||||
RETURN_ERROR_IF(
|
||||
ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)),
|
||||
dictionary_corrupted);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1085,7 +1168,7 @@ size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
|
||||
DEBUGLOG(4, "DDict is %s",
|
||||
dctx->ddictIsCold ? "~cold~" : "hot!");
|
||||
}
|
||||
CHECK_F( ZSTD_decompressBegin(dctx) );
|
||||
FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) );
|
||||
if (ddict) { /* NULL ddict is equivalent to no dictionary */
|
||||
ZSTD_copyDDictParameters(dctx, ddict);
|
||||
}
|
||||
@ -1104,7 +1187,7 @@ unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
|
||||
}
|
||||
|
||||
/*! ZSTD_getDictID_fromFrame() :
|
||||
* Provides the dictID required to decompresse frame stored within `src`.
|
||||
* Provides the dictID required to decompress frame stored within `src`.
|
||||
* If @return == 0, the dictID could not be decoded.
|
||||
* This could be for one of the following reasons :
|
||||
* - The frame does not require a dictionary (most common case).
|
||||
@ -1176,15 +1259,14 @@ size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
|
||||
ZSTD_dictLoadMethod_e dictLoadMethod,
|
||||
ZSTD_dictContentType_e dictContentType)
|
||||
{
|
||||
if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
|
||||
ZSTD_freeDDict(dctx->ddictLocal);
|
||||
RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
|
||||
ZSTD_clearDict(dctx);
|
||||
if (dict && dictSize >= 8) {
|
||||
dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
|
||||
if (dctx->ddictLocal == NULL) return ERROR(memory_allocation);
|
||||
} else {
|
||||
dctx->ddictLocal = NULL;
|
||||
RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation);
|
||||
dctx->ddict = dctx->ddictLocal;
|
||||
dctx->dictUses = ZSTD_use_indefinitely;
|
||||
}
|
||||
dctx->ddict = dctx->ddictLocal;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1200,7 +1282,9 @@ size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSi
|
||||
|
||||
size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
|
||||
{
|
||||
return ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType);
|
||||
FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType));
|
||||
dctx->dictUses = ZSTD_use_once;
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize)
|
||||
@ -1215,9 +1299,8 @@ size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSiz
|
||||
size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
|
||||
{
|
||||
DEBUGLOG(4, "ZSTD_initDStream_usingDict");
|
||||
zds->streamStage = zdss_init;
|
||||
zds->noForwardProgress = 0;
|
||||
CHECK_F( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
|
||||
FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) );
|
||||
FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
|
||||
return ZSTD_FRAMEHEADERSIZE_PREFIX;
|
||||
}
|
||||
|
||||
@ -1225,7 +1308,7 @@ size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t di
|
||||
size_t ZSTD_initDStream(ZSTD_DStream* zds)
|
||||
{
|
||||
DEBUGLOG(4, "ZSTD_initDStream");
|
||||
return ZSTD_initDStream_usingDict(zds, NULL, 0);
|
||||
return ZSTD_initDStream_usingDDict(zds, NULL);
|
||||
}
|
||||
|
||||
/* ZSTD_initDStream_usingDDict() :
|
||||
@ -1233,9 +1316,9 @@ size_t ZSTD_initDStream(ZSTD_DStream* zds)
|
||||
* this function cannot fail */
|
||||
size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
|
||||
{
|
||||
size_t const initResult = ZSTD_initDStream(dctx);
|
||||
dctx->ddict = ddict;
|
||||
return initResult;
|
||||
FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) );
|
||||
FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) );
|
||||
return ZSTD_FRAMEHEADERSIZE_PREFIX;
|
||||
}
|
||||
|
||||
/* ZSTD_resetDStream() :
|
||||
@ -1243,19 +1326,19 @@ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
|
||||
* this function cannot fail */
|
||||
size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
|
||||
{
|
||||
DEBUGLOG(4, "ZSTD_resetDStream");
|
||||
dctx->streamStage = zdss_loadHeader;
|
||||
dctx->lhSize = dctx->inPos = dctx->outStart = dctx->outEnd = 0;
|
||||
dctx->legacyVersion = 0;
|
||||
dctx->hostageByte = 0;
|
||||
FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only));
|
||||
return ZSTD_FRAMEHEADERSIZE_PREFIX;
|
||||
}
|
||||
|
||||
|
||||
size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
|
||||
{
|
||||
if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
|
||||
dctx->ddict = ddict;
|
||||
RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
|
||||
ZSTD_clearDict(dctx);
|
||||
if (ddict) {
|
||||
dctx->ddict = ddict;
|
||||
dctx->dictUses = ZSTD_use_indefinitely;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1267,9 +1350,9 @@ size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
|
||||
ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax);
|
||||
size_t const min = (size_t)1 << bounds.lowerBound;
|
||||
size_t const max = (size_t)1 << bounds.upperBound;
|
||||
if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
|
||||
if (maxWindowSize < min) return ERROR(parameter_outOfBound);
|
||||
if (maxWindowSize > max) return ERROR(parameter_outOfBound);
|
||||
RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
|
||||
RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound);
|
||||
RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound);
|
||||
dctx->maxWindowSize = maxWindowSize;
|
||||
return 0;
|
||||
}
|
||||
@ -1311,15 +1394,15 @@ static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value)
|
||||
}
|
||||
|
||||
#define CHECK_DBOUNDS(p,v) { \
|
||||
if (!ZSTD_dParam_withinBounds(p, v)) \
|
||||
return ERROR(parameter_outOfBound); \
|
||||
RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound); \
|
||||
}
|
||||
|
||||
size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value)
|
||||
{
|
||||
if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
|
||||
RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
|
||||
switch(dParam) {
|
||||
case ZSTD_d_windowLogMax:
|
||||
if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT;
|
||||
CHECK_DBOUNDS(ZSTD_d_windowLogMax, value);
|
||||
dctx->maxWindowSize = ((size_t)1) << value;
|
||||
return 0;
|
||||
@ -1329,19 +1412,20 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value
|
||||
return 0;
|
||||
default:;
|
||||
}
|
||||
return ERROR(parameter_unsupported);
|
||||
RETURN_ERROR(parameter_unsupported);
|
||||
}
|
||||
|
||||
size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset)
|
||||
{
|
||||
if ( (reset == ZSTD_reset_session_only)
|
||||
|| (reset == ZSTD_reset_session_and_parameters) ) {
|
||||
(void)ZSTD_initDStream(dctx);
|
||||
dctx->streamStage = zdss_init;
|
||||
dctx->noForwardProgress = 0;
|
||||
}
|
||||
if ( (reset == ZSTD_reset_parameters)
|
||||
|| (reset == ZSTD_reset_session_and_parameters) ) {
|
||||
if (dctx->streamStage != zdss_init)
|
||||
return ERROR(stage_wrong);
|
||||
RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
|
||||
ZSTD_clearDict(dctx);
|
||||
dctx->format = ZSTD_f_zstd1;
|
||||
dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
|
||||
}
|
||||
@ -1360,7 +1444,8 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
|
||||
unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2);
|
||||
unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
|
||||
size_t const minRBSize = (size_t) neededSize;
|
||||
if ((unsigned long long)minRBSize != neededSize) return ERROR(frameParameter_windowTooLarge);
|
||||
RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,
|
||||
frameParameter_windowTooLarge);
|
||||
return minRBSize;
|
||||
}
|
||||
|
||||
@ -1378,9 +1463,9 @@ size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize)
|
||||
ZSTD_frameHeader zfh;
|
||||
size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize);
|
||||
if (ZSTD_isError(err)) return err;
|
||||
if (err>0) return ERROR(srcSize_wrong);
|
||||
if (zfh.windowSize > windowSizeMax)
|
||||
return ERROR(frameParameter_windowTooLarge);
|
||||
RETURN_ERROR_IF(err>0, srcSize_wrong);
|
||||
RETURN_ERROR_IF(zfh.windowSize > windowSizeMax,
|
||||
frameParameter_windowTooLarge);
|
||||
return ZSTD_estimateDStreamSize((size_t)zfh.windowSize);
|
||||
}
|
||||
|
||||
@ -1406,16 +1491,16 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
U32 someMoreWork = 1;
|
||||
|
||||
DEBUGLOG(5, "ZSTD_decompressStream");
|
||||
if (input->pos > input->size) { /* forbidden */
|
||||
DEBUGLOG(5, "in: pos: %u vs size: %u",
|
||||
(U32)input->pos, (U32)input->size);
|
||||
return ERROR(srcSize_wrong);
|
||||
}
|
||||
if (output->pos > output->size) { /* forbidden */
|
||||
DEBUGLOG(5, "out: pos: %u vs size: %u",
|
||||
(U32)output->pos, (U32)output->size);
|
||||
return ERROR(dstSize_tooSmall);
|
||||
}
|
||||
RETURN_ERROR_IF(
|
||||
input->pos > input->size,
|
||||
srcSize_wrong,
|
||||
"forbidden. in: pos: %u vs size: %u",
|
||||
(U32)input->pos, (U32)input->size);
|
||||
RETURN_ERROR_IF(
|
||||
output->pos > output->size,
|
||||
dstSize_tooSmall,
|
||||
"forbidden. out: pos: %u vs size: %u",
|
||||
(U32)output->pos, (U32)output->size);
|
||||
DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos));
|
||||
|
||||
while (someMoreWork) {
|
||||
@ -1423,15 +1508,18 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
{
|
||||
case zdss_init :
|
||||
DEBUGLOG(5, "stage zdss_init => transparent reset ");
|
||||
ZSTD_resetDStream(zds); /* transparent reset on starting decoding a new frame */
|
||||
zds->streamStage = zdss_loadHeader;
|
||||
zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
|
||||
zds->legacyVersion = 0;
|
||||
zds->hostageByte = 0;
|
||||
/* fall-through */
|
||||
|
||||
case zdss_loadHeader :
|
||||
DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip));
|
||||
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
|
||||
if (zds->legacyVersion) {
|
||||
/* legacy support is incompatible with static dctx */
|
||||
if (zds->staticSize) return ERROR(memory_allocation);
|
||||
RETURN_ERROR_IF(zds->staticSize, memory_allocation,
|
||||
"legacy support is incompatible with static dctx");
|
||||
{ size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input);
|
||||
if (hint==0) zds->streamStage = zdss_init;
|
||||
return hint;
|
||||
@ -1443,12 +1531,13 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
|
||||
U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart);
|
||||
if (legacyVersion) {
|
||||
const void* const dict = zds->ddict ? ZSTD_DDict_dictContent(zds->ddict) : NULL;
|
||||
size_t const dictSize = zds->ddict ? ZSTD_DDict_dictSize(zds->ddict) : 0;
|
||||
ZSTD_DDict const* const ddict = ZSTD_getDDict(zds);
|
||||
const void* const dict = ddict ? ZSTD_DDict_dictContent(ddict) : NULL;
|
||||
size_t const dictSize = ddict ? ZSTD_DDict_dictSize(ddict) : 0;
|
||||
DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion);
|
||||
/* legacy support is incompatible with static dctx */
|
||||
if (zds->staticSize) return ERROR(memory_allocation);
|
||||
CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext,
|
||||
RETURN_ERROR_IF(zds->staticSize, memory_allocation,
|
||||
"legacy support is incompatible with static dctx");
|
||||
FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext,
|
||||
zds->previousLegacyVersion, legacyVersion,
|
||||
dict, dictSize));
|
||||
zds->legacyVersion = zds->previousLegacyVersion = legacyVersion;
|
||||
@ -1482,7 +1571,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart);
|
||||
if (cSize <= (size_t)(iend-istart)) {
|
||||
/* shortcut : using single-pass mode */
|
||||
size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, zds->ddict);
|
||||
size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, ZSTD_getDDict(zds));
|
||||
if (ZSTD_isError(decompressedSize)) return decompressedSize;
|
||||
DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()")
|
||||
ip = istart + cSize;
|
||||
@ -1495,13 +1584,13 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
|
||||
/* Consume header (see ZSTDds_decodeFrameHeader) */
|
||||
DEBUGLOG(4, "Consume header");
|
||||
CHECK_F(ZSTD_decompressBegin_usingDDict(zds, zds->ddict));
|
||||
FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)));
|
||||
|
||||
if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
|
||||
zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
|
||||
zds->stage = ZSTDds_skipFrame;
|
||||
} else {
|
||||
CHECK_F(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize));
|
||||
FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize));
|
||||
zds->expected = ZSTD_blockHeaderSize;
|
||||
zds->stage = ZSTDds_decodeBlockHeader;
|
||||
}
|
||||
@ -1511,7 +1600,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
(U32)(zds->fParams.windowSize >>10),
|
||||
(U32)(zds->maxWindowSize >> 10) );
|
||||
zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
|
||||
if (zds->fParams.windowSize > zds->maxWindowSize) return ERROR(frameParameter_windowTooLarge);
|
||||
RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize,
|
||||
frameParameter_windowTooLarge);
|
||||
|
||||
/* Adapt buffer sizes to frame header instructions */
|
||||
{ size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */);
|
||||
@ -1525,14 +1615,15 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
if (zds->staticSize) { /* static DCtx */
|
||||
DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize);
|
||||
assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */
|
||||
if (bufferSize > zds->staticSize - sizeof(ZSTD_DCtx))
|
||||
return ERROR(memory_allocation);
|
||||
RETURN_ERROR_IF(
|
||||
bufferSize > zds->staticSize - sizeof(ZSTD_DCtx),
|
||||
memory_allocation);
|
||||
} else {
|
||||
ZSTD_free(zds->inBuff, zds->customMem);
|
||||
zds->inBuffSize = 0;
|
||||
zds->outBuffSize = 0;
|
||||
zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem);
|
||||
if (zds->inBuff == NULL) return ERROR(memory_allocation);
|
||||
RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation);
|
||||
}
|
||||
zds->inBuffSize = neededInBuffSize;
|
||||
zds->outBuff = zds->inBuff + zds->inBuffSize;
|
||||
@ -1574,7 +1665,9 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
if (isSkipFrame) {
|
||||
loadedSize = MIN(toLoad, (size_t)(iend-ip));
|
||||
} else {
|
||||
if (toLoad > zds->inBuffSize - zds->inPos) return ERROR(corruption_detected); /* should never happen */
|
||||
RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos,
|
||||
corruption_detected,
|
||||
"should never happen");
|
||||
loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip);
|
||||
}
|
||||
ip += loadedSize;
|
||||
@ -1615,7 +1708,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
|
||||
default:
|
||||
assert(0); /* impossible */
|
||||
return ERROR(GENERIC); /* some compiler require default to do something */
|
||||
RETURN_ERROR(GENERIC); /* some compiler require default to do something */
|
||||
} }
|
||||
|
||||
/* result */
|
||||
@ -1624,8 +1717,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
if ((ip==istart) && (op==ostart)) { /* no forward progress */
|
||||
zds->noForwardProgress ++;
|
||||
if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) {
|
||||
if (op==oend) return ERROR(dstSize_tooSmall);
|
||||
if (ip==iend) return ERROR(srcSize_wrong);
|
||||
RETURN_ERROR_IF(op==oend, dstSize_tooSmall);
|
||||
RETURN_ERROR_IF(ip==iend, srcSize_wrong);
|
||||
assert(0);
|
||||
}
|
||||
} else {
|
||||
|
@ -56,14 +56,15 @@ static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
|
||||
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
|
||||
blockProperties_t* bpPtr)
|
||||
{
|
||||
if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
|
||||
RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong);
|
||||
|
||||
{ U32 const cBlockHeader = MEM_readLE24(src);
|
||||
U32 const cSize = cBlockHeader >> 3;
|
||||
bpPtr->lastBlock = cBlockHeader & 1;
|
||||
bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
|
||||
bpPtr->origSize = cSize; /* only useful for RLE */
|
||||
if (bpPtr->blockType == bt_rle) return 1;
|
||||
if (bpPtr->blockType == bt_reserved) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected);
|
||||
return cSize;
|
||||
}
|
||||
}
|
||||
@ -78,7 +79,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
||||
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
||||
const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
|
||||
{
|
||||
if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected);
|
||||
|
||||
{ const BYTE* const istart = (const BYTE*) src;
|
||||
symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
|
||||
@ -86,11 +87,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
||||
switch(litEncType)
|
||||
{
|
||||
case set_repeat:
|
||||
if (dctx->litEntropy==0) return ERROR(dictionary_corrupted);
|
||||
RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted);
|
||||
/* fall-through */
|
||||
|
||||
case set_compressed:
|
||||
if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
|
||||
RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
|
||||
{ size_t lhSize, litSize, litCSize;
|
||||
U32 singleStream=0;
|
||||
U32 const lhlCode = (istart[0] >> 2) & 3;
|
||||
@ -118,8 +119,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
||||
litCSize = (lhc >> 22) + (istart[4] << 10);
|
||||
break;
|
||||
}
|
||||
if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
|
||||
if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
|
||||
RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected);
|
||||
|
||||
/* prefetch huffman table if cold */
|
||||
if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
|
||||
@ -157,7 +158,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
||||
}
|
||||
}
|
||||
|
||||
if (HUF_isError(hufSuccess)) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected);
|
||||
|
||||
dctx->litPtr = dctx->litBuffer;
|
||||
dctx->litSize = litSize;
|
||||
@ -187,7 +188,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
||||
}
|
||||
|
||||
if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
|
||||
if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected);
|
||||
memcpy(dctx->litBuffer, istart+lhSize, litSize);
|
||||
dctx->litPtr = dctx->litBuffer;
|
||||
dctx->litSize = litSize;
|
||||
@ -216,17 +217,17 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
||||
case 3:
|
||||
lhSize = 3;
|
||||
litSize = MEM_readLE24(istart) >> 4;
|
||||
if (srcSize<4) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
|
||||
RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
|
||||
break;
|
||||
}
|
||||
if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
|
||||
memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
|
||||
dctx->litPtr = dctx->litBuffer;
|
||||
dctx->litSize = litSize;
|
||||
return lhSize+1;
|
||||
}
|
||||
default:
|
||||
return ERROR(corruption_detected); /* impossible */
|
||||
RETURN_ERROR(corruption_detected, "impossible");
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -436,8 +437,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
||||
switch(type)
|
||||
{
|
||||
case set_rle :
|
||||
if (!srcSize) return ERROR(srcSize_wrong);
|
||||
if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(!srcSize, srcSize_wrong);
|
||||
RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected);
|
||||
{ U32 const symbol = *(const BYTE*)src;
|
||||
U32 const baseline = baseValue[symbol];
|
||||
U32 const nbBits = nbAdditionalBits[symbol];
|
||||
@ -449,7 +450,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
||||
*DTablePtr = defaultTable;
|
||||
return 0;
|
||||
case set_repeat:
|
||||
if (!flagRepeatTable) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(!flagRepeatTable, corruption_detected);
|
||||
/* prefetch FSE table if used */
|
||||
if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
|
||||
const void* const pStart = *DTablePtr;
|
||||
@ -461,15 +462,15 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
||||
{ unsigned tableLog;
|
||||
S16 norm[MaxSeq+1];
|
||||
size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
|
||||
if (FSE_isError(headerSize)) return ERROR(corruption_detected);
|
||||
if (tableLog > maxLog) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected);
|
||||
RETURN_ERROR_IF(tableLog > maxLog, corruption_detected);
|
||||
ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
|
||||
*DTablePtr = DTableSpace;
|
||||
return headerSize;
|
||||
}
|
||||
default : /* impossible */
|
||||
default :
|
||||
assert(0);
|
||||
return ERROR(GENERIC);
|
||||
RETURN_ERROR(GENERIC, "impossible");
|
||||
}
|
||||
}
|
||||
|
||||
@ -483,28 +484,28 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
||||
DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
|
||||
|
||||
/* check */
|
||||
if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
|
||||
RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong);
|
||||
|
||||
/* SeqHead */
|
||||
nbSeq = *ip++;
|
||||
if (!nbSeq) {
|
||||
*nbSeqPtr=0;
|
||||
if (srcSize != 1) return ERROR(srcSize_wrong);
|
||||
RETURN_ERROR_IF(srcSize != 1, srcSize_wrong);
|
||||
return 1;
|
||||
}
|
||||
if (nbSeq > 0x7F) {
|
||||
if (nbSeq == 0xFF) {
|
||||
if (ip+2 > iend) return ERROR(srcSize_wrong);
|
||||
RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong);
|
||||
nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
|
||||
} else {
|
||||
if (ip >= iend) return ERROR(srcSize_wrong);
|
||||
RETURN_ERROR_IF(ip >= iend, srcSize_wrong);
|
||||
nbSeq = ((nbSeq-0x80)<<8) + *ip++;
|
||||
}
|
||||
}
|
||||
*nbSeqPtr = nbSeq;
|
||||
|
||||
/* FSE table descriptors */
|
||||
if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */
|
||||
RETURN_ERROR_IF(ip+4 > iend, srcSize_wrong); /* minimum possible size */
|
||||
{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
|
||||
symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
|
||||
symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
|
||||
@ -517,7 +518,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
||||
LL_base, LL_bits,
|
||||
LL_defaultDTable, dctx->fseEntropy,
|
||||
dctx->ddictIsCold, nbSeq);
|
||||
if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected);
|
||||
ip += llhSize;
|
||||
}
|
||||
|
||||
@ -527,7 +528,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
||||
OF_base, OF_bits,
|
||||
OF_defaultDTable, dctx->fseEntropy,
|
||||
dctx->ddictIsCold, nbSeq);
|
||||
if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected);
|
||||
ip += ofhSize;
|
||||
}
|
||||
|
||||
@ -537,7 +538,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
||||
ML_base, ML_bits,
|
||||
ML_defaultDTable, dctx->fseEntropy,
|
||||
dctx->ddictIsCold, nbSeq);
|
||||
if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected);
|
||||
ip += mlhSize;
|
||||
}
|
||||
}
|
||||
@ -590,8 +591,8 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
|
||||
const BYTE* match = oLitEnd - sequence.offset;
|
||||
|
||||
/* check */
|
||||
if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must fit within dstBuffer */
|
||||
if (iLitEnd > litLimit) return ERROR(corruption_detected); /* try to read beyond literal buffer */
|
||||
RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must fit within dstBuffer");
|
||||
RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer");
|
||||
|
||||
/* copy literals */
|
||||
while (op < oLitEnd) *op++ = *(*litPtr)++;
|
||||
@ -599,7 +600,7 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
|
||||
/* copy Match */
|
||||
if (sequence.offset > (size_t)(oLitEnd - base)) {
|
||||
/* offset beyond prefix */
|
||||
if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - vBase),corruption_detected);
|
||||
match = dictEnd - (base-match);
|
||||
if (match + sequence.matchLength <= dictEnd) {
|
||||
memmove(oLitEnd, match, sequence.matchLength);
|
||||
@ -631,8 +632,8 @@ size_t ZSTD_execSequence(BYTE* op,
|
||||
const BYTE* match = oLitEnd - sequence.offset;
|
||||
|
||||
/* check */
|
||||
if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
|
||||
if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
|
||||
RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
|
||||
RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
|
||||
if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
|
||||
|
||||
/* copy Literals */
|
||||
@ -645,8 +646,7 @@ size_t ZSTD_execSequence(BYTE* op,
|
||||
/* copy Match */
|
||||
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
||||
/* offset beyond prefix -> go into extDict */
|
||||
if (sequence.offset > (size_t)(oLitEnd - virtualStart))
|
||||
return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
|
||||
match = dictEnd + (match - prefixStart);
|
||||
if (match + sequence.matchLength <= dictEnd) {
|
||||
memmove(oLitEnd, match, sequence.matchLength);
|
||||
@ -712,8 +712,8 @@ size_t ZSTD_execSequenceLong(BYTE* op,
|
||||
const BYTE* match = sequence.match;
|
||||
|
||||
/* check */
|
||||
if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
|
||||
if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
|
||||
RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
|
||||
RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
|
||||
if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
|
||||
|
||||
/* copy Literals */
|
||||
@ -726,7 +726,7 @@ size_t ZSTD_execSequenceLong(BYTE* op,
|
||||
/* copy Match */
|
||||
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
||||
/* offset beyond prefix */
|
||||
if (sequence.offset > (size_t)(oLitEnd - dictStart)) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - dictStart), corruption_detected);
|
||||
if (match + sequence.matchLength <= dictEnd) {
|
||||
memmove(oLitEnd, match, sequence.matchLength);
|
||||
return sequenceLength;
|
||||
@ -801,7 +801,7 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
|
||||
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
|
||||
* offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
|
||||
* bits before reloading. This value is the maximum number of bytes we read
|
||||
* after reloading when we are decoding long offets.
|
||||
* after reloading when we are decoding long offsets.
|
||||
*/
|
||||
#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
|
||||
(ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
|
||||
@ -911,7 +911,9 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
||||
seqState_t seqState;
|
||||
dctx->fseEntropy = 1;
|
||||
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
|
||||
CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
|
||||
RETURN_ERROR_IF(
|
||||
ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
|
||||
corruption_detected);
|
||||
ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
|
||||
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
|
||||
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
|
||||
@ -927,14 +929,14 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
||||
|
||||
/* check if reached exact end */
|
||||
DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
|
||||
if (nbSeq) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(nbSeq, corruption_detected);
|
||||
/* save reps for next block */
|
||||
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
|
||||
}
|
||||
|
||||
/* last literal segment */
|
||||
{ size_t const lastLLSize = litEnd - litPtr;
|
||||
if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
|
||||
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall);
|
||||
memcpy(op, litPtr, lastLLSize);
|
||||
op += lastLLSize;
|
||||
}
|
||||
@ -1066,7 +1068,9 @@ ZSTD_decompressSequencesLong_body(
|
||||
seqState.pos = (size_t)(op-prefixStart);
|
||||
seqState.dictEnd = dictEnd;
|
||||
assert(iend >= ip);
|
||||
CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
|
||||
RETURN_ERROR_IF(
|
||||
ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
|
||||
corruption_detected);
|
||||
ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
|
||||
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
|
||||
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
|
||||
@ -1076,7 +1080,7 @@ ZSTD_decompressSequencesLong_body(
|
||||
sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
|
||||
PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
|
||||
}
|
||||
if (seqNb<seqAdvance) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected);
|
||||
|
||||
/* decode and decompress */
|
||||
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
|
||||
@ -1087,7 +1091,7 @@ ZSTD_decompressSequencesLong_body(
|
||||
sequences[seqNb & STORED_SEQS_MASK] = sequence;
|
||||
op += oneSeqSize;
|
||||
}
|
||||
if (seqNb<nbSeq) return ERROR(corruption_detected);
|
||||
RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected);
|
||||
|
||||
/* finish queue */
|
||||
seqNb -= seqAdvance;
|
||||
@ -1103,7 +1107,7 @@ ZSTD_decompressSequencesLong_body(
|
||||
|
||||
/* last literal segment */
|
||||
{ size_t const lastLLSize = litEnd - litPtr;
|
||||
if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
|
||||
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall);
|
||||
memcpy(op, litPtr, lastLLSize);
|
||||
op += lastLLSize;
|
||||
}
|
||||
@ -1176,7 +1180,7 @@ ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
|
||||
/* ZSTD_decompressSequencesLong() :
|
||||
* decompression function triggered when a minimum share of offsets is considered "long",
|
||||
* aka out of cache.
|
||||
* note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes mearning "farther than memory cache distance".
|
||||
* note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
|
||||
* This function will try to mitigate main memory latency through the use of prefetching */
|
||||
static size_t
|
||||
ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
|
||||
@ -1240,7 +1244,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
||||
ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
|
||||
DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
|
||||
|
||||
if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
|
||||
RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong);
|
||||
|
||||
/* Decode literals section */
|
||||
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
|
||||
|
@ -89,6 +89,12 @@ typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
|
||||
typedef enum { zdss_init=0, zdss_loadHeader,
|
||||
zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
|
||||
|
||||
typedef enum { /* dictionary usage policy; type of the `dictUses` field of struct ZSTD_DCtx_s */
|
||||
ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */
|
||||
ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */
|
||||
ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
|
||||
} ZSTD_dictUses_e;
|
||||
|
||||
struct ZSTD_DCtx_s
|
||||
{
|
||||
const ZSTD_seqSymbol* LLTptr;
|
||||
@ -123,6 +129,7 @@ struct ZSTD_DCtx_s
|
||||
const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
|
||||
U32 dictID;
|
||||
int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
|
||||
ZSTD_dictUses_e dictUses;
|
||||
|
||||
/* streaming */
|
||||
ZSTD_dStreamStage streamStage;
|
||||
|
@ -391,7 +391,7 @@ static void COVER_group(COVER_ctx_t *ctx, const void *group,
|
||||
*
|
||||
* Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
|
||||
*
|
||||
* Once the dmer d is in the dictionay we set F(d) = 0.
|
||||
* Once the dmer d is in the dictionary we set F(d) = 0.
|
||||
*/
|
||||
static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
|
||||
COVER_map_t *activeDmers, U32 begin,
|
||||
@ -435,7 +435,7 @@ static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
|
||||
U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer);
|
||||
activeSegment.begin += 1;
|
||||
*delDmerOcc -= 1;
|
||||
/* If this is the last occurence of the dmer, subtract its score */
|
||||
/* If this is the last occurrence of the dmer, subtract its score */
|
||||
if (*delDmerOcc == 0) {
|
||||
COVER_map_remove(activeDmers, delDmer);
|
||||
activeSegment.score -= freqs[delDmer];
|
||||
@ -627,6 +627,39 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
||||
return 1;
|
||||
}
|
||||
|
||||
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
|
||||
{
|
||||
const double ratio = (double)nbDmers / maxDictSize;
|
||||
if (ratio >= 10) {
|
||||
return;
|
||||
}
|
||||
LOCALDISPLAYLEVEL(displayLevel, 1,
|
||||
"WARNING: The maximum dictionary size %u is too large "
|
||||
"compared to the source size %u! "
|
||||
"size(source)/size(dictionary) = %f, but it should be >= "
|
||||
"10! This may lead to a subpar dictionary! We recommend "
|
||||
"training on sources at least 10x, and up to 100x the "
|
||||
"size of the dictionary!\n", (U32)maxDictSize,
|
||||
(U32)nbDmers, ratio);
|
||||
}
|
||||
|
||||
/**
 * Splits nbDmers into epochs.
 *
 * First tries max(1, maxDictSize / k / passes) epochs of equal size. If that
 * makes each epoch smaller than 10 * k, the epoch size is raised to
 * min(10 * k, nbDmers) and the epoch count is recomputed from it.
 *
 * The function divides by k, by passes and (on the fallback path) by the
 * chosen epoch size, so callers must pass non-zero k and passes, and a
 * non-zero nbDmers.
 *
 * @return the number of epochs and the size of each; num * size <= nbDmers.
 */
COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize,
                                       U32 nbDmers, U32 k, U32 passes)
{
  const U32 smallestEpoch = k * 10;
  COVER_epoch_info_t result;

  result.num = MAX(1, maxDictSize / k / passes);
  result.size = nbDmers / result.num;

  if (result.size < smallestEpoch) {
    /* Epochs would be too small: enlarge them and use fewer of them. */
    result.size = MIN(smallestEpoch, nbDmers);
    result.num = nbDmers / result.size;
  }

  assert(result.size * result.num <= nbDmers);
  return result;
}
|
||||
|
||||
/**
|
||||
* Given the prepared context build the dictionary.
|
||||
*/
|
||||
@ -636,28 +669,34 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
|
||||
ZDICT_cover_params_t parameters) {
|
||||
BYTE *const dict = (BYTE *)dictBuffer;
|
||||
size_t tail = dictBufferCapacity;
|
||||
/* Divide the data up into epochs of equal size.
|
||||
* We will select at least one segment from each epoch.
|
||||
*/
|
||||
const unsigned epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k / 4));
|
||||
const unsigned epochSize = (U32)(ctx->suffixSize / epochs);
|
||||
/* Divide the data into epochs. We will select one segment from each epoch. */
|
||||
const COVER_epoch_info_t epochs = COVER_computeEpochs(
|
||||
(U32)dictBufferCapacity, (U32)ctx->suffixSize, parameters.k, 4);
|
||||
const size_t maxZeroScoreRun = MAX(10, MIN(100, epochs.num >> 3));
|
||||
size_t zeroScoreRun = 0;
|
||||
size_t epoch;
|
||||
DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
|
||||
epochs, epochSize);
|
||||
(U32)epochs.num, (U32)epochs.size);
|
||||
/* Loop through the epochs until there are no more segments or the dictionary
|
||||
* is full.
|
||||
*/
|
||||
for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
|
||||
const U32 epochBegin = (U32)(epoch * epochSize);
|
||||
const U32 epochEnd = epochBegin + epochSize;
|
||||
for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) {
|
||||
const U32 epochBegin = (U32)(epoch * epochs.size);
|
||||
const U32 epochEnd = epochBegin + epochs.size;
|
||||
size_t segmentSize;
|
||||
/* Select a segment */
|
||||
COVER_segment_t segment = COVER_selectSegment(
|
||||
ctx, freqs, activeDmers, epochBegin, epochEnd, parameters);
|
||||
/* If the segment covers no dmers, then we are out of content */
|
||||
/* If the segment covers no dmers, then we are out of content.
|
||||
* There may be new content in other epochs, so continue for some time.
|
||||
*/
|
||||
if (segment.score == 0) {
|
||||
break;
|
||||
if (++zeroScoreRun >= maxZeroScoreRun) {
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
zeroScoreRun = 0;
|
||||
/* Trim the segment if necessary and if it is too small then we are done */
|
||||
segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
|
||||
if (segmentSize < parameters.d) {
|
||||
@ -706,6 +745,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
||||
parameters.d, parameters.splitPoint)) {
|
||||
return ERROR(GENERIC);
|
||||
}
|
||||
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel);
|
||||
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
|
||||
DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
|
||||
COVER_ctx_destroy(&ctx);
|
||||
@ -977,6 +1017,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
||||
unsigned k;
|
||||
COVER_best_t best;
|
||||
POOL_ctx *pool = NULL;
|
||||
int warned = 0;
|
||||
|
||||
/* Checks */
|
||||
if (splitPoint <= 0 || splitPoint > 1) {
|
||||
@ -1019,6 +1060,10 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
||||
POOL_free(pool);
|
||||
return ERROR(GENERIC);
|
||||
}
|
||||
if (!warned) {
|
||||
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel);
|
||||
warned = 1;
|
||||
}
|
||||
/* Loop through k reusing the same context */
|
||||
for (k = kMinK; k <= kMaxK; k += kStepSize) {
|
||||
/* Prepare the arguments */
|
||||
|
@ -38,6 +38,35 @@ typedef struct {
|
||||
U32 score;
|
||||
} COVER_segment_t;
|
||||
|
||||
/**
|
||||
* Number of epochs and size of each epoch.
|
||||
*/
|
||||
typedef struct {
|
||||
U32 num; /* number of epochs */
|
||||
U32 size; /* size of each epoch */
|
||||
} COVER_epoch_info_t;
|
||||
|
||||
/**
|
||||
* Computes the number of epochs and the size of each epoch.
|
||||
* We will make sure that each epoch gets at least 10 * k bytes.
|
||||
*
|
||||
* The COVER algorithms divide the data up into epochs of equal size and
|
||||
* select one segment from each epoch.
|
||||
*
|
||||
* @param maxDictSize The maximum allowed dictionary size.
|
||||
* @param nbDmers The number of dmers we are training on.
|
||||
* @param k The parameter k (segment size).
|
||||
* @param passes The target number of passes over the dmer corpus.
|
||||
* More passes means a better dictionary.
|
||||
*/
|
||||
COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, U32 nbDmers,
|
||||
U32 k, U32 passes);
|
||||
|
||||
/**
|
||||
* Warns the user when their corpus is too small.
|
||||
*/
|
||||
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel);
|
||||
|
||||
/**
|
||||
* Checks total compressed size of a dictionary
|
||||
*/
|
||||
|
@ -132,7 +132,7 @@ typedef struct {
|
||||
*
|
||||
* Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
|
||||
*
|
||||
* Once the dmer with hash value d is in the dictionay we set F(d) = 0.
|
||||
* Once the dmer with hash value d is in the dictionary we set F(d) = 0.
|
||||
*/
|
||||
static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
|
||||
U32 *freqs, U32 begin, U32 end,
|
||||
@ -161,7 +161,7 @@ static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
|
||||
/* Get hash value of current dmer */
|
||||
const size_t idx = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, f, d);
|
||||
|
||||
/* Add frequency of this index to score if this is the first occurence of index in active segment */
|
||||
/* Add frequency of this index to score if this is the first occurrence of index in active segment */
|
||||
if (segmentFreqs[idx] == 0) {
|
||||
activeSegment.score += freqs[idx];
|
||||
}
|
||||
@ -386,29 +386,35 @@ FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx,
|
||||
{
|
||||
BYTE *const dict = (BYTE *)dictBuffer;
|
||||
size_t tail = dictBufferCapacity;
|
||||
/* Divide the data up into epochs of equal size.
|
||||
* We will select at least one segment from each epoch.
|
||||
*/
|
||||
const unsigned epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k));
|
||||
const unsigned epochSize = (U32)(ctx->nbDmers / epochs);
|
||||
/* Divide the data into epochs. We will select one segment from each epoch. */
|
||||
const COVER_epoch_info_t epochs = COVER_computeEpochs(
|
||||
(U32)dictBufferCapacity, (U32)ctx->nbDmers, parameters.k, 1);
|
||||
const size_t maxZeroScoreRun = 10;
|
||||
size_t zeroScoreRun = 0;
|
||||
size_t epoch;
|
||||
DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
|
||||
epochs, epochSize);
|
||||
(U32)epochs.num, (U32)epochs.size);
|
||||
/* Loop through the epochs until there are no more segments or the dictionary
|
||||
* is full.
|
||||
*/
|
||||
for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
|
||||
const U32 epochBegin = (U32)(epoch * epochSize);
|
||||
const U32 epochEnd = epochBegin + epochSize;
|
||||
for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) {
|
||||
const U32 epochBegin = (U32)(epoch * epochs.size);
|
||||
const U32 epochEnd = epochBegin + epochs.size;
|
||||
size_t segmentSize;
|
||||
/* Select a segment */
|
||||
COVER_segment_t segment = FASTCOVER_selectSegment(
|
||||
ctx, freqs, epochBegin, epochEnd, parameters, segmentFreqs);
|
||||
|
||||
/* If the segment covers no dmers, then we are out of content */
|
||||
/* If the segment covers no dmers, then we are out of content.
|
||||
* There may be new content in other epochs, so continue for some time.
|
||||
*/
|
||||
if (segment.score == 0) {
|
||||
break;
|
||||
if (++zeroScoreRun >= maxZeroScoreRun) {
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
zeroScoreRun = 0;
|
||||
|
||||
/* Trim the segment if necessary and if it is too small then we are done */
|
||||
segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
|
||||
@ -564,6 +570,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
|
||||
DISPLAYLEVEL(1, "Failed to initialize context\n");
|
||||
return ERROR(GENERIC);
|
||||
}
|
||||
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel);
|
||||
/* Build the dictionary */
|
||||
DISPLAYLEVEL(2, "Building dictionary\n");
|
||||
{
|
||||
@ -616,6 +623,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
||||
unsigned k;
|
||||
COVER_best_t best;
|
||||
POOL_ctx *pool = NULL;
|
||||
int warned = 0;
|
||||
/* Checks */
|
||||
if (splitPoint <= 0 || splitPoint > 1) {
|
||||
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");
|
||||
@ -664,6 +672,10 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
||||
POOL_free(pool);
|
||||
return ERROR(GENERIC);
|
||||
}
|
||||
if (!warned) {
|
||||
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel);
|
||||
warned = 1;
|
||||
}
|
||||
/* Loop through k reusing the same context */
|
||||
for (k = kMinK; k <= kMaxK; k += kStepSize) {
|
||||
/* Prepare the arguments */
|
||||
|
@ -46,7 +46,12 @@ extern "C" {
|
||||
* The resulting dictionary will be saved into `dictBuffer`.
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
* Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte.
|
||||
* Note: Dictionary training will fail if there are not enough samples to construct a
|
||||
* dictionary, or if most of the samples are too small (< 8 bytes being the lower limit).
|
||||
* If dictionary training fails, you should use zstd without a dictionary, as the dictionary
|
||||
* would've been ineffective anyways. If you believe your samples would benefit from a dictionary
|
||||
* please open an issue with details, and we can look into it.
|
||||
* Note: ZDICT_trainFromBuffer()'s memory usage is about 6 MB.
|
||||
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
||||
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
|
||||
* In general, it's recommended to provide a few thousand samples, though this can vary a lot.
|
||||
@ -110,6 +115,7 @@ typedef struct {
|
||||
* The resulting dictionary will be saved into `dictBuffer`.
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
* See ZDICT_trainFromBuffer() for details on failure modes.
|
||||
* Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
|
||||
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
||||
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
|
||||
@ -133,8 +139,9 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
||||
* If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000].
|
||||
*
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
* On success `*parameters` contains the parameters selected.
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
* On success `*parameters` contains the parameters selected.
|
||||
* See ZDICT_trainFromBuffer() for details on failure modes.
|
||||
* Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
|
||||
*/
|
||||
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
||||
@ -151,7 +158,8 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
||||
* The resulting dictionary will be saved into `dictBuffer`.
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
* Note: ZDICT_trainFromBuffer_fastCover() requires about 1 bytes of memory for each input byte and additionally another 6 * 2^f bytes of memory .
|
||||
* See ZDICT_trainFromBuffer() for details on failure modes.
|
||||
* Note: ZDICT_trainFromBuffer_fastCover() requires 6 * 2^f bytes of memory.
|
||||
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
||||
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
|
||||
* In general, it's recommended to provide a few thousand samples, though this can vary a lot.
|
||||
@ -175,9 +183,10 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
|
||||
* If accel is zero, default value of 1 is used.
|
||||
*
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
* On success `*parameters` contains the parameters selected.
|
||||
* Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 1 byte of memory for each input byte and additionally another 6 * 2^f bytes of memory for each thread.
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
* On success `*parameters` contains the parameters selected.
|
||||
* See ZDICT_trainFromBuffer() for details on failure modes.
|
||||
* Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread.
|
||||
*/
|
||||
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
|
||||
size_t dictBufferCapacity, const void* samplesBuffer,
|
||||
@ -195,7 +204,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
|
||||
* maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
|
||||
*
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
|
||||
* or an error code, which can be tested by ZDICT_isError().
|
||||
* or an error code, which can be tested by ZDICT_isError().
|
||||
* Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
|
||||
* Note 2: dictBuffer and dictContent can overlap
|
||||
*/
|
||||
@ -219,6 +228,7 @@ typedef struct {
|
||||
* `parameters` is optional and can be provided with values set to 0 to mean "default".
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
* See ZDICT_trainFromBuffer() for details on failure modes.
|
||||
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
||||
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
|
||||
* In general, it's recommended to provide a few thousand samples, though this can vary a lot.
|
||||
|
@ -20,7 +20,7 @@ extern "C" {
|
||||
***************************************/
|
||||
#include "mem.h" /* MEM_STATIC */
|
||||
#include "error_private.h" /* ERROR */
|
||||
#include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer */
|
||||
#include "zstd_internal.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTD_frameSizeInfo */
|
||||
|
||||
#if !defined (ZSTD_LEGACY_SUPPORT) || (ZSTD_LEGACY_SUPPORT == 0)
|
||||
# undef ZSTD_LEGACY_SUPPORT
|
||||
@ -178,43 +178,73 @@ MEM_STATIC size_t ZSTD_decompressLegacy(
|
||||
}
|
||||
}
|
||||
|
||||
MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src,
|
||||
size_t compressedSize)
|
||||
MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size_t srcSize)
|
||||
{
|
||||
U32 const version = ZSTD_isLegacy(src, compressedSize);
|
||||
ZSTD_frameSizeInfo frameSizeInfo;
|
||||
U32 const version = ZSTD_isLegacy(src, srcSize);
|
||||
switch(version)
|
||||
{
|
||||
#if (ZSTD_LEGACY_SUPPORT <= 1)
|
||||
case 1 :
|
||||
return ZSTDv01_findFrameCompressedSize(src, compressedSize);
|
||||
ZSTDv01_findFrameSizeInfoLegacy(src, srcSize,
|
||||
&frameSizeInfo.compressedSize,
|
||||
&frameSizeInfo.decompressedBound);
|
||||
break;
|
||||
#endif
|
||||
#if (ZSTD_LEGACY_SUPPORT <= 2)
|
||||
case 2 :
|
||||
return ZSTDv02_findFrameCompressedSize(src, compressedSize);
|
||||
ZSTDv02_findFrameSizeInfoLegacy(src, srcSize,
|
||||
&frameSizeInfo.compressedSize,
|
||||
&frameSizeInfo.decompressedBound);
|
||||
break;
|
||||
#endif
|
||||
#if (ZSTD_LEGACY_SUPPORT <= 3)
|
||||
case 3 :
|
||||
return ZSTDv03_findFrameCompressedSize(src, compressedSize);
|
||||
ZSTDv03_findFrameSizeInfoLegacy(src, srcSize,
|
||||
&frameSizeInfo.compressedSize,
|
||||
&frameSizeInfo.decompressedBound);
|
||||
break;
|
||||
#endif
|
||||
#if (ZSTD_LEGACY_SUPPORT <= 4)
|
||||
case 4 :
|
||||
return ZSTDv04_findFrameCompressedSize(src, compressedSize);
|
||||
ZSTDv04_findFrameSizeInfoLegacy(src, srcSize,
|
||||
&frameSizeInfo.compressedSize,
|
||||
&frameSizeInfo.decompressedBound);
|
||||
break;
|
||||
#endif
|
||||
#if (ZSTD_LEGACY_SUPPORT <= 5)
|
||||
case 5 :
|
||||
return ZSTDv05_findFrameCompressedSize(src, compressedSize);
|
||||
ZSTDv05_findFrameSizeInfoLegacy(src, srcSize,
|
||||
&frameSizeInfo.compressedSize,
|
||||
&frameSizeInfo.decompressedBound);
|
||||
break;
|
||||
#endif
|
||||
#if (ZSTD_LEGACY_SUPPORT <= 6)
|
||||
case 6 :
|
||||
return ZSTDv06_findFrameCompressedSize(src, compressedSize);
|
||||
ZSTDv06_findFrameSizeInfoLegacy(src, srcSize,
|
||||
&frameSizeInfo.compressedSize,
|
||||
&frameSizeInfo.decompressedBound);
|
||||
break;
|
||||
#endif
|
||||
#if (ZSTD_LEGACY_SUPPORT <= 7)
|
||||
case 7 :
|
||||
return ZSTDv07_findFrameCompressedSize(src, compressedSize);
|
||||
ZSTDv07_findFrameSizeInfoLegacy(src, srcSize,
|
||||
&frameSizeInfo.compressedSize,
|
||||
&frameSizeInfo.decompressedBound);
|
||||
break;
|
||||
#endif
|
||||
default :
|
||||
return ERROR(prefix_unknown);
|
||||
frameSizeInfo.compressedSize = ERROR(prefix_unknown);
|
||||
frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
|
||||
break;
|
||||
}
|
||||
return frameSizeInfo;
|
||||
}
|
||||
|
||||
MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src, size_t srcSize)
|
||||
{
|
||||
ZSTD_frameSizeInfo frameSizeInfo = ZSTD_findFrameSizeInfoLegacy(src, srcSize);
|
||||
return frameSizeInfo.compressedSize;
|
||||
}
|
||||
|
||||
MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version)
|
||||
|
@ -1336,6 +1336,8 @@ static const U32 ZSTD_magicNumber = 0xFD2FB51E; /* 3rd version : seqNb header
|
||||
#define LITERAL_NOENTROPY 63
|
||||
#define COMMAND_NOENTROPY 7 /* to remove */
|
||||
|
||||
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
|
||||
|
||||
static const size_t ZSTD_blockHeaderSize = 3;
|
||||
static const size_t ZSTD_frameHeaderSize = 4;
|
||||
|
||||
@ -1757,7 +1759,7 @@ static size_t ZSTD_execSequence(BYTE* op,
|
||||
BYTE* const base, BYTE* const oend)
|
||||
{
|
||||
static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
|
||||
static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* substracted */
|
||||
static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */
|
||||
const BYTE* const ostart = op;
|
||||
const size_t litLength = sequence.litLength;
|
||||
BYTE* const endMatch = op + litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
|
||||
@ -1999,36 +2001,59 @@ size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t
|
||||
return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
|
||||
}
|
||||
|
||||
size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t srcSize)
|
||||
/* ZSTD_errorFrameSizeInfoLegacy() :
|
||||
assumes `cSize` and `dBound` are _not_ NULL */
|
||||
static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
|
||||
{
|
||||
*cSize = ret;
|
||||
*dBound = ZSTD_CONTENTSIZE_ERROR;
|
||||
}
|
||||
|
||||
void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
|
||||
{
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
size_t remainingSize = srcSize;
|
||||
size_t nbBlocks = 0;
|
||||
U32 magicNumber;
|
||||
blockProperties_t blockProperties;
|
||||
|
||||
/* Frame Header */
|
||||
if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
|
||||
if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
magicNumber = ZSTD_readBE32(src);
|
||||
if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
|
||||
if (magicNumber != ZSTD_magicNumber) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
|
||||
return;
|
||||
}
|
||||
ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
|
||||
|
||||
/* Loop on each block */
|
||||
while (1)
|
||||
{
|
||||
size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties);
|
||||
if (ZSTDv01_isError(blockSize)) return blockSize;
|
||||
if (ZSTDv01_isError(blockSize)) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, blockSize);
|
||||
return;
|
||||
}
|
||||
|
||||
ip += ZSTD_blockHeaderSize;
|
||||
remainingSize -= ZSTD_blockHeaderSize;
|
||||
if (blockSize > remainingSize) return ERROR(srcSize_wrong);
|
||||
if (blockSize > remainingSize) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
|
||||
if (blockSize == 0) break; /* bt_end */
|
||||
|
||||
ip += blockSize;
|
||||
remainingSize -= blockSize;
|
||||
nbBlocks++;
|
||||
}
|
||||
|
||||
return ip - (const BYTE*)src;
|
||||
*cSize = ip - (const BYTE*)src;
|
||||
*dBound = nbBlocks * BLOCKSIZE;
|
||||
}
|
||||
|
||||
/*******************************
|
||||
|
@ -35,13 +35,18 @@ ZSTDv01_decompress() : decompress ZSTD frames compliant with v0.1.x format
|
||||
size_t ZSTDv01_decompress( void* dst, size_t maxOriginalSize,
|
||||
const void* src, size_t compressedSize);
|
||||
|
||||
/**
|
||||
ZSTDv01_getFrameSrcSize() : get the source length of a ZSTD frame compliant with v0.1.x format
|
||||
compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
return : the number of bytes that would be read to decompress this frame
|
||||
or an errorCode if it fails (which can be tested using ZSTDv01_isError())
|
||||
*/
|
||||
size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t compressedSize);
|
||||
/**
|
||||
ZSTDv01_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.1.x format
|
||||
srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
cSize (output parameter) : the number of bytes that would be read to decompress this frame
|
||||
or an error code if it fails (which can be tested using ZSTDv01_isError())
|
||||
dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
|
||||
or ZSTD_CONTENTSIZE_ERROR if an error occurs
|
||||
|
||||
note : assumes `cSize` and `dBound` are _not_ NULL.
|
||||
*/
|
||||
void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
|
||||
size_t* cSize, unsigned long long* dBound);
|
||||
|
||||
/**
|
||||
ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error
|
||||
|
@ -2728,6 +2728,8 @@ static size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_
|
||||
#define LITERAL_NOENTROPY 63
|
||||
#define COMMAND_NOENTROPY 7 /* to remove */
|
||||
|
||||
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
|
||||
|
||||
static const size_t ZSTD_blockHeaderSize = 3;
|
||||
static const size_t ZSTD_frameHeaderSize = 4;
|
||||
|
||||
@ -3096,7 +3098,7 @@ static size_t ZSTD_execSequence(BYTE* op,
|
||||
BYTE* const base, BYTE* const oend)
|
||||
{
|
||||
static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
|
||||
static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* substracted */
|
||||
static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */
|
||||
const BYTE* const ostart = op;
|
||||
BYTE* const oLitEnd = op + sequence.litLength;
|
||||
BYTE* const oMatchEnd = op + sequence.litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
|
||||
@ -3312,37 +3314,59 @@ static size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, siz
|
||||
return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
|
||||
}
|
||||
|
||||
static size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
|
||||
/* ZSTD_errorFrameSizeInfoLegacy() :
|
||||
assumes `cSize` and `dBound` are _not_ NULL */
|
||||
static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
|
||||
{
|
||||
*cSize = ret;
|
||||
*dBound = ZSTD_CONTENTSIZE_ERROR;
|
||||
}
|
||||
|
||||
void ZSTDv02_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
|
||||
{
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
size_t remainingSize = srcSize;
|
||||
size_t nbBlocks = 0;
|
||||
U32 magicNumber;
|
||||
blockProperties_t blockProperties;
|
||||
|
||||
/* Frame Header */
|
||||
if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
|
||||
if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
magicNumber = MEM_readLE32(src);
|
||||
if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
|
||||
if (magicNumber != ZSTD_magicNumber) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
|
||||
return;
|
||||
}
|
||||
ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
|
||||
|
||||
/* Loop on each block */
|
||||
while (1)
|
||||
{
|
||||
size_t cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
|
||||
if (ZSTD_isError(cBlockSize)) return cBlockSize;
|
||||
if (ZSTD_isError(cBlockSize)) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize);
|
||||
return;
|
||||
}
|
||||
|
||||
ip += ZSTD_blockHeaderSize;
|
||||
remainingSize -= ZSTD_blockHeaderSize;
|
||||
if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
|
||||
if (cBlockSize > remainingSize) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
|
||||
if (cBlockSize == 0) break; /* bt_end */
|
||||
|
||||
ip += cBlockSize;
|
||||
remainingSize -= cBlockSize;
|
||||
nbBlocks++;
|
||||
}
|
||||
|
||||
return ip - (const BYTE*)src;
|
||||
*cSize = ip - (const BYTE*)src;
|
||||
*dBound = nbBlocks * BLOCKSIZE;
|
||||
}
|
||||
|
||||
/*******************************
|
||||
@ -3458,11 +3482,6 @@ size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
|
||||
return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
|
||||
}
|
||||
|
||||
size_t ZSTDv02_findFrameCompressedSize(const void *src, size_t compressedSize)
|
||||
{
|
||||
return ZSTD_findFrameCompressedSize(src, compressedSize);
|
||||
}
|
||||
|
||||
ZSTDv02_Dctx* ZSTDv02_createDCtx(void)
|
||||
{
|
||||
return (ZSTDv02_Dctx*)ZSTD_createDCtx();
|
||||
|
@ -35,13 +35,18 @@ ZSTDv02_decompress() : decompress ZSTD frames compliant with v0.2.x format
|
||||
size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
|
||||
const void* src, size_t compressedSize);
|
||||
|
||||
/**
|
||||
ZSTDv02_getFrameSrcSize() : get the source length of a ZSTD frame compliant with v0.2.x format
|
||||
compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
return : the number of bytes that would be read to decompress this frame
|
||||
or an errorCode if it fails (which can be tested using ZSTDv02_isError())
|
||||
*/
|
||||
size_t ZSTDv02_findFrameCompressedSize(const void* src, size_t compressedSize);
|
||||
/**
|
||||
ZSTDv02_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.2.x format
|
||||
srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
cSize (output parameter) : the number of bytes that would be read to decompress this frame
|
||||
or an error code if it fails (which can be tested using ZSTDv02_isError())
|
||||
dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
|
||||
or ZSTD_CONTENTSIZE_ERROR if an error occurs
|
||||
|
||||
note : assumes `cSize` and `dBound` are _not_ NULL.
|
||||
*/
|
||||
void ZSTDv02_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
|
||||
size_t* cSize, unsigned long long* dBound);
|
||||
|
||||
/**
|
||||
ZSTDv02_isError() : tells if the result of ZSTDv02_decompress() is an error
|
||||
|
@ -2369,6 +2369,8 @@ static size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_
|
||||
#define LITERAL_NOENTROPY 63
|
||||
#define COMMAND_NOENTROPY 7 /* to remove */
|
||||
|
||||
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
|
||||
|
||||
static const size_t ZSTD_blockHeaderSize = 3;
|
||||
static const size_t ZSTD_frameHeaderSize = 4;
|
||||
|
||||
@ -2737,7 +2739,7 @@ static size_t ZSTD_execSequence(BYTE* op,
|
||||
BYTE* const base, BYTE* const oend)
|
||||
{
|
||||
static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
|
||||
static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* substracted */
|
||||
static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */
|
||||
const BYTE* const ostart = op;
|
||||
BYTE* const oLitEnd = op + sequence.litLength;
|
||||
BYTE* const oMatchEnd = op + sequence.litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
|
||||
@ -2953,36 +2955,59 @@ static size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, siz
|
||||
return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
|
||||
}
|
||||
|
||||
static size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize)
|
||||
/* ZSTD_errorFrameSizeInfoLegacy() :
|
||||
assumes `cSize` and `dBound` are _not_ NULL */
|
||||
MEM_STATIC void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
|
||||
{
|
||||
*cSize = ret;
|
||||
*dBound = ZSTD_CONTENTSIZE_ERROR;
|
||||
}
|
||||
|
||||
void ZSTDv03_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
|
||||
{
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
size_t remainingSize = srcSize;
|
||||
size_t nbBlocks = 0;
|
||||
U32 magicNumber;
|
||||
blockProperties_t blockProperties;
|
||||
|
||||
/* Frame Header */
|
||||
if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
|
||||
if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
magicNumber = MEM_readLE32(src);
|
||||
if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
|
||||
if (magicNumber != ZSTD_magicNumber) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
|
||||
return;
|
||||
}
|
||||
ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
|
||||
|
||||
/* Loop on each block */
|
||||
while (1)
|
||||
{
|
||||
size_t cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
|
||||
if (ZSTD_isError(cBlockSize)) return cBlockSize;
|
||||
if (ZSTD_isError(cBlockSize)) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize);
|
||||
return;
|
||||
}
|
||||
|
||||
ip += ZSTD_blockHeaderSize;
|
||||
remainingSize -= ZSTD_blockHeaderSize;
|
||||
if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
|
||||
if (cBlockSize > remainingSize) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
|
||||
if (cBlockSize == 0) break; /* bt_end */
|
||||
|
||||
ip += cBlockSize;
|
||||
remainingSize -= cBlockSize;
|
||||
nbBlocks++;
|
||||
}
|
||||
|
||||
return ip - (const BYTE*)src;
|
||||
*cSize = ip - (const BYTE*)src;
|
||||
*dBound = nbBlocks * BLOCKSIZE;
|
||||
}
|
||||
|
||||
|
||||
@ -3099,11 +3124,6 @@ size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize,
|
||||
return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
|
||||
}
|
||||
|
||||
size_t ZSTDv03_findFrameCompressedSize(const void* src, size_t srcSize)
|
||||
{
|
||||
return ZSTD_findFrameCompressedSize(src, srcSize);
|
||||
}
|
||||
|
||||
ZSTDv03_Dctx* ZSTDv03_createDCtx(void)
|
||||
{
|
||||
return (ZSTDv03_Dctx*)ZSTD_createDCtx();
|
||||
|
@ -35,13 +35,18 @@ ZSTDv03_decompress() : decompress ZSTD frames compliant with v0.3.x format
|
||||
size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize,
|
||||
const void* src, size_t compressedSize);
|
||||
|
||||
/**
|
||||
ZSTDv03_getFrameSrcSize() : get the source length of a ZSTD frame compliant with v0.3.x format
|
||||
compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
return : the number of bytes that would be read to decompress this frame
|
||||
or an errorCode if it fails (which can be tested using ZSTDv03_isError())
|
||||
*/
|
||||
size_t ZSTDv03_findFrameCompressedSize(const void* src, size_t compressedSize);
|
||||
/**
|
||||
ZSTDv03_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.3.x format
|
||||
srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
cSize (output parameter) : the number of bytes that would be read to decompress this frame
|
||||
or an error code if it fails (which can be tested using ZSTDv03_isError())
|
||||
dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
|
||||
or ZSTD_CONTENTSIZE_ERROR if an error occurs
|
||||
|
||||
note : assumes `cSize` and `dBound` are _not_ NULL.
|
||||
*/
|
||||
void ZSTDv03_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
|
||||
size_t* cSize, unsigned long long* dBound);
|
||||
|
||||
/**
|
||||
ZSTDv03_isError() : tells if the result of ZSTDv03_decompress() is an error
|
||||
|
@ -373,6 +373,8 @@ static const size_t ZSTD_frameHeaderSize_min = 5;
|
||||
#define MIN_SEQUENCES_SIZE (2 /*seqNb*/ + 2 /*dumps*/ + 3 /*seqTables*/ + 1 /*bitStream*/)
|
||||
#define MIN_CBLOCK_SIZE (3 /*litCSize*/ + MIN_SEQUENCES_SIZE)
|
||||
|
||||
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
|
||||
|
||||
typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
|
||||
|
||||
|
||||
@ -2860,7 +2862,7 @@ static size_t ZSTD_execSequence(BYTE* op,
|
||||
const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
|
||||
{
|
||||
static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
|
||||
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */
|
||||
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
|
||||
BYTE* const oLitEnd = op + sequence.litLength;
|
||||
const size_t sequenceLength = sequence.litLength + sequence.matchLength;
|
||||
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
|
||||
@ -3119,34 +3121,57 @@ static size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx,
|
||||
return op-ostart;
|
||||
}
|
||||
|
||||
static size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize)
|
||||
/* ZSTD_errorFrameSizeInfoLegacy() :
|
||||
assumes `cSize` and `dBound` are _not_ NULL */
|
||||
static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
|
||||
{
|
||||
*cSize = ret;
|
||||
*dBound = ZSTD_CONTENTSIZE_ERROR;
|
||||
}
|
||||
|
||||
void ZSTDv04_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
|
||||
{
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
size_t remainingSize = srcSize;
|
||||
size_t nbBlocks = 0;
|
||||
blockProperties_t blockProperties;
|
||||
|
||||
/* Frame Header */
|
||||
if (srcSize < ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong);
|
||||
if (MEM_readLE32(src) != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
|
||||
if (srcSize < ZSTD_frameHeaderSize_min) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
if (MEM_readLE32(src) != ZSTD_MAGICNUMBER) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
|
||||
return;
|
||||
}
|
||||
ip += ZSTD_frameHeaderSize_min; remainingSize -= ZSTD_frameHeaderSize_min;
|
||||
|
||||
/* Loop on each block */
|
||||
while (1)
|
||||
{
|
||||
size_t cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
|
||||
if (ZSTD_isError(cBlockSize)) return cBlockSize;
|
||||
if (ZSTD_isError(cBlockSize)) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize);
|
||||
return;
|
||||
}
|
||||
|
||||
ip += ZSTD_blockHeaderSize;
|
||||
remainingSize -= ZSTD_blockHeaderSize;
|
||||
if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
|
||||
if (cBlockSize > remainingSize) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
|
||||
if (cBlockSize == 0) break; /* bt_end */
|
||||
|
||||
ip += cBlockSize;
|
||||
remainingSize -= cBlockSize;
|
||||
nbBlocks++;
|
||||
}
|
||||
|
||||
return ip - (const BYTE*)src;
|
||||
*cSize = ip - (const BYTE*)src;
|
||||
*dBound = nbBlocks * BLOCKSIZE;
|
||||
}
|
||||
|
||||
/* ******************************
|
||||
@ -3578,11 +3603,6 @@ size_t ZSTDv04_decompress(void* dst, size_t maxDstSize, const void* src, size_t
|
||||
#endif
|
||||
}
|
||||
|
||||
size_t ZSTDv04_findFrameCompressedSize(const void* src, size_t srcSize)
|
||||
{
|
||||
return ZSTD_findFrameCompressedSize(src, srcSize);
|
||||
}
|
||||
|
||||
size_t ZSTDv04_resetDCtx(ZSTDv04_Dctx* dctx) { return ZSTD_resetDCtx(dctx); }
|
||||
|
||||
size_t ZSTDv04_nextSrcSizeToDecompress(ZSTDv04_Dctx* dctx)
|
||||
|
@ -35,13 +35,18 @@ ZSTDv04_decompress() : decompress ZSTD frames compliant with v0.4.x format
|
||||
size_t ZSTDv04_decompress( void* dst, size_t maxOriginalSize,
|
||||
const void* src, size_t compressedSize);
|
||||
|
||||
/**
|
||||
ZSTDv04_getFrameSrcSize() : get the source length of a ZSTD frame compliant with v0.4.x format
|
||||
compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
return : the number of bytes that would be read to decompress this frame
|
||||
or an errorCode if it fails (which can be tested using ZSTDv04_isError())
|
||||
*/
|
||||
size_t ZSTDv04_findFrameCompressedSize(const void* src, size_t compressedSize);
|
||||
/**
|
||||
ZSTDv04_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.4.x format
|
||||
srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
cSize (output parameter) : the number of bytes that would be read to decompress this frame
|
||||
or an error code if it fails (which can be tested using ZSTDv04_isError())
|
||||
dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
|
||||
or ZSTD_CONTENTSIZE_ERROR if an error occurs
|
||||
|
||||
note : assumes `cSize` and `dBound` are _not_ NULL.
|
||||
*/
|
||||
void ZSTDv04_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
|
||||
size_t* cSize, unsigned long long* dBound);
|
||||
|
||||
/**
|
||||
ZSTDv04_isError() : tells if the result of ZSTDv04_decompress() is an error
|
||||
|
@ -491,6 +491,8 @@ static const size_t ZSTDv05_frameHeaderSize_min = 5;
|
||||
|
||||
#define WILDCOPY_OVERLENGTH 8
|
||||
|
||||
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
|
||||
|
||||
typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
|
||||
|
||||
|
||||
@ -3217,7 +3219,7 @@ static size_t ZSTDv05_execSequence(BYTE* op,
|
||||
const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
|
||||
{
|
||||
static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
|
||||
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */
|
||||
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
|
||||
BYTE* const oLitEnd = op + sequence.litLength;
|
||||
const size_t sequenceLength = sequence.litLength + sequence.matchLength;
|
||||
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
|
||||
@ -3508,34 +3510,57 @@ size_t ZSTDv05_decompress(void* dst, size_t maxDstSize, const void* src, size_t
|
||||
#endif
|
||||
}
|
||||
|
||||
size_t ZSTDv05_findFrameCompressedSize(const void *src, size_t srcSize)
|
||||
/* ZSTD_errorFrameSizeInfoLegacy() :
|
||||
assumes `cSize` and `dBound` are _not_ NULL */
|
||||
static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
|
||||
{
|
||||
*cSize = ret;
|
||||
*dBound = ZSTD_CONTENTSIZE_ERROR;
|
||||
}
|
||||
|
||||
void ZSTDv05_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
|
||||
{
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
size_t remainingSize = srcSize;
|
||||
size_t nbBlocks = 0;
|
||||
blockProperties_t blockProperties;
|
||||
|
||||
/* Frame Header */
|
||||
if (srcSize < ZSTDv05_frameHeaderSize_min) return ERROR(srcSize_wrong);
|
||||
if (MEM_readLE32(src) != ZSTDv05_MAGICNUMBER) return ERROR(prefix_unknown);
|
||||
if (srcSize < ZSTDv05_frameHeaderSize_min) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
if (MEM_readLE32(src) != ZSTDv05_MAGICNUMBER) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
|
||||
return;
|
||||
}
|
||||
ip += ZSTDv05_frameHeaderSize_min; remainingSize -= ZSTDv05_frameHeaderSize_min;
|
||||
|
||||
/* Loop on each block */
|
||||
while (1)
|
||||
{
|
||||
size_t cBlockSize = ZSTDv05_getcBlockSize(ip, remainingSize, &blockProperties);
|
||||
if (ZSTDv05_isError(cBlockSize)) return cBlockSize;
|
||||
if (ZSTDv05_isError(cBlockSize)) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize);
|
||||
return;
|
||||
}
|
||||
|
||||
ip += ZSTDv05_blockHeaderSize;
|
||||
remainingSize -= ZSTDv05_blockHeaderSize;
|
||||
if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
|
||||
if (cBlockSize > remainingSize) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
|
||||
if (cBlockSize == 0) break; /* bt_end */
|
||||
|
||||
ip += cBlockSize;
|
||||
remainingSize -= cBlockSize;
|
||||
nbBlocks++;
|
||||
}
|
||||
|
||||
return ip - (const BYTE*)src;
|
||||
*cSize = ip - (const BYTE*)src;
|
||||
*dBound = nbBlocks * BLOCKSIZE;
|
||||
}
|
||||
|
||||
/* ******************************
|
||||
|
@ -33,13 +33,18 @@ extern "C" {
|
||||
size_t ZSTDv05_decompress( void* dst, size_t dstCapacity,
|
||||
const void* src, size_t compressedSize);
|
||||
|
||||
/**
|
||||
ZSTDv05_getFrameSrcSize() : get the source length of a ZSTD frame
|
||||
compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
return : the number of bytes that would be read to decompress this frame
|
||||
or an errorCode if it fails (which can be tested using ZSTDv05_isError())
|
||||
*/
|
||||
size_t ZSTDv05_findFrameCompressedSize(const void* src, size_t compressedSize);
|
||||
/**
|
||||
ZSTDv05_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.5.x format
|
||||
srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
cSize (output parameter) : the number of bytes that would be read to decompress this frame
|
||||
or an error code if it fails (which can be tested using ZSTDv05_isError())
|
||||
dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
|
||||
or ZSTD_CONTENTSIZE_ERROR if an error occurs
|
||||
|
||||
note : assumes `cSize` and `dBound` are _not_ NULL.
|
||||
*/
|
||||
void ZSTDv05_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
|
||||
size_t* cSize, unsigned long long* dBound);
|
||||
|
||||
/* *************************************
|
||||
* Helper functions
|
||||
|
@ -506,6 +506,8 @@ typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
|
||||
#define FSEv06_ENCODING_STATIC 2
|
||||
#define FSEv06_ENCODING_DYNAMIC 3
|
||||
|
||||
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
|
||||
|
||||
static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9,10,11,12,
|
||||
13,14,15,16 };
|
||||
@ -3406,7 +3408,7 @@ static size_t ZSTDv06_execSequence(BYTE* op,
|
||||
if (sequence.offset < 8) {
|
||||
/* close range match, overlap */
|
||||
static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
|
||||
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */
|
||||
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
|
||||
int const sub2 = dec64table[sequence.offset];
|
||||
op[0] = match[0];
|
||||
op[1] = match[1];
|
||||
@ -3654,36 +3656,62 @@ size_t ZSTDv06_decompress(void* dst, size_t dstCapacity, const void* src, size_t
|
||||
#endif
|
||||
}
|
||||
|
||||
size_t ZSTDv06_findFrameCompressedSize(const void* src, size_t srcSize)
|
||||
/* ZSTD_errorFrameSizeInfoLegacy() :
|
||||
assumes `cSize` and `dBound` are _not_ NULL */
|
||||
static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
|
||||
{
|
||||
*cSize = ret;
|
||||
*dBound = ZSTD_CONTENTSIZE_ERROR;
|
||||
}
|
||||
|
||||
void ZSTDv06_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
|
||||
{
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
size_t remainingSize = srcSize;
|
||||
size_t nbBlocks = 0;
|
||||
blockProperties_t blockProperties = { bt_compressed, 0 };
|
||||
|
||||
/* Frame Header */
|
||||
{ size_t const frameHeaderSize = ZSTDv06_frameHeaderSize(src, ZSTDv06_frameHeaderSize_min);
|
||||
if (ZSTDv06_isError(frameHeaderSize)) return frameHeaderSize;
|
||||
if (MEM_readLE32(src) != ZSTDv06_MAGICNUMBER) return ERROR(prefix_unknown);
|
||||
if (srcSize < frameHeaderSize+ZSTDv06_blockHeaderSize) return ERROR(srcSize_wrong);
|
||||
if (ZSTDv06_isError(frameHeaderSize)) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, frameHeaderSize);
|
||||
return;
|
||||
}
|
||||
if (MEM_readLE32(src) != ZSTDv06_MAGICNUMBER) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
|
||||
return;
|
||||
}
|
||||
if (srcSize < frameHeaderSize+ZSTDv06_blockHeaderSize) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
ip += frameHeaderSize; remainingSize -= frameHeaderSize;
|
||||
}
|
||||
|
||||
/* Loop on each block */
|
||||
while (1) {
|
||||
size_t const cBlockSize = ZSTDv06_getcBlockSize(ip, remainingSize, &blockProperties);
|
||||
if (ZSTDv06_isError(cBlockSize)) return cBlockSize;
|
||||
if (ZSTDv06_isError(cBlockSize)) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize);
|
||||
return;
|
||||
}
|
||||
|
||||
ip += ZSTDv06_blockHeaderSize;
|
||||
remainingSize -= ZSTDv06_blockHeaderSize;
|
||||
if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
|
||||
if (cBlockSize > remainingSize) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
|
||||
if (cBlockSize == 0) break; /* bt_end */
|
||||
|
||||
ip += cBlockSize;
|
||||
remainingSize -= cBlockSize;
|
||||
nbBlocks++;
|
||||
}
|
||||
|
||||
return ip - (const BYTE*)src;
|
||||
*cSize = ip - (const BYTE*)src;
|
||||
*dBound = nbBlocks * ZSTDv06_BLOCKSIZE_MAX;
|
||||
}
|
||||
|
||||
/*_******************************
|
||||
|
@ -43,12 +43,17 @@ ZSTDLIBv06_API size_t ZSTDv06_decompress( void* dst, size_t dstCapacity,
|
||||
const void* src, size_t compressedSize);
|
||||
|
||||
/**
|
||||
ZSTDv06_getFrameSrcSize() : get the source length of a ZSTD frame
|
||||
compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
return : the number of bytes that would be read to decompress this frame
|
||||
or an errorCode if it fails (which can be tested using ZSTDv06_isError())
|
||||
ZSTDv06_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.6.x format
|
||||
srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
cSize (output parameter) : the number of bytes that would be read to decompress this frame
|
||||
or an error code if it fails (which can be tested using ZSTDv06_isError())
|
||||
dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
|
||||
or ZSTD_CONTENTSIZE_ERROR if an error occurs
|
||||
|
||||
note : assumes `cSize` and `dBound` are _not_ NULL.
|
||||
*/
|
||||
size_t ZSTDv06_findFrameCompressedSize(const void* src, size_t compressedSize);
|
||||
void ZSTDv06_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
|
||||
size_t* cSize, unsigned long long* dBound);
|
||||
|
||||
/* *************************************
|
||||
* Helper functions
|
||||
|
@ -2740,6 +2740,8 @@ typedef enum { lbt_huffman, lbt_repeat, lbt_raw, lbt_rle } litBlockType_t;
|
||||
#define FSEv07_ENCODING_STATIC 2
|
||||
#define FSEv07_ENCODING_DYNAMIC 3
|
||||
|
||||
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
|
||||
|
||||
static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9,10,11,12,
|
||||
13,14,15,16 };
|
||||
@ -3631,7 +3633,7 @@ size_t ZSTDv07_execSequence(BYTE* op,
|
||||
if (sequence.offset < 8) {
|
||||
/* close range match, overlap */
|
||||
static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
|
||||
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */
|
||||
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
|
||||
int const sub2 = dec64table[sequence.offset];
|
||||
op[0] = match[0];
|
||||
op[1] = match[1];
|
||||
@ -3895,19 +3897,40 @@ size_t ZSTDv07_decompress(void* dst, size_t dstCapacity, const void* src, size_t
|
||||
#endif
|
||||
}
|
||||
|
||||
size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t srcSize)
|
||||
/* ZSTD_errorFrameSizeInfoLegacy() :
|
||||
assumes `cSize` and `dBound` are _not_ NULL */
|
||||
static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
|
||||
{
|
||||
*cSize = ret;
|
||||
*dBound = ZSTD_CONTENTSIZE_ERROR;
|
||||
}
|
||||
|
||||
void ZSTDv07_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
|
||||
{
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
size_t remainingSize = srcSize;
|
||||
size_t nbBlocks = 0;
|
||||
|
||||
/* check */
|
||||
if (srcSize < ZSTDv07_frameHeaderSize_min+ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
|
||||
if (srcSize < ZSTDv07_frameHeaderSize_min+ZSTDv07_blockHeaderSize) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
|
||||
/* Frame Header */
|
||||
{ size_t const frameHeaderSize = ZSTDv07_frameHeaderSize(src, ZSTDv07_frameHeaderSize_min);
|
||||
if (ZSTDv07_isError(frameHeaderSize)) return frameHeaderSize;
|
||||
if (MEM_readLE32(src) != ZSTDv07_MAGICNUMBER) return ERROR(prefix_unknown);
|
||||
if (srcSize < frameHeaderSize+ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
|
||||
if (ZSTDv07_isError(frameHeaderSize)) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, frameHeaderSize);
|
||||
return;
|
||||
}
|
||||
if (MEM_readLE32(src) != ZSTDv07_MAGICNUMBER) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
|
||||
return;
|
||||
}
|
||||
if (srcSize < frameHeaderSize+ZSTDv07_blockHeaderSize) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
ip += frameHeaderSize; remainingSize -= frameHeaderSize;
|
||||
}
|
||||
|
||||
@ -3915,20 +3938,28 @@ size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t srcSize)
|
||||
while (1) {
|
||||
blockProperties_t blockProperties;
|
||||
size_t const cBlockSize = ZSTDv07_getcBlockSize(ip, remainingSize, &blockProperties);
|
||||
if (ZSTDv07_isError(cBlockSize)) return cBlockSize;
|
||||
if (ZSTDv07_isError(cBlockSize)) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize);
|
||||
return;
|
||||
}
|
||||
|
||||
ip += ZSTDv07_blockHeaderSize;
|
||||
remainingSize -= ZSTDv07_blockHeaderSize;
|
||||
|
||||
if (blockProperties.blockType == bt_end) break;
|
||||
|
||||
if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
|
||||
if (cBlockSize > remainingSize) {
|
||||
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
|
||||
return;
|
||||
}
|
||||
|
||||
ip += cBlockSize;
|
||||
remainingSize -= cBlockSize;
|
||||
nbBlocks++;
|
||||
}
|
||||
|
||||
return ip - (const BYTE*)src;
|
||||
*cSize = ip - (const BYTE*)src;
|
||||
*dBound = nbBlocks * ZSTDv07_BLOCKSIZE_ABSOLUTEMAX;
|
||||
}
|
||||
|
||||
/*_******************************
|
||||
|
@ -50,12 +50,17 @@ ZSTDLIBv07_API size_t ZSTDv07_decompress( void* dst, size_t dstCapacity,
|
||||
const void* src, size_t compressedSize);
|
||||
|
||||
/**
|
||||
ZSTDv07_getFrameSrcSize() : get the source length of a ZSTD frame
|
||||
compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
return : the number of bytes that would be read to decompress this frame
|
||||
or an errorCode if it fails (which can be tested using ZSTDv07_isError())
|
||||
ZSTDv07_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.7.x format
|
||||
srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
|
||||
cSize (output parameter) : the number of bytes that would be read to decompress this frame
|
||||
or an error code if it fails (which can be tested using ZSTDv07_isError())
|
||||
dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
|
||||
or ZSTD_CONTENTSIZE_ERROR if an error occurs
|
||||
|
||||
note : assumes `cSize` and `dBound` are _not_ NULL.
|
||||
*/
|
||||
size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t compressedSize);
|
||||
void ZSTDv07_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
|
||||
size_t* cSize, unsigned long long* dBound);
|
||||
|
||||
/*====== Helper functions ======*/
|
||||
ZSTDLIBv07_API unsigned ZSTDv07_isError(size_t code); /*!< tells if a `size_t` function result is an error code */
|
||||
|
1085
lib/zstd.h
1085
lib/zstd.h
File diff suppressed because it is too large
Load Diff
@ -51,7 +51,7 @@ endif
|
||||
CFLAGS ?= -O3
|
||||
DEBUGFLAGS+=-Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
|
||||
-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
|
||||
-Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \
|
||||
-Wstrict-prototypes -Wundef -Wpointer-arith \
|
||||
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
|
||||
-Wredundant-decls -Wmissing-prototypes -Wc++-compat
|
||||
CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)
|
||||
@ -165,7 +165,7 @@ $(ZSTDDECOMP_O): CFLAGS += $(ALIGN_LOOP)
|
||||
zstd : CPPFLAGS += $(THREAD_CPP) $(ZLIBCPP) $(LZMACPP) $(LZ4CPP)
|
||||
zstd : LDFLAGS += $(THREAD_LD) $(ZLIBLD) $(LZMALD) $(LZ4LD) $(DEBUGFLAGS_LD)
|
||||
zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
|
||||
zstd : $(ZSTDLIB_FILES) zstdcli.o util.o fileio.o benchfn.o benchzstd.o datagen.o dibio.o
|
||||
zstd : $(ZSTDLIB_FILES) zstdcli.o util.o timefn.o fileio.o benchfn.o benchzstd.o datagen.o dibio.o
|
||||
@echo "$(THREAD_MSG)"
|
||||
@echo "$(ZLIB_MSG)"
|
||||
@echo "$(LZMA_MSG)"
|
||||
@ -183,13 +183,13 @@ zstd-release: zstd
|
||||
zstd32 : CPPFLAGS += $(THREAD_CPP)
|
||||
zstd32 : LDFLAGS += $(THREAD_LD)
|
||||
zstd32 : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
|
||||
zstd32 : $(ZSTDLIB_FILES) zstdcli.c util.c fileio.c benchfn.c benchzstd.c datagen.c dibio.c
|
||||
zstd32 : $(ZSTDLIB_FILES) zstdcli.c util.c timefn.c fileio.c benchfn.c benchzstd.c datagen.c dibio.c
|
||||
ifneq (,$(filter Windows%,$(OS)))
|
||||
windres/generate_res.bat
|
||||
endif
|
||||
$(CC) -m32 $(FLAGS) $^ $(RES32_FILE) -o $@$(EXT)
|
||||
|
||||
zstd-nolegacy : $(ZSTD_FILES) $(ZDICT_FILES) zstdcli.o util.o fileio.c benchfn.o benchzstd.o datagen.o dibio.o
|
||||
zstd-nolegacy : $(ZSTD_FILES) $(ZDICT_FILES) zstdcli.o util.o fileio.c benchfn.o benchzstd.o timefn.o datagen.o dibio.o
|
||||
$(CC) $(FLAGS) $^ -o $@$(EXT) $(LDFLAGS)
|
||||
|
||||
zstd-nomt : THREAD_CPP :=
|
||||
@ -222,13 +222,13 @@ zstd-pgo :
|
||||
|
||||
# minimal target, with only zstd compression and decompression. no bench. no legacy.
|
||||
zstd-small: CFLAGS = -Os -s
|
||||
zstd-frugal zstd-small: $(ZSTD_FILES) zstdcli.c util.c fileio.c
|
||||
zstd-frugal zstd-small: $(ZSTD_FILES) zstdcli.c util.c timefn.c fileio.c
|
||||
$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT $^ -o $@$(EXT)
|
||||
|
||||
zstd-decompress: $(ZSTDCOMMON_FILES) $(ZSTDDECOMP_FILES) zstdcli.c util.c fileio.c
|
||||
zstd-decompress: $(ZSTDCOMMON_FILES) $(ZSTDDECOMP_FILES) zstdcli.c util.c timefn.c fileio.c
|
||||
$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NOCOMPRESS $^ -o $@$(EXT)
|
||||
|
||||
zstd-compress: $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) zstdcli.c util.c fileio.c
|
||||
zstd-compress: $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) zstdcli.c util.c timefn.c fileio.c
|
||||
$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NODECOMPRESS $^ -o $@$(EXT)
|
||||
|
||||
zstdmt: zstd
|
||||
@ -265,9 +265,9 @@ man: zstd.1 zstdgrep.1 zstdless.1
|
||||
|
||||
.PHONY: clean-man
|
||||
clean-man:
|
||||
rm zstd.1
|
||||
rm zstdgrep.1
|
||||
rm zstdless.1
|
||||
$(RM) zstd.1
|
||||
$(RM) zstdgrep.1
|
||||
$(RM) zstdless.1
|
||||
|
||||
.PHONY: preview-man
|
||||
preview-man: clean-man man
|
||||
|
@ -13,25 +13,20 @@
|
||||
/* *************************************
|
||||
* Includes
|
||||
***************************************/
|
||||
#include "platform.h" /* Large Files support */
|
||||
#include "util.h" /* UTIL_getFileSize, UTIL_sleep */
|
||||
#include <stdlib.h> /* malloc, free */
|
||||
#include <string.h> /* memset */
|
||||
#include <stdio.h> /* fprintf, fopen */
|
||||
#undef NDEBUG /* assert must not be disabled */
|
||||
#include <assert.h> /* assert */
|
||||
|
||||
#include "mem.h"
|
||||
#include "timefn.h" /* UTIL_time_t, UTIL_getTime */
|
||||
#include "benchfn.h"
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
#define TIMELOOP_MICROSEC (1*1000000ULL) /* 1 second */
|
||||
#define TIMELOOP_MICROSEC SEC_TO_MICRO /* 1 second */
|
||||
#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */
|
||||
#define ACTIVEPERIOD_MICROSEC (70*TIMELOOP_MICROSEC) /* 70 seconds */
|
||||
#define COOLPERIOD_SEC 10
|
||||
|
||||
#define KB *(1 <<10)
|
||||
#define MB *(1 <<20)
|
||||
@ -39,14 +34,16 @@
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Errors
|
||||
* Debug errors
|
||||
***************************************/
|
||||
#ifndef DEBUG
|
||||
# define DEBUG 0
|
||||
#if defined(DEBUG) && (DEBUG >= 1)
|
||||
# include <stdio.h> /* fprintf */
|
||||
# define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
|
||||
# define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }
|
||||
#else
|
||||
# define DEBUGOUTPUT(...)
|
||||
#endif
|
||||
|
||||
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
|
||||
#define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }
|
||||
|
||||
/* error without displaying */
|
||||
#define RETURN_QUIET_ERROR(retValue, ...) { \
|
||||
@ -116,15 +113,7 @@ BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
|
||||
{ size_t i;
|
||||
for(i = 0; i < p.blockCount; i++) {
|
||||
memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]); /* warm up and erase result buffer */
|
||||
}
|
||||
#if 0
|
||||
/* based on testing these seem to lower accuracy of multiple calls of 1 nbLoops vs 1 call of multiple nbLoops
|
||||
* (Makes former slower)
|
||||
*/
|
||||
UTIL_sleepMilli(5); /* give processor time to other processes */
|
||||
UTIL_waitForNextTick();
|
||||
#endif
|
||||
}
|
||||
} }
|
||||
|
||||
/* benchmark */
|
||||
{ UTIL_time_t const clockStart = UTIL_getTime();
|
||||
@ -146,9 +135,9 @@ BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
|
||||
} }
|
||||
} /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */
|
||||
|
||||
{ U64 const totalTime = UTIL_clockSpanNano(clockStart);
|
||||
{ PTime const totalTime = UTIL_clockSpanNano(clockStart);
|
||||
BMK_runTime_t rt;
|
||||
rt.nanoSecPerRun = totalTime / nbLoops;
|
||||
rt.nanoSecPerRun = (double)totalTime / nbLoops;
|
||||
rt.sumOfReturn = dstSize;
|
||||
return BMK_setValid_runTime(rt);
|
||||
} }
|
||||
@ -158,9 +147,9 @@ BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
|
||||
/* ==== Benchmarking any function, providing intermediate results ==== */
|
||||
|
||||
struct BMK_timedFnState_s {
|
||||
U64 timeSpent_ns;
|
||||
U64 timeBudget_ns;
|
||||
U64 runBudget_ns;
|
||||
PTime timeSpent_ns;
|
||||
PTime timeBudget_ns;
|
||||
PTime runBudget_ns;
|
||||
BMK_runTime_t fastestRun;
|
||||
unsigned nbLoops;
|
||||
UTIL_time_t coolTime;
|
||||
@ -174,8 +163,20 @@ BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
|
||||
return r;
|
||||
}
|
||||
|
||||
void BMK_freeTimedFnState(BMK_timedFnState_t* state) {
|
||||
free(state);
|
||||
void BMK_freeTimedFnState(BMK_timedFnState_t* state) { free(state); }
|
||||
|
||||
BMK_timedFnState_t*
|
||||
BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms)
|
||||
{
|
||||
typedef char check_size[ 2 * (sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s)) - 1]; /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */
|
||||
typedef struct { check_size c; BMK_timedFnState_t tfs; } tfs_align; /* force tfs to be aligned at its next best position */
|
||||
size_t const tfs_alignment = offsetof(tfs_align, tfs); /* provides the minimal alignment restriction for BMK_timedFnState_t */
|
||||
BMK_timedFnState_t* const r = (BMK_timedFnState_t*)buffer;
|
||||
if (buffer == NULL) return NULL;
|
||||
if (size < sizeof(struct BMK_timedFnState_s)) return NULL;
|
||||
if ((size_t)buffer % tfs_alignment) return NULL; /* buffer must be properly aligned */
|
||||
BMK_resetTimedFnState(r, total_ms, run_ms);
|
||||
return r;
|
||||
}
|
||||
|
||||
void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
|
||||
@ -184,9 +185,9 @@ void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms,
|
||||
if (!run_ms) run_ms = 1;
|
||||
if (run_ms > total_ms) run_ms = total_ms;
|
||||
timedFnState->timeSpent_ns = 0;
|
||||
timedFnState->timeBudget_ns = (U64)total_ms * TIMELOOP_NANOSEC / 1000;
|
||||
timedFnState->runBudget_ns = (U64)run_ms * TIMELOOP_NANOSEC / 1000;
|
||||
timedFnState->fastestRun.nanoSecPerRun = (U64)(-1LL);
|
||||
timedFnState->timeBudget_ns = (PTime)total_ms * TIMELOOP_NANOSEC / 1000;
|
||||
timedFnState->runBudget_ns = (PTime)run_ms * TIMELOOP_NANOSEC / 1000;
|
||||
timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000; /* hopefully large enough : must be larger than any potential measurement */
|
||||
timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);
|
||||
timedFnState->nbLoops = 1;
|
||||
timedFnState->coolTime = UTIL_getTime();
|
||||
@ -208,37 +209,27 @@ int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)
|
||||
BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont,
|
||||
BMK_benchParams_t p)
|
||||
{
|
||||
U64 const runBudget_ns = cont->runBudget_ns;
|
||||
U64 const runTimeMin_ns = runBudget_ns / 2;
|
||||
PTime const runBudget_ns = cont->runBudget_ns;
|
||||
PTime const runTimeMin_ns = runBudget_ns / 2;
|
||||
int completed = 0;
|
||||
BMK_runTime_t bestRunTime = cont->fastestRun;
|
||||
|
||||
while (!completed) {
|
||||
BMK_runOutcome_t runResult;
|
||||
|
||||
/* Overheat protection */
|
||||
if (UTIL_clockSpanMicro(cont->coolTime) > ACTIVEPERIOD_MICROSEC) {
|
||||
DEBUGOUTPUT("\rcooling down ... \r");
|
||||
UTIL_sleep(COOLPERIOD_SEC);
|
||||
cont->coolTime = UTIL_getTime();
|
||||
}
|
||||
|
||||
/* reinitialize capacity */
|
||||
runResult = BMK_benchFunction(p, cont->nbLoops);
|
||||
BMK_runOutcome_t const runResult = BMK_benchFunction(p, cont->nbLoops);
|
||||
|
||||
if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */
|
||||
return runResult;
|
||||
}
|
||||
|
||||
{ BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult);
|
||||
U64 const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;
|
||||
double const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;
|
||||
|
||||
cont->timeSpent_ns += loopDuration_ns;
|
||||
cont->timeSpent_ns += (unsigned long long)loopDuration_ns;
|
||||
|
||||
/* estimate nbLoops for next run to last approximately 1 second */
|
||||
if (loopDuration_ns > (runBudget_ns / 50)) {
|
||||
U64 const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
|
||||
cont->nbLoops = (U32)(runBudget_ns / fastestRun_ns) + 1;
|
||||
double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
|
||||
cont->nbLoops = (unsigned)(runBudget_ns / fastestRun_ns) + 1;
|
||||
} else {
|
||||
/* previous run was too short : blindly increase workload by x multiplier */
|
||||
const unsigned multiplier = 10;
|
||||
|
@ -31,7 +31,7 @@ extern "C" {
|
||||
/* BMK_runTime_t: valid result return type */
|
||||
|
||||
typedef struct {
|
||||
unsigned long long nanoSecPerRun; /* time per iteration (over all blocks) */
|
||||
double nanoSecPerRun; /* time per iteration (over all blocks) */
|
||||
size_t sumOfReturn; /* sum of return values */
|
||||
} BMK_runTime_t;
|
||||
|
||||
@ -58,30 +58,31 @@ typedef size_t (*BMK_initFn_t)(void* initPayload);
|
||||
typedef unsigned (*BMK_errorFn_t)(size_t);
|
||||
|
||||
|
||||
/* BMK_benchFunction() parameters are provided through following structure.
|
||||
* This is preferable for readability,
|
||||
* as the number of parameters required is pretty large.
|
||||
/* BMK_benchFunction() parameters are provided via the following structure.
|
||||
* A structure is preferable for readability,
|
||||
* as the number of parameters required is fairly large.
|
||||
* No initializer is provided, because it doesn't make sense to provide some "default" :
|
||||
* all parameters should be specified by the caller */
|
||||
* all parameters must be specified by the caller.
|
||||
* optional parameters are labelled explicitly, and accept value NULL when not used */
|
||||
typedef struct {
|
||||
BMK_benchFn_t benchFn; /* the function to benchmark, over the set of blocks */
|
||||
void* benchPayload; /* pass custom parameters to benchFn :
|
||||
* (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload) */
|
||||
BMK_initFn_t initFn; /* (*initFn)(initPayload) is run once per run, at the beginning. */
|
||||
void* initPayload; /* Both arguments can be NULL, in which case nothing is run. */
|
||||
BMK_errorFn_t errorFn; /* errorFn will check each return value of benchFn over each block, to determine if it failed or not.
|
||||
* errorFn can be NULL, in which case no check is performed.
|
||||
* errorFn must return 0 when benchFn was successful, and >= 1 if it detects an error.
|
||||
* Execution is stopped as soon as an error is detected.
|
||||
* the triggering return value can be retrieved using BMK_extract_errorResult(). */
|
||||
size_t blockCount; /* number of blocks to operate benchFn on.
|
||||
* It's also the size of all array parameters :
|
||||
* srcBuffers, srcSizes, dstBuffers, dstCapacities, blockResults */
|
||||
const void *const * srcBuffers; /* array of buffers to be operated on by benchFn */
|
||||
const size_t* srcSizes; /* array of the sizes of srcBuffers buffers */
|
||||
void *const * dstBuffers;/* array of buffers to be written into by benchFn */
|
||||
const size_t* dstCapacities; /* array of the capacities of dstBuffers buffers */
|
||||
size_t* blockResults; /* Optional: store the return value of benchFn for each block. Use NULL if this result is not requested. */
|
||||
BMK_benchFn_t benchFn; /* the function to benchmark, over the set of blocks */
|
||||
void* benchPayload; /* pass custom parameters to benchFn :
|
||||
* (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload) */
|
||||
BMK_initFn_t initFn; /* (*initFn)(initPayload) is run once per run, at the beginning. */
|
||||
void* initPayload; /* Both arguments can be NULL, in which case nothing is run. */
|
||||
BMK_errorFn_t errorFn; /* errorFn will check each return value of benchFn over each block, to determine if it failed or not.
|
||||
* errorFn can be NULL, in which case no check is performed.
|
||||
* errorFn must return 0 when benchFn was successful, and >= 1 if it detects an error.
|
||||
* Execution is stopped as soon as an error is detected.
|
||||
* the triggering return value can be retrieved using BMK_extract_errorResult(). */
|
||||
size_t blockCount; /* number of blocks to operate benchFn on.
|
||||
* It's also the size of all array parameters :
|
||||
* srcBuffers, srcSizes, dstBuffers, dstCapacities, blockResults */
|
||||
const void *const * srcBuffers; /* read-only array of buffers to be operated on by benchFn */
|
||||
const size_t* srcSizes; /* read-only array containing sizes of srcBuffers */
|
||||
void *const * dstBuffers; /* array of buffers to be written into by benchFn. This array is not optional, it must be provided even if unused by benchfn. */
|
||||
const size_t* dstCapacities; /* read-only array containing capacities of dstBuffers. This array must be present. */
|
||||
size_t* blockResults; /* Optional: store the return value of benchFn for each block. Use NULL if this result is not requested. */
|
||||
} BMK_benchParams_t;
|
||||
|
||||
|
||||
@ -159,6 +160,21 @@ void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms,
|
||||
void BMK_freeTimedFnState(BMK_timedFnState_t* state);
|
||||
|
||||
|
||||
/* BMK_timedFnState_shell and BMK_initStatic_timedFnState() :
|
||||
* Makes it possible to statically allocate a BMK_timedFnState_t on stack.
|
||||
* BMK_timedFnState_shell is only there to allocate space,
|
||||
* never ever access its members.
|
||||
* BMK_initStatic_timedFnState() actually accepts any buffer.
|
||||
* It will check if provided buffer is large enough and is correctly aligned,
|
||||
* and will return NULL if conditions are not respected.
|
||||
*/
|
||||
#define BMK_TIMEDFNSTATE_SIZE 64
|
||||
typedef union {
|
||||
char never_access_space[BMK_TIMEDFNSTATE_SIZE];
|
||||
long long alignment_enforcer; /* must be aligned on 8-bytes boundaries */
|
||||
} BMK_timedFnState_shell;
|
||||
BMK_timedFnState_t* BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms);
|
||||
|
||||
|
||||
#endif /* BENCH_FN_H_23876 */
|
||||
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include <errno.h>
|
||||
#include <assert.h> /* assert */
|
||||
|
||||
#include "timefn.h" /* UTIL_time_t */
|
||||
#include "benchfn.h"
|
||||
#include "mem.h"
|
||||
#define ZSTD_STATIC_LINKING_ONLY
|
||||
@ -135,7 +136,8 @@ BMK_advancedParams_t BMK_initAdvancedParams(void) {
|
||||
0, /* ldmMinMatch */
|
||||
0, /* ldmHashLog */
|
||||
0, /* ldmBuckSizeLog */
|
||||
0 /* ldmHashRateLog */
|
||||
0, /* ldmHashRateLog */
|
||||
ZSTD_lcm_auto /* literalCompressionMode */
|
||||
};
|
||||
return res;
|
||||
}
|
||||
@ -159,9 +161,13 @@ typedef struct {
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
static void BMK_initCCtx(ZSTD_CCtx* ctx,
|
||||
const void* dictBuffer, size_t dictBufferSize, int cLevel,
|
||||
const ZSTD_compressionParameters* comprParams, const BMK_advancedParams_t* adv) {
|
||||
static void
|
||||
BMK_initCCtx(ZSTD_CCtx* ctx,
|
||||
const void* dictBuffer, size_t dictBufferSize,
|
||||
int cLevel,
|
||||
const ZSTD_compressionParameters* comprParams,
|
||||
const BMK_advancedParams_t* adv)
|
||||
{
|
||||
ZSTD_CCtx_reset(ctx, ZSTD_reset_session_and_parameters);
|
||||
if (adv->nbWorkers==1) {
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, 0));
|
||||
@ -174,12 +180,13 @@ static void BMK_initCCtx(ZSTD_CCtx* ctx,
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashLog, adv->ldmHashLog));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmBucketSizeLog, adv->ldmBucketSizeLog));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashRateLog, adv->ldmHashRateLog));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_windowLog, comprParams->windowLog));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_hashLog, comprParams->hashLog));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_chainLog, comprParams->chainLog));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_searchLog, comprParams->searchLog));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_minMatch, comprParams->minMatch));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_targetLength, comprParams->targetLength));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_windowLog, (int)comprParams->windowLog));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_hashLog, (int)comprParams->hashLog));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_chainLog, (int)comprParams->chainLog));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_searchLog, (int)comprParams->searchLog));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_minMatch, (int)comprParams->minMatch));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_targetLength, (int)comprParams->targetLength));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_literalCompressionMode, (int)adv->literalCompressionMode));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_strategy, comprParams->strategy));
|
||||
CHECK_Z(ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize));
|
||||
}
|
||||
@ -376,7 +383,7 @@ BMK_benchMemAdvancedNoAlloc(
|
||||
}
|
||||
}
|
||||
|
||||
/* warmimg up `compressedBuffer` */
|
||||
/* warming up `compressedBuffer` */
|
||||
if (adv->mode == BMK_decodeOnly) {
|
||||
memcpy(compressedBuffer, srcBuffer, loadedCompressedSize);
|
||||
} else {
|
||||
@ -444,7 +451,7 @@ BMK_benchMemAdvancedNoAlloc(
|
||||
cSize = cResult.sumOfReturn;
|
||||
ratio = (double)srcSize / cSize;
|
||||
{ BMK_benchResult_t newResult;
|
||||
newResult.cSpeed = ((U64)srcSize * TIMELOOP_NANOSEC / cResult.nanoSecPerRun);
|
||||
newResult.cSpeed = (U64)((double)srcSize * TIMELOOP_NANOSEC / cResult.nanoSecPerRun);
|
||||
benchResult.cSize = cSize;
|
||||
if (newResult.cSpeed > benchResult.cSpeed)
|
||||
benchResult.cSpeed = newResult.cSpeed;
|
||||
@ -468,7 +475,7 @@ BMK_benchMemAdvancedNoAlloc(
|
||||
}
|
||||
|
||||
{ BMK_runTime_t const dResult = BMK_extract_runTime(dOutcome);
|
||||
U64 const newDSpeed = (srcSize * TIMELOOP_NANOSEC / dResult.nanoSecPerRun);
|
||||
U64 const newDSpeed = (U64)((double)srcSize * TIMELOOP_NANOSEC / dResult.nanoSecPerRun);
|
||||
if (newDSpeed > benchResult.dSpeed)
|
||||
benchResult.dSpeed = newDSpeed;
|
||||
}
|
||||
@ -505,17 +512,21 @@ BMK_benchMemAdvancedNoAlloc(
|
||||
pos = (U32)(u - bacc);
|
||||
bNb = pos / (128 KB);
|
||||
DISPLAY("(sample %u, block %u, pos %u) \n", segNb, bNb, pos);
|
||||
if (u>5) {
|
||||
int n;
|
||||
{ size_t const lowest = (u>5) ? 5 : u;
|
||||
size_t n;
|
||||
DISPLAY("origin: ");
|
||||
for (n=-5; n<0; n++) DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]);
|
||||
for (n=lowest; n>0; n--)
|
||||
DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u-n]);
|
||||
DISPLAY(" :%02X: ", ((const BYTE*)srcBuffer)[u]);
|
||||
for (n=1; n<3; n++) DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]);
|
||||
for (n=1; n<3; n++)
|
||||
DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]);
|
||||
DISPLAY(" \n");
|
||||
DISPLAY("decode: ");
|
||||
for (n=-5; n<0; n++) DISPLAY("%02X ", resultBuffer[u+n]);
|
||||
for (n=lowest; n>0; n++)
|
||||
DISPLAY("%02X ", resultBuffer[u-n]);
|
||||
DISPLAY(" :%02X: ", resultBuffer[u]);
|
||||
for (n=1; n<3; n++) DISPLAY("%02X ", resultBuffer[u+n]);
|
||||
for (n=1; n<3; n++)
|
||||
DISPLAY("%02X ", resultBuffer[u+n]);
|
||||
DISPLAY(" \n");
|
||||
}
|
||||
break;
|
||||
|
@ -105,17 +105,18 @@ typedef enum {
|
||||
} BMK_mode_t;
|
||||
|
||||
typedef struct {
|
||||
BMK_mode_t mode; /* 0: all, 1: compress only 2: decode only */
|
||||
unsigned nbSeconds; /* default timing is in nbSeconds */
|
||||
size_t blockSize; /* Maximum size of each block*/
|
||||
unsigned nbWorkers; /* multithreading */
|
||||
unsigned realTime; /* real time priority */
|
||||
int additionalParam; /* used by python speed benchmark */
|
||||
unsigned ldmFlag; /* enables long distance matching */
|
||||
unsigned ldmMinMatch; /* below: parameters for long distance matching, see zstd.1.md */
|
||||
unsigned ldmHashLog;
|
||||
unsigned ldmBucketSizeLog;
|
||||
unsigned ldmHashRateLog;
|
||||
BMK_mode_t mode; /* 0: all, 1: compress only 2: decode only */
|
||||
unsigned nbSeconds; /* default timing is in nbSeconds */
|
||||
size_t blockSize; /* Maximum size of each block*/
|
||||
int nbWorkers; /* multithreading */
|
||||
unsigned realTime; /* real time priority */
|
||||
int additionalParam; /* used by python speed benchmark */
|
||||
int ldmFlag; /* enables long distance matching */
|
||||
int ldmMinMatch; /* below: parameters for long distance matching, see zstd.1.md */
|
||||
int ldmHashLog;
|
||||
int ldmBucketSizeLog;
|
||||
int ldmHashRateLog;
|
||||
ZSTD_literalCompressionMode_e literalCompressionMode;
|
||||
} BMK_advancedParams_t;
|
||||
|
||||
/* returns default parameters used by nonAdvanced functions */
|
||||
@ -169,7 +170,7 @@ BMK_benchOutcome_t BMK_syntheticTest(
|
||||
* comprParams - basic compression parameters
|
||||
* dictBuffer - a dictionary if used, null otherwise
|
||||
* dictBufferSize - size of dictBuffer, 0 otherwise
|
||||
* diplayLevel - see BMK_benchFiles
|
||||
* displayLevel - see BMK_benchFiles
|
||||
* displayName - name used by display
|
||||
* @return:
|
||||
* a variant, which expresses either an error, or a valid result.
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include <errno.h> /* errno */
|
||||
#include <assert.h>
|
||||
|
||||
#include "timefn.h" /* UTIL_time_t, UTIL_clockSpanMicro, UTIL_getTime */
|
||||
#include "mem.h" /* read */
|
||||
#include "error_private.h"
|
||||
#include "dibio.h"
|
||||
|
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user