diff --git a/Makefile.am b/Makefile.am index d4b75264da73..37a9bfdf8697 100644 --- a/Makefile.am +++ b/Makefile.am @@ -126,6 +126,7 @@ cstyle: ! -name 'zfs_config.*' ! -name '*.mod.c' \ ! -name 'opt_global.h' ! -name '*_if*.h' \ ! -path './module/zstd/lib/*' \ + ! -path './module/zfs/lz4.c' \ $(cstyle_line) filter_executable = -exec test -x '{}' \; -print diff --git a/config/ax_code_coverage.m4 b/config/ax_code_coverage.m4 index 3e3c666f3c54..cd2498358df2 100644 --- a/config/ax_code_coverage.m4 +++ b/config/ax_code_coverage.m4 @@ -224,6 +224,7 @@ CODE_COVERAGE_GENHTML_OPTIONS ?= $(CODE_COVERAGE_GENHTML_OPTIONS_DEFAULT) # Ignore tmp and tests themselves CODE_COVERAGE_IGNORE_PATTERN ?= "/tmp/*" "*/tests/*" CODE_COVERAGE_IGNORE_PATTERN += "*/module/zstd/lib/*" +CODE_COVERAGE_IGNORE_PATTERN += "*/module/zfs/lz4.c" GITIGNOREFILES ?= GITIGNOREFILES += $(CODE_COVERAGE_OUTPUT_FILE) $(CODE_COVERAGE_OUTPUT_DIRECTORY) diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am index dce3f81b829d..e49577ec4a63 100644 --- a/lib/libzpool/Makefile.am +++ b/lib/libzpool/Makefile.am @@ -94,6 +94,7 @@ KERNEL_C = \ gzip.c \ hkdf.c \ lz4.c \ + lz4_zfs.c \ lzjb.c \ metaslab.c \ mmp.c \ diff --git a/module/Makefile.bsd b/module/Makefile.bsd index 315be2808605..5172394e6ac8 100644 --- a/module/Makefile.bsd +++ b/module/Makefile.bsd @@ -228,6 +228,7 @@ SRCS+= abd.c \ gzip.c \ lzjb.c \ lz4.c \ + lz4_zfs.c \ metaslab.c \ mmp.c \ multilist.c \ @@ -352,7 +353,7 @@ CFLAGS.dsl_deadlist.c= -Wno-cast-qual CFLAGS.dsl_prop.c= -Wno-cast-qual CFLAGS.edonr.c=-Wno-cast-qual CFLAGS.fm.c= -Wno-cast-qual -CFLAGS.lz4.c= -Wno-cast-qual +CFLAGS.lz4_zfs.c= -Wno-cast-qual CFLAGS.spa.c= -Wno-cast-qual CFLAGS.spa_misc.c= -Wno-cast-qual CFLAGS.sysctl_os.c= -include ../zfs_config.h diff --git a/module/zfs/Makefile.in b/module/zfs/Makefile.in index 653ea0da9bcc..30dc91a7eb59 100644 --- a/module/zfs/Makefile.in +++ b/module/zfs/Makefile.in @@ -56,6 +56,7 @@ $(MODULE)-objs += fm.o $(MODULE)-objs += gzip.o $(MODULE)-objs += hkdf.o $(MODULE)-objs += lz4.o +$(MODULE)-objs += lz4_zfs.o $(MODULE)-objs += lzjb.o $(MODULE)-objs += metaslab.o $(MODULE)-objs += mmp.o diff --git a/module/zfs/lz4.c b/module/zfs/lz4.c index eba8f01b5065..800635b3449e 100644 --- a/module/zfs/lz4.c +++ b/module/zfs/lz4.c @@ -1,164 +1,50 @@ /* - * LZ4 - Fast LZ compression algorithm - * Header File - * Copyright (C) 2011-2013, Yann Collet. - * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + LZ4 - Fast LZ compression algorithm + Copyright (C) 2011-present, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - LZ4 homepage : http://www.lz4.org + - LZ4 source repository : https://github.com/lz4/lz4 +*/ + +/* + * This file contains unmodified code from lz4 1.9.3's decompressor, plus + * associated macros and constants. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: + * It also contains a couple of defines from the old lz4.c to make things + * fit together smoothly. * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You can contact the author at : - * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html - * - LZ4 source repository : http://code.google.com/p/lz4/ */ #include -#include -static int real_LZ4_compress(const char *source, char *dest, int isize, - int osize); -static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest, +int LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize, int maxOutputSize); -static int LZ4_compressCtx(void *ctx, const char *source, char *dest, - int isize, int osize); -static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest, - int isize, int osize); - -static kmem_cache_t *lz4_cache; - -size_t -lz4_compress_zfs(void *s_start, void *d_start, size_t s_len, - size_t d_len, int n) -{ - (void) n; - uint32_t bufsiz; - char *dest = d_start; - - ASSERT(d_len >= sizeof (bufsiz)); - - bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len, - d_len - sizeof (bufsiz)); - - /* Signal an error if the compression routine returned zero. */ - if (bufsiz == 0) - return (s_len); - - /* - * The exact compressed size is needed by the decompression routine, - * so it is stored at the start of the buffer. 
Note that this may be - * less than the compressed block size, which is rounded up to a - * multiple of 1< s_len) - return (1); - - /* - * Returns 0 on success (decompression function returned non-negative) - * and non-zero on failure (decompression function returned negative). - */ - return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)], - d_start, bufsiz, d_len) < 0); -} - -/* - * LZ4 API Description: - * - * Simple Functions: - * real_LZ4_compress() : - * isize : is the input size. Max supported value is ~1.9GB - * return : the number of bytes written in buffer dest - * or 0 if the compression fails (if LZ4_COMPRESSMIN is set). - * note : destination buffer must be already allocated. - * destination buffer must be sized to handle worst cases - * situations (input data not compressible) worst case size - * evaluation is provided by function LZ4_compressBound(). - * - * real_LZ4_uncompress() : - * osize : is the output size, therefore the original size - * return : the number of bytes read in the source buffer. - * If the source stream is malformed, the function will stop - * decoding and return a negative result, indicating the byte - * position of the faulty instruction. This function never - * writes beyond dest + osize, and is therefore protected - * against malicious data packets. - * note : destination buffer must be already allocated - * note : real_LZ4_uncompress() is not used in ZFS so its code - * is not present here. - * - * Advanced Functions - * - * LZ4_compressBound() : - * Provides the maximum size that LZ4 may output in a "worst case" - * scenario (input data not compressible) primarily useful for memory - * allocation of output buffer. - * - * isize : is the input size. Max supported value is ~1.9GB - * return : maximum output size in a "worst case" scenario - * note : this function is limited by "int" range (2^31-1) - * - * LZ4_uncompress_unknownOutputSize() : - * isize : is the input size, therefore the compressed size - * maxOutputSize : is the size of the destination buffer (which must be - * already allocated) - * return : the number of bytes decoded in the destination buffer - * (necessarily <= maxOutputSize). If the source stream is - * malformed, the function will stop decoding and return a - * negative result, indicating the byte position of the faulty - * instruction. This function never writes beyond dest + - * maxOutputSize, and is therefore protected against malicious - * data packets. - * note : Destination buffer must be already allocated. - * This version is slightly slower than real_LZ4_uncompress() - * - * LZ4_compressCtx() : - * This function explicitly handles the CTX memory structure. - * - * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated - * by the caller (either on the stack or using kmem_cache_alloc). Passing - * NULL isn't valid. - * - * LZ4_compress64kCtx() : - * Same as LZ4_compressCtx(), but specific to small inputs (<64KB). - * isize *Must* be <64KB, otherwise the output will be corrupted. - * - * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated - * by the caller (either on the stack or using kmem_cache_alloc). Passing - * NULL isn't valid. - */ /* * Tuning parameters @@ -184,26 +70,6 @@ lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len, */ #define NOTCOMPRESSIBLE_CONFIRMATION 6 -/* - * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to - * performance for big endian cpu, but the resulting compressed stream - * will be incompatible with little-endian CPU. 
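For reference only (not part of the patch): a minimal user-space sketch of the record framing that lz4_compress_zfs()/lz4_decompress_zfs() implement, i.e. the exact compressed size stored big-endian in the first four bytes, followed by the raw LZ4 block. It assumes liblz4's public LZ4_compress_default()/LZ4_decompress_safe() in place of the kernel-internal entry points, and the example_* names are hypothetical.

#include <stdint.h>
#include <lz4.h>

/* Compress s_len bytes into dst; the 4-byte big-endian header records
 * the exact compressed size. Returning s_len signals "store raw". */
static size_t
example_compress_record(const void *src, size_t s_len, void *dst, size_t d_len)
{
        uint8_t *out = dst;
        int n;

        if (d_len <= sizeof (uint32_t))
                return (s_len);
        n = LZ4_compress_default((const char *)src, (char *)out + 4,
            (int)s_len, (int)(d_len - 4));
        if (n <= 0)
                return (s_len);                 /* incompressible */
        out[0] = (uint8_t)(n >> 24);            /* big-endian, like BE_32() */
        out[1] = (uint8_t)(n >> 16);
        out[2] = (uint8_t)(n >> 8);
        out[3] = (uint8_t)n;
        return ((size_t)n + 4);
}

/* Returns 0 on success, non-zero on a corrupt header or stream. */
static int
example_decompress_record(const void *src, size_t s_len, void *dst,
    size_t d_len)
{
        const uint8_t *in = src;
        uint32_t bufsiz;

        if (s_len < 4)
                return (1);
        bufsiz = ((uint32_t)in[0] << 24) | ((uint32_t)in[1] << 16) |
            ((uint32_t)in[2] << 8) | in[3];
        if (bufsiz + 4 > s_len)
                return (1);                     /* invalid size header */
        return (LZ4_decompress_safe((const char *)in + 4, (char *)dst,
            (int)bufsiz, (int)d_len) < 0);
}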
You can set this option - * to 1 in situations where data will stay within closed environment. - * This option is useless on Little_Endian CPU (such as x86). - */ -/* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */ - -/* - * CPU Feature Detection - */ - -/* 32 or 64 bits ? */ -#if defined(_LP64) -#define LZ4_ARCH64 1 -#else -#define LZ4_ARCH64 0 -#endif - /* * Little Endian or Big Endian? * Note: overwrite the below #define if you know your architecture endianness. @@ -218,17 +84,36 @@ lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len, #undef LZ4_BIG_ENDIAN #endif -/* - * Unaligned memory access is automatically enabled for "common" CPU, - * such as x86. For others CPU, the compiler will be more cautious, and - * insert extra code to ensure aligned access is respected. If you know - * your target CPU supports unaligned memory access, you may want to - * force this option manually to improve performance +/*-************************************ +* CPU Feature Detection +**************************************/ +/* LZ4_FORCE_MEMORY_ACCESS + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets which assembly generation depends on alignment. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. 
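Purely as an illustration of the three strategies this switch selects between (not part of the patch), here is roughly what they look like for a 32-bit load; only method 0 is strictly conforming C, which is why it is the portable default. The read32_method* and unalign32_example names are hypothetical.

#include <stdint.h>
#include <string.h>

/* Method 0: fixed-size memcpy(). Portable; compilers usually lower it
 * to a single load on targets that tolerate unaligned access. */
static uint32_t
read32_method0(const void *p)
{
        uint32_t v;
        memcpy(&v, p, sizeof (v));
        return (v);
}

/* Method 1: packed struct. GCC/ICC extension, not portable C. */
typedef struct { uint32_t v; } __attribute__((packed)) unalign32_example;
static uint32_t
read32_method1(const void *p)
{
        return (((const unalign32_example *)p)->v);
}

/* Method 2: direct dereference. Fast on e.g. GCC + ARMv6, but violates
 * the C standard's alignment rules, hence "use with caution". */
static uint32_t
read32_method2(const void *p)
{
        return (*(const uint32_t *)p);
}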
+ * Prefer these methods in priority order (0 > 1 > 2) */ -#if defined(__ARM_FEATURE_UNALIGNED) -#define LZ4_FORCE_UNALIGNED_ACCESS 1 +#ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */ +# if defined(__GNUC__) && \ + ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \ + || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define LZ4_FORCE_MEMORY_ACCESS 2 +# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__) +# define LZ4_FORCE_MEMORY_ACCESS 1 +# endif #endif +/* + * LZ4_FORCE_SW_BITCOUNT + * Define this parameter if your target system or compiler does not support hardware bit count + */ /* * Illumos : we can't use GCC's __builtin_ctz family of builtins in the * kernel @@ -236,7 +121,7 @@ lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len, * kernel */ #undef LZ4_FORCE_SW_BITCOUNT -#if defined(__sparc) +#if defined(__sunos__) #define LZ4_FORCE_SW_BITCOUNT #endif @@ -256,10 +141,50 @@ lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len, #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) -#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) -#define expect(expr, value) (__builtin_expect((expr), (value))) +#ifndef LZ4_FORCE_INLINE +# ifdef _MSC_VER /* Visual Studio */ +# define LZ4_FORCE_INLINE static __forceinline +# else +# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# ifdef __GNUC__ +# define LZ4_FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define LZ4_FORCE_INLINE static inline +# endif +# else +# define LZ4_FORCE_INLINE static +# endif /* __STDC_VERSION__ */ +# endif /* _MSC_VER */ +#endif /* LZ4_FORCE_INLINE */ + +/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE + * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8, + * together with a simple 8-byte copy loop as a fall-back path. + * However, this optimization hurts the decompression speed by >30%, + * because the execution does not go to the optimized loop + * for typical compressible data, and all of the preamble checks + * before going to the fall-back path become useless overhead. + * This optimization happens only with the -O3 flag, and -O2 generates + * a simple 8-byte copy loop. + * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8 + * functions are annotated with __attribute__((optimize("O2"))), + * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute + * of LZ4_wildCopy8 does not affect the compression speed. 
+ */ +#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__) +# define LZ4_FORCE_O2 __attribute__((optimize("O2"))) +# undef LZ4_FORCE_INLINE +# define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline)) #else -#define expect(expr, value) (expr) +# define LZ4_FORCE_O2 +#endif + +#ifndef expect +#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__) +# define expect(expr,value) (__builtin_expect ((expr),(value)) ) +#else +# define expect(expr,value) (expr) +#endif #endif #ifndef likely @@ -270,760 +195,793 @@ lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len, #define unlikely(expr) expect((expr) != 0, 0) #endif -#define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \ - (((x) & 0xffu) << 8))) +#ifndef _KERNEL +#include /* malloc, calloc, free */ +#include /* memset, memcpy */ +#endif +#define ALLOC(s) malloc(s) +#define ALLOC_AND_ZERO(s) calloc(1,s) +#define FREEMEM(p) free(p) -/* Basic types */ -#define BYTE uint8_t -#define U16 uint16_t -#define U32 uint32_t -#define S32 int32_t -#define U64 uint64_t +#define MEM_INIT(p,v,s) memset((p),(v),(s)) -#ifndef LZ4_FORCE_UNALIGNED_ACCESS -#pragma pack(1) + +/*-************************************ +* Common Constants +**************************************/ +#define MINMATCH 4 + +#define WILDCOPYLENGTH 8 +#define LASTLITERALS 5 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ +#define MFLIMIT 12 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ +#define MATCH_SAFEGUARD_DISTANCE ((2*WILDCOPYLENGTH) - MINMATCH) /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */ +#define FASTLOOP_SAFE_DISTANCE 64 + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#ifndef LZ4_DISTANCE_MAX /* history window size; can be user-defined at compile time */ +# define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */ #endif -typedef struct _U16_S { - U16 v; -} U16_S; -typedef struct _U32_S { - U32 v; -} U32_S; -typedef struct _U64_S { - U64 v; -} U64_S; - -#ifndef LZ4_FORCE_UNALIGNED_ACCESS -#pragma pack() +#define LZ4_DISTANCE_ABSOLUTE_MAX 65535 +#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX) /* max supported by LZ4 format */ +# error "LZ4_DISTANCE_MAX is too big : must be <= 65535" #endif -#define A64(x) (((U64_S *)(x))->v) -#define A32(x) (((U32_S *)(x))->v) -#define A16(x) (((U16_S *)(x))->v) +#define ML_BITS 4 +#define ML_MASK ((1U< +#endif +#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +#ifndef _KERNEL +#include +#endif + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; + typedef uintptr_t uptrval; +#else +# if UINT_MAX != 4294967295UL +# error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4" +# endif + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; + typedef size_t uptrval; /* generally true, except OpenVMS-64 */ +#endif + +#if defined(__x86_64__) + typedef U64 reg_t; /* 64-bits in x32 mode */ +#else + typedef size_t reg_t; /* 32-bits in x32 mode */ +#endif + +typedef enum { + notLimited = 0, + limitedOutput = 1, + fillOutput = 2 +} limitedOutput_directive; + + +/*-************************************ +* Reading and writing into memory +**************************************/ + +/** + * 
LZ4 relies on memcpy with a constant size being inlined. In freestanding + * environments, the compiler can't assume the implementation of memcpy() is + * standard compliant, so it can't apply its specialized memcpy() inlining + * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze + * memcpy() as if it were standard compliant, so it can inline it in freestanding + * environments. This is needed when decompressing the Linux Kernel, for example. */ -#define MINMATCH 4 - -#define HASH_LOG COMPRESSIONLEVEL -#define HASHTABLESIZE (1 << HASH_LOG) -#define HASH_MASK (HASHTABLESIZE - 1) - -#define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \ - NOTCOMPRESSIBLE_CONFIRMATION : 2) - -#define COPYLENGTH 8 -#define LASTLITERALS 5 -#define MFLIMIT (COPYLENGTH + MINMATCH) -#define MINLENGTH (MFLIMIT + 1) - -#define MAXD_LOG 16 -#define MAX_DISTANCE ((1 << MAXD_LOG) - 1) - -#define ML_BITS 4 -#define ML_MASK ((1U<= 4) +#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) #else -#define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); } -#define LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; } +#define LZ4_memcpy(dst, src, size) memcpy(dst, src, size) #endif - -/* Local structures */ -struct refTables { - HTYPE hashTable[HASHTABLESIZE]; -}; - - -/* Macros */ -#define LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \ - HASH_LOG)) -#define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p)) -#define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e); -#define LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \ - d = e; } - - -/* Private functions */ -#if LZ4_ARCH64 - -static inline int -LZ4_NbCommonBytes(register U64 val) +static unsigned LZ4_isLittleEndian(void) { -#if defined(LZ4_BIG_ENDIAN) -#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \ - !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clzll(val) >> 3); -#else - int r; - if (!(val >> 32)) { - r = 4; - } else { - r = 0; - val >>= 32; - } - if (!(val >> 16)) { - r += 2; - val >>= 8; - } else { - val >>= 24; - } - r += (!val); - return (r); -#endif -#else -#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \ - !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_ctzll(val) >> 3); -#else - static const int DeBruijnBytePos[64] = - { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, - 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, - 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, - 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 - }; - return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >> - 58]; -#endif -#endif -} - -#else - -static inline int -LZ4_NbCommonBytes(register U32 val) -{ -#if defined(LZ4_BIG_ENDIAN) -#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \ - !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clz(val) >> 3); -#else - int r; - if (!(val >> 16)) { - r = 2; - val >>= 8; - } else { - r = 0; - val >>= 24; - } - r += (!val); - return (r); -#endif -#else -#if defined(__GNUC__) && (GCC_VERSION >= 304) && \ - !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_ctz(val) >> 3); -#else - static const int DeBruijnBytePos[32] = { - 0, 0, 3, 0, 3, 1, 3, 0, - 3, 2, 2, 1, 3, 2, 0, 1, - 3, 3, 1, 2, 2, 2, 2, 0, - 3, 1, 2, 0, 1, 0, 1, 1 - }; - return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> - 27]; -#endif -#endif -} - -#endif - -/* Compression functions */ - -static int -LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize, - int 
osize) -{ - struct refTables *srt = (struct refTables *)ctx; - HTYPE *HashTable = (HTYPE *) (srt->hashTable); - - const BYTE *ip = (BYTE *) source; - INITBASE(base); - const BYTE *anchor = ip; - const BYTE *const iend = ip + isize; - const BYTE *const oend = (BYTE *) dest + osize; - const BYTE *const mflimit = iend - MFLIMIT; -#define matchlimit (iend - LASTLITERALS) - - BYTE *op = (BYTE *) dest; - - int len, length; - const int skipStrength = SKIPSTRENGTH; - U32 forwardH; - - - /* Init */ - if (isize < MINLENGTH) - goto _last_literals; - - /* First Byte */ - HashTable[LZ4_HASH_VALUE(ip)] = ip - base; - ip++; - forwardH = LZ4_HASH_VALUE(ip); - - /* Main Loop */ - for (;;) { - int findMatchAttempts = (1U << skipStrength) + 3; - const BYTE *forwardIp = ip; - const BYTE *ref; - BYTE *token; - - /* Find a match */ - do { - U32 h = forwardH; - int step = findMatchAttempts++ >> skipStrength; - ip = forwardIp; - forwardIp = ip + step; - - if (unlikely(forwardIp > mflimit)) { - goto _last_literals; - } - - forwardH = LZ4_HASH_VALUE(forwardIp); - ref = base + HashTable[h]; - HashTable[h] = ip - base; - - } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); - - /* Catch up */ - while ((ip > anchor) && (ref > (BYTE *) source) && - unlikely(ip[-1] == ref[-1])) { - ip--; - ref--; - } - - /* Encode Literal length */ - length = ip - anchor; - token = op++; - - /* Check output limit */ - if (unlikely(op + length + (2 + 1 + LASTLITERALS) + - (length >> 8) > oend)) - return (0); - - if (length >= (int)RUN_MASK) { - *token = (RUN_MASK << ML_BITS); - len = length - RUN_MASK; - for (; len > 254; len -= 255) - *op++ = 255; - *op++ = (BYTE)len; - } else - *token = (length << ML_BITS); - - /* Copy Literals */ - LZ4_BLINDCOPY(anchor, op, length); - - _next_match: - /* Encode Offset */ - LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref); - - /* Start Counting */ - ip += MINMATCH; - ref += MINMATCH; /* MinMatch verified */ - anchor = ip; - while (likely(ip < matchlimit - (STEPSIZE - 1))) { - UARCH diff = AARCH(ref) ^ AARCH(ip); - if (!diff) { - ip += STEPSIZE; - ref += STEPSIZE; - continue; - } - ip += LZ4_NbCommonBytes(diff); - goto _endCount; - } -#if LZ4_ARCH64 - if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) { - ip += 4; - ref += 4; - } -#endif - if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) { - ip += 2; - ref += 2; - } - if ((ip < matchlimit) && (*ref == *ip)) - ip++; - _endCount: - - /* Encode MatchLength */ - len = (ip - anchor); - /* Check output limit */ - if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)) - return (0); - if (len >= (int)ML_MASK) { - *token += ML_MASK; - len -= ML_MASK; - for (; len > 509; len -= 510) { - *op++ = 255; - *op++ = 255; - } - if (len > 254) { - len -= 255; - *op++ = 255; - } - *op++ = (BYTE)len; - } else - *token += len; - - /* Test end of chunk */ - if (ip > mflimit) { - anchor = ip; - break; - } - /* Fill table */ - HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base; - - /* Test next position */ - ref = base + HashTable[LZ4_HASH_VALUE(ip)]; - HashTable[LZ4_HASH_VALUE(ip)] = ip - base; - if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { - token = op++; - *token = 0; - goto _next_match; - } - /* Prepare next loop */ - anchor = ip++; - forwardH = LZ4_HASH_VALUE(ip); - } - - _last_literals: - /* Encode Last Literals */ - { - int lastRun = iend - anchor; - if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) > - oend) - return (0); - if (lastRun >= (int)RUN_MASK) { - *op++ = (RUN_MASK << ML_BITS); - lastRun -= RUN_MASK; - for (; lastRun > 
254; lastRun -= 255) { - *op++ = 255; - } - *op++ = (BYTE)lastRun; - } else - *op++ = (lastRun << ML_BITS); - (void) memcpy(op, anchor, iend - anchor); - op += iend - anchor; - } - - /* End */ - return (int)(((char *)op) - dest); + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; } +#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2) +/* lie to the compiler about data alignment; use with caution */ -/* Note : this function is valid only if isize < LZ4_64KLIMIT */ -#define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1)) -#define HASHLOG64K (HASH_LOG + 1) -#define HASH64KTABLESIZE (1U << HASHLOG64K) -#define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8) - \ - HASHLOG64K)) -#define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p)) +static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; } -static int -LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize, - int osize) +static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } +static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } + +#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) unalign; + +static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } + +static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } + +#else /* safe and portable access using memcpy() */ + +static U16 LZ4_read16(const void* memPtr) { - struct refTables *srt = (struct refTables *)ctx; - U16 *HashTable = (U16 *) (srt->hashTable); + U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; +} - const BYTE *ip = (BYTE *) source; - const BYTE *anchor = ip; - const BYTE *const base = ip; - const BYTE *const iend = ip + isize; - const BYTE *const oend = (BYTE *) dest + osize; - const BYTE *const mflimit = iend - MFLIMIT; -#define matchlimit (iend - LASTLITERALS) +static void LZ4_write32(void* memPtr, U32 value) +{ + LZ4_memcpy(memPtr, &value, sizeof(value)); +} - BYTE *op = (BYTE *) dest; +#endif /* LZ4_FORCE_MEMORY_ACCESS */ - int len, length; - const int skipStrength = SKIPSTRENGTH; - U32 forwardH; +static U16 LZ4_readLE16(const void* memPtr) +{ + if (LZ4_isLittleEndian()) { + return LZ4_read16(memPtr); + } else { + const BYTE* p = (const BYTE*)memPtr; + return (U16)((U16)p[0] + (p[1]<<8)); + } +} - /* Init */ - if (isize < MINLENGTH) - goto _last_literals; +/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */ +LZ4_FORCE_INLINE +void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd) +{ + BYTE* d = (BYTE*)dstPtr; + const BYTE* s = (const BYTE*)srcPtr; + BYTE* const e = (BYTE*)dstEnd; - /* First Byte */ - ip++; - forwardH = LZ4_HASH64K_VALUE(ip); + do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d> skipStrength; - ip = forwardIp; - forwardIp = ip + step; - if (forwardIp > mflimit) { - goto _last_literals; - } - - forwardH = LZ4_HASH64K_VALUE(forwardIp); - ref = base + HashTable[h]; - HashTable[h] = ip - base; - - } while (A32(ref) != A32(ip)); - - /* Catch up */ - while ((ip > anchor) && (ref > (BYTE *) source) && - (ip[-1] == ref[-1])) { - ip--; - ref--; - } - - /* Encode Literal length */ - length = ip - anchor; - token = op++; - - /* Check output limit */ - if 
(unlikely(op + length + (2 + 1 + LASTLITERALS) + - (length >> 8) > oend)) - return (0); - - if (length >= (int)RUN_MASK) { - *token = (RUN_MASK << ML_BITS); - len = length - RUN_MASK; - for (; len > 254; len -= 255) - *op++ = 255; - *op++ = (BYTE)len; - } else - *token = (length << ML_BITS); - - /* Copy Literals */ - LZ4_BLINDCOPY(anchor, op, length); - - _next_match: - /* Encode Offset */ - LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref); - - /* Start Counting */ - ip += MINMATCH; - ref += MINMATCH; /* MinMatch verified */ - anchor = ip; - while (ip < matchlimit - (STEPSIZE - 1)) { - UARCH diff = AARCH(ref) ^ AARCH(ip); - if (!diff) { - ip += STEPSIZE; - ref += STEPSIZE; - continue; - } - ip += LZ4_NbCommonBytes(diff); - goto _endCount; - } -#if LZ4_ARCH64 - if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) { - ip += 4; - ref += 4; - } +#ifndef LZ4_FAST_DEC_LOOP +# if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64 +# define LZ4_FAST_DEC_LOOP 1 +# elif defined(__aarch64__) && !defined(__clang__) + /* On aarch64, we disable this optimization for clang because on certain + * mobile chipsets, performance is reduced with clang. For information + * refer to https://github.com/lz4/lz4/pull/707 */ +# define LZ4_FAST_DEC_LOOP 1 +# else +# define LZ4_FAST_DEC_LOOP 0 +# endif #endif - if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) { - ip += 2; - ref += 2; - } - if ((ip < matchlimit) && (*ref == *ip)) - ip++; - _endCount: - /* Encode MatchLength */ - len = (ip - anchor); - /* Check output limit */ - if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)) - return (0); - if (len >= (int)ML_MASK) { - *token += ML_MASK; - len -= ML_MASK; - for (; len > 509; len -= 510) { - *op++ = 255; - *op++ = 255; - } - if (len > 254) { - len -= 255; - *op++ = 255; - } - *op++ = (BYTE)len; - } else - *token += len; +#if LZ4_FAST_DEC_LOOP - /* Test end of chunk */ - if (ip > mflimit) { - anchor = ip; - break; - } - /* Fill table */ - HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base; - - /* Test next position */ - ref = base + HashTable[LZ4_HASH64K_VALUE(ip)]; - HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base; - if (A32(ref) == A32(ip)) { - token = op++; - *token = 0; - goto _next_match; - } - /* Prepare next loop */ - anchor = ip++; - forwardH = LZ4_HASH64K_VALUE(ip); - } - - _last_literals: - /* Encode Last Literals */ - { - int lastRun = iend - anchor; - if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) > - oend) - return (0); - if (lastRun >= (int)RUN_MASK) { - *op++ = (RUN_MASK << ML_BITS); - lastRun -= RUN_MASK; - for (; lastRun > 254; lastRun -= 255) - *op++ = 255; - *op++ = (BYTE)lastRun; - } else - *op++ = (lastRun << ML_BITS); - (void) memcpy(op, anchor, iend - anchor); - op += iend - anchor; - } - - /* End */ - return (int)(((char *)op) - dest); -} - -static int -real_LZ4_compress(const char *source, char *dest, int isize, int osize) +LZ4_FORCE_INLINE void +LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset) { - void *ctx; - int result; + assert(srcPtr + offset == dstPtr); + if (offset < 8) { + LZ4_write32(dstPtr, 0); /* silence an msan warning when offset==0 */ + dstPtr[0] = srcPtr[0]; + dstPtr[1] = srcPtr[1]; + dstPtr[2] = srcPtr[2]; + dstPtr[3] = srcPtr[3]; + srcPtr += inc32table[offset]; + LZ4_memcpy(dstPtr+4, srcPtr, 4); + srcPtr -= dec64table[offset]; + dstPtr += 8; + } else { + LZ4_memcpy(dstPtr, srcPtr, 8); + dstPtr += 8; + srcPtr += 8; + } - ASSERT(lz4_cache != NULL); - ctx = kmem_cache_alloc(lz4_cache, 
KM_SLEEP); - - /* - * out of kernel memory, gently fall through - this will disable - * compression in zio_compress_data - */ - if (ctx == NULL) - return (0); - - memset(ctx, 0, sizeof (struct refTables)); - - if (isize < LZ4_64KLIMIT) - result = LZ4_compress64kCtx(ctx, source, dest, isize, osize); - else - result = LZ4_compressCtx(ctx, source, dest, isize, osize); - - kmem_cache_free(lz4_cache, ctx); - return (result); + LZ4_wildCopy8(dstPtr, srcPtr, dstEnd); } -/* Decompression functions */ +/* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd + * this version copies two times 16 bytes (instead of one time 32 bytes) + * because it must be compatible with offsets >= 16. */ +LZ4_FORCE_INLINE void +LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd) +{ + BYTE* d = (BYTE*)dstPtr; + const BYTE* s = (const BYTE*)srcPtr; + BYTE* const e = (BYTE*)dstEnd; -/* - * Note: The decoding functions real_LZ4_uncompress() and - * LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow" - * attack type. They will never write nor read outside of the provided - * output buffers. LZ4_uncompress_unknownOutputSize() also insures that - * it will never read outside of the input buffer. A corrupted input - * will produce an error result, a negative int, indicating the position - * of the error within input stream. + do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d= dstPtr + MINMATCH + * - there is at least 8 bytes available to write after dstEnd */ +LZ4_FORCE_INLINE void +LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset) +{ + BYTE v[8]; + + assert(dstEnd >= dstPtr + MINMATCH); + + switch(offset) { + case 1: + MEM_INIT(v, *srcPtr, 8); + break; + case 2: + LZ4_memcpy(v, srcPtr, 2); + LZ4_memcpy(&v[2], srcPtr, 2); + LZ4_memcpy(&v[4], v, 4); + break; + case 4: + LZ4_memcpy(v, srcPtr, 4); + LZ4_memcpy(&v[4], srcPtr, 4); + break; + default: + LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset); + return; + } + + LZ4_memcpy(dstPtr, v, 8); + dstPtr += 8; + while (dstPtr < dstEnd) { + LZ4_memcpy(dstPtr, v, 8); + dstPtr += 8; + } +} +#endif + + +/*-************************************ +* Local Structures and types +**************************************/ +typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t; + +/** + * This enum distinguishes several different modes of accessing previous + * content in the stream. + * + * - noDict : There is no preceding content. + * - withPrefix64k : Table entries up to ctx->dictSize before the current blob + * blob being compressed are valid and refer to the preceding + * content (of length ctx->dictSize), which is available + * contiguously preceding in memory the content currently + * being compressed. + * - usingExtDict : Like withPrefix64k, but the preceding content is somewhere + * else in memory, starting at ctx->dictionary with length + * ctx->dictSize. + * - usingDictCtx : Like usingExtDict, but everything concerning the preceding + * content is in a separate context, pointed to by + * ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table + * entries in the current context that refer to positions + * preceding the beginning of the current compression are + * ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx + * ->dictSize describe the location and size of the preceding + * content, and matches are found by looking in the ctx + * ->dictCtx->hashTable. 
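An aside, not part of the patch: the property all of these copy helpers must preserve is the LZ4 overlap-copy rule: for offsets smaller than the copy width, the match source overlaps bytes that this very copy has just produced. A byte-at-a-time reference sketch (the example_ name is hypothetical) shows the required result; LZ4_wildCopy32() and LZ4_memcpy_using_offset() are just faster ways of producing it.

#include <stddef.h>
#include <stdint.h>

/*
 * Copy `length` match bytes starting `offset` bytes behind the current
 * output position. Bytes written earlier in this same loop may be
 * re-read; offset 1 degenerates to run-length expansion.
 */
static void
example_overlap_copy(uint8_t *op, size_t offset, size_t length)
{
        const uint8_t *match = op - offset;

        while (length-- > 0)
                *op++ = *match++;
}

/*
 * E.g. with buf = "AB??????", example_overlap_copy(buf + 2, 2, 6)
 * leaves "ABABABAB".
 */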
+ */ +typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive; +typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; + +/*-******************************* + * Decompression functions + ********************************/ + +typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; +typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive; + +typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error; + +LZ4_FORCE_INLINE unsigned +read_variable_length(const BYTE**ip, const BYTE* lencheck, + int loop_check, int initial_check, + variable_length_error* error) +{ + U32 length = 0; + U32 s; + if (initial_check && unlikely((*ip) >= lencheck)) { /* overflow detection */ + *error = initial_error; + return length; + } + do { + s = **ip; + (*ip)++; + length += s; + if (loop_check && unlikely((*ip) >= lencheck)) { /* overflow detection */ + *error = loop_error; + return length; + } + } while (s==255); + + return length; +} + +#define LZ4_STATIC_ASSERT(c) ASSERT(c) + + +/*! LZ4_decompress_generic() : + * This generic decompression function covers all use cases. + * It shall be instantiated several times, using different sets of directives. + * Note that it is important for performance that this function really get inlined, + * in order to remove useless branches during compilation optimization. + */ +LZ4_FORCE_INLINE int +LZ4_decompress_generic( + const char* const src, + char* const dst, + int srcSize, + int outputSize, /* If endOnInput==endOnInputSize, this value is `dstCapacity` */ + + endCondition_directive endOnInput, /* endOnOutputSize, endOnInputSize */ + earlyEnd_directive partialDecoding, /* full, partial */ + dict_directive dict, /* noDict, withPrefix64k, usingExtDict */ + const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */ + const BYTE* const dictStart, /* only if dict==usingExtDict */ + const size_t dictSize /* note : = 0 if noDict */ + ) +{ + if (src == NULL) { return -1; } + + { const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + + BYTE* op = (BYTE*) dst; + BYTE* const oend = op + outputSize; + BYTE* cpy; + + const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize; + + const int safeDecode = (endOnInput==endOnInputSize); + const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB))); + + + /* Set up the "end" pointers for the shortcut. */ + const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/; + const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/; + + const BYTE* match; + size_t offset; + unsigned token; + size_t length; + + + DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize); + + /* Special cases */ + assert(lowPrefix <= op); + if ((endOnInput) && (unlikely(outputSize==0))) { + /* Empty output buffer */ + if (partialDecoding) return 0; + return ((srcSize==1) && (*ip==0)) ? 0 : -1; + } + if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 1 : -1); } + if ((endOnInput) && unlikely(srcSize==0)) { return -1; } + + /* Currently the fast loop shows a regression on qualcomm arm chips. 
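For illustration only (not part of the patch): the byte encoding read_variable_length() walks is small enough to sketch standalone. A 4-bit token field of 15 (RUN_MASK/ML_MASK) means "continuation bytes follow", and bytes keep accumulating until one is not 255; the initial_check/loop_check arguments are the bounds checking the real helper adds on top of this. The example_ name below is hypothetical.

#include <stddef.h>
#include <stdint.h>

static size_t
example_read_length(const uint8_t **pp, unsigned nibble)
{
        size_t len = nibble;            /* 0..15 straight from the token */
        const uint8_t *p = *pp;
        uint8_t s;

        if (nibble == 15) {             /* RUN_MASK / ML_MASK: more follows */
                do {
                        s = *p++;
                        len += s;
                } while (s == 255);
        }
        *pp = p;
        return (len);
}

/* Example: a nibble of 15 followed by bytes 255, 4 decodes to 274. */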
*/ +#if LZ4_FAST_DEC_LOOP + if ((oend - op) < FASTLOOP_SAFE_DISTANCE) { + DEBUGLOG(6, "skip fast decode loop"); + goto safe_decode; + } + + /* Fast loop : decode sequences as long as output < iend-FASTLOOP_SAFE_DISTANCE */ + while (1) { + /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */ + assert(oend - op >= FASTLOOP_SAFE_DISTANCE); + if (endOnInput) { assert(ip < iend); } + token = *ip++; + length = token >> ML_BITS; /* literal length */ + + assert(!endOnInput || ip <= iend); /* ip < iend before the increment */ + + /* decode literal length */ + if (length == RUN_MASK) { + variable_length_error error = ok; + length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error); + if (error == initial_error) { goto _output_error; } + if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ + if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ + + /* copy literals */ + cpy = op+length; + LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); + if (endOnInput) { /* LZ4_decompress_safe() */ + if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; } + LZ4_wildCopy32(op, ip, cpy); + } else { /* LZ4_decompress_fast() */ + if (cpy>oend-8) { goto safe_literal_copy; } + LZ4_wildCopy8(op, ip, cpy); /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time : + * it doesn't know input length, and only relies on end-of-block properties */ + } + ip += length; op = cpy; + } else { + cpy = op+length; + if (endOnInput) { /* LZ4_decompress_safe() */ + DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length); + /* We don't need to check oend, since we check it once for each loop below */ + if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; } + /* Literals can only be 14, but hope compilers optimize if we copy by a register size */ + LZ4_memcpy(op, ip, 16); + } else { /* LZ4_decompress_fast() */ + /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time : + * it doesn't know input length, and relies on end-of-block properties */ + LZ4_memcpy(op, ip, 8); + if (length > 8) { LZ4_memcpy(op+8, ip+8, 8); } + } + ip += length; op = cpy; + } + + /* get offset */ + offset = LZ4_readLE16(ip); ip+=2; + match = op - offset; + assert(match <= op); + + /* get matchlength */ + length = token & ML_MASK; + + if (length == ML_MASK) { + variable_length_error error = ok; + if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */ + length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error); + if (error != ok) { goto _output_error; } + if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */ + length += MINMATCH; + if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { + goto safe_match_copy; + } + } else { + length += MINMATCH; + if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { + goto safe_match_copy; + } + + /* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */ + if ((dict == withPrefix64k) || (match >= lowPrefix)) { + if (offset >= 8) { + assert(match >= lowPrefix); + assert(match <= op); + assert(op + 18 <= oend); + + LZ4_memcpy(op, match, 8); + LZ4_memcpy(op+8, match+8, 8); + LZ4_memcpy(op+16, match+16, 2); + op += length; + continue; + } } } + + if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* 
Error : offset outside buffers */ + /* match starting within external dictionary */ + if ((dict==usingExtDict) && (match < lowPrefix)) { + if (unlikely(op+length > oend-LASTLITERALS)) { + if (partialDecoding) { + DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd"); + length = MIN(length, (size_t)(oend-op)); + } else { + goto _output_error; /* end-of-block condition violated */ + } } + + if (length <= (size_t)(lowPrefix-match)) { + /* match fits entirely within external dictionary : just copy */ + memmove(op, dictEnd - (lowPrefix-match), length); + op += length; + } else { + /* match stretches into both external dictionary and current block */ + size_t const copySize = (size_t)(lowPrefix - match); + size_t const restSize = length - copySize; + LZ4_memcpy(op, dictEnd - copySize, copySize); + op += copySize; + if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ + BYTE* const endOfMatch = op + restSize; + const BYTE* copyFrom = lowPrefix; + while (op < endOfMatch) { *op++ = *copyFrom++; } + } else { + LZ4_memcpy(op, lowPrefix, restSize); + op += restSize; + } } + continue; + } + + /* copy match within block */ + cpy = op + length; + + assert((op <= oend) && (oend-op >= 32)); + if (unlikely(offset<16)) { + LZ4_memcpy_using_offset(op, match, cpy, offset); + } else { + LZ4_wildCopy32(op, match, cpy); + } + + op = cpy; /* wildcopy correction */ + } + safe_decode: +#endif + + /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */ + while (1) { + token = *ip++; + length = token >> ML_BITS; /* literal length */ + + assert(!endOnInput || ip <= iend); /* ip < iend before the increment */ + + /* A two-stage shortcut for the most common case: + * 1) If the literal length is 0..14, and there is enough space, + * enter the shortcut and copy 16 bytes on behalf of the literals + * (in the fast mode, only 8 bytes can be safely copied this way). + * 2) Further if the match length is 4..18, copy 18 bytes in a similar + * manner; but we ensure that there's enough space in the output for + * those 18 bytes earlier, upon entering the shortcut (in other words, + * there is a combined check for both stages). + */ + if ( (endOnInput ? length != RUN_MASK : length <= 8) + /* strictly "less than" on input, to re-enter the loop with at least one byte */ + && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) { + /* Copy the literals */ + LZ4_memcpy(op, ip, endOnInput ? 16 : 8); + op += length; ip += length; + + /* The second stage: prepare for match copying, decode full info. + * If it doesn't work out, the info won't be wasted. */ + length = token & ML_MASK; /* match length */ + offset = LZ4_readLE16(ip); ip += 2; + match = op - offset; + assert(match <= op); /* check overflow */ + + /* Do not deal with overlapping matches. */ + if ( (length != ML_MASK) + && (offset >= 8) + && (dict==withPrefix64k || match >= lowPrefix) ) { + /* Copy the match. */ + LZ4_memcpy(op + 0, match + 0, 8); + LZ4_memcpy(op + 8, match + 8, 8); + LZ4_memcpy(op +16, match +16, 2); + op += length + MINMATCH; + /* Both stages worked, load the next token. */ + continue; + } + + /* The second stage didn't work out, but the info is ready. + * Propel it right to the point of match copying. 
*/ + goto _copy_match; + } + + /* decode literal length */ + if (length == RUN_MASK) { + variable_length_error error = ok; + length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error); + if (error == initial_error) { goto _output_error; } + if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ + if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ + } + + /* copy literals */ + cpy = op+length; +#if LZ4_FAST_DEC_LOOP + safe_literal_copy: +#endif + LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); + if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) ) + || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) ) + { + /* We've either hit the input parsing restriction or the output parsing restriction. + * In the normal scenario, decoding a full block, it must be the last sequence, + * otherwise it's an error (invalid input or dimensions). + * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow. + */ + if (partialDecoding) { + /* Since we are partial decoding we may be in this block because of the output parsing + * restriction, which is not valid since the output buffer is allowed to be undersized. + */ + assert(endOnInput); + DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end") + DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length); + DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op)); + DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip)); + /* Finishing in the middle of a literals segment, + * due to lack of input. + */ + if (ip+length > iend) { + length = (size_t)(iend-ip); + cpy = op + length; + } + /* Finishing in the middle of a literals segment, + * due to lack of output space. + */ + if (cpy > oend) { + cpy = oend; + assert(op<=oend); + length = (size_t)(oend-op); + } + } else { + /* We must be on the last sequence because of the parsing limitations so check + * that we exactly regenerate the original size (must be exact when !endOnInput). + */ + if ((!endOnInput) && (cpy != oend)) { goto _output_error; } + /* We must be on the last sequence (or invalid) because of the parsing limitations + * so check that we exactly consume the input and don't overrun the output buffer. + */ + if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) { + DEBUGLOG(6, "should have been last run of literals") + DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend); + DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend); + goto _output_error; + } + } + memmove(op, ip, length); /* supports overlapping memory regions; only matters for in-place decompression scenarios */ + ip += length; + op += length; + /* Necessarily EOF when !partialDecoding. + * When partialDecoding, it is EOF if we've either + * filled the output buffer or + * can't proceed with reading an offset for following match. 
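Context note, not part of the patch: these partialDecoding branches are what back liblz4's public LZ4_decompress_safe_partial() entry point; the LZ4_uncompress_unknownOutputSize() wrapper at the bottom of this file only ever asks for decode_full_block, but the generic decoder keeps both modes. A hedged user-space sketch (the example_ name is hypothetical):

#include <lz4.h>

/*
 * Decode just the first `want` bytes of a compressed block instead of
 * the whole thing. Returns the number of bytes written (<= want), or a
 * negative value on corrupt input.
 */
static int
example_peek_prefix(const char *src, int src_len, char *dst, int want,
    int dst_cap)
{
        return (LZ4_decompress_safe_partial(src, dst, src_len, want,
            dst_cap));
}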
+ */ + if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) { + break; + } + } else { + LZ4_wildCopy8(op, ip, cpy); /* may overwrite up to WILDCOPYLENGTH beyond cpy */ + ip += length; op = cpy; + } + + /* get offset */ + offset = LZ4_readLE16(ip); ip+=2; + match = op - offset; + + /* get matchlength */ + length = token & ML_MASK; + + _copy_match: + if (length == ML_MASK) { + variable_length_error error = ok; + length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error); + if (error != ok) goto _output_error; + if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error; /* overflow detection */ + } + length += MINMATCH; + +#if LZ4_FAST_DEC_LOOP + safe_match_copy: +#endif + if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */ + /* match starting within external dictionary */ + if ((dict==usingExtDict) && (match < lowPrefix)) { + if (unlikely(op+length > oend-LASTLITERALS)) { + if (partialDecoding) length = MIN(length, (size_t)(oend-op)); + else goto _output_error; /* doesn't respect parsing restriction */ + } + + if (length <= (size_t)(lowPrefix-match)) { + /* match fits entirely within external dictionary : just copy */ + memmove(op, dictEnd - (lowPrefix-match), length); + op += length; + } else { + /* match stretches into both external dictionary and current block */ + size_t const copySize = (size_t)(lowPrefix - match); + size_t const restSize = length - copySize; + LZ4_memcpy(op, dictEnd - copySize, copySize); + op += copySize; + if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ + BYTE* const endOfMatch = op + restSize; + const BYTE* copyFrom = lowPrefix; + while (op < endOfMatch) *op++ = *copyFrom++; + } else { + LZ4_memcpy(op, lowPrefix, restSize); + op += restSize; + } } + continue; + } + assert(match >= lowPrefix); + + /* copy match within block */ + cpy = op + length; + + /* partialDecoding : may end anywhere within the block */ + assert(op<=oend); + if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { + size_t const mlen = MIN(length, (size_t)(oend-op)); + const BYTE* const matchEnd = match + mlen; + BYTE* const copyEnd = op + mlen; + if (matchEnd > op) { /* overlap copy */ + while (op < copyEnd) { *op++ = *match++; } + } else { + LZ4_memcpy(op, match, mlen); + } + op = copyEnd; + if (op == oend) { break; } + continue; + } + + if (unlikely(offset<8)) { + LZ4_write32(op, 0); /* silence msan warning when offset==0 */ + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += inc32table[offset]; + LZ4_memcpy(op+4, match, 4); + match -= dec64table[offset]; + } else { + LZ4_memcpy(op, match, 8); + match += 8; + } + op += 8; + + if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { + BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1); + if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */ + if (op < oCopyLimit) { + LZ4_wildCopy8(op, match, oCopyLimit); + match += oCopyLimit - op; + op = oCopyLimit; + } + while (op < cpy) { *op++ = *match++; } + } else { + LZ4_memcpy(op, match, 8); + if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); } + } + op = cpy; /* wildcopy correction */ + } + + /* end of decoding */ + if (endOnInput) { + DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst)); + return (int) (((char*)op)-dst); /* Nb of output bytes decoded */ + } else { + return (int) (((const char*)ip)-src); /* Nb of input bytes read */ + } + + /* 
Overflow error detected */ + _output_error: + return (int) (-(((const char*)ip)-src))-1; + } +} + +/* + * LZ4_uncompress_unknownOutputSize() : + * isize : is the input size, therefore the compressed size + * maxOutputSize : is the size of the destination buffer (which must be + * already allocated) + * return : the number of bytes decoded in the destination buffer + * (necessarily <= maxOutputSize). If the source stream is + * malformed, the function will stop decoding and return a + * negative result, indicating the byte position of the faulty + * instruction. This function never writes beyond dest + + * maxOutputSize, and is therefore protected against malicious + * data packets. + * note : Destination buffer must be already allocated. + * This version is slightly slower than real_LZ4_uncompress() * - * Note[2]: real_LZ4_uncompress(), referred to above, is not used in ZFS so - * its code is not present here. */ -static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; -#if LZ4_ARCH64 -static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; -#endif - -static int -LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize, - int maxOutputSize) +/* + * Note: In upstream code, LZ4_uncompress_unknownOutputSize is now a legacy + * wrapper for LZ4_decompress_safe which is a wrapper for + * LZ4_decompress_generic; this wrapper flattens that, rather than + * rewriting the callers. + */ +int LZ4_uncompress_unknownOutputSize(const char* source, char* dest, int compressedSize, int maxDecompressedSize) { - /* Local Variables */ - const BYTE *restrict ip = (const BYTE *) source; - const BYTE *const iend = ip + isize; - const BYTE *ref; - - BYTE *op = (BYTE *) dest; - BYTE *const oend = op + maxOutputSize; - BYTE *cpy; - - /* Main Loop */ - while (ip < iend) { - unsigned token; - size_t length; - - /* get runlength */ - token = *ip++; - if ((length = (token >> ML_BITS)) == RUN_MASK) { - int s = 255; - while ((ip < iend) && (s == 255)) { - s = *ip++; - if (unlikely(length > (size_t)(length + s))) - goto _output_error; - length += s; - } - } - /* copy literals */ - cpy = op + length; - /* CORNER-CASE: cpy might overflow. */ - if (cpy < op) - goto _output_error; /* cpy was overflowed, bail! 
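For comparison, not part of the patch: in user space the equivalent of this wrapper is liblz4's LZ4_decompress_safe(), with the same convention lz4_decompress_zfs() relies on, i.e. a negative return means corrupt input and the function never writes past dst + dstCapacity. A small hedged sketch (the example_ name is hypothetical):

#include <stdio.h>
#include <lz4.h>

static int
example_decode_block(const char *src, int src_len, char *dst, int dst_cap)
{
        int n = LZ4_decompress_safe(src, dst, src_len, dst_cap);

        if (n < 0) {
                /* Malformed stream; the output buffer was never overrun. */
                fprintf(stderr, "lz4: corrupt input (%d)\n", n);
                return (-1);
        }
        return (n);                     /* bytes produced, <= dst_cap */
}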
*/ - if ((cpy > oend - COPYLENGTH) || - (ip + length > iend - COPYLENGTH)) { - if (cpy > oend) - /* Error: writes beyond output buffer */ - goto _output_error; - if (ip + length != iend) - /* - * Error: LZ4 format requires to consume all - * input at this stage - */ - goto _output_error; - (void) memcpy(op, ip, length); - op += length; - /* Necessarily EOF, due to parsing restrictions */ - break; - } - LZ4_WILDCOPY(ip, op, cpy); - ip -= (op - cpy); - op = cpy; - - /* get offset */ - LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); - ip += 2; - if (ref < (BYTE * const) dest) - /* - * Error: offset creates reference outside of - * destination buffer - */ - goto _output_error; - - /* get matchlength */ - if ((length = (token & ML_MASK)) == ML_MASK) { - while (ip < iend) { - int s = *ip++; - if (unlikely(length > (size_t)(length + s))) - goto _output_error; - length += s; - if (s == 255) - continue; - break; - } - } - /* copy repeated sequence */ - if (unlikely(op - ref < STEPSIZE)) { -#if LZ4_ARCH64 - int dec64 = dec64table[op - ref]; -#else - const int dec64 = 0; -#endif - op[0] = ref[0]; - op[1] = ref[1]; - op[2] = ref[2]; - op[3] = ref[3]; - op += 4; - ref += 4; - ref -= dec32table[op - ref]; - A32(op) = A32(ref); - op += STEPSIZE - 4; - ref -= dec64; - } else { - LZ4_COPYSTEP(ref, op); - } - cpy = op + length - (STEPSIZE - 4); - if (cpy > oend - COPYLENGTH) { - if (cpy > oend) - /* - * Error: request to write outside of - * destination buffer - */ - goto _output_error; -#if LZ4_ARCH64 - if ((ref + COPYLENGTH) > oend) -#else - if ((ref + COPYLENGTH) > oend || - (op + COPYLENGTH) > oend) -#endif - goto _output_error; - LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); - while (op < cpy) - *op++ = *ref++; - op = cpy; - if (op == oend) - /* - * Check EOF (should never happen, since - * last 5 bytes are supposed to be literals) - */ - goto _output_error; - continue; - } - LZ4_SECURECOPY(ref, op, cpy); - op = cpy; /* correction */ - } - - /* end of decoding */ - return (int)(((char *)op) - dest); - - /* write overflow error detected */ - _output_error: - return (-1); -} - -void -lz4_init(void) -{ - lz4_cache = kmem_cache_create("lz4_cache", - sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0); -} - -void -lz4_fini(void) -{ - if (lz4_cache) { - kmem_cache_destroy(lz4_cache); - lz4_cache = NULL; - } + return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, + endOnInputSize, decode_full_block, noDict, + (BYTE*)dest, NULL, 0); } diff --git a/module/zfs/lz4_zfs.c b/module/zfs/lz4_zfs.c new file mode 100644 index 000000000000..e28215cf3501 --- /dev/null +++ b/module/zfs/lz4_zfs.c @@ -0,0 +1,880 @@ +/* + * LZ4 - Fast LZ compression algorithm + * Header File + * Copyright (C) 2011-2013, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You can contact the author at :
+ * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
+ * - LZ4 source repository : http://code.google.com/p/lz4/
+ */
+
+/*
+ * N.B. - This file seems to be based on LZ4 r85, dated Dec 10, 2012
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/zio_compress.h>
+
+static int real_LZ4_compress(const char *source, char *dest, int isize,
+    int osize);
+static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
+    int isize, int osize);
+static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
+    int isize, int osize);
+
+/* See lz4.c */
+int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
+    int isize, int maxOutputSize);
+
+static kmem_cache_t *lz4_cache;
+
+size_t
+lz4_compress_zfs(void *s_start, void *d_start, size_t s_len,
+    size_t d_len, int n)
+{
+	(void) n;
+	uint32_t bufsiz;
+	char *dest = d_start;
+
+	ASSERT(d_len >= sizeof (bufsiz));
+
+	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
+	    d_len - sizeof (bufsiz));
+
+	/* Signal an error if the compression routine returned zero. */
+	if (bufsiz == 0)
+		return (s_len);
+
+	/*
+	 * The exact compressed size is needed by the decompression routine,
+	 * so it is stored at the start of the buffer. Note that this may be
+	 * less than the compressed block size, which is rounded up to a
+	 * multiple of 1<<ashift.
+	 */
+	*(uint32_t *)dest = BE_32(bufsiz);
+
+	return (bufsiz + sizeof (bufsiz));
+}
+
+int
+lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len,
+    size_t d_len, int n)
+{
+	(void) n;
+	const char *src = s_start;
+	uint32_t bufsiz = BE_IN32(src);
+
+	/* invalid compressed buffer size encoded at start */
+	if (bufsiz + sizeof (bufsiz) > s_len)
+		return (1);
+
+	/*
+	 * Returns 0 on success (decompression function returned non-negative)
+	 * and non-zero on failure (decompression function returned negative).
+	 */
+	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
+	    d_start, bufsiz, d_len) < 0);
+}
+
+/*
+ * LZ4 API Description:
+ *
+ * Simple Functions:
+ * real_LZ4_compress() :
+ *	isize  : is the input size. Max supported value is ~1.9GB
+ *	return : the number of bytes written in buffer dest
+ *	    or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
+ *	note : destination buffer must be already allocated.
+ *	    destination buffer must be sized to handle worst cases
+ *	    situations (input data not compressible) worst case size
+ *	    evaluation is provided by function LZ4_compressBound().
+ *
+ * real_LZ4_uncompress() :
+ *	osize  : is the output size, therefore the original size
+ *	return : the number of bytes read in the source buffer.
+ *	    If the source stream is malformed, the function will stop
+ *	    decoding and return a negative result, indicating the byte
+ *	    position of the faulty instruction. This function never
+ *	    writes beyond dest + osize, and is therefore protected
+ *	    against malicious data packets.
+ *	note : destination buffer must be already allocated
+ *	note : real_LZ4_uncompress() is not used in ZFS so its code
+ *	    is not present here.
+ * + * Advanced Functions + * + * LZ4_compressBound() : + * Provides the maximum size that LZ4 may output in a "worst case" + * scenario (input data not compressible) primarily useful for memory + * allocation of output buffer. + * + * isize : is the input size. Max supported value is ~1.9GB + * return : maximum output size in a "worst case" scenario + * note : this function is limited by "int" range (2^31-1) + * + * LZ4_uncompress_unknownOutputSize() : + * isize : is the input size, therefore the compressed size + * maxOutputSize : is the size of the destination buffer (which must be + * already allocated) + * return : the number of bytes decoded in the destination buffer + * (necessarily <= maxOutputSize). If the source stream is + * malformed, the function will stop decoding and return a + * negative result, indicating the byte position of the faulty + * instruction. This function never writes beyond dest + + * maxOutputSize, and is therefore protected against malicious + * data packets. + * note : Destination buffer must be already allocated. + * This version is slightly slower than real_LZ4_uncompress() + * + * LZ4_compressCtx() : + * This function explicitly handles the CTX memory structure. + * + * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated + * by the caller (either on the stack or using kmem_cache_alloc). Passing + * NULL isn't valid. + * + * LZ4_compress64kCtx() : + * Same as LZ4_compressCtx(), but specific to small inputs (<64KB). + * isize *Must* be <64KB, otherwise the output will be corrupted. + * + * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated + * by the caller (either on the stack or using kmem_cache_alloc). Passing + * NULL isn't valid. + */ + +/* + * Tuning parameters + */ + +/* + * COMPRESSIONLEVEL: Increasing this value improves compression ratio + * Lowering this value reduces memory usage. Reduced memory usage + * typically improves speed, due to cache effect (ex: L1 32KB for Intel, + * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes + * (examples : 12 -> 16KB ; 17 -> 512KB) + */ +#define COMPRESSIONLEVEL 12 + +/* + * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the + * algorithm skip faster data segments considered "incompressible". + * This may decrease compression ratio dramatically, but will be + * faster on incompressible data. Increasing this value will make + * the algorithm search more before declaring a segment "incompressible". + * This could improve compression a bit, but will be slower on + * incompressible data. The default value (6) is recommended. + */ +#define NOTCOMPRESSIBLE_CONFIRMATION 6 + +/* + * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to + * performance for big endian cpu, but the resulting compressed stream + * will be incompatible with little-endian CPU. You can set this option + * to 1 in situations where data will stay within closed environment. + * This option is useless on Little_Endian CPU (such as x86). + */ +/* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */ + +/* + * CPU Feature Detection + */ + +/* 32 or 64 bits ? */ +#if defined(_LP64) +#define LZ4_ARCH64 1 +#else +#define LZ4_ARCH64 0 +#endif + +/* + * Little Endian or Big Endian? + * Note: overwrite the below #define if you know your architecture endianness. + */ +#if defined(_ZFS_BIG_ENDIAN) +#define LZ4_BIG_ENDIAN 1 +#else +/* + * Little Endian assumed. PDP Endian and other very rare endian format + * are unsupported. 
+ */ +#undef LZ4_BIG_ENDIAN +#endif + +/* + * Unaligned memory access is automatically enabled for "common" CPU, + * such as x86. For others CPU, the compiler will be more cautious, and + * insert extra code to ensure aligned access is respected. If you know + * your target CPU supports unaligned memory access, you may want to + * force this option manually to improve performance + */ +#if defined(__ARM_FEATURE_UNALIGNED) +#define LZ4_FORCE_UNALIGNED_ACCESS 1 +#endif + +/* + * Illumos : we can't use GCC's __builtin_ctz family of builtins in the + * kernel + * Linux : we can use GCC's __builtin_ctz family of builtins in the + * kernel + */ +#undef LZ4_FORCE_SW_BITCOUNT +#if defined(__sparc) +#define LZ4_FORCE_SW_BITCOUNT +#endif + +/* + * Compiler Options + */ +/* Disable restrict */ +#define restrict + +/* + * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it. + * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16 + */ +#ifdef GCC_VERSION +#undef GCC_VERSION +#endif + +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) +#define expect(expr, value) (__builtin_expect((expr), (value))) +#else +#define expect(expr, value) (expr) +#endif + +#ifndef likely +#define likely(expr) expect((expr) != 0, 1) +#endif + +#ifndef unlikely +#define unlikely(expr) expect((expr) != 0, 0) +#endif + +#define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \ + (((x) & 0xffu) << 8))) + +/* Basic types */ +#define BYTE uint8_t +#define U16 uint16_t +#define U32 uint32_t +#define S32 int32_t +#define U64 uint64_t + +#ifndef LZ4_FORCE_UNALIGNED_ACCESS +#pragma pack(1) +#endif + +typedef struct _U16_S { + U16 v; +} U16_S; +typedef struct _U32_S { + U32 v; +} U32_S; +typedef struct _U64_S { + U64 v; +} U64_S; + +#ifndef LZ4_FORCE_UNALIGNED_ACCESS +#pragma pack() +#endif + +#define A64(x) (((U64_S *)(x))->v) +#define A32(x) (((U32_S *)(x))->v) +#define A16(x) (((U16_S *)(x))->v) + +/* + * Constants + */ +#define MINMATCH 4 + +#define HASH_LOG COMPRESSIONLEVEL +#define HASHTABLESIZE (1 << HASH_LOG) +#define HASH_MASK (HASHTABLESIZE - 1) + +#define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? 
\
+	NOTCOMPRESSIBLE_CONFIRMATION : 2)
+
+#define	COPYLENGTH 8
+#define	LASTLITERALS 5
+#define	MFLIMIT (COPYLENGTH + MINMATCH)
+#define	MINLENGTH (MFLIMIT + 1)
+
+#define	MAXD_LOG 16
+#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)
+
+#define	ML_BITS 4
+#define	ML_MASK ((1U<<ML_BITS)-1)
+#define	RUN_BITS (8-ML_BITS)
+#define	RUN_MASK ((1U<<RUN_BITS)-1)
+
+/*
+ * Architecture-specific macros
+ */
+#if LZ4_ARCH64
+#define	STEPSIZE 8
+#define	UARCH U64
+#define	AARCH A64
+#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
+#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
+#define	HTYPE U32
+#define	INITBASE(base)		const BYTE* const base = ip
+#else /* !LZ4_ARCH64 */
+#define	STEPSIZE 4
+#define	UARCH U32
+#define	AARCH A32
+#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
+#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
+#define	HTYPE const BYTE *
+#define	INITBASE(base)		const int base = 0
+#endif /* !LZ4_ARCH64 */
+
+#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
+#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
+	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
+#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
+	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
+#else
+#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
+#define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
+#endif
+
+/* Local structures */
+struct refTables {
+	HTYPE hashTable[HASHTABLESIZE];
+};
+
+
+/* Macros */
+#define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
+	HASH_LOG))
+#define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
+#define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
+#define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
+	d = e; }
+
+
+/* Private functions */
+#if LZ4_ARCH64
+
+static inline int
+LZ4_NbCommonBytes(register U64 val)
+{
+#if defined(LZ4_BIG_ENDIAN)
+#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
+	!defined(LZ4_FORCE_SW_BITCOUNT)
+	return (__builtin_clzll(val) >> 3);
+#else
+	int r;
+	if (!(val >> 32)) {
+		r = 4;
+	} else {
+		r = 0;
+		val >>= 32;
+	}
+	if (!(val >> 16)) {
+		r += 2;
+		val >>= 8;
+	} else {
+		val >>= 24;
+	}
+	r += (!val);
+	return (r);
+#endif
+#else
+#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
+	!defined(LZ4_FORCE_SW_BITCOUNT)
+	return (__builtin_ctzll(val) >> 3);
+#else
+	static const int DeBruijnBytePos[64] =
+	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
+		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
+		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
+		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
+	};
+	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
+	    58];
+#endif
+#endif
+}
+
+#else
+
+static inline int
+LZ4_NbCommonBytes(register U32 val)
+{
+#if defined(LZ4_BIG_ENDIAN)
+#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
+	!defined(LZ4_FORCE_SW_BITCOUNT)
+	return (__builtin_clz(val) >> 3);
+#else
+	int r;
+	if (!(val >> 16)) {
+		r = 2;
+		val >>= 8;
+	} else {
+		r = 0;
+		val >>= 24;
+	}
+	r += (!val);
+	return (r);
+#endif
+#else
+#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
+	!defined(LZ4_FORCE_SW_BITCOUNT)
+	return (__builtin_ctz(val) >> 3);
+#else
+	static const int DeBruijnBytePos[32] = {
+		0, 0, 3, 0, 3, 1, 3, 0,
+		3, 2, 2, 1, 3, 2, 0, 1,
+		3, 3, 1, 2, 2, 2, 2, 0,
+		3, 1, 2, 0, 1, 0, 1, 1
+	};
+	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
+	    27];
+#endif
+#endif
+}
+
+#endif
+
+/* Compression functions */
+
+static int
+LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
+    int osize)
+{
+	struct refTables *srt = (struct refTables *)ctx;
+	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
+
+	const BYTE *ip = (BYTE *) source;
+	INITBASE(base);
+	const BYTE *anchor = ip;
+	const BYTE *const iend = ip + isize;
+	const BYTE *const oend = (BYTE *) dest + osize;
+	const BYTE *const mflimit = iend - MFLIMIT;
+#define	matchlimit (iend - LASTLITERALS)
+
+	BYTE *op = (BYTE *) dest;
+
+	int len, length;
+	const int skipStrength = SKIPSTRENGTH;
+	U32 forwardH;
+
+
+	/* Init */
+	if (isize < MINLENGTH)
+		goto _last_literals;
+
+	/* First Byte */
+	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
+	ip++;
+	forwardH = LZ4_HASH_VALUE(ip);
+
+	/* Main Loop */
+	for (;;) {
+		int findMatchAttempts = (1U << skipStrength) + 3;
+		const BYTE *forwardIp = ip;
+		const BYTE *ref;
+		BYTE *token;
+
+		/* Find a match */
+		do {
+			U32 h = forwardH;
+			int step = findMatchAttempts++ >> skipStrength;
+			ip = forwardIp;
+			forwardIp = ip + step;
+
+			if (unlikely(forwardIp > mflimit)) {
+				goto _last_literals;
+			}
+
+			forwardH = LZ4_HASH_VALUE(forwardIp);
+			ref = base + HashTable[h];
+			HashTable[h] = ip - base;
+
+		} while ((ref < ip -
MAX_DISTANCE) || (A32(ref) != A32(ip))); + + /* Catch up */ + while ((ip > anchor) && (ref > (BYTE *) source) && + unlikely(ip[-1] == ref[-1])) { + ip--; + ref--; + } + + /* Encode Literal length */ + length = ip - anchor; + token = op++; + + /* Check output limit */ + if (unlikely(op + length + (2 + 1 + LASTLITERALS) + + (length >> 8) > oend)) + return (0); + + if (length >= (int)RUN_MASK) { + *token = (RUN_MASK << ML_BITS); + len = length - RUN_MASK; + for (; len > 254; len -= 255) + *op++ = 255; + *op++ = (BYTE)len; + } else + *token = (length << ML_BITS); + + /* Copy Literals */ + LZ4_BLINDCOPY(anchor, op, length); + + _next_match: + /* Encode Offset */ + LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref); + + /* Start Counting */ + ip += MINMATCH; + ref += MINMATCH; /* MinMatch verified */ + anchor = ip; + while (likely(ip < matchlimit - (STEPSIZE - 1))) { + UARCH diff = AARCH(ref) ^ AARCH(ip); + if (!diff) { + ip += STEPSIZE; + ref += STEPSIZE; + continue; + } + ip += LZ4_NbCommonBytes(diff); + goto _endCount; + } +#if LZ4_ARCH64 + if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) { + ip += 4; + ref += 4; + } +#endif + if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) { + ip += 2; + ref += 2; + } + if ((ip < matchlimit) && (*ref == *ip)) + ip++; + _endCount: + + /* Encode MatchLength */ + len = (ip - anchor); + /* Check output limit */ + if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)) + return (0); + if (len >= (int)ML_MASK) { + *token += ML_MASK; + len -= ML_MASK; + for (; len > 509; len -= 510) { + *op++ = 255; + *op++ = 255; + } + if (len > 254) { + len -= 255; + *op++ = 255; + } + *op++ = (BYTE)len; + } else + *token += len; + + /* Test end of chunk */ + if (ip > mflimit) { + anchor = ip; + break; + } + /* Fill table */ + HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base; + + /* Test next position */ + ref = base + HashTable[LZ4_HASH_VALUE(ip)]; + HashTable[LZ4_HASH_VALUE(ip)] = ip - base; + if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { + token = op++; + *token = 0; + goto _next_match; + } + /* Prepare next loop */ + anchor = ip++; + forwardH = LZ4_HASH_VALUE(ip); + } + + _last_literals: + /* Encode Last Literals */ + { + int lastRun = iend - anchor; + if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) > + oend) + return (0); + if (lastRun >= (int)RUN_MASK) { + *op++ = (RUN_MASK << ML_BITS); + lastRun -= RUN_MASK; + for (; lastRun > 254; lastRun -= 255) { + *op++ = 255; + } + *op++ = (BYTE)lastRun; + } else + *op++ = (lastRun << ML_BITS); + (void) memcpy(op, anchor, iend - anchor); + op += iend - anchor; + } + + /* End */ + return (int)(((char *)op) - dest); +} + + + +/* Note : this function is valid only if isize < LZ4_64KLIMIT */ +#define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1)) +#define HASHLOG64K (HASH_LOG + 1) +#define HASH64KTABLESIZE (1U << HASHLOG64K) +#define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8) - \ + HASHLOG64K)) +#define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p)) + +static int +LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize, + int osize) +{ + struct refTables *srt = (struct refTables *)ctx; + U16 *HashTable = (U16 *) (srt->hashTable); + + const BYTE *ip = (BYTE *) source; + const BYTE *anchor = ip; + const BYTE *const base = ip; + const BYTE *const iend = ip + isize; + const BYTE *const oend = (BYTE *) dest + osize; + const BYTE *const mflimit = iend - MFLIMIT; +#define matchlimit (iend - LASTLITERALS) + + BYTE *op = (BYTE *) dest; + + int len, length; + const int skipStrength = 
SKIPSTRENGTH; + U32 forwardH; + + /* Init */ + if (isize < MINLENGTH) + goto _last_literals; + + /* First Byte */ + ip++; + forwardH = LZ4_HASH64K_VALUE(ip); + + /* Main Loop */ + for (;;) { + int findMatchAttempts = (1U << skipStrength) + 3; + const BYTE *forwardIp = ip; + const BYTE *ref; + BYTE *token; + + /* Find a match */ + do { + U32 h = forwardH; + int step = findMatchAttempts++ >> skipStrength; + ip = forwardIp; + forwardIp = ip + step; + + if (forwardIp > mflimit) { + goto _last_literals; + } + + forwardH = LZ4_HASH64K_VALUE(forwardIp); + ref = base + HashTable[h]; + HashTable[h] = ip - base; + + } while (A32(ref) != A32(ip)); + + /* Catch up */ + while ((ip > anchor) && (ref > (BYTE *) source) && + (ip[-1] == ref[-1])) { + ip--; + ref--; + } + + /* Encode Literal length */ + length = ip - anchor; + token = op++; + + /* Check output limit */ + if (unlikely(op + length + (2 + 1 + LASTLITERALS) + + (length >> 8) > oend)) + return (0); + + if (length >= (int)RUN_MASK) { + *token = (RUN_MASK << ML_BITS); + len = length - RUN_MASK; + for (; len > 254; len -= 255) + *op++ = 255; + *op++ = (BYTE)len; + } else + *token = (length << ML_BITS); + + /* Copy Literals */ + LZ4_BLINDCOPY(anchor, op, length); + + _next_match: + /* Encode Offset */ + LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref); + + /* Start Counting */ + ip += MINMATCH; + ref += MINMATCH; /* MinMatch verified */ + anchor = ip; + while (ip < matchlimit - (STEPSIZE - 1)) { + UARCH diff = AARCH(ref) ^ AARCH(ip); + if (!diff) { + ip += STEPSIZE; + ref += STEPSIZE; + continue; + } + ip += LZ4_NbCommonBytes(diff); + goto _endCount; + } +#if LZ4_ARCH64 + if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) { + ip += 4; + ref += 4; + } +#endif + if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) { + ip += 2; + ref += 2; + } + if ((ip < matchlimit) && (*ref == *ip)) + ip++; + _endCount: + + /* Encode MatchLength */ + len = (ip - anchor); + /* Check output limit */ + if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)) + return (0); + if (len >= (int)ML_MASK) { + *token += ML_MASK; + len -= ML_MASK; + for (; len > 509; len -= 510) { + *op++ = 255; + *op++ = 255; + } + if (len > 254) { + len -= 255; + *op++ = 255; + } + *op++ = (BYTE)len; + } else + *token += len; + + /* Test end of chunk */ + if (ip > mflimit) { + anchor = ip; + break; + } + /* Fill table */ + HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base; + + /* Test next position */ + ref = base + HashTable[LZ4_HASH64K_VALUE(ip)]; + HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base; + if (A32(ref) == A32(ip)) { + token = op++; + *token = 0; + goto _next_match; + } + /* Prepare next loop */ + anchor = ip++; + forwardH = LZ4_HASH64K_VALUE(ip); + } + + _last_literals: + /* Encode Last Literals */ + { + int lastRun = iend - anchor; + if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) > + oend) + return (0); + if (lastRun >= (int)RUN_MASK) { + *op++ = (RUN_MASK << ML_BITS); + lastRun -= RUN_MASK; + for (; lastRun > 254; lastRun -= 255) + *op++ = 255; + *op++ = (BYTE)lastRun; + } else + *op++ = (lastRun << ML_BITS); + (void) memcpy(op, anchor, iend - anchor); + op += iend - anchor; + } + + /* End */ + return (int)(((char *)op) - dest); +} + +static int +real_LZ4_compress(const char *source, char *dest, int isize, int osize) +{ + void *ctx; + int result; + + ASSERT(lz4_cache != NULL); + ctx = kmem_cache_alloc(lz4_cache, KM_SLEEP); + + /* + * out of kernel memory, gently fall through - this will disable + * compression in zio_compress_data + */ + if (ctx == NULL) + return (0); + + 
memset(ctx, 0, sizeof (struct refTables));
+
+	if (isize < LZ4_64KLIMIT)
+		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
+	else
+		result = LZ4_compressCtx(ctx, source, dest, isize, osize);
+
+	kmem_cache_free(lz4_cache, ctx);
+	return (result);
+}
+
+void
+lz4_init(void)
+{
+	lz4_cache = kmem_cache_create("lz4_cache",
+	    sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0);
+}
+
+void
+lz4_fini(void)
+{
+	if (lz4_cache) {
+		kmem_cache_destroy(lz4_cache);
+		lz4_cache = NULL;
+	}
+}
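
Note on the block framing this split preserves (illustrative only, not part of the patch): lz4_compress_zfs() stores the exact compressed size as a 4-byte big-endian prefix ahead of the LZ4 stream, and lz4_decompress_zfs() rejects any buffer whose prefix does not fit inside s_len before handing the payload to LZ4_uncompress_unknownOutputSize(). The userland sketch below mimics that framing; frame_compress()/frame_decompress() are hypothetical helpers, and liblz4's LZ4_compress_default()/LZ4_decompress_safe() merely stand in for the in-kernel real_LZ4_compress()/LZ4_uncompress_unknownOutputSize().

/*
 * Userland sketch of the ZFS lz4 block framing; assumes liblz4 (-llz4).
 */
#include <lz4.h>
#include <stdint.h>
#include <stdio.h>

static size_t
frame_compress(const void *s_start, void *d_start, size_t s_len, size_t d_len)
{
	char *dest = d_start;
	int bufsiz;

	if (d_len < sizeof (uint32_t))
		return (s_len);		/* treat as incompressible */

	bufsiz = LZ4_compress_default(s_start, &dest[sizeof (uint32_t)],
	    (int)s_len, (int)(d_len - sizeof (uint32_t)));
	if (bufsiz <= 0)
		return (s_len);		/* did not fit: caller stores it raw */

	/* Exact compressed size, stored big-endian at the start. */
	dest[0] = (char)(bufsiz >> 24);
	dest[1] = (char)(bufsiz >> 16);
	dest[2] = (char)(bufsiz >> 8);
	dest[3] = (char)bufsiz;
	return ((size_t)bufsiz + sizeof (uint32_t));
}

static int
frame_decompress(const void *s_start, void *d_start, size_t s_len, size_t d_len)
{
	const unsigned char *src = s_start;
	uint32_t bufsiz;

	if (s_len < sizeof (uint32_t))
		return (1);
	bufsiz = ((uint32_t)src[0] << 24) | ((uint32_t)src[1] << 16) |
	    ((uint32_t)src[2] << 8) | (uint32_t)src[3];

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (uint32_t) > s_len)
		return (1);

	/* 0 on success, non-zero if the stream is malformed or truncated. */
	return (LZ4_decompress_safe((const char *)&src[sizeof (uint32_t)],
	    d_start, (int)bufsiz, (int)d_len) < 0);
}

int
main(void)
{
	const char msg[] = "hello hello hello hello hello hello hello";
	char packed[256], out[256];
	size_t clen = frame_compress(msg, packed, sizeof (msg), sizeof (packed));

	if (clen < sizeof (msg) &&
	    frame_decompress(packed, out, clen, sizeof (out)) == 0)
		(void) printf("round trip ok: %s\n", out);
	return (0);
}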