MFV: xz-embedded 3f438e15109229bb14ab45f285f4bff5412a9542
MFC after: 2 weeks
This commit is contained in:
commit
cd3a777bca
@ -84,6 +84,21 @@ Embedding into userspace applications
|
||||
environment. Probably you should at least skim through it even if the
|
||||
default file works as is.
|
||||
|
||||
Supporting concatenated .xz files
|
||||
|
||||
Regular .xz files can be concatenated as is and the xz command line
|
||||
tool will decompress all streams from a concatenated file (a few
|
||||
other popular formats and tools support this too). Such .xz
|
||||
files aren't as uncommon as one might think because pxz, an early
|
||||
threaded XZ compressor, created this kind of .xz files.
|
||||
|
||||
The xz_dec_run() function will stop after decompressing one stream.
|
||||
This is good when XZ data is stored inside some other file format.
|
||||
However, if one is decompressing regular standalone .xz files, one
|
||||
will want to decompress all streams in the file. This is easy with
|
||||
xz_dec_catrun(). To include support for xz_dec_catrun(), you need
|
||||
to #define XZ_DEC_CONCATENATED in xz_config.h or in compiler flags.
|
||||
|
||||
Integrity check support
|
||||
|
||||
XZ Embedded always supports the integrity check types None and
|
||||
|
@ -13,13 +13,13 @@ Introduction
|
||||
The XZ decompressor in Linux is called XZ Embedded. It supports
|
||||
the LZMA2 filter and optionally also BCJ filters. CRC32 is supported
|
||||
for integrity checking. The home page of XZ Embedded is at
|
||||
<http://tukaani.org/xz/embedded.html>, where you can find the
|
||||
<https://tukaani.org/xz/embedded.html>, where you can find the
|
||||
latest version and also information about using the code outside
|
||||
the Linux kernel.
|
||||
|
||||
For userspace, XZ Utils provide a zlib-like compression library
|
||||
and a gzip-like command line tool. XZ Utils can be downloaded from
|
||||
<http://tukaani.org/xz/>.
|
||||
<https://tukaani.org/xz/>.
|
||||
|
||||
XZ related components in the kernel
|
||||
|
||||
@ -107,7 +107,7 @@ Conformance to the .xz file format specification
|
||||
Reporting bugs
|
||||
|
||||
Before reporting a bug, please check that it's not fixed already
|
||||
at upstream. See <http://tukaani.org/xz/embedded.html> to get the
|
||||
at upstream. See <https://tukaani.org/xz/embedded.html> to get the
|
||||
latest code.
|
||||
|
||||
Report bugs to <lasse.collin@tukaani.org> or visit #tukaani on
|
||||
|
@ -2,7 +2,7 @@
|
||||
* XZ decompressor
|
||||
*
|
||||
* Authors: Lasse Collin <lasse.collin@tukaani.org>
|
||||
* Igor Pavlov <http://7-zip.org/>
|
||||
* Igor Pavlov <https://7-zip.org/>
|
||||
*
|
||||
* This file has been put into the public domain.
|
||||
* You can do whatever you want with this file.
|
||||
@ -37,7 +37,7 @@ extern "C" {
|
||||
* enum xz_mode - Operation mode
|
||||
*
|
||||
* @XZ_SINGLE: Single-call mode. This uses less RAM than
|
||||
* than multi-call modes, because the LZMA2
|
||||
* multi-call modes, because the LZMA2
|
||||
* dictionary doesn't need to be allocated as
|
||||
* part of the decoder state. All required data
|
||||
* structures are allocated at initialization,
|
||||
@ -203,7 +203,7 @@ struct xz_dec;
|
||||
XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max);
|
||||
|
||||
/**
|
||||
* xz_dec_run() - Run the XZ decoder
|
||||
* xz_dec_run() - Run the XZ decoder for a single XZ stream
|
||||
* @s: Decoder state allocated using xz_dec_init()
|
||||
* @b: Input and output buffers
|
||||
*
|
||||
@ -219,9 +219,51 @@ XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max);
|
||||
* cannot give the single-call decoder a too small buffer and then expect to
|
||||
* get that amount valid data from the beginning of the stream. You must use
|
||||
* the multi-call decoder if you don't want to uncompress the whole stream.
|
||||
*
|
||||
* Use xz_dec_run() when XZ data is stored inside some other file format.
|
||||
* The decoding will stop after one XZ stream has been decompressed. To
|
||||
* decompress regular .xz files which might have multiple concatenated
|
||||
* streams, use xz_dec_catrun() instead.
|
||||
*/
|
||||
XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b);
|
||||
|
||||
/**
|
||||
* xz_dec_catrun() - Run the XZ decoder with support for concatenated streams
|
||||
* @s: Decoder state allocated using xz_dec_init()
|
||||
* @b: Input and output buffers
|
||||
* @finish: This is an int instead of bool to avoid requiring stdbool.h.
|
||||
* As long as more input might be coming, finish must be false.
|
||||
* When the caller knows that it has provided all the input to
|
||||
* the decoder (some possibly still in b->in), it must set finish
|
||||
* to true. Only when finish is true can this function return
|
||||
* XZ_STREAM_END to indicate successful decompression of the
|
||||
* file. In single-call mode (XZ_SINGLE) finish is assumed to
|
||||
* always be true; the caller-provided value is ignored.
|
||||
*
|
||||
* This is like xz_dec_run() except that this makes it easy to decode .xz
|
||||
* files with multiple streams (multiple .xz files concatenated as is).
|
||||
* The rarely-used Stream Padding feature is supported too, that is, there
|
||||
* can be null bytes after or between the streams. The number of null bytes
|
||||
* must be a multiple of four.
|
||||
*
|
||||
* When finish is false and b->in_pos == b->in_size, it is possible that
|
||||
* XZ_BUF_ERROR isn't returned even when no progress is possible (XZ_OK is
|
||||
* returned instead). This shouldn't matter because in this situation a
|
||||
* reasonable caller will attempt to provide more input or set finish to
|
||||
* true for the next xz_dec_catrun() call anyway.
|
||||
*
|
||||
* For any struct xz_dec that has been initialized for multi-call mode:
|
||||
* Once decoding has been started with xz_dec_run() or xz_dec_catrun(),
|
||||
* the same function must be used until xz_dec_reset() or xz_dec_end().
|
||||
* Switching between the two decoding functions without resetting results
|
||||
* in undefined behavior.
|
||||
*
|
||||
* xz_dec_catrun() is only available if XZ_DEC_CONCATENATED was defined
|
||||
* at compile time.
|
||||
*/
|
||||
XZ_EXTERN enum xz_ret xz_dec_catrun(struct xz_dec *s, struct xz_buf *b,
|
||||
int finish);
|
||||
|
||||
/**
|
||||
* xz_dec_reset() - Reset an already allocated decoder state
|
||||
* @s: Decoder state allocated using xz_dec_init()
|
||||
@ -242,6 +284,112 @@ XZ_EXTERN void xz_dec_reset(struct xz_dec *s);
|
||||
*/
|
||||
XZ_EXTERN void xz_dec_end(struct xz_dec *s);
|
||||
|
||||
/*
|
||||
* Decompressor for MicroLZMA, an LZMA variant with a very minimal header.
|
||||
* See xz_dec_microlzma_alloc() below for details.
|
||||
*
|
||||
* These functions aren't used or available in preboot code and thus aren't
|
||||
* marked with XZ_EXTERN. This avoids warnings about static functions that
|
||||
* are never defined.
|
||||
*/
|
||||
/**
|
||||
* struct xz_dec_microlzma - Opaque type to hold the MicroLZMA decoder state
|
||||
*/
|
||||
struct xz_dec_microlzma;
|
||||
|
||||
/**
|
||||
* xz_dec_microlzma_alloc() - Allocate memory for the MicroLZMA decoder
|
||||
* @mode: XZ_SINGLE or XZ_PREALLOC
|
||||
* @dict_size: LZMA dictionary size. This must be at least 4 KiB and
|
||||
* at most 3 GiB.
|
||||
*
|
||||
* In contrast to xz_dec_init(), this function only allocates the memory
|
||||
* and remembers the dictionary size. xz_dec_microlzma_reset() must be used
|
||||
* before calling xz_dec_microlzma_run().
|
||||
*
|
||||
* The amount of allocated memory is a little less than 30 KiB with XZ_SINGLE.
|
||||
* With XZ_PREALLOC also a dictionary buffer of dict_size bytes is allocated.
|
||||
*
|
||||
* On success, xz_dec_microlzma_alloc() returns a pointer to
|
||||
* struct xz_dec_microlzma. If memory allocation fails or
|
||||
* dict_size is invalid, NULL is returned.
|
||||
*
|
||||
* The compressed format supported by this decoder is a raw LZMA stream
|
||||
* whose first byte (always 0x00) has been replaced with bitwise-negation
|
||||
* of the LZMA properties (lc/lp/pb) byte. For example, if lc/lp/pb is
|
||||
* 3/0/2, the first byte is 0xA2. This way the first byte can never be 0x00.
|
||||
* Just like with LZMA2, lc + lp <= 4 must be true. The LZMA end-of-stream
|
||||
* marker must not be used. The unused values are reserved for future use.
|
||||
* This MicroLZMA header format was created for use in EROFS but may be used
|
||||
* by others too.
|
||||
*/
|
||||
extern struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode,
|
||||
uint32_t dict_size);
|
||||
|
||||
/**
|
||||
* xz_dec_microlzma_reset() - Reset the MicroLZMA decoder state
|
||||
* @s: Decoder state allocated using xz_dec_microlzma_alloc()
|
||||
* @comp_size: Compressed size of the input stream
|
||||
* @uncomp_size: Uncompressed size of the input stream. A value smaller
|
||||
* than the real uncompressed size of the input stream can
|
||||
* be specified if uncomp_size_is_exact is set to false.
|
||||
* uncomp_size can never be set to a value larger than the
|
||||
* expected real uncompressed size because it would eventually
|
||||
* result in XZ_DATA_ERROR.
|
||||
* @uncomp_size_is_exact: This is an int instead of bool to avoid
|
||||
* requiring stdbool.h. This should normally be set to true.
|
||||
* When this is set to false, error detection is weaker.
|
||||
*/
|
||||
extern void xz_dec_microlzma_reset(struct xz_dec_microlzma *s,
|
||||
uint32_t comp_size, uint32_t uncomp_size,
|
||||
int uncomp_size_is_exact);
|
||||
|
||||
/**
|
||||
* xz_dec_microlzma_run() - Run the MicroLZMA decoder
|
||||
* @s: Decoder state initialized using xz_dec_microlzma_reset()
|
||||
* @b: Input and output buffers
|
||||
*
|
||||
* This works similarly to xz_dec_run() with a few important differences.
|
||||
* Only the differences are documented here.
|
||||
*
|
||||
* The only possible return values are XZ_OK, XZ_STREAM_END, and
|
||||
* XZ_DATA_ERROR. This function cannot return XZ_BUF_ERROR: if no progress
|
||||
* is possible due to lack of input data or output space, this function will
|
||||
* keep returning XZ_OK. Thus, the calling code must be written so that it
|
||||
* will eventually provide input and output space matching (or exceeding)
|
||||
* comp_size and uncomp_size arguments given to xz_dec_microlzma_reset().
|
||||
* If the caller cannot do this (for example, if the input file is truncated
|
||||
* or otherwise corrupt), the caller must detect this error by itself to
|
||||
* avoid an infinite loop.
|
||||
*
|
||||
* If the compressed data seems to be corrupt, XZ_DATA_ERROR is returned.
|
||||
* This can happen also when incorrect dictionary, uncompressed, or
|
||||
* compressed sizes have been specified.
|
||||
*
|
||||
* With XZ_PREALLOC only: As an extra feature, b->out may be NULL to skip over
|
||||
* uncompressed data. This way the caller doesn't need to provide a temporary
|
||||
* output buffer for the bytes that will be ignored.
|
||||
*
|
||||
* With XZ_SINGLE only: In contrast to xz_dec_run(), the return value XZ_OK
|
||||
* is also possible and thus XZ_SINGLE is actually a limited multi-call mode.
|
||||
* After XZ_OK the bytes decoded so far may be read from the output buffer.
|
||||
* It is possible to continue decoding but the variables b->out and b->out_pos
|
||||
* MUST NOT be changed by the caller. Increasing the value of b->out_size is
|
||||
* allowed to make more output space available; one doesn't need to provide
|
||||
* space for the whole uncompressed data on the first call. The input buffer
|
||||
* may be changed normally like with XZ_PREALLOC. This way input data can be
|
||||
* provided from non-contiguous memory.
|
||||
*/
|
||||
extern enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s,
|
||||
struct xz_buf *b);
|
||||
|
||||
/**
|
||||
* xz_dec_microlzma_end() - Free the memory allocated for the decoder state
|
||||
* @s: Decoder state allocated using xz_dec_microlzma_alloc().
|
||||
* If s is NULL, this function does nothing.
|
||||
*/
|
||||
extern void xz_dec_microlzma_end(struct xz_dec_microlzma *s);
|
||||
|
||||
/*
|
||||
* Standalone build (userspace build or in-kernel build for boot time use)
|
||||
* needs a CRC32 implementation. For normal in-kernel use, kernel's own
|
||||
|
@ -20,10 +20,10 @@
|
||||
*
|
||||
* The worst case for in-place decompression is that the beginning of
|
||||
* the file is compressed extremely well, and the rest of the file is
|
||||
* uncompressible. Thus, we must look for worst-case expansion when the
|
||||
* compressor is encoding uncompressible data.
|
||||
* incompressible. Thus, we must look for worst-case expansion when the
|
||||
* compressor is encoding incompressible data.
|
||||
*
|
||||
* The structure of the .xz file in case of a compresed kernel is as follows.
|
||||
* The structure of the .xz file in case of a compressed kernel is as follows.
|
||||
* Sizes (as bytes) of the fields are in parentheses.
|
||||
*
|
||||
* Stream Header (12)
|
||||
@ -58,7 +58,7 @@
|
||||
* uncompressed size of the payload is in practice never less than the
|
||||
* payload size itself. The LZMA2 format would allow uncompressed size
|
||||
* to be less than the payload size, but no sane compressor creates such
|
||||
* files. LZMA2 supports storing uncompressible data in uncompressed form,
|
||||
* files. LZMA2 supports storing incompressible data in uncompressed form,
|
||||
* so there's never a need to create payloads whose uncompressed size is
|
||||
* smaller than the compressed size.
|
||||
*
|
||||
@ -167,8 +167,8 @@
|
||||
* memeq and memzero are not used much and any remotely sane implementation
|
||||
* is fast enough. memcpy/memmove speed matters in multi-call mode, but
|
||||
* the kernel image is decompressed in single-call mode, in which only
|
||||
* memcpy speed can matter and only if there is a lot of uncompressible data
|
||||
* (LZMA2 stores uncompressible chunks in uncompressed form). Thus, the
|
||||
* memmove speed can matter and only if there is a lot of incompressible data
|
||||
* (LZMA2 stores incompressible chunks in uncompressed form). Thus, the
|
||||
* functions below should just be kept small; it's probably not worth
|
||||
* optimizing for speed.
|
||||
*/
|
||||
|
@ -38,6 +38,19 @@ config XZ_DEC_SPARC
|
||||
default y if SPARC
|
||||
select XZ_DEC_BCJ
|
||||
|
||||
config XZ_DEC_MICROLZMA
|
||||
bool "MicroLZMA decoder"
|
||||
default n
|
||||
help
|
||||
MicroLZMA is a header format variant where the first byte
|
||||
of a raw LZMA stream (without the end of stream marker) has
|
||||
been replaced with a bitwise-negation of the lc/lp/pb
|
||||
properties byte. MicroLZMA was created to be used in EROFS
|
||||
but can be used by other things too where wasting minimal
|
||||
amount of space for headers is important.
|
||||
|
||||
Unless you know that you need this, say N.
|
||||
|
||||
endif
|
||||
|
||||
config XZ_DEC_BCJ
|
||||
|
@ -2,7 +2,7 @@
|
||||
* CRC32 using the polynomial from IEEE-802.3
|
||||
*
|
||||
* Authors: Lasse Collin <lasse.collin@tukaani.org>
|
||||
* Igor Pavlov <http://7-zip.org/>
|
||||
* Igor Pavlov <https://7-zip.org/>
|
||||
*
|
||||
* This file has been put into the public domain.
|
||||
* You can do whatever you want with this file.
|
||||
|
@ -4,7 +4,7 @@
|
||||
* This file is similar to xz_crc32.c. See the comments there.
|
||||
*
|
||||
* Authors: Lasse Collin <lasse.collin@tukaani.org>
|
||||
* Igor Pavlov <http://7-zip.org/>
|
||||
* Igor Pavlov <https://7-zip.org/>
|
||||
*
|
||||
* This file has been put into the public domain.
|
||||
* You can do whatever you want with this file.
|
||||
@ -20,7 +20,11 @@ STATIC_RW_DATA uint64_t xz_crc64_table[256];
|
||||
|
||||
XZ_EXTERN void xz_crc64_init(void)
|
||||
{
|
||||
const uint64_t poly = 0xC96C5795D7870F42;
|
||||
/*
|
||||
* The ULL suffix is needed for -std=gnu89 compatibility
|
||||
* on 32-bit platforms.
|
||||
*/
|
||||
const uint64_t poly = 0xC96C5795D7870F42ULL;
|
||||
|
||||
uint32_t i;
|
||||
uint32_t j;
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Branch/Call/Jump (BCJ) filter decoders
|
||||
*
|
||||
* Authors: Lasse Collin <lasse.collin@tukaani.org>
|
||||
* Igor Pavlov <http://7-zip.org/>
|
||||
* Igor Pavlov <https://7-zip.org/>
|
||||
*
|
||||
* This file has been put into the public domain.
|
||||
* You can do whatever you want with this file.
|
||||
@ -422,7 +422,7 @@ XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
|
||||
|
||||
/*
|
||||
* Flush pending already filtered data to the output buffer. Return
|
||||
* immediatelly if we couldn't flush everything, or if the next
|
||||
* immediately if we couldn't flush everything, or if the next
|
||||
* filter in the chain had already returned XZ_STREAM_END.
|
||||
*/
|
||||
if (s->temp.filtered > 0) {
|
||||
|
@ -2,7 +2,7 @@
|
||||
* LZMA2 decoder
|
||||
*
|
||||
* Authors: Lasse Collin <lasse.collin@tukaani.org>
|
||||
* Igor Pavlov <http://7-zip.org/>
|
||||
* Igor Pavlov <https://7-zip.org/>
|
||||
*
|
||||
* This file has been put into the public domain.
|
||||
* You can do whatever you want with this file.
|
||||
@ -147,8 +147,8 @@ struct lzma_dec {
|
||||
|
||||
/*
|
||||
* LZMA properties or related bit masks (number of literal
|
||||
* context bits, a mask dervied from the number of literal
|
||||
* position bits, and a mask dervied from the number
|
||||
* context bits, a mask derived from the number of literal
|
||||
* position bits, and a mask derived from the number of
|
||||
* position bits)
|
||||
*/
|
||||
uint32_t lc;
|
||||
@ -248,6 +248,10 @@ struct lzma2_dec {
|
||||
* before the first LZMA chunk.
|
||||
*/
|
||||
bool need_props;
|
||||
|
||||
#ifdef XZ_DEC_MICROLZMA
|
||||
bool pedantic_microlzma;
|
||||
#endif
|
||||
};
|
||||
|
||||
struct xz_dec_lzma2 {
|
||||
@ -387,7 +391,14 @@ static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
|
||||
|
||||
*left -= copy_size;
|
||||
|
||||
memcpy(dict->buf + dict->pos, b->in + b->in_pos, copy_size);
|
||||
/*
|
||||
* If doing in-place decompression in single-call mode and the
|
||||
* uncompressed size of the file is larger than the caller
|
||||
* thought (i.e. it is invalid input!), the buffers below may
|
||||
* overlap and cause undefined behavior with memcpy().
|
||||
* With valid inputs memcpy() would be fine here.
|
||||
*/
|
||||
memmove(dict->buf + dict->pos, b->in + b->in_pos, copy_size);
|
||||
dict->pos += copy_size;
|
||||
|
||||
if (dict->full < dict->pos)
|
||||
@ -397,7 +408,11 @@ static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
|
||||
if (dict->pos == dict->end)
|
||||
dict->pos = 0;
|
||||
|
||||
memcpy(b->out + b->out_pos, b->in + b->in_pos,
|
||||
/*
|
||||
* Like above but for multi-call mode: use memmove()
|
||||
* to avoid undefined behavior with invalid input.
|
||||
*/
|
||||
memmove(b->out + b->out_pos, b->in + b->in_pos,
|
||||
copy_size);
|
||||
}
|
||||
|
||||
@ -408,6 +423,12 @@ static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef XZ_DEC_MICROLZMA
|
||||
# define DICT_FLUSH_SUPPORTS_SKIPPING true
|
||||
#else
|
||||
# define DICT_FLUSH_SUPPORTS_SKIPPING false
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Flush pending data from dictionary to b->out. It is assumed that there is
|
||||
* enough space in b->out. This is guaranteed because caller uses dict_limit()
|
||||
@ -421,8 +442,19 @@ static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b)
|
||||
if (dict->pos == dict->end)
|
||||
dict->pos = 0;
|
||||
|
||||
memcpy(b->out + b->out_pos, dict->buf + dict->start,
|
||||
copy_size);
|
||||
/*
|
||||
* These buffers cannot overlap even if doing in-place
|
||||
* decompression because in multi-call mode dict->buf
|
||||
* has been allocated by us in this file; it's not
|
||||
* provided by the caller like in single-call mode.
|
||||
*
|
||||
* With MicroLZMA, b->out can be NULL to skip bytes that
|
||||
* the caller doesn't need. This cannot be done with XZ
|
||||
* because it would break BCJ filters.
|
||||
*/
|
||||
if (!DICT_FLUSH_SUPPORTS_SKIPPING || b->out != NULL)
|
||||
memcpy(b->out + b->out_pos, dict->buf + dict->start,
|
||||
copy_size);
|
||||
}
|
||||
|
||||
dict->start = dict->pos;
|
||||
@ -484,11 +516,11 @@ static __always_inline void rc_normalize(struct rc_dec *rc)
|
||||
}
|
||||
|
||||
/*
|
||||
* Decode one bit. In some versions, this function has been splitted in three
|
||||
* Decode one bit. In some versions, this function has been split in three
|
||||
* functions so that the compiler is supposed to be able to more easily avoid
|
||||
* an extra branch. In this particular version of the LZMA decoder, this
|
||||
* doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3
|
||||
* on x86). Using a non-splitted version results in nicer looking code too.
|
||||
* on x86). Using a non-split version results in nicer looking code too.
|
||||
*
|
||||
* NOTE: This must return an int. Do not make it return a bool or the speed
|
||||
* of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care,
|
||||
@ -761,7 +793,7 @@ static bool lzma_main(struct xz_dec_lzma2 *s)
|
||||
}
|
||||
|
||||
/*
|
||||
* Reset the LZMA decoder and range decoder state. Dictionary is nore reset
|
||||
* Reset the LZMA decoder and range decoder state. Dictionary is not reset
|
||||
* here, because LZMA state may be reset without resetting the dictionary.
|
||||
*/
|
||||
static void lzma_reset(struct xz_dec_lzma2 *s)
|
||||
@ -774,6 +806,7 @@ static void lzma_reset(struct xz_dec_lzma2 *s)
|
||||
s->lzma.rep1 = 0;
|
||||
s->lzma.rep2 = 0;
|
||||
s->lzma.rep3 = 0;
|
||||
s->lzma.len = 0;
|
||||
|
||||
/*
|
||||
* All probabilities are initialized to the same value. This hack
|
||||
@ -1043,6 +1076,8 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
|
||||
|
||||
s->lzma2.sequence = SEQ_LZMA_PREPARE;
|
||||
|
||||
/* Fall through */
|
||||
|
||||
case SEQ_LZMA_PREPARE:
|
||||
if (s->lzma2.compressed < RC_INIT_BYTES)
|
||||
return XZ_DATA_ERROR;
|
||||
@ -1053,6 +1088,8 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
|
||||
s->lzma2.compressed -= RC_INIT_BYTES;
|
||||
s->lzma2.sequence = SEQ_LZMA_RUN;
|
||||
|
||||
/* Fall through */
|
||||
|
||||
case SEQ_LZMA_RUN:
|
||||
/*
|
||||
* Set dictionary limit to indicate how much we want
|
||||
@ -1142,6 +1179,7 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props)
|
||||
|
||||
if (DEC_IS_DYNALLOC(s->dict.mode)) {
|
||||
if (s->dict.allocated < s->dict.size) {
|
||||
s->dict.allocated = s->dict.size;
|
||||
vfree(s->dict.buf);
|
||||
s->dict.buf = vmalloc(s->dict.size);
|
||||
if (s->dict.buf == NULL) {
|
||||
@ -1152,8 +1190,6 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props)
|
||||
}
|
||||
}
|
||||
|
||||
s->lzma.len = 0;
|
||||
|
||||
s->lzma2.sequence = SEQ_CONTROL;
|
||||
s->lzma2.need_dict_reset = true;
|
||||
|
||||
@ -1169,3 +1205,140 @@ XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s)
|
||||
|
||||
kfree(s);
|
||||
}
|
||||
|
||||
#ifdef XZ_DEC_MICROLZMA
|
||||
/* This is a wrapper struct to have a nice struct name in the public API. */
|
||||
struct xz_dec_microlzma {
|
||||
struct xz_dec_lzma2 s;
|
||||
};
|
||||
|
||||
enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s_ptr,
|
||||
struct xz_buf *b)
|
||||
{
|
||||
struct xz_dec_lzma2 *s = &s_ptr->s;
|
||||
|
||||
/*
|
||||
* sequence is SEQ_PROPERTIES before the first input byte,
|
||||
* SEQ_LZMA_PREPARE until a total of five bytes have been read,
|
||||
* and SEQ_LZMA_RUN for the rest of the input stream.
|
||||
*/
|
||||
if (s->lzma2.sequence != SEQ_LZMA_RUN) {
|
||||
if (s->lzma2.sequence == SEQ_PROPERTIES) {
|
||||
/* One byte is needed for the props. */
|
||||
if (b->in_pos >= b->in_size)
|
||||
return XZ_OK;
|
||||
|
||||
/*
|
||||
* Don't increment b->in_pos here. The same byte is
|
||||
* also passed to rc_read_init() which will ignore it.
|
||||
*/
|
||||
if (!lzma_props(s, ~b->in[b->in_pos]))
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
s->lzma2.sequence = SEQ_LZMA_PREPARE;
|
||||
}
|
||||
|
||||
/*
|
||||
* xz_dec_microlzma_reset() doesn't validate the compressed
|
||||
* size so we do it here. We have to limit the maximum size
|
||||
* to avoid integer overflows in lzma2_lzma(). 3 GiB is a nice
|
||||
* round number and much more than users of this code should
|
||||
* ever need.
|
||||
*/
|
||||
if (s->lzma2.compressed < RC_INIT_BYTES
|
||||
|| s->lzma2.compressed > (3U << 30))
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
if (!rc_read_init(&s->rc, b))
|
||||
return XZ_OK;
|
||||
|
||||
s->lzma2.compressed -= RC_INIT_BYTES;
|
||||
s->lzma2.sequence = SEQ_LZMA_RUN;
|
||||
|
||||
dict_reset(&s->dict, b);
|
||||
}
|
||||
|
||||
/* This is to allow increasing b->out_size between calls. */
|
||||
if (DEC_IS_SINGLE(s->dict.mode))
|
||||
s->dict.end = b->out_size - b->out_pos;
|
||||
|
||||
while (true) {
|
||||
dict_limit(&s->dict, min_t(size_t, b->out_size - b->out_pos,
|
||||
s->lzma2.uncompressed));
|
||||
|
||||
if (!lzma2_lzma(s, b))
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
s->lzma2.uncompressed -= dict_flush(&s->dict, b);
|
||||
|
||||
if (s->lzma2.uncompressed == 0) {
|
||||
if (s->lzma2.pedantic_microlzma) {
|
||||
if (s->lzma2.compressed > 0 || s->lzma.len > 0
|
||||
|| !rc_is_finished(&s->rc))
|
||||
return XZ_DATA_ERROR;
|
||||
}
|
||||
|
||||
return XZ_STREAM_END;
|
||||
}
|
||||
|
||||
if (b->out_pos == b->out_size)
|
||||
return XZ_OK;
|
||||
|
||||
if (b->in_pos == b->in_size
|
||||
&& s->temp.size < s->lzma2.compressed)
|
||||
return XZ_OK;
|
||||
}
|
||||
}
|
||||
|
||||
struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode,
|
||||
uint32_t dict_size)
|
||||
{
|
||||
struct xz_dec_microlzma *s;
|
||||
|
||||
/* Restrict dict_size to the same range as in the LZMA2 code. */
|
||||
if (dict_size < 4096 || dict_size > (3U << 30))
|
||||
return NULL;
|
||||
|
||||
s = kmalloc(sizeof(*s), GFP_KERNEL);
|
||||
if (s == NULL)
|
||||
return NULL;
|
||||
|
||||
s->s.dict.mode = mode;
|
||||
s->s.dict.size = dict_size;
|
||||
|
||||
if (DEC_IS_MULTI(mode)) {
|
||||
s->s.dict.end = dict_size;
|
||||
|
||||
s->s.dict.buf = vmalloc(dict_size);
|
||||
if (s->s.dict.buf == NULL) {
|
||||
kfree(s);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, uint32_t comp_size,
|
||||
uint32_t uncomp_size, int uncomp_size_is_exact)
|
||||
{
|
||||
/*
|
||||
* comp_size is validated in xz_dec_microlzma_run().
|
||||
* uncomp_size can safely be anything.
|
||||
*/
|
||||
s->s.lzma2.compressed = comp_size;
|
||||
s->s.lzma2.uncompressed = uncomp_size;
|
||||
s->s.lzma2.pedantic_microlzma = uncomp_size_is_exact;
|
||||
|
||||
s->s.lzma2.sequence = SEQ_PROPERTIES;
|
||||
s->s.temp.size = 0;
|
||||
}
|
||||
|
||||
void xz_dec_microlzma_end(struct xz_dec_microlzma *s)
|
||||
{
|
||||
if (DEC_IS_MULTI(s->s.dict.mode))
|
||||
vfree(s->s.dict.buf);
|
||||
|
||||
kfree(s);
|
||||
}
|
||||
#endif
|
||||
|
@ -35,7 +35,8 @@ struct xz_dec {
|
||||
SEQ_INDEX,
|
||||
SEQ_INDEX_PADDING,
|
||||
SEQ_INDEX_CRC32,
|
||||
SEQ_STREAM_FOOTER
|
||||
SEQ_STREAM_FOOTER,
|
||||
SEQ_STREAM_PADDING
|
||||
} sequence;
|
||||
|
||||
/* Position in variable-length integers and Check fields */
|
||||
@ -423,12 +424,12 @@ static enum xz_ret dec_stream_header(struct xz_dec *s)
|
||||
* check types too, but then the check won't be verified and
|
||||
* a warning (XZ_UNSUPPORTED_CHECK) will be given.
|
||||
*/
|
||||
if (s->temp.buf[HEADER_MAGIC_SIZE + 1] > XZ_CHECK_MAX)
|
||||
return XZ_OPTIONS_ERROR;
|
||||
|
||||
s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1];
|
||||
|
||||
#ifdef XZ_DEC_ANY_CHECK
|
||||
if (s->check_type > XZ_CHECK_MAX)
|
||||
return XZ_OPTIONS_ERROR;
|
||||
|
||||
if (s->check_type > XZ_CHECK_CRC32 && !IS_CRC64(s->check_type))
|
||||
return XZ_UNSUPPORTED_CHECK;
|
||||
#else
|
||||
@ -604,6 +605,8 @@ static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
|
||||
if (ret != XZ_OK)
|
||||
return ret;
|
||||
|
||||
/* Fall through */
|
||||
|
||||
case SEQ_BLOCK_START:
|
||||
/* We need one byte of input to continue. */
|
||||
if (b->in_pos == b->in_size)
|
||||
@ -627,6 +630,8 @@ static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
|
||||
s->temp.pos = 0;
|
||||
s->sequence = SEQ_BLOCK_HEADER;
|
||||
|
||||
/* Fall through */
|
||||
|
||||
case SEQ_BLOCK_HEADER:
|
||||
if (!fill_temp(s, b))
|
||||
return XZ_OK;
|
||||
@ -637,6 +642,8 @@ static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
|
||||
|
||||
s->sequence = SEQ_BLOCK_UNCOMPRESS;
|
||||
|
||||
/* Fall through */
|
||||
|
||||
case SEQ_BLOCK_UNCOMPRESS:
|
||||
ret = dec_block(s, b);
|
||||
if (ret != XZ_STREAM_END)
|
||||
@ -644,6 +651,8 @@ static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
|
||||
|
||||
s->sequence = SEQ_BLOCK_PADDING;
|
||||
|
||||
/* Fall through */
|
||||
|
||||
case SEQ_BLOCK_PADDING:
|
||||
/*
|
||||
* Size of Compressed Data + Block Padding
|
||||
@ -664,6 +673,8 @@ static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
|
||||
|
||||
s->sequence = SEQ_BLOCK_CHECK;
|
||||
|
||||
/* Fall through */
|
||||
|
||||
case SEQ_BLOCK_CHECK:
|
||||
if (s->check_type == XZ_CHECK_CRC32) {
|
||||
ret = crc_validate(s, b, 32);
|
||||
@ -691,6 +702,8 @@ static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
|
||||
|
||||
s->sequence = SEQ_INDEX_PADDING;
|
||||
|
||||
/* Fall through */
|
||||
|
||||
case SEQ_INDEX_PADDING:
|
||||
while ((s->index.size + (b->in_pos - s->in_start))
|
||||
& 3) {
|
||||
@ -713,6 +726,8 @@ static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
|
||||
|
||||
s->sequence = SEQ_INDEX_CRC32;
|
||||
|
||||
/* Fall through */
|
||||
|
||||
case SEQ_INDEX_CRC32:
|
||||
ret = crc_validate(s, b, 32);
|
||||
if (ret != XZ_STREAM_END)
|
||||
@ -721,11 +736,17 @@ static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
|
||||
s->temp.size = STREAM_HEADER_SIZE;
|
||||
s->sequence = SEQ_STREAM_FOOTER;
|
||||
|
||||
/* Fall through */
|
||||
|
||||
case SEQ_STREAM_FOOTER:
|
||||
if (!fill_temp(s, b))
|
||||
return XZ_OK;
|
||||
|
||||
return dec_stream_footer(s);
|
||||
|
||||
case SEQ_STREAM_PADDING:
|
||||
/* Never reached, only silencing a warning */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -793,6 +814,79 @@ XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b)
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef XZ_DEC_CONCATENATED
|
||||
XZ_EXTERN enum xz_ret xz_dec_catrun(struct xz_dec *s, struct xz_buf *b,
|
||||
int finish)
|
||||
{
|
||||
enum xz_ret ret;
|
||||
|
||||
if (DEC_IS_SINGLE(s->mode)) {
|
||||
xz_dec_reset(s);
|
||||
finish = true;
|
||||
}
|
||||
|
||||
while (true) {
|
||||
if (s->sequence == SEQ_STREAM_PADDING) {
|
||||
/*
|
||||
* Skip Stream Padding. Its size must be a multiple
|
||||
* of four bytes which is tracked with s->pos.
|
||||
*/
|
||||
while (true) {
|
||||
if (b->in_pos == b->in_size) {
|
||||
/*
|
||||
* Note that if we are repeatedly
|
||||
* given no input and finish is false,
|
||||
* we will keep returning XZ_OK even
|
||||
* though no progress is being made.
|
||||
* The lack of XZ_BUF_ERROR support
|
||||
* isn't a problem here because a
|
||||
* reasonable caller will eventually
|
||||
* provide more input or set finish
|
||||
* to true.
|
||||
*/
|
||||
if (!finish)
|
||||
return XZ_OK;
|
||||
|
||||
if (s->pos != 0)
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
return XZ_STREAM_END;
|
||||
}
|
||||
|
||||
if (b->in[b->in_pos] != 0x00) {
|
||||
if (s->pos != 0)
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
++b->in_pos;
|
||||
s->pos = (s->pos + 1) & 3;
|
||||
}
|
||||
|
||||
/*
|
||||
* More input remains. It should be a new Stream.
|
||||
*
|
||||
* In single-call mode xz_dec_run() will always call
|
||||
* xz_dec_reset(). Thus, we need to do it here only
|
||||
* in multi-call mode.
|
||||
*/
|
||||
if (DEC_IS_MULTI(s->mode))
|
||||
xz_dec_reset(s);
|
||||
}
|
||||
|
||||
ret = xz_dec_run(s, b);
|
||||
|
||||
if (ret != XZ_STREAM_END)
|
||||
break;
|
||||
|
||||
s->sequence = SEQ_STREAM_PADDING;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max)
|
||||
{
|
||||
struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL);
|
||||
|
@ -15,8 +15,15 @@ EXPORT_SYMBOL(xz_dec_reset);
|
||||
EXPORT_SYMBOL(xz_dec_run);
|
||||
EXPORT_SYMBOL(xz_dec_end);
|
||||
|
||||
#ifdef CONFIG_XZ_DEC_MICROLZMA
|
||||
EXPORT_SYMBOL(xz_dec_microlzma_alloc);
|
||||
EXPORT_SYMBOL(xz_dec_microlzma_reset);
|
||||
EXPORT_SYMBOL(xz_dec_microlzma_run);
|
||||
EXPORT_SYMBOL(xz_dec_microlzma_end);
|
||||
#endif
|
||||
|
||||
MODULE_DESCRIPTION("XZ decompressor");
|
||||
MODULE_VERSION("1.0");
|
||||
MODULE_VERSION("1.1");
|
||||
MODULE_AUTHOR("Lasse Collin <lasse.collin@tukaani.org> and Igor Pavlov");
|
||||
|
||||
/*
|
||||
|
@ -2,7 +2,7 @@
|
||||
* LZMA2 definitions
|
||||
*
|
||||
* Authors: Lasse Collin <lasse.collin@tukaani.org>
|
||||
* Igor Pavlov <http://7-zip.org/>
|
||||
* Igor Pavlov <https://7-zip.org/>
|
||||
*
|
||||
* This file has been put into the public domain.
|
||||
* You can do whatever you want with this file.
|
||||
|
@ -37,6 +37,9 @@
|
||||
# ifdef CONFIG_XZ_DEC_SPARC
|
||||
# define XZ_DEC_SPARC
|
||||
# endif
|
||||
# ifdef CONFIG_XZ_DEC_MICROLZMA
|
||||
# define XZ_DEC_MICROLZMA
|
||||
# endif
|
||||
# define memeq(a, b, size) (memcmp(a, b, size) == 0)
|
||||
# define memzero(buf, size) memset(buf, 0, size)
|
||||
# endif
|
||||
|
@ -19,7 +19,7 @@
|
||||
|
||||
/*
|
||||
* See the .xz file format specification at
|
||||
* http://tukaani.org/xz/xz-file-format.txt
|
||||
* https://tukaani.org/xz/xz-file-format.txt
|
||||
* to understand the container format.
|
||||
*/
|
||||
|
||||
|
@ -7,11 +7,14 @@
|
||||
# You can do whatever you want with this file.
|
||||
#
|
||||
|
||||
# gcc -std=gnu89 is used because Linux uses it. It is fine to omit it as
|
||||
# the code is also C99/C11 compatible. With clang you may wish to omit
|
||||
# either -std=gnu89 or -pedantic as having both gives quite a few warnings.
|
||||
CC = gcc -std=gnu89
|
||||
BCJ_CPPFLAGS = -DXZ_DEC_X86 -DXZ_DEC_POWERPC -DXZ_DEC_IA64 \
|
||||
-DXZ_DEC_ARM -DXZ_DEC_ARMTHUMB -DXZ_DEC_SPARC
|
||||
CPPFLAGS = -DXZ_USE_CRC64 -DXZ_DEC_ANY_CHECK
|
||||
CFLAGS = -ggdb3 -O2 -pedantic -Wall -Wextra
|
||||
CPPFLAGS = -DXZ_USE_CRC64 -DXZ_DEC_ANY_CHECK -DXZ_DEC_CONCATENATED
|
||||
CFLAGS = -ggdb3 -O2 -pedantic -Wall -Wextra -Wno-long-long
|
||||
RM = rm -f
|
||||
VPATH = ../linux/include/linux ../linux/lib/xz
|
||||
COMMON_SRCS = xz_crc32.c xz_crc64.c xz_dec_stream.c xz_dec_lzma2.c xz_dec_bcj.c
|
||||
@ -44,5 +47,5 @@ boottest: $(BOOTTEST_OBJS) $(COMMON_SRCS)
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
-$(RM) $(COMMON_OBJS) $(XZMINIDEC_OBJS) $(BUFTEST_OBJS) \
|
||||
$(BOOTTEST_OBJS) $(PROGRAMS)
|
||||
-$(RM) $(COMMON_OBJS) $(XZMINIDEC_OBJS) $(BYTETEST_OBJS) \
|
||||
$(BUFTEST_OBJS) $(BOOTTEST_OBJS) $(PROGRAMS)
|
||||
|
@ -27,13 +27,14 @@ int main(void)
|
||||
|
||||
s = xz_dec_init(XZ_SINGLE, 0);
|
||||
if (s == NULL) {
|
||||
fputs("Initialization failed", stderr);
|
||||
fputs("Initialization failed\n", stderr);
|
||||
return 1;
|
||||
}
|
||||
|
||||
b.in = in;
|
||||
b.in_pos = 0;
|
||||
b.in_size = fread(in, 1, sizeof(in), stdin);
|
||||
|
||||
b.out = out;
|
||||
b.out_pos = 0;
|
||||
b.out_size = sizeof(out);
|
||||
|
@ -25,7 +25,7 @@ int main(int argc, char **argv)
|
||||
size_t uncomp_size;
|
||||
|
||||
if (argc != 2) {
|
||||
fputs("Give uncompressed size as the argument", stderr);
|
||||
fputs("Give uncompressed size as the argument\n", stderr);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -10,6 +10,9 @@
|
||||
#ifndef XZ_CONFIG_H
|
||||
#define XZ_CONFIG_H
|
||||
|
||||
/* Uncomment to enable building of xz_dec_catrun(). */
|
||||
/* #define XZ_DEC_CONCATENATED */
|
||||
|
||||
/* Uncomment to enable CRC64 support. */
|
||||
/* #define XZ_USE_CRC64 */
|
||||
|
||||
|
@ -61,10 +61,22 @@ int main(int argc, char **argv)
|
||||
while (true) {
|
||||
if (b.in_pos == b.in_size) {
|
||||
b.in_size = fread(in, 1, sizeof(in), stdin);
|
||||
|
||||
if (ferror(stdin)) {
|
||||
msg = "Read error\n";
|
||||
goto error;
|
||||
}
|
||||
|
||||
b.in_pos = 0;
|
||||
}
|
||||
|
||||
ret = xz_dec_run(s, &b);
|
||||
/*
|
||||
* There are a few ways to set the "finish" (the third)
|
||||
* argument. We could use feof(stdin) but testing in_size
|
||||
* is fine too and may also work in applications that don't
|
||||
* use FILEs.
|
||||
*/
|
||||
ret = xz_dec_catrun(s, &b, b.in_size == 0);
|
||||
|
||||
if (b.out_pos == sizeof(out)) {
|
||||
if (fwrite(out, 1, b.out_pos, stdout) != b.out_pos) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user