From 1008b7c6744a190c5b15aaf8cab1054671e4c7c3 Mon Sep 17 00:00:00 2001 From: mm Date: Fri, 7 May 2010 18:59:06 +0000 Subject: [PATCH] Vendor import of xz (stripped) Git revision: a290cfee3e23f046889c022aa96b4eca2016fdda Approved by: delphij (mentor) --- AUTHORS | 27 + COPYING | 65 + ChangeLog | 4716 +++++++++++++++++ README | 218 + THANKS | 67 + TODO | 60 + po/LINGUAS | 1 + po/Makevars | 46 + po/POTFILES.in | 10 + po/cs.po | 636 +++ src/common/mythread.h | 42 + src/common/sysdefs.h | 171 + src/common/tuklib_common.h | 71 + src/common/tuklib_config.h | 7 + src/common/tuklib_cpucores.c | 52 + src/common/tuklib_cpucores.h | 23 + src/common/tuklib_exit.c | 57 + src/common/tuklib_exit.h | 25 + src/common/tuklib_gettext.h | 44 + src/common/tuklib_integer.h | 523 ++ src/common/tuklib_open_stdxxx.c | 55 + src/common/tuklib_open_stdxxx.h | 23 + src/common/tuklib_physmem.c | 165 + src/common/tuklib_physmem.h | 28 + src/common/tuklib_progname.c | 50 + src/common/tuklib_progname.h | 32 + src/liblzma/api/lzma.h | 326 ++ src/liblzma/api/lzma/base.h | 596 +++ src/liblzma/api/lzma/bcj.h | 90 + src/liblzma/api/lzma/block.h | 529 ++ src/liblzma/api/lzma/check.h | 150 + src/liblzma/api/lzma/container.h | 404 ++ src/liblzma/api/lzma/delta.h | 77 + src/liblzma/api/lzma/filter.h | 421 ++ src/liblzma/api/lzma/hardware.h | 51 + src/liblzma/api/lzma/index.h | 677 +++ src/liblzma/api/lzma/index_hash.h | 107 + src/liblzma/api/lzma/lzma.h | 397 ++ src/liblzma/api/lzma/stream_flags.h | 227 + src/liblzma/api/lzma/subblock.h | 200 + src/liblzma/api/lzma/version.h | 121 + src/liblzma/api/lzma/vli.h | 168 + src/liblzma/check/check.c | 174 + src/liblzma/check/check.h | 95 + src/liblzma/check/crc32_fast.c | 82 + src/liblzma/check/crc32_small.c | 61 + src/liblzma/check/crc32_table.c | 19 + src/liblzma/check/crc32_table_be.h | 525 ++ src/liblzma/check/crc32_table_le.h | 525 ++ src/liblzma/check/crc32_tablegen.c | 117 + src/liblzma/check/crc32_x86.S | 304 ++ src/liblzma/check/crc64_fast.c | 72 + 
src/liblzma/check/crc64_small.c | 53 + src/liblzma/check/crc64_table.c | 19 + src/liblzma/check/crc64_table_be.h | 521 ++ src/liblzma/check/crc64_table_le.h | 521 ++ src/liblzma/check/crc64_tablegen.c | 88 + src/liblzma/check/crc64_x86.S | 287 + src/liblzma/check/crc_macros.h | 30 + src/liblzma/check/sha256.c | 201 + src/liblzma/common/alone_decoder.c | 232 + src/liblzma/common/alone_decoder.h | 22 + src/liblzma/common/alone_encoder.c | 157 + src/liblzma/common/auto_decoder.c | 186 + src/liblzma/common/block_buffer_decoder.c | 80 + src/liblzma/common/block_buffer_encoder.c | 299 ++ src/liblzma/common/block_decoder.c | 242 + src/liblzma/common/block_decoder.h | 22 + src/liblzma/common/block_encoder.c | 212 + src/liblzma/common/block_encoder.h | 47 + src/liblzma/common/block_header_decoder.c | 116 + src/liblzma/common/block_header_encoder.c | 132 + src/liblzma/common/block_util.c | 90 + src/liblzma/common/chunk_size.c | 67 + src/liblzma/common/common.c | 374 ++ src/liblzma/common/common.h | 290 + src/liblzma/common/easy_buffer_encoder.c | 27 + src/liblzma/common/easy_decoder_memusage.c | 24 + src/liblzma/common/easy_encoder.c | 25 + src/liblzma/common/easy_encoder_memusage.c | 24 + src/liblzma/common/easy_preset.c | 27 + src/liblzma/common/easy_preset.h | 32 + src/liblzma/common/filter_buffer_decoder.c | 87 + src/liblzma/common/filter_buffer_encoder.c | 54 + src/liblzma/common/filter_common.c | 346 ++ src/liblzma/common/filter_common.h | 48 + src/liblzma/common/filter_decoder.c | 199 + src/liblzma/common/filter_decoder.h | 23 + src/liblzma/common/filter_encoder.c | 298 ++ src/liblzma/common/filter_encoder.h | 27 + src/liblzma/common/filter_flags_decoder.c | 46 + src/liblzma/common/filter_flags_encoder.c | 56 + src/liblzma/common/hardware_physmem.c | 25 + src/liblzma/common/index.c | 1241 +++++ src/liblzma/common/index.h | 73 + src/liblzma/common/index_decoder.c | 343 ++ src/liblzma/common/index_encoder.c | 252 + src/liblzma/common/index_encoder.h | 23 + 
src/liblzma/common/index_hash.c | 332 ++ src/liblzma/common/stream_buffer_decoder.c | 91 + src/liblzma/common/stream_buffer_encoder.c | 131 + src/liblzma/common/stream_decoder.c | 451 ++ src/liblzma/common/stream_decoder.h | 21 + src/liblzma/common/stream_encoder.c | 331 ++ src/liblzma/common/stream_encoder.h | 23 + src/liblzma/common/stream_flags_common.c | 47 + src/liblzma/common/stream_flags_common.h | 33 + src/liblzma/common/stream_flags_decoder.c | 82 + src/liblzma/common/stream_flags_encoder.c | 86 + src/liblzma/common/vli_decoder.c | 86 + src/liblzma/common/vli_encoder.c | 69 + src/liblzma/common/vli_size.c | 30 + src/liblzma/delta/delta_common.c | 70 + src/liblzma/delta/delta_common.h | 20 + src/liblzma/delta/delta_decoder.c | 76 + src/liblzma/delta/delta_decoder.h | 25 + src/liblzma/delta/delta_encoder.c | 121 + src/liblzma/delta/delta_encoder.h | 23 + src/liblzma/delta/delta_private.h | 37 + src/liblzma/lz/lz_decoder.c | 299 ++ src/liblzma/lz/lz_decoder.h | 234 + src/liblzma/lz/lz_encoder.c | 578 ++ src/liblzma/lz/lz_encoder.h | 328 ++ src/liblzma/lz/lz_encoder_hash.h | 108 + src/liblzma/lz/lz_encoder_hash_table.h | 68 + src/liblzma/lz/lz_encoder_mf.c | 753 +++ src/liblzma/lzma/fastpos.h | 140 + src/liblzma/lzma/fastpos_table.c | 519 ++ src/liblzma/lzma/fastpos_tablegen.c | 56 + src/liblzma/lzma/lzma2_decoder.c | 305 ++ src/liblzma/lzma/lzma2_decoder.h | 28 + src/liblzma/lzma/lzma2_encoder.c | 393 ++ src/liblzma/lzma/lzma2_encoder.h | 41 + src/liblzma/lzma/lzma_common.h | 223 + src/liblzma/lzma/lzma_decoder.c | 1057 ++++ src/liblzma/lzma/lzma_decoder.h | 52 + src/liblzma/lzma/lzma_encoder.c | 675 +++ src/liblzma/lzma/lzma_encoder.h | 54 + src/liblzma/lzma/lzma_encoder_optimum_fast.c | 179 + .../lzma/lzma_encoder_optimum_normal.c | 868 +++ src/liblzma/lzma/lzma_encoder_presets.c | 52 + src/liblzma/lzma/lzma_encoder_private.h | 148 + src/liblzma/rangecoder/price.h | 92 + src/liblzma/rangecoder/price_table.c | 22 + src/liblzma/rangecoder/price_tablegen.c | 
87 + src/liblzma/rangecoder/range_common.h | 73 + src/liblzma/rangecoder/range_decoder.h | 179 + src/liblzma/rangecoder/range_encoder.h | 231 + src/liblzma/simple/arm.c | 69 + src/liblzma/simple/armthumb.c | 74 + src/liblzma/simple/ia64.c | 110 + src/liblzma/simple/powerpc.c | 73 + src/liblzma/simple/simple_coder.c | 280 + src/liblzma/simple/simple_coder.h | 60 + src/liblzma/simple/simple_decoder.c | 40 + src/liblzma/simple/simple_decoder.h | 22 + src/liblzma/simple/simple_encoder.c | 38 + src/liblzma/simple/simple_encoder.h | 23 + src/liblzma/simple/simple_private.h | 76 + src/liblzma/simple/sparc.c | 81 + src/liblzma/simple/x86.c | 154 + src/liblzma/subblock/subblock_decoder.c | 630 +++ src/liblzma/subblock/subblock_decoder.h | 22 + .../subblock/subblock_decoder_helper.c | 70 + .../subblock/subblock_decoder_helper.h | 29 + src/liblzma/subblock/subblock_encoder.c | 984 ++++ src/liblzma/subblock/subblock_encoder.h | 21 + src/lzmainfo/lzmainfo.1 | 55 + src/lzmainfo/lzmainfo.c | 210 + src/xz/args.c | 549 ++ src/xz/args.h | 42 + src/xz/coder.c | 659 +++ src/xz/coder.h | 57 + src/xz/file_io.c | 957 ++++ src/xz/file_io.h | 129 + src/xz/hardware.c | 112 + src/xz/hardware.h | 35 + src/xz/list.c | 742 +++ src/xz/list.h | 18 + src/xz/main.c | 272 + src/xz/main.h | 30 + src/xz/message.c | 1189 +++++ src/xz/message.h | 151 + src/xz/options.c | 435 ++ src/xz/options.h | 38 + src/xz/private.h | 51 + src/xz/signals.c | 189 + src/xz/signals.h | 43 + src/xz/suffix.c | 211 + src/xz/suffix.h | 28 + src/xz/util.c | 314 ++ src/xz/util.h | 129 + src/xz/xz.1 | 1351 +++++ src/xzdec/xzdec.1 | 168 + src/xzdec/xzdec.c | 482 ++ 195 files changed, 42846 insertions(+) create mode 100644 AUTHORS create mode 100644 COPYING create mode 100644 ChangeLog create mode 100644 README create mode 100644 THANKS create mode 100644 TODO create mode 100644 po/LINGUAS create mode 100644 po/Makevars create mode 100644 po/POTFILES.in create mode 100644 po/cs.po create mode 100644 src/common/mythread.h create 
mode 100644 src/common/sysdefs.h create mode 100644 src/common/tuklib_common.h create mode 100644 src/common/tuklib_config.h create mode 100644 src/common/tuklib_cpucores.c create mode 100644 src/common/tuklib_cpucores.h create mode 100644 src/common/tuklib_exit.c create mode 100644 src/common/tuklib_exit.h create mode 100644 src/common/tuklib_gettext.h create mode 100644 src/common/tuklib_integer.h create mode 100644 src/common/tuklib_open_stdxxx.c create mode 100644 src/common/tuklib_open_stdxxx.h create mode 100644 src/common/tuklib_physmem.c create mode 100644 src/common/tuklib_physmem.h create mode 100644 src/common/tuklib_progname.c create mode 100644 src/common/tuklib_progname.h create mode 100644 src/liblzma/api/lzma.h create mode 100644 src/liblzma/api/lzma/base.h create mode 100644 src/liblzma/api/lzma/bcj.h create mode 100644 src/liblzma/api/lzma/block.h create mode 100644 src/liblzma/api/lzma/check.h create mode 100644 src/liblzma/api/lzma/container.h create mode 100644 src/liblzma/api/lzma/delta.h create mode 100644 src/liblzma/api/lzma/filter.h create mode 100644 src/liblzma/api/lzma/hardware.h create mode 100644 src/liblzma/api/lzma/index.h create mode 100644 src/liblzma/api/lzma/index_hash.h create mode 100644 src/liblzma/api/lzma/lzma.h create mode 100644 src/liblzma/api/lzma/stream_flags.h create mode 100644 src/liblzma/api/lzma/subblock.h create mode 100644 src/liblzma/api/lzma/version.h create mode 100644 src/liblzma/api/lzma/vli.h create mode 100644 src/liblzma/check/check.c create mode 100644 src/liblzma/check/check.h create mode 100644 src/liblzma/check/crc32_fast.c create mode 100644 src/liblzma/check/crc32_small.c create mode 100644 src/liblzma/check/crc32_table.c create mode 100644 src/liblzma/check/crc32_table_be.h create mode 100644 src/liblzma/check/crc32_table_le.h create mode 100644 src/liblzma/check/crc32_tablegen.c create mode 100644 src/liblzma/check/crc32_x86.S create mode 100644 src/liblzma/check/crc64_fast.c create mode 100644 
src/liblzma/check/crc64_small.c create mode 100644 src/liblzma/check/crc64_table.c create mode 100644 src/liblzma/check/crc64_table_be.h create mode 100644 src/liblzma/check/crc64_table_le.h create mode 100644 src/liblzma/check/crc64_tablegen.c create mode 100644 src/liblzma/check/crc64_x86.S create mode 100644 src/liblzma/check/crc_macros.h create mode 100644 src/liblzma/check/sha256.c create mode 100644 src/liblzma/common/alone_decoder.c create mode 100644 src/liblzma/common/alone_decoder.h create mode 100644 src/liblzma/common/alone_encoder.c create mode 100644 src/liblzma/common/auto_decoder.c create mode 100644 src/liblzma/common/block_buffer_decoder.c create mode 100644 src/liblzma/common/block_buffer_encoder.c create mode 100644 src/liblzma/common/block_decoder.c create mode 100644 src/liblzma/common/block_decoder.h create mode 100644 src/liblzma/common/block_encoder.c create mode 100644 src/liblzma/common/block_encoder.h create mode 100644 src/liblzma/common/block_header_decoder.c create mode 100644 src/liblzma/common/block_header_encoder.c create mode 100644 src/liblzma/common/block_util.c create mode 100644 src/liblzma/common/chunk_size.c create mode 100644 src/liblzma/common/common.c create mode 100644 src/liblzma/common/common.h create mode 100644 src/liblzma/common/easy_buffer_encoder.c create mode 100644 src/liblzma/common/easy_decoder_memusage.c create mode 100644 src/liblzma/common/easy_encoder.c create mode 100644 src/liblzma/common/easy_encoder_memusage.c create mode 100644 src/liblzma/common/easy_preset.c create mode 100644 src/liblzma/common/easy_preset.h create mode 100644 src/liblzma/common/filter_buffer_decoder.c create mode 100644 src/liblzma/common/filter_buffer_encoder.c create mode 100644 src/liblzma/common/filter_common.c create mode 100644 src/liblzma/common/filter_common.h create mode 100644 src/liblzma/common/filter_decoder.c create mode 100644 src/liblzma/common/filter_decoder.h create mode 100644 src/liblzma/common/filter_encoder.c 
create mode 100644 src/liblzma/common/filter_encoder.h create mode 100644 src/liblzma/common/filter_flags_decoder.c create mode 100644 src/liblzma/common/filter_flags_encoder.c create mode 100644 src/liblzma/common/hardware_physmem.c create mode 100644 src/liblzma/common/index.c create mode 100644 src/liblzma/common/index.h create mode 100644 src/liblzma/common/index_decoder.c create mode 100644 src/liblzma/common/index_encoder.c create mode 100644 src/liblzma/common/index_encoder.h create mode 100644 src/liblzma/common/index_hash.c create mode 100644 src/liblzma/common/stream_buffer_decoder.c create mode 100644 src/liblzma/common/stream_buffer_encoder.c create mode 100644 src/liblzma/common/stream_decoder.c create mode 100644 src/liblzma/common/stream_decoder.h create mode 100644 src/liblzma/common/stream_encoder.c create mode 100644 src/liblzma/common/stream_encoder.h create mode 100644 src/liblzma/common/stream_flags_common.c create mode 100644 src/liblzma/common/stream_flags_common.h create mode 100644 src/liblzma/common/stream_flags_decoder.c create mode 100644 src/liblzma/common/stream_flags_encoder.c create mode 100644 src/liblzma/common/vli_decoder.c create mode 100644 src/liblzma/common/vli_encoder.c create mode 100644 src/liblzma/common/vli_size.c create mode 100644 src/liblzma/delta/delta_common.c create mode 100644 src/liblzma/delta/delta_common.h create mode 100644 src/liblzma/delta/delta_decoder.c create mode 100644 src/liblzma/delta/delta_decoder.h create mode 100644 src/liblzma/delta/delta_encoder.c create mode 100644 src/liblzma/delta/delta_encoder.h create mode 100644 src/liblzma/delta/delta_private.h create mode 100644 src/liblzma/lz/lz_decoder.c create mode 100644 src/liblzma/lz/lz_decoder.h create mode 100644 src/liblzma/lz/lz_encoder.c create mode 100644 src/liblzma/lz/lz_encoder.h create mode 100644 src/liblzma/lz/lz_encoder_hash.h create mode 100644 src/liblzma/lz/lz_encoder_hash_table.h create mode 100644 src/liblzma/lz/lz_encoder_mf.c 
create mode 100644 src/liblzma/lzma/fastpos.h create mode 100644 src/liblzma/lzma/fastpos_table.c create mode 100644 src/liblzma/lzma/fastpos_tablegen.c create mode 100644 src/liblzma/lzma/lzma2_decoder.c create mode 100644 src/liblzma/lzma/lzma2_decoder.h create mode 100644 src/liblzma/lzma/lzma2_encoder.c create mode 100644 src/liblzma/lzma/lzma2_encoder.h create mode 100644 src/liblzma/lzma/lzma_common.h create mode 100644 src/liblzma/lzma/lzma_decoder.c create mode 100644 src/liblzma/lzma/lzma_decoder.h create mode 100644 src/liblzma/lzma/lzma_encoder.c create mode 100644 src/liblzma/lzma/lzma_encoder.h create mode 100644 src/liblzma/lzma/lzma_encoder_optimum_fast.c create mode 100644 src/liblzma/lzma/lzma_encoder_optimum_normal.c create mode 100644 src/liblzma/lzma/lzma_encoder_presets.c create mode 100644 src/liblzma/lzma/lzma_encoder_private.h create mode 100644 src/liblzma/rangecoder/price.h create mode 100644 src/liblzma/rangecoder/price_table.c create mode 100644 src/liblzma/rangecoder/price_tablegen.c create mode 100644 src/liblzma/rangecoder/range_common.h create mode 100644 src/liblzma/rangecoder/range_decoder.h create mode 100644 src/liblzma/rangecoder/range_encoder.h create mode 100644 src/liblzma/simple/arm.c create mode 100644 src/liblzma/simple/armthumb.c create mode 100644 src/liblzma/simple/ia64.c create mode 100644 src/liblzma/simple/powerpc.c create mode 100644 src/liblzma/simple/simple_coder.c create mode 100644 src/liblzma/simple/simple_coder.h create mode 100644 src/liblzma/simple/simple_decoder.c create mode 100644 src/liblzma/simple/simple_decoder.h create mode 100644 src/liblzma/simple/simple_encoder.c create mode 100644 src/liblzma/simple/simple_encoder.h create mode 100644 src/liblzma/simple/simple_private.h create mode 100644 src/liblzma/simple/sparc.c create mode 100644 src/liblzma/simple/x86.c create mode 100644 src/liblzma/subblock/subblock_decoder.c create mode 100644 src/liblzma/subblock/subblock_decoder.h create mode 100644 
src/liblzma/subblock/subblock_decoder_helper.c create mode 100644 src/liblzma/subblock/subblock_decoder_helper.h create mode 100644 src/liblzma/subblock/subblock_encoder.c create mode 100644 src/liblzma/subblock/subblock_encoder.h create mode 100644 src/lzmainfo/lzmainfo.1 create mode 100644 src/lzmainfo/lzmainfo.c create mode 100644 src/xz/args.c create mode 100644 src/xz/args.h create mode 100644 src/xz/coder.c create mode 100644 src/xz/coder.h create mode 100644 src/xz/file_io.c create mode 100644 src/xz/file_io.h create mode 100644 src/xz/hardware.c create mode 100644 src/xz/hardware.h create mode 100644 src/xz/list.c create mode 100644 src/xz/list.h create mode 100644 src/xz/main.c create mode 100644 src/xz/main.h create mode 100644 src/xz/message.c create mode 100644 src/xz/message.h create mode 100644 src/xz/options.c create mode 100644 src/xz/options.h create mode 100644 src/xz/private.h create mode 100644 src/xz/signals.c create mode 100644 src/xz/signals.h create mode 100644 src/xz/suffix.c create mode 100644 src/xz/suffix.h create mode 100644 src/xz/util.c create mode 100644 src/xz/util.h create mode 100644 src/xz/xz.1 create mode 100644 src/xzdec/xzdec.1 create mode 100644 src/xzdec/xzdec.c diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 000000000000..63a9815bc035 --- /dev/null +++ b/AUTHORS @@ -0,0 +1,27 @@ + +Authors of XZ Utils +=================== + + XZ Utils is developed and maintained by Lasse Collin + . + + Major parts of liblzma are based on code written by Igor Pavlov, + specifically the LZMA SDK . Without + this code, XZ Utils wouldn't exist. + + The SHA-256 implementation in liblzma is based on the code found from + 7-Zip , which has a modified version of the SHA-256 + code found from Crypto++ . The SHA-256 code + in Crypto++ was written by Kevin Springle and Wei Dai. + + Some scripts have been adapted from gzip. The original versions + were written by Jean-loup Gailly, Charles Levert, and Paul Eggert. 
+ Andrew Dudman helped adapt the scripts and their man pages for + XZ Utils. + + The GNU Autotools based build system contains files from many authors, + which I'm not trying to list here. + + Several people have contributed fixes or reported bugs. Most of them + are mentioned in the file THANKS. + diff --git a/COPYING b/COPYING new file mode 100644 index 000000000000..43c90d0598c5 --- /dev/null +++ b/COPYING @@ -0,0 +1,65 @@ + +XZ Utils Licensing +================== + + Different licenses apply to different files in this package. Here + is a rough summary of which licenses apply to which parts of this + package (but check the individual files to be sure!): + + - liblzma is in the public domain. + + - xz, xzdec, and lzmadec command line tools are in the public + domain unless GNU getopt_long had to be compiled and linked + in from the lib directory. The getopt_long code is under + GNU LGPLv2.1+. + + - The scripts to grep, diff, and view compressed files have been + adapted from gzip. These scripts and their documentation are + under GNU GPLv2+. + + - All the documentation in the doc directory and most of the + XZ Utils specific documentation files in other directories + are in the public domain. + + - Translated messages are in the public domain. + + - The build system contains public domain files, and files that + are under GNU GPLv2+ or GNU GPLv3+. None of these files end up + in the binaries being built. + + - Test files and test code in the tests directory, and debugging + utilities in the debug directory are in the public domain. + + - The extra directory may contain public domain files, and files + that are under various free software licenses. + + You can do whatever you want with the files that have been put into + the public domain. If you find public domain legally problematic, + take the previous sentence as a license grant. If you still find + the lack of copyright legally problematic, you have too many + lawyers.
+ + As usual, this software is provided "as is", without any warranty. + + If you copy significant amounts of public domain code from XZ Utils + into your project, acknowledging this somewhere in your software is + polite (especially if it is proprietary, non-free software), but + naturally it is not legally required. Here is an example of a good + notice to put into "about box" or into documentation: + + This software includes code from XZ Utils . + + The following license texts are included in the following files: + - COPYING.LGPLv2.1: GNU Lesser General Public License version 2.1 + - COPYING.GPLv2: GNU General Public License version 2 + - COPYING.GPLv3: GNU General Public License version 3 + + Note that the toolchain (compiler, linker etc.) may add some code + pieces that are copyrighted. Thus, it is possible that e.g. liblzma + binary wouldn't actually be in the public domain in its entirety + even though it contains no copyrighted code from the XZ Utils source + package. + + If you have questions, don't hesitate to ask the author(s) for more + information. + diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 000000000000..a3cb84ad49bd --- /dev/null +++ b/ChangeLog @@ -0,0 +1,4716 @@ +commit a290cfee3e23f046889c022aa96b4eca2016fdda +Author: Lasse Collin +Date: Mon Apr 12 21:55:56 2010 +0300 + + Show both elapsed time and estimated remaining time in xz -v. + + The extra space for showing both has been taken from the + sizes field. If the sizes grow big, bigger units than MiB + will be used. It makes it slightly difficult to see that + progress is still happening with huge files, but it should + be OK in practice. + + Thanks to Trent W. Buck for + and Jonathan Nieder for suggestions how to fix it. + +commit a1f7a986b8d708f9290da9799ca1b8d7082fad3e +Author: Lasse Collin +Date: Wed Mar 31 16:47:25 2010 +0300 + + Add a simple tip to faq.txt about tar and xz. + + Thanks to Gilles Espinasse. 
+ +commit c737eec91d200d730aa82662affd6b06ebb0bff0 +Author: Lasse Collin +Date: Mon Mar 22 21:03:03 2010 +0200 + + Updated THANKS. + +commit f4b2b52624b802c786e4e2a8eb6895794dd93b24 +Author: Lasse Collin +Date: Sun Mar 7 19:52:25 2010 +0200 + + Fix xzgrep to not break if filenames have spaces or quotes. + + Thanks to someone who reported the bug on IRC. + +commit cf38da00a140bd3bd65b192390ae5553380fd774 +Author: Lasse Collin +Date: Sun Mar 7 13:59:32 2010 +0200 + + Treat all integer multiplier suffixes as base-2. + + Originally both base-2 and base-10 were supported, but since + there seems to be little need for base-10 in XZ Utils, treat + everything as base-2 and also be more relaxed about the case + of the first letter of the suffix. Now xz will accept e.g. + KiB, Ki, k, K, kB, and KB, and interpret them all as 1024. The + recommended spelling of the suffixes are still KiB, MiB, and GiB. + +commit 00fc1211ae7b687ac912098f4479112059deccbd +Author: Lasse Collin +Date: Sun Mar 7 13:50:23 2010 +0200 + + Consistently round up the memory usage limit in messages. + + It still feels a bit wrong to round 1 byte to 1 MiB but + at least it is now done consistently so that the same + byte value is always rounded the same way to MiB. + +commit 9886d436ff5615fc70eef32ff757b1e934069621 +Author: Lasse Collin +Date: Sun Mar 7 13:34:34 2010 +0200 + + Change the default of --enable-assume-ram from 32 to 128 MiB. + + This is to allow files created with "xz -9" to be decompressed + if the amount of RAM cannot be determined. + +commit 2672bcc9f85ba28ff648e092e9eb4cd9e69ce418 +Author: Lasse Collin +Date: Sun Mar 7 13:29:28 2010 +0200 + + Increase the default memory usage limit on "low-memory" systems. + + Previously the default limit was always 40 % of RAM. The + new limit is a little bit more complex: + + - If 40 % of RAM is at least 80 MiB, 40 % of RAM is used + as the limit. + + - If 80 % of RAM is over 80 MiB, 80 MiB is used as the limit. 
+ + - Otherwise 80 % of RAM is used as the limit. + + This should make it possible to decompress files created with + "xz -9" on more systems. Swapping is generally more expected + on systems with less RAM, so higher default limit on them + shouldn't cause too bad surprises in terms of heavy swapping. + Instead, the higher default limit should reduce the number of + bad surprises when it used to prevent decompression of files + created with "xz -9". The DoS prevention system shouldn't be + a DoS itself. + + Note that even with the new default limit, a system with 64 MiB + RAM cannot decompress files created with "xz -9" without user + overriding the limit. This should be OK, because if xz is going + to need more memory than the system has RAM, it will run very + very slowly and thus it's good that user has to override the limit + in that case. + +commit 5527b7269a997e7f335d60f237a64bbf225d9dc7 +Author: Lasse Collin +Date: Sat Mar 6 21:36:19 2010 +0200 + + Updated THANKS. + +commit d0d1c51aea4351288a7e533cce28cb7f852f6b05 +Author: Lasse Collin +Date: Sat Mar 6 21:17:20 2010 +0200 + + Fix missing initialization in lzma_strm_init(). + + With bad luck, lzma_code() could return LZMA_BUF_ERROR + when it shouldn't. + + This has been here since the early days of liblzma. + It got triggered by the modifications made to the xz + tool in commit 18c10c30d2833f394cd7bce0e6a821044b15832f + but only when decompressing .lzma files. Somehow I managed + to miss testing that with Valgrind earlier. + + This fixes . + Thanks to Rafał Mużyło for helping to debug it on IRC. + +commit eb7d51a3faf9298c0c7aa9aaeae1023dcf9e37ea +Author: Lasse Collin +Date: Fri Feb 12 13:16:15 2010 +0200 + + Collection of language fixes to comments and docs. + + Thanks to Jonathan Nieder. + +commit 4785f2021aa6a23f1caf724fcc823e562584f225 +Author: Lasse Collin +Date: Fri Feb 12 12:41:20 2010 +0200 + + Fix jl -> jb in ASM files. 
+ +commit 6b50c9429bf85521d355adc61745d06ee017f8c8 +Author: Lasse Collin +Date: Fri Feb 12 12:31:22 2010 +0200 + + Use __APPLE__ instead of __MACH__ in ASM files. + + This allows the files to work on HURD. + + Thanks to Jonathan Nieder. + +commit 6503fde658a5cdbdd907a788865470dd64771601 +Author: Lasse Collin +Date: Sun Feb 7 19:48:06 2010 +0200 + + Subtle change to liblzma Block handling API. + + lzma_block.version has to be initialized even for + lzma_block_header_decode(). This way a future version + of liblzma won't allocate memory in a way that an old + application doesn't know how to free it. + + The subtlety of this change is that all current apps + using lzma_block_header_decode() will keep working for + now, because the only possible version value is zero, + and lzma_block_header_decode() unconditionally sets the + version to zero even now. Unless fixed, these apps will + break in the future if a new version of the Block options + is ever needed. + +commit dd7c3841ff78cb94ce02b0220c6e4748460970f7 +Author: Lasse Collin +Date: Tue Feb 2 11:50:11 2010 +0200 + + Fix wrong assertion. + + This was added in 455e68c030fde8a8c2f5e254c3b3ab9489bf3735. + +commit 9d67588c1597849504a3e5ac8bf6f06e7d2ee8be +Author: Lasse Collin +Date: Mon Feb 1 22:48:42 2010 +0200 + + Updated TODO. + +commit fef6333f52c8801308c3b78acb7942988541d137 +Author: Lasse Collin +Date: Mon Feb 1 22:47:54 2010 +0200 + + Fix typos in comments. + +commit 455e68c030fde8a8c2f5e254c3b3ab9489bf3735 +Author: Lasse Collin +Date: Mon Feb 1 22:46:56 2010 +0200 + + Fix signal handling for --list. + +commit 82220a149015616f75641ee8bbea415137535b9b +Author: Lasse Collin +Date: Mon Feb 1 11:44:45 2010 +0200 + + Fix compression of symlinks with --force. + + xz --force accepted symlinks, but didn't remove + them after successful compression. Instead, an error + message was displayed. + +commit d4da177d5ba3d2ef7323a6f1e06ca16e0478810e +Author: Lasse Collin +Date: Mon Feb 1 10:20:57 2010 +0200 + + Fix a comment. 
+ +commit f9dd797a423a148903cf345b4146cb1fe1eab11d +Author: Lasse Collin +Date: Sun Jan 31 23:43:54 2010 +0200 + + Updated THANKS. + +commit ee5ddb8b28419fe4923ded5c18a50570a762dcab +Author: Lasse Collin +Date: Sun Jan 31 23:41:29 2010 +0200 + + Updated TODO. + +commit 11936ad3f5a2e97bda3463c7a56a2f4bb9265ea6 +Author: Lasse Collin +Date: Sun Jan 31 23:35:04 2010 +0200 + + Mention TODO in README. + +commit 2901a8e7e82af05675b8cd8758a8ceddb111359f +Author: Lasse Collin +Date: Sun Jan 31 23:31:14 2010 +0200 + + Updated INSTALL. + +commit 8884e16864ba53fb4b58623d7537d7ef30c28e11 +Author: Lasse Collin +Date: Sun Jan 31 23:28:51 2010 +0200 + + Revise the Windows build files. + + The old Makefile + config.h was deleted, because it + becomes outdated too easily and building with the + Autotools based build system works fine even on Windows. + + windows/build.sh hasn't got much testing, but it should + work to build 32-bit x86 and x86-64 versions of XZ Utils + using MSYS, MinGW or MinGW-w32, and MinGW-w64. + + windows/INSTALL-Windows.txt describes what packages are + needed and how to install them. + + windows/README-Windows.txt is a readme file for the binary + package that build.sh hopefully builds. + + There are no instructions about using Autotools for now, + so those using a git snapshot may want to run + "autoreconf -fi && ./configure && make mydist" on a UN*X + box and then copy the resulting .tar.gz to a Windows. + +commit 34eb5e201d62f7f46bbe6fe97cfe08cb31b3b88c +Author: Lasse Collin +Date: Sun Jan 31 19:52:38 2010 +0200 + + Select the default integrity check type at runtime. + + Previously it was set statically to CRC64 or CRC32 + depending on options passed to the configure script. + +commit 96a4f840e3b9ca5c81e5711ff9c267b194f93ef1 +Author: Lasse Collin +Date: Sun Jan 31 18:17:50 2010 +0200 + + Improve displaying of the memory usage limit. 
+ +commit b3cc4d8edd68a0250cc69680c99b9f7343f99cf2 +Author: Lasse Collin +Date: Sun Jan 31 12:53:56 2010 +0200 + + Don't use uninitialized sigset_t. + + If signal handlers haven't been established, then it's + useless to try to block them, especially since the sigset_t + used for blocking hasn't been initialized yet. + +commit 231c3c7098f1099a56abb8afece76fc9b8699f05 +Author: Lasse Collin +Date: Sun Jan 31 12:01:54 2010 +0200 + + Delay opening the destination file and other fixes. + + The opening of the destination file is now delayed a little. + The coder is initialized, and if decompressing, the memory + usage of the first Block is compared against the memory + usage limit before the destination file is opened. This + means that if --force was used, the old "target" file won't + be deleted so easily when something goes wrong very early. + Thanks to Mark K for the bug report. + + The above fix required some changes to progress message + handling. Now there is a separate function for setting and + printing the filename. It is used also in list.c. + + list_file() now handles stdin correctly (gives an error). + + A useless check for user_abort was removed from file_io.c. + +commit 0dbd0641db99d5e73d51d04ce7a71e52dc6b4105 +Author: Lasse Collin +Date: Fri Jan 29 22:48:04 2010 +0200 + + Add list.h to src/xz/Makefile.am. + + This should have been already in + 0bc9eab243dee3be764b3530433a7fcdc3f7c6a1. + +commit b4b1a56e0cbd597157858264f5c7189201ac9018 +Author: Lasse Collin +Date: Fri Jan 29 13:24:27 2010 +0200 + + Add lzmainfo.1 to manfiles list to convert to .txt and .pdf. + +commit 5574d64e03ad3a3d6e00e4b0d3e81c7b5529ec95 +Author: Lasse Collin +Date: Wed Jan 27 16:42:11 2010 +0200 + + Silence two compiler warnings on DOS-like systems. + +commit b063cc34a30a4edf109343ff373b2b62b8ca72d3 +Author: Lasse Collin +Date: Wed Jan 27 13:31:03 2010 +0200 + + Use PACKAGE_URL instead of custom PACKAGE_HOMEPAGE.
+ +commit 38b8035b5cb5f56457c5fa5a891d6900fcf5984f +Author: Lasse Collin +Date: Tue Jan 26 23:37:46 2010 +0200 + + Add a missing space to an error message. + + Thanks to Robert Readman. + +commit e5496f9628ff5979392a80421d0b63a4de8015b4 +Author: Lasse Collin +Date: Tue Jan 26 22:53:37 2010 +0200 + + Use past tense in error message in io_unlink(). + + Added a note to translators too. + + Thanks to Robert Readman. + +commit d9a9800597ea540090e434132c3b511217df0a2b +Author: Lasse Collin +Date: Tue Jan 26 15:42:24 2010 +0200 + + Fix too small static buffer in util.c. + + This was introduced in + 0dd6d007669b946543ca939a44243833c79e08f4 two days ago. + +commit d0b4bbf5da068503c099cd456e294d7673548cc0 +Author: Lasse Collin +Date: Tue Jan 26 14:46:43 2010 +0200 + + Minor comment fix. + +commit 0bc9eab243dee3be764b3530433a7fcdc3f7c6a1 +Author: Lasse Collin +Date: Sun Jan 24 23:50:54 2010 +0200 + + Add initial version of xz --list. + + This is a bit rough but should be useful for basic things. + Ideas (with detailed examples) about the output format are + welcome. + + The output of --robot --list is not necessarily stable yet, + although I don't currently have any plans about changing it. + + The man page hasn't been updated yet. + +commit df254ce03be016e217b511e7acd5d493f9929ca5 +Author: Lasse Collin +Date: Sun Jan 24 22:46:11 2010 +0200 + + Add io_pread(). + + It will be used by --list. + +commit ef68dd4a92976276304de2aedfbe34ae91a86abb +Author: Lasse Collin +Date: Sun Jan 24 22:45:14 2010 +0200 + + Set LC_NUMERIC=C when --robot is used. + + It is to ensure that floating point numbers + will always have a dot as the decimal separator. + +commit 0dd6d007669b946543ca939a44243833c79e08f4 +Author: Lasse Collin +Date: Sun Jan 24 16:57:40 2010 +0200 + + Some improvements to printing sizes in xz. + +commit 2a98fdffd68c66371279c211c29153c808ad5c1d +Author: Lasse Collin +Date: Wed Jan 20 22:02:35 2010 +0200 + + Fix a typo in README. + + Thanks to R. Bijker. 
+ +commit 07a11dad44e041b01dcfc73e8d4e00731158c06d +Author: Lasse Collin +Date: Sun Jan 17 11:59:54 2010 +0200 + + Updated windows/Makefile. + + Thanks to Dan Shechter for the patch. + + It is likely that windows/Makefile will be removed + completely, because Autotols based build nowadays + works well with both 32-bit and 64-bit MinGW (I + just need to update the docs). + +commit 37f31ead9d2b4e467df11450cf29ed7d7e3e25f3 +Author: Lasse Collin +Date: Fri Jan 15 11:05:11 2010 +0200 + + Update the xz man page to match the previous two commits. + +commit 3ffd5d81a43210c8da56da5c5b3637d3f8bc63c7 +Author: Lasse Collin +Date: Wed Jan 13 19:10:25 2010 +0200 + + Don't read compressed data from a terminal or write it + to a terminal even if --force is specified. + + It just seems more reasonable this way. + + The new behavior matches bzip2. The old one matched gzip. + +commit 23ac2c44c3ac76994825adb7f9a8f719f78b5ee4 +Author: Lasse Collin +Date: Wed Jan 13 18:12:40 2010 +0200 + + Don't compress or decompress special files unless writing + to stdout even if --force is used. + + --force will still enable compression of symlinks, but only + in case they point to a regular file. + + The new way simply seems more reasonable. It matches gzip's + behavior while the old one matched bzip2's behavior. + +commit cee12aa852ec0902983dc1f153346ef750157fb9 +Author: Lasse Collin +Date: Tue Jan 12 16:30:33 2010 +0200 + + Updated THANKS. + +commit 153c7740c54b3c90129dbd3d6153ac1303c4d605 +Author: Lasse Collin +Date: Tue Jan 12 16:18:14 2010 +0200 + + Add IRIX-specific code to tuklib_physmem and tuklib_cpucores. + + This is untested but it will get tested soon and, if needed, + fixed before 5.0.0. + + Thanks to Stuart Shelton. + +commit 8ea8dc754a7a5bc2d60db1eac201839cabdab6a1 +Author: Lasse Collin +Date: Fri Jan 1 00:29:10 2010 +0200 + + Fix _memconfig() functions. + + This affects lzma_memusage() and lzma_memlimit_get(). 
+ +commit 1a7ec87c8ee61dfc2e496d2e1fb7ab0939804691 +Author: Lasse Collin +Date: Thu Dec 31 22:45:53 2009 +0200 + + Revised the Index handling code. + + This breaks API and ABI but most apps are not affected + since most apps don't use this part of the API. You will + get a compile error if you are using anything that got + broken. + + Summary of changes: + + - Ability to store Stream Flags, which are needed + for random-access reading in multi-Stream files. + + - Separate function to set size of Stream Padding. + + - Iterator structure makes it possible to read the same + lzma_index from multiple threads at the same time. + + - A lot faster code to locate Blocks. + + - Removed lzma_index_equal() without adding anything + to replace it. I don't know what it should do exactly + with the new features and what actually needs this + function in the first place other than test_index.c, + which now has its own code to compare lzma_indexes. + +commit f29997a846e673cb3b8cbd57de47ed313b3978bb +Author: Lasse Collin +Date: Thu Dec 31 21:13:25 2009 +0200 + + Remove c-format tag in cs.po. + + It was fixed in the C code earlier. + +commit 097bad000363e0bf29f8274ad2d7ab59f7dbf644 +Author: Lasse Collin +Date: Thu Dec 31 21:11:05 2009 +0200 + + Add missing lzma_nothrow in filter.h. + +commit b56cb1fc31fa2381f92eefc040df85667048d626 +Author: Lasse Collin +Date: Wed Dec 9 18:13:44 2009 +0200 + + Remove redefinition of _(msgid) macro from lzmainfo.c. + +commit 171b03febfe09d9fae6ac8be6aa4518bcaf427d2 +Author: Jonathan Nieder +Date: Tue Dec 8 19:41:57 2009 -0600 + + update po/.gitignore + + Since the *.gmo files are deleted by the maintainer-clean target, + I assume they are not meant to be tracked. + + Also add the other files listed in the Makefile’s clean targets + (stamp-poT, xz.po, xz.[12].po, *.new.po, xz.mo) to make sure they + are not accidentally tracked. Most of these are intermediate + files that would not appear unless a build is interrupted or + fails. 
+ + Split the list of untracked files by origin to make it easier to + tell if files are missing in the future. + + Signed-off-by: Jonathan Nieder + +commit f7e44c6c11f630519072971b8b07a5729c096c36 +Author: Lasse Collin +Date: Wed Dec 9 00:38:55 2009 +0200 + + Always rely on GCC's auto-import on Windows. + + I understood that this is nicer, because then people + don't need to worry about the LZMA_API_STATIC macro. + + Thanks to Charles Wilson and Keith Marshall. + +commit 7b76a3e2336f25088957cba92b0dbd854d9caa3c +Author: Lasse Collin +Date: Mon Dec 7 21:46:53 2009 +0200 + + Fix file_io.c on DOS-like systems. + + The problem was introduced when adding sparse file + support in 465d1b0d6518c5d980f2db4c2d769f9905bdd902. + + Thanks to Charles Wilson. + +commit 0696f5d268362221380e039bad48a86e29067c6a +Author: Lasse Collin +Date: Mon Dec 7 20:54:21 2009 +0200 + + Add Czech translation. + + Thanks to Marek Černocký. + + Other people planning to translate xz: Note that the + messages are a little bit in flux still. Translations + are still welcome, just be prepared to some extra work + in case there are changes. + +commit 5e817a50d276f0a3607638c1c1d449d50b9aa4e5 +Author: Lasse Collin +Date: Mon Dec 7 20:32:08 2009 +0200 + + Add a note for translators to add a bug reporting address + for translation bugs. + +commit 6db1c35be9e1e364cdacff6878910e1b7aac2a37 +Author: Lasse Collin +Date: Mon Dec 7 20:07:02 2009 +0200 + + Prevent xgettext from taking one regular string as a C format string. + + Thanks to Marek Černocký. + +commit e0c2776b6ffbd2b1900fde353aceac734edc93d7 +Author: Lasse Collin +Date: Sat Nov 28 17:45:22 2009 +0200 + + Remove duplicate code in io_open_dest(). + + Fix a missing _() in the error message too. + +commit f057a33c6f7c5992389479f2d4feabf2900ba7ee +Author: Lasse Collin +Date: Thu Nov 26 10:11:23 2009 +0200 + + Typo fix to sysdefs.h. + + Thanks to Jonathan Nieder. 
+ +commit 8767b41534eafdf5e742e12190646bf5740b0cdb +Author: Lasse Collin +Date: Thu Nov 26 10:10:36 2009 +0200 + + Fix a memory leak in test_index.c. + + This was introduced in + bd13b04e202b6f495a68eb0766f97085b7c50a06. + + Thanks to Jim Meyering for noticing it. + +commit 919fbaff860acdaa4bcd216500a0b1c960a6db92 +Author: Lasse Collin +Date: Wed Nov 25 14:22:19 2009 +0200 + + Add missing error check to coder.c. + + With bad luck this could cause a segfault due to + reading (but not writing) past the end of the buffer. + +commit bd13b04e202b6f495a68eb0766f97085b7c50a06 +Author: Lasse Collin +Date: Wed Nov 25 13:04:10 2009 +0200 + + Fix bugs in lzma_index_read() and lzma_index_cat(). + + lzma_index_read() didn't skip over Stream Padding + if it was the first record in the Index. + + lzma_index_cat() didn't combine small Indexes correctly. + + The test suite was updated to check for these bugs. + + These bugs didn't affect the xz command line tool or + most users of liblzma in any way. + +commit 1f196909143b888e062bd9a0c4ba8c34d3019bfa +Author: Lasse Collin +Date: Wed Nov 25 12:52:56 2009 +0200 + + Index decoder fixes. + + The Index decoder code didn't perfectly match the API docs, + which said that *i will be set to point to the decoded Index + only after decoding has succeeded. The docs were a bit unclear + too. + + Now the decoder will initially set *i to NULL. *i will be set + to point to the decoded Index once decoding has succeeded. + This simplifies applications too, since it avoids dangling + pointers. + +commit 465d1b0d6518c5d980f2db4c2d769f9905bdd902 +Author: Lasse Collin +Date: Wed Nov 25 11:19:20 2009 +0200 + + Create sparse files by default when decompressing into + a regular file. + + Sparse file creation can be disabled with --no-sparse. + I don't promise yet that the name of this option won't + change before 5.0.0. 
It's possible that the code, that + checks when it is safe to use sparse output on stdout, + is not good enough, and a more flexible command line + option is needed to configure sparse file handling. + +commit 37de544414fc2dc5039471d1002ebd015eb3e627 +Author: Lasse Collin +Date: Sun Nov 22 12:43:06 2009 +0200 + + Updated THANKS. + +commit f1a28b96c900c658fe016852ff62f6c24d1f50fa +Author: Lasse Collin +Date: Sun Nov 22 12:05:33 2009 +0200 + + Add missing consts to pointer casts. + +commit b9b5c54cd438b3ae47b44cc211b71f3bc53e35ef +Author: Lasse Collin +Date: Sun Nov 22 12:00:30 2009 +0200 + + Enable assembler code only if it is known to work + on that operating system. + + I'm too lazy to think how to make a good Autoconf test + for this and it's not that important anyway. + + No longer define HAVE_ASM_X86 or HAVE_ASM_X86_64. + Inline assembler (if any) is used if a macro like + __i386__ or __x86_64__ is defined. + +commit 0733f4c9994db696420a405810d5f02c79ebc404 +Author: Lasse Collin +Date: Sun Nov 22 11:55:03 2009 +0200 + + Make fastpos.h use tuklib_integer.h instead of bsr.h + when --enable-small has been specified. + +commit 7ac3985d891dcc5773543f84cc5bce6c14841b12 +Author: Lasse Collin +Date: Sun Nov 22 11:52:30 2009 +0200 + + Update tuklib_integer.h with bit scan functions. + + Thanks to Joachim Henke for the original patch. + +commit c74c132f7f79a842c073c66575a4fdb985e4c2e3 +Author: Lasse Collin +Date: Fri Nov 20 12:51:19 2009 +0200 + + Update tuklib_cpucores.m4 and tuklib_physmem.m4 from tuklib, + which now use AC_CACHE_CHECK. Using the cache variable, + configure now warns if there is no method to detect the amount + of RAM and recommends using --enable-assume-ram. + +commit d315ca4930ff96e1428c6021c96f209e1abdd83e +Author: Lasse Collin +Date: Mon Nov 16 18:16:45 2009 +0200 + + Add support for --info-memory and --robot to xz. + + Currently --robot works only with --info-memory and + --version. 
--help and --long-help work too, but --robot + has no effect on them. + + Thanks to Jonathan Nieder for the original patches. + +commit e330fb7e6b8162894280c8a3dc22fdc05cd2d85e +Author: Lasse Collin +Date: Sun Nov 15 12:54:45 2009 +0200 + + Fix wrong indentation caused by incorrect settings + in the text editor. + +commit 93e418562cf127a9171e87bcd4e9af8e1bfcdae4 +Author: Lasse Collin +Date: Sun Nov 15 12:40:17 2009 +0200 + + Add lzma_physmem(). + + I had hoped to keep liblzma as purely a compression + library as possible (e.g. file I/O will go into + a different library), but it seems that applications + linking agaisnt liblzma need some way to determine + the memory usage limit, and knowing the amount of RAM + is one reasonable way to help making such decisions. + + Thanks to Jonathan Nieder for the original patch. + +commit cf39faca59083d38422058c6c97aa757ea7797d0 +Author: Lasse Collin +Date: Sat Nov 14 20:21:19 2009 +0200 + + Updated THANKS. + +commit 2ddcae247c284cc2f396b6cfdab57790c7588b5f +Author: Lasse Collin +Date: Sat Nov 14 20:20:03 2009 +0200 + + Some updates to xz man page. + +commit 19b2674f07f8b588dfaf6638396b4b42866d7e23 +Author: Lasse Collin +Date: Sat Nov 14 19:51:03 2009 +0200 + + Fix description of --memory in --long-help. + +commit 2291346f0cccf88e605d84b75c9c5aaaaddb5df8 +Author: Lasse Collin +Date: Sat Nov 14 19:45:39 2009 +0200 + + Update the debug programs so that they compile again. + +commit 418d64a32e8144210f98a810738fed5a897e8367 +Author: Lasse Collin +Date: Sat Nov 14 18:59:19 2009 +0200 + + Fix a design error in liblzma API. + + Originally the idea was that using LZMA_FULL_FLUSH + with Stream encoder would read the filter chain + from the same array that was used to intialize the + Stream encoder. Since most apps wouldn't use + LZMA_FULL_FLUSH, most apps wouldn't need to keep + the filter chain available after initializing the + Stream encoder. However, due to my mistake, it + actually required keeping the array always available. 
+ + Since setting the new filter chain via the array + used at initialization time is not a nice way to do + it for a couple of reasons, this commit ditches it + and introduces lzma_filters_update(). This new function + replaces also the "persistent" flag used by LZMA2 + (and to-be-designed Subblock filter), which was also + an ugly thing to do. + + Thanks to Alexey Tourbin for reminding me about the problem + that Stream encoder used to require keeping the filter + chain allocated. + +commit f0bf7634b77263a4dd02b20c71861ab67995da68 +Author: Lasse Collin +Date: Sat Oct 17 11:11:58 2009 +0300 + + Fix wrong function name in the previous commit. + + It was meant to be lzma_filters_copy(), not lzma_filters_dup(). + +commit 6d118a0b9def82e96afba7386ec8d7da0b59649f +Author: Lasse Collin +Date: Sat Oct 17 01:47:07 2009 +0300 + + Add lzma_filters_copy(). + + This will be needed internally by liblzma once I fix + a design mistake in the encoder API. This function may + be useful to applications too so it's good to export it. + +commit 78e92c18470483e161388e679c1ee556adb3a691 +Author: Jonathan Nieder +Date: Thu Oct 15 20:44:13 2009 -0500 + + Escape dashes in xzmore.1 + + A minus sign is larger, easier to see in a printout, and more + likely to use the same glyph as ASCII hyphen-minus in a terminal + than a hyphen. Since broken manual pagers do not find hyphens + when the user searches for a hyphen-minus, minus signs are also + easier to search for. So use minus signs instead of hyphens to + render sample terminal output. + +commit 7b7fe902d98da28e5769e2aa1e0c08c92384f7ee +Author: Lasse Collin +Date: Fri Oct 16 20:35:39 2009 +0300 + + Mention --check=none in --long-help. It was already in + the man page though. + + Thanks to Jim Meyering for noticing this. + +commit ebfb2c5e1f344e5c6e549b9dedaa49b0749a4a24 +Author: Lasse Collin +Date: Sun Oct 4 22:57:12 2009 +0300 + + Use a tuklib module for integer handling. + + This replaces bswap.h and integer.h. 
The tuklib module uses <byteswap.h> on GNU, + <sys/endian.h> on *BSDs and + <sys/byteorder.h> on Solaris, which may contain optimized code + like inline assembly.
+ +commit 932b2e204463d70f3eee5b8a1ea5a23bf9d001a4 +Author: Lasse Collin +Date: Tue Sep 22 14:03:02 2009 +0300 + + Better fixes for OpenVMS support. + + Thanks to Jouk Jansen. + +commit 4c3630ec4179fe9265407a35c4db1374ffc82372 +Author: Lasse Collin +Date: Tue Sep 22 13:40:19 2009 +0300 + + Avoid non-standard preprocessor construct. + + Thanks to Jouk Jansen. + +commit 0deb1bb60addd1306b525e0ac0ad2a84eb0390d9 +Author: Lasse Collin +Date: Mon Sep 21 19:50:09 2009 +0300 + + Make sure that TUKLIB_DOSLIKE doesn't get defined on Cygwin. + + Thanks to Charles Wilson. + +commit e599bba4216c0edb8cc8f40adad3a6dba88685f4 +Author: Lasse Collin +Date: Sat Sep 19 09:47:30 2009 +0300 + + Various changes. + + Separate a few reusable components from XZ Utils specific + code. The reusable code is now in "tuklib" modules. A few + more could be separated still, e.g. bswap.h. + + Fix some bugs in lzmainfo. + + Fix physmem and cpucores code on OS/2. Thanks to Elbert Pol + for help. + + Add OpenVMS support into physmem. Add a few #ifdefs to ease + building XZ Utils on OpenVMS. Thanks to Jouk Jansen for the + original patch. + +commit 49cfc8d392cf535f8dd10233225b1fc726fec9ef +Author: Lasse Collin +Date: Tue Sep 15 21:07:23 2009 +0300 + + Fix incorrect use of "restrict". + +commit 15ffd675ab7af84592eb1c23b0e9f4699aa0fd8c +Author: Lasse Collin +Date: Sat Sep 12 14:09:17 2009 +0300 + + Fix GCC version check for nothrow attribute. + +commit 6bfdd3a88a819f04c8f202e7d3c6f88a01c7d224 +Author: Lasse Collin +Date: Sat Sep 12 14:08:15 2009 +0300 + + Updated THANKS. + +commit 4ab7b16b9573bdfa32279e4adadff684d5cd58ac +Author: Lasse Collin +Date: Sat Sep 12 14:07:36 2009 +0300 + + A few grammar fixes. + + Thanks to Christian Weisgerber for pointing out some of these. + +commit 8905a33daadcd2d6557c83c81c490b827d566c94 +Author: Lasse Collin +Date: Fri Sep 11 17:08:15 2009 +0300 + + Updated THANKS. 
+ +commit 68059334ff435300ab1ce2c616b0eee1b0d88dd9 +Author: Lasse Collin +Date: Fri Sep 11 17:06:32 2009 +0300 + + Add PACKAGE_HOMEPAGE to {windows,dos}/config.h to fix build errors. + +commit 221be761f467da76875247bc02d7a1716682075d +Author: Lasse Collin +Date: Fri Sep 11 10:24:09 2009 +0300 + + Use $(LN_EXEEXT) in symlinks to executables. + + This fixes "make install" on operating systems using + a suffix for executables. + + Cygwin is treated specially. The symlink names won't have + .exe suffix even though the executables themselves have. + Thanks to Charles Wilson. + +commit 18a4233a53d9b82abac7db7d7804684c5fea9c2c +Author: Lasse Collin +Date: Fri Sep 11 09:25:09 2009 +0300 + + Fix a couple of warnings. + +commit 429910b2ba67611d8df60d1a9da9641bdb5f82b4 +Author: Lasse Collin +Date: Sat Sep 5 18:39:21 2009 +0300 + + Add OS/2-specific code to physmem.h. + + Also move DJGPP-specific code near the code meant + for other DOS-like systems. + +commit 7aca7b3174bcbba4a4915682ff0cd405d63f5740 +Author: Lasse Collin +Date: Sat Sep 5 01:21:15 2009 +0300 + + Updated THANKS. + +commit 60ccb80c9c4a0d771acc5b7d9d6f32b17fed1071 +Author: Lasse Collin +Date: Sat Sep 5 01:20:29 2009 +0300 + + Use sysctl() != -1 instead of !sysctl() to check if + the function call succeeded. + + NetBSD 4.0 returns positive values on success, but + NetBSD Current and FreeBSD return zero. OpenBSD's + man page doesn't tell what sysctl() returns on + success. All these BSDs return -1 on error. + + Thanks to Robert Elz and Thomas Klausner. + +commit 173368911cf09ab0b03fc4db8f3d4b81d86dce32 +Author: Lasse Collin +Date: Wed Sep 2 09:43:51 2009 +0300 + + Mention in INSTALL that --enable-small doesn't modify CFLAGS. + +commit 319a0fd7d7e9ebbb71ca6930abfc20777cb4aacc +Author: Lasse Collin +Date: Tue Sep 1 20:40:01 2009 +0300 + + Refactored option parsing. + +commit 25adaaa56e2e51a47a910a8d73452414619a2e53 +Author: Lasse Collin +Date: Tue Sep 1 20:23:30 2009 +0300 + + Fix options parsing bug in xz. 
+ + xz used to reject "xz --lzma2=pb=2," while + "xz --lzma2=pb=2,," worked. Now both work. + +commit 5f6dddc6c911df02ba660564e78e6de80947c947 +Author: Lasse Collin +Date: Tue Sep 1 20:20:19 2009 +0300 + + Updated TODO. + +commit 655457b9ada5ec7db398c5392e41290f3f332ea8 +Author: Lasse Collin +Date: Mon Aug 31 21:59:25 2009 +0300 + + Revert 43f44160b1ddcbf7e5205c37db09b3bebe7226f9 + and use a fix that works on all systems using + GNU assembler. + + Maybe the assembler code is used e.g. on Solaris x86 + but let's worry about it if this doesn't work on it. + +commit 162189c3477953805a28f96d3a75cb9ab9417928 +Author: Lasse Collin +Date: Sun Aug 30 17:29:19 2009 +0300 + + Updated THANKS. + +commit 2331f5f97af3e5897e23da45d9df3d664099c7f8 +Author: Lasse Collin +Date: Sun Aug 30 17:28:52 2009 +0300 + + Add more OS/2 specific info to INSTALL. + +commit 94c66b3297b3ad307eee93cf6b160e3c43997f11 +Author: Lasse Collin +Date: Sat Aug 29 14:43:52 2009 +0300 + + Use even more hackish way to support thousand separators. + + Seems that in addition on Windows and DOS, also OpenBSD + lacks support for %'d style printf() format strings. + So far that is the only modern POSIX-like system I know + with this problem, but after this hack, the thousand + separator shouldn't be a problem on any system. + + Maybe testing if a format string like %'d produces + reasonable output is invoking undefined behavior on some + systems, but so far all the problematic systems I've tried + just print the raw format string (e.g. %'d prints 'd). + + Maybe Autoconf test would have been better, but this + hack works also for cross-compilation, and avoids + recompilation in case the system libc starts to support + the thousand separator. + +commit 3432e9c6aab851da1227b63dce645d7f190c04d8 +Author: Lasse Collin +Date: Sat Aug 29 13:42:56 2009 +0300 + + Updated THANKS. + +commit 27414daadf5727e8ab942374b5ec1c8990122878 +Author: Lasse Collin +Date: Sat Aug 29 13:39:21 2009 +0300 + + Fix sysctl() usage. 
+ + This fixes build on *BSDs and Darwin. + + Thanks to Jukka Salmi for the patches. + Richard Koch reported the problem too. + +commit 43f44160b1ddcbf7e5205c37db09b3bebe7226f9 +Author: Lasse Collin +Date: Sat Aug 29 13:35:23 2009 +0300 + + Fix x86 assembler on GCC 3. + + Thanks to Karl Berry. + +commit 682efdc1f9492fdd76c9ce82e7c00ca0768067e8 +Author: Lasse Collin +Date: Thu Aug 27 18:36:59 2009 +0300 + + "make dist" fixes + +commit c8c184db1c95bf70f78256ec6237845a57f342af +Author: Lasse Collin +Date: Thu Aug 27 17:08:33 2009 +0300 + + Update xz man page date. + +commit 9756fce565e98b8fa5fe6ead296d84e7601ec254 +Author: Lasse Collin +Date: Thu Aug 27 17:00:22 2009 +0300 + + Fix the debug directory. + + 6a2eb54092fc625d59921a607ff68cd1a90aa898 and + 71f18e8a066a01dda0c8e5508b135ef104e43e4c required + some changes that weren't applied in debug. + +commit 77007a7fb20187fcf3d1dd9839c79ace2d63f2ea +Author: Lasse Collin +Date: Thu Aug 27 16:36:40 2009 +0300 + + Add missing files to EXTRA_DIST. + +commit 04dcbfdeb921e5f361a4487134e91e23fffbe09d +Author: Lasse Collin +Date: Thu Aug 27 16:21:22 2009 +0300 + + Bumped version to 4.999.9beta. + +commit fd7618611a22f42a6913bc8d518c9bbc9252d6b4 +Author: Lasse Collin +Date: Thu Aug 27 16:17:47 2009 +0300 + + Updated THANKS. + +commit c29e76c0f910fca0a90a50b78d337f6c32623e9d +Author: Lasse Collin +Date: Thu Aug 27 16:12:52 2009 +0300 + + .xz file format specification 1.0.4 (probably). + + Thanks to Christian von Roques, Peter Lawler, + and Jim Meyering for the fixes. + +commit 696d7ee3953beaf4f0ed18e78917ccf300431966 +Author: Lasse Collin +Date: Thu Aug 27 15:43:54 2009 +0300 + + Require GNU libtool 2.2. + +commit 4c3558aa8305a8f8b6c43b8569eb539717ca9e8d +Author: Lasse Collin +Date: Thu Aug 27 15:34:45 2009 +0300 + + Add "dos" to EXTRA_DIST. + +commit 35b29e4424ced5a3ababf132283e519080c7b298 +Author: Lasse Collin +Date: Thu Aug 27 15:23:27 2009 +0300 + + Updated TODO. 
+ +commit 23414377192c21f3f34c84cdfe0ef0fbd06a1dea +Author: Lasse Collin +Date: Thu Aug 27 15:17:00 2009 +0300 + + Some xz man page improvements. + +commit 371b04e19fc9051dbaeec51ec0badec6a1f0699d +Author: Lasse Collin +Date: Thu Aug 27 10:41:01 2009 +0300 + + Removed doc/bugs.txt. + +commit d88c4072b36d3a76f839185799fb1d91037a1b81 +Author: Lasse Collin +Date: Thu Aug 27 10:40:25 2009 +0300 + + Updated README. + + It now includes bug reporting instructions/tips. + +commit 92e536d8b8d33a6b12d0802bcd7be4437046f13e +Author: Lasse Collin +Date: Thu Aug 27 10:21:18 2009 +0300 + + Fix a typo in FAQ. + + Thanks to Jim Meyering. + + (From now on, I try to always remember to put + the relevant thanks to commit messages.) + +commit 3e2ba8b58585743e59251e69ad2783eb08357079 +Author: Lasse Collin +Date: Thu Aug 27 10:13:46 2009 +0300 + + Updates to liblzma API headers. + + Added lzma_nothrow for every function. It adds + throw() when the header is used in C++ code. + + Some lzma_attrs were added or removed. + + Lots of comments were improved. + +commit 8e8ebc17c535a1f8846718059b48417409c37050 +Author: Lasse Collin +Date: Tue Aug 18 00:30:09 2009 +0300 + + Install faq.txt. + +commit b198e770a146e4a41f91a93f0b233713f2515848 +Author: Lasse Collin +Date: Tue Aug 18 00:26:48 2009 +0300 + + Updated faq.txt. + + Some questions worth answering were removed, because I + currently don't have good up to date answers to them. + +commit fe111a25cd788d31b581996e4533910388a7f0a9 +Author: Lasse Collin +Date: Mon Aug 17 22:45:50 2009 +0300 + + Some xz man changes. + +commit 10242a21e9abda0c5c6a03501703cc40b8a699a5 +Author: Lasse Collin +Date: Sun Aug 16 22:15:42 2009 +0300 + + Updated THANKS. + +commit 3ce1916c83041113b9cad9ead5c97a527cf8aa1d +Author: Lasse Collin +Date: Sun Aug 16 22:15:13 2009 +0300 + + Fix data corruption in LZ/LZMA2 encoder. + + Thanks to Jonathan Stott for the bug report. 
+ +commit 66da129c8ec33dd66acc92f113f7c1ca740ca81a +Author: Lasse Collin +Date: Thu Aug 13 15:15:37 2009 +0300 + + Updated INSTALL and PACKAGERS to match the changes + made in --enable-dynamic. + +commit 8238c4b2402f952c4e492e5b778aa272e57b6705 +Author: Lasse Collin +Date: Thu Aug 13 15:03:46 2009 +0300 + + Link lzmainfo against shared liblzma by default. + +commit 71f18e8a066a01dda0c8e5508b135ef104e43e4c +Author: Lasse Collin +Date: Thu Aug 13 15:00:21 2009 +0300 + + Make --enable-dynamic a tristate option. + + Some programs will by default be linked against static + liblzma and some against shared liblzma. --enable-dynamic + now allows overriding the default to both directions + (all dynamic or all static) even when building both + shared and static liblzma. + + This is quite messy compared to how simple thing it is supposed + to be. The complexity is mostly due to Windows support. + +commit 5aa4678b2342dcfc1d2b31aa9fa4f39c539e4b61 +Author: Lasse Collin +Date: Thu Aug 13 12:56:47 2009 +0300 + + Fix xz Makefile.am for the man page. + + install-exec-hook -> install-data-hook + +commit e51b4e49e800bd84e6d589dca2964d3985e88139 +Author: Lasse Collin +Date: Thu Aug 13 12:55:45 2009 +0300 + + Add lzmainfo for backward compatibility with LZMA Utils. + + lzmainfo now links against static liblzma. In contrast + to other command line tools in XZ Utils, linking lzmainfo + against static liblzma by default is dumb. This will be + fixed once I have fixed some related issues in configure.ac. + +commit a4165d0584376d948c213ec93c6065d24ff6a5e7 +Author: Lasse Collin +Date: Thu Aug 13 12:42:36 2009 +0300 + + Sync some error messages from xz to xzdec. + + Make xz error message translation usable outside + xz (at least in upcoming lzmainfo). + +commit df636eb4e066b4e154ce8e66e82c87ba1db652a6 +Author: Lasse Collin +Date: Thu Aug 13 09:37:21 2009 +0300 + + Add xz man page to manfiles in toplevel Makefile.am. 
+ +commit 180bdf58ea5bb07941e0a99b304d9aa832198748 +Author: Lasse Collin +Date: Thu Aug 13 09:37:01 2009 +0300 + + Fix first line of xz man page. + +commit e1ce2291e759b50ebfcf7cbbcc04cd098f1705a4 +Author: Lasse Collin +Date: Mon Aug 10 11:22:31 2009 +0300 + + Added a rough version of the xz man page. + +commit e71903fc6101f1c039d702e335b08aad1e1b4100 +Author: Jonathan Nieder +Date: Sun Aug 9 13:41:20 2009 -0500 + + “xzdiff a.xz b.xz” always fails + + Attempts to compare two compressed files result in no output and + exit status 2. + + Instead of going to standard output, ‘diff’ output is being + captured in the xz_status variable along with the exit status from + the decompression commands. Later, when this variable is examined + for nonzero status codes, numerals from dates in the ‘diff’ output + make it appear as though decompression failed. + + So let the ‘diff’ output leak to standard output with another file + descriptor. (This trick is used in all similar contexts elsewhere + in xzdiff and in the analogous context in gzip’s zdiff script.) + +commit 1d314b81aa5b0c4530638ffabd4e0edb52e5362c +Author: Jonathan Nieder +Date: Sun Aug 9 13:22:12 2009 -0500 + + xzless: Support compressed standard input + + It can be somewhat confusing that + + less < some_file.txt + + works fine, whereas + + xzless < some_file.txt.xz + + does not. Since version 429, ‘less’ allows a filter specified in + the LESSOPEN environment variable to preprocess its input even if + it comes from standard input, if $LESSOPEN begins with ‘|-’. So + set $LESSOPEN to take advantage of this feature. + + Check less’s version at runtime so xzless can continue to work + with older versions. + +commit a7f5d2fe4826ac68839d00059f05004fb81d5c69 +Author: Lasse Collin +Date: Sun Aug 9 20:57:46 2009 +0300 + + GPLv2+ not GPLv2 for Doxyfile.in is probably OK. 
+ +commit b735cde20cc14857136ae65a0e5d336ed7ddc862 +Author: Lasse Collin +Date: Sun Aug 2 00:27:29 2009 +0300 + + Added a copyright notice to Doxyfile.in since it contains + lots of comments from Doxygen. + + It seems that the Doxygen authors' intent is to not apply + their copyright on generated files, but since it doesn't + matter for XZ Utils at all, better safe than sorry. + +commit 0fd157cc008446adfc8f91394f5503868025a642 +Author: Lasse Collin +Date: Sun Aug 2 00:11:37 2009 +0300 + + Updated THANKS. + +commit b198da96ff9ac8c89b466b4d196c5f3fe1c7904f +Author: Lasse Collin +Date: Sun Aug 2 00:10:22 2009 +0300 + + Updated TODO. + +commit 669413bb2db954bbfde3c4542fddbbab53891eb4 +Author: Lasse Collin +Date: Thu Jul 30 12:25:55 2009 +0300 + + Updated THANKS. + +commit dbbd8fb870ae789d96497911006c869d37148c15 +Author: Jonathan Nieder +Date: Tue Jul 28 17:37:24 2009 -0500 + + xzdiff: add missing ;; to case statement + +commit adbad2d16cb5909f85d4a429011005613ea62ffe +Author: Lasse Collin +Date: Fri Jul 24 13:15:06 2009 +0300 + + Added history.txt to doc_DATA. + +commit e0236f12569eb36f9b81ce7a1e52e0f73698ac27 +Author: Lasse Collin +Date: Fri Jul 24 12:00:40 2009 +0300 + + Updated .gitignore files. + +commit 2f34fb269265e3aba43a2a9c734020a45268826d +Author: Lasse Collin +Date: Fri Jul 24 11:34:02 2009 +0300 + + Minor improvements to COPYING. + +commit 0db1befcfbc120377df4b89923762f16d25f548a +Author: Lasse Collin +Date: Thu Jul 23 19:10:55 2009 +0300 + + Fix incorrect usage of getopt_long(), which caused + invalid memory access if XZ_OPT was defined. + +commit 8f8ec942d6d21ada2096eaf063411bc8bc7e2d48 +Author: Lasse Collin +Date: Mon Jul 20 15:43:32 2009 +0300 + + Avoid internal error with --format=xz --lzma1. + +commit 99f9e879a6a8bb54a65da99c12e0f390216c152a +Author: Lasse Collin +Date: Sun Jul 19 13:14:20 2009 +0300 + + Major documentation update. + + Installation and packaging instructions were added. + README and other generic docs were revised. 
+ + Some of the documentation files are now installed to $docdir. + +commit ef4cf1851de89022cba5674784f1a8f6343c15b0 +Author: Lasse Collin +Date: Sun Jul 19 11:09:31 2009 +0300 + + Added missing author notice to xzless.in. + +commit 4c9c989d45b188667799a7a1d6c728ed43f7bf77 +Author: Lasse Collin +Date: Sat Jul 18 18:54:55 2009 +0300 + + Use AC_CONFIG_AUX_DIR to clean up the toplevel directory + a little. + + Fixed a related bug in the toplevel Makefile.am. + + Added the build-aux directory to .gitignore. + +commit 366e436090a7a87215e9bf0e3ddcd55f05b50587 +Author: Lasse Collin +Date: Sat Jul 18 14:34:08 2009 +0300 + + Updated the totally outdated TODO file. + +commit 64e498c89d8b9966e8663f43bf64d47c26c55c62 +Author: Lasse Collin +Date: Sat Jul 18 11:26:39 2009 +0300 + + Added public domain notice into a few files. + +commit a35755c5de808df027675688855d1b621a4fb428 +Author: Lasse Collin +Date: Tue Jul 14 21:10:36 2009 +0300 + + Allow extra commas in filter-specific options on xz command line. + + This may slightly ease writing scripts that construct + filter-specific option strings dynamically. + +commit 98f3cac1ad31191c5160a7e48398bf85141e941c +Author: Lasse Collin +Date: Tue Jul 14 18:04:31 2009 +0300 + + Accept --lzma2=preset=6e where "e" is equivalent to --extreme + when no custom chain is in use. + +commit d873a09e956363e54bf58c577c8f7e487b6fb464 +Author: Lasse Collin +Date: Sun Jul 12 19:08:30 2009 +0300 + + Add dist-hook to create ChangeLog from the commit log, + and to conver the man pages to PDF and plain text, which + may be convenient to those who cannot render man pages. + +commit cd69a5a6c16c289f6f8e2823b03c72289472270f +Author: Lasse Collin +Date: Fri Jul 10 11:39:38 2009 +0300 + + BCJ filters: Reject invalid start offsets with LZMA_OPTIONS_ERROR. + + This is a quick and slightly dirty fix to make the code + conform to the latest file format specification. 
Without + this patch, it's possible to make corrupt files by + specifying start offset that is not a multiple of the + filter's alignment. Custom start offset is almost never + used, so this was only a minor bug. + + The xz command line tool doesn't validate the start offset, + so one will get a bit unclear error message if trying to use + an invalid start offset. + +commit eed9953732b801f6c97317fb3160445a8754180b +Author: Lasse Collin +Date: Fri Jul 10 11:33:21 2009 +0300 + + Look for full command names instead of substrings + like "un", "cat", and "lz" when determining if + xz is run as unxz, xzcat, lzma, unlzma, or lzcat. + + This is to ensure that if xz is renamed (e.g. via + --program-transform-name), it doesn't so easily + work in wrong mode. + +commit 6f62fa88f4ff7ba78565c314c0e6e71c498fa658 +Author: Lasse Collin +Date: Wed Jul 8 23:06:46 2009 +0300 + + Updated THANKS. + +commit 1754b7e03e2aa7e2e0196807fe8b0f3f5a637b0e +Author: Lasse Collin +Date: Wed Jul 8 23:05:29 2009 +0300 + + Portability improvement to version.sh. + +commit 3bdb53792c0e3e3febe9370e56eda5b08f89410f +Author: Lasse Collin +Date: Wed Jul 8 22:50:16 2009 +0300 + + Remove --force from xzdec. + + It was ignored for compatibility with xz, but now that + --decompress --stdout --force copies unrecognized files + as is to stdout, simply ignoring --force in xzdec would + be wrong. xzdec will not support copying unrecognized + data as is to stdout, so it cannot support --force. + +commit 5f16ef4abf220028a9ddbcb138217597a9455f62 +Author: Lasse Collin +Date: Mon Jul 6 10:36:04 2009 +0300 + + Use sed instead of $(SED) so that we don't need to + use AC_PROG_SED. We don't do anything fancy with sed, + so this should work OK. libtool 2.2 sets SED but 1.5 + doesn't, so $(SED) happened to work when using libtool 2.2. 
+ +commit 96e4b257e101d72072d43e144897d92920270669 +Author: Lasse Collin +Date: Sun Jul 5 22:25:17 2009 +0300 + + Major update to the xzgrep and other scripts based on + the latest versions found from gzip CVS repository. + + configure will try to find a POSIX shell to be used by + the scripts. This should ease portability on systems + which have pre-POSIX /bin/sh. + + xzgrep and xzdiff support .xz, .lzma, .gz, and .bz2 files. + xzmore and xzless support only .xz and .lzma files. + + The name of the xz executable used in these scripts is + now correct even if --program-transform-name has been used. + +commit 25cc7a6e8c2506a0d80084a4c1c67d33e7439100 +Author: Lasse Collin +Date: Sun Jul 5 19:26:53 2009 +0300 + + Use @PACKAGE_HOMEPAGE@ in liblzma.pc.in. + +commit 18c10c30d2833f394cd7bce0e6a821044b15832f +Author: Lasse Collin +Date: Sat Jul 4 00:40:44 2009 +0300 + + Make "xz --decompress --stdout --force" copy unrecognized + files as is to standard output. + + This feature is needed to be more compatible with gzip's + behavior. This was more complicated to implement than it + sounds, because the way liblzma is able to return errors with + files of only a few bytes in size. xz now has its own file + type detection code and no longer uses lzma_auto_decoder(). + +commit 0a289c01ac821ea9c4250aa906b0ae3cfa953633 +Author: Lasse Collin +Date: Thu Jul 2 14:30:38 2009 +0300 + + Define PACKAGE_HOMEPAGE in configure.ac and use it in + xz and xzdec. + + Use also PACKAGE_NAME instead of hardcoding "XZ Utils". + +commit 5cc99db5bae8633f85559e5cdaef4cd905a4ee9c +Author: Lasse Collin +Date: Wed Jul 1 12:21:24 2009 +0300 + + Avoid visibility related compiler warnings on Windows. + +commit 7653d1cf48080e63b189ed9d58dea0e82b6b1c5e +Author: Lasse Collin +Date: Tue Jun 30 17:14:39 2009 +0300 + + Use static liblzma by default also for tests. 
+ +commit f42ee981668b545ab6d06c6072e262c29605273c +Author: Lasse Collin +Date: Tue Jun 30 17:09:57 2009 +0300 + + Build system fixes + + Don't use libtool convenience libraries to avoid recently + discovered long-standing subtle but somewhat severe bugs + in libtool (at least 1.5.22 and 2.2.6 are affected). It + was found when porting XZ Utils to Windows + + but the problem is significant also e.g. on GNU/Linux. + + Unless --disable-shared is passed to configure, static + library built from a set of convenience libraries will + contain PIC objects. That is, while libtool builds non-PIC + objects too, only PIC objects will be used from the + convenience libraries. On 32-bit x86 (tested on mobile XP2400+), + using PIC instead of non-PIC makes the decompressor 10 % slower + with the default CFLAGS. + + So while xz was linked against static liblzma by default, + it got the slower PIC objects unless --disable-shared was + used. I tend to develop and benchmark with --disable-shared + due to faster build time, so I hadn't noticed the problem + in benchmarks earlier. + + This commit also adds support for building Windows resources + into liblzma and executables. + +commit 89dac1db6f168d7469cfbc4432651d4724c5c0de +Author: Lasse Collin +Date: Mon Jun 29 22:19:51 2009 +0300 + + Added a comment about "autoconf -fi" to autogen.sh. + +commit 6e685aae4594bc0af1b5032e01bb37d0edaa3ebd +Author: Lasse Collin +Date: Sun Jun 28 10:04:24 2009 +0300 + + Add -no-undefined to get shared liblzma on Windows. + +commit 73f560ee5fa064992b76688d9472baf139432540 +Author: Lasse Collin +Date: Sat Jun 27 22:57:15 2009 +0300 + + Make physmem() work on Cygwin 1.5 and older. + +commit 7ff0004fbce24ae72eddfe392828ffd7d4639ed1 +Author: Lasse Collin +Date: Sat Jun 27 17:28:01 2009 +0300 + + Moved the Windows resource files outside the windows directory + to prepare for building them with Autotools.
+ +commit 449c634674f35336a4815d398172e447659a135e +Author: Lasse Collin +Date: Sat Jun 27 13:05:03 2009 +0300 + + Added missing $(EXEEXT). + +commit 792db79f27ad9ab1fb977e23be65c7761f545752 +Author: Lasse Collin +Date: Sat Jun 27 12:32:40 2009 +0300 + + Create correct symlinks even when + --program-{prefix,suffix,transform} is passed to configure. + +commit 0adc72feb84f5b903f6ad9d3f759b1c326fafc6b +Author: Lasse Collin +Date: Sat Jun 27 10:02:24 2009 +0300 + + Silence a compiler warning on DOS-like systems. + +commit ad12edc95254ede3f0cb8dec8645e8789e984c4f +Author: Lasse Collin +Date: Sat Jun 27 09:35:15 2009 +0300 + + Updated the filenames in POTFILES.in too. + +commit b2b1f867532732fe9969131f8713bdd6b0731763 +Author: Lasse Collin +Date: Sat Jun 27 00:43:06 2009 +0300 + + Hopefully improved portability of the assembler code in + Autotools based builds on Windows. + +commit c393055947247627a09b6a6b8f20aa0c32f9be16 +Author: Lasse Collin +Date: Fri Jun 26 21:17:29 2009 +0300 + + Updated THANKS (most of today's commits are based on + Charles Wilson's patches). + +commit da0af22e4b4139b8a10710945f8b245b3a77c97d +Author: Lasse Collin +Date: Fri Jun 26 21:00:35 2009 +0300 + + Updated comments to match renamed files. + +commit 65014fd211dfbd4be48685998cb5a12aaa29c8d2 +Author: Lasse Collin +Date: Fri Jun 26 20:49:54 2009 +0300 + + Rename process.[hc] to coder.[hc] and io.[hc] to file_io.[hc] + to avoid problems on systems with system headers with those + names. + +commit 5e1257466dcb66f1d7a3f71814a5ad885cba43e8 +Author: Lasse Collin +Date: Fri Jun 26 20:43:36 2009 +0300 + + Rename process_file() to coder_run(). + +commit cad62551c5fa9865dbe0841a0b3bc729c4fbe8fc +Author: Lasse Collin +Date: Fri Jun 26 20:36:45 2009 +0300 + + Ugly hack to make it possible to use the thousand separator + format character with snprintf() on POSIX systems but not + on non-POSIX systems and still keep xgettext working. 
+ +commit fe378d47074b16c52b00fe184d119287c68ce2e7 +Author: Lasse Collin +Date: Fri Jun 26 15:40:40 2009 +0300 + + Added missing source files to windows/Makefile. + +commit 390a6408563067613b29de895cb40e4d0386d62c +Author: Lasse Collin +Date: Fri Jun 26 15:37:53 2009 +0300 + + Basic support for building with Cygwin and MinGW using + the Autotools based build system. It's not good yet, more + fixes will follow. + +commit 1c9360b7d1197457aaad2f8888b99f1149861579 +Author: Lasse Collin +Date: Fri Jun 26 14:47:31 2009 +0300 + + Fix @variables@ to $(variables) in Makefile.am files. + Fix the ordering of libgnu.a and LTLIBINTL on the linker + command line and added missing LTLIBINTL to tests/Makefile.am. + +commit d45615c555e250209ebb55aa3649abe790f1eeac +Author: Lasse Collin +Date: Fri Jun 26 14:20:02 2009 +0300 + + Allow to explicitly specify autotool versions in autogen.sh. + +commit eaf8367368a329afa48785380f9dca6b681f3397 +Author: Lasse Collin +Date: Fri Jun 26 14:18:32 2009 +0300 + + Add version.sh to EXTRA_DIST. + +commit b317b218e2d383dd27a700094c0de4510540ea18 +Author: Lasse Collin +Date: Wed Jun 24 20:14:10 2009 +0300 + + Support HW_PHYSMEM64 + +commit ae82dde5d9cc60c80cc89601b6c51cc1611d48e7 +Author: Lasse Collin +Date: Wed Jun 24 13:01:59 2009 +0300 + + Cast a char argument to isspace() to unsigned char. + +commit 1735d31ea347210e914df038eeea4b2626e76e42 +Author: Lasse Collin +Date: Fri Jun 5 13:46:26 2009 +0300 + + A few more spelling fixes. Released the .xz spec 1.0.3. + +commit 8ed156ce894966103e895aa08f2a9fb912f6fad5 +Author: Lasse Collin +Date: Thu Jun 4 23:42:12 2009 +0300 + + Added xzdec man page. + +commit f6df39afaa84f71439507178a49b2a5dda6e824c +Author: Lasse Collin +Date: Thu Jun 4 23:26:47 2009 +0300 + + Harmonized xzdec --memory with xz --memory and made + minor cleanups. + +commit 1774f27c61ce294a56712ca2f4785f90a62441bc +Author: Lasse Collin +Date: Thu Jun 4 22:59:55 2009 +0300 + + Fix purporse -> purpose. Thanks to Andrew Dudman. 
+ Released .xz spec 1.0.2 due to this fix too. + +commit cb613455642f48fb51059e22018615f64c59b70f +Author: Lasse Collin +Date: Mon Jun 1 14:53:57 2009 +0300 + + The .xz file format version 1.0.1 + +commit 083c23c680ff844846d177cfc58bb7a874e7e6b9 +Author: Lasse Collin +Date: Tue May 26 14:48:48 2009 +0300 + + Make the raw value of the Check field available to applications + via lzma_block structure. + + This changes ABI but doesn't break API. + +commit b4f5c814090dc07d4350453576305e41eb9c998d +Author: Lasse Collin +Date: Sat May 23 16:57:21 2009 +0300 + + Remove undocumented alternative option names --bcj, --ppc, + and --itanium. + +commit b1edee2cdc7ef4411b1a21c07094ec763f071281 +Author: Lasse Collin +Date: Sat May 23 15:12:23 2009 +0300 + + Add support for specifying the BCJ filter start offset + in the xz command line tool. + +commit 72aa0e9c5f4289f10ef5bf240a9448d3017f1ceb +Author: Lasse Collin +Date: Sat May 23 14:51:09 2009 +0300 + + Updated THANKS. + +commit dcedb6998cefeca6597dd1219328a3abf5acf66d +Author: Lasse Collin +Date: Fri May 22 16:40:50 2009 +0300 + + Added support for --quiet and --no-warn to xzdec. + Cleaned up the --help message a little. + +commit 5f735dae80aa629853f4831d7b84ec1c614979eb +Author: Lasse Collin +Date: Fri May 22 15:11:52 2009 +0300 + + Use the 40 % of RAM memory usage limit in xzdec too. + + Update the memory usage info text in --help to match + the text in xz --long-help. + +commit b60376249e0c586910c4121fab4f791820cc1289 +Author: Lasse Collin +Date: Fri May 22 14:43:00 2009 +0300 + + Add --no-warn. + +commit b4f92f522d4b854c0adb7c38be7531e1a6a7b008 +Author: Lasse Collin +Date: Fri May 22 14:27:40 2009 +0300 + + Fix a comment. + +commit 4dd21d23f22569285ae706b58b0e5904b8db1839 +Author: Lasse Collin +Date: Fri May 22 14:21:20 2009 +0300 + + Remove the --info option, which was an alias for --list.
+ +commit 8836139b63ce774bdd62abf17ab69b290e08229e +Author: Lasse Collin +Date: Fri May 22 12:27:43 2009 +0300 + + If xz is run as lzma, unlzma, or lzcat, simply imply + --format=lzma. This means that xz emulating lzma + doesn't decompress .xz files, while before this + commit it did. The new way is slightly simpler in + code and especially in upcoming documentation. + +commit b0063023f8adb06ea735ec4af5c6f5b7bdb8e84d +Author: Lasse Collin +Date: Fri May 22 11:29:50 2009 +0300 + + Make the default memory usage limit 40 % of RAM for both + compressing and decompressing. This should be OK now that + xz automatically scales down the compression settings if + they would exceed the memory usage limit (earlier, the limit + for compression was increased to 90 % because low limit broke + scripts that used "xz -9" on systems with low RAM). + + Support specifying the memory usage limit as a percentage + of RAM (e.g. --memory=50%). + + Support --threads=0 to reset the thread limit to the default + value (number of available CPU cores). Use UINT32_MAX instead + of SIZE_MAX as the maximum in args.c. hardware.c was already + expecting uint32_t value. + + Cleaned up the output of --help and --long-help. + +commit 071b825b23911a69dd1cd2f8cda004ef8a781fae +Author: Lasse Collin +Date: Thu May 21 17:22:01 2009 +0300 + + Support special value "max" where xz and xzdec accept an integer. + Don't round the memory usage limit in xzdec --help to avoid + an integer overflow and to not give wrong impression that + the limit is high enough when it may not actually be. + +commit 03ca67fd37dd43fa7f590de340899cd497c10802 +Author: ABCD +Date: Wed May 20 17:31:18 2009 -0400 + + Install lzdiff, lzgrep, and lzmore as symlinks + + This adds lzdiff, lzgrep, and lzmore to the list of symlinks to install. + It also installs symlinks for the manual pages and removes the new + symlinks on uninstall.
+ +commit a6f43e64128a6da5cd641de1e1e527433b3e5638 +Author: Lasse Collin +Date: Sat May 2 16:10:14 2009 +0300 + + Use a GCC-specific #pragma instead of GCC-specific + -Wno-uninitialized to silence a bogus warning. + +commit f6ce63ebdb45a857c8949960c83c9580ae888951 +Author: Lasse Collin +Date: Sat May 2 14:46:50 2009 +0300 + + Removed --disable-encoder and --disable-decoder. Use the values + given to --enable-encoders and --enable-decoders to determine + if any encoder or decoder support is wanted. + +commit be06858d5cf8ba46557395035d821dc332f3f830 +Author: Lasse Collin +Date: Fri May 1 11:28:52 2009 +0300 + + Remove docs that are too outdated to be updated + (rewrite will be better). + +commit 0255401e57c96af87c6b159eca28974e79430a82 +Author: Lasse Collin +Date: Fri May 1 11:21:46 2009 +0300 + + Added documentation about the legacy .lzma file format. + +commit 1496ff437c46f38303e0e94c511ca604b3a11f85 +Author: Lasse Collin +Date: Fri May 1 11:20:23 2009 +0300 + + Renamed the file format specification to xz-file-format.txt + which is the filename used on the WWW. + +commit 21c6b94373d239d7e86bd480fcd558e30391712f +Author: Lasse Collin +Date: Tue Apr 28 23:08:32 2009 +0300 + + Fixed a crash in liblzma. + + liblzma tries to avoid useless free()/malloc() pairs in + initialization when multiple files are handled using the + same lzma_stream. This didn't work with filter chains + due to comparison of wrong pointers in lzma_next_coder_init(), + making liblzma think that no memory reallocation is needed + even when it actually is. + + Easy way to trigger this bug is to decompress two files with + a single xz command. The first file should have e.g. x86+LZMA2 + as the filter chain, and the second file just LZMA2. + +commit e518d167aa5958e469982f4fb3a24b9b6a2b5d1c +Author: Lasse Collin +Date: Wed Apr 15 14:13:38 2009 +0300 + + Fix uint32_t -> size_t in ARM and ARM-Thumb filters. 
+ + On 64-bit system it would have gone into infinite + loop if a single input buffer was over 4 GiB (unlikely). + +commit 31decdce041581e57c0d8a407d4795b114ef27ca +Author: Lasse Collin +Date: Tue Apr 14 11:48:46 2009 +0300 + + Minor fixes to test files' README. + +commit 4787d654434891c7df5b43959b0d2873718f06e0 +Author: Lasse Collin +Date: Mon Apr 13 16:36:41 2009 +0300 + + Updated history.txt. + +commit 2f0bc9cd40f709152a0177c8e585c0757e9af9c9 +Author: Lasse Collin +Date: Mon Apr 13 14:49:48 2009 +0300 + + Quick & dirty update to support xz in diff/grep/more scripts. + +commit 02ddf09bc3079b3e17297729b9e43f14d407b8fc +Author: Lasse Collin +Date: Mon Apr 13 11:27:40 2009 +0300 + + Put the interesting parts of XZ Utils into the public domain. + Some minor documentation cleanups were made at the same time. + +commit e79c42d854657ae7f75613bd80c1a35ff7c525cb +Author: Lasse Collin +Date: Fri Apr 10 11:17:02 2009 +0300 + + Fix off-by-one in LZ decoder. + + Fortunately, this bug had no security risk other than accepting + some corrupt files as valid. + +commit 94eb9ad46f1fded6d8369cf3d38bb9754c1375af +Author: Pavel Roskin +Date: Tue Mar 31 12:15:01 2009 -0400 + + Fix minor typos in README + +commit 9bab5336ebd765ec4e12252f416eefdf04eba750 +Author: Lasse Collin +Date: Tue Mar 31 21:52:51 2009 +0300 + + Add a note and work-around instructions to README about + problems detecting a C99 compiler when some standard + headers are missing. + +commit a0497ff7a06f9350349264fe9b52dfefc6d53ead +Author: Lasse Collin +Date: Wed Mar 18 16:54:38 2009 +0200 + + Updated THANKS. + +commit 390e69887fc5e0a108eb41203bed9acd100a3d76 +Author: Lasse Collin +Date: Wed Mar 18 16:51:41 2009 +0200 + + Fix wrong macro names in lc_cpucores.m4 and cpucores.h. + Thanks to Bert Wesarg. + +commit 0df9299e2478c2a0c62c05b1ae14a85a353e20d6 +Author: Lasse Collin +Date: Sun Mar 1 09:03:08 2009 +0200 + + Test for Linux-specific sysinfo() only on Linux systems. 
+ Some other systems have sysinfo() with different semantics. + +commit cf751edfde3ad6e088dc18e0522d31ae38405933 +Author: Lasse Collin +Date: Sun Mar 1 09:00:06 2009 +0200 + + Added AC_CONFIG_MACRO_DIR to configure.ac. + +commit 63df14c57dee7c461717784287056688482a7eb9 +Author: Lasse Collin +Date: Sun Mar 1 08:58:41 2009 +0200 + + Fix the Autoconf test for getopt_long replacement. + It was broken by e114502b2bc371e4a45449832cb69be036360722. + +commit fd6a380f4eda4f00be5f2aa8d222992cd74a714f +Author: Lasse Collin +Date: Sun Feb 22 19:07:54 2009 +0200 + + Add a rough explanation of --extreme to output of --help. + +commit 68bf7ac2984d3627369a240ef0491934d53f7899 +Author: Lasse Collin +Date: Sun Feb 22 18:52:49 2009 +0200 + + Fixes to progress message handling in xz: + + - Don't use Windows-specific code on Windows. The old code + required at least Windows 2000. Now it should work on + Windows 98 and later, and maybe on Windows 95 too. + + - Use less precision when showing estimated remaining time. + + - Fix some small design issues. + +commit 47c2e21f82242f50f18713a27d644c2c94ab3fea +Author: Lasse Collin +Date: Wed Feb 18 13:00:10 2009 +0200 + + Added files missing from the previous commit. + +commit 489a3dbaa0465f04400804e956a1cfbbee3654a2 +Author: Lasse Collin +Date: Tue Feb 17 10:43:00 2009 +0200 + + Added lzma_easy_buffer_encode(). Split easy.c into small + pieces to avoid unneeded dependencies making statically + linked applications bigger than needed. + +commit 7494816ab08d82f4d6409788825930c4e43cfd0d +Author: Lasse Collin +Date: Sun Feb 15 15:48:45 2009 +0200 + + Make physmem.h work on old Windows versions. + Thanks to Hongbo Ni for the original patch. + +commit 11ae4ae35fd70182c713f2d914b7cb1143bc76f0 +Author: Lasse Collin +Date: Sat Feb 14 20:44:52 2009 +0200 + + Fix microsecond vs. nanosecond confusion in my_time().
+ +commit 3084d662d2646ab7eb58daf0dc32cf3f9a74eec7 +Author: Lasse Collin +Date: Sat Feb 14 00:45:29 2009 +0200 + + Cleanups to the code that detects the amount of RAM and + the number of CPU cores. Added support for using sysinfo() + on Linux systems whose libc lacks appropriate sysconf() + support (at least dietlibc). The Autoconf macros were + split into separate files, and CPU core count detection + was moved from hardware.c to cpucores.h. The core count + isn't used for anything real for now, so a problematic + part in process.c was commented out. + +commit 9c62371eab2706c46b1072f5935e28cb4cd9dca8 +Author: Lasse Collin +Date: Fri Feb 13 18:23:50 2009 +0200 + + Initial port to DOS using DJGPP. + +commit 0dae8b7751d09e9c5a482d5519daaee4800ce203 +Author: Lasse Collin +Date: Fri Feb 13 18:02:05 2009 +0200 + + Windows port: Take advantage of the version number macros. + Now the version number is not duplicated in the + Windows-specific files anymore. + +commit fdbc0cfa71f7d660855098a609175ba384259529 +Author: Lasse Collin +Date: Fri Feb 13 18:00:03 2009 +0200 + + Changed how the version number is specified in various places. + Now configure.ac will get the version number directly from + src/liblzma/api/lzma/version.h. The intent is to reduce the + number of places where the version number is duplicated. In + future, support for displaying Git commit ID may be added too. + +commit 1d924e584b146136989f48c13fff2632896efb3d +Author: Lasse Collin +Date: Fri Feb 13 17:30:30 2009 +0200 + + Fix handling of integrity check type in the xz command line tool. + +commit 96c46df7deb231ea68a03d8d1da9de4c774e36d8 +Author: Lasse Collin +Date: Fri Feb 13 17:29:02 2009 +0200 + + Improve support for DOS-like systems. + Here DOS-like means DOS, Windows, and OS/2. + +commit b6a30ee8c2de60ecd722cd05223e4ba72f822e33 +Author: Lasse Collin +Date: Wed Feb 11 20:02:32 2009 +0200 + + Remove dead directories from .gitignore. 
+ +commit 1ec5b0027911d94cb6f98892cbc690f818d8a861 +Author: Jim Meyering +Date: Wed Feb 11 14:45:14 2009 +0100 + + .gitignore vs. Makefiles + + How about this for those of us who do srcdir builds? + +commit 154f5aec2de201c674841de4fcc9804c2a87af07 +Author: Lasse Collin +Date: Tue Feb 10 21:48:35 2009 +0200 + + Removed Makefile from .gitignore since not all Makefiles + in the repository are generated by Autotools. People + should do test builds in a separate build directory anyway. + +commit e605c2663691b0a4c307786aa368d124ea081daa +Author: Lasse Collin +Date: Tue Feb 10 21:48:05 2009 +0200 + + Added resource files for the Windows build. + +commit a3bbbe05d32b1f7ea9eb98805df4dda2e811b476 +Author: Lasse Collin +Date: Mon Feb 9 14:54:31 2009 +0200 + + Let the user specify custom CFLAGS on the make command + line. Previously custom CFLAGS worked only when they were + passed to configure. + +commit 53f7598998b1860a69c51243b5d2e34623c6bf60 +Author: Lasse Collin +Date: Sun Feb 8 21:35:11 2009 +0200 + + Fix aliasing issue in physmem.h. + +commit 0e27028d74c5c7a8e036ae2a9b8cecb0ac79d3a6 +Author: Lasse Collin +Date: Sun Feb 8 18:24:50 2009 +0200 + + Add a separate internal function to initialize the CRC32 + table, which is used also by LZ encoder. This was needed + because calling lzma_crc32() and ignoring the result is + a no-op due to lzma_attr_pure. + +commit ae1ad9af54210c9a2be336b1316532da5071516c +Author: Lasse Collin +Date: Sun Feb 8 18:17:05 2009 +0200 + + Make "xz --force" to write to terminal as the error + message suggests. + +commit 79e25eded48d2fe33f31441ab7a034f902e335f8 +Author: Lasse Collin +Date: Sun Feb 8 10:37:50 2009 +0200 + + Support both slash and backslash as path component + separator on Windows when parsing argv[0]. + +commit bc7c7109cc4410055a888c1c70cbd1c9445c4361 +Author: Lasse Collin +Date: Sat Feb 7 23:18:13 2009 +0200 + + Omit the wrong and (even if corrected) nowadays useless rm + from autogen.sh. 
+ +commit edfc2031e56f8a2ccda063f02936b3a848d88723 +Author: Lasse Collin +Date: Sat Feb 7 21:41:52 2009 +0200 + + Updated THANKS. + +commit 880c3309386aac58fc4f3d7ca99bd31bcb1526a3 +Author: Lasse Collin +Date: Sat Feb 7 21:17:07 2009 +0200 + + Make it easy to choose if command line tools should be + linked statically or dynamically against liblzma. The + default is still to use static liblzma, but it can now + be changed by passing --enable-dynamic to configure. + Thanks to Mike Frysinger for the original patch. + + Fixed a few minor bugs in configure.ac. + +commit 3f86532407e4ace3debb62be16035e009b56ca36 +Author: Mike Frysinger +Date: Fri Feb 6 23:38:39 2009 -0500 + + add gitignore files + + Signed-off-by: Mike Frysinger + +commit bd7ca1dad5c146b6217799ffaa230c32d207a3e5 +Author: Lasse Collin +Date: Sat Feb 7 17:07:52 2009 +0200 + + Assume 32 MiB of RAM on unsupported operating systems like + the comment in hardware.c already said. + +commit d0ab8c1c73ae712adb0d26fbb9da762d99a63618 +Author: Lasse Collin +Date: Sat Feb 7 16:26:58 2009 +0200 + + MinGW support: Don't build fastpos_tablegen.c as part of + liblzma. Build both static and dynamic liblzma, and also + static and dynamic versions of the command line tools. + +commit bfd91198e44a52bd9bfe3cd6dcae5edab7c6eb45 +Author: Lasse Collin +Date: Sat Feb 7 15:55:47 2009 +0200 + + Support LZMA_API_STATIC in assembler files to + avoid __declspec(dllexport) equivalent. + +commit 3306cf3883492720b3c34baa02f4eb4227d91c73 +Author: Lasse Collin +Date: Sat Feb 7 11:11:50 2009 +0200 + + Introduced LZMA_API_STATIC macro, which the applications + need to #define when linking against static liblzma on + platforms like Windows. Most developers don't need to + care about LZMA_API_STATIC at all. 
+ +commit b719e63c5f4c91d2d5e2ea585d4c055ec3767d0b +Author: Lasse Collin +Date: Fri Feb 6 16:55:45 2009 +0200 + + Another grammar fix + +commit fe5434f940f75fec3611cf9d9edf78c4da8ac760 +Author: Lasse Collin +Date: Fri Feb 6 12:30:23 2009 +0200 + + Grammar fix in README. + +commit 3dfa58a9eedf5a0e566452b078801c9cbcf7a245 +Author: Lasse Collin +Date: Fri Feb 6 10:06:32 2009 +0200 + + Some MSYS installations (e.g. MsysGit) don't include + install.exe, so don't rely on it. + +commit 975d8fd72a5148d46b2e1745f7a211cf1dfd9d31 +Author: Lasse Collin +Date: Fri Feb 6 09:13:15 2009 +0200 + + Recreated the BCJ test files for x86 and SPARC. The old files + were linked with crt*.o, which are copyrighted, and thus the + old test files were not in the public domain as a whole. They + are freely distributable though, but it is better to be careful + and avoid including any copyrighted pieces in the test files. + The new files are just compiled and assembled object files, + and thus don't contain any copyrighted code. + +commit 094b1b09a531f0d201ec81f2b07346a995fd80b9 +Author: Lasse Collin +Date: Thu Feb 5 21:21:27 2009 +0200 + + Add the "windows" directory to EXTRA_DIST. + +commit e1c3412eec7acec7ca3b32c9c828f3147dc65b49 +Author: Lasse Collin +Date: Thu Feb 5 09:17:51 2009 +0200 + + Added initial experimental makefile for use with MinGW. + +commit 75905a9afc0ee89954ede7d08af70d1148bf0fd9 +Author: Lasse Collin +Date: Thu Feb 5 09:12:57 2009 +0200 + + Various code cleanups to the xz command line tool. + It now builds with MinGW. + +commit d0c0b9e94e0af59d1d8f7f4829695d6efe19ccfe +Author: Lasse Collin +Date: Tue Feb 3 12:15:17 2009 +0200 + + Another utime() fix. + +commit ccf92a29e8c7234284f1568c1ec0fd7cb98356ca +Author: Lasse Collin +Date: Tue Feb 3 10:41:11 2009 +0200 + + Fix wrong filename argument for utime() and utimes(). + This doesn't affect most systems, since most systems + have better functions available.
+ +commit 99c1c2abfae2e87f3c17e929783e6d1bb7a3f302 +Author: Lasse Collin +Date: Mon Feb 2 21:19:01 2009 +0200 + + Updated the x86 assembler code: + - Use call/ret pair to get instruction pointer for PIC. + - Use PIC only if PIC or __PIC__ is #defined. + - The code should work on MinGW and Darwin in addition + to GNU/Linux and Solaris. + +commit 22a0c6dd940b78cdac2f4a4b4b0e7cc0ac15021f +Author: Lasse Collin +Date: Mon Feb 2 20:14:03 2009 +0200 + + Modify LZMA_API macro so that it works on Windows with + other compilers than MinGW. This may hurt readability + of the API headers slightly, but I don't know any + better way to do this. + +commit 8dd7b6052e18621e2e6c62f40f762ee88bd3eb65 +Author: Lasse Collin +Date: Sun Feb 1 22:40:35 2009 +0200 + + Fix a bug in lzma_block_buffer_decode(), although this + function should be rewritten anyway. + +commit 55fd41431e61fb8178858283d636b6781e33e847 +Author: Lasse Collin +Date: Sun Feb 1 22:39:07 2009 +0200 + + Added initial version of raw buffer-to-buffer coding + functions, and cleaned up filter.h API header a little. + May be very buggy, not tested yet. + +commit 3e54ecee5cad30a5ca361a88a99230407abc0699 +Author: Lasse Collin +Date: Sun Feb 1 00:11:20 2009 +0200 + + Fix missing newlines in xzdec.c. + +commit d64ca34f1b6f34e86adefc7f735b4eff8e6d4a35 +Author: Lasse Collin +Date: Sun Feb 1 00:10:07 2009 +0200 + + Use __cdecl also for function pointers in liblzma API when + on Windows. + +commit 6a2eb54092fc625d59921a607ff68cd1a90aa898 +Author: Lasse Collin +Date: Sat Jan 31 11:01:48 2009 +0200 + + Add LZMA_API to liblzma API headers. It's useful at least + on Windows. sysdefs.h no longer #includes lzma.h, so lzma.h + has to be #included separately where needed. + +commit d9993fcb4dfc1f93abaf31ae23b3ef1f3123892b +Author: Lasse Collin +Date: Sat Jan 31 10:13:09 2009 +0200 + + Use _WIN32 instead of WIN32 in xzdec.c to test if compiling on Windows. 
+ +commit 2dbdc5befb33c3703e4609809101047c67caf343 +Author: Lasse Collin +Date: Sat Jan 31 10:02:52 2009 +0200 + + Fix two lines in lzma.h on which the # wasn't at the + beginning of the line. + +commit 4ab760109106dc04f39dd81c97d50f528d1b51c1 +Author: Lasse Collin +Date: Sat Jan 31 09:55:05 2009 +0200 + + Add support for using liblzma headers in MSVC, which has no + stdint.h or inttypes.h. + +commit b2172cf823d3be34cb0246cb4cb32d105e2a34c9 +Author: Lasse Collin +Date: Sat Jan 31 08:49:54 2009 +0200 + + Fix # -> ## in a macro in lzma.h. + +commit 1aae8698746d3c87a93f8398cdde2de9ba1f7208 +Author: Lasse Collin +Date: Fri Jan 30 18:50:16 2009 +0200 + + Updated README. + +commit f54bcf6f80d585236bc03ce49f7c73e1abaa17eb +Author: Lasse Collin +Date: Fri Jan 30 00:29:58 2009 +0200 + + Remove dangling crc64_init.c. + +commit 982da7ed314398420c38bf154a8f759d5f18b480 +Author: Lasse Collin +Date: Wed Jan 28 17:16:38 2009 +0200 + + The .xz file format specification version 1.0.0 is now + officially released. The format has been technically the same + since 2008-11-19, but now that it is frozen, people can start + using it without a fear that the format will break. + +commit c4683a660b4372156bdaf92f0cdc54a58f95ee6f +Author: Lasse Collin +Date: Wed Jan 28 08:45:59 2009 +0200 + + Updated THANKS. + +commit 3241317093595db9f79104faafe93cb989c9f858 +Author: Lasse Collin +Date: Wed Jan 28 08:43:26 2009 +0200 + + Fix uninitialized variables in alone_decoder.c. This bug was + triggered by the previous commit, since these variables were + not used by anything before support for a preset dictionary. + +commit f76e39cf930f888d460b443d18f977ebedea8b2a +Author: Lasse Collin +Date: Tue Jan 27 18:36:05 2009 +0200 + + Added initial support for preset dictionary for raw LZMA1 + and LZMA2. It is not supported by the .xz format or the xz + command line tool yet. 
+ +commit 449b8c832b26c3633f3bec60095e57d2d3ada1f3 +Author: Lasse Collin +Date: Mon Jan 26 20:09:17 2009 +0200 + + Regenerate the CRC tables without trailing blanks. + +commit 850f7400428dc9c5fd08a2f35a5bd2c9e45aede2 +Author: Jim Meyering +Date: Mon Jan 19 21:37:16 2009 +0100 + + remove trailing blanks from all but .xz files + +commit 667481f1aad34e1ed15738e7913a9c7e256b4cf5 +Author: Lasse Collin +Date: Mon Jan 26 14:34:10 2009 +0200 + + Add lzma_block_buffer_decode(). + +commit 5fb34d8324d3e7e0061df25d0086b64c8726b19d +Author: Lasse Collin +Date: Mon Jan 26 14:33:28 2009 +0200 + + Add more sanity checks to lzma_stream_buffer_decode(). + +commit c129748675a5daa8838df92bde32cc04f6ce61ba +Author: Lasse Collin +Date: Mon Jan 26 14:33:13 2009 +0200 + + Avoid hardcoded constant in easy.c. + +commit 1859d22d75e072463db74c25bc3f5a7992e5fdf6 +Author: Lasse Collin +Date: Mon Jan 26 13:06:49 2009 +0200 + + Tiny bit better sanity check in block_util.c + +commit 2c5fe958e4bbe9b147b10c255955dfe2827fb8e7 +Author: Lasse Collin +Date: Sun Jan 25 01:35:56 2009 +0200 + + Fix a dumb bug in Block decoder, which made it return + LZMA_DATA_ERROR with valid data. The bug was added in + e114502b2bc371e4a45449832cb69be036360722. + +commit c81f13ff29271de7293f8af3d81848b1dcae3d19 +Author: Lasse Collin +Date: Fri Jan 23 22:27:50 2009 +0200 + + Added lzma_stream_buffer_decode() and made minor cleanups. + +commit 0b3318661ce749550b8531dfd469639a08930391 +Author: Lasse Collin +Date: Thu Jan 22 12:53:33 2009 +0200 + + Fix a comment. + +commit 9ec80355a7212a0a2f8c89d98e51b1d8b4e34eec +Author: Lasse Collin +Date: Tue Jan 20 16:37:27 2009 +0200 + + Add some single-call buffer-to-buffer coding functions. 
+ +commit d8b58d099340f8f4007b24b211ee41a7210c061c +Author: Lasse Collin +Date: Tue Jan 20 13:45:41 2009 +0200 + + Block encoder cleanups + +commit 0c09810cb3635cb575cb54e694d41523e7d0a335 +Author: Lasse Collin +Date: Tue Jan 20 10:35:15 2009 +0200 + + Use LZMA_PROG_ERROR in lzma_code() as documented in base.h. + +commit 2f1a8e8eb898f6c036cde55d153ad348bfab3c00 +Author: Lasse Collin +Date: Mon Jan 19 22:53:18 2009 +0200 + + Fix handling of non-fatal errors in lzma_code(). + +commit 4810b6bc25087be872960b9dd1d11ff07735dc88 +Author: Lasse Collin +Date: Mon Jan 19 14:00:33 2009 +0200 + + Move some LZMA2 constants to lzma2_encoder.h so that they + can be used outside lzma2_encoder.c. + +commit 00be5d2e09f9c7a6a8563465ad8b8042866817a4 +Author: Lasse Collin +Date: Mon Jan 19 13:52:36 2009 +0200 + + Remove dead code. + +commit 128586213f77c9bd82b7e9a62927f6d0c3769d85 +Author: Lasse Collin +Date: Sat Jan 17 14:24:25 2009 +0200 + + Beta was supposed to be API stable but I had forgot to rename + lzma_memlimit_encoder and lzma_memlimit_decoder to + lzma_raw_encoder_memlimit and lzma_raw_decoder_memlimit. :-( + Now it is fixed. Hopefully it doesn't cause too much trouble + to those who already thought API is stable. + +commit b056379490be5c584c264a967f0540041a163a1e +Author: Lasse Collin +Date: Thu Jan 15 14:29:22 2009 +0200 + + Updated THANKS. + +commit dc8f3be06d54ef6e6cfb5134dd3d25edd08cef89 +Author: Lasse Collin +Date: Thu Jan 15 14:27:32 2009 +0200 + + Fixed a bug in 7z2lzma.bash to make it work with .7z files + that use something else than 2^n as the dictionary size. + Thanks to Dan Shechter for the bug report. + +commit 8286a60b8f4bd5accfbc9d229d2204bac31994f2 +Author: Lasse Collin +Date: Wed Jan 7 18:41:15 2009 +0200 + + Use pthread_sigmask() instead of sigprocmask() when pthreads + are enabled. 
+ +commit 4fd43cb3a906f6da2943f69239ee984c4787c9a9 +Author: Lasse Collin +Date: Wed Dec 31 20:01:00 2008 +0200 + + Bumped version to 4.999.8beta right after the release + of 4.999.7beta. + +commit 061748f5932719643cda73383db715167d543c22 +Author: Lasse Collin +Date: Wed Dec 31 18:59:02 2008 +0200 + + Disable Subblock filter from test_compress.sh since it is + disabled by default in configure.ac. + +commit 9c45658ddc8bd4a7819ef8547d3e7ccf73203e78 +Author: Lasse Collin +Date: Wed Dec 31 17:44:20 2008 +0200 + + Disable both Subblock encoder and decoder by default, + since they are not finished and may have security issues too. + +commit b59f1e98f50694cf6a8f1b342fd878feebdb2f88 +Author: Lasse Collin +Date: Wed Dec 31 17:42:50 2008 +0200 + + Update some files in debug directory. + +commit d1d17a40d33a9682424ca37282813492f2cba6d0 +Author: Lasse Collin +Date: Wed Dec 31 17:41:46 2008 +0200 + + Prepare for 4.999.7beta release. + +commit 88d3e6b0b18e24142b6d3b41dc1b84b00c49fef3 +Author: Lasse Collin +Date: Wed Dec 31 17:15:03 2008 +0200 + + Cleaned up some comments in the API headers. + +commit 322ecf93c961e45a1da8c4a794a7fdacefcd7f40 +Author: Lasse Collin +Date: Wed Dec 31 16:29:39 2008 +0200 + + Renamed lzma_options_simple to lzma_options_bcj in the API. + The internal implementation is still using the name "simple". + It may need some cleanups, so I look at it later. + +commit 7eea8bec3abfed883efba66264a1452a1c04f6b0 +Author: Lasse Collin +Date: Wed Dec 31 00:57:27 2008 +0200 + + Fixed missing quoting in configure.ac. + +commit 28e75f7086dbe9501d926c370375c69dfb1236ce +Author: Lasse Collin +Date: Wed Dec 31 00:48:23 2008 +0200 + + Updated src/liblzma/Makefile.am to use liblzma.pc.in, which + should have been in the previous commit. + +commit 7ed9d943b31d3ee9c5fb2387e84a241ba33afe90 +Author: Lasse Collin +Date: Wed Dec 31 00:30:49 2008 +0200 + + Remove lzma_init() and other init functions from liblzma API.
+ Half of developers were already forgetting to use these + functions, which could have caused total breakage in some future + liblzma version or even now if --enable-small was used. Now + liblzma uses pthread_once() to do the initializations unless + it has been built with --disable-threads which makes these + initializations thread-unsafe. + + When --enable-small isn't used, liblzma currently gets needlessly + linked against libpthread (on systems that have it). While it is + stupid for now, liblzma will need threads in future anyway, so + this stupidity will be temporary only. + + When --enable-small is used, different code for CRC32 and CRC64 is + now used than without --enable-small. This made the resulting + binary slightly smaller, but the main reason was to clean it up + and to handle the lack of lzma_init_check(). + + The pkg-config file lzma.pc was renamed to liblzma.pc. I'm not + sure if it works correctly and portably for static linking + (Libs.private includes -pthread or other operating system + specific flags). Hopefully someone complains if it is bad. + + lzma_rc_prices[] is now included as a precomputed array even + with --enable-small. It's just 128 bytes now that it uses uint8_t + instead of uint32_t. Smaller array seemed to be at least as fast + as the more bloated uint32_t array on x86; hopefully it's not bad + on other architectures. + +commit 5cda29b5665004fc0f21d0c41d78022a6a559ab2 +Author: Lasse Collin +Date: Sat Dec 27 19:40:31 2008 +0200 + + Use 28 MiB as memory usage limit for encoding in test_compress.sh. + +commit 050eb14d29e2537c014662e83599fd8a77f13c45 +Author: Lasse Collin +Date: Sat Dec 27 19:32:20 2008 +0200 + + Revert a change made in 3b34851de1eaf358cf9268922fa0eeed8278d680 + that was related to LZMA_MODE_FAST. The original code is slightly + faster although it compresses slightly worse. But since it is fast + mode, it is better to select the faster version.
+ +commit 4820f10d0f173864f6a2ea7479663b509ac53358 +Author: Lasse Collin +Date: Sat Dec 27 19:30:19 2008 +0200 + + Some xz command line tool improvements. + +commit e33194e79d8f5ce07cb4aca909b324ae75098f7e +Author: Lasse Collin +Date: Sat Dec 27 19:27:49 2008 +0200 + + Bunch of liblzma tweaks, including some API changes. + The API and ABI should now be very close to stable, + although the code behind it isn't yet. + +commit 4d00652e75dd2736aedc3a3a8baff3dd0ea38074 +Author: Lasse Collin +Date: Thu Dec 18 13:42:52 2008 +0200 + + Updated Makefile.am that was missing from the previous commit. + +commit 634636fa56ccee6e744f78b0abed76c8940f2f8f +Author: Lasse Collin +Date: Wed Dec 17 21:49:53 2008 +0200 + + Remove the alignment functions for now. Maybe they will + be added back in some form later, but the current version + wasn't modular, so it would need fixing anyway. + +commit 4fed98417d1687f5eccccb42a133fde3ec81216a +Author: Lasse Collin +Date: Wed Dec 17 20:11:23 2008 +0200 + + xz message handling improvements + +commit 653e457e3756ef35e5d1b2be3523b3e4b1e9ee4d +Author: Lasse Collin +Date: Mon Dec 15 23:26:43 2008 +0200 + + Fix a dumb bug in .lzma decoder which was introduced in + the previous commit. (Probably the previous commit has + other bugs too, it wasn't tested.) + +commit 671a5adf1e844bfdd6fd327016c3c28694493158 +Author: Lasse Collin +Date: Mon Dec 15 19:39:13 2008 +0200 + + Bunch of liblzma API cleanups and fixes. + +commit 17781c2c20fd77029cb32e77792889f2f211d69d +Author: Lasse Collin +Date: Mon Dec 15 14:26:52 2008 +0200 + + The LZMA2 decoder fix introduced a bug to LZ decoder, + which made LZ decoder return too early after dictionary + reset. This fixes it. + +commit f9f2d1e74398500724041f7fb3c38db35ad8c8d8 +Author: Lasse Collin +Date: Mon Dec 15 11:20:22 2008 +0200 + + Added two new test files. + +commit ff7fb2c605bccc411069e07b9f11fb957aea2ddf +Author: Lasse Collin +Date: Mon Dec 15 10:01:59 2008 +0200 + + Fix data corruption in LZMA2 decoder. 
+ +commit 1ceebcf7e1bd30b95125f0ad67a09fdb6215d613 +Author: Lasse Collin +Date: Sat Dec 13 00:54:11 2008 +0200 + + Name the package "xz" in configure.ac. + +commit a94bf00d0af9b423851905b031be5a645a657820 +Author: Lasse Collin +Date: Fri Dec 12 22:43:21 2008 +0200 + + Some adjustments to GCC warning flags. The important change + is the removal of -pedantic. It messes up -Werror (which I + really want to keep so that I don't miss any warnings) with + printf format strings that are in POSIX but not in C99. + +commit 8582d392baacd2cdac07ca60041f8c661323676d +Author: Lasse Collin +Date: Wed Dec 10 01:31:00 2008 +0200 + + Remove obsolete comment. + +commit b1ae6dd731ea3636c3c2bfc7aefa71457d3328f1 +Author: Lasse Collin +Date: Wed Dec 10 01:27:15 2008 +0200 + + Use "decompression" consistently in --long-help. + +commit 1ea9e7f15afd5d3981e2432710e932320597bca9 +Author: Lasse Collin +Date: Wed Dec 10 01:23:58 2008 +0200 + + Added preset=NUM to --lzma1 and --lzma2. This makes it easy + to take a preset as a template and modify it a little. + +commit bceb3918dbb21f34976bfdd4c171a81319de71f7 +Author: Lasse Collin +Date: Tue Dec 9 17:43:31 2008 +0200 + + Put the file format specification into the public domain. + Same will be done to the actual code later. + +commit 6efa2d80d46a38861016f41f0eb6fa2ec9260fe6 +Author: Lasse Collin +Date: Tue Dec 9 17:41:49 2008 +0200 + + Make the memusage functions of LZMA1 and LZMA2 encoders + to validate the filter options. Add missing validation + to LZMA2 encoder when options are changed in the middle + of encoding. + +commit f20a03206b71ff01b827bb7a932411d6a6a4e06a +Author: Lasse Collin +Date: Tue Dec 9 10:36:24 2008 +0200 + + Updated THANKS. + +commit ef7890d56453dca1aeb2e12db29b7e418d93dde4 +Author: Lasse Collin +Date: Mon Dec 1 23:04:12 2008 +0200 + + In command line tool, take advantage of memusage calculation's + ability to also validate the filter chain and options (not + implemented yet for all filters). 
+ +commit ccd57afa09e332d664d6d6a7498702791ea5f659 +Author: Lasse Collin +Date: Mon Dec 1 22:59:28 2008 +0200 + + Validate the filter chain before checking filter-specific + memory usage. + +commit c596fda40b62fe1683d0ac34d0c673dcaae2aa15 +Author: Lasse Collin +Date: Mon Dec 1 22:58:22 2008 +0200 + + Make the memusage functions of LZMA1 and LZMA2 decoders + to validate the filter options. + +commit c58f469be5bb9b0bdab825c6687445fd553f4f3a +Author: Lasse Collin +Date: Mon Dec 1 22:55:18 2008 +0200 + + Added the changes for Delta filter that should have been + part of 656ec87882ee74b192c4ea4a233a235eca7b04d4. + +commit cd708015202dbf7585b84a8781462a20c42a324b +Author: Lasse Collin +Date: Mon Dec 1 22:50:28 2008 +0200 + + LZMA2 decoder cleanups. Make it require new LZMA properties + also in the first LZMA chunk after a dictionary reset in + uncompressed chunk. + +commit 656ec87882ee74b192c4ea4a233a235eca7b04d4 +Author: Lasse Collin +Date: Mon Dec 1 16:30:11 2008 +0200 + + Added lzma_delta_coder_memusage() which also validates + the options. + +commit 691a9155b7a28882baf37e9d1e969e32e91dbc7a +Author: Lasse Collin +Date: Sat Nov 29 10:03:49 2008 +0200 + + Automake includes the m4 directory, so don't add it in + Makefile.am separately. + + Updated THANKS. + +commit c7007ddf06ac2b0e018d71d281c21b99f16e7ae0 +Author: Lasse Collin +Date: Fri Nov 28 12:00:48 2008 +0200 + + Tested using COLUMNS environment variable to avoid broken + progress indicator but since COLUMNS isn't usually available, + the code was left commented out. + +commit ae65dcfde27014e4d811e1a1308aa5d0fe8debbd +Author: Lasse Collin +Date: Thu Nov 27 19:28:59 2008 +0200 + + Cleanups to message.c. + +commit a8368b75cdcd5427299001cc42839287f27b244d +Author: Lasse Collin +Date: Tue Nov 25 02:37:47 2008 +0200 + + Remove the nowadays unneeded memory limiting malloc() wrapper.
+ +commit 69472ee5f055a2bb6f28106f0923e1461fd1d080 +Author: Lasse Collin +Date: Sun Nov 23 15:09:03 2008 +0200 + + VLI encoder and decoder cleanups. Made encoder return + LZMA_PROG_ERROR in single-call mode if there's no output + space. + +commit 4249c8c15a08f55b51b7012e6aaafce3aa9eb650 +Author: Lasse Collin +Date: Sat Nov 22 17:44:33 2008 +0200 + + Typo fix + +commit 6d1d6f4598d121253dbe1084c6866b66e95c361b +Author: Lasse Collin +Date: Thu Nov 20 22:59:10 2008 +0200 + + Support NetBSD's errno for O_NOFOLLOW. + +commit f901a290eef67b8ea4720ccdf5f46edf775ed9d7 +Author: Lasse Collin +Date: Thu Nov 20 18:05:52 2008 +0200 + + Build xzdec and lzmadec from xzdec.c. xzdec supports only .xz + files and lzmadec only .lzma files. + +commit 86a0ed8f01c8ed44721223f885e679c71b7bb94c +Author: Lasse Collin +Date: Thu Nov 20 11:01:29 2008 +0200 + + Minor cleanups to xzdec. + +commit 54f716ba8905d09752dcd1519455a40bd21d5317 +Author: Lasse Collin +Date: Wed Nov 19 23:55:22 2008 +0200 + + Added missing check for uint16_t. + +commit 1880a3927b23f265f63b2adb86fbdb81ea09eb06 +Author: Lasse Collin +Date: Wed Nov 19 23:52:24 2008 +0200 + + Renamed lzma to xz and lzmadec to xzdec. We create symlinks + lzma, unlzma, and lzcat in "make install" for backwards + compatibility with LZMA Utils 4.32.x; I'm not sure if this + should be the default though. + +commit e114502b2bc371e4a45449832cb69be036360722 +Author: Lasse Collin +Date: Wed Nov 19 20:46:52 2008 +0200 + + Oh well, big messy commit again. Some highlights: + - Updated to the latest, probably final file format version. + - Command line tool reworked to not use threads anymore. + Threading will probably go into liblzma anyway. + - Memory usage limit is now about 30 % for uncompression + and about 90 % for compression. + - Progress indicator with --verbose + - Simplified --help and full --long-help + - Upgraded to the last LGPLv2.1+ getopt_long from gnulib. 
+ - Some bug fixes + +commit 3c3905b53462ae235c9438d86a4dc51086410932 +Author: Lasse Collin +Date: Thu Oct 9 11:12:29 2008 +0300 + + Fixed the test that should have been fixed as part + of 1e8e4fd1f3e50129b4541406ad765d2aa1233943. + +commit 0f295bf7a3ece01f667caae318cc3e3424085886 +Author: Lasse Collin +Date: Tue Oct 7 16:42:18 2008 +0300 + + Fixed some help messages. + +commit 1e8e4fd1f3e50129b4541406ad765d2aa1233943 +Author: Lasse Collin +Date: Tue Oct 7 09:40:31 2008 +0300 + + Made the preset numbering more logical in liblzma API. + +commit 5e4df4c3c09c82bbbb1a916784e3dc717ca4ff81 +Author: Lasse Collin +Date: Fri Oct 3 19:36:09 2008 +0300 + + Removed fi from po/LINGUAS. + +commit fcfb86c7770328cfffa2e83b176af9a1ba2d9128 +Author: Lasse Collin +Date: Fri Oct 3 07:06:48 2008 +0300 + + Fixed suffix handling with --format=raw. + +commit bd137524f2f50e30ba054f42f1f6536cd3cee920 +Author: Lasse Collin +Date: Thu Oct 2 22:51:46 2008 +0300 + + Initial changes to change the suffix of the new format to .xz. + This also fixes a bug related to --suffix option. Some issues + with suffixes with --format=raw were not fixed. + +commit 4c321a41c482821aa3c4d64cdf886a6ed904d844 +Author: Lasse Collin +Date: Tue Sep 30 17:43:55 2008 +0300 + + Renamed the test files from .lzma suffix to .xz suffix. + +commit 8e60c889a2816a63013a35c99ce26bf28f5b78eb +Author: Lasse Collin +Date: Tue Sep 30 13:57:44 2008 +0300 + + Fixed Stream decoder to actually use the first_stream variable. + +commit 3bdbc12c054d1961133ee19802af7dd3c3494543 +Author: Lasse Collin +Date: Tue Sep 30 13:56:57 2008 +0300 + + Added one more test file. + +commit a6639022fdc536e5659b070a465221b4cf7c51fa +Author: Lasse Collin +Date: Tue Sep 30 13:34:07 2008 +0300 + + Fixed uninitialized variable in Stream decoder. + +commit ed3709000a3f17ecefab29b2235d7e2221b00003 +Author: Lasse Collin +Date: Tue Sep 30 13:27:28 2008 +0300 + + Added two test files. 
+ +commit ea560b0ea80525752bdcd0074d24f8dc170bbe29 +Author: Lasse Collin +Date: Sat Sep 27 23:49:24 2008 +0300 + + Fix conflicting Subblock helper filter's ID. + +commit ad97483b6e55142fd8d5c041db057017a891cd95 +Author: Lasse Collin +Date: Sat Sep 27 23:37:13 2008 +0300 + + Changed magic bytes to match the updated spec. Filename + suffix wasn't changed yet. + +commit 7a57069167e9e63394e2b095ee3a63253fcb51c7 +Author: Lasse Collin +Date: Sat Sep 27 23:16:09 2008 +0300 + + Remove po/fi.po since I'm not keeping it updated for now. + +commit 018ae09df8f2fee5a7374f307df4cb42fad0b81e +Author: Lasse Collin +Date: Sat Sep 27 23:13:54 2008 +0300 + + Fix also test_compress.sh. + +commit 3a62a5fb85d2eebd8666e64ed5d364d095062858 +Author: Lasse Collin +Date: Sat Sep 27 23:01:15 2008 +0300 + + Fixed compilation of test_filter_flags.c, which was broken by + 1dcecfb09b55157b8653d747963069c8bed74f04. + +commit c6ca26eef7cd07eba449035514e2b8f9ac3111c0 +Author: Lasse Collin +Date: Sat Sep 27 19:11:02 2008 +0300 + + Updated file format specification. It changes the suffix + of the new format to .xz and removes the recently added + LZMA filter. + +commit 1dcecfb09b55157b8653d747963069c8bed74f04 +Author: Lasse Collin +Date: Sat Sep 27 19:09:21 2008 +0300 + + Some API changes, bug fixes, cleanups etc. + +commit 5cc5064cae603b649c64c40125c7dd365de54c9d +Author: Lasse Collin +Date: Sat Sep 27 11:28:49 2008 +0300 + + Added 7z2lzma.bash. 
+ +commit f147666a5cd15542d4e427da58629f4a71cc38e1 +Author: Lasse Collin +Date: Wed Sep 17 22:11:39 2008 +0300 + + Miscellaneous LZ and LZMA encoder cleanups + +commit 13d68b069849e19c33822cd8996cd6447890abb1 +Author: Lasse Collin +Date: Sat Sep 13 13:54:00 2008 +0300 + + LZ decoder cleanup + +commit 13a74b78e37f16c9096ba5fe1859cc04eaa2f9f7 +Author: Lasse Collin +Date: Sat Sep 13 12:10:43 2008 +0300 + + Renamed constants: + - LZMA_VLI_VALUE_MAX -> LZMA_VLI_MAX + - LZMA_VLI_VALUE_UNKNOWN -> LZMA_VLI_UNKNOWN + - LZMA_HEADER_ERROR -> LZMA_OPTIONS_ERROR + +commit 320601b2c7b08fc7da9da18d5bf7c3c1a189b080 +Author: Lasse Collin +Date: Fri Sep 12 22:41:40 2008 +0300 + + Improved the Stream Flags handling API. + +commit ec490da5228263b25bf786bb23d1008468f55b30 +Author: Lasse Collin +Date: Thu Sep 11 23:10:44 2008 +0300 + + Simplified debug/known_sizes.c to match the relaxed + requirements of Block encoder. + +commit 16e8b98f2659347edfa74afdbbb9e73311153cb9 +Author: Lasse Collin +Date: Thu Sep 11 23:09:24 2008 +0300 + + Remove a check from Block encoder that should have already + been removed in 2ba01bfa755e47ff6af84a978e3c8d63d7d2775e. + +commit 5a710c3805bdf6d7e3c92e954e4e4565b27bcb13 +Author: Lasse Collin +Date: Thu Sep 11 20:02:38 2008 +0300 + + Remove bogus #includes. + +commit 01892b2ca5f69bed0ea746e04b604030d57806bb +Author: Lasse Collin +Date: Thu Sep 11 10:49:14 2008 +0300 + + Updated THANKS. + +commit 962f2231d49409fe6852e44ffe8c5dbabb04bc7d +Author: Lasse Collin +Date: Thu Sep 11 10:48:12 2008 +0300 + + Fix a compiler error on big endian systems that don't + support unaligned memory access. + +commit fa3ab0df8ae7a8a1ad55b52266dc0fd387458671 +Author: Lasse Collin +Date: Thu Sep 11 10:46:14 2008 +0300 + + Silence a compiler warning. + +commit 9373e81e18822db4972819442ea4c2cb9955470b +Author: Lasse Collin +Date: Wed Sep 10 19:16:32 2008 +0300 + + Bumped version to 4.999.6alpha.
+ +commit cb072b7c8442ba68bb0c62c0abbbe939794887a3 +Author: Lasse Collin +Date: Wed Sep 10 17:02:00 2008 +0300 + + Check for LZMA_FILTER_RESERVED_START in filter_flags_encoder.c. + Use LZMA_PROG_ERROR instead of LZMA_HEADER_ERROR if the Filter ID + is in the reserved range. This allows Block Header encoder to + detect unallowed Filter IDs, which is good for Stream encoder. + +commit 123ab0acec435c9e9866a99e30482116cfbd9ba5 +Author: Lasse Collin +Date: Wed Sep 10 16:44:32 2008 +0300 + + Filter handling cleanups + +commit 9cfcd0c4f2f865d8fbbb46ea28344a9be0dd8ad1 +Author: Lasse Collin +Date: Wed Sep 10 00:33:00 2008 +0300 + + Comments + +commit 2ba01bfa755e47ff6af84a978e3c8d63d7d2775e +Author: Lasse Collin +Date: Wed Sep 10 00:27:02 2008 +0300 + + Cleaned up Block encoder and moved the no longer shared + code from block_private.h to block_decoder.c. Now the Block + encoder doesn't need compressed_size and uncompressed_size + from lzma_block structure to be initialized. + +commit 07efcb5a6bc5d7018798ebd728586f84183e7d64 +Author: Lasse Collin +Date: Sun Sep 7 10:23:13 2008 +0300 + + Changed Filter ID of LZMA to 0x20. + +commit 32fe5fa541e82c08e054086279079ae5016bd8d8 +Author: Lasse Collin +Date: Sat Sep 6 23:42:50 2008 +0300 + + Comments + +commit 0a31ed9d5e3cde4feb094b66f3a8b2c074605d84 +Author: Lasse Collin +Date: Sat Sep 6 15:14:30 2008 +0300 + + Some API cleanups + +commit da98df54400998be2a6c3876f9655a3c51b93c10 +Author: Lasse Collin +Date: Thu Sep 4 11:53:06 2008 +0300 + + Added support for raw encoding and decoding to the command + line tool, and made various cleanups. --lzma was renamed to + --lzma1 to prevent people from accidentally using LZMA when + they want LZMA2. + +commit 2496aee8a7741a8a0d42987db41ff2cf1a4bdabd +Author: Lasse Collin +Date: Thu Sep 4 10:39:15 2008 +0300 + + Don't allow LZMA_SYNC_FLUSH with decoders anymore. There's + simply nothing that would use it. 
Allow LZMA_FINISH to the + decoders, which will usually ignore it (auto decoder and + Stream decoder being exceptions). + +commit bea301c26d5d52675e11e0236faec0492af98f60 +Author: Lasse Collin +Date: Wed Sep 3 17:06:25 2008 +0300 + + Minor updates to the file format specification. + +commit 9c75b089b4a9e0edcf4cf7970a4383768707d6c8 +Author: Lasse Collin +Date: Tue Sep 2 19:33:32 2008 +0300 + + Command line tool fixes + +commit bab0590504b5aeff460ab4ca8c964dd7c1bad9e4 +Author: Lasse Collin +Date: Tue Sep 2 19:31:42 2008 +0300 + + Auto decoder cleanup + +commit 689602336d126a46b60d791a67decab65e1e81f5 +Author: Lasse Collin +Date: Tue Sep 2 19:12:12 2008 +0300 + + Updated auto decoder to handle LZMA_CONCATENATED when decoding + LZMA_Alone files. Decoding of concatenated LZMA_Alone files is + intentionally not supported, so it is better to put this in + auto decoder than LZMA_Alone decoder. + +commit 80c4158f19904026433eb6f5d5ca98a0ecd4f66c +Author: Lasse Collin +Date: Tue Sep 2 14:56:52 2008 +0300 + + Stream decoder cleanups + +commit fc681657450ce57be1fe08f7a15d31dcc705e514 +Author: Lasse Collin +Date: Tue Sep 2 11:45:39 2008 +0300 + + Some fixes to LZ encoder. + +commit ede675f9ac1ca82a7d7c290324adba672118bc8d +Author: Lasse Collin +Date: Sun Aug 31 11:47:01 2008 +0300 + + Fix wrong pointer calculation in LZMA encoder. + +commit 3b34851de1eaf358cf9268922fa0eeed8278d680 +Author: Lasse Collin +Date: Thu Aug 28 22:53:15 2008 +0300 + + Sort of garbage collection commit. :-| Many things are still + broken. API has changed a lot and it will still change a + little more here and there. The command line tool doesn't + have all the required changes to reflect the API changes, so + it's easy to get "internal error" or trigger assertions. + +commit 57b9a145a527f0716822615e5ed536d33aebd3fc +Author: Lasse Collin +Date: Fri Jun 20 17:16:32 2008 +0300 + + Fix test_filter_flags to match the new restriction of lc+lp. 
+ +commit eaafc4367c77ec1d910e16d11b4da293969d97a3 +Author: Lasse Collin +Date: Fri Jun 20 16:19:54 2008 +0300 + + Remove some redundant code from LZMA encoder. + +commit 0809c46534fa5664fe35d9e98d95e87312ed130e +Author: Lasse Collin +Date: Thu Jun 19 16:35:08 2008 +0300 + + Add limit of lc + lp <= 4. Now we can allocate the + literal coder as part of the main LZMA encoder or + decoder structure. + + Make the LZMA decoder to rely on the current internal API + to free the allocated memory in case an error occurs. + +commit d25ab1b96178f06a0e724f58e3cd68300b2b1275 +Author: Lasse Collin +Date: Wed Jun 18 21:45:19 2008 +0300 + + Comments + +commit 6368a2fa5901c75864be5171dd57a50af7adbb41 +Author: Lasse Collin +Date: Wed Jun 18 19:19:02 2008 +0300 + + Delete old code that was supposed to be already deleted + from test_block_header.c. + +commit 7d17818cec8597f847b0a2537fde991bbc3d9e96 +Author: Lasse Collin +Date: Wed Jun 18 18:02:10 2008 +0300 + + Update the code to mostly match the new simpler file format + specification. Simplify things by removing most of the + support for known uncompressed size in most places. + There are some miscellaneous changes here and there too. + + The API of liblzma has got many changes and still some + more will be done soon. While most of the code has been + updated, some things are not fixed (the command line tool + will choke with invalid filter chain, if nothing else). + + Subblock filter is somewhat broken for now. It will be + updated once the encoded format of the Subblock filter + has been decided. + +commit bf6348d1a3ff09fdc06940468f318f75ffa6af11 +Author: Lasse Collin +Date: Tue Jun 17 15:03:46 2008 +0300 + + Update the file format specification draft. The new one is + a lot simpler than the previous versions, but it also means + that the existing code will change a lot. + +commit 803194ddd26f01ff60ba4e9924c6087a56b29827 +Author: Lasse Collin +Date: Wed Jun 11 21:42:47 2008 +0300 + + Fix uninitialized variable in LZMA encoder. 
This was + introduced in 369f72fd656f537a9a8e06f13e6d0d4c242be22f. + +commit 0ea98e52ba87453497b1355c51f13bad55c8924a +Author: Lasse Collin +Date: Wed Jun 11 15:08:44 2008 +0300 + + Improve command line integer parsing a little in lzma and + lzmadec to make them accept also KiB in addition to Ki etc. + Fix also memory usage information in lzmadec --help. + +commit 436fa5fae96d4e35759aed33066060f09ee8c6ef +Author: Lasse Collin +Date: Tue Jun 10 20:36:12 2008 +0300 + + s/decompressed/compressed/ in the command line tool's + error message. + +commit 369f72fd656f537a9a8e06f13e6d0d4c242be22f +Author: Lasse Collin +Date: Sun Jun 1 12:48:17 2008 +0300 + + Fix a buffer overflow in the LZMA encoder. It was due to my + misunderstanding of the code. There's no tiny fix for this + problem, so I also cleaned up the code in general. + + This reduces the speed of the encoder 2-5 % in the fastest + compression mode ("lzma -1"). High compression modes should + have no noticeable performance difference. + + This commit breaks things (especially LZMA_SYNC_FLUSH) but I + will fix them once the new format and LZMA2 has been roughly + implemented. Plain LZMA won't support LZMA_SYNC_FLUSH at all + and won't be supported in the new .lzma format. This may + change still but this is what it looks like now. + + Support for known uncompressed size (that is, LZMA or LZMA2 + without EOPM) is likely to go away. This means there will + be API changes. + +commit e55e0e873ce2511325749d415ae547d62ab5f00d +Author: Lasse Collin +Date: Fri May 30 11:53:41 2008 +0300 + + Typo fixes from meyering. + +commit ed6664146fcbe9cc4a3b23b31632182ed812ea93 +Author: Lasse Collin +Date: Sun May 11 14:24:42 2008 +0300 + + Remove support for pre-C89 libc versions that lack memcpy, + memmove, and memset. + +commit b09464bf9ae694afc2d1dc26188ac4e2e8af0a63 +Author: Lasse Collin +Date: Sun May 11 14:17:21 2008 +0300 + + Improved C99 compiler detection in configure.ac.
It will + pass -std=gnu99 instead of -std=c99 to GCC now, but -pedantic + should still give warnings about GNU extensions like before + except with some special keywords like asm(). + +commit 11de5d5267f7a0a7f0a4d34eec147e65eaf9f9cf +Author: Lasse Collin +Date: Tue May 6 15:15:07 2008 +0300 + + Bunch of grammar fixes from meyering. + +commit dc192b6343ae36276c85fcf7ef6006147816eadc +Author: Lasse Collin +Date: Tue May 6 13:41:05 2008 +0300 + + Typo fix + +commit 944b62b93239b27b338d117f2668c0e95849659b +Author: Lasse Collin +Date: Sun May 4 22:29:27 2008 +0300 + + Don't print an error message on broken pipe unless --verbose + is used. + +commit 8e074349e47ea6832b8fdf9244e581d453733433 +Author: Lasse Collin +Date: Wed Apr 30 22:16:17 2008 +0300 + + Fix a crash with --format=alone if other filters than LZMA + are specified on the command line. + +commit 2f361ac19b7fd3abcd362de4d470e6a9eb495b73 +Author: Lasse Collin +Date: Mon Apr 28 17:08:27 2008 +0300 + + Updated THANKS. + +commit 3be21fb12f4cec2cf07799e8960382f4cb375369 +Author: Lasse Collin +Date: Mon Apr 28 17:06:34 2008 +0300 + + Fixed wrong spelling "limitter" to "limiter". This affects + liblzma's API. + +commit beeb81060821dfec4e7898e0d44b7900dcb2215e +Author: Lasse Collin +Date: Fri Apr 25 15:39:50 2008 +0300 + + Prevent LZ encoder from hanging with known uncompressed + size. The "fix" breaks LZMA_SYNC_FLUSH at end of stream + with known uncompressed size, but since it currently seems + likely that support for encoding with known uncompressed + size will go away anyway, I'm not fixing this problem now. + +commit c324325f9f13cdeb92153c5d00962341ba070ca2 +Author: Lasse Collin +Date: Fri Apr 25 13:58:56 2008 +0300 + + Removed src/liblzma/common/sysdefs.h symlink, which was + annoying, because "make dist" put two copies of sysdefs.h + into the tarball instead of the symlink. 
+ +commit d3ba30243c75c13d094de1793f9c58acdbacc692 +Author: Lasse Collin +Date: Fri Apr 25 13:41:29 2008 +0300 + + Added memusage.c to debug directory. + +commit 8f804c29aa8471ccd6438ddca254092b8869ca52 +Author: Lasse Collin +Date: Fri Apr 25 13:32:35 2008 +0300 + + Bumped version number to 4.999.3alpha. It will become 5.0.0 + once we have a stable release (won't be very soon). The + version number is no longer related to version of LZMA SDK. + + Made some small Automake-related changes to toplevel + Makefile.am and configure.ac. + +commit c99037ea10f121cbacf60c37a36c29768ae53447 +Author: Lasse Collin +Date: Thu Apr 24 20:25:39 2008 +0300 + + Fix a memory leak by calling free(extra->data) in + lzma_extra_free(). + +commit 22ba3b0b5043fa481903482ce85015fe775939e5 +Author: Lasse Collin +Date: Thu Apr 24 20:23:05 2008 +0300 + + Make unlzma and lzcat symlinks. + +commit 17c36422d4cbc2c70d5c83ec389406f92cd9e85e +Author: Lasse Collin +Date: Thu Apr 24 20:20:27 2008 +0300 + + Fixed a bug in command line option parsing. + +commit 283f939974c32c47f05d495e8dea455ec646ed64 +Author: Lasse Collin +Date: Thu Apr 24 20:19:20 2008 +0300 + + Added two assert()s. + +commit eb348a60b6e19a7c093f892434f23c4756973ffd +Author: Lasse Collin +Date: Thu Apr 24 19:22:53 2008 +0300 + + Switch to uint16_t as the type of range coder probabilities. + +commit 6c5306e312bcfd254cf654f88c04e34ba786df3d +Author: Lasse Collin +Date: Thu Apr 24 18:39:57 2008 +0300 + + Fix wrong return type (uint32_t -> bool). + +commit 712cfe3ebfd24df24d8896b1315c53c3bc4369c8 +Author: Lasse Collin +Date: Thu Apr 24 18:38:00 2008 +0300 + + Fix data corruption in LZ encoder with LZMA_SYNC_FLUSH. + +commit bc04486e368d20b3027cde625267762aae063965 +Author: Lasse Collin +Date: Thu Apr 24 17:33:01 2008 +0300 + + Fix fastpos problem in Makefile.am when built with --enable-small. 
+ +commit 7ab493924e0ed590a5121a15ee54038d238880d3 +Author: Lasse Collin +Date: Thu Apr 24 17:30:51 2008 +0300 + + Use 64-bit integer as range encoder's cache size. This fixes a + theoretical data corruption, which should be very hard to trigger + even intentionally. + +commit 641998c3e1ecc8b598fe0eb051fab8b9535c291b +Author: Lasse Collin +Date: Mon Mar 24 16:38:40 2008 +0200 + + Replaced the range decoder optimization that used arithmetic + right shift with an equally fast version that doesn't need + arithmetic right shift. Removed the related check from + configure.ac. + +commit ad999efd279d95f1e7ac555b14170e8e9020488c +Author: Lasse Collin +Date: Sat Mar 22 14:39:34 2008 +0200 + + Take advantage of arithmetic right shift in range decoder. + +commit 03e0e8a0d7228b6ff1f0af39e2c040a4e425973d +Author: Lasse Collin +Date: Sat Mar 22 14:18:29 2008 +0200 + + Added autoconf check to detect if we can use arithmetic + right shift for optimizations. + +commit 7521bbdc83acab834594a22bec50c8e1bd836298 +Author: Lasse Collin +Date: Sat Mar 22 01:26:36 2008 +0200 + + Update a comment to use the variable name rep_len_decoder. + + (And BTW, the previous commit actually did change the + program logic slightly.) + +commit 63b74d000eedaebb8485f623e56864ff5ab71064 +Author: Lasse Collin +Date: Sat Mar 22 00:57:33 2008 +0200 + + Demystified the "state" variable in LZMA code. Use the + word literal instead of char for better consistency. + There are still some names with _char instead of _literal + in lzma_optimum, these may be changed later. + + Renamed length coder variables. + + This commit doesn't change the program logic. + +commit e6eb0a26757e851cef62b9440319a8e73b015cb9 +Author: Lasse Collin +Date: Fri Mar 14 23:16:11 2008 +0200 + + Fix data corruption in LZMA encoder. Note that this bug was + specific to liblzma and was *not* present in LZMA SDK. + +commit 7d516f5129e4373a6d57249d7f608c634c66bf12 +Author: Lasse Collin +Date: Fri Mar 14 21:32:37 2008 +0200 + + Fix a comment in an API header.
+ +commit 748d6e4274921a350bd0a317380309717441ef9c +Author: Lasse Collin +Date: Wed Mar 12 23:14:50 2008 +0200 + + Make lzma_stream.next_in const. Let's see if anyone complains. + +commit bfde3b24a5ae25ce53c854762b6148952386b025 +Author: Lasse Collin +Date: Tue Mar 11 15:35:34 2008 +0200 + + Apply a minor speed optimization to LZMA decoder. + +commit f310c50286d9e4e9c6170bb65348c9bb430a65b4 +Author: Lasse Collin +Date: Tue Mar 11 15:17:16 2008 +0200 + + Initialize the last byte of the dictionary to zero so that + lz_get_byte(lz, 0) returns zero. This was broken by + 1a3b21859818e4d8e89a1da99699233c1bfd197d. + +commit 5ead36cf7f823093672a4e43c3180b38c9abbaff +Author: Lasse Collin +Date: Mon Mar 10 15:57:55 2008 +0200 + + Really fix the price count initialization. + +commit d4d7feb83d1a1ded8f662a82e21e053841ca726c +Author: Lasse Collin +Date: Mon Mar 10 13:47:17 2008 +0200 + + Updated THANKS. + +commit 0541c5ea63ef3c0ff85eeddb0a420e56b0c65258 +Author: Lasse Collin +Date: Mon Mar 10 13:46:48 2008 +0200 + + Initialize align_price_count and match_price_count in + lzma_encoder_init.c. While we don't call + fill_distances_prices() and fill_align_prices() in + lzma_lzma_encoder_init(), we still need to initialize + these two variables so that the fill functions get + called in lzma_encoder_getoptimum.c in the beginning + of a stream. + +commit 596fa1fac72823e4ef5bc26bb53f9090445bf748 +Author: Lasse Collin +Date: Mon Mar 10 13:44:29 2008 +0200 + + Always initialize lz->temp_size in lz_decoder.c. temp_size did + get initialized as a side-effect after allocating a new decoder, + but not when the decoder was reused. + +commit 45e43e169527e7a98a8c8a821d37bf25822b764d +Author: Lasse Collin +Date: Mon Mar 10 13:41:25 2008 +0200 + + Don't fill allocated memory with 0xFD when debugging is + enabled. It hides errors from Valgrind. 
+ +commit c0e19e0662205f81a86da8903cdc325d50635870 +Author: Lasse Collin +Date: Thu Feb 28 10:24:31 2008 +0200 + + Remove two redundant validity checks from the LZMA decoder. + These are already checked elsewhere, so omitting these + gives (very) tiny speed up. + +commit de7485806284d1614095ae8cb2ebbb5d74c9ac45 +Author: Lasse Collin +Date: Wed Feb 6 13:25:32 2008 +0200 + + Tiny clean up to file-format.txt. + +commit 1a3b21859818e4d8e89a1da99699233c1bfd197d +Author: Lasse Collin +Date: Sat Feb 2 14:51:06 2008 +0200 + + Don't memzero() the history buffer when initializing LZ + decoder. There's no danger of information leak here, so + it isn't required. Doing memzero() takes a lot of time + with large dictionaries, which could make it easier to + construct DoS attack to consume too much CPU time. + +commit 7e796e312bf644ea95aea0ff85480f47cfa30fc0 +Author: Lasse Collin +Date: Fri Feb 1 08:39:26 2008 +0200 + + Do uncompressed size validation in raw encoder. This way + it gets done for not only raw encoder, but also Block + and LZMA_Alone encoders. + +commit 7dd48578a3853e0cfab9f1830bc30927173ec4bc +Author: Lasse Collin +Date: Fri Feb 1 08:32:05 2008 +0200 + + Avoid unneeded function call in raw_common.c. + +commit b596fac963c3ff96f615d4d9b427a213ec341211 +Author: Lasse Collin +Date: Sat Jan 26 21:42:38 2008 +0200 + + Updated THANKS. + +commit e9f6e9c075ad93141a568d94f7d4eb0f2edbd6c2 +Author: Lasse Collin +Date: Sat Jan 26 21:40:23 2008 +0200 + + Added note.GNU-stack to x86 assembler files. It is needed + when using non-executable stack. + +commit 4c7ad179c78f97f68ad548cb40a9dfa6871655ae +Author: Lasse Collin +Date: Sat Jan 26 19:12:50 2008 +0200 + + Added api/lzma/easy.h. I had forgot to add this to the + git repo. Thanks to Stephan Kulow. + +commit 288b232f54c3692cd36f471d4042f51daf3ea79f +Author: Lasse Collin +Date: Sat Jan 26 11:09:17 2008 +0200 + + Added more test files. 
+ +commit c467b0defccf233d0c79234407bc38d7d09574d3 +Author: Lasse Collin +Date: Sat Jan 26 10:47:55 2008 +0200 + + Added more test files. + +commit f9842f712732c482f2def9f24437851e57dd83f8 +Author: Lasse Collin +Date: Sat Jan 26 00:25:34 2008 +0200 + + Return LZMA_HEADER_ERROR if LZMA_SYNC_FLUSH is used with any + of the so called simple filters. If there is demand, limited + support for LZMA_SYNC_FLUSH may be added in future. + + After this commit, using LZMA_SYNC_FLUSH shouldn't cause + undefined behavior in any situation. + +commit e988ea1d1a286dd0f27af0657f9665d5cd8573aa +Author: Lasse Collin +Date: Fri Jan 25 23:50:35 2008 +0200 + + Added more Multi-Block test files. Improved some + descriptions in the test files' README. + +commit 4441e004185cd4c61bda184010eca5924c9dec87 +Author: Lasse Collin +Date: Fri Jan 25 23:12:36 2008 +0200 + + Combine lzma_options_block validation needed by both Block + encoder and decoder, and put the shared things to + block_private.h. Improved the checks a little so that + they may detect too big Compressed Size at initialization + time if lzma_options_block.total_size or .total_limit is + known. + + Allow encoding and decoding Blocks with combinations of + fields that are not allowed by the file format specification. + Doing this requires that the application passes such a + combination in lzma_options_lzma; liblzma doesn't do that, + but it's not impossible that someone could find them useful + in some custom file format. + +commit bf4200c818fcf9102e56328d39cde91bfa13cfb6 +Author: Lasse Collin +Date: Fri Jan 25 19:21:22 2008 +0200 + + Added test_memlimit.c. + +commit 7b8fc7e6b501a32a36636dac79ecb57099269005 +Author: Lasse Collin +Date: Fri Jan 25 19:20:28 2008 +0200 + + Improved the memory limitter: + - Added lzma_memlimit_max() and lzma_memlimit_reached() + API functions. + - Added simple estimation of malloc()'s memory usage + overhead. + - Fixed integer overflow detection in lzma_memlimit_alloc(). 
+ - Made some white space cleanups and added more comments. + + The description of lzma_memlimit_max() in memlimit.h is bad + and should be improved. + +commit e0c3d0043da2f670cfdb1abbb3223d5a594ad8db +Author: Lasse Collin +Date: Fri Jan 25 13:55:52 2008 +0200 + + Use more parenthesis in succeed() macro in tests/tests.h. + +commit 1fd76d488179580d37f31ee11948f4932aed31fd +Author: Lasse Collin +Date: Thu Jan 24 14:49:34 2008 +0200 + + Added more Multi-Block Stream test files. + +commit 6e27b1098a28f4ce09bfa6df68ad94182dfc2936 +Author: Lasse Collin +Date: Thu Jan 24 00:46:05 2008 +0200 + + Added bunch of test files containing Multi-Block Streams. + +commit db9df0a9609c01a00a227329fb96e983971040f5 +Author: Lasse Collin +Date: Wed Jan 23 23:43:00 2008 +0200 + + Fix decoding of empty Metadata Blocks, that don't have + even the Metadata Flags field. Earlier the code allowed + such files; now they are prohibited as the file format + specification requires. + +commit 765f0b05f6e95ed9194fb90819cee189ebbac36b +Author: Lasse Collin +Date: Wed Jan 23 23:38:18 2008 +0200 + + Fix a bug related to 99e12af4e2b866c011fe0106cd1e0bfdcc8fe9c6. + lzma_metadata.header_metadata_size was not properly set to + zero if the Metadata had only the Metadata Flags field. + +commit 3a7cc5c3dec7b078941f961b0393b86c418883b6 +Author: Lasse Collin +Date: Wed Jan 23 23:35:49 2008 +0200 + + Fix decoding of Extra Records that have empty Data. + +commit e5fdec93e273855c1bcc2579b83cfb481a9a1492 +Author: Lasse Collin +Date: Wed Jan 23 22:02:38 2008 +0200 + + Add the trailing '\0' to lzma_extra.data as the API header + already documents. + +commit ed40dc5a2c28a8dfccab8c165b3780738eeef93e +Author: Lasse Collin +Date: Wed Jan 23 21:21:21 2008 +0200 + + Added debug/full_flush.c. 
+ +commit ae0cd09a666a1682da8fc09487322227679e218d +Author: Lasse Collin +Date: Wed Jan 23 21:05:33 2008 +0200 + + Return LZMA_STREAM_END instead of LZMA_OK if + LZMA_SYNC_FLUSH or LZMA_FULL_FLUSH is used when + there's no unfinished Block open. + +commit 0e80ded13dfceb98f9494cbb5381a95eb44d03db +Author: Lasse Collin +Date: Wed Jan 23 20:05:01 2008 +0200 + + Added bad-single-none-footer_filter_flags.lzma and + bad-single-none-too_long_vli.lzma. + +commit 8c8eb14055d8dd536b1b1c58fb284d34bb8ed1dd +Author: Lasse Collin +Date: Wed Jan 23 13:42:35 2008 +0200 + + Fixed a typo. + +commit 980f65a9a10160c4d105767871e3002b9aaba3e0 +Author: Lasse Collin +Date: Wed Jan 23 13:40:45 2008 +0200 + + Fix a memory leak in the Subblock encoder. + +commit 99e12af4e2b866c011fe0106cd1e0bfdcc8fe9c6 +Author: Lasse Collin +Date: Wed Jan 23 13:36:07 2008 +0200 + + Fix Size of Header Metadata Block handling. Now + lzma_metadata.header_metadata_size == LZMA_VLI_VALUE_UNKNOWN + is not allowed at all. To indicate missing Header Metadata + Block, header_metadata_size must be set to zero. This is + what Metadata decoder does after this patch too. + + Note that other missing fields in lzma_metadata are still + indicated with LZMA_VLI_VALUE_UNKNOWN. This isn't as + illogical as it sounds at first, because missing Size of + Header Metadata Block means that Header Metadata Block is + not present in the Stream. With other Metadata fields, + a missing field means only that the value is unknown. + +commit 58b78ab20c1bcced45cf71ae6684868fc90b4b81 +Author: Lasse Collin +Date: Wed Jan 23 13:15:55 2008 +0200 + + Fix a memory leak in metadata_decoder.c. + +commit 4d8cdbdab44400fd98f0f18a0f701e27cd1acdae +Author: Lasse Collin +Date: Wed Jan 23 13:13:58 2008 +0200 + + Fix the fix 863028cb7ad6d8d0455fa69348f56b376d7b908f which + just moved to problem. Now it's really fixed. 
+ +commit 67321de963ccf69410b3868b8e31534fe18a90de +Author: Lasse Collin +Date: Wed Jan 23 00:21:04 2008 +0200 + + Take advantage of return_if_error() macro in + lzma_info_metadata_set() in info.c. + +commit 863028cb7ad6d8d0455fa69348f56b376d7b908f +Author: Lasse Collin +Date: Wed Jan 23 00:18:32 2008 +0200 + + Fixed a dangling pointer that caused invalid free(). + +commit cf49f42a6bd40143f54a6b10d6e605599e958c0b +Author: Lasse Collin +Date: Tue Jan 22 22:49:24 2008 +0200 + + Added lzma_easy_* functions. These should make using + liblzma as easy as using zlib, because the easy API + don't require developers to know any fancy LZMA options. + + Note that Multi-Block Stream encoding is currently broken. + The easy API should be OK, the bug(s) are elsewhere. + +commit 1747b85a43abc1c3f152dbd349be2ef4089ecf6a +Author: Lasse Collin +Date: Tue Jan 22 21:16:22 2008 +0200 + + Fix Multi-Block Stream encoder's EOPM usage. + +commit 0ed6f1adcea540fb9593ca115d36de537f7f0dc6 +Author: Lasse Collin +Date: Tue Jan 22 00:15:11 2008 +0200 + + Made lzma_extra pointers const in lzma_options_stream. + +commit 305afa38f64c75af8e81c4167e2d8fa8d85b53a4 +Author: Lasse Collin +Date: Sun Jan 20 20:15:21 2008 +0200 + + Updated debug/sync_flush.c. + +commit d53e9b77054cfade6a643e77d085273a348b189c +Author: Lasse Collin +Date: Sun Jan 20 20:14:26 2008 +0200 + + Added debug/repeat.c. + +commit 107259e306bcfc2336a0fb870fb58034c28faa52 +Author: Lasse Collin +Date: Sun Jan 20 20:12:58 2008 +0200 + + Fix alignment handling bugs in Subblock encoder. + + This leaves one known alignment bug unfixed: If repeat count + doesn't fit into 28-bit integer, the encoder has to split + this to multiple Subblocks with Subblock Type `Repeating Data'. + The extra Subblocks may have wrong alignment. Correct alignment + is restored after the split Repeating Data has been completely + written out. 
+ + Since the encoder doesn't even try to fix the alignment unless + the size of Data is at least 4 bytes, to trigger this bug you + need at least 4 GiB of repeating data with sequence length of + 4 or more bytes. Since the worst thing done by this bug is + misaligned data (no data corruption), this bug simply isn't + worth fixing, because a proper fix isn't simple. + +commit e141fe18950400faaa3503ff88ac20eacd73e88c +Author: Lasse Collin +Date: Sat Jan 19 21:16:33 2008 +0200 + + Implemented LZMA_SYNC_FLUSH support to the Subblock encoder. + The API for handing Subfilters was changed to make it + consistent with LZMA_SYNC_FLUSH. + + A few sanity checks were added for Subfilter handling. Some + small bugs were fixed. More comments were added. + +commit 23c227a864a3b69f38c6a74306161d4e6918d1cc +Author: Lasse Collin +Date: Sat Jan 19 15:19:21 2008 +0200 + + Revised the Delta filter implementation. The initialization + function is still shared between encoder and decoder, but the + actual coding is in separate files for encoder and decoder. + + There are now separate functions for the actual delta + calculation depending on if Delta is the last filter in the + chain or not. If it is the last, the new code copies the + data from input to output buffer and does the delta + calculation at the same time. The old code first copied the + data, then did the delta in the target buffer, which required + reading through the data twice. + + Support for LZMA_SYNC_FLUSH was added to the Delta encoder. + This doesn't change anything in the file format. + +commit 61dc82f3e306b25ce3cd3d529df9ec7a0ec04b73 +Author: Lasse Collin +Date: Fri Jan 18 20:18:08 2008 +0200 + + Added the debug directory and the first debug tool + (sync_flush). These tools are not built unless the + user runs "make" in the debug directory. + +commit 0ae3208db94585eb8294b97ded387de0a3a07646 +Author: Lasse Collin +Date: Fri Jan 18 20:13:00 2008 +0200 + + Added test files to test usage of flush marker in LZMA. 
+ +commit ab5feaf1fcc146ef9fd39360c53c290bec39524e +Author: Lasse Collin +Date: Fri Jan 18 20:02:52 2008 +0200 + + Fix LZMA_SYNC_FLUSH handling in LZ and LZMA encoders. + That code is now almost completely in LZ coder, where + it can be shared with other LZ77-based algorithms in + future. + +commit 079c4f7fc26b3d0b33d9ae7536697b45f3b73585 +Author: Lasse Collin +Date: Fri Jan 18 17:21:24 2008 +0200 + + Don't add -g to CFLAGS when --enable-debug is specified. + It's the job of the user to put that in CFLAGS. + +commit 61d1784d8f1761d979a6da6e223e279ca33815e6 +Author: Lasse Collin +Date: Fri Jan 18 14:17:37 2008 +0200 + + Set stdin and stdout to binary mode on Windows. This patch is + a forward port of b7b22fcb979a16d3a47c8001f058c9f7d4416068 + from lzma-utils-legacy.git. I don't know if the new code base + builds on Windows, but this is a start. + +commit c9cba976913e55ff9aac8a8133cc94416c7c1c9c +Author: Lasse Collin +Date: Fri Jan 18 00:50:29 2008 +0200 + + Added test_compress.sh and bunch of files needed by it. + This new set of tests compress and decompress several + test files with many different compression options. + This set of tests will be extended later. + +commit 33be3c0e24d8f43376ccf71cc77d53671e792f07 +Author: Lasse Collin +Date: Thu Jan 17 18:56:53 2008 +0200 + + Subblock decoder: Don't exit the main loop in decode_buffer() + too early if we hit End of Input while decoding a Subblock of + type Repeating Data. To keep the loop termination condition + elegant, the order of enumerations in coder->sequence were + changed. + + To keep the case-labels in roughly the same order as the + enumerations in coder->sequence, large chunks of code was + moved around. This made the diff big and ugly compared to + the amount of the actual changes made. + +commit b254bd97b1cdb68d127523d91ca9e054ed89c4fd +Author: Lasse Collin +Date: Thu Jan 17 17:39:42 2008 +0200 + + Fix wrong too small size of argument unfiltered_max + in ia64_coder_init(). 
It triggered assert() in + simple_coder.c, and could have caused a buffer overflow. + + This error was probably a copypaste mistake, since most + of the simple filters use unfiltered_max = 4. + +commit 8f5794c8f1a30e8e3b524b415bbe81af2e04c64a +Author: Lasse Collin +Date: Thu Jan 17 17:27:45 2008 +0200 + + Added --delta to the output of "lzma --help". + +commit f88590e0014b38d40465937c19f25f05f16c79ae +Author: Lasse Collin +Date: Thu Jan 17 13:14:20 2008 +0200 + + Fix Subblock docoder: If Subblock filter was used with known + Uncompressed Size, and the last output byte was from RLE, + the code didn't stop decoding as it should have done. + +commit bc0b945ca376e333077644d2f7fd54c2848aab8a +Author: Lasse Collin +Date: Wed Jan 16 16:33:37 2008 +0200 + + Tiny non-technical edits to file-format.txt. + +commit 7599bb7064ccf007f054595dedda7927af868252 +Author: Lasse Collin +Date: Wed Jan 16 14:48:04 2008 +0200 + + Plugged a memory leak in stream_decoder.c. + +commit 0b581539311f3712946e81e747839f8fb5f441a7 +Author: Lasse Collin +Date: Wed Jan 16 14:47:27 2008 +0200 + + Added memory leak detection to lzmadec.c. + +commit 5b5b13c7bb8fde6331064d21f3ebde41072480c4 +Author: Lasse Collin +Date: Wed Jan 16 14:46:50 2008 +0200 + + Added lzma_memlimit_count(). + +commit 19389f2b82ec54fd4c847a18f16482e7be4c9887 +Author: Lasse Collin +Date: Wed Jan 16 14:31:44 2008 +0200 + + Added ARRAY_SIZE(array) macro. + +commit 9bc33a54cbf83952130adbcb1be32c6882485416 +Author: Lasse Collin +Date: Wed Jan 16 13:27:03 2008 +0200 + + Make Uncompresed Size validation more strict + in alone_decoder.c. + +commit 01d71d60b79027e1ce3eb9c79ae5191e1407c883 +Author: Lasse Collin +Date: Tue Jan 15 17:46:59 2008 +0200 + + Free the allocated memory in lzmadec if debugging is + enabled. This should make it possible to detect possible + memory leaks with Valgrind. 
+ +commit 8235e6e5b2878f76633afcda9a334640db503ef5 +Author: Lasse Collin +Date: Tue Jan 15 16:25:38 2008 +0200 + + Fix memory leaks from test_block_header.c. + +commit f10fc6a69d40b6d5c9cfbf8d3746f49869c2e2f6 +Author: Lasse Collin +Date: Tue Jan 15 14:23:35 2008 +0200 + + Use fastpos.h when encoding LZMA dictionary size in + Filter Flags encoder. + +commit e5728142a2048979f5c0c2149ce71ae952a092e1 +Author: Lasse Collin +Date: Tue Jan 15 14:02:22 2008 +0200 + + Revised the fastpos code. It now uses the slightly faster + table-based version from LZMA SDK 4.57. This should be + fast on most systems. + + A simpler and smaller alternative version is also provided. + On some CPUs this can be even a little faster than the + default table-based version (see comments in fastpos.h), + but on most systems the table-based code is faster. + +commit 10437b5b567f6a025ff16c45a572e417a0a9cc26 +Author: Lasse Collin +Date: Tue Jan 15 13:32:13 2008 +0200 + + Added bsr.h. + +commit f3c88e8b8d8dd57f4bba5f0921eebf276437c244 +Author: Lasse Collin +Date: Tue Jan 15 13:29:14 2008 +0200 + + Fixed assembler detection in configure.ac, and added + detection for x86_64. + +commit 54ec204f58287f50d3976288295da4188a19192b +Author: Lasse Collin +Date: Tue Jan 15 12:20:41 2008 +0200 + + Omit invalid space from printf() format string + in price_table_gen.c. + +commit 01b4b19f49f00e17a0f9cb8754c672ac0847b6e1 +Author: Lasse Collin +Date: Tue Jan 15 09:54:34 2008 +0200 + + Removed a few unused macros from lzma_common.h. + +commit 19bd7f3cf25e4ff8487ef7098ca4a7b58681961d +Author: Lasse Collin +Date: Tue Jan 15 08:37:42 2008 +0200 + + Fix a typo in lzma_encoder.c. + +commit 9f9b1983013048f2142e8bc7e240149d2687bedc +Author: Lasse Collin +Date: Tue Jan 15 08:36:25 2008 +0200 + + Convert bittree_get_price() and bittree_reverse_get_price() + from macros to inline functions. 
+ +commit 78e85cb1a7667c54853670d2eb09d754bcbda87d +Author: Lasse Collin +Date: Tue Jan 15 07:44:59 2008 +0200 + + Fix CRC code in case --enable-small is used. + +commit 949d4346e2d75bcd9dcb66c394d8d851d8db3aa0 +Author: Lasse Collin +Date: Tue Jan 15 07:41:39 2008 +0200 + + Fix typo in test_index.c. + +commit d13d693155c176fc9e9ad5c50d48ccba27c2d9c6 +Author: Lasse Collin +Date: Tue Jan 15 07:40:21 2008 +0200 + + Added precomputed range coder probability price table. + +commit 362dc3843b373c1007a50a4719f378981f18ae03 +Author: Lasse Collin +Date: Mon Jan 14 13:42:43 2008 +0200 + + Remove RC_BUFFER_SIZE from lzma_encoder_private.h + and replace it with a sanity check. + +commit e22b37968d153683fec61ad37b6b160cb7ca4ddc +Author: Lasse Collin +Date: Mon Jan 14 13:39:54 2008 +0200 + + Major changes to LZ encoder, LZMA encoder, and range encoder. + These changes implement support for LZMA_SYNC_FLUSH in LZMA + encoder, and move the temporary buffer needed by range encoder + from lzma_range_encoder structure to lzma_lz_encoder. + +commit b59ef3973781f892c0a72b5e5934194567100be5 +Author: Lasse Collin +Date: Mon Jan 14 13:34:29 2008 +0200 + + Added one assert() to process.c of the command line tool. + +commit 9547e734a00ddb64c851fa3f116e4f9e7d763ea7 +Author: Lasse Collin +Date: Mon Jan 14 12:09:52 2008 +0200 + + Don't use coder->lz.stream_end_was_reached in assertions + in match_c.h. + +commit 3e09e1c05871f3757f759b801890ccccc9286608 +Author: Lasse Collin +Date: Mon Jan 14 12:08:02 2008 +0200 + + In lzma_read_match_distances(), don't use + coder->lz.stream_end_was_reached. That variable + will be removed, and the check isn't required anyway. + Rearrange the check so that it doesn't make one to + think that there could be an integer overflow. + +commit a670fec8021e5962429689c194148a04c3418872 +Author: Lasse Collin +Date: Mon Jan 14 11:56:41 2008 +0200 + + Small LZMA_SYNC_FLUSH fixes to Block and Single-Stream encoders. 
+ +commit 3599dba9570a6972a16b6398d6c838e9b420e985 +Author: Lasse Collin +Date: Mon Jan 14 11:54:56 2008 +0200 + + More fixes to LZMA decoder's flush marker handling. + +commit f73c2ab6079ed5675a42b39d584a567befbd4624 +Author: Lasse Collin +Date: Thu Jan 10 17:13:42 2008 +0200 + + Eliminate lzma_lz_encoder.must_move_pos. It's needed + only in one place which isn't performance criticial. + +commit 382808514a42b2f4b4a64515e2dfb3fc1bc48ecd +Author: Lasse Collin +Date: Wed Jan 9 20:05:57 2008 +0200 + + Define HAVE_ASM_X86 when x86 assembler optimizations are + used. This #define will be useful for inline assembly. + +commit 0e70fbe4032351aab13a1cd8e5deced105c0b276 +Author: Lasse Collin +Date: Wed Jan 9 12:06:46 2008 +0200 + + Added good-single-none-empty_3.lzma and + bad-single-none-empty.lzma. + +commit 379fbbe84d922c7cc00afa65c6f0c095da596b19 +Author: Lasse Collin +Date: Tue Jan 8 23:11:59 2008 +0200 + + Take advantage of return_if_error() in block_decoder.c. + +commit 97d5fa82077e57815dfad995dc393c2809a78539 +Author: Lasse Collin +Date: Tue Jan 8 23:10:57 2008 +0200 + + Updated tests/files/README. + +commit 3bb9bb310936cba6a743b4f06739a397dec7c28f +Author: Lasse Collin +Date: Tue Jan 8 23:05:40 2008 +0200 + + Added test files with empty Compressed Data. + +commit 7054c5f5888ac6a7178cd43dc9583ce6c7e78c9f +Author: Lasse Collin +Date: Tue Jan 8 22:58:42 2008 +0200 + + Fix decoding of Blocks that have only Block Header. + +commit 753e4d95cd1cf29c632dfe1a670af7c67aeffbf4 +Author: Lasse Collin +Date: Tue Jan 8 22:27:46 2008 +0200 + + Added good-single-subblock_implicit.lzma. + +commit faeac7b7aca75f86afed1e7cc06279d9d497c627 +Author: Lasse Collin +Date: Tue Jan 8 18:50:30 2008 +0200 + + Disable CRC32 from Block Headers when --check=none + has been specified. + +commit a751126dbb656767ed4666cf0e5d3e17349d93d1 +Author: Lasse Collin +Date: Tue Jan 8 13:36:29 2008 +0200 + + Fixed encoding of empty files. Arguments to is_size_valid() + were in wrong order in block_encoder.c. 
+ +commit 9080267603b1006c4867c823307dca9df8be0d20 +Author: Lasse Collin +Date: Tue Jan 8 13:35:36 2008 +0200 + + Added a few test files. + +commit b4943ccf73b64fc93a90a23474509c316f55eb2b +Author: Lasse Collin +Date: Tue Jan 8 12:29:58 2008 +0200 + + Avoid using ! in test_files.sh, because that doesn't work + with some ancient /bin/sh versions. + +commit e2417b2b9134f3f65e14b61e23cd3644d8954353 +Author: Lasse Collin +Date: Tue Jan 8 00:48:30 2008 +0200 + + More pre-C99 inttypes.h compatibility fixes. Now the code + should work even if the system has no inttypes.h. + +commit 5d227e51c23639423f4ade06aabb54e131f8505e +Author: Lasse Collin +Date: Mon Jan 7 23:25:32 2008 +0200 + + Updated fi.po although it's currently pretty much crap. + +commit c7189d981a1b27c63da0c1ee80d9b5cd8ce1733d +Author: Lasse Collin +Date: Mon Jan 7 23:14:25 2008 +0200 + + Test for $GCC = yes instead of if it is non-empty. This + way it is possible to use ac_cv_c_compiler_gnu=no to + force configure to think it is using non-GNU C compiler. + +commit 3dbbea82b74bb841c995ad332a3aeca613015e10 +Author: Lasse Collin +Date: Mon Jan 7 21:49:41 2008 +0200 + + Added test_files.sh to tests/Makefile.am so it gets + included in the tarball with "make dist". + +commit 2fd2d181543feab1b4003f3ac6e85625fbee04f0 +Author: Lasse Collin +Date: Mon Jan 7 18:22:24 2008 +0200 + + Cosmetic edit to test_files.sh. + +commit 9a71d573100a990ceb30ce0bec6a9a15d795605f +Author: Lasse Collin +Date: Mon Jan 7 18:09:44 2008 +0200 + + Added tests/files/README. + +commit 47f48fe9936ed72617a60fbd015df7e0e47a1e43 +Author: Lasse Collin +Date: Mon Jan 7 14:20:57 2008 +0200 + + Tell in COPYING that everything in tests/files is + public domain. + +commit 3502b3e1d00251d3c8dda96079440705c28d8225 +Author: Lasse Collin +Date: Mon Jan 7 14:19:05 2008 +0200 + + Cleaned up the tests/files directory. 
+ +commit 908b2ac604b9940369d7fe8a45e9eb6da5d2a24c +Author: Lasse Collin +Date: Mon Jan 7 13:49:19 2008 +0200 + + Added test_files.sh to test decoding of the files in + the tests/files directory. It doesn't test the malicious + files yet. + +commit ecb2a6548f5978022a8fa931719dc575f5fd3bf6 +Author: Lasse Collin +Date: Mon Jan 7 11:23:13 2008 +0200 + + Updated README regarding the assembler optimizations. + +commit eacb8050438d3e6146c86eb9732d3fb1ef1825cb +Author: Lasse Collin +Date: Mon Jan 7 10:58:00 2008 +0200 + + Updated THANKS. + +commit 1239649f96132b18e3b7e2dd152ecf53a195caa8 +Author: Lasse Collin +Date: Sun Jan 6 21:47:17 2008 +0200 + + Cosmetic changes to configure.ac. + +commit 88ee301ec2e4506a30ec7ac9aaa2288e2dcadd0e +Author: Lasse Collin +Date: Sun Jan 6 19:46:38 2008 +0200 + + Automatically disable assembler code on Darwin x86. + Darwin has different ABI than GNU+Linux and Solaris, + thus the assembler code doesn't assemble on Darwin. + +commit c15a7abf66e3a70792f7444115e484c7981c8284 +Author: Lasse Collin +Date: Sun Jan 6 19:45:27 2008 +0200 + + With printf(), use PRIu64 with a cast to uint64_t instead + of %zu, because some pre-C99 libc versions don't support %zu. + +commit 4e7e54c4c522ab2f6a7abb92cefc4f707e9568fb +Author: Lasse Collin +Date: Sun Jan 6 16:27:41 2008 +0200 + + Introduced compatibility with systems that have pre-C99 + or no inttypes.h. This is useful when the compiler has + good enough support for C99, but libc headers don't. + + Changed liblzma API so that sys/types.h and inttypes.h + have to be #included before #including lzma.h. On systems + that don't have C99 inttypes.h, it's the problem of the + applications to provide the required types and macros + before #including lzma.h. + + If lzma.h defined the missing types and macros, it could + conflict with third-party applications whose configure + has detected that the types are missing and defined them + in config.h already. 
An alternative would have been + introducing lzma_uint32 and similar types, but that would + just be an extra pain on modern systems. + +commit a71864f77dfb76b5d78a270641539947c312583a +Author: Lasse Collin +Date: Sat Jan 5 19:57:00 2008 +0200 + + Fix typo in comment (INT64_MAX -> UINT64_MAX). + +commit 072927905a3b66281c6311b4b351caa501d8b73a +Author: Lasse Collin +Date: Sat Jan 5 19:42:04 2008 +0200 + + Rearranged testing of GCC-specific flags. + +commit d160ee32598c6d1cd9054ef019e8c9331208b188 +Author: Lasse Collin +Date: Sat Jan 5 01:20:24 2008 +0200 + + Another bug fix for flush marker detection. + +commit fc67f79f607cbfa78c6f47a69dec098d8659b162 +Author: Lasse Collin +Date: Fri Jan 4 21:37:01 2008 +0200 + + Fix stupid bugs in flush marker detection. + +commit 0029cbbabe87d491fc046a55a629a6d556010baa +Author: Lasse Collin +Date: Fri Jan 4 21:30:33 2008 +0200 + + Added support for flush marker, which will be in files + that use LZMA_SYNC_FLUSH with encoder (not implemented + yet). This is a new feature in the raw LZMA format, + which isn't supported by old decoders. This shouldn't + be a problem in practice, since lzma_alone_encoder() + will not allow LZMA_SYNC_FLUSH, and thus not allow + creating files on decodable with old decoders. + + Made lzma_decoder.c to require tab width of 4 characters + if one wants to fit the code in 80 columns. This makes + the code easier to read. + +commit bbfd1f6ab058a7e661545205befcb7f70c5685ab +Author: Lasse Collin +Date: Fri Jan 4 20:45:05 2008 +0200 + + Moved range decoder initialization (reading the first + five input bytes) from LZMA decoder to range decoder + header. Did the same for decoding of direct bits. + +commit 5db745cd2a74f6ed2e52f5c716c08ed0daf17ebc +Author: Lasse Collin +Date: Fri Dec 14 11:15:21 2007 +0200 + + Added a note to README that --disable-assembler + must be used on Darwin. 
+ +commit 44b333d4615b5aabc557a0e1b6bb0096da3fae24 +Author: Lasse Collin +Date: Fri Dec 14 10:07:10 2007 +0200 + + Use the filename suffix .S instead of .s for assembler files + so that the preprocessor removes the /* */ style comments, + which are not supported by some non-GNU assemblers (Solaris) + that otherwise work with this code. + +commit ec1c82b2e82f395f6e8e19ac212a639644330cd7 +Author: Lasse Collin +Date: Fri Dec 14 09:59:05 2007 +0200 + + Fixed wrong symbol name in crc64_x86.s. + +commit 2881570df6803eed2fe550af34574e8e61794804 +Author: Lasse Collin +Date: Fri Dec 14 09:53:24 2007 +0200 + + Use .globl instead of .global in x86 assembler code for + better portability. Still needs fixing the commenting. + +commit 698470b8f33fc0e5f27dafa93b39b6dd5dde5a66 +Author: Lasse Collin +Date: Thu Dec 13 20:14:37 2007 +0200 + + Fixed a few short options that take an argument. + short_opts[] was missing colons to indicate + required argument. Thanks to Fabio Pedretti for + the bug report. + +commit 918bcb0e0728d2d976621e9f35b56f224f11d989 +Author: Lasse Collin +Date: Tue Dec 11 17:08:04 2007 +0200 + + Removed uncompressed size tracking from Delta encoder too. + +commit 3e16d51dd645667b05ff826665b1fc353aa41cd9 +Author: Lasse Collin +Date: Tue Dec 11 16:49:19 2007 +0200 + + Remove uncompressed size tracking from the filter encoders. + It's not strictly needed there, and just complicates the + code. LZ encoder never even had this feature. + + The primary reason to have uncompressed size tracking in + filter encoders was validating that the application + doesn't give different amount of input that it had + promised. A side effect was to validate internal workings + of liblzma. + + Uncompressed size tracking is still present in the Block + encoder. Maybe it should be added to LZMA_Alone and raw + encoders too. It's simpler to have one coder just to + validate the uncompressed size instead of having it + in every filter. 
+ +commit 5286723e0d1ac386d5b07f08d78e61becf895a5a +Author: Lasse Collin +Date: Tue Dec 11 14:10:53 2007 +0200 + + Get rid of no-NLS gnulib. I don't know how to get it + working with Automake. People who want smaller lzmadec + should use --disable-nls on non-GNU systems. + +commit ce8b036a6c7a43b290356b673d953f6d76b2be64 +Author: Lasse Collin +Date: Tue Dec 11 14:09:35 2007 +0200 + + Fixed a typo in tests/Makefile.am which prevented + building the tests if gnulib was needed. + +commit 7c1ad41eb611ed89e5bb8792a3beb533b7aa59f4 +Author: Lasse Collin +Date: Tue Dec 11 11:18:58 2007 +0200 + + Fixed wrong type of flags_size in Subblock encoder. + +commit ce64df716243fdc40359090d1f6541f3a4f5f21a +Author: Lasse Collin +Date: Mon Dec 10 20:44:16 2007 +0200 + + Bumped version number to 4.42.3alpha. + +commit b499a0403ea5c41d6a25b40275eb6c57643052ce +Author: Lasse Collin +Date: Mon Dec 10 15:02:50 2007 +0200 + + Disabled some unneeded warnings and made "make dist" work. + +commit 2ab8adb5165a0b77114a7eb21f9ff1e6a266f172 +Author: Lasse Collin +Date: Sun Dec 9 21:43:15 2007 +0200 + + Added LZMA_SYNC_FLUSH support to the Copy filter. + +commit 329c272d501e88793dda5540358d55c12428d194 +Author: Lasse Collin +Date: Sun Dec 9 17:14:07 2007 +0200 + + Added missing LZMA_API to the C versions of the CRC functions. + The x86 assembler versions were already OK. + +commit c90daf86ce683fa8cf80491d624ffb158dfbd9d7 +Author: Jim Meyering +Date: Sun Dec 9 15:34:25 2007 +0100 + + * tests/test_block_header.c (test3): Remove duplicate initializer. + +commit 07ac881779a8477f2c1ab112b91a129e24aa743c +Author: Lasse Collin +Date: Sun Dec 9 17:06:45 2007 +0200 + + Take advantage of return_if_error() macro in more places. + Cleaned Subblock filter's initialization code too. + +commit 41338717964f510ee61d70b25bd4c502ec9f77cf +Author: Lasse Collin +Date: Sun Dec 9 12:13:01 2007 +0200 + + Added a bunch of .lzma test files. 
+ +commit ff946ceb7975d4f11950afd33f6315b4d20d1a03 +Author: Lasse Collin +Date: Sun Dec 9 11:24:48 2007 +0200 + + Re-enabled the security checks in Subblock decoder + that were disabled for debugging reasons. + +commit 2bf36d22d2c24ac3f488e63b35564fa2f6dab8d1 +Author: Lasse Collin +Date: Sun Dec 9 11:03:28 2007 +0200 + + Fixed the tests to build with -Werror. + +commit 5d018dc03549c1ee4958364712fb0c94e1bf2741 +Author: Lasse Collin +Date: Sun Dec 9 00:42:33 2007 +0200 + + Imported to git. diff --git a/README b/README new file mode 100644 index 000000000000..6b695d5ccdb8 --- /dev/null +++ b/README @@ -0,0 +1,218 @@ + +XZ Utils +======== + + 0. Overview + 1. Documentation + 1.1. Overall documentation + 1.2. Documentation for command line tools + 1.3. Documentation for liblzma + 2. Version numbering + 3. Reporting bugs + 4. Other implementations of the .xz format + 5. Contact information + + +0. Overview +----------- + + XZ Utils provide a general-purpose data compression library and + command line tools. The native file format is the .xz format, but + the legacy .lzma format is also supported. The .xz format supports + multiple compression algorithms, which are called "filters" in + the context of XZ Utils. The primary filter is currently LZMA2. With + typical files, XZ Utils create about 30 % smaller files than gzip. + + To ease adapting support for the .xz format into existing applications + and scripts, the API of liblzma is somewhat similar to the API of the + popular zlib library. For the same reason, the command line tool xz + has a command line syntax similar to that of gzip. + + When aiming for the highest compression ratio, LZMA2 encoder uses + a lot of CPU time and may use, depending on the settings, even + hundreds of megabytes of RAM. However, in fast modes, LZMA2 encoder + competes with bzip2 in compression speed, RAM usage, and compression + ratio. + + LZMA2 is reasonably fast to decompress. It is a little slower than + gzip, but a lot faster than bzip2.
Being fast to decompress means + that the .xz format is especially nice when the same file will be + decompressed very many times (usually on different computers), which + is the case e.g. when distributing software packages. In such + situations, it's not too bad if the compression takes some time, + since that needs to be done only once to benefit many people. + + With some file types, combining (or "chaining") LZMA2 with an + additional filter can improve compression ratio. A filter chain may + contain up to four filters, although usually only one or two are used. + For example, putting a BCJ (Branch/Call/Jump) filter before LZMA2 + in the filter chain can improve compression ratio of executable files. + + Since the .xz format allows adding new filter IDs, it is possible that + some day there will be a filter that is, for example, much faster to + compress than LZMA2 (but probably with worse compression ratio). + Similarly, it is possible that some day there is a filter that will + compress better than LZMA2. + + XZ Utils doesn't support multithreaded compression or decompression + yet. It has been planned though and taken into account when designing + the .xz file format. + + +1. Documentation +---------------- + +1.1.
Overall documentation + + README This file + + INSTALL.generic Generic install instructions for those not familiar + with packages using GNU Autotools + INSTALL Installation instructions specific to XZ Utils + PACKAGERS Information to packagers of XZ Utils + + COPYING XZ Utils copyright and license information + COPYING.GPLv2 GNU General Public License version 2 + COPYING.GPLv3 GNU General Public License version 3 + COPYING.LGPLv2.1 GNU Lesser General Public License version 2.1 + + AUTHORS The main authors of XZ Utils + THANKS Incomplete list of people who have helped making + this software + NEWS User-visible changes between XZ Utils releases + ChangeLog Detailed list of changes (commit log) + TODO Known bugs and some sort of to-do list + + Note that only some of the above files are included in binary + packages. + + +1.2. Documentation for command line tools + + The command line tools are documented as man pages. In source code + releases (and possibly also in some binary packages), the man pages + are also provided in plain text (ASCII only) and PDF formats in the + directory "doc/man" to make the man pages more accessible to those + whose operating system doesn't provide an easy way to view man pages. + + +1.3. Documentation for liblzma + + The liblzma API headers include short docs about each function + and data type as Doxygen tags. These docs should be quite OK as + a quick reference. + + I have planned to write a bunch of very well documented example + programs, which (due to comments) should work as a tutorial to + various features of liblzma. No such example programs have been + written yet. + + For now, if you have never used liblzma, libbzip2, or zlib, I + recommend learning *basics* of zlib API. Once you know that, it + should be easier to learn liblzma. + + http://zlib.net/manual.html + http://zlib.net/zlib_how.html + + +2. Version numbering +-------------------- + + The version number format of XZ Utils is X.Y.ZS: + + - X is the major version. 
When this is incremented, the library + API and ABI break. + + - Y is the minor version. It is incremented when new features are + added without breaking existing API or ABI. Even Y indicates + stable release and odd Y indicates unstable (alpha or beta + version). + + - Z is the revision. This has different meaning for stable and + unstable releases: + * Stable: Z is incremented when bugs get fixed without adding + any new features. + * Unstable: Z is just a counter. API or ABI of features added + in earlier unstable releases having the same X.Y may break. + + - S indicates stability of the release. It is missing from the + stable releases where Y is an even number. When Y is odd, S + is either "alpha" or "beta" to make it very clear that such + versions are not stable releases. The same X.Y.Z combination is + not used for more than one stability level i.e. after X.Y.Zalpha, + the next version can be X.Y.(Z+1)beta but not X.Y.Zbeta. + + +3. Reporting bugs +----------------- + + Naturally it is easiest for me if you already know what causes the + unexpected behavior. Even better if you have a patch to propose. + However, quite often the reason for unexpected behavior is unknown, + so here are a few things to do before sending a bug report: + + 1. Try to create a small example how to reproduce the issue. + + 2. Compile XZ Utils with debugging code using configure switches + --enable-debug and, if possible, --disable-shared. If you are + using GCC, use CFLAGS='-O0 -ggdb3'. Don't strip the resulting + binaries. + + 3. Turn on core dumps. The exact command depends on your shell; + for example in GNU bash it is done with "ulimit -c unlimited", + and in tcsh with "limit coredumpsize unlimited". + + 4. Try to reproduce the suspected bug. If you get "assertion failed" + message, be sure to include the complete message in your bug + report. If the application leaves a coredump, get a backtrace + using gdb: + $ gdb /path/to/app-binary # Load the app to the debugger. 
+ (gdb) core core # Open the coredump. + (gdb) bt # Print the backtrace. Copy & paste to bug report. + (gdb) quit # Quit gdb. + + Report your bug via email or IRC (see Contact information below). + Don't send core dump files or any executables. If you have a small + example file(s) (total size less than 256 KiB), please include + it/them as an attachment. If you have bigger test files, put them + online somewhere and include an URL to the file(s) in the bug report. + + Always include the exact version number of XZ Utils in the bug report. + If you are using a snapshot from the git repository, use "git describe" + to get the exact snapshot version. If you are using XZ Utils shipped + in an operating system distribution, mention the distribution name, + distribution version, and exact xz package version; if you cannot + repeat the bug with the code compiled from unpatched source code, + you probably need to report a bug to your distribution's bug tracking + system. + + +4. Other implementations of the .xz format +------------------------------------------ + + 7-Zip and the p7zip port of 7-Zip support the .xz format starting + from the version 9.00alpha. + + http://7-zip.org/ + http://p7zip.sourceforge.net/ + + XZ Embedded is a limited implementation written for use in the Linux + kernel, but it is also suitable for other embedded use. + + http://tukaani.org/xz/embedded.html + + +5. Contact information +---------------------- + + If you have questions, bug reports, patches etc. related to XZ Utils, + contact Lasse Collin (in Finnish or English). + tukaani.org uses greylisting to reduce spam, thus when you send your + first email, it may get delayed by a few hours. In addition to that, + I'm sometimes slow at replying. If you haven't got a reply within two + weeks, assume that your email has got lost and resend it or use IRC. + + You can find me also from #tukaani on Freenode; my nick is Larhzu. 
+ The channel tends to be pretty quiet, so just ask your question and + someone may wake up. + diff --git a/THANKS b/THANKS new file mode 100644 index 000000000000..2ffbb1e0f5f8 --- /dev/null +++ b/THANKS @@ -0,0 +1,67 @@ + +Thanks +====== + +Some people have helped more, some less, but nevertheless everyone's help +has been important. :-) In alphabetical order: + - Mark Adler + - H. Peter Anvin + - Nelson H. F. Beebe + - Karl Berry + - Anders F. Björklund + - Emmanuel Blot + - Trent W. Buck + - David Burklund + - Marek Černocký + - Andrew Dudman + - Markus Duft + - İsmail Dönmez + - Robert Elz + - Gilles Espinasse + - Mike Frysinger + - Joachim Henke + - Peter Ivanov + - Jouk Jansen + - Per Øyvind Karlsen + - Thomas Klausner + - Richard Koch + - Ville Koskinen + - Stephan Kulow + - Peter Lawler + - Hin-Tak Leung + - Andraž 'ruskie' Levstik + - Jim Meyering + - Rafał Mużyło + - Adrien Nader + - Hongbo Ni + - Jonathan Nieder + - Igor Pavlov + - Elbert Pol + - Mikko Pouru + - Robert Readman + - Bernhard Reutner-Fischer + - Christian von Roques + - Jukka Salmi + - Alexandre Sauvé + - Andreas Schwab + - Dan Shechter + - Stuart Shelton + - Jonathan Stott + - Paul Townsend + - Mohammed Adnène Trojette + - Alexey Tourbin + - Patrick J. Volkerding + - Christian Weisgerber + - Bert Wesarg + - Ralf Wildenhues + - Charles Wilson + - Lars Wirzenius + - Pilorz Wojciech + - Ryan Young + - Andreas Zieringer + +Also thanks to all the people who have participated in the Tukaani project. + +I have probably forgotten to add some names to the above list. Sorry about +that and thanks for your help. + diff --git a/TODO b/TODO new file mode 100644 index 000000000000..9fac1b341956 --- /dev/null +++ b/TODO @@ -0,0 +1,60 @@ + +XZ Utils To-Do List +=================== + +Known bugs +---------- + + The test suite is too incomplete.
+ + If the memory usage limit is less than about 13 MiB, xz is unable to + automatically scale down the compression settings enough even though + it would be possible by switching from BT2/BT3/BT4 match finder to + HC3/HC4. + + The code to detect number of CPU cores doesn't count hyperthreading + as multiple cores. In context of xz, it probably should. + Hyperthreading is good at least with p7zip. + + XZ Utils compress some files significantly worse than LZMA Utils. + This is due to faster compression presets used by XZ Utils, and + can be worked around by using "xz --extreme". However, the presets + need some tweaking and maybe this issue can be minimized without + making the typical case too much slower. + + xz doesn't quote unprintable characters when it displays file names + given on the command line. + + tuklib_exit() doesn't block signals => EINTR is possible. + + +Missing features +---------------- + + xz doesn't support copying extended attributes, access control + lists etc. from source to target file. + + Multithreaded compression + + Multithreaded decompression + + Buffer-to-buffer coding could use less RAM (especially when + decompressing LZMA1 or LZMA2). + + I/O library is not implemented. It will possibly be named libzzf. + + lzma_strerror() to convert lzma_ret to human readable form? + This is tricky, because the same error codes are used with + slightly different meanings. + + +Documentation +------------- + + Some tutorial is needed for liblzma. I have planned to write some + extremely well commented example programs, which would work as + a tutorial. I suppose the Doxygen tags are quite OK as a quick + reference once one is familiar with the liblzma API. + + Document the LZMA1 and LZMA2 algorithms. 
+ diff --git a/po/LINGUAS b/po/LINGUAS new file mode 100644 index 000000000000..841618abade8 --- /dev/null +++ b/po/LINGUAS @@ -0,0 +1 @@ +cs diff --git a/po/Makevars b/po/Makevars new file mode 100644 index 000000000000..dc19bc963132 --- /dev/null +++ b/po/Makevars @@ -0,0 +1,46 @@ +# Makefile variables for PO directory in any package using GNU gettext. + +# Usually the message domain is the same as the package name. +DOMAIN = $(PACKAGE) + +# These two variables depend on the location of this directory. +subdir = po +top_builddir = .. + +# These options get passed to xgettext. +XGETTEXT_OPTIONS = --keyword=_ --keyword=N_ + +# This is the copyright holder that gets inserted into the header of the +# $(DOMAIN).pot file. Set this to the copyright holder of the surrounding +# package. (Note that the msgstr strings, extracted from the package's +# sources, belong to the copyright holder of the package.) Translators are +# expected to transfer the copyright for their translations to this person +# or entity, or to disclaim their copyright. The empty string stands for +# the public domain; in this case the translators are expected to disclaim +# their copyright. +COPYRIGHT_HOLDER = + +# This is the email address or URL to which the translators shall report +# bugs in the untranslated strings: +# - Strings which are not entire sentences, see the maintainer guidelines +# in the GNU gettext documentation, section 'Preparing Strings'. +# - Strings which use unclear terms or require additional context to be +# understood. +# - Strings which make invalid assumptions about notation of date, time or +# money. +# - Pluralisation problems. +# - Incorrect English spelling. +# - Incorrect formatting. +# It can be your email address, or a mailing list address where translators +# can write to without being subscribed, or the URL of a web page through +# which the translators can contact you. 
+MSGID_BUGS_ADDRESS = + +# This is the list of locale categories, beyond LC_MESSAGES, for which the +# message catalogs shall be used. It is usually empty. +EXTRA_LOCALE_CATEGORIES = + +# Although you may need slightly wider terminal than 80 chars, it is +# much nicer to edit the output of --help when this is set. +XGETTEXT_OPTIONS += --no-wrap +MSGMERGE += --no-wrap diff --git a/po/POTFILES.in b/po/POTFILES.in new file mode 100644 index 000000000000..ee430c5c3abe --- /dev/null +++ b/po/POTFILES.in @@ -0,0 +1,10 @@ +# List of source files which contain translatable strings. +src/xz/args.c +src/xz/coder.c +src/xz/file_io.c +src/xz/hardware.c +src/xz/main.c +src/xz/message.c +src/xz/options.c +src/xz/suffix.c +src/xz/util.c diff --git a/po/cs.po b/po/cs.po new file mode 100644 index 000000000000..a953fd382969 --- /dev/null +++ b/po/cs.po @@ -0,0 +1,636 @@ +msgid "" +msgstr "" +"Project-Id-Version: xz-utils\n" +"Report-Msgid-Bugs-To: lasse.collin@tukaani.org\n" +"POT-Creation-Date: 2009-12-04 16:04+0100\n" +"PO-Revision-Date: 2009-12-05 13:51+0100\n" +"Last-Translator: Marek Černocký \n" +"Language-Team: Czech \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Plural-Forms: nplurals=3; plural=(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2\n" +"X-Poedit-Language: Czech\n" +"X-Poedit-SourceCharset: utf-8\n" + +#: src/xz/args.c:306 +#, c-format +msgid "%s: Unknown file format type" +msgstr "%s: Neznámý typ formátu souboru" + +#: src/xz/args.c:329 src/xz/args.c:337 +#, c-format +msgid "%s: Unsupported integrity check type" +msgstr "%s: Neznámý typ kontroly integrity" + +#: src/xz/args.c:355 +msgid "Only one file can be specified with `--files'or `--files0'." 
+msgstr "" +"Můžete zadat pouze jeden soubor spolu s přepínači „--files“ nebo „--files0“" + +#: src/xz/args.c:413 +msgid "The environment variable XZ_OPT contains too many arguments" +msgstr "Proměnná prostředí XZ_OPT obsahuje příliš mnoho argumentů" + +#: src/xz/coder.c:95 +msgid "Maximum number of filters is four" +msgstr "Maximální počet filtrů je čtyři" + +#: src/xz/coder.c:108 +#, c-format +msgid "" +"Memory usage limit (% MiB) is too small for the given filter setup (%" +" MiB)" +msgstr "" +"Omezení použitelné paměti (% MiB) je příliš malé pro dané nastavení " +"filtru (% MiB)" + +#: src/xz/coder.c:128 +msgid "Using a preset in raw mode is discouraged." +msgstr "Použití přednastavení v režimu raw je nevhodné." + +#: src/xz/coder.c:130 +msgid "The exact options of the presets may vary between software versions." +msgstr "" +"Přesné volby u přednastavení se mohou lišit mezi různými verzemi softwaru." + +#: src/xz/coder.c:158 +msgid "The .lzma format supports only the LZMA1 filter" +msgstr "Formát .lzma podporuje pouze filtr LZMA1" + +#: src/xz/coder.c:166 +msgid "LZMA1 cannot be used with the .xz format" +msgstr "LZMA1 nelze použít s formátem .xz" + +#: src/xz/coder.c:186 +#, c-format +msgid "%s MiB (%s B) of memory is required per thread, limit is %s MiB (%s B)" +msgstr "" +"Je vyžadováno %s MiB (%s B) paměti pro každé vlákno, limit je %s MiB (%s B)" + +#: src/xz/coder.c:554 +#, c-format +msgid "Limit was %s MiB, but %s MiB would have been needed" +msgstr "Limit byl %s MiB, ale bylo by zapotřebí %s MiB" + +#: src/xz/file_io.c:113 +#, c-format +msgid "%s: File seems to be moved, not removing" +msgstr "%s: Vypadá to, že soubor byl přesunut, neodstraní se" + +#: src/xz/file_io.c:120 src/xz/file_io.c:527 +#, c-format +msgid "%s: Cannot remove: %s" +msgstr "%s: Nelze odstranit: %s" + +#: src/xz/file_io.c:145 +#, c-format +msgid "%s: Cannot set the file owner: %s" +msgstr "%s: Nelze nastavit vlastníka souboru: %s" + +#: src/xz/file_io.c:151 +#, c-format +msgid "%s: 
Cannot set the file group: %s" +msgstr "%s: Nelze nastavit skupinu souboru: %s" + +#: src/xz/file_io.c:170 +#, c-format +msgid "%s: Cannot set the file permissions: %s" +msgstr "%s: Nelze nastavit oprávnění souboru: %s" + +#: src/xz/file_io.c:308 src/xz/file_io.c:387 +#, c-format +msgid "%s: Is a symbolic link, skipping" +msgstr "%s: Jedná se o symbolický odkaz, vynechává se" + +#: src/xz/file_io.c:422 +#, c-format +msgid "%s: Is a directory, skipping" +msgstr "%s: Jedná se o složku, vynechává se" + +#: src/xz/file_io.c:429 +#, c-format +msgid "%s: Not a regular file, skipping" +msgstr "%s: Nejedná se o běžný soubor, vynechává se" + +#: src/xz/file_io.c:446 +#, c-format +msgid "%s: File has setuid or setgid bit set, skipping" +msgstr "%s: Soubor má nastavený bit setuid nebo setgid, vynechává se" + +#: src/xz/file_io.c:453 +#, c-format +msgid "%s: File has sticky bit set, skipping" +msgstr "%s: Soubor má nastavený bit sticky, vynechává se" + +#: src/xz/file_io.c:460 +#, c-format +msgid "%s: Input file has more than one hard link, skipping" +msgstr "%s: Vstupní soubor má více než jeden pevný odkaz, vynechává se" + +#: src/xz/file_io.c:644 +#, c-format +msgid "Error restoring the O_APPEND flag to standard output: %s" +msgstr "Chyba při obnovení příznaku O_APPEND na standardní výstup: %s" + +#: src/xz/file_io.c:655 +#, c-format +msgid "%s: Closing the file failed: %s" +msgstr "%s: Selhalo zavření souboru: %s" + +#: src/xz/file_io.c:732 src/xz/file_io.c:889 +#, c-format +msgid "%s: Seeking failed when trying to create a sparse file: %s" +msgstr "" +"%s: Selhalo nastavení pozice při pokusu o vytvoření záložního souboru: %s" + +#: src/xz/file_io.c:789 +#, c-format +msgid "%s: Read error: %s" +msgstr "%s: Chyba čtení: %s" + +#: src/xz/file_io.c:847 +#, c-format +msgid "%s: Write error: %s" +msgstr "%s: Chyba zápisu: %s" + +#: src/xz/main.c:76 +#, c-format +msgid "%s: Error reading filenames: %s" +msgstr "%s: Chyba při čtení názvů souborů: %s" + +#: src/xz/main.c:83 +#, 
c-format +msgid "%s: Unexpected end of input when reading filenames" +msgstr "%s: Neočekávaný konec vstupu při čtení názvů souborů" + +#: src/xz/main.c:107 +#, c-format +msgid "" +"%s: Null character found when reading filenames; maybe you meant to use `--" +"files0' instead of `--files'?" +msgstr "" +"%s: Byl nalezen nulový znak při čtení názvů souborů; nechtěli jste náhodou " +"použít „--files0“ místo „--files“?" + +#: src/xz/main.c:160 +msgid "Compression and decompression with --robot are not supported yet." +msgstr "Komprimace a dekomprimace s přepínačem --robot není zatím podporovaná." + +#: src/xz/main.c:207 +msgid "" +"Cannot read data from standard input when reading filenames from standard " +"input" +msgstr "" +"Ze standardního vstupu nelze číst data, když se ze standardního vstupu " +"načítají názvy souborů" + +#: src/xz/message.c:841 src/xz/message.c:885 +msgid "Internal error (bug)" +msgstr "Interní chyba" + +#: src/xz/message.c:848 +msgid "Cannot establish signal handlers" +msgstr "Nelze ustanovit ovladač signálu" + +#: src/xz/message.c:857 +msgid "No integrity check; not verifying file integrity" +msgstr "Žádná kontrola integrity; integrita souboru se nebude ověřovat" + +#: src/xz/message.c:860 +msgid "Unsupported type of integrity check; not verifying file integrity" +msgstr "" +"Nepodporovaný typ kontroly integrity; integrita souboru se nebude ověřovat" + +#: src/xz/message.c:867 +msgid "Memory usage limit reached" +msgstr "Dosaženo omezení použitelné paměti" + +#: src/xz/message.c:870 +msgid "File format not recognized" +msgstr "Formát souboru nebyl rozpoznán" + +#: src/xz/message.c:873 +msgid "Unsupported options" +msgstr "Nepodporovaná volba" + +#: src/xz/message.c:876 +msgid "Compressed data is corrupt" +msgstr "Komprimovaná data jsou poškozená" + +#: src/xz/message.c:879 +msgid "Unexpected end of input" +msgstr "Neočekávaný konec vstupu" + +#: src/xz/message.c:898 +#, c-format +msgid "%s: Filter chain:" +msgstr "%s: Omezující filtr:" + +#: 
src/xz/message.c:1009 +#, c-format +msgid "Try `%s --help' for more information." +msgstr "Zkuste „%s --help“ pro více informací" + +#: src/xz/message.c:1021 +#, c-format +msgid "%s MiB (%s bytes)\n" +msgstr "%s MiB (%s bajtů)\n" + +#: src/xz/message.c:1049 +#, c-format +msgid "" +"Usage: %s [OPTION]... [FILE]...\n" +"Compress or decompress FILEs in the .xz format.\n" +"\n" +msgstr "" +"Použití: %s [PŘEPÍNAČ]... [SOUBOR]...\n" +"Komprimuje nebo dekomprimuje SOUBORy ve formátu xz.\n" +"\n" + +#: src/xz/message.c:1053 +msgid "" +"Mandatory arguments to long options are mandatory for short options too.\n" +msgstr "" +"Povinné argumenty pro dlouhé přepínače jsou povinné rovněž pro krátké " +"přepínače.\n" + +#: src/xz/message.c:1057 +msgid " Operation mode:\n" +msgstr "Operační režim:\n" + +#: src/xz/message.c:1060 +msgid "" +" -z, --compress force compression\n" +" -d, --decompress force decompression\n" +" -t, --test test compressed file integrity\n" +" -l, --list list information about files" +msgstr "" +" -z, --compress provést komprimaci\n" +" -d, --decompress provést dekomprimaci\n" +" -t, --test testovat integritu komprimovaného souboru\n" +" -l, --list vypsat informace o souborech" + +#: src/xz/message.c:1066 +msgid "" +"\n" +" Operation modifiers:\n" +msgstr "" +"\n" +"Modifikátory operací:\n" + +#: src/xz/message.c:1069 +msgid "" +" -k, --keep keep (don't delete) input files\n" +" -f, --force force overwrite of output file and (de)compress links\n" +" -c, --stdout write to standard output and don't delete input files" +msgstr "" +" -k, --keep zachovat (nemazat) vstupní soubory\n" +" -f, --force vynutit přepis výstupního souboru a de/komprimovat " +"odkazy\n" +" -c, --stdout zapisovat na standardní výstup a nemazat vstupní " +"soubory" + +#: src/xz/message.c:1075 +msgid "" +" --no-sparse do not create sparse files when decompressing\n" +" -S, --suffix=.SUF use the suffix `.SUF' on compressed files\n" +" --files=[FILE] read filenames to process from FILE; if 
FILE is\n" +" omitted, filenames are read from the standard input;\n" +" filenames must be terminated with the newline " +"character\n" +" --files0=[FILE] like --files but use the null character as terminator" +msgstr "" +" --no-sparse nevytvářet při dekomprimaci záložní soubory\n" +" -S, --suffix=.SUF použít u komprimovaných souborů příponu „.SUF“\n" +" --files=[SOUBOR] číst názvy souborů, které se mají zpracovat, ze " +"SOUBORu;\n" +" pokud není SOUBOR zadán, čte se ze standardního " +"vstupu;\n" +" názvy souborů musí být zakončeny znakem nového řádku\n" +" --files0=[SOUBOR] stejné jako --files, ale použít k zakončování nulový " +"znak" + +#: src/xz/message.c:1083 +msgid "" +"\n" +" Basic file format and compression options:\n" +msgstr "" +"\n" +"Základní přepínače pro formát souboru a komprimaci:\n" + +#: src/xz/message.c:1085 +msgid "" +" -F, --format=FMT file format to encode or decode; possible values are\n" +" `auto' (default), `xz', `lzma', and `raw'\n" +" -C, --check=CHECK integrity check type: `crc32', `crc64' (default),\n" +" `sha256', or `none' (use with caution)" +msgstr "" +" -F, --format=FORMÁT formát souboru k zakódování nebo dekódování; možné\n" +" hodnoty jsou „auto“ (výchozí), „xz“, „lzma“ a „raw“\n" +" -C, --check=KONTROLA typ kontroly integrity: „crc32“, „crc64“ (výchozí),\n" +" „sha256“ nebo „none“ (používejte s rozmyslem)" + +#: src/xz/message.c:1092 +msgid "" +" -0 .. -9 compression preset; 0-2 fast compression, 3-5 good\n" +" compression, 6-9 excellent compression; default is 6" +msgstr "" +" -0 .. 
-9 přednastavení komprimace; 0-2 rychlá komprimace, 3-5\n" +" dobrá komprimace, 6-9 skvělá komprimace; výchozí je 6" + +#: src/xz/message.c:1096 +msgid "" +" -e, --extreme use more CPU time when encoding to increase " +"compression\n" +" ratio without increasing memory usage of the decoder" +msgstr "" +" -e, --extreme využít více procesorového času pro kódování, čímž se\n" +" zvýší kompresní poměr bez zvýšení paměti použité " +"kodérem" + +#: src/xz/message.c:1101 +msgid "" +" -M, --memory=NUM use roughly NUM bytes of memory at maximum; 0 " +"indicates\n" +" the default setting, which is 40 % of total RAM" +msgstr "" +" -M, --memory=POČ použít zhruba POČ bajtů paměti jako maximum; 0 " +"znamená\n" +" výchozí nastavení, což je 40% celkového množství " +"paměti" + +#: src/xz/message.c:1106 +msgid "" +"\n" +" Custom filter chain for compression (alternative for using presets):" +msgstr "" +"\n" +"Vlastní omezující filtr pro komprimaci (alternativa k použití " +"přednastavených):" + +#: src/xz/message.c:1111 +msgid "" +"\n" +" --lzma1[=OPTS] LZMA1 or LZMA2; OPTS is a comma-separated list of zero " +"or\n" +" --lzma2[=OPTS] more of the following options (valid values; " +"default):\n" +" preset=NUM reset options to preset number NUM (0-9)\n" +" dict=NUM dictionary size (4KiB - 1536MiB; 8MiB)\n" +" lc=NUM number of literal context bits (0-4; 3)\n" +" lp=NUM number of literal position bits (0-4; 0)\n" +" pb=NUM number of position bits (0-4; 2)\n" +" mode=MODE compression mode (fast, normal; normal)\n" +" nice=NUM nice length of a match (2-273; 64)\n" +" mf=NAME match finder (hc3, hc4, bt2, bt3, bt4; " +"bt4)\n" +" depth=NUM maximum search depth; 0=automatic " +"(default)" +msgstr "" +"\n" +" --lzma1[=VOLBY] LZMA1 nebo LZMA2; VOLBY je čárkou oddělovaný seznam " +"žádné\n" +" --lzma2[=VOLBY] nebo více následujících voleb (platné hodnoty; " +"výchozí):\n" +" preset=POČ obnovení voleb na přednastavený POČet (0-" +"9)\n" +" dict=POČ velikost slovníku (4 KiB - 1536 MiB; 8 " 
+"MiB)\n" +" lc=POČ počet kontextových bitů literálu (0-4; " +"3)\n" +" lp=POČ počet pozičních bitů literálu (0-4; 0)\n" +" pb=POČ počet pozičních bitů (0-4; 2)\n" +" mode=REŽIM režim komprimace (fast, normal; normal)\n" +" nice=POČ příznivá délka shody (2-273; 64)\n" +" mf=NÁZEV vyhledávač shod (hc3, hc4, bt2, bt3, " +"bt4;\n" +" bt4)\n" +" depth=POČ maximální hloubka prohledávání;\n" +" 0=automaticky (výchozí)" + +#: src/xz/message.c:1126 +msgid "" +"\n" +" --x86[=OPTS] x86 BCJ filter\n" +" --powerpc[=OPTS] PowerPC BCJ filter (big endian only)\n" +" --ia64[=OPTS] IA64 (Itanium) BCJ filter\n" +" --arm[=OPTS] ARM BCJ filter (little endian only)\n" +" --armthumb[=OPTS] ARM-Thumb BCJ filter (little endian only)\n" +" --sparc[=OPTS] SPARC BCJ filter\n" +" Valid OPTS for all BCJ filters:\n" +" start=NUM start offset for conversions (default=0)" +msgstr "" +"\n" +" --x86[=VOLBY] Filtr x86 BCJ\n" +" --powerpc[=VOLBY] Filtr PowerPC BCJ (pouze big endian)\n" +" --ia64[=VOLBY] Filtr IA64 (Itanium) BCJ\n" +" --arm[=VOLBY] Filtr ARM BCJ (pouze little endian)\n" +" --armthumb[=VOLBY] Filtr ARM-Thumb BCJ (pouze little endian)\n" +" --sparc[=VOLBY] Filtr SPARC BCJ\n" +" Platné volby pro všechny filtry BCJ:\n" +" start=POČ počáteční posun pro převody (výchozí=0)" + +#: src/xz/message.c:1138 +msgid "" +"\n" +" --delta[=OPTS] Delta filter; valid OPTS (valid values; default):\n" +" dist=NUM distance between bytes being subtracted\n" +" from each other (1-256; 1)" +msgstr "" +"\n" +" --delta[=VOLBY] Filtr Delta; platné VOLBY (platné hodnoty; výchozí):\n" +" dist=POČ vzdálenost mezi bajty, které jsou " +"odečítány\n" +" jeden od druhého (1-256; 1)" + +#: src/xz/message.c:1146 +msgid "" +"\n" +" --subblock[=OPTS] Subblock filter; valid OPTS (valid values; default):\n" +" size=NUM number of bytes of data per subblock\n" +" (1 - 256Mi; 4Ki)\n" +" rle=NUM run-length encoder chunk size (0-256; 0)" +msgstr "" +"\n" +" --subblock[=VOLBY] Subblokový filtr; platné VOLBY (platné hodnoty; " 
+"výchozí):\n" +" size=POČ počet bajtů dat na subblok\n" +" (1 - 256 Mi; 4 Ki)\n" +" rle=POČ velikost dávky pro kodér run-length (0-" +"256; 0)" + +#: src/xz/message.c:1155 +msgid "" +"\n" +" Other options:\n" +msgstr "" +"\n" +" Ostatní přepínače:\n" + +#: src/xz/message.c:1158 +msgid "" +" -q, --quiet suppress warnings; specify twice to suppress errors " +"too\n" +" -v, --verbose be verbose; specify twice for even more verbose" +msgstr "" +" -q, --quiet potlačit varování; zadáním dvakrát, potlačíte i " +"chyby\n" +" -v, --verbose podrobnější zprávy; zadáním dvakrát, budou ještě\n" +" podrobnější" + +#: src/xz/message.c:1163 +msgid " -Q, --no-warn make warnings not affect the exit status" +msgstr " -Q, --no-warn způsobí, že varování neovlivní stav ukončení" + +#: src/xz/message.c:1165 +msgid "" +" --robot use machine-parsable messages (useful for scripts)" +msgstr "" +" --robot použít strojově analyzovatelné zprávy (užitečné pro " +"skripty)" + +#: src/xz/message.c:1168 +msgid " --info-memory display the memory usage limit and exit" +msgstr " --info-memory zobrazit omezení využití paměti a skončit" + +#: src/xz/message.c:1170 +msgid "" +" -h, --help display the short help (lists only the basic options)\n" +" -H, --long-help display this long help and exit" +msgstr "" +" -h, --help zobrazit krátkou nápovědu (vypíše jen základní " +"přepínače)\n" +" -H, --long-help zobrazit tuto úplnou nápovědu a skončit" + +#: src/xz/message.c:1174 +msgid "" +" -h, --help display this short help and exit\n" +" -H, --long-help display the long help (lists also the advanced options)" +msgstr "" +" -h, --help zobrazit tuto zkrácenou nápovědu a skončit\n" +" -H, --long-help zobrazit úplnou nápovědu (vypíše i pokročilé " +"přepínače)" + +#: src/xz/message.c:1179 +msgid " -V, --version display the version number and exit" +msgstr " -V, --version zobrazit číslo verze a skončit" + +#: src/xz/message.c:1181 +msgid "" +"\n" +"With no FILE, or when FILE is -, read standard input.\n" +msgstr 
"" +"\n" +"Pokud SOUBOR není zadán nebo pokud je -, bude se číst ze standardního " +"vstupu.\n" + +#: src/xz/message.c:1185 +#, c-format +msgid "" +"On this system and configuration, this program will use a maximum of " +"roughly\n" +"%s MiB RAM and " +msgstr "" +"Na tomto systému a s tímto nastavením použije tento program maximum ze " +"zhruba\n" +"%s MiB RAM a " + +#: src/xz/message.c:1187 +msgid "" +"one thread.\n" +"\n" +msgstr "" +"jedno vlákno.\n" +"\n" + +#: src/xz/message.c:1192 +#, c-format +msgid "Report bugs to <%s> (in English or Finnish).\n" +msgstr "Chyby hlaste na <%s> (v angličtině nebo finštině).\n" + +#: src/xz/message.c:1194 +#, c-format +msgid "%s home page: <%s>\n" +msgstr "Domovská stránka %s: <%s>\n" + +#: src/xz/options.c:86 +#, c-format +msgid "%s: Options must be `name=value' pairs separated with commas" +msgstr "%s: Volby musí být páry „název=hodnota“ oddělené čárkami" + +#: src/xz/options.c:93 +#, c-format +msgid "%s: Invalid option name" +msgstr "%s: Neplatný název volby" + +#: src/xz/options.c:113 +#, c-format +msgid "%s: Invalid option value" +msgstr "%s: Neplatná hodnota volby" + +#: src/xz/options.c:308 +#, c-format +msgid "Unsupported LZMA1/LZMA2 preset: %s" +msgstr "Nepodporované přednastavení LZMA1/LZMA2: %s" + +#: src/xz/options.c:426 +msgid "The sum of lc and lp must be at maximum of 4" +msgstr "Součet lc a lp musí být maximálně 4" + +#: src/xz/options.c:431 +#, c-format +msgid "The selected match finder requires at least nice=%" +msgstr "Vybraný vyhledávač shod vyžaduje minimálně nice=%" + +#: src/xz/suffix.c:79 src/xz/suffix.c:164 +#, c-format +msgid "" +"%s: With --format=raw, --suffix=.SUF is required unless writing to stdout" +msgstr "" +"%s: S --format=raw je vyžadováno --sufix=.SUF, vyjma zápisu do standardního " +"výstupu" + +#: src/xz/suffix.c:99 +#, c-format +msgid "%s: Filename has an unknown suffix, skipping" +msgstr "%s: Název souboru má neznámou příponu, vynechává se" + +#: src/xz/suffix.c:154 +#, c-format +msgid 
"%s: File already has `%s' suffix, skipping" +msgstr "%s: Soubor již má příponu „%s“, vynechává se" + +#: src/xz/suffix.c:205 +#, c-format +msgid "%s: Invalid filename suffix" +msgstr "%s: Neplatná přípona názvu souboru" + +#: src/xz/util.c:53 +#, c-format +msgid "%s: Value is not a non-negative decimal integer" +msgstr "%s: Hodnota není nezáporné desítkové číslo" + +#: src/xz/util.c:95 +#, c-format +msgid "%s: Invalid multiplier suffix. Valid suffixes:" +msgstr "%s: Neplatná přípona. Platné přípony jsou:" + +#: src/xz/util.c:115 +#, c-format +msgid "Value of the option `%s' must be in the range [%, %]" +msgstr "Hodnota volby „%s“ musí být v rozsahu [%, %]" + +#: src/xz/util.c:212 +msgid "Empty filename, skipping" +msgstr "Prázdný název souboru, vynechává se" + +#: src/xz/util.c:226 +msgid "Compressed data not read from a terminal unless `--force' is used." +msgstr "" +"Komprimovaná data se nečtou z terminálu, vyjma použití volby „--force“." + +#: src/xz/util.c:239 +msgid "Compressed data not written to a terminal unless `--force' is used." +msgstr "" +"Komprimovaná data se nezapisují do terminálu, vyjma použití volby „--force“." diff --git a/src/common/mythread.h b/src/common/mythread.h new file mode 100644 index 000000000000..476c2fc9e103 --- /dev/null +++ b/src/common/mythread.h @@ -0,0 +1,42 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file mythread.h +/// \brief Wrappers for threads +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "sysdefs.h" + + +#ifdef HAVE_PTHREAD +# include + +# define mythread_once(func) \ + do { \ + static pthread_once_t once_ = PTHREAD_ONCE_INIT; \ + pthread_once(&once_, &func); \ + } while (0) + +# define mythread_sigmask(how, set, oset) \ + pthread_sigmask(how, set, oset) + +#else + +# define mythread_once(func) \ + do { \ + static bool once_ = false; \ + if (!once_) { \ + func(); \ + once_ = true; \ + } \ + } while (0) + +# define mythread_sigmask(how, set, oset) \ + sigprocmask(how, set, oset) + +#endif diff --git a/src/common/sysdefs.h b/src/common/sysdefs.h new file mode 100644 index 000000000000..97a1c045cfb0 --- /dev/null +++ b/src/common/sysdefs.h @@ -0,0 +1,171 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file sysdefs.h +/// \brief Common includes, definitions, system-specific things etc. +/// +/// This file is used also by the lzma command line tool, that's why this +/// file is separate from common.h. +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SYSDEFS_H +#define LZMA_SYSDEFS_H + +////////////// +// Includes // +////////////// + +#ifdef HAVE_CONFIG_H +# include +#endif + +// size_t and NULL +#include + +#ifdef HAVE_INTTYPES_H +# include +#endif + +// C99 says that inttypes.h always includes stdint.h, but some systems +// don't do that, and require including stdint.h separately. +#ifdef HAVE_STDINT_H +# include +#endif + +// Some pre-C99 systems have SIZE_MAX in limits.h instead of stdint.h. The +// limits are also used to figure out some macros missing from pre-C99 systems. +#ifdef HAVE_LIMITS_H +# include +#endif + +// Be more compatible with systems that have non-conforming inttypes.h. 
+// We assume that int is 32-bit and that long is either 32-bit or 64-bit. +// Full Autoconf test could be more correct, but this should work well enough. +// Note that this duplicates some code from lzma.h, but this is better since +// we can work without inttypes.h thanks to Autoconf tests. +#ifndef UINT32_C +# if UINT_MAX != 4294967295U +# error UINT32_C is not defined and unsigned int is not 32-bit. +# endif +# define UINT32_C(n) n ## U +#endif +#ifndef UINT32_MAX +# define UINT32_MAX UINT32_C(4294967295) +#endif +#ifndef PRIu32 +# define PRIu32 "u" +#endif +#ifndef PRIX32 +# define PRIX32 "X" +#endif + +#if ULONG_MAX == 4294967295UL +# ifndef UINT64_C +# define UINT64_C(n) n ## ULL +# endif +# ifndef PRIu64 +# define PRIu64 "llu" +# endif +# ifndef PRIX64 +# define PRIX64 "llX" +# endif +#else +# ifndef UINT64_C +# define UINT64_C(n) n ## UL +# endif +# ifndef PRIu64 +# define PRIu64 "lu" +# endif +# ifndef PRIX64 +# define PRIX64 "lX" +# endif +#endif +#ifndef UINT64_MAX +# define UINT64_MAX UINT64_C(18446744073709551615) +#endif + +// Interix has broken header files, which typedef size_t to unsigned long, +// but a few lines later define SIZE_MAX to INT32_MAX. +#ifdef __INTERIX +# undef SIZE_MAX +#endif + +// The code currently assumes that size_t is either 32-bit or 64-bit. +#ifndef SIZE_MAX +# if SIZEOF_SIZE_T == 4 +# define SIZE_MAX UINT32_MAX +# elif SIZEOF_SIZE_T == 8 +# define SIZE_MAX UINT64_MAX +# else +# error sizeof(size_t) is not 32-bit or 64-bit +# endif +#endif +#if SIZE_MAX != UINT32_MAX && SIZE_MAX != UINT64_MAX +# error sizeof(size_t) is not 32-bit or 64-bit +#endif + +#include +#include + +// Pre-C99 systems lack stdbool.h. 
All the code in LZMA Utils must be written +// so that it works with fake bool type, for example: +// +// bool foo = (flags & 0x100) != 0; +// bool bar = !!(flags & 0x100); +// +// This works with the real C99 bool but breaks with fake bool: +// +// bool baz = (flags & 0x100); +// +#ifdef HAVE_STDBOOL_H +# include +#else +# if ! HAVE__BOOL +typedef unsigned char _Bool; +# endif +# define bool _Bool +# define false 0 +# define true 1 +# define __bool_true_false_are_defined 1 +#endif + +// string.h should be enough but let's include strings.h and memory.h too if +// they exists, since that shouldn't do any harm, but may improve portability. +#ifdef HAVE_STRING_H +# include +#endif + +#ifdef HAVE_STRINGS_H +# include +#endif + +#ifdef HAVE_MEMORY_H +# include +#endif + + +//////////// +// Macros // +//////////// + +#undef memzero +#define memzero(s, n) memset(s, 0, n) + +#ifndef MIN +# define MIN(x, y) ((x) < (y) ? (x) : (y)) +#endif + +#ifndef MAX +# define MAX(x, y) ((x) > (y) ? (x) : (y)) +#endif + +#ifndef ARRAY_SIZE +# define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0])) +#endif + +#endif diff --git a/src/common/tuklib_common.h b/src/common/tuklib_common.h new file mode 100644 index 000000000000..31fbab58b005 --- /dev/null +++ b/src/common/tuklib_common.h @@ -0,0 +1,71 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_common.h +/// \brief Common definitions for tuklib modules +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef TUKLIB_COMMON_H +#define TUKLIB_COMMON_H + +// The config file may be replaced by a package-specific file. +// It should include at least stddef.h, inttypes.h, and limits.h. +#include "tuklib_config.h" + +// TUKLIB_SYMBOL_PREFIX is prefixed to all symbols exported by +// the tuklib modules. 
If you use a tuklib module in a library, +// you should use TUKLIB_SYMBOL_PREFIX to make sure that there +// are no symbol conflicts in case someone links your library +// into application that also uses the same tuklib module. +#ifndef TUKLIB_SYMBOL_PREFIX +# define TUKLIB_SYMBOL_PREFIX +#endif + +#define TUKLIB_CAT_X(a, b) a ## b +#define TUKLIB_CAT(a, b) TUKLIB_CAT_X(a, b) + +#ifndef TUKLIB_SYMBOL +# define TUKLIB_SYMBOL(sym) TUKLIB_CAT(TUKLIB_SYMBOL_PREFIX, sym) +#endif + +#ifndef TUKLIB_DECLS_BEGIN +# ifdef __cplusplus +# define TUKLIB_DECLS_BEGIN extern "C" { +# else +# define TUKLIB_DECLS_BEGIN +# endif +#endif + +#ifndef TUKLIB_DECLS_END +# ifdef __cplusplus +# define TUKLIB_DECLS_END } +# else +# define TUKLIB_DECLS_END +# endif +#endif + +#if defined(__GNUC__) && defined(__GNUC_MINOR__) +# define TUKLIB_GNUC_REQ(major, minor) \ + ((__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)) \ + || __GNUC__ > (major)) +#else +# define TUKLIB_GNUC_REQ(major, minor) 0 +#endif + +#if TUKLIB_GNUC_REQ(2, 5) +# define tuklib_attr_noreturn __attribute__((__noreturn__)) +#else +# define tuklib_attr_noreturn +#endif + +#if (defined(_WIN32) && !defined(__CYGWIN__)) \ + || defined(__OS2__) || defined(__MSDOS__) +# define TUKLIB_DOSLIKE 1 +#endif + +#endif diff --git a/src/common/tuklib_config.h b/src/common/tuklib_config.h new file mode 100644 index 000000000000..549cb24d7738 --- /dev/null +++ b/src/common/tuklib_config.h @@ -0,0 +1,7 @@ +#ifdef HAVE_CONFIG_H +# include "sysdefs.h" +#else +# include +# include +# include +#endif diff --git a/src/common/tuklib_cpucores.c b/src/common/tuklib_cpucores.c new file mode 100644 index 000000000000..e35d9bc74d9a --- /dev/null +++ b/src/common/tuklib_cpucores.c @@ -0,0 +1,52 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_cpucores.c +/// \brief Get the number of CPU cores online +// +// Author: Lasse Collin +// +// This file has been put into the public domain. 
+// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "tuklib_cpucores.h" + +#if defined(TUKLIB_CPUCORES_SYSCTL) +# ifdef HAVE_SYS_PARAM_H +# include <sys/param.h> +# endif +# include <sys/sysctl.h> + +#elif defined(TUKLIB_CPUCORES_SYSCONF) +# include <unistd.h> +#endif + + +extern uint32_t +tuklib_cpucores(void) +{ + uint32_t ret = 0; + +#if defined(TUKLIB_CPUCORES_SYSCTL) + int name[2] = { CTL_HW, HW_NCPU }; + int cpus; + size_t cpus_size = sizeof(cpus); + if (sysctl(name, 2, &cpus, &cpus_size, NULL, 0) != -1 + && cpus_size == sizeof(cpus) && cpus > 0) + ret = (uint32_t)cpus; + +#elif defined(TUKLIB_CPUCORES_SYSCONF) +# ifdef _SC_NPROCESSORS_ONLN + // Most systems + const long cpus = sysconf(_SC_NPROCESSORS_ONLN); +# else + // IRIX + const long cpus = sysconf(_SC_NPROC_ONLN); +# endif + if (cpus > 0) + ret = (uint32_t)cpus; +#endif + + return ret; +} diff --git a/src/common/tuklib_cpucores.h b/src/common/tuklib_cpucores.h new file mode 100644 index 000000000000..be1ce1c175ae --- /dev/null +++ b/src/common/tuklib_cpucores.h @@ -0,0 +1,23 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_cpucores.h +/// \brief Get the number of CPU cores online +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef TUKLIB_CPUCORES_H +#define TUKLIB_CPUCORES_H + +#include "tuklib_common.h" +TUKLIB_DECLS_BEGIN + +#define tuklib_cpucores TUKLIB_SYMBOL(tuklib_cpucores) +extern uint32_t tuklib_cpucores(void); + +TUKLIB_DECLS_END +#endif diff --git a/src/common/tuklib_exit.c b/src/common/tuklib_exit.c new file mode 100644 index 000000000000..c393be64d754 --- /dev/null +++ b/src/common/tuklib_exit.c @@ -0,0 +1,57 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_exit.c +/// \brief Close stdout and stderr, and exit +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "tuklib_common.h" + +#include +#include + +#include "tuklib_gettext.h" +#include "tuklib_progname.h" +#include "tuklib_exit.h" + + +extern void +tuklib_exit(int status, int err_status, int show_error) +{ + if (status != err_status) { + // Close stdout. If something goes wrong, + // print an error message to stderr. + const int ferror_err = ferror(stdout); + const int fclose_err = fclose(stdout); + if (ferror_err || fclose_err) { + status = err_status; + + // If it was fclose() that failed, we have the reason + // in errno. If only ferror() indicated an error, + // we have no idea what the reason was. + if (show_error) + fprintf(stderr, "%s: %s: %s\n", progname, + _("Writing to standard " + "output failed"), + fclose_err ? strerror(errno) + : _("Unknown error")); + } + } + + if (status != err_status) { + // Close stderr. If something goes wrong, there's + // nothing where we could print an error message. + // Just set the exit status. 
+ const int ferror_err = ferror(stderr); + const int fclose_err = fclose(stderr); + if (fclose_err || ferror_err) + status = err_status; + } + + exit(status); +} diff --git a/src/common/tuklib_exit.h b/src/common/tuklib_exit.h new file mode 100644 index 000000000000..b11776f0e5bf --- /dev/null +++ b/src/common/tuklib_exit.h @@ -0,0 +1,25 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_exit.h +/// \brief Close stdout and stderr, and exit +/// \note Requires tuklib_progname and tuklib_gettext modules +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef TUKLIB_EXIT_H +#define TUKLIB_EXIT_H + +#include "tuklib_common.h" +TUKLIB_DECLS_BEGIN + +#define tuklib_exit TUKLIB_SYMBOL(tuklib_exit) +extern void tuklib_exit(int status, int err_status, int show_error) + tuklib_attr_noreturn; + +TUKLIB_DECLS_END +#endif diff --git a/src/common/tuklib_gettext.h b/src/common/tuklib_gettext.h new file mode 100644 index 000000000000..248521347ab0 --- /dev/null +++ b/src/common/tuklib_gettext.h @@ -0,0 +1,44 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_gettext.h +/// \brief Wrapper for gettext and friends +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef TUKLIB_GETTEXT_H +#define TUKLIB_GETTEXT_H + +#include "tuklib_common.h" +#include + +#ifndef TUKLIB_GETTEXT +# ifdef ENABLE_NLS +# define TUKLIB_GETTEXT 1 +# else +# define TUKLIB_GETTEXT 0 +# endif +#endif + +#if TUKLIB_GETTEXT +# include +# define tuklib_gettext_init(package, localedir) \ + do { \ + setlocale(LC_ALL, ""); \ + bindtextdomain(package, localedir); \ + textdomain(package); \ + } while (0) +# define _(msgid) gettext(msgid) +# define N_(msgid1, msgid2, n) ngettext(msgid1, msgid2, n) +#else +# define tuklib_gettext_init(package, localedir) \ + setlocale(LC_ALL, "") +# define _(msgid) (msgid) +# define N_(msgid1, msgid2, n) ((n) == 1 ? (msgid1) : (msgid2)) +#endif + +#endif diff --git a/src/common/tuklib_integer.h b/src/common/tuklib_integer.h new file mode 100644 index 000000000000..e6daa772d588 --- /dev/null +++ b/src/common/tuklib_integer.h @@ -0,0 +1,523 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_integer.h +/// \brief Various integer and bit operations +/// +/// This file provides macros or functions to do some basic integer and bit +/// operations. +/// +/// Endianness related integer operations (XX = 16, 32, or 64; Y = b or l): +/// - Byte swapping: bswapXX(num) +/// - Byte order conversions to/from native: convXXYe(num) +/// - Aligned reads: readXXYe(ptr) +/// - Aligned writes: writeXXYe(ptr, num) +/// - Unaligned reads (16/32-bit only): unaligned_readXXYe(ptr) +/// - Unaligned writes (16/32-bit only): unaligned_writeXXYe(ptr, num) +/// +/// Since they can macros, the arguments should have no side effects since +/// they may be evaluated more than once. +/// +/// \todo PowerPC and possibly some other architectures support +/// byte swapping load and store instructions. This file +/// doesn't take advantage of those instructions. 
+/// +/// Bit scan operations for non-zero 32-bit integers: +/// - Bit scan reverse (find highest non-zero bit): bsr32(num) +/// - Count leading zeros: clz32(num) +/// - Count trailing zeros: ctz32(num) +/// - Bit scan forward (simply an alias for ctz32()): bsf32(num) +/// +/// The above bit scan operations return 0-31. If num is zero, +/// the result is undefined. +// +// Authors: Lasse Collin +// Joachim Henke +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef TUKLIB_INTEGER_H +#define TUKLIB_INTEGER_H + +#include "tuklib_common.h" + + +//////////////////////////////////////// +// Operating system specific features // +//////////////////////////////////////// + +#if defined(HAVE_BYTESWAP_H) + // glibc, uClibc, dietlibc +# include <byteswap.h> +# ifdef HAVE_BSWAP_16 +# define bswap16(num) bswap_16(num) +# endif +# ifdef HAVE_BSWAP_32 +# define bswap32(num) bswap_32(num) +# endif +# ifdef HAVE_BSWAP_64 +# define bswap64(num) bswap_64(num) +# endif + +#elif defined(HAVE_SYS_ENDIAN_H) + // *BSDs and Darwin +# include <sys/endian.h> + +#elif defined(HAVE_SYS_BYTEORDER_H) + // Solaris +# include <sys/byteorder.h> +# ifdef BSWAP_16 +# define bswap16(num) BSWAP_16(num) +# endif +# ifdef BSWAP_32 +# define bswap32(num) BSWAP_32(num) +# endif +# ifdef BSWAP_64 +# define bswap64(num) BSWAP_64(num) +# endif +# ifdef BE_16 +# define conv16be(num) BE_16(num) +# endif +# ifdef BE_32 +# define conv32be(num) BE_32(num) +# endif +# ifdef BE_64 +# define conv64be(num) BE_64(num) +# endif +# ifdef LE_16 +# define conv16le(num) LE_16(num) +# endif +# ifdef LE_32 +# define conv32le(num) LE_32(num) +# endif +# ifdef LE_64 +# define conv64le(num) LE_64(num) +# endif +#endif + + +/////////////////// +// Byte swapping // +/////////////////// + +#ifndef bswap16 // shifts happen in int, so cast the result back to 16 bits +# define bswap16(num) \ + ((uint16_t)(((uint16_t)(num) << 8) | ((uint16_t)(num) >> 8))) +#endif + +#ifndef bswap32 +# define bswap32(num) \ 
+ ( (((uint32_t)(num) << 24) ) \ + | (((uint32_t)(num) << 8) & UINT32_C(0x00FF0000)) \ + | (((uint32_t)(num) >> 8) & UINT32_C(0x0000FF00)) \ + | (((uint32_t)(num) >> 24) ) ) +#endif + +#ifndef bswap64 +# define bswap64(num) \ + ( (((uint64_t)(num) << 56) ) \ + | (((uint64_t)(num) << 40) & UINT64_C(0x00FF000000000000)) \ + | (((uint64_t)(num) << 24) & UINT64_C(0x0000FF0000000000)) \ + | (((uint64_t)(num) << 8) & UINT64_C(0x000000FF00000000)) \ + | (((uint64_t)(num) >> 8) & UINT64_C(0x00000000FF000000)) \ + | (((uint64_t)(num) >> 24) & UINT64_C(0x0000000000FF0000)) \ + | (((uint64_t)(num) >> 40) & UINT64_C(0x000000000000FF00)) \ + | (((uint64_t)(num) >> 56) ) ) +#endif + +// Define conversion macros using the basic byte swapping macros. +#ifdef WORDS_BIGENDIAN +# ifndef conv16be +# define conv16be(num) ((uint16_t)(num)) +# endif +# ifndef conv32be +# define conv32be(num) ((uint32_t)(num)) +# endif +# ifndef conv64be +# define conv64be(num) ((uint64_t)(num)) +# endif +# ifndef conv16le +# define conv16le(num) bswap16(num) +# endif +# ifndef conv32le +# define conv32le(num) bswap32(num) +# endif +# ifndef conv64le +# define conv64le(num) bswap64(num) +# endif +#else +# ifndef conv16be +# define conv16be(num) bswap16(num) +# endif +# ifndef conv32be +# define conv32be(num) bswap32(num) +# endif +# ifndef conv64be +# define conv64be(num) bswap64(num) +# endif +# ifndef conv16le +# define conv16le(num) ((uint16_t)(num)) +# endif +# ifndef conv32le +# define conv32le(num) ((uint32_t)(num)) +# endif +# ifndef conv64le +# define conv64le(num) ((uint64_t)(num)) +# endif +#endif + + +////////////////////////////// +// Aligned reads and writes // +////////////////////////////// + +static inline uint16_t +read16be(const uint8_t *buf) +{ + uint16_t num = *(const uint16_t *)buf; + return conv16be(num); +} + + +static inline uint16_t +read16le(const uint8_t *buf) +{ + uint16_t num = *(const uint16_t *)buf; + return conv16le(num); +} + + +static inline uint32_t +read32be(const 
uint8_t *buf) +{ + uint32_t num = *(const uint32_t *)buf; + return conv32be(num); +} + + +static inline uint32_t +read32le(const uint8_t *buf) +{ + uint32_t num = *(const uint32_t *)buf; + return conv32le(num); +} + + +static inline uint64_t +read64be(const uint8_t *buf) +{ + uint64_t num = *(const uint64_t *)buf; + return conv64be(num); +} + + +static inline uint64_t +read64le(const uint8_t *buf) +{ + uint64_t num = *(const uint64_t *)buf; + return conv64le(num); +} + + +// NOTE: Possible byte swapping must be done in a macro to allow GCC +// to optimize byte swapping of constants when using glibc's or *BSD's +// byte swapping macros. The actual write is done in an inline function +// to make type checking of the buf pointer possible similarly to readXXYe() +// functions. + +#define write16be(buf, num) write16ne((buf), conv16be(num)) +#define write16le(buf, num) write16ne((buf), conv16le(num)) +#define write32be(buf, num) write32ne((buf), conv32be(num)) +#define write32le(buf, num) write32ne((buf), conv32le(num)) +#define write64be(buf, num) write64ne((buf), conv64be(num)) +#define write64le(buf, num) write64ne((buf), conv64le(num)) + + +static inline void +write16ne(uint8_t *buf, uint16_t num) +{ + *(uint16_t *)buf = num; + return; +} + + +static inline void +write32ne(uint8_t *buf, uint32_t num) +{ + *(uint32_t *)buf = num; + return; +} + + +static inline void +write64ne(uint8_t *buf, uint64_t num) +{ + *(uint64_t *)buf = num; + return; +} + + +//////////////////////////////// +// Unaligned reads and writes // +//////////////////////////////// + +// NOTE: TUKLIB_FAST_UNALIGNED_ACCESS indicates only support for 16-bit and +// 32-bit unaligned integer loads and stores. It's possible that 64-bit +// unaligned access doesn't work or is slower than byte-by-byte access. +// Since unaligned 64-bit is probably not needed as often as 16-bit or +// 32-bit, we simply don't support 64-bit unaligned access for now. 
+#ifdef TUKLIB_FAST_UNALIGNED_ACCESS +# define unaligned_read16be read16be +# define unaligned_read16le read16le +# define unaligned_read32be read32be +# define unaligned_read32le read32le +# define unaligned_write16be write16be +# define unaligned_write16le write16le +# define unaligned_write32be write32be +# define unaligned_write32le write32le + +#else + +static inline uint16_t +unaligned_read16be(const uint8_t *buf) +{ + uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1]; + return num; +} + + +static inline uint16_t +unaligned_read16le(const uint8_t *buf) +{ + uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8); + return num; +} + + +static inline uint32_t +unaligned_read32be(const uint8_t *buf) +{ + uint32_t num = (uint32_t)buf[0] << 24; + num |= (uint32_t)buf[1] << 16; + num |= (uint32_t)buf[2] << 8; + num |= (uint32_t)buf[3]; + return num; +} + + +static inline uint32_t +unaligned_read32le(const uint8_t *buf) +{ + uint32_t num = (uint32_t)buf[0]; + num |= (uint32_t)buf[1] << 8; + num |= (uint32_t)buf[2] << 16; + num |= (uint32_t)buf[3] << 24; + return num; +} + + +static inline void +unaligned_write16be(uint8_t *buf, uint16_t num) +{ + buf[0] = num >> 8; + buf[1] = num; + return; +} + + +static inline void +unaligned_write16le(uint8_t *buf, uint16_t num) +{ + buf[0] = num; + buf[1] = num >> 8; + return; +} + + +static inline void +unaligned_write32be(uint8_t *buf, uint32_t num) +{ + buf[0] = num >> 24; + buf[1] = num >> 16; + buf[2] = num >> 8; + buf[3] = num; + return; +} + + +static inline void +unaligned_write32le(uint8_t *buf, uint32_t num) +{ + buf[0] = num; + buf[1] = num >> 8; + buf[2] = num >> 16; + buf[3] = num >> 24; + return; +} + +#endif + + +static inline uint32_t +bsr32(uint32_t n) +{ + // Check for ICC first, since it tends to define __GNUC__ too. 
+#if defined(__INTEL_COMPILER) + return _bit_scan_reverse(n); + +#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX + // GCC >= 3.4 has __builtin_clz(), which gives good results on + // multiple architectures. On x86, __builtin_clz() ^ 31U becomes + // either plain BSR (so the XOR gets optimized away) or LZCNT and + // XOR (if -march indicates that SSE4a instructions are supported). + return __builtin_clz(n) ^ 31U; + +#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + uint32_t i; + __asm__("bsrl %1, %0" : "=r" (i) : "rm" (n)); + return i; + +#elif defined(_MSC_VER) && _MSC_VER >= 1400 + // MSVC isn't supported by tuklib, but since this code exists, + // it doesn't hurt to have it here anyway. + uint32_t i; + _BitScanReverse((DWORD *)&i, n); + return i; + +#else + uint32_t i = 31; + + if ((n & UINT32_C(0xFFFF0000)) == 0) { + n <<= 16; + i = 15; + } + + if ((n & UINT32_C(0xFF000000)) == 0) { + n <<= 8; + i -= 8; + } + + if ((n & UINT32_C(0xF0000000)) == 0) { + n <<= 4; + i -= 4; + } + + if ((n & UINT32_C(0xC0000000)) == 0) { + n <<= 2; + i -= 2; + } + + if ((n & UINT32_C(0x80000000)) == 0) + --i; + + return i; +#endif +} + + +static inline uint32_t +clz32(uint32_t n) +{ +#if defined(__INTEL_COMPILER) + return _bit_scan_reverse(n) ^ 31U; + +#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX + return __builtin_clz(n); + +#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + uint32_t i; + __asm__("bsrl %1, %0\n\t" + "xorl $31, %0" + : "=r" (i) : "rm" (n)); + return i; + +#elif defined(_MSC_VER) && _MSC_VER >= 1400 + uint32_t i; + _BitScanReverse((DWORD *)&i, n); + return i ^ 31U; + +#else + uint32_t i = 0; + + if ((n & UINT32_C(0xFFFF0000)) == 0) { + n <<= 16; + i = 16; + } + + if ((n & UINT32_C(0xFF000000)) == 0) { + n <<= 8; + i += 8; + } + + if ((n & UINT32_C(0xF0000000)) == 0) { + n <<= 4; + i += 4; + } + + if ((n & UINT32_C(0xC0000000)) == 0) { + n <<= 2; + i += 2; + } + + if ((n & UINT32_C(0x80000000)) == 0) + ++i; + + 
return i; +#endif +} + + +static inline uint32_t +ctz32(uint32_t n) +{ +#if defined(__INTEL_COMPILER) + return _bit_scan_forward(n); + +#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX >= UINT32_MAX + return __builtin_ctz(n); + +#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + uint32_t i; + __asm__("bsfl %1, %0" : "=r" (i) : "rm" (n)); + return i; + +#elif defined(_MSC_VER) && _MSC_VER >= 1400 + uint32_t i; + _BitScanForward((DWORD *)&i, n); + return i; + +#else + uint32_t i = 0; + + if ((n & UINT32_C(0x0000FFFF)) == 0) { + n >>= 16; + i = 16; + } + + if ((n & UINT32_C(0x000000FF)) == 0) { + n >>= 8; + i += 8; + } + + if ((n & UINT32_C(0x0000000F)) == 0) { + n >>= 4; + i += 4; + } + + if ((n & UINT32_C(0x00000003)) == 0) { + n >>= 2; + i += 2; + } + + if ((n & UINT32_C(0x00000001)) == 0) + ++i; + + return i; +#endif +} + +#define bsf32 ctz32 + +#endif diff --git a/src/common/tuklib_open_stdxxx.c b/src/common/tuklib_open_stdxxx.c new file mode 100644 index 000000000000..08bc60d8cf87 --- /dev/null +++ b/src/common/tuklib_open_stdxxx.c @@ -0,0 +1,55 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_open_stdxxx.c +/// \brief Make sure that file descriptors 0, 1, and 2 are open +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "tuklib_open_stdxxx.h" + +#ifndef TUKLIB_DOSLIKE +# include <stdlib.h> +# include <errno.h> +# include <fcntl.h> +# include <unistd.h> +#endif + + +extern void +tuklib_open_stdxxx(int err_status) +{ +#ifdef TUKLIB_DOSLIKE + // Do nothing, just silence warnings. + (void)err_status; + +#else + for (int i = 0; i <= 2; ++i) { + // We use fcntl() to check if the file descriptor is open. + if (fcntl(i, F_GETFD) == -1 && errno == EBADF) { + // With stdin, we could use /dev/full so that + // writing to stdin would fail. 
However, /dev/full + // is Linux specific, and if the program tries to + // write to stdin, there's already a problem anyway. + const int fd = open("/dev/null", O_NOCTTY + | (i == 0 ? O_WRONLY : O_RDONLY)); + + if (fd != i) { + // Rare: open() failed or gave another fd. + // Exit with the status we were given. Don't + // print an error message, since stderr may + // very well be non-existent. + if (fd != -1) + (void)close(fd); + exit(err_status); + } + } + } +#endif + + return; +} diff --git a/src/common/tuklib_open_stdxxx.h b/src/common/tuklib_open_stdxxx.h new file mode 100644 index 000000000000..b91161609ee6 --- /dev/null +++ b/src/common/tuklib_open_stdxxx.h @@ -0,0 +1,23 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_open_stdxxx.h +/// \brief Make sure that file descriptors 0, 1, and 2 are open +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef TUKLIB_OPEN_STDXXX_H +#define TUKLIB_OPEN_STDXXX_H + +#include "tuklib_common.h" +TUKLIB_DECLS_BEGIN + +#define tuklib_open_stdxxx TUKLIB_SYMBOL(tuklib_open_stdxxx) +extern void tuklib_open_stdxxx(int err_status); + +TUKLIB_DECLS_END +#endif diff --git a/src/common/tuklib_physmem.c b/src/common/tuklib_physmem.c new file mode 100644 index 000000000000..1536e6e5da1c --- /dev/null +++ b/src/common/tuklib_physmem.c @@ -0,0 +1,165 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_physmem.c +/// \brief Get the amount of physical memory +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "tuklib_physmem.h" + +// We want to use Windows-specific code on Cygwin, which also has memory +// information available via sysconf(), but on Cygwin 1.5 and older it +// gives wrong results (from our point of view). +#if defined(_WIN32) || defined(__CYGWIN__) +# ifndef _WIN32_WINNT +# define _WIN32_WINNT 0x0500 +# endif +# include <windows.h> + +#elif defined(__OS2__) +# define INCL_DOSMISC +# include <os2.h> + +#elif defined(__DJGPP__) +# include <dpmi.h> + +#elif defined(__VMS) +# include <lib$routines.h> +# include <syidef.h> +# include <ssdef.h> + +#elif defined(TUKLIB_PHYSMEM_SYSCONF) +# include <unistd.h> + +#elif defined(TUKLIB_PHYSMEM_SYSCTL) +# ifdef HAVE_SYS_PARAM_H +# include <sys/param.h> +# endif +# include <sys/sysctl.h> + +// IRIX +#elif defined(TUKLIB_PHYSMEM_GETINVENT_R) +# include <invent.h> + +// This sysinfo() is Linux-specific. +#elif defined(TUKLIB_PHYSMEM_SYSINFO) +# include <sys/sysinfo.h> +#endif + + +extern uint64_t +tuklib_physmem(void) +{ + uint64_t ret = 0; + +#if defined(_WIN32) || defined(__CYGWIN__) + if ((GetVersion() & 0xFF) >= 5) { + // Windows 2000 and later have GlobalMemoryStatusEx() which + // supports reporting values greater than 4 GiB. To keep the + // code working also on older Windows versions, use + // GlobalMemoryStatusEx() conditionally. + HMODULE kernel32 = GetModuleHandle("kernel32.dll"); + if (kernel32 != NULL) { + BOOL (WINAPI *gmse)(LPMEMORYSTATUSEX) = GetProcAddress( + kernel32, "GlobalMemoryStatusEx"); + if (gmse != NULL) { + MEMORYSTATUSEX meminfo; + meminfo.dwLength = sizeof(meminfo); + if (gmse(&meminfo)) + ret = meminfo.ullTotalPhys; + } + } + } + + if (ret == 0) { + // GlobalMemoryStatus() is supported by Windows 95 and later, + // so it is fine to link against it unconditionally. Note that + // GlobalMemoryStatus() has no return value. 
+ MEMORYSTATUS meminfo; + meminfo.dwLength = sizeof(meminfo); + GlobalMemoryStatus(&meminfo); + ret = meminfo.dwTotalPhys; + } + +#elif defined(__OS2__) + unsigned long mem; + if (DosQuerySysInfo(QSV_TOTPHYSMEM, QSV_TOTPHYSMEM, + &mem, sizeof(mem)) == 0) + ret = mem; + +#elif defined(__DJGPP__) + __dpmi_free_mem_info meminfo; + if (__dpmi_get_free_memory_information(&meminfo) == 0 + && meminfo.total_number_of_physical_pages + != (unsigned long)-1) + ret = (uint64_t)meminfo.total_number_of_physical_pages * 4096; + +#elif defined(__VMS) + int vms_mem; + int val = SYI$_MEMSIZE; + if (LIB$GETSYI(&val, &vms_mem, 0, 0, 0, 0) == SS$_NORMAL) + ret = (uint64_t)vms_mem * 8192; + +#elif defined(TUKLIB_PHYSMEM_SYSCONF) + const long pagesize = sysconf(_SC_PAGESIZE); + const long pages = sysconf(_SC_PHYS_PAGES); + if (pagesize != -1 && pages != -1) + // According to docs, pagesize * pages can overflow. + // Simple case is 32-bit box with 4 GiB or more RAM, + // which may report exactly 4 GiB of RAM, and "long" + // being 32-bit will overflow. Casting to uint64_t + // hopefully avoids overflows in the near future. + ret = (uint64_t)pagesize * (uint64_t)pages; + +#elif defined(TUKLIB_PHYSMEM_SYSCTL) + int name[2] = { + CTL_HW, +#ifdef HW_PHYSMEM64 + HW_PHYSMEM64 +#else + HW_PHYSMEM +#endif + }; + union { + uint32_t u32; + uint64_t u64; + } mem; + size_t mem_ptr_size = sizeof(mem.u64); + if (sysctl(name, 2, &mem.u64, &mem_ptr_size, NULL, 0) != -1) { + // IIRC, 64-bit "return value" is possible on some 64-bit + // BSD systems even with HW_PHYSMEM (instead of HW_PHYSMEM64), + // so support both. 
+ if (mem_ptr_size == sizeof(mem.u64)) + ret = mem.u64; + else if (mem_ptr_size == sizeof(mem.u32)) + ret = mem.u32; + } + +#elif defined(TUKLIB_PHYSMEM_GETINVENT_R) + inv_state_t *st = NULL; + if (setinvent_r(&st) != -1) { + inventory_t *i; + while ((i = getinvent_r(st)) != NULL) { + if (i->inv_class == INV_MEMORY + && i->inv_type == INV_MAIN_MB) { + ret = (uint64_t)i->inv_state << 20; + break; + } + } + + endinvent_r(st); + } + +#elif defined(TUKLIB_PHYSMEM_SYSINFO) + struct sysinfo si; + if (sysinfo(&si) == 0) + ret = (uint64_t)si.totalram * si.mem_unit; +#endif + + return ret; +} diff --git a/src/common/tuklib_physmem.h b/src/common/tuklib_physmem.h new file mode 100644 index 000000000000..09e2a51338ae --- /dev/null +++ b/src/common/tuklib_physmem.h @@ -0,0 +1,28 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_physmem.h +/// \brief Get the amount of physical memory +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef TUKLIB_PHYSMEM_H +#define TUKLIB_PHYSMEM_H + +#include "tuklib_common.h" +TUKLIB_DECLS_BEGIN + +#define tuklib_physmem TUKLIB_SYMBOL(tuklib_physmem) +extern uint64_t tuklib_physmem(void); +///< +/// \brief Get the amount of physical memory in bytes +/// +/// \return Amount of physical memory in bytes. On error, zero is +/// returned. + +TUKLIB_DECLS_END +#endif diff --git a/src/common/tuklib_progname.c b/src/common/tuklib_progname.c new file mode 100644 index 000000000000..7cb7e203dd9d --- /dev/null +++ b/src/common/tuklib_progname.c @@ -0,0 +1,50 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_progname.c +/// \brief Program name to be displayed in messages +// +// Author: Lasse Collin +// +// This file has been put into the public domain. 
+// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "tuklib_progname.h" +#include <string.h> + + +#if !HAVE_DECL_PROGRAM_INVOCATION_NAME +char *progname = NULL; +#endif + + +extern void +tuklib_progname_init(char **argv) +{ +#ifdef TUKLIB_DOSLIKE + // On these systems, argv[0] always has the full path and .exe + // suffix even if the user just types the plain program name. + // We modify argv[0] to make it nicer to read. + + // Strip the leading path. + char *p = argv[0] + strlen(argv[0]); + while (argv[0] < p && p[-1] != '/' && p[-1] != '\\') + --p; + + argv[0] = p; + + // Strip the .exe suffix. + p = strrchr(p, '.'); + if (p != NULL) + *p = '\0'; + + // Make it lowercase. + for (p = argv[0]; *p != '\0'; ++p) + if (*p >= 'A' && *p <= 'Z') + *p = *p - 'A' + 'a'; +#endif + + progname = argv[0]; + return; +} diff --git a/src/common/tuklib_progname.h b/src/common/tuklib_progname.h new file mode 100644 index 000000000000..791b12517e59 --- /dev/null +++ b/src/common/tuklib_progname.h @@ -0,0 +1,32 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_progname.h +/// \brief Program name to be displayed in messages +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file.
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef TUKLIB_PROGNAME_H +#define TUKLIB_PROGNAME_H + +#include "tuklib_common.h" +#include <errno.h> + +TUKLIB_DECLS_BEGIN + +#if HAVE_DECL_PROGRAM_INVOCATION_NAME +# define progname program_invocation_name +#else +# define progname TUKLIB_SYMBOL(tuklib_progname) + extern char *progname; +#endif + +#define tuklib_progname_init TUKLIB_SYMBOL(tuklib_progname_init) +extern void tuklib_progname_init(char **argv); + +TUKLIB_DECLS_END +#endif diff --git a/src/liblzma/api/lzma.h b/src/liblzma/api/lzma.h new file mode 100644 index 000000000000..5be9b4e31558 --- /dev/null +++ b/src/liblzma/api/lzma.h @@ -0,0 +1,326 @@ +/** + * \file api/lzma.h + * \brief The public API of liblzma data compression library + * + * liblzma is a public domain general-purpose data compression library with + * a zlib-like API. The native file format is .xz, but also the old .lzma + * format and raw (no headers) streams are supported. Multiple compression + * algorithms (filters) are supported. Currently LZMA2 is the primary filter. + * + * liblzma is part of XZ Utils <http://tukaani.org/xz/>. XZ Utils includes + * a gzip-like command line tool named xz and some other tools. XZ Utils + * is developed and maintained by Lasse Collin. + * + * Major parts of liblzma are based on Igor Pavlov's public domain LZMA SDK + * <http://7-zip.org/sdk.html>. + * + * The SHA-256 implementation is based on the public domain code found from + * 7-Zip <http://7-zip.org/>, which has a modified version of the public + * domain SHA-256 code found from Crypto++ <http://cryptopp.com/>. + * The SHA-256 code in Crypto++ was written by Kevin Springle and Wei Dai. + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#ifndef LZMA_H +#define LZMA_H + +/***************************** + * Required standard headers * + *****************************/ + +/* + * liblzma API headers need some standard types and macros.
To allow + * including lzma.h without requiring the application to include other + * headers first, lzma.h includes the required standard headers unless + * they already seem to be included or if LZMA_MANUAL_HEADERS + * has been defined. + * + * Here's what types and macros are needed and from which headers: + * - stddef.h: size_t, NULL + * - stdint.h: uint8_t, uint32_t, uint64_t, UINT32_C(n), UINT64_C(n), + * UINT32_MAX, UINT64_MAX + * + * However, inttypes.h is a little more portable than stdint.h, although + * inttypes.h declares some unneeded things compared to plain stdint.h. + * + * The hacks below aren't perfect, specifically they assume that inttypes.h + * exists and that it typedefs at least uint8_t, uint32_t, and uint64_t, + * and that, in case of incomplete inttypes.h, unsigned int is 32-bit. + * If the application already takes care of setting up all the types and + * macros properly (for example by using gnulib's stdint.h or inttypes.h), + * we try to detect that the macros are already defined and don't include + * inttypes.h here again. However, you may define LZMA_MANUAL_HEADERS to + * force this file to never include any system headers. + * + * Some could argue that liblzma API should provide all the required types, + * for example lzma_uint64, LZMA_UINT64_C(n), and LZMA_UINT64_MAX. This was + * seen as an unnecessary mess, since most systems already provide all the necessary + * types and macros in the standard headers. + * + * Note that liblzma API still has lzma_bool, because using stdbool.h would + * break C89 and C++ programs on many systems. sizeof(bool) in C99 isn't + * necessarily the same as sizeof(bool) in C++. + */ + +#ifndef LZMA_MANUAL_HEADERS + /* + * I suppose this works portably also in C++. Note that in C++, + * we need to get size_t into the global namespace. + */ +# include <stddef.h> + + /* + * Skip inttypes.h if we already have all the required macros. If we + * have the macros, we assume that we have the matching typedefs too.
+ */ +# if !defined(UINT32_C) || !defined(UINT64_C) \ + || !defined(UINT32_MAX) || !defined(UINT64_MAX) + /* + * MSVC has no C99 support, and thus it cannot be used to + * compile liblzma. The liblzma API has to still be usable + * from MSVC, so we need to define the required standard + * integer types here. + */ +# if defined(_WIN32) && defined(_MSC_VER) + typedef unsigned __int8 uint8_t; + typedef unsigned __int32 uint32_t; + typedef unsigned __int64 uint64_t; +# else + /* Use the standard inttypes.h. */ +# ifdef __cplusplus + /* + * C99 sections 7.18.2 and 7.18.4 specify + * that C++ implementations define the limit + * and constant macros only if specifically + * requested. Note that if you want the + * format macros (PRIu64 etc.) too, you need + * to define __STDC_FORMAT_MACROS before + * including lzma.h, since re-including + * inttypes.h with __STDC_FORMAT_MACROS + * defined doesn't necessarily work. + */ +# ifndef __STDC_LIMIT_MACROS +# define __STDC_LIMIT_MACROS 1 +# endif +# ifndef __STDC_CONSTANT_MACROS +# define __STDC_CONSTANT_MACROS 1 +# endif +# endif + +# include <inttypes.h> +# endif + + /* + * Some old systems have only the typedefs in inttypes.h, and + * lack all the macros. For those systems, we need a few more + * hacks. We assume that unsigned int is 32-bit and unsigned + * long is either 32-bit or 64-bit. If these hacks aren't + * enough, the application has to setup the types manually + * before including lzma.h. + */ +# ifndef UINT32_C +# if defined(_WIN32) && defined(_MSC_VER) +# define UINT32_C(n) n ## UI32 +# else +# define UINT32_C(n) n ## U +# endif +# endif + +# ifndef UINT64_C +# if defined(_WIN32) && defined(_MSC_VER) +# define UINT64_C(n) n ## UI64 +# else + /* Get ULONG_MAX.
*/ +# include <limits.h> +# if ULONG_MAX == 4294967295UL +# define UINT64_C(n) n ## ULL +# else +# define UINT64_C(n) n ## UL +# endif +# endif +# endif + +# ifndef UINT32_MAX +# define UINT32_MAX (UINT32_C(4294967295)) +# endif + +# ifndef UINT64_MAX +# define UINT64_MAX (UINT64_C(18446744073709551615)) +# endif +# endif +#endif /* ifndef LZMA_MANUAL_HEADERS */ + + +/****************** + * LZMA_API macro * + ******************/ + +/* + * Some systems require that the functions and function pointers are + * declared specially in the headers. LZMA_API_IMPORT is for importing + * symbols and LZMA_API_CALL is to specify the calling convention. + * + * By default it is assumed that the application will link dynamically + * against liblzma. #define LZMA_API_STATIC in your application if you + * want to link against static liblzma. If you don't care about portability + * to operating systems like Windows, or at least don't care about linking + * against static liblzma on them, don't worry about LZMA_API_STATIC. That + * is, most developers will never need to use LZMA_API_STATIC. + * + * The GCC variants are a special case on Windows (Cygwin and MinGW). + * We rely on GCC doing the right thing with its auto-import feature, + * and thus don't use __declspec(dllimport). This way developers don't + * need to worry about LZMA_API_STATIC. Also the calling convention is + * omitted on Cygwin but not on MinGW. + */ +#ifndef LZMA_API_IMPORT +# if !defined(LZMA_API_STATIC) && defined(_WIN32) && !defined(__GNUC__) +# define LZMA_API_IMPORT __declspec(dllimport) +# else +# define LZMA_API_IMPORT +# endif +#endif + +#ifndef LZMA_API_CALL +# if defined(_WIN32) && !defined(__CYGWIN__) +# define LZMA_API_CALL __cdecl +# else +# define LZMA_API_CALL +# endif +#endif + +#ifndef LZMA_API +# define LZMA_API(type) LZMA_API_IMPORT type LZMA_API_CALL +#endif + + +/*********** + * nothrow * + ***********/ + +/* + * None of the functions in liblzma may throw an exception.
Even + * the functions that use callback functions won't throw exceptions, + * because liblzma would break if a callback function threw an exception. + */ +#ifndef lzma_nothrow +# if defined(__cplusplus) +# define lzma_nothrow throw() +# elif __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3) +# define lzma_nothrow __attribute__((__nothrow__)) +# else +# define lzma_nothrow +# endif +#endif + + +/******************** + * GNU C extensions * + ********************/ + +/* + * GNU C extensions are used conditionally in the public API. It doesn't + * break anything if these are sometimes enabled and sometimes not, only + * affects warnings and optimizations. + */ +#if __GNUC__ >= 3 +# ifndef lzma_attribute +# define lzma_attribute(attr) __attribute__(attr) +# endif + +# ifndef lzma_restrict +# define lzma_restrict __restrict__ +# endif + + /* warn_unused_result was added in GCC 3.4. */ +# ifndef lzma_attr_warn_unused_result +# if __GNUC__ == 3 && __GNUC_MINOR__ < 4 +# define lzma_attr_warn_unused_result +# endif +# endif + +#else +# ifndef lzma_attribute +# define lzma_attribute(attr) +# endif + +# ifndef lzma_restrict +# if __STDC_VERSION__ >= 199901L +# define lzma_restrict restrict +# else +# define lzma_restrict +# endif +# endif +#endif + + +#ifndef lzma_attr_pure +# define lzma_attr_pure lzma_attribute((__pure__)) +#endif + +#ifndef lzma_attr_const +# define lzma_attr_const lzma_attribute((__const__)) +#endif + +#ifndef lzma_attr_warn_unused_result +# define lzma_attr_warn_unused_result \ + lzma_attribute((__warn_unused_result__)) +#endif + + +/************** + * Subheaders * + **************/ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Subheaders check that this is defined. It is to prevent including + * them directly from applications. 
+ */ +#define LZMA_H_INTERNAL 1 + +/* Basic features */ +#include "lzma/version.h" +#include "lzma/base.h" +#include "lzma/vli.h" +#include "lzma/check.h" + +/* Filters */ +#include "lzma/filter.h" +#include "lzma/subblock.h" +#include "lzma/bcj.h" +#include "lzma/delta.h" +#include "lzma/lzma.h" + +/* Container formats */ +#include "lzma/container.h" + +/* Advanced features */ +#include "lzma/stream_flags.h" +#include "lzma/block.h" +#include "lzma/index.h" +#include "lzma/index_hash.h" + +/* Hardware information */ +#include "lzma/hardware.h" + +/* + * All subheaders included. Undefine LZMA_H_INTERNAL to prevent applications + * re-including the subheaders. + */ +#undef LZMA_H_INTERNAL + +#ifdef __cplusplus +} +#endif + +#endif /* ifndef LZMA_H */ diff --git a/src/liblzma/api/lzma/base.h b/src/liblzma/api/lzma/base.h new file mode 100644 index 000000000000..993626a4cdfd --- /dev/null +++ b/src/liblzma/api/lzma/base.h @@ -0,0 +1,596 @@ +/** + * \file lzma/base.h + * \brief Data types and functions used in many places in liblzma API + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Boolean + * + * This is here because C89 doesn't have stdbool.h. To set a value for + * variables having type lzma_bool, you can use + * - C99's `true' and `false' from stdbool.h; + * - C++'s internal `true' and `false'; or + * - integers one (true) and zero (false). + */ +typedef unsigned char lzma_bool; + + +/** + * \brief Type of reserved enumeration variable in structures + * + * To avoid breaking library ABI when new features are added, several + * structures contain extra variables that may be used in future. 
Since + * sizeof(enum) can be different than sizeof(int), and sizeof(enum) may + * even vary depending on the range of enumeration constants, we specify + * a separate type to be used for reserved enumeration variables. All + * enumeration constants in liblzma API will be non-negative and less + * than 128, which should guarantee that the ABI won't break even when + * new constants are added to existing enumerations. + */ +typedef enum { + LZMA_RESERVED_ENUM = 0 +} lzma_reserved_enum; + + +/** + * \brief Return values used by several functions in liblzma + * + * Check the descriptions of specific functions to find out which return + * values they can return. With some functions the return values may have + * more specific meanings than described here; those differences are + * described per-function basis. + */ +typedef enum { + LZMA_OK = 0, + /**< + * \brief Operation completed successfully + */ + + LZMA_STREAM_END = 1, + /**< + * \brief End of stream was reached + * + * In encoder, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, or + * LZMA_FINISH was finished. In decoder, this indicates + * that all the data was successfully decoded. + * + * In all cases, when LZMA_STREAM_END is returned, the last + * output bytes should be picked from strm->next_out. + */ + + LZMA_NO_CHECK = 2, + /**< + * \brief Input stream has no integrity check + * + * This return value can be returned only if the + * LZMA_TELL_NO_CHECK flag was used when initializing + * the decoder. LZMA_NO_CHECK is just a warning, and + * the decoding can be continued normally. + * + * It is possible to call lzma_get_check() immediately after + * lzma_code has returned LZMA_NO_CHECK. The result will + * naturally be LZMA_CHECK_NONE, but the possibility to call + * lzma_get_check() may be convenient in some applications. + */ + + LZMA_UNSUPPORTED_CHECK = 3, + /**< + * \brief Cannot calculate the integrity check + * + * The usage of this return value is different in encoders + * and decoders. 
+ * + * Encoders can return this value only from the initialization + * function. If initialization fails with this value, the + * encoding cannot be done, because there's no way to produce + * output with the correct integrity check. + * + * Decoders can return this value only from lzma_code() and + * only if the LZMA_TELL_UNSUPPORTED_CHECK flag was used when + * initializing the decoder. The decoding can still be + * continued normally even if the check type is unsupported, + * but naturally the check will not be validated, and possible + * errors may go undetected. + * + * With decoder, it is possible to call lzma_get_check() + * immediately after lzma_code() has returned + * LZMA_UNSUPPORTED_CHECK. This way it is possible to find + * out what the unsupported Check ID was. + */ + + LZMA_GET_CHECK = 4, + /**< + * \brief Integrity check type is now available + * + * This value can be returned only by the lzma_code() function + * and only if the decoder was initialized with the + * LZMA_TELL_ANY_CHECK flag. LZMA_GET_CHECK tells the + * application that it may now call lzma_get_check() to find + * out the Check ID. This can be used, for example, to + * implement a decoder that accepts only files that have + * strong enough integrity check. + */ + + LZMA_MEM_ERROR = 5, + /**< + * \brief Cannot allocate memory + * + * Memory allocation failed, or the size of the allocation + * would be greater than SIZE_MAX. + * + * Due to internal implementation reasons, the coding cannot + * be continued even if more memory were made available after + * LZMA_MEM_ERROR. + */ + + LZMA_MEMLIMIT_ERROR = 6, + /**< + * \brief Memory usage limit was reached + * + * Decoder would need more memory than allowed by the + * specified memory usage limit. To continue decoding, + * the memory usage limit has to be increased with + * lzma_memlimit_set().
+ */ + + LZMA_FORMAT_ERROR = 7, + /**< + * \brief File format not recognized + * + * The decoder did not recognize the input as supported file + * format. This error can occur, for example, when trying to + * decode .lzma format file with lzma_stream_decoder, + * because lzma_stream_decoder accepts only the .xz format. + */ + + LZMA_OPTIONS_ERROR = 8, + /**< + * \brief Invalid or unsupported options + * + * Invalid or unsupported options, for example + * - unsupported filter(s) or filter options; or + * - reserved bits set in headers (decoder only). + * + * Rebuilding liblzma with more features enabled, or + * upgrading to a newer version of liblzma may help. + */ + + LZMA_DATA_ERROR = 9, + /**< + * \brief Data is corrupt + * + * The usage of this return value is different in encoders + * and decoders. In both encoder and decoder, the coding + * cannot continue after this error. + * + * Encoders return this if size limits of the target file + * format would be exceeded. These limits are huge, thus + * getting this error from an encoder is mostly theoretical. + * For example, the maximum compressed and uncompressed + * size of a .xz Stream is roughly 8 EiB (2^63 bytes). + * + * Decoders return this error if the input data is corrupt. + * This can mean, for example, invalid CRC32 in headers + * or invalid check of uncompressed data. + */ + + LZMA_BUF_ERROR = 10, + /**< + * \brief No progress is possible + * + * This error code is returned when the coder cannot consume + * any new input and produce any new output. The most common + * reason for this error is that the input stream being + * decoded is truncated or corrupt. + * + * This error is not fatal. Coding can be continued normally + * by providing more input and/or more output space, if + * possible. + * + * Typically the first call to lzma_code() that can do no + * progress returns LZMA_OK instead of LZMA_BUF_ERROR. Only + * the second consecutive call doing no progress will return + * LZMA_BUF_ERROR. 
This is intentional. + * + * With zlib, Z_BUF_ERROR may be returned even if the + * application is doing nothing wrong, so apps will need + * to handle Z_BUF_ERROR specially. The above hack + * guarantees that liblzma never returns LZMA_BUF_ERROR + * to properly written applications unless the input file + * is truncated or corrupt. This should simplify the + * applications a little. + */ + + LZMA_PROG_ERROR = 11, + /**< + * \brief Programming error + * + * This indicates that the arguments given to the function are + * invalid or the internal state of the decoder is corrupt. + * - Function arguments are invalid or the structures + * pointed by the argument pointers are invalid + * e.g. if strm->next_out has been set to NULL and + * strm->avail_out > 0 when calling lzma_code(). + * - lzma_* functions have been called in wrong order + * e.g. lzma_code() was called right after lzma_end(). + * - If errors occur randomly, the reason might be flaky + * hardware. + * + * If you think that your code is correct, this error code + * can be a sign of a bug in liblzma. See the documentation + * how to report bugs. + */ +} lzma_ret; + + +/** + * \brief The `action' argument for lzma_code() + * + * After the first use of LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, or LZMA_FINISH, + * the same `action' must be used until lzma_code() returns LZMA_STREAM_END. + * Also, the amount of input (that is, strm->avail_in) must not be modified + * by the application until lzma_code() returns LZMA_STREAM_END. Changing the + * `action' or modifying the amount of input will make lzma_code() return + * LZMA_PROG_ERROR. + */ +typedef enum { + LZMA_RUN = 0, + /**< + * \brief Continue coding + * + * Encoder: Encode as much input as possible. Some internal + * buffering will probably be done (depends on the filter + * chain in use), which causes latency: the input used won't + * usually be decodeable from the output of the same + * lzma_code() call.
+ * + * Decoder: Decode as much input as possible and produce as + * much output as possible. + */ + + LZMA_SYNC_FLUSH = 1, + /**< + * \brief Make all the input available at output + * + * Normally the encoder introduces some latency. + * LZMA_SYNC_FLUSH forces all the buffered data to be + * available at output without resetting the internal + * state of the encoder. This way it is possible to use + * compressed stream for example for communication over + * network. + * + * Only some filters support LZMA_SYNC_FLUSH. Trying to use + * LZMA_SYNC_FLUSH with filters that don't support it will + * make lzma_code() return LZMA_OPTIONS_ERROR. For example, + * LZMA1 doesn't support LZMA_SYNC_FLUSH but LZMA2 does. + * + * Using LZMA_SYNC_FLUSH very often can dramatically reduce + * the compression ratio. With some filters (for example, + * LZMA2), fine-tuning the compression options may help + * mitigate this problem significantly. + * + * Decoders don't support LZMA_SYNC_FLUSH. + */ + + LZMA_FULL_FLUSH = 2, + /**< + * \brief Make all the input available at output + * + * Finish encoding of the current Block. All the input + * data going to the current Block must have been given + * to the encoder (the last bytes can still be pending in + * next_in). Call lzma_code() with LZMA_FULL_FLUSH until + * it returns LZMA_STREAM_END. Then continue normally with + * LZMA_RUN or finish the Stream with LZMA_FINISH. + * + * This action is currently supported only by Stream encoder + * and easy encoder (which uses Stream encoder). If there is + * no unfinished Block, no empty Block is created. + */ + + LZMA_FINISH = 3 + /**< + * \brief Finish the coding operation + * + * Finishes the coding operation. All the input data must + * have been given to the encoder (the last bytes can still + * be pending in next_in). Call lzma_code() with LZMA_FINISH + * until it returns LZMA_STREAM_END. Once LZMA_FINISH has + * been used, the amount of input must no longer be changed + * by the application. 
+ * + * When decoding, using LZMA_FINISH is optional unless the + * LZMA_CONCATENATED flag was used when the decoder was + * initialized. When LZMA_CONCATENATED was not used, the only + * effect of LZMA_FINISH is that the amount of input must not + * be changed just like in the encoder. + */ +} lzma_action; + + +/** + * \brief Custom functions for memory handling + * + * A pointer to lzma_allocator may be passed via lzma_stream structure + * to liblzma, and some advanced functions take a pointer to lzma_allocator + * as a separate function argument. The library will use the functions + * specified in lzma_allocator for memory handling instead of the default + * malloc() and free(). C++ users should note that the custom memory + * handling functions must not throw exceptions. + * + * liblzma doesn't make an internal copy of lzma_allocator. Thus, it is + * OK to change these function pointers in the middle of the coding + * process, but obviously it must be done carefully to make sure that the + * replacement `free' can deallocate memory allocated by the earlier + * `alloc' function(s). + */ +typedef struct { + /** + * \brief Pointer to a custom memory allocation function + * + * If you don't want a custom allocator, but still want + * custom free(), set this to NULL and liblzma will use + * the standard malloc(). + * + * \param opaque lzma_allocator.opaque (see below) + * \param nmemb Number of elements like in calloc(). liblzma + * will always set nmemb to 1, so it is safe to + * ignore nmemb in a custom allocator if you like. + * The nmemb argument exists only for + * compatibility with zlib and libbzip2. + * \param size Size of an element in bytes. + * liblzma never sets this to zero. + * + * \return Pointer to the beginning of a memory block of + * `size' bytes, or NULL if allocation fails + * for some reason. When allocation fails, functions + * of liblzma return LZMA_MEM_ERROR. + * + * The allocator should not waste time zeroing the allocated buffers. 
+ * This is not only about speed, but also memory usage, since the + * operating system kernel doesn't necessarily allocate the requested + * memory in physical memory until it is actually used. With small + * input files, liblzma may actually need only a fraction of the + * memory that it requested for allocation. + * + * \note LZMA_MEM_ERROR is also used when the size of the + * allocation would be greater than SIZE_MAX. Thus, + * don't assume that the custom allocator must have + * returned NULL if some function from liblzma + * returns LZMA_MEM_ERROR. + */ + void *(LZMA_API_CALL *alloc)(void *opaque, size_t nmemb, size_t size); + + /** + * \brief Pointer to a custom memory freeing function + * + * If you don't want a custom freeing function, but still + * want a custom allocator, set this to NULL and liblzma + * will use the standard free(). + * + * \param opaque lzma_allocator.opaque (see below) + * \param ptr Pointer returned by lzma_allocator.alloc(), + * or when it is set to NULL, a pointer returned + * by the standard malloc(). + */ + void (LZMA_API_CALL *free)(void *opaque, void *ptr); + + /** + * \brief Pointer passed to .alloc() and .free() + * + * opaque is passed as the first argument to lzma_allocator.alloc() + * and lzma_allocator.free(). This is intended to ease implementing + * custom memory allocation functions for use with liblzma. + * + * If you don't need this, you should set this to NULL. + */ + void *opaque; + +} lzma_allocator; + + +/** + * \brief Internal data structure + * + * The contents of this structure are not visible outside the library. + */ +typedef struct lzma_internal_s lzma_internal; + + +/** + * \brief Passing data to and from liblzma + * + * The lzma_stream structure is used for + * - passing pointers to input and output buffers to liblzma; + * - defining custom memory handler functions; and + * - holding a pointer to coder-specific internal data structures.
+ * + * Typical usage: + * + * - After allocating lzma_stream (on stack or with malloc()), it must be + * initialized to LZMA_STREAM_INIT (see LZMA_STREAM_INIT for details). + * + * - Initialize a coder to the lzma_stream, for example by using + * lzma_easy_encoder() or lzma_auto_decoder(). Some notes: + * - In contrast to zlib, strm->next_in and strm->next_out are + * ignored by all initialization functions, thus it is safe + * to not initialize them yet. + * - The initialization functions always set strm->total_in and + * strm->total_out to zero. + * - If the initialization function fails, no memory is left allocated + * that would require freeing with lzma_end() even if some memory was + * associated with the lzma_stream structure when the initialization + * function was called. + * + * - Use lzma_code() to do the actual work. + * + * - Once the coding has been finished, the existing lzma_stream can be + * reused. It is OK to reuse lzma_stream with different initialization + * function without calling lzma_end() first. Old allocations are + * automatically freed. + * + * - Finally, use lzma_end() to free the allocated memory. lzma_end() never + * frees the lzma_stream structure itself. + * + * Application may modify the values of total_in and total_out as it wants. + * They are updated by liblzma to match the amount of data read and + * written, but aren't used for anything else. + */ +typedef struct { + const uint8_t *next_in; /**< Pointer to the next input byte. */ + size_t avail_in; /**< Number of available input bytes in next_in. */ + uint64_t total_in; /**< Total number of bytes read by liblzma. */ + + uint8_t *next_out; /**< Pointer to the next output position. */ + size_t avail_out; /**< Amount of free space in next_out. */ + uint64_t total_out; /**< Total number of bytes written by liblzma. */ + + /** + * \brief Custom memory allocation functions + * + * In most cases this is NULL which makes liblzma use + * the standard malloc() and free(). 
+ */ + lzma_allocator *allocator; + + /** Internal state is not visible to applications. */ + lzma_internal *internal; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. Excluding the initialization of this structure, + * you should not touch these, because the names of these variables + * may change. + */ + void *reserved_ptr1; + void *reserved_ptr2; + uint64_t reserved_int1; + uint64_t reserved_int2; + lzma_reserved_enum reserved_enum1; + lzma_reserved_enum reserved_enum2; + +} lzma_stream; + + +/** + * \brief Initialization for lzma_stream + * + * When you declare an instance of lzma_stream, you can immediately + * initialize it so that initialization functions know that no memory + * has been allocated yet: + * + * lzma_stream strm = LZMA_STREAM_INIT; + * + * If you need to initialize a dynamically allocated lzma_stream, you can use + * memset(strm_pointer, 0, sizeof(lzma_stream)). Strictly speaking, this + * violates the C standard since NULL may have different internal + * representation than zero, but it should be portable enough in practice. + * Anyway, for maximum portability, you can use something like this: + * + * lzma_stream tmp = LZMA_STREAM_INIT; + * *strm = tmp; + */ +#define LZMA_STREAM_INIT \ + { NULL, 0, 0, NULL, 0, 0, NULL, NULL, \ + NULL, NULL, 0, 0, LZMA_RESERVED_ENUM, LZMA_RESERVED_ENUM } + + +/** + * \brief Encode or decode data + * + * Once the lzma_stream has been successfully initialized (e.g. with + * lzma_stream_encoder()), the actual encoding or decoding is done + * using this function. The application has to update strm->next_in, + * strm->avail_in, strm->next_out, and strm->avail_out to pass input + * to and get output from liblzma. + * + * See the description of the coder-specific initialization function to find + * out what `action' values are supported by the coder. 
+ */ +extern LZMA_API(lzma_ret) lzma_code(lzma_stream *strm, lzma_action action) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Free memory allocated for the coder data structures + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * + * After lzma_end(strm), strm->internal is guaranteed to be NULL. No other + * members of the lzma_stream structure are touched. + * + * \note zlib indicates an error if application end()s unfinished + * stream structure. liblzma doesn't do this, and assumes that + * application knows what it is doing. + */ +extern LZMA_API(void) lzma_end(lzma_stream *strm) lzma_nothrow; + + +/** + * \brief Get the memory usage of decoder filter chain + * + * This function is currently supported only when *strm has been initialized + * with a function that takes a memlimit argument. With other functions, you + * should use e.g. lzma_raw_encoder_memusage() or lzma_raw_decoder_memusage() + * to estimate the memory requirements. + * + * This function is useful e.g. after LZMA_MEMLIMIT_ERROR to find out how big + * the memory usage limit should have been to decode the input. Note that + * this may give misleading information if decoding .xz Streams that have + * multiple Blocks, because each Block can have different memory requirements. + * + * \return Rough estimate of how much memory is currently allocated + * for the filter decoders. If no filter chain is currently + * allocated, some non-zero value is still returned, which is + * less than or equal to what any filter chain would indicate + * as its memory requirement. + * + * If this function isn't supported by *strm or some other error + * occurs, zero is returned. 
+ */ +extern LZMA_API(uint64_t) lzma_memusage(const lzma_stream *strm) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the current memory usage limit + * + * This function is supported only when *strm has been initialized with + * a function that takes a memlimit argument. + * + * \return On success, the current memory usage limit is returned + * (always non-zero). On error, zero is returned. + */ +extern LZMA_API(uint64_t) lzma_memlimit_get(const lzma_stream *strm) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Set the memory usage limit + * + * This function is supported only when *strm has been initialized with + * a function that takes a memlimit argument. + * + * \return - LZMA_OK: New memory usage limit successfully set. + * - LZMA_MEMLIMIT_ERROR: The new limit is too small. + * The limit was not changed. + * - LZMA_PROG_ERROR: Invalid arguments, e.g. *strm doesn't + * support memory usage limit or memlimit was zero. + */ +extern LZMA_API(lzma_ret) lzma_memlimit_set( + lzma_stream *strm, uint64_t memlimit) lzma_nothrow; diff --git a/src/liblzma/api/lzma/bcj.h b/src/liblzma/api/lzma/bcj.h new file mode 100644 index 000000000000..82e4a440b218 --- /dev/null +++ b/src/liblzma/api/lzma/bcj.h @@ -0,0 +1,90 @@ +/** + * \file lzma/bcj.h + * \brief Branch/Call/Jump conversion filters + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/* Filter IDs for lzma_filter.id */ + +#define LZMA_FILTER_X86 LZMA_VLI_C(0x04) + /**< + * Filter for x86 binaries + */ + +#define LZMA_FILTER_POWERPC LZMA_VLI_C(0x05) + /**< + * Filter for Big endian PowerPC binaries + */ + +#define LZMA_FILTER_IA64 LZMA_VLI_C(0x06) + /**< + * Filter for IA64 (Itanium) binaries. 
+ */ + +#define LZMA_FILTER_ARM LZMA_VLI_C(0x07) + /**< + * Filter for ARM binaries. + */ + +#define LZMA_FILTER_ARMTHUMB LZMA_VLI_C(0x08) + /**< + * Filter for ARMThumb binaries. + */ + +#define LZMA_FILTER_SPARC LZMA_VLI_C(0x09) + /**< + * Filter for SPARC binaries. + */ + + +/** + * \brief Options for BCJ filters + * + * The BCJ filters never change the size of the data. Specifying options + * for them is optional: if pointer to options is NULL, default value is + * used. You probably never need to specify options to BCJ filters, so just + * set the options pointer to NULL and be happy. + * + * If options with non-default values have been specified when encoding, + * the same options must also be specified when decoding. + * + * \note At the moment, none of the BCJ filters support + * LZMA_SYNC_FLUSH. If LZMA_SYNC_FLUSH is specified, + * LZMA_OPTIONS_ERROR will be returned. If there is need, + * partial support for LZMA_SYNC_FLUSH can be added in future. + * Partial means that flushing would be possible only at + * offsets that are multiple of 2, 4, or 16 depending on + * the filter, except x86 which cannot be made to support + * LZMA_SYNC_FLUSH predictably. + */ +typedef struct { + /** + * \brief Start offset for conversions + * + * This setting is useful only when the same filter is used + * _separately_ for multiple sections of the same executable file, + * and the sections contain cross-section branch/call/jump + * instructions. In that case it is beneficial to set the start + * offset of the non-first sections so that the relative addresses + * of the cross-section branch/call/jump instructions will use the + * same absolute addresses as in the first section. + * + * When the pointer to options is NULL, the default value (zero) + * is used. 
+ */ + uint32_t start_offset; + +} lzma_options_bcj; diff --git a/src/liblzma/api/lzma/block.h b/src/liblzma/api/lzma/block.h new file mode 100644 index 000000000000..8e681ed2811b --- /dev/null +++ b/src/liblzma/api/lzma/block.h @@ -0,0 +1,529 @@ +/** + * \file lzma/block.h + * \brief .xz Block handling + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Options for the Block and Block Header encoders and decoders + * + * Different Block handling functions use different parts of this structure. + * Some read some members, other functions write, and some do both. Only the + * members listed for reading need to be initialized when the specified + * functions are called. The members marked for writing will be assigned + * new values at some point either by calling the given function or by + * later calls to lzma_code(). + */ +typedef struct { + /** + * \brief Block format version + * + * To prevent API and ABI breakages if new features are needed in + * the Block field, a version number is used to indicate which + * fields in this structure are in use. For now, version must always + * be zero. With non-zero version, most Block related functions will + * return LZMA_OPTIONS_ERROR. + * + * Read by: + * - All functions that take pointer to lzma_block as argument, + * including lzma_block_header_decode(). + * + * Written by: + * - lzma_block_header_decode() + */ + uint32_t version; + + /** + * \brief Size of the Block Header field + * + * This is always a multiple of four. 
+ * + * Read by: + * - lzma_block_header_encode() + * - lzma_block_header_decode() + * - lzma_block_compressed_size() + * - lzma_block_unpadded_size() + * - lzma_block_total_size() + * - lzma_block_decoder() + * - lzma_block_buffer_decode() + * + * Written by: + * - lzma_block_header_size() + * - lzma_block_buffer_encode() + */ + uint32_t header_size; +# define LZMA_BLOCK_HEADER_SIZE_MIN 8 +# define LZMA_BLOCK_HEADER_SIZE_MAX 1024 + + /** + * \brief Type of integrity Check + * + * The Check ID is not stored into the Block Header, thus its value + * must be provided also when decoding. + * + * Read by: + * - lzma_block_header_encode() + * - lzma_block_header_decode() + * - lzma_block_compressed_size() + * - lzma_block_unpadded_size() + * - lzma_block_total_size() + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + */ + lzma_check check; + + /** + * \brief Size of the Compressed Data in bytes + * + * Encoding: If this is not LZMA_VLI_UNKNOWN, Block Header encoder + * will store this value to the Block Header. Block encoder doesn't + * care about this value, but will set it once the encoding has been + * finished. + * + * Decoding: If this is not LZMA_VLI_UNKNOWN, Block decoder will + * verify that the size of the Compressed Data field matches + * compressed_size. + * + * Usually you don't know this value when encoding in streamed mode, + * and thus cannot write this field into the Block Header. + * + * In non-streamed mode you can reserve space for this field before + * encoding the actual Block. After encoding the data, finish the + * Block by encoding the Block Header. Steps in detail: + * + * - Set compressed_size to some big enough value. If you don't know + * better, use LZMA_VLI_MAX, but remember that bigger values take + * more space in Block Header. + * + * - Call lzma_block_header_size() to see how much space you need to + * reserve for the Block Header. 
+ * + * - Encode the Block using lzma_block_encoder() and lzma_code(). + * It sets compressed_size to the correct value. + * + * - Use lzma_block_header_encode() to encode the Block Header. + * Because space was reserved in the first step, you don't need + * to call lzma_block_header_size() anymore, because due to + * reserving, header_size has to be big enough. If it is "too big", + * lzma_block_header_encode() will add enough Header Padding to + * make Block Header to match the size specified by header_size. + * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encode() + * - lzma_block_compressed_size() + * - lzma_block_unpadded_size() + * - lzma_block_total_size() + * - lzma_block_decoder() + * - lzma_block_buffer_decode() + * + * Written by: + * - lzma_block_header_decode() + * - lzma_block_compressed_size() + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + */ + lzma_vli compressed_size; + + /** + * \brief Uncompressed Size in bytes + * + * This is handled very similarly to compressed_size above. + * + * uncompressed_size is needed by fewer functions than + * compressed_size. This is because uncompressed_size isn't + * needed to validate that Block stays within proper limits. + * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encode() + * - lzma_block_decoder() + * - lzma_block_buffer_decode() + * + * Written by: + * - lzma_block_header_decode() + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + */ + lzma_vli uncompressed_size; + + /** + * \brief Array of filters + * + * There can be 1-4 filters. The end of the array is marked with + * .id = LZMA_VLI_UNKNOWN. 
+ * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encode() + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + * + * Written by: + * - lzma_block_header_decode(): Note that this does NOT free() + * the old filter options structures. All unused filters[] will + * have .id == LZMA_VLI_UNKNOWN and .options == NULL. If + * decoding fails, all filters[] are guaranteed to be + * LZMA_VLI_UNKNOWN and NULL. + * + * \note Because the array is terminated with + * .id = LZMA_VLI_UNKNOWN, the actual array must + * have LZMA_FILTERS_MAX + 1 members or the Block + * Header decoder will overflow the buffer. + */ + lzma_filter *filters; + + /** + * \brief Raw value stored in the Check field + * + * After successful coding, the first lzma_check_size(check) bytes + * of this array contain the raw value stored in the Check field. + * + * Note that CRC32 and CRC64 are stored in little endian byte order. + * Take it into account if you display the Check values to the user. + * + * Written by: + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + */ + uint8_t raw_check[LZMA_CHECK_SIZE_MAX]; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. You should not touch these, because the names + * of these variables may change. These are not and will never be used + * with the currently supported options, so it is safe to leave these + * uninitialized.
+ */ + void *reserved_ptr1; + void *reserved_ptr2; + void *reserved_ptr3; + uint32_t reserved_int1; + uint32_t reserved_int2; + lzma_vli reserved_int3; + lzma_vli reserved_int4; + lzma_vli reserved_int5; + lzma_vli reserved_int6; + lzma_vli reserved_int7; + lzma_vli reserved_int8; + lzma_reserved_enum reserved_enum1; + lzma_reserved_enum reserved_enum2; + lzma_reserved_enum reserved_enum3; + lzma_reserved_enum reserved_enum4; + lzma_bool reserved_bool1; + lzma_bool reserved_bool2; + lzma_bool reserved_bool3; + lzma_bool reserved_bool4; + lzma_bool reserved_bool5; + lzma_bool reserved_bool6; + lzma_bool reserved_bool7; + lzma_bool reserved_bool8; + +} lzma_block; + + +/** + * \brief Decode the Block Header Size field + * + * To decode Block Header using lzma_block_header_decode(), the size of the + * Block Header has to be known and stored into lzma_block.header_size. + * The size can be calculated from the first byte of a Block using this macro. + * Note that if the first byte is 0x00, it indicates beginning of Index; use + * this macro only when the byte is not 0x00. + * + * There is no encoding macro, because Block Header encoder is enough for that. + */ +#define lzma_block_header_size_decode(b) (((uint32_t)(b) + 1) * 4) + + +/** + * \brief Calculate Block Header Size + * + * Calculate the minimum size needed for the Block Header field using the + * settings specified in the lzma_block structure. Note that it is OK to + * increase the calculated header_size value as long as it is a multiple of + * four and doesn't exceed LZMA_BLOCK_HEADER_SIZE_MAX. Increasing header_size + * just means that lzma_block_header_encode() will add Header Padding. + * + * \return - LZMA_OK: Size calculated successfully and stored to + * block->header_size. + * - LZMA_OPTIONS_ERROR: Unsupported version, filters or + * filter options. + * - LZMA_PROG_ERROR: Invalid values like compressed_size == 0. + * + * \note This doesn't check that all the options are valid i.e. 
this + * may return LZMA_OK even if lzma_block_header_encode() or + * lzma_block_encoder() would fail. If you want to validate the + * filter chain, consider using lzma_memlimit_encoder() which as + * a side-effect validates the filter chain. + */ +extern LZMA_API(lzma_ret) lzma_block_header_size(lzma_block *block) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Encode Block Header + * + * The caller must have calculated the size of the Block Header already with + * lzma_block_header_size(). If a value larger than the one calculated by + * lzma_block_header_size() is used, the Block Header will be padded to the + * specified size. + * + * \param out Beginning of the output buffer. This must be + * at least block->header_size bytes. + * \param block Block options to be encoded. + * + * \return - LZMA_OK: Encoding was successful. block->header_size + * bytes were written to output buffer. + * - LZMA_OPTIONS_ERROR: Invalid or unsupported options. + * - LZMA_PROG_ERROR: Invalid arguments, for example + * block->header_size is invalid or block->filters is NULL. + */ +extern LZMA_API(lzma_ret) lzma_block_header_encode( + const lzma_block *block, uint8_t *out) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode Block Header + * + * block->version should be set to the highest value supported by the + * application; currently the only possible version is zero. This function + * will set version to the lowest value that still supports all the features + * required by the Block Header. + * + * The size of the Block Header must have already been decoded with + * lzma_block_header_size_decode() macro and stored to block->header_size. + * + * block->filters must have been allocated, but not necessarily initialized. + * Possible existing filter options are _not_ freed. + * + * \param block Destination for Block options. + * \param allocator lzma_allocator for custom allocator functions. 
+ * Set to NULL to use malloc() (and also free() + * if an error occurs). + * \param in Beginning of the input buffer. This must be + * at least block->header_size bytes. + * + * \return - LZMA_OK: Decoding was successful. block->header_size + * bytes were read from the input buffer. + * - LZMA_OPTIONS_ERROR: The Block Header specifies some + * unsupported options such as unsupported filters. This can + * happen also if block->version was set to a too low value + * compared to what would be required to properly represent + * the information stored in the Block Header. + * - LZMA_DATA_ERROR: Block Header is corrupt, for example, + * the CRC32 doesn't match. + * - LZMA_PROG_ERROR: Invalid arguments, for example + * block->header_size is invalid or block->filters is NULL. + */ +extern LZMA_API(lzma_ret) lzma_block_header_decode(lzma_block *block, + lzma_allocator *allocator, const uint8_t *in) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Validate and set Compressed Size according to Unpadded Size + * + * Block Header stores Compressed Size, but Index has Unpadded Size. If the + * application has already parsed the Index and is now decoding Blocks, + * it can calculate Compressed Size from Unpadded Size. This function does + * exactly that with error checking: + * + * - Compressed Size calculated from Unpadded Size must be positive integer, + * that is, Unpadded Size must be big enough that after Block Header and + * Check fields there's still at least one byte for Compressed Size. + * + * - If Compressed Size was present in Block Header, the new value + * calculated from Unpadded Size is compared against the value + * from Block Header. + * + * \note This function must be called _after_ decoding the Block Header + * field so that it can properly validate Compressed Size if it + * was present in Block Header. + * + * \return - LZMA_OK: block->compressed_size was set successfully. 
+ * - LZMA_DATA_ERROR: unpadded_size is too small compared to + * block->header_size and lzma_check_size(block->check). + * - LZMA_PROG_ERROR: Some values are invalid. For example, + * block->header_size must be a multiple of four and + * between 8 and 1024 inclusive. + */ +extern LZMA_API(lzma_ret) lzma_block_compressed_size( + lzma_block *block, lzma_vli unpadded_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Calculate Unpadded Size + * + * The Index field stores Unpadded Size and Uncompressed Size. The latter + * can be taken directly from the lzma_block structure after coding a Block, + * but Unpadded Size needs to be calculated from Block Header Size, + * Compressed Size, and size of the Check field. This is where this function + * is needed. + * + * \return Unpadded Size on success, or zero on error. + */ +extern LZMA_API(lzma_vli) lzma_block_unpadded_size(const lzma_block *block) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Calculate the total encoded size of a Block + * + * This is equivalent to lzma_block_unpadded_size() except that the returned + * value includes the size of the Block Padding field. + * + * \return On success, total encoded size of the Block. On error, + * zero is returned. + */ +extern LZMA_API(lzma_vli) lzma_block_total_size(const lzma_block *block) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize .xz Block encoder + * + * Valid actions for lzma_code() are LZMA_RUN, LZMA_SYNC_FLUSH (only if the + * filter chain supports it), and LZMA_FINISH. + * + * \return - LZMA_OK: All good, continue with lzma_code(). + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_UNSUPPORTED_CHECK: block->check specifies a Check ID + * that is not supported by this build of liblzma. Initializing + * the encoder failed.
+ * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_block_encoder( + lzma_stream *strm, lzma_block *block) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .xz Block decoder + * + * Valid actions for lzma_code() are LZMA_RUN and LZMA_FINISH. Using + * LZMA_FINISH is not required. It is supported only for convenience. + * + * \return - LZMA_OK: All good, continue with lzma_code(). + * - LZMA_UNSUPPORTED_CHECK: Initialization was successful, but + * the given Check ID is not supported, thus Check will be + * ignored. + * - LZMA_PROG_ERROR + * - LZMA_MEM_ERROR + */ +extern LZMA_API(lzma_ret) lzma_block_decoder( + lzma_stream *strm, lzma_block *block) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Calculate maximum output size for single-call Block encoding + * + * This is equivalent to lzma_stream_buffer_bound() but for .xz Blocks. + * See the documentation of lzma_stream_buffer_bound(). + */ +extern LZMA_API(size_t) lzma_block_buffer_bound(size_t uncompressed_size) + lzma_nothrow; + + +/** + * \brief Single-call .xz Block encoder + * + * In contrast to the multi-call encoder initialized with + * lzma_block_encoder(), this function encodes also the Block Header. This + * is required to make it possible to write appropriate Block Header also + * in case the data isn't compressible, and different filter chain has to be + * used to encode the data in uncompressed form using uncompressed chunks + * of the LZMA2 filter. + * + * When the data isn't compressible, header_size, compressed_size, and + * uncompressed_size are set just like when the data was compressible, but + * it is possible that header_size is too small to hold the filter chain + * specified in block->filters, because that isn't necessarily the filter + * chain that was actually used to encode the data. lzma_block_unpadded_size() + * still works normally, because it doesn't read the filters array. 
+ * + * \param block Block options: block->version, block->check, + * and block->filters must have been initialized. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_size Size of the input buffer + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Not enough output buffer space. + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_block_buffer_encode( + lzma_block *block, lzma_allocator *allocator, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call .xz Block decoder + * + * This is single-call equivalent of lzma_block_decoder(), and requires that + * the caller has already decoded Block Header and checked its memory usage. + * + * \param block Block options just like with lzma_block_decoder(). + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * *in_pos is updated only if decoding succeeds. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if decoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size].
+ * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_OPTIONS_ERROR + * - LZMA_DATA_ERROR + * - LZMA_MEM_ERROR + * - LZMA_BUF_ERROR: Output buffer was too small. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_block_buffer_decode( + lzma_block *block, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow; diff --git a/src/liblzma/api/lzma/check.h b/src/liblzma/api/lzma/check.h new file mode 100644 index 000000000000..6a243db0d794 --- /dev/null +++ b/src/liblzma/api/lzma/check.h @@ -0,0 +1,150 @@ +/** + * \file lzma/check.h + * \brief Integrity checks + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Type of the integrity check (Check ID) + * + * The .xz format supports multiple types of checks that are calculated + * from the uncompressed data. They vary in both speed and ability to + * detect errors. + */ +typedef enum { + LZMA_CHECK_NONE = 0, + /**< + * No Check is calculated. + * + * Size of the Check field: 0 bytes + */ + + LZMA_CHECK_CRC32 = 1, + /**< + * CRC32 using the polynomial from the IEEE 802.3 standard + * + * Size of the Check field: 4 bytes + */ + + LZMA_CHECK_CRC64 = 4, + /**< + * CRC64 using the polynomial from the ECMA-182 standard + * + * Size of the Check field: 8 bytes + */ + + LZMA_CHECK_SHA256 = 10 + /**< + * SHA-256 + * + * Size of the Check field: 32 bytes + */ +} lzma_check; + + +/** + * \brief Maximum valid Check ID + * + * The .xz file format specification specifies 16 Check IDs (0-15). Some + * of them are only reserved, that is, no actual Check algorithm has been + * assigned. When decoding, liblzma still accepts unknown Check IDs for + * future compatibility. 
If a valid but unsupported Check ID is detected, + * liblzma can indicate a warning; see the flags LZMA_TELL_NO_CHECK, + * LZMA_TELL_UNSUPPORTED_CHECK, and LZMA_TELL_ANY_CHECK in container.h. + */ +#define LZMA_CHECK_ID_MAX 15 + + +/** + * \brief Test if the given Check ID is supported + * + * Return true if the given Check ID is supported by this liblzma build. + * Otherwise false is returned. It is safe to call this with a value that + * is not in the range [0, 15]; in that case the return value is always false. + * + * You can assume that LZMA_CHECK_NONE and LZMA_CHECK_CRC32 are always + * supported (even if liblzma is built with limited features). + */ +extern LZMA_API(lzma_bool) lzma_check_is_supported(lzma_check check) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Get the size of the Check field with the given Check ID + * + * Although not all Check IDs have a check algorithm associated, the size of + * every Check is already frozen. This function returns the size (in bytes) of + * the Check field with the specified Check ID. The values are: + * { 0, 4, 4, 4, 8, 8, 8, 16, 16, 16, 32, 32, 32, 64, 64, 64 } + * + * If the argument is not in the range [0, 15], UINT32_MAX is returned. + */ +extern LZMA_API(uint32_t) lzma_check_size(lzma_check check) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Maximum size of a Check field + */ +#define LZMA_CHECK_SIZE_MAX 64 + + +/** + * \brief Calculate CRC32 + * + * Calculate CRC32 using the polynomial from the IEEE 802.3 standard. + * + * \param buf Pointer to the input buffer + * \param size Size of the input buffer + * \param crc Previously returned CRC value. This is used to + * calculate the CRC of a big buffer in smaller chunks. + * Set to zero when starting a new calculation. + * + * \return Updated CRC value, which can be passed to this function + * again to continue CRC calculation. 
+ */ +extern LZMA_API(uint32_t) lzma_crc32( + const uint8_t *buf, size_t size, uint32_t crc) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Calculate CRC64 + * + * Calculate CRC64 using the polynomial from the ECMA-182 standard. + * + * This function is used similarly to lzma_crc32(). See its documentation. + */ +extern LZMA_API(uint64_t) lzma_crc64( + const uint8_t *buf, size_t size, uint64_t crc) + lzma_nothrow lzma_attr_pure; + + +/* + * SHA-256 functions are currently not exported to public API. + * Contact Lasse Collin if you think they should be. + */ + + +/** + * \brief Get the type of the integrity check + * + * This function can be called only immediately after lzma_code() has + * returned LZMA_NO_CHECK, LZMA_UNSUPPORTED_CHECK, or LZMA_GET_CHECK. + * Calling this function in any other situation has undefined behavior. + */ +extern LZMA_API(lzma_check) lzma_get_check(const lzma_stream *strm) + lzma_nothrow; diff --git a/src/liblzma/api/lzma/container.h b/src/liblzma/api/lzma/container.h new file mode 100644 index 000000000000..0d907650fc49 --- /dev/null +++ b/src/liblzma/api/lzma/container.h @@ -0,0 +1,404 @@ +/** + * \file lzma/container.h + * \brief File formats + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/************ + * Encoding * + ************/ + +/** + * \brief Default compression preset + * + * It's not straightforward to recommend a default preset, because in some + * cases keeping the resource usage relatively low is more important than + * getting the maximum compression ratio. + */ +#define LZMA_PRESET_DEFAULT UINT32_C(6) + + +/** + * \brief Mask for preset level + * + * This is useful only if you need to extract the level from the preset + * variable. That should be rare.
+ */ +#define LZMA_PRESET_LEVEL_MASK UINT32_C(0x1F) + + +/* + * Preset flags + * + * Currently only one flag is defined. + */ + +/** + * \brief Extreme compression preset + * + * This flag modifies the preset to make the encoding significantly slower + * while improving the compression ratio only marginally. This is useful + * when you don't mind wasting time to get as small a result as possible. + * + * This flag doesn't affect the memory usage requirements of the decoder (at + * least not significantly). The memory usage of the encoder may be increased + * a little but only at the lowest preset levels (0-2). + */ +#define LZMA_PRESET_EXTREME (UINT32_C(1) << 31) + + +/** + * \brief Calculate rough memory usage of easy encoder + * + * This function is a wrapper for lzma_raw_encoder_memusage(). + * + * \param preset Compression preset (level and possible flags) + */ +extern LZMA_API(uint64_t) lzma_easy_encoder_memusage(uint32_t preset) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Calculate rough decoder memory usage of a preset + * + * This function is a wrapper for lzma_raw_decoder_memusage(). + * + * \param preset Compression preset (level and possible flags) + */ +extern LZMA_API(uint64_t) lzma_easy_decoder_memusage(uint32_t preset) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize .xz Stream encoder using a preset number + * + * This function is intended for those who just want to use the basic features + * of liblzma (that is, most developers out there). + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * \param preset Compression preset to use. A preset consists of a level + * number and zero or more flags. Usually flags aren't + * used, so preset is simply a number [0, 9] which matches + * the options -0 .. -9 of the xz command line tool. + * Additional flags can be set using bitwise-or with + * the preset level number, e.g. 6 | LZMA_PRESET_EXTREME. + * \param check Integrity check type to use.
See check.h for available + * checks. If you are unsure, use LZMA_CHECK_CRC32. + * + * \return - LZMA_OK: Initialization succeeded. Use lzma_code() to + * encode your data. + * - LZMA_MEM_ERROR: Memory allocation failed. + * - LZMA_OPTIONS_ERROR: The given compression level is not + * supported by this build of liblzma. + * - LZMA_UNSUPPORTED_CHECK: The given check type is not + * supported by this liblzma build. + * - LZMA_PROG_ERROR: One or more of the parameters have values + * that will never be valid. For example, strm == NULL. + * + * If initialization fails (return value is not LZMA_OK), all the memory + * allocated for *strm by liblzma is always freed. Thus, there is no need + * to call lzma_end() after failed initialization. + * + * If initialization succeeds, use lzma_code() to do the actual encoding. + * Valid values for `action' (the second argument of lzma_code()) are + * LZMA_RUN, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, and LZMA_FINISH. In future, + * there may be compression levels or flags that don't support LZMA_SYNC_FLUSH. + */ +extern LZMA_API(lzma_ret) lzma_easy_encoder( + lzma_stream *strm, uint32_t preset, lzma_check check) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call .xz Stream encoding using a preset number + * + * The maximum required output buffer size can be calculated with + * lzma_stream_buffer_bound(). + * + * \param preset Compression preset to use. See the description + * in lzma_easy_encoder(). + * \param check Type of the integrity check to calculate from + * uncompressed data. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_size Size of the input buffer + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. 
+ * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Not enough output buffer space. + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_easy_buffer_encode( + uint32_t preset, lzma_check check, + lzma_allocator *allocator, const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Initialize .xz Stream encoder using a custom filter chain + * + * \param strm Pointer to properly prepared lzma_stream + * \param filters Array of filters. This must be terminated with + * filters[n].id = LZMA_VLI_UNKNOWN. See filter.h for + * more information. + * \param check Type of the integrity check to calculate from + * uncompressed data. + * + * \return - LZMA_OK: Initialization was successful. + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_stream_encoder(lzma_stream *strm, + const lzma_filter *filters, lzma_check check) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .lzma encoder (legacy file format) + * + * The .lzma format is sometimes called the LZMA_Alone format, which is the + * reason for the name of this function. The .lzma format supports only the + * LZMA1 filter. There is no support for integrity checks like CRC32. + * + * Use this function if and only if you need to create files readable by + * legacy LZMA tools such as LZMA Utils 4.32.x. Moving to the .xz format + * is strongly recommended. + * + * The valid action values for lzma_code() are LZMA_RUN and LZMA_FINISH. + * No kind of flushing is supported, because the file format doesn't make + * it possible. 
+ * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_alone_encoder( + lzma_stream *strm, const lzma_options_lzma *options) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Calculate output buffer size for single-call Stream encoder + * + * When trying to compress uncompressible data, the encoded size will be + * slightly bigger than the input data. This function calculates how much + * output buffer space is required to be sure that lzma_stream_buffer_encode() + * doesn't return LZMA_BUF_ERROR. + * + * The calculated value is not exact, but it is guaranteed to be big enough. + * The actual maximum output space required may be slightly smaller (up to + * about 100 bytes). This should not be a problem in practice. + * + * If the calculated maximum size doesn't fit into size_t or would make the + * Stream grow past LZMA_VLI_MAX (which should never happen in practice), + * zero is returned to indicate the error. + * + * \note The limit calculated by this function applies only to + * single-call encoding. Multi-call encoding may (and probably + * will) have larger maximum expansion when encoding + * uncompressible data. Currently there is no function to + * calculate the maximum expansion of multi-call encoding. + */ +extern LZMA_API(size_t) lzma_stream_buffer_bound(size_t uncompressed_size) + lzma_nothrow; + + +/** + * \brief Single-call .xz Stream encoder + * + * \param filters Array of filters. This must be terminated with + * filters[n].id = LZMA_VLI_UNKNOWN. See filter.h + * for more information. + * \param check Type of the integrity check to calculate from + * uncompressed data. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). 
+ * \param in Beginning of the input buffer + * \param in_size Size of the input buffer + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Not enough output buffer space. + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_stream_buffer_encode( + lzma_filter *filters, lzma_check check, + lzma_allocator *allocator, const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/************ + * Decoding * + ************/ + +/** + * This flag makes lzma_code() return LZMA_NO_CHECK if the input stream + * being decoded has no integrity check. Note that when used with + * lzma_auto_decoder(), all .lzma files will trigger LZMA_NO_CHECK + * if LZMA_TELL_NO_CHECK is used. + */ +#define LZMA_TELL_NO_CHECK UINT32_C(0x01) + + +/** + * This flag makes lzma_code() return LZMA_UNSUPPORTED_CHECK if the input + * stream has an integrity check, but the type of the integrity check is not + * supported by this liblzma version or build. Such files can still be + * decoded, but the integrity check cannot be verified. + */ +#define LZMA_TELL_UNSUPPORTED_CHECK UINT32_C(0x02) + + +/** + * This flag makes lzma_code() return LZMA_GET_CHECK as soon as the type + * of the integrity check is known. The type can then be got with + * lzma_get_check(). + */ +#define LZMA_TELL_ANY_CHECK UINT32_C(0x04) + + +/** + * This flag enables decoding of concatenated files with file formats that + * allow concatenating compressed files as is. From the formats currently + * supported by liblzma, only the .xz format allows concatenated files. 
+ * Concatenated files are not allowed with the legacy .lzma format. + * + * This flag also affects the usage of the `action' argument for lzma_code(). + * When LZMA_CONCATENATED is used, lzma_code() won't return LZMA_STREAM_END + * unless LZMA_FINISH is used as `action'. Thus, the application has to set + * LZMA_FINISH in the same way as it does when encoding. + * + * If LZMA_CONCATENATED is not used, the decoders still accept LZMA_FINISH + * as `action' for lzma_code(), but the usage of LZMA_FINISH isn't required. + */ +#define LZMA_CONCATENATED UINT32_C(0x08) + + +/** + * \brief Initialize .xz Stream decoder + * + * \param strm Pointer to properly prepared lzma_stream + * \param memlimit Rough memory usage limit as bytes + * \param flags Bitwise-or of zero or more of the decoder flags: + * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK, + * LZMA_TELL_ANY_CHECK, LZMA_CONCATENATED + * + * \return - LZMA_OK: Initialization was successful. + * - LZMA_MEM_ERROR: Cannot allocate memory. + * - LZMA_OPTIONS_ERROR: Unsupported flags + */ +extern LZMA_API(lzma_ret) lzma_stream_decoder( + lzma_stream *strm, uint64_t memlimit, uint32_t flags) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode .xz Streams and .lzma files with autodetection + * + * This decoder autodetects between the .xz and .lzma file formats, and + * calls lzma_stream_decoder() or lzma_alone_decoder() once the type + * of the input file has been detected. + * + * \param strm Pointer to properly prepared lzma_stream + * \param memlimit Rough memory usage limit as bytes + * \param flags Bitwise-or of flags, or zero for no flags. + * + * \return - LZMA_OK: Initialization was successful. + * - LZMA_MEM_ERROR: Cannot allocate memory. 
+ * - LZMA_OPTIONS_ERROR: Unsupported flags + */ +extern LZMA_API(lzma_ret) lzma_auto_decoder( + lzma_stream *strm, uint64_t memlimit, uint32_t flags) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .lzma decoder (legacy file format) + * + * Valid `action' arguments to lzma_code() are LZMA_RUN and LZMA_FINISH. + * There is no need to use LZMA_FINISH, but allowing it may simplify + * certain types of applications. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + */ +extern LZMA_API(lzma_ret) lzma_alone_decoder( + lzma_stream *strm, uint64_t memlimit) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call .xz Stream decoder + * + * \param memlimit Pointer to how much memory the decoder is allowed + * to allocate. The value pointed to by this pointer is + * modified if and only if LZMA_MEMLIMIT_ERROR is + * returned. + * \param flags Bitwise-or of zero or more of the decoder flags: + * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK, + * LZMA_CONCATENATED. Note that LZMA_TELL_ANY_CHECK + * is not allowed and will return LZMA_PROG_ERROR. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * *in_pos is updated only if decoding succeeds. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if decoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_FORMAT_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_DATA_ERROR + * - LZMA_NO_CHECK: This can be returned only if using + * the LZMA_TELL_NO_CHECK flag.
+ * - LZMA_UNSUPPORTED_CHECK: This can be returned only if using + * the LZMA_TELL_UNSUPPORTED_CHECK flag. + * - LZMA_MEM_ERROR + * - LZMA_MEMLIMIT_ERROR: Memory usage limit was reached. + * The minimum required memlimit value was stored to *memlimit. + * - LZMA_BUF_ERROR: Output buffer was too small. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_stream_buffer_decode( + uint64_t *memlimit, uint32_t flags, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; diff --git a/src/liblzma/api/lzma/delta.h b/src/liblzma/api/lzma/delta.h new file mode 100644 index 000000000000..592fc4f8496a --- /dev/null +++ b/src/liblzma/api/lzma/delta.h @@ -0,0 +1,77 @@ +/** + * \file lzma/delta.h + * \brief Delta filter + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Filter ID + * + * Filter ID of the Delta filter. This is used as lzma_filter.id. + */ +#define LZMA_FILTER_DELTA LZMA_VLI_C(0x03) + + +/** + * \brief Type of the delta calculation + * + * Currently only byte-wise delta is supported. Other possible types could + * be, for example, delta of 16/32/64-bit little/big endian integers, but + * these are not currently planned since byte-wise delta is almost as good. + */ +typedef enum { + LZMA_DELTA_TYPE_BYTE +} lzma_delta_type; + + +/** + * \brief Options for the Delta filter + * + * These options are needed by both encoder and decoder. + */ +typedef struct { + /** For now, this must always be LZMA_DELTA_TYPE_BYTE. */ + lzma_delta_type type; + + /** + * \brief Delta distance + * + * With the only currently supported type, LZMA_DELTA_TYPE_BYTE, + * the distance is as bytes. 
+ * + * Examples: + * - 16-bit stereo audio: distance = 4 bytes + * - 24-bit RGB image data: distance = 3 bytes + */ + uint32_t dist; +# define LZMA_DELTA_DIST_MIN 1 +# define LZMA_DELTA_DIST_MAX 256 + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. You should not touch these, because the names + * of these variables may change. These are not and will never be used + * when type is LZMA_DELTA_TYPE_BYTE, so it is safe to leave these + * uninitialized. + */ + uint32_t reserved_int1; + uint32_t reserved_int2; + uint32_t reserved_int3; + uint32_t reserved_int4; + void *reserved_ptr1; + void *reserved_ptr2; + +} lzma_options_delta; diff --git a/src/liblzma/api/lzma/filter.h b/src/liblzma/api/lzma/filter.h new file mode 100644 index 000000000000..038a93398c68 --- /dev/null +++ b/src/liblzma/api/lzma/filter.h @@ -0,0 +1,421 @@ +/** + * \file lzma/filter.h + * \brief Common filter related types + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Maximum number of filters in a chain + * + * A filter chain can have 1-4 filters, of which three are allowed to change + * the size of the data. Usually only one or two filters are needed. + */ +#define LZMA_FILTERS_MAX 4 + + +/** + * \brief Filter options + * + * This structure is used to pass Filter ID and a pointer to the filter's + * options to liblzma. A few functions work with a single lzma_filter + * structure, while most functions expect a filter chain. + * + * A filter chain is indicated with an array of lzma_filter structures. + * The array is terminated with .id = LZMA_VLI_UNKNOWN. Thus, the filter + * array must have LZMA_FILTERS_MAX + 1 elements (that is, five) to + * be able to hold any arbitrary filter chain.
This is important when + * using lzma_block_header_decode() from block.h, because a too small + * array would make liblzma write past the end of the filters array. + */ +typedef struct { + /** + * \brief Filter ID + * + * Use constants whose names begin with `LZMA_FILTER_' to specify + * different filters. In an array of lzma_filter structures, use + * LZMA_VLI_UNKNOWN to indicate end of filters. + * + * \note This is not an enum, because on some systems enums + * cannot be 64-bit. + */ + lzma_vli id; + + /** + * \brief Pointer to filter-specific options structure + * + * If the filter doesn't need options, set this to NULL. If id is + * set to LZMA_VLI_UNKNOWN, options is ignored, and thus + * doesn't need to be initialized. + * + * Some filters support changing the options in the middle of + * the encoding process. These filters store the pointer of the + * options structure and communicate with the application via + * modifications of the options structure. + */ + void *options; + +} lzma_filter; + + +/** + * \brief Test if the given Filter ID is supported for encoding + * + * Return true if the given Filter ID is supported for encoding by this + * liblzma build. Otherwise false is returned. + * + * There is no way to list which filters are available in this particular + * liblzma version and build. It would be useless, because the application + * couldn't know what kind of options the filter would need. + */ +extern LZMA_API(lzma_bool) lzma_filter_encoder_is_supported(lzma_vli id) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Test if the given Filter ID is supported for decoding + * + * Return true if the given Filter ID is supported for decoding by this + * liblzma build. Otherwise false is returned. + */ +extern LZMA_API(lzma_bool) lzma_filter_decoder_is_supported(lzma_vli id) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Copy the filters array + * + * Copy the Filter IDs and filter-specific options from src to dest.
+ * Up to LZMA_FILTERS_MAX filters are copied, plus the terminating + * .id == LZMA_VLI_UNKNOWN. Thus, dest should have at least + * LZMA_FILTERS_MAX + 1 elements space unless the caller knows that + * src is smaller than that. + * + * Unless the filter-specific options is NULL, the Filter ID has to be + * supported by liblzma, because liblzma needs to know the size of every + * filter-specific options structure. The filter-specific options are not + * validated. If options is NULL, any unsupported Filter IDs are copied + * without returning an error. + * + * Old filter-specific options in dest are not freed, so dest doesn't + * need to be initialized by the caller in any way. + * + * If an error occurs, memory possibly already allocated by this function + * is always freed. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR: Unsupported Filter ID and its options + * is not NULL. + * - LZMA_PROG_ERROR: src or dest is NULL. + */ +extern LZMA_API(lzma_ret) lzma_filters_copy(const lzma_filter *src, + lzma_filter *dest, lzma_allocator *allocator) lzma_nothrow; + + +/** + * \brief Calculate rough memory requirements for raw encoder + * + * Because the calculation is rough, this function can be used to calculate + * the memory requirements for Block and Stream encoders too. + * + * \param filters Array of filters terminated with + * .id == LZMA_VLI_UNKNOWN. + * + * \return Rough number of bytes of memory required for the given + * filter chain when encoding. + */ +extern LZMA_API(uint64_t) lzma_raw_encoder_memusage(const lzma_filter *filters) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Calculate rough memory requirements for raw decoder + * + * Because the calculation is rough, this function can be used to calculate + * the memory requirements for Block and Stream decoders too. + * + * \param filters Array of filters terminated with + * .id == LZMA_VLI_UNKNOWN. 
+ * + * \return Rough number of bytes of memory required for the given + * filter chain when decoding. + */ +extern LZMA_API(uint64_t) lzma_raw_decoder_memusage(const lzma_filter *filters) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize raw encoder + * + * This function may be useful when implementing custom file formats. + * + * \param strm Pointer to properly prepared lzma_stream + * \param filters Array of lzma_filter structures. The end of the + * array must be marked with .id = LZMA_VLI_UNKNOWN. + * + * The `action' with lzma_code() can be LZMA_RUN, LZMA_SYNC_FLUSH (if the + * filter chain supports it), or LZMA_FINISH. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_raw_encoder( + lzma_stream *strm, const lzma_filter *filters) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize raw decoder + * + * The initialization of raw decoder goes similarly to raw encoder. + * + * The `action' with lzma_code() can be LZMA_RUN or LZMA_FINISH. Using + * LZMA_FINISH is not required, it is supported just for convenience. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_raw_decoder( + lzma_stream *strm, const lzma_filter *filters) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Update the filter chain in the encoder + * + * This function is for advanced users only. This function has two slightly + * different purposes: + * + * - After LZMA_FULL_FLUSH when using Stream encoder: Set a new filter + * chain, which will be used starting from the next Block. + * + * - After LZMA_SYNC_FLUSH using Raw, Block, or Stream encoder: Change + * the filter-specific options in the middle of encoding. The actual + * filters in the chain (Filter IDs) cannot be changed. In the future, + * it might become possible to change the filter options without + * using LZMA_SYNC_FLUSH. 
+ * + * While rarely useful, this function may be called also when no data has + * been compressed yet. In that case, this function will behave as if + * LZMA_FULL_FLUSH (Stream encoder) or LZMA_SYNC_FLUSH (Raw or Block + * encoder) had been used right before calling this function. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_MEMLIMIT_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_filters_update( + lzma_stream *strm, const lzma_filter *filters) lzma_nothrow; + + +/** + * \brief Single-call raw encoder + * + * \param filters Array of lzma_filter structures. The end of the + * array must be marked with .id = LZMA_VLI_UNKNOWN. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_size Size of the input buffer + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Not enough output buffer space. + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + * + * \note There is no function to calculate how big output buffer + * would surely be big enough. (lzma_stream_buffer_bound() + * works only for lzma_stream_buffer_encode().) + */ +extern LZMA_API(lzma_ret) lzma_raw_buffer_encode( + const lzma_filter *filters, lzma_allocator *allocator, + const uint8_t *in, size_t in_size, uint8_t *out, + size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Single-call raw decoder + * + * \param filters Array of lzma_filter structures. The end of the + * array must be marked with .id = LZMA_VLI_UNKNOWN. + * \param allocator lzma_allocator for custom allocator functions. 
+ * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * *in_pos is updated only if decoding succeeds. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if decoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + */ +extern LZMA_API(lzma_ret) lzma_raw_buffer_decode( + const lzma_filter *filters, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Get the size of the Filter Properties field + * + * This function may be useful when implementing custom file formats + * using the raw encoder and decoder. + * + * \param size Pointer to uint32_t to hold the size of the properties + * \param filter Filter ID and options (the size of the properties may + * vary depending on the options) + * + * \return - LZMA_OK + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + * + * \note This function validates the Filter ID, but does not + * necessarily validate the options. Thus, it is possible + * that this returns LZMA_OK while the following call to + * lzma_properties_encode() returns LZMA_OPTIONS_ERROR. + */ +extern LZMA_API(lzma_ret) lzma_properties_size( + uint32_t *size, const lzma_filter *filter) lzma_nothrow; + + +/** + * \brief Encode the Filter Properties field + * + * \param filter Filter ID and options + * \param props Buffer to hold the encoded options. The size of + * buffer must have been already determined with + * lzma_properties_size(). + * + * \return - LZMA_OK + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + * + * \note Even this function won't validate more options than actually + * necessary.
Thus, it is possible that encoding the properties + * succeeds but using the same options to initialize the encoder + * will fail. + * + * \note It is OK to skip calling this function if + * lzma_properties_size() indicated that the size + * of the Filter Properties field is zero. + */ +extern LZMA_API(lzma_ret) lzma_properties_encode( + const lzma_filter *filter, uint8_t *props) lzma_nothrow; + + +/** + * \brief Decode the Filter Properties field + * + * \param filter filter->id must have been set to the correct + * Filter ID. filter->options doesn't need to be + * initialized (it's not freed by this function). The + * decoded options will be stored to filter->options. + * filter->options is set to NULL if there are no + * properties or if an error occurs. + * \param allocator Custom memory allocator used to allocate the + * options. Set to NULL to use the default malloc(), + * and in case of an error, also free(). + * \param props Input buffer containing the properties. + * \param props_size Size of the properties. This must be the exact + * size; giving too much or too little input will + * return LZMA_OPTIONS_ERROR. + * + * \return - LZMA_OK + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + */ +extern LZMA_API(lzma_ret) lzma_properties_decode( + lzma_filter *filter, lzma_allocator *allocator, + const uint8_t *props, size_t props_size) lzma_nothrow; + + +/** + * \brief Calculate encoded size of a Filter Flags field + * + * Knowing the size of Filter Flags is useful to know when allocating + * memory to hold the encoded Filter Flags. + * + * \param size Pointer to integer to hold the calculated size + * \param filters Filter ID and associated options whose encoded + * size is to be calculated + * + * \return - LZMA_OK: *size set successfully. Note that this doesn't + * guarantee that filters->options is valid, thus + * lzma_filter_flags_encode() may still fail. + * - LZMA_OPTIONS_ERROR: Unknown Filter ID or unsupported options. 
+ * - LZMA_PROG_ERROR: Invalid options + * + * \note If you need to calculate size of List of Filter Flags, + * you need to loop over every lzma_filter entry. + */ +extern LZMA_API(lzma_ret) lzma_filter_flags_size( + uint32_t *size, const lzma_filter *filters) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Encode Filter Flags into given buffer + * + * In contrast to some functions, this doesn't allocate the needed buffer. + * This is due to how this function is used internally by liblzma. + * + * \param filters Filter ID and options to be encoded + * \param out Beginning of the output buffer + * \param out_pos out[*out_pos] is the next write position. This + * is updated by the encoder. + * \param out_size out[out_size] is the first byte to not write. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_OPTIONS_ERROR: Invalid or unsupported options. + * - LZMA_PROG_ERROR: Invalid options or not enough output + * buffer space (you should have checked it with + * lzma_filter_flags_size()). + */ +extern LZMA_API(lzma_ret) lzma_filter_flags_encode(const lzma_filter *filters, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode Filter Flags from given buffer + * + * The decoded result is stored into *filters. filters->options is + * initialized but the old value is NOT free()d. 
+ * + * \return - LZMA_OK + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_filter_flags_decode( + lzma_filter *filters, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size) + lzma_nothrow lzma_attr_warn_unused_result; diff --git a/src/liblzma/api/lzma/hardware.h b/src/liblzma/api/lzma/hardware.h new file mode 100644 index 000000000000..f44cb602ff90 --- /dev/null +++ b/src/liblzma/api/lzma/hardware.h @@ -0,0 +1,51 @@ +/** + * \file lzma/hardware.h + * \brief Hardware information + * + * Since liblzma can consume a lot of system resources, it also provides + * ways to limit the resource usage. Applications linking against liblzma + * need to make the actual decisions about how many resources to let liblzma use. + * To ease making these decisions, liblzma provides functions to find out + * the relevant capabilities of the underlying hardware. Currently there + * is only a function to find out the amount of RAM, but in the future there + * will be also a function to detect how many concurrent threads the system + * can run. + * + * \note On some operating systems, these functions may temporarily + * load a shared library or open file descriptor(s) to find out + * the requested hardware information. Unless the application + * assumes that specific file descriptors are not touched by + * other threads, this should have no effect on thread safety. + * Possible operations involving file descriptors will restart + * the syscalls if they return EINTR. + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead.
+#endif + + +/** + * \brief Get the total amount of physical memory (RAM) in bytes + * + * This function may be useful when determining a reasonable memory + * usage limit for decompressing or how much memory it is OK to use + * for compressing. For example, the default limit used by the xz + * command line tool is 40 % of RAM. + * + * \return On success, the total amount of physical memory in bytes + * is returned. If the amount of RAM cannot be determined, + * zero is returned. This can happen if an error occurs + * or if there is no code in liblzma to detect the amount + * of RAM on the specific operating system. + */ +extern LZMA_API(uint64_t) lzma_physmem(void) lzma_nothrow; diff --git a/src/liblzma/api/lzma/index.h b/src/liblzma/api/lzma/index.h new file mode 100644 index 000000000000..8d7a799f4ab5 --- /dev/null +++ b/src/liblzma/api/lzma/index.h @@ -0,0 +1,677 @@ +/** + * \file lzma/index.h + * \brief Handling of .xz Index and related information + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Opaque data type to hold the Index(es) and other information + * + * lzma_index often holds just one .xz Index and possibly the Stream Flags + * of the same Stream and size of the Stream Padding field. However, + * multiple lzma_indexes can be concatenated with lzma_index_cat() and then + * there may be information about multiple Streams in the same lzma_index. + * + * Notes about thread safety: Only one thread may modify lzma_index at + * a time. All functions that take non-const pointer to lzma_index + * modify it. 
As long as no thread is modifying the lzma_index, getting + * information from the same lzma_index can be done from multiple threads + * at the same time with functions that take a const pointer to + * lzma_index or use lzma_index_iter. The same iterator must be used + * only by one thread at a time, of course, but there can be as many + * iterators for the same lzma_index as needed. + */ +typedef struct lzma_index_s lzma_index; + + +/** + * \brief Iterator to get information about Blocks and Streams + */ +typedef struct { + struct { + /** + * \brief Pointer to Stream Flags + * + * This is NULL if Stream Flags have not been set for + * this Stream with lzma_index_stream_flags(). + */ + const lzma_stream_flags *flags; + + const void *reserved_ptr1; + const void *reserved_ptr2; + const void *reserved_ptr3; + + /** + * \brief Stream number in the lzma_index + * + * The first Stream is 1. + */ + lzma_vli number; + + /** + * \brief Number of Blocks in the Stream + * + * If this is zero, the block structure below has + * undefined values. + */ + lzma_vli block_count; + + /** + * \brief Compressed start offset of this Stream + * + * The offset is relative to the beginning of the lzma_index + * (i.e. usually the beginning of the .xz file). + */ + lzma_vli compressed_offset; + + /** + * \brief Uncompressed start offset of this Stream + * + * The offset is relative to the beginning of the lzma_index + * (i.e. usually the beginning of the .xz file). + */ + lzma_vli uncompressed_offset; + + /** + * \brief Compressed size of this Stream + * + * This includes all headers except the possible + * Stream Padding after this Stream. + */ + lzma_vli compressed_size; + + /** + * \brief Uncompressed size of this Stream + */ + lzma_vli uncompressed_size; + + /** + * \brief Size of Stream Padding after this Stream + * + * If it hasn't been set with lzma_index_stream_padding(), + * this defaults to zero. Stream Padding is always + * a multiple of four bytes. 
+ */ + lzma_vli padding; + + lzma_vli reserved_vli1; + lzma_vli reserved_vli2; + lzma_vli reserved_vli3; + lzma_vli reserved_vli4; + } stream; + + struct { + /** + * \brief Block number in the file + * + * The first Block is 1. + */ + lzma_vli number_in_file; + + /** + * \brief Compressed start offset of this Block + * + * This offset is relative to the beginning of the + * lzma_index (i.e. usually the beginning of the .xz file). + * Normally this is where you should seek in the .xz file + * to start decompressing this Block. + */ + lzma_vli compressed_file_offset; + + /** + * \brief Uncompressed start offset of this Block + * + * This offset is relative to the beginning of the lzma_index + * (i.e. usually the beginning of the .xz file). + */ + lzma_vli uncompressed_file_offset; + + /** + * \brief Block number in this Stream + * + * The first Block is 1. + */ + lzma_vli number_in_stream; + + /** + * \brief Compressed start offset of this Block + * + * This offset is relative to the beginning of the Stream + * containing this Block. + */ + lzma_vli compressed_stream_offset; + + /** + * \brief Uncompressed start offset of this Block + * + * This offset is relative to the beginning of the Stream + * containing this Block. + */ + lzma_vli uncompressed_stream_offset; + + /** + * \brief Uncompressed size of this Block + * + * You should pass this to the Block decoder if you will + * decode this Block. + * + * When doing random-access reading, it is possible that + * the target offset is not exactly at Block boundary. One + * will need to compare the target offset against + * uncompressed_file_offset or uncompressed_stream_offset, + * and possibly decode and throw away some amount of data + * before reaching the target offset. + */ + lzma_vli uncompressed_size; + + /** + * \brief Unpadded size of this Block + * + * You should pass this to the Block decoder if you will + * decode this Block. 
+ */ + lzma_vli unpadded_size; + + /** + * \brief Total compressed size + * + * This includes all headers and padding in this Block. + * This is useful if you need to know how many bytes + * the Block decoder will actually read. + */ + lzma_vli total_size; + + lzma_vli reserved_vli1; + lzma_vli reserved_vli2; + lzma_vli reserved_vli3; + lzma_vli reserved_vli4; + + const void *reserved_ptr1; + const void *reserved_ptr2; + const void *reserved_ptr3; + const void *reserved_ptr4; + } block; + + /* + * Internal data which is used to store the state of the iterator. + * The exact format may vary between liblzma versions, so don't + * touch these in any way. + */ + union { + const void *p; + size_t s; + lzma_vli v; + } internal[6]; +} lzma_index_iter; + + +/** + * \brief Operation mode for lzma_index_iter_next() + */ +typedef enum { + LZMA_INDEX_ITER_ANY = 0, + /**< + * \brief Get the next Block or Stream + * + * Go to the next Block if the current Stream has at least + * one Block left. Otherwise go to the next Stream even if + * it has no Blocks. If the Stream has no Blocks + * (lzma_index_iter.stream.block_count == 0), + * lzma_index_iter.block will have undefined values. + */ + + LZMA_INDEX_ITER_STREAM = 1, + /**< + * \brief Get the next Stream + * + * Go to the next Stream even if the current Stream has + * unread Blocks left. If the next Stream has at least one + * Block, the iterator will point to the first Block. + * If there are no Blocks, lzma_index_iter.block will have + * undefined values. + */ + + LZMA_INDEX_ITER_BLOCK = 2, + /**< + * \brief Get the next Block + * + * Go to the next Block if the current Stream has at least + * one Block left. If the current Stream has no Blocks left, + * the next Stream with at least one Block is located and + * the iterator will be made to point to the first Block of + * that Stream. 
+ */ + + LZMA_INDEX_ITER_NONEMPTY_BLOCK = 3 + /**< + * \brief Get the next non-empty Block + * + * This is like LZMA_INDEX_ITER_BLOCK except that it will + * skip Blocks whose Uncompressed Size is zero. + */ + +} lzma_index_iter_mode; + + +/** + * \brief Calculate memory usage of lzma_index + * + * On disk, the size of the Index field depends on both the number of Records + * stored and how big values the Records store (due to variable-length integer + * encoding). When the Index is kept in lzma_index structure, the memory usage + * depends only on the number of Records/Blocks stored in the Index(es), and + * in case of concatenated lzma_indexes, the number of Streams. The size in + * RAM is almost always significantly bigger than in the encoded form on disk. + * + * This function calculates an approximate amount of memory needed to hold + * the given number of Streams and Blocks in lzma_index structure. This + * value may vary between CPU architectures and also between liblzma versions + * if the internal implementation is modified. + */ +extern LZMA_API(uint64_t) lzma_index_memusage( + lzma_vli streams, lzma_vli blocks) lzma_nothrow; + + +/** + * \brief Calculate the memory usage of an existing lzma_index + * + * This is a shorthand for lzma_index_memusage(lzma_index_stream_count(i), + * lzma_index_block_count(i)). + */ +extern LZMA_API(uint64_t) lzma_index_memused(const lzma_index *i) + lzma_nothrow; + + +/** + * \brief Allocate and initialize a new lzma_index structure + * + * \return On success, a pointer to an empty initialized lzma_index is + * returned. If allocation fails, NULL is returned. + */ +extern LZMA_API(lzma_index *) lzma_index_init(lzma_allocator *allocator) + lzma_nothrow; + + +/** + * \brief Deallocate lzma_index + * + * If i is NULL, this does nothing.
+ */ +extern LZMA_API(void) lzma_index_end(lzma_index *i, lzma_allocator *allocator) + lzma_nothrow; + + +/** + * \brief Add a new Block to lzma_index + * + * \param i Pointer to a lzma_index structure + * \param allocator Pointer to lzma_allocator, or NULL to + * use malloc() + * \param unpadded_size Unpadded Size of a Block. This can be + * calculated with lzma_block_unpadded_size() + * after encoding or decoding the Block. + * \param uncompressed_size Uncompressed Size of a Block. This can be + * taken directly from lzma_block structure + * after encoding or decoding the Block. + * + * Appending a new Block does not invalidate iterators. For example, + * if an iterator was pointing to the end of the lzma_index, after + * lzma_index_append() it is possible to read the next Block with + * an existing iterator. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR: Compressed or uncompressed size of the + * Stream or size of the Index field would grow too big. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_append( + lzma_index *i, lzma_allocator *allocator, + lzma_vli unpadded_size, lzma_vli uncompressed_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Set the Stream Flags + * + * Set the Stream Flags of the last (and typically the only) Stream + * in lzma_index. This can be useful when reading information from the + * lzma_index, because to decode Blocks, knowing the integrity check type + * is needed. + * + * The given Stream Flags are copied into internal preallocated structure + * in the lzma_index, thus the caller doesn't need to keep the *stream_flags + * available after calling this function. + * + * \return - LZMA_OK + * - LZMA_OPTIONS_ERROR: Unsupported stream_flags->version. 
+ * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_stream_flags( + lzma_index *i, const lzma_stream_flags *stream_flags) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Get the types of integrity Checks + * + * If lzma_index_stream_flags() is used to set the Stream Flags for + * every Stream, lzma_index_checks() can be used to get a bitmask to + * indicate which Check types have been used. It can be useful e.g. if + * showing the Check types to the user. + * + * The bitmask is 1 << check_id, e.g. CRC32 is 1 << 1 and SHA-256 is 1 << 10. + */ +extern LZMA_API(uint32_t) lzma_index_checks(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Set the amount of Stream Padding + * + * Set the amount of Stream Padding of the last (and typically the only) + * Stream in the lzma_index. This is needed when planning to do random-access + * reading within multiple concatenated Streams. + * + * By default, the amount of Stream Padding is assumed to be zero bytes. + * + * \return - LZMA_OK + * - LZMA_DATA_ERROR: The file size would grow too big. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_stream_padding( + lzma_index *i, lzma_vli stream_padding) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Get the number of Streams + */ +extern LZMA_API(lzma_vli) lzma_index_stream_count(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the number of Blocks + * + * This returns the total number of Blocks in lzma_index. To get number + * of Blocks in individual Streams, use lzma_index_iter. + */ +extern LZMA_API(lzma_vli) lzma_index_block_count(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the size of the Index field as bytes + * + * This is needed to verify the Backward Size field in the Stream Footer.
+ */ +extern LZMA_API(lzma_vli) lzma_index_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the total size of the Stream + * + * If multiple lzma_indexes have been combined, this works as if the Blocks + * were in a single Stream. This is useful if you are going to combine + * Blocks from multiple Streams into a single new Stream. + */ +extern LZMA_API(lzma_vli) lzma_index_stream_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the total size of the Blocks + * + * This doesn't include the Stream Header, Stream Footer, Stream Padding, + * or Index fields. + */ +extern LZMA_API(lzma_vli) lzma_index_total_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the total size of the file + * + * When no lzma_indexes have been combined with lzma_index_cat() and there is + * no Stream Padding, this function is identical to lzma_index_stream_size(). + * If multiple lzma_indexes have been combined, this includes also the headers + * of each separate Stream and the possible Stream Padding fields. + */ +extern LZMA_API(lzma_vli) lzma_index_file_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the uncompressed size of the file + */ +extern LZMA_API(lzma_vli) lzma_index_uncompressed_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize an iterator + * + * \param iter Pointer to a lzma_index_iter structure + * \param i lzma_index to which the iterator will be associated + * + * This function associates the iterator with the given lzma_index, and calls + * lzma_index_iter_rewind() on the iterator. + * + * This function doesn't allocate any memory, thus there is no + * lzma_index_iter_end(). The iterator is valid as long as the + * associated lzma_index is valid, that is, until lzma_index_end() or + * using it as source in lzma_index_cat(). 
Specifically, lzma_index doesn't + * become invalid if new Blocks are added to it with lzma_index_append() or + * if it is used as the destination in lzma_index_cat(). + * + * It is safe to make copies of an initialized lzma_index_iter, for example, + * to easily restart reading at some particular position. + */ +extern LZMA_API(void) lzma_index_iter_init( + lzma_index_iter *iter, const lzma_index *i) lzma_nothrow; + + +/** + * \brief Rewind the iterator + * + * Rewind the iterator so that next call to lzma_index_iter_next() will + * return the first Block or Stream. + */ +extern LZMA_API(void) lzma_index_iter_rewind(lzma_index_iter *iter) + lzma_nothrow; + + +/** + * \brief Get the next Block or Stream + * + * \param iter Iterator initialized with lzma_index_iter_init() + * \param mode Specify what kind of information the caller wants + * to get. See lzma_index_iter_mode for details. + * + * \return If next Block or Stream matching the mode was found, *iter + * is updated and this function returns false. If no Block or + * Stream matching the mode is found, *iter is not modified + * and this function returns true. If mode is set to an unknown + * value, *iter is not modified and this function returns true. + */ +extern LZMA_API(lzma_bool) lzma_index_iter_next( + lzma_index_iter *iter, lzma_index_iter_mode mode) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Locate a Block + * + * If it is possible to seek in the .xz file, it is possible to parse + * the Index field(s) and use lzma_index_iter_locate() to do random-access + * reading with granularity of Block size. + * + * \param iter Iterator that was earlier initialized with + * lzma_index_iter_init(). 
+ * \param target Uncompressed target offset which the caller would + * like to locate from the Stream + * + * If the target is smaller than the uncompressed size of the Stream (can be + * checked with lzma_index_uncompressed_size()): + * - Information about the Stream and Block containing the requested + * uncompressed offset is stored into *iter. + * - Internal state of the iterator is adjusted so that + * lzma_index_iter_next() can be used to read subsequent Blocks or Streams. + * - This function returns false. + * + * If target is greater than or equal to the uncompressed size of the Stream, *iter + * is not modified, and this function returns true. + */ +extern LZMA_API(lzma_bool) lzma_index_iter_locate( + lzma_index_iter *iter, lzma_vli target) lzma_nothrow; + + +/** + * \brief Concatenate lzma_indexes + * + * Concatenating lzma_indexes is useful when doing random-access reading in + * multi-Stream .xz file, or when combining multiple Streams into single + * Stream. + * + * \param dest lzma_index after which src is appended + * \param src lzma_index to be appended after dest. If this + * function succeeds, the memory allocated for src + * is freed or moved to be part of dest, and all + * iterators pointing to src will become invalid. + * \param allocator Custom memory allocator; can be NULL to use + * malloc() and free(). + * + * \return - LZMA_OK: lzma_indexes were concatenated successfully. + * src is now a dangling pointer. + * - LZMA_DATA_ERROR: *dest would grow too big. + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_cat(lzma_index *lzma_restrict dest, + lzma_index *lzma_restrict src, + lzma_allocator *allocator) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Duplicate lzma_index + * + * \return A copy of the lzma_index, or NULL if memory allocation failed.
+ */ +extern LZMA_API(lzma_index *) lzma_index_dup( + const lzma_index *i, lzma_allocator *allocator) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .xz Index encoder + * + * \param strm Pointer to properly prepared lzma_stream + * \param i Pointer to lzma_index which should be encoded. + * + * The only valid action value for lzma_code() is LZMA_RUN. + * + * \return - LZMA_OK: Initialization succeeded, continue with lzma_code(). + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_encoder( + lzma_stream *strm, const lzma_index *i) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .xz Index decoder + * + * \param strm Pointer to properly prepared lzma_stream + * \param i The decoded Index will be made available via + * this pointer. Initially this function will + * set *i to NULL (the old value is ignored). If + * decoding succeeds (lzma_code() returns + * LZMA_STREAM_END), *i will be set to point + * to a new lzma_index, which the application + * has to later free with lzma_index_end(). + * \param memlimit How much memory the resulting lzma_index is + * allowed to require. + * + * The only valid action value for lzma_code() is LZMA_RUN. + * + * \return - LZMA_OK: Initialization succeeded, continue with lzma_code(). + * - LZMA_MEM_ERROR + * - LZMA_MEMLIMIT_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_decoder( + lzma_stream *strm, lzma_index **i, uint64_t memlimit) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call .xz Index encoder + * + * \param i lzma_index to be encoded + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Encoding was successful. 
+ * - LZMA_BUF_ERROR: Output buffer is too small. Use + * lzma_index_size() to find out how much output + * space is needed. + * - LZMA_PROG_ERROR + * + * \note This function doesn't take allocator argument since all + * the internal data is allocated on stack. + */ +extern LZMA_API(lzma_ret) lzma_index_buffer_encode(const lzma_index *i, + uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Single-call .xz Index decoder + * + * \param i If decoding succeeds, *i will point to a new + * lzma_index, which the application has to + * later free with lzma_index_end(). If an error + * occurs, *i will be NULL. The old value of *i + * is always ignored and thus doesn't need to be + * initialized by the caller. + * \param memlimit Pointer to how much memory the resulting + * lzma_index is allowed to require. The value + * pointed by this pointer is modified if and only + * if LZMA_MEMLIMIT_ERROR is returned. + * \param allocator Pointer to lzma_allocator, or NULL to use malloc() + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * *in_pos is updated only if decoding succeeds. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_MEM_ERROR + * - LZMA_MEMLIMIT_ERROR: Memory usage limit was reached. + * The minimum required memlimit value was stored to *memlimit. 
+ * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_buffer_decode(lzma_index **i, + uint64_t *memlimit, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size) + lzma_nothrow; diff --git a/src/liblzma/api/lzma/index_hash.h b/src/liblzma/api/lzma/index_hash.h new file mode 100644 index 000000000000..94726e7beeae --- /dev/null +++ b/src/liblzma/api/lzma/index_hash.h @@ -0,0 +1,107 @@ +/** + * \file lzma/index_hash.h + * \brief Validates Index by using a hash function + * + * Hashing makes it possible to use constant amount of memory to validate + * Index of arbitrary size. + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + +/** + * \brief Opaque data type to hold the Index hash + */ +typedef struct lzma_index_hash_s lzma_index_hash; + + +/** + * \brief Allocate and initialize a new lzma_index_hash structure + * + * If index_hash is NULL, a new lzma_index_hash structure is allocated, + * initialized, and a pointer to it returned. If allocation fails, NULL + * is returned. + * + * If index_hash is non-NULL, it is reinitialized and the same pointer + * returned. In this case, return value cannot be NULL or a different + * pointer than the index_hash that was given as an argument.
+ */ +extern LZMA_API(lzma_index_hash *) lzma_index_hash_init( + lzma_index_hash *index_hash, lzma_allocator *allocator) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Deallocate lzma_index_hash structure + */ +extern LZMA_API(void) lzma_index_hash_end( + lzma_index_hash *index_hash, lzma_allocator *allocator) + lzma_nothrow; + + +/** + * \brief Add a new Record to an Index hash + * + * \param index_hash Pointer to a lzma_index_hash structure + * \param unpadded_size Unpadded Size of a Block + * \param uncompressed_size Uncompressed Size of a Block + * + * \return - LZMA_OK + * - LZMA_DATA_ERROR: Compressed or uncompressed size of the + * Stream or size of the Index field would grow too big. + * - LZMA_PROG_ERROR: Invalid arguments or this function is being + * used when lzma_index_hash_decode() has already been used. + */ +extern LZMA_API(lzma_ret) lzma_index_hash_append(lzma_index_hash *index_hash, + lzma_vli unpadded_size, lzma_vli uncompressed_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode and validate the Index field + * + * After telling the sizes of all Blocks with lzma_index_hash_append(), + * the actual Index field is decoded with this function. Specifically, + * once decoding of the Index field has been started, no more Records + * can be added using lzma_index_hash_append(). + * + * This function doesn't use lzma_stream structure to pass the input data. + * Instead, the input buffer is specified using three arguments. This is + * because it matches better the internal APIs of liblzma. + * + * \param index_hash Pointer to a lzma_index_hash structure + * \param in Pointer to the beginning of the input buffer + * \param in_pos in[*in_pos] is the next byte to process + * \param in_size in[in_size] is the first byte not to process + * + * \return - LZMA_OK: So far good, but more input is needed. + * - LZMA_STREAM_END: Index decoded successfully and it matches + * the Records given with lzma_index_hash_append().
+ * - LZMA_DATA_ERROR: Index is corrupt or doesn't match the + * information given with lzma_index_hash_append(). + * - LZMA_BUF_ERROR: Cannot progress because *in_pos >= in_size. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_hash_decode(lzma_index_hash *index_hash, + const uint8_t *in, size_t *in_pos, size_t in_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Get the size of the Index field as bytes + * + * This is needed to verify the Backward Size field in the Stream Footer. + */ +extern LZMA_API(lzma_vli) lzma_index_hash_size( + const lzma_index_hash *index_hash) + lzma_nothrow lzma_attr_pure; diff --git a/src/liblzma/api/lzma/lzma.h b/src/liblzma/api/lzma/lzma.h new file mode 100644 index 000000000000..c17736d7eaac --- /dev/null +++ b/src/liblzma/api/lzma/lzma.h @@ -0,0 +1,397 @@ +/** + * \file lzma/lzma.h + * \brief LZMA1 and LZMA2 filters + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/** + * \brief LZMA1 Filter ID + * + * LZMA1 is the very same thing as what was called just LZMA in LZMA Utils, + * 7-Zip, and LZMA SDK. It's called LZMA1 here to prevent developers from + * accidentally using LZMA when they actually want LZMA2. + * + * LZMA1 shouldn't be used for new applications unless you _really_ know + * what you are doing. LZMA2 is almost always a better choice. + */ +#define LZMA_FILTER_LZMA1 LZMA_VLI_C(0x4000000000000001) + +/** + * \brief LZMA2 Filter ID + * + * Usually you want this instead of LZMA1. Compared to LZMA1, LZMA2 adds + * support for LZMA_SYNC_FLUSH, uncompressed chunks (smaller expansion + * when trying to compress uncompressible data), possibility to change + * lc/lp/pb in the middle of encoding, and some other internal improvements.
+ */ +#define LZMA_FILTER_LZMA2 LZMA_VLI_C(0x21) + + +/** + * \brief Match finders + * + * Match finder has major effect on both speed and compression ratio. + * Usually hash chains are faster than binary trees. + * + * The memory usage formulas are only rough estimates, which are closest to + * reality when dict_size is a power of two. The formulas are more complex + * in reality, and can also change a little between liblzma versions. Use + * lzma_raw_encoder_memusage() to get more accurate estimate of memory usage. + */ +typedef enum { + LZMA_MF_HC3 = 0x03, + /**< + * \brief Hash Chain with 2- and 3-byte hashing + * + * Minimum nice_len: 3 + * + * Memory usage: + * - dict_size <= 16 MiB: dict_size * 7.5 + * - dict_size > 16 MiB: dict_size * 5.5 + 64 MiB + */ + + LZMA_MF_HC4 = 0x04, + /**< + * \brief Hash Chain with 2-, 3-, and 4-byte hashing + * + * Minimum nice_len: 4 + * + * Memory usage: dict_size * 7.5 + */ + + LZMA_MF_BT2 = 0x12, + /**< + * \brief Binary Tree with 2-byte hashing + * + * Minimum nice_len: 2 + * + * Memory usage: dict_size * 9.5 + */ + + LZMA_MF_BT3 = 0x13, + /**< + * \brief Binary Tree with 2- and 3-byte hashing + * + * Minimum nice_len: 3 + * + * Memory usage: + * - dict_size <= 16 MiB: dict_size * 11.5 + * - dict_size > 16 MiB: dict_size * 9.5 + 64 MiB + */ + + LZMA_MF_BT4 = 0x14 + /**< + * \brief Binary Tree with 2-, 3-, and 4-byte hashing + * + * Minimum nice_len: 4 + * + * Memory usage: dict_size * 11.5 + */ +} lzma_match_finder; + + +/** + * \brief Test if given match finder is supported + * + * Return true if the given match finder is supported by this liblzma build. + * Otherwise false is returned. It is safe to call this with a value that + * isn't listed in lzma_match_finder enumeration; the return value will be + * false. + * + * There is no way to list which match finders are available in this + * particular liblzma version and build.
It would be useless, because + * a new match finder, which the application developer wasn't aware of, + * could require giving additional options to the encoder that the older + * match finders don't need. + */ +extern LZMA_API(lzma_bool) lzma_mf_is_supported(lzma_match_finder match_finder) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Compression modes + * + * This selects the function used to analyze the data produced by the match + * finder. + */ +typedef enum { + LZMA_MODE_FAST = 1, + /**< + * \brief Fast compression + * + * Fast mode is usually at its best when combined with + * a hash chain match finder. + */ + + LZMA_MODE_NORMAL = 2 + /**< + * \brief Normal compression + * + * This is usually notably slower than fast mode. Use this + * together with binary tree match finders to expose the + * full potential of the LZMA1 or LZMA2 encoder. + */ +} lzma_mode; + + +/** + * \brief Test if given compression mode is supported + * + * Return true if the given compression mode is supported by this liblzma + * build. Otherwise false is returned. It is safe to call this with a value + * that isn't listed in lzma_mode enumeration; the return value will be false. + * + * There is no way to list which modes are available in this particular + * liblzma version and build. It would be useless, because a new compression + * mode, which the application developer wasn't aware of, could require giving + * additional options to the encoder that the older modes don't need. + */ +extern LZMA_API(lzma_bool) lzma_mode_is_supported(lzma_mode mode) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Options specific to the LZMA1 and LZMA2 filters + * + * Since LZMA1 and LZMA2 share most of the code, it's simplest to share + * the options structure too. For encoding, all but the reserved variables + * need to be initialized unless specifically mentioned otherwise.
+ * + * For raw decoding, both LZMA1 and LZMA2 need dict_size, preset_dict, and + * preset_dict_size (if preset_dict != NULL). LZMA1 needs also lc, lp, and pb. + */ +typedef struct { + /** + * \brief Dictionary size in bytes + * + * Dictionary size indicates how many bytes of the recently processed + * uncompressed data is kept in memory. One method to reduce size of + * the uncompressed data is to store distance-length pairs, which + * indicate what data to repeat from the dictionary buffer. Thus, + * the bigger the dictionary, the better the compression ratio + * usually is. + * + * Maximum size of the dictionary depends on multiple things: + * - Memory usage limit + * - Available address space (not a problem on 64-bit systems) + * - Selected match finder (encoder only) + * + * Currently the maximum dictionary size for encoding is 1.5 GiB + * (i.e. (UINT32_C(1) << 30) + (UINT32_C(1) << 29)) even on 64-bit + * systems for certain match finder implementation reasons. In the + * future, there may be match finders that support bigger + * dictionaries. + * + * Decoder already supports dictionaries up to 4 GiB - 1 B (i.e. + * UINT32_MAX), so increasing the maximum dictionary size of the + * encoder won't cause problems for old decoders. + * + * Because extremely small dictionary sizes would have unneeded + * overhead in the decoder, the minimum dictionary size is 4096 bytes. + * + * \note When decoding, too big dictionary does no other harm + * than wasting memory. + */ + uint32_t dict_size; +# define LZMA_DICT_SIZE_MIN UINT32_C(4096) +# define LZMA_DICT_SIZE_DEFAULT (UINT32_C(1) << 23) + + /** + * \brief Pointer to an initial dictionary + * + * It is possible to initialize the LZ77 history window using + * a preset dictionary. It is useful when compressing many + * similar, relatively small chunks of data independently from + * each other. The preset dictionary should contain typical + * strings that occur in the files being compressed.
The most + * probable strings should be near the end of the preset dictionary. + * + * This feature should be used only in special situations. For + * now, it works correctly only with raw encoding and decoding. + * Currently none of the container formats supported by + * liblzma allow preset dictionary when decoding, thus if + * you create a .xz or .lzma file with preset dictionary, it + * cannot be decoded with the regular decoder functions. In the + * future, the .xz format will likely get support for preset + * dictionary though. + */ + const uint8_t *preset_dict; + + /** + * \brief Size of the preset dictionary + * + * Specifies the size of the preset dictionary. If the size is + * bigger than dict_size, only the last dict_size bytes are + * processed. + * + * This variable is read only when preset_dict is not NULL. + * If preset_dict is not NULL but preset_dict_size is zero, + * no preset dictionary is used (identical to only setting + * preset_dict to NULL). + */ + uint32_t preset_dict_size; + + /** + * \brief Number of literal context bits + * + * How many of the highest bits of the previous uncompressed + * eight-bit byte (also known as `literal') are taken into + * account when predicting the bits of the next literal. + * + * \todo Example + * + * There is a limit that applies to literal context bits and literal + * position bits together: lc + lp <= 4. Without this limit the + * decoding could become very slow, which could have security related + * results in some cases like email servers doing virus scanning. + * This limit also simplifies the internal implementation in liblzma. + * + * There may be LZMA1 streams that have lc + lp > 4 (maximum possible + * lc would be 8). It is not possible to decode such streams with + * liblzma. 
+ */ + uint32_t lc; +# define LZMA_LCLP_MIN 0 +# define LZMA_LCLP_MAX 4 +# define LZMA_LC_DEFAULT 3 + + /** + * \brief Number of literal position bits + * + * How many of the lowest bits of the current position (number + * of bytes from the beginning of the uncompressed data) in the + * uncompressed data is taken into account when predicting the + * bits of the next literal (a single eight-bit byte). + * + * \todo Example + */ + uint32_t lp; +# define LZMA_LP_DEFAULT 0 + + /** + * \brief Number of position bits + * + * How many of the lowest bits of the current position in the + * uncompressed data is taken into account when estimating + * probabilities of matches. A match is a sequence of bytes for + * which a matching sequence is found from the dictionary and + * thus can be stored as distance-length pair. + * + * Example: If most of the matches occur at byte positions of + * 8 * n + 3, that is, 3, 11, 19, ... set pb to 3, because 2**3 == 8. + */ + uint32_t pb; +# define LZMA_PB_MIN 0 +# define LZMA_PB_MAX 4 +# define LZMA_PB_DEFAULT 2 + + /** Compression mode */ + lzma_mode mode; + + /** + * \brief Nice length of a match + * + * This determines how many bytes the encoder compares from the match + * candidates when looking for the best match. Once a match of at + * least nice_len bytes long is found, the encoder stops looking for + * better candidates and encodes the match. (Naturally, if the found + * match is actually longer than nice_len, the actual length is + * encoded; it's not truncated to nice_len.) + * + * Bigger values usually increase the compression ratio and + * compression time. For most files, 32 to 128 is a good value, + * which gives very good compression ratio at good speed. + * + * The exact minimum value depends on the match finder. The maximum + * is 273, which is the maximum length of a match that LZMA1 and + * LZMA2 can encode. 
+ */ + uint32_t nice_len; + + /** Match finder ID */ + lzma_match_finder mf; + + /** + * \brief Maximum search depth in the match finder + * + * For every input byte, match finder searches through the hash chain + * or binary tree in a loop, each iteration going one step deeper in + * the chain or tree. The searching stops if + * - a match of at least nice_len bytes long is found; + * - all match candidates from the hash chain or binary tree have + * been checked; or + * - maximum search depth is reached. + * + * Maximum search depth is needed to prevent the match finder from + * wasting too much time in case there are lots of short match + * candidates. On the other hand, stopping the search before all + * candidates have been checked can reduce compression ratio. + * + * Setting depth to zero tells liblzma to use an automatic default + * value, that depends on the selected match finder and nice_len. + * The default is in the range [10, 200] or so (it may vary between + * liblzma versions). + * + * Using a bigger depth value than the default can increase + * compression ratio in some cases. There is no strict maximum value, + * but high values (thousands or millions) should be used with care: + * the encoder could remain fast enough with typical input, but + * malicious input could cause the match finder to slow down + * dramatically, possibly creating a denial of service attack. + */ + uint32_t depth; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. You should not touch these, because the names + * of these variables may change. These are and will never be used + * with the currently supported options, so it is safe to leave these + * uninitialized. 
+ */ + void *reserved_ptr1; + void *reserved_ptr2; + uint32_t reserved_int1; + uint32_t reserved_int2; + uint32_t reserved_int3; + uint32_t reserved_int4; + uint32_t reserved_int5; + uint32_t reserved_int6; + uint32_t reserved_int7; + uint32_t reserved_int8; + lzma_reserved_enum reserved_enum1; + lzma_reserved_enum reserved_enum2; + lzma_reserved_enum reserved_enum3; + lzma_reserved_enum reserved_enum4; + +} lzma_options_lzma; + + +/** + * \brief Set a compression preset to lzma_options_lzma structure + * + * 0 is the fastest and 9 is the slowest. These match the switches -0 .. -9 + * of the xz command line tool. In addition, it is possible to bitwise-or + * flags to the preset. Currently only LZMA_PRESET_EXTREME is supported. + * The flags are defined in container.h, because the flags are used also + * with lzma_easy_encoder(). + * + * The preset values are subject to changes between liblzma versions. + * + * This function is available only if LZMA1 or LZMA2 encoder has been enabled + * when building liblzma. + */ +extern LZMA_API(lzma_bool) lzma_lzma_preset( + lzma_options_lzma *options, uint32_t preset) lzma_nothrow; diff --git a/src/liblzma/api/lzma/stream_flags.h b/src/liblzma/api/lzma/stream_flags.h new file mode 100644 index 000000000000..d255bdda15f6 --- /dev/null +++ b/src/liblzma/api/lzma/stream_flags.h @@ -0,0 +1,227 @@ +/** + * \file lzma/stream_flags.h + * \brief .xz Stream Header and Stream Footer encoder and decoder + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Size of Stream Header and Stream Footer + * + * Stream Header and Stream Footer have the same size and they are not + * going to change even if a newer version of the .xz file format is + * developed in future. 
+ */ +#define LZMA_STREAM_HEADER_SIZE 12 + + +/** + * \brief Options for encoding/decoding Stream Header and Stream Footer + */ +typedef struct { + /** + * \brief Stream Flags format version + * + * To prevent API and ABI breakages if new features are needed in + * Stream Header or Stream Footer, a version number is used to + * indicate which fields in this structure are in use. For now, + * version must always be zero. With non-zero version, the + * lzma_stream_header_encode() and lzma_stream_footer_encode() + * will return LZMA_OPTIONS_ERROR. + * + * lzma_stream_header_decode() and lzma_stream_footer_decode() + * will always set this to the lowest value that supports all the + * features indicated by the Stream Flags field. The application + * must check that the version number set by the decoding functions + * is supported by the application. Otherwise it is possible that + * the application will decode the Stream incorrectly. + */ + uint32_t version; + + /** + * \brief Backward Size + * + * Backward Size must be a multiple of four bytes. In this Stream + * format version, Backward Size is the size of the Index field. + * + * Backward Size isn't actually part of the Stream Flags field, but + * it is convenient to include in this structure anyway. Backward + * Size is present only in the Stream Footer. There is no need to + * initialize backward_size when encoding Stream Header. + * + * lzma_stream_header_decode() always sets backward_size to + * LZMA_VLI_UNKNOWN so that it is convenient to use + * lzma_stream_flags_compare() when both Stream Header and Stream + * Footer have been decoded. + */ + lzma_vli backward_size; +# define LZMA_BACKWARD_SIZE_MIN 4 +# define LZMA_BACKWARD_SIZE_MAX (LZMA_VLI_C(1) << 34) + + /** + * \brief Check ID + * + * This indicates the type of the integrity check calculated from + * uncompressed data. + */ + lzma_check check; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. 
You should not touch these, because the + * names of these variables may change. + * + * (We will never be able to use all of these since Stream Flags + * is just two bytes plus Backward Size of four bytes. But it's + * nice to have the proper types when they are needed.) + */ + lzma_reserved_enum reserved_enum1; + lzma_reserved_enum reserved_enum2; + lzma_reserved_enum reserved_enum3; + lzma_reserved_enum reserved_enum4; + lzma_reserved_enum reserved_enum5; + lzma_reserved_enum reserved_enum6; + lzma_bool reserved_bool1; + lzma_bool reserved_bool2; + lzma_bool reserved_bool3; + lzma_bool reserved_bool4; + lzma_bool reserved_bool5; + lzma_bool reserved_bool6; + lzma_bool reserved_bool7; + lzma_bool reserved_bool8; + uint32_t reserved_int1; + uint32_t reserved_int2; + uint32_t reserved_int3; + uint32_t reserved_int4; + +} lzma_stream_flags; + + +/** + * \brief Encode Stream Header + * + * \param options Stream Header options to be encoded. + * options->backward_size is ignored and doesn't + * need to be initialized. + * \param out Beginning of the output buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_OPTIONS_ERROR: options->version is not supported by + * this liblzma version. + * - LZMA_PROG_ERROR: Invalid options. + */ +extern LZMA_API(lzma_ret) lzma_stream_header_encode( + const lzma_stream_flags *options, uint8_t *out) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Encode Stream Footer + * + * \param options Stream Footer options to be encoded. + * \param out Beginning of the output buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_OPTIONS_ERROR: options->version is not supported by + * this liblzma version. + * - LZMA_PROG_ERROR: Invalid options. 
+ */ +extern LZMA_API(lzma_ret) lzma_stream_footer_encode( + const lzma_stream_flags *options, uint8_t *out) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode Stream Header + * + * \param options Target for the decoded Stream Header options. + * \param in Beginning of the input buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * options->backward_size is always set to LZMA_VLI_UNKNOWN. This is to + * help comparing Stream Flags from Stream Header and Stream Footer with + * lzma_stream_flags_compare(). + * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_FORMAT_ERROR: Magic bytes don't match, thus the given + * buffer cannot be Stream Header. + * - LZMA_DATA_ERROR: CRC32 doesn't match, thus the header + * is corrupt. + * - LZMA_OPTIONS_ERROR: Unsupported options are present + * in the header. + * + * \note When decoding .xz files that contain multiple Streams, it may + * make sense to print "file format not recognized" only if + * decoding of the Stream Header of the _first_ Stream gives + * LZMA_FORMAT_ERROR. If non-first Stream Header gives + * LZMA_FORMAT_ERROR, the message used for LZMA_DATA_ERROR is + * probably more appropriate. + * + * For example, Stream decoder in liblzma uses LZMA_DATA_ERROR if + * LZMA_FORMAT_ERROR is returned by lzma_stream_header_decode() + * when decoding non-first Stream. + */ +extern LZMA_API(lzma_ret) lzma_stream_header_decode( + lzma_stream_flags *options, const uint8_t *in) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode Stream Footer + * + * \param options Target for the decoded Stream Footer options. + * \param in Beginning of the input buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_FORMAT_ERROR: Magic bytes don't match, thus the given + * buffer cannot be Stream Footer. + * - LZMA_DATA_ERROR: CRC32 doesn't match, thus the Stream Footer + * is corrupt. + * - LZMA_OPTIONS_ERROR: Unsupported options are present + * in Stream Footer.
+ * + * \note If Stream Header was already decoded successfully, but + * decoding Stream Footer returns LZMA_FORMAT_ERROR, the + * application should probably report some other error message + * than "file format not recognized", since the file more likely + * is corrupt (possibly truncated). Stream decoder in liblzma + * uses LZMA_DATA_ERROR in this situation. + */ +extern LZMA_API(lzma_ret) lzma_stream_footer_decode( + lzma_stream_flags *options, const uint8_t *in) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Compare two lzma_stream_flags structures + * + * backward_size values are compared only if both are not + * LZMA_VLI_UNKNOWN. + * + * \return - LZMA_OK: Both are equal. If either had backward_size set + * to LZMA_VLI_UNKNOWN, backward_size values were not + * compared or validated. + * - LZMA_DATA_ERROR: The structures differ. + * - LZMA_OPTIONS_ERROR: version in either structure is greater + * than the maximum supported version (currently zero). + * - LZMA_PROG_ERROR: Invalid value, e.g. invalid check or + * backward_size. + */ +extern LZMA_API(lzma_ret) lzma_stream_flags_compare( + const lzma_stream_flags *a, const lzma_stream_flags *b) + lzma_nothrow lzma_attr_pure; diff --git a/src/liblzma/api/lzma/subblock.h b/src/liblzma/api/lzma/subblock.h new file mode 100644 index 000000000000..4ffb049824b7 --- /dev/null +++ b/src/liblzma/api/lzma/subblock.h @@ -0,0 +1,200 @@ +/** + * \file lzma/subblock.h + * \brief Subblock filter + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Filter ID + * + * Filter ID of the Subblock filter. This is used as lzma_filter.id. 
+ */ +#define LZMA_FILTER_SUBBLOCK LZMA_VLI_C(0x01) + + +/** + * \brief Subfilter mode + * + * See lzma_options_subblock.subfilter_mode for details. + */ +typedef enum { + LZMA_SUBFILTER_NONE, + /**< + * No Subfilter is in use. + */ + + LZMA_SUBFILTER_SET, + /**< + * New Subfilter has been requested to be initialized. + */ + + LZMA_SUBFILTER_RUN, + /**< + * Subfilter is active. + */ + + LZMA_SUBFILTER_FINISH + /**< + * Subfilter has been requested to be finished. + */ +} lzma_subfilter_mode; + + +/** + * \brief Options for the Subblock filter + * + * Specifying options for the Subblock filter is optional: if the pointer + * options is NULL, no subfilters are allowed and the default value is used + * for subblock_data_size. + */ +typedef struct { + /* Options for encoder and decoder */ + + /** + * \brief Allowing subfilters + * + * If this is true, subfilters are allowed. + * + * In the encoder, if this is set to false, subfilter_mode and + * subfilter_options are completely ignored. + */ + lzma_bool allow_subfilters; + + /* Options for encoder only */ + + /** + * \brief Alignment + * + * The Subblock filter encapsulates the input data into Subblocks. + * Each Subblock has a header which takes a few bytes of space. + * When the output of the Subblock encoder is fed to another filter + * that takes advantage of the alignment of the input data (e.g. LZMA), + * the Subblock filter can add padding to keep the actual data parts + * in the Subblocks aligned correctly. + * + * The alignment should be a positive integer. Subblock filter will + * add enough padding between Subblocks so that this is true for + * every payload byte: + * input_offset % alignment == output_offset % alignment + * + * The Subblock filter assumes that the first output byte will be + * written to a position in the output stream that is properly + * aligned. This requirement is automatically met when the start + * offset of the Stream or Block is correctly told to Block or + * Stream encoder.
+ */ + uint32_t alignment; +# define LZMA_SUBBLOCK_ALIGNMENT_MIN 1 +# define LZMA_SUBBLOCK_ALIGNMENT_MAX 32 +# define LZMA_SUBBLOCK_ALIGNMENT_DEFAULT 4 + + /** + * \brief Size of the Subblock Data part of each Subblock + * + * This value is re-read every time a new Subblock is started. + * + * Bigger values + * - save a few bytes of space; + * - increase latency in the encoder (but no effect for decoding); + * - decrease memory locality (increased cache pollution) in the + * encoder (no effect in decoding). + */ + uint32_t subblock_data_size; +# define LZMA_SUBBLOCK_DATA_SIZE_MIN 1 +# define LZMA_SUBBLOCK_DATA_SIZE_MAX (UINT32_C(1) << 28) +# define LZMA_SUBBLOCK_DATA_SIZE_DEFAULT 4096 + + /** + * \brief Run-length encoder remote control + * + * The Subblock filter has an internal run-length encoder (RLE). It + * can be useful when the data includes byte sequences that repeat + * very many times. The RLE can be used also when a Subfilter is + * in use; the RLE will be applied to the output of the Subfilter. + * + * Note that in contrast to traditional RLE, this RLE is intended to + * be used only when there's a lot of data to be repeated. If the + * input data has e.g. 500 bytes of NULs now and then, this RLE + * is probably useless, because plain LZMA should provide better + * results. + * + * Due to the above reasons, it was decided to keep the implementation + * of the RLE very simple. When the rle variable is non-zero, + * subblock_data_size must be a multiple of rle. Once the Subblock + * encoder has got subblock_data_size bytes of input, it will check + * if the whole buffer of the last subblock_data_size can be + * represented with repeats of chunks having size of rle bytes. + * + * If there are consecutive identical buffers of subblock_data_size + * bytes, they will be encoded using a single repeat entry if + * possible. + * + * If need arises, more advanced RLE can be implemented later + * without breaking API or ABI.
+ */ + uint32_t rle; +# define LZMA_SUBBLOCK_RLE_OFF 0 +# define LZMA_SUBBLOCK_RLE_MIN 1 +# define LZMA_SUBBLOCK_RLE_MAX 256 + + /** + * \brief Subfilter remote control + * + * When the Subblock filter is initialized, this variable must be + * LZMA_SUBFILTER_NONE or LZMA_SUBFILTER_SET. + * + * When subfilter_mode is LZMA_SUBFILTER_NONE, the application may + * put Subfilter options to subfilter_options structure, and then + * set subfilter_mode to LZMA_SUBFILTER_SET. No new input data will + * be read until the Subfilter has been enabled. Once the Subfilter + * has been enabled, liblzma will set subfilter_mode to + * LZMA_SUBFILTER_RUN. + * + * When subfilter_mode is LZMA_SUBFILTER_RUN, the application may + * set subfilter_mode to LZMA_SUBFILTER_FINISH. All the input + * currently available will be encoded before unsetting the + * Subfilter. Application must not change the amount of available + * input until the Subfilter has finished. Once the Subfilter has + * finished, liblzma will set subfilter_mode to LZMA_SUBFILTER_NONE. + * + * If the intent is to have Subfilter enabled to the very end of + * the data, it is not needed to separately disable Subfilter with + * LZMA_SUBFILTER_FINISH. Using LZMA_FINISH as the second argument + * of lzma_code() will make the Subblock encoder to disable the + * Subfilter once all the data has been ran through the Subfilter. + * + * After the first call with LZMA_SYNC_FLUSH or LZMA_FINISH, the + * application must not change subfilter_mode until LZMA_STREAM_END. + * Setting LZMA_SUBFILTER_SET/LZMA_SUBFILTER_FINISH and + * LZMA_SYNC_FLUSH/LZMA_FINISH _at the same time_ is fine. + * + * \note This variable is ignored if allow_subfilters is false. + */ + lzma_subfilter_mode subfilter_mode; + + /** + * \brief Subfilter and its options + * + * When no Subfilter is used, the data is copied as is into Subblocks. + * Setting a Subfilter allows encoding some parts of the data with + * an additional filter. 
It is possible to use many different Subfilters + * in the same Block, although only one can be used at once. + * + * \note This variable is ignored if allow_subfilters is false. + */ + lzma_filter subfilter_options; + +} lzma_options_subblock; diff --git a/src/liblzma/api/lzma/version.h b/src/liblzma/api/lzma/version.h new file mode 100644 index 000000000000..0f7c2d3737db --- /dev/null +++ b/src/liblzma/api/lzma/version.h @@ -0,0 +1,121 @@ +/** + * \file lzma/version.h + * \brief Version number + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/* + * Version number split into components + */ +#define LZMA_VERSION_MAJOR 4 +#define LZMA_VERSION_MINOR 999 +#define LZMA_VERSION_PATCH 9 +#define LZMA_VERSION_STABILITY LZMA_VERSION_STABILITY_BETA + +#ifndef LZMA_VERSION_COMMIT +# define LZMA_VERSION_COMMIT "" +#endif + + +/* + * Map symbolic stability levels to integers. + */ +#define LZMA_VERSION_STABILITY_ALPHA 0 +#define LZMA_VERSION_STABILITY_BETA 1 +#define LZMA_VERSION_STABILITY_STABLE 2 + + +/** + * \brief Compile-time version number + * + * The version number is of format xyyyzzzs where + * - x = major + * - yyy = minor + * - zzz = revision + * - s indicates stability: 0 = alpha, 1 = beta, 2 = stable + * + * The same xyyyzzz triplet is never reused with different stability levels. + * For example, if 5.1.0alpha has been released, there will never be 5.1.0beta + * or 5.1.0 stable. + * + * \note The version number of liblzma has nothing to do with + * the version number of Igor Pavlov's LZMA SDK.
+ */ +#define LZMA_VERSION (LZMA_VERSION_MAJOR * UINT32_C(10000000) \ + + LZMA_VERSION_MINOR * UINT32_C(10000) \ + + LZMA_VERSION_PATCH * UINT32_C(10) \ + + LZMA_VERSION_STABILITY) + + +/* + * Macros to construct the compile-time version string + */ +#if LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_ALPHA +# define LZMA_VERSION_STABILITY_STRING "alpha" +#elif LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_BETA +# define LZMA_VERSION_STABILITY_STRING "beta" +#elif LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_STABLE +# define LZMA_VERSION_STABILITY_STRING "" +#else +# error Incorrect LZMA_VERSION_STABILITY +#endif + +#define LZMA_VERSION_STRING_C_(major, minor, patch, stability, commit) \ + #major "." #minor "." #patch stability commit + +#define LZMA_VERSION_STRING_C(major, minor, patch, stability, commit) \ + LZMA_VERSION_STRING_C_(major, minor, patch, stability, commit) + + +/** + * \brief Compile-time version as a string + * + * This can be for example "4.999.5alpha", "4.999.8beta", or "5.0.0" (stable + * versions don't have any "stable" suffix). In future, a snapshot built + * from source code repository may include an additional suffix, for example + * "4.999.8beta-21-g1d92". The commit ID won't be available in numeric form + * in LZMA_VERSION macro. + */ +#define LZMA_VERSION_STRING LZMA_VERSION_STRING_C( \ + LZMA_VERSION_MAJOR, LZMA_VERSION_MINOR, \ + LZMA_VERSION_PATCH, LZMA_VERSION_STABILITY_STRING, \ + LZMA_VERSION_COMMIT) + + +/* #ifndef is needed for use with windres (MinGW or Cygwin). */ +#ifndef LZMA_H_INTERNAL_RC + +/** + * \brief Run-time version number as an integer + * + * Return the value of LZMA_VERSION macro at the compile time of liblzma. + * This allows the application to compare if it was built against the same, + * older, or newer version of liblzma that is currently running. 
+ */ +extern LZMA_API(uint32_t) lzma_version_number(void) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Run-time version as a string + * + * This function may be useful if you want to display which version of + * liblzma your application is currently using. + */ +extern LZMA_API(const char *) lzma_version_string(void) + lzma_nothrow lzma_attr_const; + +#endif diff --git a/src/liblzma/api/lzma/vli.h b/src/liblzma/api/lzma/vli.h new file mode 100644 index 000000000000..8d4277f71227 --- /dev/null +++ b/src/liblzma/api/lzma/vli.h @@ -0,0 +1,168 @@ +/** + * \file lzma/vli.h + * \brief Variable-length integer handling + * + * In the .xz format, most integers are encoded in a variable-length + * representation, which is sometimes called little endian base-128 encoding. + * This saves space when smaller values are more likely than bigger values. + * + * The encoding scheme encodes seven bits to every byte, using minimum + * number of bytes required to represent the given value. Encodings that use + * non-minimum number of bytes are invalid, thus every integer has exactly + * one encoded representation. The maximum number of bits in a VLI is 63, + * thus the vli argument must be less than or equal to UINT64_MAX / 2. You + * should use LZMA_VLI_MAX for clarity. + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. 
+#endif + + +/** + * \brief Maximum supported value of variable-length integer + */ +#define LZMA_VLI_MAX (UINT64_MAX / 2) + +/** + * \brief VLI value to denote that the value is unknown + */ +#define LZMA_VLI_UNKNOWN UINT64_MAX + +/** + * \brief Maximum supported length of variable length integers + */ +#define LZMA_VLI_BYTES_MAX 9 + + +/** + * \brief VLI constant suffix + */ +#define LZMA_VLI_C(n) UINT64_C(n) + + +/** + * \brief Variable-length integer type + * + * This will always be an unsigned integer. Valid VLI values are in the range + * [0, LZMA_VLI_MAX]. Unknown value is indicated with LZMA_VLI_UNKNOWN, + * which is the maximum value of the underlying integer type. + * + * In future, even if lzma_vli is defined to be something other than uint64_t, + * it is guaranteed that 2 * LZMA_VLI_MAX will not overflow lzma_vli. + * This simplifies integer overflow detection. + */ +typedef uint64_t lzma_vli; + + +/** + * \brief Simple macro to validate variable-length integer + * + * This is useful to test that application has given acceptable values + * for example in the uncompressed_size and compressed_size variables. + * + * \return True if the integer is representable as VLI or if it + * indicates unknown value. + */ +#define lzma_vli_is_valid(vli) \ + ((vli) <= LZMA_VLI_MAX || (vli) == LZMA_VLI_UNKNOWN) + + +/** + * \brief Encode a variable-length integer + * + * This function has two modes: single-call and multi-call. Single-call mode + * encodes the whole integer at once; it is an error if the output buffer is + * too small. Multi-call mode saves the position in *vli_pos, and thus it is + * possible to continue encoding if the buffer becomes full before the whole + * integer has been encoded. + * + * \param vli Integer to be encoded + * \param vli_pos How many VLI-encoded bytes have already been written + * out. When starting to encode a new integer, *vli_pos + * must be set to zero. To use single-call encoding, + * set vli_pos to NULL.
+ * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return Slightly different return values are used in multi-call and + * single-call modes. + * + * Single-call (vli_pos == NULL): + * - LZMA_OK: Integer successfully encoded. + * - LZMA_PROG_ERROR: Arguments are not sane. This can be due + * to too little output space; single-call mode doesn't use + * LZMA_BUF_ERROR, since the application should have checked + * the encoded size with lzma_vli_size(). + * + * Multi-call (vli_pos != NULL): + * - LZMA_OK: So far all OK, but the integer is not + * completely written out yet. + * - LZMA_STREAM_END: Integer successfully encoded. + * - LZMA_BUF_ERROR: No output space was provided. + * - LZMA_PROG_ERROR: Arguments are not sane. + */ +extern LZMA_API(lzma_ret) lzma_vli_encode(lzma_vli vli, + size_t *vli_pos, uint8_t *lzma_restrict out, + size_t *lzma_restrict out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Decode a variable-length integer + * + * Like lzma_vli_encode(), this function has single-call and multi-call modes. + * + * \param vli Pointer to decoded integer. The decoder will + * initialize it to zero when *vli_pos == 0, so + * application isn't required to initialize *vli. + * \param vli_pos How many bytes have already been decoded. When + * starting to decode a new integer, *vli_pos must + * be initialized to zero. To use single-call decoding, + * set this to NULL. + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * + * \return Slightly different return values are used in multi-call and + * single-call modes. + * + * Single-call (vli_pos == NULL): + * - LZMA_OK: Integer successfully decoded. 
+ * - LZMA_DATA_ERROR: Integer is corrupt. This includes hitting + * the end of the input buffer before the whole integer was + * decoded; providing no input at all will use LZMA_DATA_ERROR. + * - LZMA_PROG_ERROR: Arguments are not sane. + * + * Multi-call (vli_pos != NULL): + * - LZMA_OK: So far all OK, but the integer is not + * completely decoded yet. + * - LZMA_STREAM_END: Integer successfully decoded. + * - LZMA_DATA_ERROR: Integer is corrupt. + * - LZMA_BUF_ERROR: No input was provided. + * - LZMA_PROG_ERROR: Arguments are not sane. + */ +extern LZMA_API(lzma_ret) lzma_vli_decode(lzma_vli *lzma_restrict vli, + size_t *vli_pos, const uint8_t *lzma_restrict in, + size_t *lzma_restrict in_pos, size_t in_size) lzma_nothrow; + + +/** + * \brief Get the number of bytes required to encode a VLI + * + * \return Number of bytes on success (1-9). If vli isn't valid, + * zero is returned. + */ +extern LZMA_API(uint32_t) lzma_vli_size(lzma_vli vli) + lzma_nothrow lzma_attr_pure; diff --git a/src/liblzma/check/check.c b/src/liblzma/check/check.c new file mode 100644 index 000000000000..428ddaeb7798 --- /dev/null +++ b/src/liblzma/check/check.c @@ -0,0 +1,174 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file check.c +/// \brief Single API to access different integrity checks +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "check.h" + + +extern LZMA_API(lzma_bool) +lzma_check_is_supported(lzma_check type) +{ + if ((unsigned int)(type) > LZMA_CHECK_ID_MAX) + return false; + + static const lzma_bool available_checks[LZMA_CHECK_ID_MAX + 1] = { + true, // LZMA_CHECK_NONE + +#ifdef HAVE_CHECK_CRC32 + true, +#else + false, +#endif + + false, // Reserved + false, // Reserved + +#ifdef HAVE_CHECK_CRC64 + true, +#else + false, +#endif + + false, // Reserved + false, // Reserved + false, // Reserved + false, // Reserved + false, // Reserved + +#ifdef HAVE_CHECK_SHA256 + true, +#else + false, +#endif + + false, // Reserved + false, // Reserved + false, // Reserved + false, // Reserved + false, // Reserved + }; + + return available_checks[(unsigned int)(type)]; +} + + +extern LZMA_API(uint32_t) +lzma_check_size(lzma_check type) +{ + if ((unsigned int)(type) > LZMA_CHECK_ID_MAX) + return UINT32_MAX; + + // See file-format.txt section 2.1.1.2. 
+ static const uint8_t check_sizes[LZMA_CHECK_ID_MAX + 1] = { + 0, + 4, 4, 4, + 8, 8, 8, + 16, 16, 16, + 32, 32, 32, + 64, 64, 64 + }; + + return check_sizes[(unsigned int)(type)]; +} + + +extern void +lzma_check_init(lzma_check_state *check, lzma_check type) +{ + switch (type) { + case LZMA_CHECK_NONE: + break; + +#ifdef HAVE_CHECK_CRC32 + case LZMA_CHECK_CRC32: + check->state.crc32 = 0; + break; +#endif + +#ifdef HAVE_CHECK_CRC64 + case LZMA_CHECK_CRC64: + check->state.crc64 = 0; + break; +#endif + +#ifdef HAVE_CHECK_SHA256 + case LZMA_CHECK_SHA256: + lzma_sha256_init(check); + break; +#endif + + default: + break; + } + + return; +} + + +extern void +lzma_check_update(lzma_check_state *check, lzma_check type, + const uint8_t *buf, size_t size) +{ + switch (type) { +#ifdef HAVE_CHECK_CRC32 + case LZMA_CHECK_CRC32: + check->state.crc32 = lzma_crc32(buf, size, check->state.crc32); + break; +#endif + +#ifdef HAVE_CHECK_CRC64 + case LZMA_CHECK_CRC64: + check->state.crc64 = lzma_crc64(buf, size, check->state.crc64); + break; +#endif + +#ifdef HAVE_CHECK_SHA256 + case LZMA_CHECK_SHA256: + lzma_sha256_update(buf, size, check); + break; +#endif + + default: + break; + } + + return; +} + + +extern void +lzma_check_finish(lzma_check_state *check, lzma_check type) +{ + switch (type) { +#ifdef HAVE_CHECK_CRC32 + case LZMA_CHECK_CRC32: + check->buffer.u32[0] = conv32le(check->state.crc32); + break; +#endif + +#ifdef HAVE_CHECK_CRC64 + case LZMA_CHECK_CRC64: + check->buffer.u64[0] = conv64le(check->state.crc64); + break; +#endif + +#ifdef HAVE_CHECK_SHA256 + case LZMA_CHECK_SHA256: + lzma_sha256_finish(check); + break; +#endif + + default: + break; + } + + return; +} diff --git a/src/liblzma/check/check.h b/src/liblzma/check/check.h new file mode 100644 index 000000000000..e100d2b85303 --- /dev/null +++ b/src/liblzma/check/check.h @@ -0,0 +1,95 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file check.h +/// \brief Internal API to 
different integrity check functions +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_CHECK_H +#define LZMA_CHECK_H + +#include "common.h" + + +// Index hashing needs the best possible hash function (preferably +// a cryptographic hash) for maximum reliability. +#if defined(HAVE_CHECK_SHA256) +# define LZMA_CHECK_BEST LZMA_CHECK_SHA256 +#elif defined(HAVE_CHECK_CRC64) +# define LZMA_CHECK_BEST LZMA_CHECK_CRC64 +#else +# define LZMA_CHECK_BEST LZMA_CHECK_CRC32 +#endif + + +/// \brief Structure to hold internal state of the check being calculated +/// +/// \note This is not in the public API because this structure may +/// change in future if new integrity check algorithms are added. +typedef struct { + /// Buffer to hold the final result and a temporary buffer for SHA256. + union { + uint8_t u8[64]; + uint32_t u32[16]; + uint64_t u64[8]; + } buffer; + + /// Check-specific data + union { + uint32_t crc32; + uint64_t crc64; + + struct { + /// Internal state + uint32_t state[8]; + + /// Size of the message excluding padding + uint64_t size; + } sha256; + } state; + +} lzma_check_state; + + +/// lzma_crc32_table[0] is needed by LZ encoder so we need to keep +/// the array two-dimensional. +#ifdef HAVE_SMALL +extern uint32_t lzma_crc32_table[1][256]; +extern void lzma_crc32_init(void); +#else +extern const uint32_t lzma_crc32_table[8][256]; +extern const uint64_t lzma_crc64_table[4][256]; +#endif + + +/// \brief Initialize *check depending on type +/// +/// \note This function returns nothing. If the type is not supported by +/// the current build of liblzma or is not a known check ID, *check +/// is left unmodified.
+extern void lzma_check_init(lzma_check_state *check, lzma_check type); + +/// Update the check state +extern void lzma_check_update(lzma_check_state *check, lzma_check type, + const uint8_t *buf, size_t size); + +/// Finish the check calculation and store the result to check->buffer.u8. +extern void lzma_check_finish(lzma_check_state *check, lzma_check type); + + +/// Prepare SHA-256 state for new input. +extern void lzma_sha256_init(lzma_check_state *check); + +/// Update the SHA-256 hash state +extern void lzma_sha256_update( + const uint8_t *buf, size_t size, lzma_check_state *check); + +/// Finish the SHA-256 calculation and store the result to check->buffer.u8. +extern void lzma_sha256_finish(lzma_check_state *check); + +#endif diff --git a/src/liblzma/check/crc32_fast.c b/src/liblzma/check/crc32_fast.c new file mode 100644 index 000000000000..94da85592d8a --- /dev/null +++ b/src/liblzma/check/crc32_fast.c @@ -0,0 +1,82 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc32_fast.c +/// \brief CRC32 calculation +/// +/// Calculate the CRC32 using the slice-by-eight algorithm. +/// It is explained in this document: +/// http://www.intel.com/technology/comms/perfnet/download/CRC_generators.pdf +/// The code in this file is not the same as in Intel's paper, but +/// the basic principle is identical. +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "check.h" +#include "crc_macros.h" + + +// If you make any changes, do some benchmarking! Seemingly unrelated +// changes can very easily ruin the performance (and very probably is +// very compiler dependent).
+extern LZMA_API(uint32_t) +lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc) +{ + crc = ~crc; + +#ifdef WORDS_BIGENDIAN + crc = bswap32(crc); +#endif + + if (size > 8) { + // Process single bytes until buf is 8-byte aligned. At most + // seven bytes are consumed, so size > 8 keeps this inside buf[]. + while ((uintptr_t)(buf) & 7) { + crc = lzma_crc32_table[0][*buf++ ^ A(crc)] ^ S8(crc); + --size; + } + + // Calculate the position where the 8-byte loop must stop. + const uint8_t *const limit = buf + (size & ~(size_t)(7)); + + // Calculate how many trailing bytes (0-7) must be processed + // one at a time before returning the result. + size &= (size_t)(7); + + // Slice-by-eight: each round folds two 32-bit words into crc. + while (buf < limit) { + crc ^= *(const uint32_t *)(buf); + buf += 4; + + crc = lzma_crc32_table[7][A(crc)] + ^ lzma_crc32_table[6][B(crc)] + ^ lzma_crc32_table[5][C(crc)] + ^ lzma_crc32_table[4][D(crc)]; + + const uint32_t tmp = *(const uint32_t *)(buf); + buf += 4; + + // At least with some compilers, it is critical for + // performance that the crc variable is XORed + // between the two table-lookup pairs. + crc = lzma_crc32_table[3][A(tmp)] + ^ lzma_crc32_table[2][B(tmp)] + ^ crc + ^ lzma_crc32_table[1][C(tmp)] + ^ lzma_crc32_table[0][D(tmp)]; + } + } + + while (size-- != 0) // Tail bytes; also the whole input if size <= 8 + crc = lzma_crc32_table[0][*buf++ ^ A(crc)] ^ S8(crc); + +#ifdef WORDS_BIGENDIAN + crc = bswap32(crc); // Undo the byte swap done on entry +#endif + + return ~crc; +} diff --git a/src/liblzma/check/crc32_small.c b/src/liblzma/check/crc32_small.c new file mode 100644 index 000000000000..5f8a32868782 --- /dev/null +++ b/src/liblzma/check/crc32_small.c @@ -0,0 +1,61 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc32_small.c +/// \brief CRC32 calculation (size-optimized) +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file.
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "check.h" + +/// Table used by lzma_crc32(); row 0 is filled in by crc32_init(). +uint32_t lzma_crc32_table[1][256]; + +/// Fill row 0: entry b is b run through eight shift/XOR steps with poly32. +static void +crc32_init(void) +{ + static const uint32_t poly32 = UINT32_C(0xEDB88320); + + for (size_t b = 0; b < 256; ++b) { + uint32_t r = b; + for (size_t i = 0; i < 8; ++i) { + if (r & 1) + r = (r >> 1) ^ poly32; + else + r >>= 1; + } + + lzma_crc32_table[0][b] = r; + } + + return; +} + +/// Run crc32_init() through mythread_once() (presumably once; see mythread.h). +extern void +lzma_crc32_init(void) +{ + mythread_once(crc32_init); + return; +} + +/// Update crc with size bytes from buf, one table lookup per byte. +extern LZMA_API(uint32_t) +lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc) +{ + lzma_crc32_init(); + + crc = ~crc; + + while (size != 0) { + crc = lzma_crc32_table[0][*buf++ ^ (crc & 0xFF)] ^ (crc >> 8); + --size; + } + + return ~crc; +} diff --git a/src/liblzma/check/crc32_table.c b/src/liblzma/check/crc32_table.c new file mode 100644 index 000000000000..368874eb79d4 --- /dev/null +++ b/src/liblzma/check/crc32_table.c @@ -0,0 +1,19 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc32_table.c +/// \brief Precalculated CRC32 table with correct endianness +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + +#ifdef WORDS_BIGENDIAN +# include "crc32_table_be.h" +#else +# include "crc32_table_le.h" +#endif diff --git a/src/liblzma/check/crc32_table_be.h b/src/liblzma/check/crc32_table_be.h new file mode 100644 index 000000000000..c483cb670dcb --- /dev/null +++ b/src/liblzma/check/crc32_table_be.h @@ -0,0 +1,525 @@ +/* This file has been automatically generated by crc32_tablegen.c.
*/ + +const uint32_t lzma_crc32_table[8][256] = { + { + 0x00000000, 0x96300777, 0x2C610EEE, 0xBA510999, + 0x19C46D07, 0x8FF46A70, 0x35A563E9, 0xA395649E, + 0x3288DB0E, 0xA4B8DC79, 0x1EE9D5E0, 0x88D9D297, + 0x2B4CB609, 0xBD7CB17E, 0x072DB8E7, 0x911DBF90, + 0x6410B71D, 0xF220B06A, 0x4871B9F3, 0xDE41BE84, + 0x7DD4DA1A, 0xEBE4DD6D, 0x51B5D4F4, 0xC785D383, + 0x56986C13, 0xC0A86B64, 0x7AF962FD, 0xECC9658A, + 0x4F5C0114, 0xD96C0663, 0x633D0FFA, 0xF50D088D, + 0xC8206E3B, 0x5E10694C, 0xE44160D5, 0x727167A2, + 0xD1E4033C, 0x47D4044B, 0xFD850DD2, 0x6BB50AA5, + 0xFAA8B535, 0x6C98B242, 0xD6C9BBDB, 0x40F9BCAC, + 0xE36CD832, 0x755CDF45, 0xCF0DD6DC, 0x593DD1AB, + 0xAC30D926, 0x3A00DE51, 0x8051D7C8, 0x1661D0BF, + 0xB5F4B421, 0x23C4B356, 0x9995BACF, 0x0FA5BDB8, + 0x9EB80228, 0x0888055F, 0xB2D90CC6, 0x24E90BB1, + 0x877C6F2F, 0x114C6858, 0xAB1D61C1, 0x3D2D66B6, + 0x9041DC76, 0x0671DB01, 0xBC20D298, 0x2A10D5EF, + 0x8985B171, 0x1FB5B606, 0xA5E4BF9F, 0x33D4B8E8, + 0xA2C90778, 0x34F9000F, 0x8EA80996, 0x18980EE1, + 0xBB0D6A7F, 0x2D3D6D08, 0x976C6491, 0x015C63E6, + 0xF4516B6B, 0x62616C1C, 0xD8306585, 0x4E0062F2, + 0xED95066C, 0x7BA5011B, 0xC1F40882, 0x57C40FF5, + 0xC6D9B065, 0x50E9B712, 0xEAB8BE8B, 0x7C88B9FC, + 0xDF1DDD62, 0x492DDA15, 0xF37CD38C, 0x654CD4FB, + 0x5861B24D, 0xCE51B53A, 0x7400BCA3, 0xE230BBD4, + 0x41A5DF4A, 0xD795D83D, 0x6DC4D1A4, 0xFBF4D6D3, + 0x6AE96943, 0xFCD96E34, 0x468867AD, 0xD0B860DA, + 0x732D0444, 0xE51D0333, 0x5F4C0AAA, 0xC97C0DDD, + 0x3C710550, 0xAA410227, 0x10100BBE, 0x86200CC9, + 0x25B56857, 0xB3856F20, 0x09D466B9, 0x9FE461CE, + 0x0EF9DE5E, 0x98C9D929, 0x2298D0B0, 0xB4A8D7C7, + 0x173DB359, 0x810DB42E, 0x3B5CBDB7, 0xAD6CBAC0, + 0x2083B8ED, 0xB6B3BF9A, 0x0CE2B603, 0x9AD2B174, + 0x3947D5EA, 0xAF77D29D, 0x1526DB04, 0x8316DC73, + 0x120B63E3, 0x843B6494, 0x3E6A6D0D, 0xA85A6A7A, + 0x0BCF0EE4, 0x9DFF0993, 0x27AE000A, 0xB19E077D, + 0x44930FF0, 0xD2A30887, 0x68F2011E, 0xFEC20669, + 0x5D5762F7, 0xCB676580, 0x71366C19, 0xE7066B6E, + 0x761BD4FE, 0xE02BD389, 0x5A7ADA10, 
0xCC4ADD67, + 0x6FDFB9F9, 0xF9EFBE8E, 0x43BEB717, 0xD58EB060, + 0xE8A3D6D6, 0x7E93D1A1, 0xC4C2D838, 0x52F2DF4F, + 0xF167BBD1, 0x6757BCA6, 0xDD06B53F, 0x4B36B248, + 0xDA2B0DD8, 0x4C1B0AAF, 0xF64A0336, 0x607A0441, + 0xC3EF60DF, 0x55DF67A8, 0xEF8E6E31, 0x79BE6946, + 0x8CB361CB, 0x1A8366BC, 0xA0D26F25, 0x36E26852, + 0x95770CCC, 0x03470BBB, 0xB9160222, 0x2F260555, + 0xBE3BBAC5, 0x280BBDB2, 0x925AB42B, 0x046AB35C, + 0xA7FFD7C2, 0x31CFD0B5, 0x8B9ED92C, 0x1DAEDE5B, + 0xB0C2649B, 0x26F263EC, 0x9CA36A75, 0x0A936D02, + 0xA906099C, 0x3F360EEB, 0x85670772, 0x13570005, + 0x824ABF95, 0x147AB8E2, 0xAE2BB17B, 0x381BB60C, + 0x9B8ED292, 0x0DBED5E5, 0xB7EFDC7C, 0x21DFDB0B, + 0xD4D2D386, 0x42E2D4F1, 0xF8B3DD68, 0x6E83DA1F, + 0xCD16BE81, 0x5B26B9F6, 0xE177B06F, 0x7747B718, + 0xE65A0888, 0x706A0FFF, 0xCA3B0666, 0x5C0B0111, + 0xFF9E658F, 0x69AE62F8, 0xD3FF6B61, 0x45CF6C16, + 0x78E20AA0, 0xEED20DD7, 0x5483044E, 0xC2B30339, + 0x612667A7, 0xF71660D0, 0x4D476949, 0xDB776E3E, + 0x4A6AD1AE, 0xDC5AD6D9, 0x660BDF40, 0xF03BD837, + 0x53AEBCA9, 0xC59EBBDE, 0x7FCFB247, 0xE9FFB530, + 0x1CF2BDBD, 0x8AC2BACA, 0x3093B353, 0xA6A3B424, + 0x0536D0BA, 0x9306D7CD, 0x2957DE54, 0xBF67D923, + 0x2E7A66B3, 0xB84A61C4, 0x021B685D, 0x942B6F2A, + 0x37BE0BB4, 0xA18E0CC3, 0x1BDF055A, 0x8DEF022D + }, { + 0x00000000, 0x41311B19, 0x82623632, 0xC3532D2B, + 0x04C56C64, 0x45F4777D, 0x86A75A56, 0xC796414F, + 0x088AD9C8, 0x49BBC2D1, 0x8AE8EFFA, 0xCBD9F4E3, + 0x0C4FB5AC, 0x4D7EAEB5, 0x8E2D839E, 0xCF1C9887, + 0x5112C24A, 0x1023D953, 0xD370F478, 0x9241EF61, + 0x55D7AE2E, 0x14E6B537, 0xD7B5981C, 0x96848305, + 0x59981B82, 0x18A9009B, 0xDBFA2DB0, 0x9ACB36A9, + 0x5D5D77E6, 0x1C6C6CFF, 0xDF3F41D4, 0x9E0E5ACD, + 0xA2248495, 0xE3159F8C, 0x2046B2A7, 0x6177A9BE, + 0xA6E1E8F1, 0xE7D0F3E8, 0x2483DEC3, 0x65B2C5DA, + 0xAAAE5D5D, 0xEB9F4644, 0x28CC6B6F, 0x69FD7076, + 0xAE6B3139, 0xEF5A2A20, 0x2C09070B, 0x6D381C12, + 0xF33646DF, 0xB2075DC6, 0x715470ED, 0x30656BF4, + 0xF7F32ABB, 0xB6C231A2, 0x75911C89, 0x34A00790, + 0xFBBC9F17, 0xBA8D840E, 
0x79DEA925, 0x38EFB23C, + 0xFF79F373, 0xBE48E86A, 0x7D1BC541, 0x3C2ADE58, + 0x054F79F0, 0x447E62E9, 0x872D4FC2, 0xC61C54DB, + 0x018A1594, 0x40BB0E8D, 0x83E823A6, 0xC2D938BF, + 0x0DC5A038, 0x4CF4BB21, 0x8FA7960A, 0xCE968D13, + 0x0900CC5C, 0x4831D745, 0x8B62FA6E, 0xCA53E177, + 0x545DBBBA, 0x156CA0A3, 0xD63F8D88, 0x970E9691, + 0x5098D7DE, 0x11A9CCC7, 0xD2FAE1EC, 0x93CBFAF5, + 0x5CD76272, 0x1DE6796B, 0xDEB55440, 0x9F844F59, + 0x58120E16, 0x1923150F, 0xDA703824, 0x9B41233D, + 0xA76BFD65, 0xE65AE67C, 0x2509CB57, 0x6438D04E, + 0xA3AE9101, 0xE29F8A18, 0x21CCA733, 0x60FDBC2A, + 0xAFE124AD, 0xEED03FB4, 0x2D83129F, 0x6CB20986, + 0xAB2448C9, 0xEA1553D0, 0x29467EFB, 0x687765E2, + 0xF6793F2F, 0xB7482436, 0x741B091D, 0x352A1204, + 0xF2BC534B, 0xB38D4852, 0x70DE6579, 0x31EF7E60, + 0xFEF3E6E7, 0xBFC2FDFE, 0x7C91D0D5, 0x3DA0CBCC, + 0xFA368A83, 0xBB07919A, 0x7854BCB1, 0x3965A7A8, + 0x4B98833B, 0x0AA99822, 0xC9FAB509, 0x88CBAE10, + 0x4F5DEF5F, 0x0E6CF446, 0xCD3FD96D, 0x8C0EC274, + 0x43125AF3, 0x022341EA, 0xC1706CC1, 0x804177D8, + 0x47D73697, 0x06E62D8E, 0xC5B500A5, 0x84841BBC, + 0x1A8A4171, 0x5BBB5A68, 0x98E87743, 0xD9D96C5A, + 0x1E4F2D15, 0x5F7E360C, 0x9C2D1B27, 0xDD1C003E, + 0x120098B9, 0x533183A0, 0x9062AE8B, 0xD153B592, + 0x16C5F4DD, 0x57F4EFC4, 0x94A7C2EF, 0xD596D9F6, + 0xE9BC07AE, 0xA88D1CB7, 0x6BDE319C, 0x2AEF2A85, + 0xED796BCA, 0xAC4870D3, 0x6F1B5DF8, 0x2E2A46E1, + 0xE136DE66, 0xA007C57F, 0x6354E854, 0x2265F34D, + 0xE5F3B202, 0xA4C2A91B, 0x67918430, 0x26A09F29, + 0xB8AEC5E4, 0xF99FDEFD, 0x3ACCF3D6, 0x7BFDE8CF, + 0xBC6BA980, 0xFD5AB299, 0x3E099FB2, 0x7F3884AB, + 0xB0241C2C, 0xF1150735, 0x32462A1E, 0x73773107, + 0xB4E17048, 0xF5D06B51, 0x3683467A, 0x77B25D63, + 0x4ED7FACB, 0x0FE6E1D2, 0xCCB5CCF9, 0x8D84D7E0, + 0x4A1296AF, 0x0B238DB6, 0xC870A09D, 0x8941BB84, + 0x465D2303, 0x076C381A, 0xC43F1531, 0x850E0E28, + 0x42984F67, 0x03A9547E, 0xC0FA7955, 0x81CB624C, + 0x1FC53881, 0x5EF42398, 0x9DA70EB3, 0xDC9615AA, + 0x1B0054E5, 0x5A314FFC, 0x996262D7, 0xD85379CE, + 0x174FE149, 0x567EFA50, 
0x952DD77B, 0xD41CCC62, + 0x138A8D2D, 0x52BB9634, 0x91E8BB1F, 0xD0D9A006, + 0xECF37E5E, 0xADC26547, 0x6E91486C, 0x2FA05375, + 0xE836123A, 0xA9070923, 0x6A542408, 0x2B653F11, + 0xE479A796, 0xA548BC8F, 0x661B91A4, 0x272A8ABD, + 0xE0BCCBF2, 0xA18DD0EB, 0x62DEFDC0, 0x23EFE6D9, + 0xBDE1BC14, 0xFCD0A70D, 0x3F838A26, 0x7EB2913F, + 0xB924D070, 0xF815CB69, 0x3B46E642, 0x7A77FD5B, + 0xB56B65DC, 0xF45A7EC5, 0x370953EE, 0x763848F7, + 0xB1AE09B8, 0xF09F12A1, 0x33CC3F8A, 0x72FD2493 + }, { + 0x00000000, 0x376AC201, 0x6ED48403, 0x59BE4602, + 0xDCA80907, 0xEBC2CB06, 0xB27C8D04, 0x85164F05, + 0xB851130E, 0x8F3BD10F, 0xD685970D, 0xE1EF550C, + 0x64F91A09, 0x5393D808, 0x0A2D9E0A, 0x3D475C0B, + 0x70A3261C, 0x47C9E41D, 0x1E77A21F, 0x291D601E, + 0xAC0B2F1B, 0x9B61ED1A, 0xC2DFAB18, 0xF5B56919, + 0xC8F23512, 0xFF98F713, 0xA626B111, 0x914C7310, + 0x145A3C15, 0x2330FE14, 0x7A8EB816, 0x4DE47A17, + 0xE0464D38, 0xD72C8F39, 0x8E92C93B, 0xB9F80B3A, + 0x3CEE443F, 0x0B84863E, 0x523AC03C, 0x6550023D, + 0x58175E36, 0x6F7D9C37, 0x36C3DA35, 0x01A91834, + 0x84BF5731, 0xB3D59530, 0xEA6BD332, 0xDD011133, + 0x90E56B24, 0xA78FA925, 0xFE31EF27, 0xC95B2D26, + 0x4C4D6223, 0x7B27A022, 0x2299E620, 0x15F32421, + 0x28B4782A, 0x1FDEBA2B, 0x4660FC29, 0x710A3E28, + 0xF41C712D, 0xC376B32C, 0x9AC8F52E, 0xADA2372F, + 0xC08D9A70, 0xF7E75871, 0xAE591E73, 0x9933DC72, + 0x1C259377, 0x2B4F5176, 0x72F11774, 0x459BD575, + 0x78DC897E, 0x4FB64B7F, 0x16080D7D, 0x2162CF7C, + 0xA4748079, 0x931E4278, 0xCAA0047A, 0xFDCAC67B, + 0xB02EBC6C, 0x87447E6D, 0xDEFA386F, 0xE990FA6E, + 0x6C86B56B, 0x5BEC776A, 0x02523168, 0x3538F369, + 0x087FAF62, 0x3F156D63, 0x66AB2B61, 0x51C1E960, + 0xD4D7A665, 0xE3BD6464, 0xBA032266, 0x8D69E067, + 0x20CBD748, 0x17A11549, 0x4E1F534B, 0x7975914A, + 0xFC63DE4F, 0xCB091C4E, 0x92B75A4C, 0xA5DD984D, + 0x989AC446, 0xAFF00647, 0xF64E4045, 0xC1248244, + 0x4432CD41, 0x73580F40, 0x2AE64942, 0x1D8C8B43, + 0x5068F154, 0x67023355, 0x3EBC7557, 0x09D6B756, + 0x8CC0F853, 0xBBAA3A52, 0xE2147C50, 0xD57EBE51, + 0xE839E25A, 
0xDF53205B, 0x86ED6659, 0xB187A458, + 0x3491EB5D, 0x03FB295C, 0x5A456F5E, 0x6D2FAD5F, + 0x801B35E1, 0xB771F7E0, 0xEECFB1E2, 0xD9A573E3, + 0x5CB33CE6, 0x6BD9FEE7, 0x3267B8E5, 0x050D7AE4, + 0x384A26EF, 0x0F20E4EE, 0x569EA2EC, 0x61F460ED, + 0xE4E22FE8, 0xD388EDE9, 0x8A36ABEB, 0xBD5C69EA, + 0xF0B813FD, 0xC7D2D1FC, 0x9E6C97FE, 0xA90655FF, + 0x2C101AFA, 0x1B7AD8FB, 0x42C49EF9, 0x75AE5CF8, + 0x48E900F3, 0x7F83C2F2, 0x263D84F0, 0x115746F1, + 0x944109F4, 0xA32BCBF5, 0xFA958DF7, 0xCDFF4FF6, + 0x605D78D9, 0x5737BAD8, 0x0E89FCDA, 0x39E33EDB, + 0xBCF571DE, 0x8B9FB3DF, 0xD221F5DD, 0xE54B37DC, + 0xD80C6BD7, 0xEF66A9D6, 0xB6D8EFD4, 0x81B22DD5, + 0x04A462D0, 0x33CEA0D1, 0x6A70E6D3, 0x5D1A24D2, + 0x10FE5EC5, 0x27949CC4, 0x7E2ADAC6, 0x494018C7, + 0xCC5657C2, 0xFB3C95C3, 0xA282D3C1, 0x95E811C0, + 0xA8AF4DCB, 0x9FC58FCA, 0xC67BC9C8, 0xF1110BC9, + 0x740744CC, 0x436D86CD, 0x1AD3C0CF, 0x2DB902CE, + 0x4096AF91, 0x77FC6D90, 0x2E422B92, 0x1928E993, + 0x9C3EA696, 0xAB546497, 0xF2EA2295, 0xC580E094, + 0xF8C7BC9F, 0xCFAD7E9E, 0x9613389C, 0xA179FA9D, + 0x246FB598, 0x13057799, 0x4ABB319B, 0x7DD1F39A, + 0x3035898D, 0x075F4B8C, 0x5EE10D8E, 0x698BCF8F, + 0xEC9D808A, 0xDBF7428B, 0x82490489, 0xB523C688, + 0x88649A83, 0xBF0E5882, 0xE6B01E80, 0xD1DADC81, + 0x54CC9384, 0x63A65185, 0x3A181787, 0x0D72D586, + 0xA0D0E2A9, 0x97BA20A8, 0xCE0466AA, 0xF96EA4AB, + 0x7C78EBAE, 0x4B1229AF, 0x12AC6FAD, 0x25C6ADAC, + 0x1881F1A7, 0x2FEB33A6, 0x765575A4, 0x413FB7A5, + 0xC429F8A0, 0xF3433AA1, 0xAAFD7CA3, 0x9D97BEA2, + 0xD073C4B5, 0xE71906B4, 0xBEA740B6, 0x89CD82B7, + 0x0CDBCDB2, 0x3BB10FB3, 0x620F49B1, 0x55658BB0, + 0x6822D7BB, 0x5F4815BA, 0x06F653B8, 0x319C91B9, + 0xB48ADEBC, 0x83E01CBD, 0xDA5E5ABF, 0xED3498BE + }, { + 0x00000000, 0x6567BCB8, 0x8BC809AA, 0xEEAFB512, + 0x5797628F, 0x32F0DE37, 0xDC5F6B25, 0xB938D79D, + 0xEF28B4C5, 0x8A4F087D, 0x64E0BD6F, 0x018701D7, + 0xB8BFD64A, 0xDDD86AF2, 0x3377DFE0, 0x56106358, + 0x9F571950, 0xFA30A5E8, 0x149F10FA, 0x71F8AC42, + 0xC8C07BDF, 0xADA7C767, 0x43087275, 0x266FCECD, + 
0x707FAD95, 0x1518112D, 0xFBB7A43F, 0x9ED01887, + 0x27E8CF1A, 0x428F73A2, 0xAC20C6B0, 0xC9477A08, + 0x3EAF32A0, 0x5BC88E18, 0xB5673B0A, 0xD00087B2, + 0x6938502F, 0x0C5FEC97, 0xE2F05985, 0x8797E53D, + 0xD1878665, 0xB4E03ADD, 0x5A4F8FCF, 0x3F283377, + 0x8610E4EA, 0xE3775852, 0x0DD8ED40, 0x68BF51F8, + 0xA1F82BF0, 0xC49F9748, 0x2A30225A, 0x4F579EE2, + 0xF66F497F, 0x9308F5C7, 0x7DA740D5, 0x18C0FC6D, + 0x4ED09F35, 0x2BB7238D, 0xC518969F, 0xA07F2A27, + 0x1947FDBA, 0x7C204102, 0x928FF410, 0xF7E848A8, + 0x3D58149B, 0x583FA823, 0xB6901D31, 0xD3F7A189, + 0x6ACF7614, 0x0FA8CAAC, 0xE1077FBE, 0x8460C306, + 0xD270A05E, 0xB7171CE6, 0x59B8A9F4, 0x3CDF154C, + 0x85E7C2D1, 0xE0807E69, 0x0E2FCB7B, 0x6B4877C3, + 0xA20F0DCB, 0xC768B173, 0x29C70461, 0x4CA0B8D9, + 0xF5986F44, 0x90FFD3FC, 0x7E5066EE, 0x1B37DA56, + 0x4D27B90E, 0x284005B6, 0xC6EFB0A4, 0xA3880C1C, + 0x1AB0DB81, 0x7FD76739, 0x9178D22B, 0xF41F6E93, + 0x03F7263B, 0x66909A83, 0x883F2F91, 0xED589329, + 0x546044B4, 0x3107F80C, 0xDFA84D1E, 0xBACFF1A6, + 0xECDF92FE, 0x89B82E46, 0x67179B54, 0x027027EC, + 0xBB48F071, 0xDE2F4CC9, 0x3080F9DB, 0x55E74563, + 0x9CA03F6B, 0xF9C783D3, 0x176836C1, 0x720F8A79, + 0xCB375DE4, 0xAE50E15C, 0x40FF544E, 0x2598E8F6, + 0x73888BAE, 0x16EF3716, 0xF8408204, 0x9D273EBC, + 0x241FE921, 0x41785599, 0xAFD7E08B, 0xCAB05C33, + 0x3BB659ED, 0x5ED1E555, 0xB07E5047, 0xD519ECFF, + 0x6C213B62, 0x094687DA, 0xE7E932C8, 0x828E8E70, + 0xD49EED28, 0xB1F95190, 0x5F56E482, 0x3A31583A, + 0x83098FA7, 0xE66E331F, 0x08C1860D, 0x6DA63AB5, + 0xA4E140BD, 0xC186FC05, 0x2F294917, 0x4A4EF5AF, + 0xF3762232, 0x96119E8A, 0x78BE2B98, 0x1DD99720, + 0x4BC9F478, 0x2EAE48C0, 0xC001FDD2, 0xA566416A, + 0x1C5E96F7, 0x79392A4F, 0x97969F5D, 0xF2F123E5, + 0x05196B4D, 0x607ED7F5, 0x8ED162E7, 0xEBB6DE5F, + 0x528E09C2, 0x37E9B57A, 0xD9460068, 0xBC21BCD0, + 0xEA31DF88, 0x8F566330, 0x61F9D622, 0x049E6A9A, + 0xBDA6BD07, 0xD8C101BF, 0x366EB4AD, 0x53090815, + 0x9A4E721D, 0xFF29CEA5, 0x11867BB7, 0x74E1C70F, + 0xCDD91092, 0xA8BEAC2A, 0x46111938, 0x2376A580, + 
0x7566C6D8, 0x10017A60, 0xFEAECF72, 0x9BC973CA, + 0x22F1A457, 0x479618EF, 0xA939ADFD, 0xCC5E1145, + 0x06EE4D76, 0x6389F1CE, 0x8D2644DC, 0xE841F864, + 0x51792FF9, 0x341E9341, 0xDAB12653, 0xBFD69AEB, + 0xE9C6F9B3, 0x8CA1450B, 0x620EF019, 0x07694CA1, + 0xBE519B3C, 0xDB362784, 0x35999296, 0x50FE2E2E, + 0x99B95426, 0xFCDEE89E, 0x12715D8C, 0x7716E134, + 0xCE2E36A9, 0xAB498A11, 0x45E63F03, 0x208183BB, + 0x7691E0E3, 0x13F65C5B, 0xFD59E949, 0x983E55F1, + 0x2106826C, 0x44613ED4, 0xAACE8BC6, 0xCFA9377E, + 0x38417FD6, 0x5D26C36E, 0xB389767C, 0xD6EECAC4, + 0x6FD61D59, 0x0AB1A1E1, 0xE41E14F3, 0x8179A84B, + 0xD769CB13, 0xB20E77AB, 0x5CA1C2B9, 0x39C67E01, + 0x80FEA99C, 0xE5991524, 0x0B36A036, 0x6E511C8E, + 0xA7166686, 0xC271DA3E, 0x2CDE6F2C, 0x49B9D394, + 0xF0810409, 0x95E6B8B1, 0x7B490DA3, 0x1E2EB11B, + 0x483ED243, 0x2D596EFB, 0xC3F6DBE9, 0xA6916751, + 0x1FA9B0CC, 0x7ACE0C74, 0x9461B966, 0xF10605DE + }, { + 0x00000000, 0xB029603D, 0x6053C07A, 0xD07AA047, + 0xC0A680F5, 0x708FE0C8, 0xA0F5408F, 0x10DC20B2, + 0xC14B7030, 0x7162100D, 0xA118B04A, 0x1131D077, + 0x01EDF0C5, 0xB1C490F8, 0x61BE30BF, 0xD1975082, + 0x8297E060, 0x32BE805D, 0xE2C4201A, 0x52ED4027, + 0x42316095, 0xF21800A8, 0x2262A0EF, 0x924BC0D2, + 0x43DC9050, 0xF3F5F06D, 0x238F502A, 0x93A63017, + 0x837A10A5, 0x33537098, 0xE329D0DF, 0x5300B0E2, + 0x042FC1C1, 0xB406A1FC, 0x647C01BB, 0xD4556186, + 0xC4894134, 0x74A02109, 0xA4DA814E, 0x14F3E173, + 0xC564B1F1, 0x754DD1CC, 0xA537718B, 0x151E11B6, + 0x05C23104, 0xB5EB5139, 0x6591F17E, 0xD5B89143, + 0x86B821A1, 0x3691419C, 0xE6EBE1DB, 0x56C281E6, + 0x461EA154, 0xF637C169, 0x264D612E, 0x96640113, + 0x47F35191, 0xF7DA31AC, 0x27A091EB, 0x9789F1D6, + 0x8755D164, 0x377CB159, 0xE706111E, 0x572F7123, + 0x4958F358, 0xF9719365, 0x290B3322, 0x9922531F, + 0x89FE73AD, 0x39D71390, 0xE9ADB3D7, 0x5984D3EA, + 0x88138368, 0x383AE355, 0xE8404312, 0x5869232F, + 0x48B5039D, 0xF89C63A0, 0x28E6C3E7, 0x98CFA3DA, + 0xCBCF1338, 0x7BE67305, 0xAB9CD342, 0x1BB5B37F, + 0x0B6993CD, 0xBB40F3F0, 0x6B3A53B7, 
0xDB13338A, + 0x0A846308, 0xBAAD0335, 0x6AD7A372, 0xDAFEC34F, + 0xCA22E3FD, 0x7A0B83C0, 0xAA712387, 0x1A5843BA, + 0x4D773299, 0xFD5E52A4, 0x2D24F2E3, 0x9D0D92DE, + 0x8DD1B26C, 0x3DF8D251, 0xED827216, 0x5DAB122B, + 0x8C3C42A9, 0x3C152294, 0xEC6F82D3, 0x5C46E2EE, + 0x4C9AC25C, 0xFCB3A261, 0x2CC90226, 0x9CE0621B, + 0xCFE0D2F9, 0x7FC9B2C4, 0xAFB31283, 0x1F9A72BE, + 0x0F46520C, 0xBF6F3231, 0x6F159276, 0xDF3CF24B, + 0x0EABA2C9, 0xBE82C2F4, 0x6EF862B3, 0xDED1028E, + 0xCE0D223C, 0x7E244201, 0xAE5EE246, 0x1E77827B, + 0x92B0E6B1, 0x2299868C, 0xF2E326CB, 0x42CA46F6, + 0x52166644, 0xE23F0679, 0x3245A63E, 0x826CC603, + 0x53FB9681, 0xE3D2F6BC, 0x33A856FB, 0x838136C6, + 0x935D1674, 0x23747649, 0xF30ED60E, 0x4327B633, + 0x102706D1, 0xA00E66EC, 0x7074C6AB, 0xC05DA696, + 0xD0818624, 0x60A8E619, 0xB0D2465E, 0x00FB2663, + 0xD16C76E1, 0x614516DC, 0xB13FB69B, 0x0116D6A6, + 0x11CAF614, 0xA1E39629, 0x7199366E, 0xC1B05653, + 0x969F2770, 0x26B6474D, 0xF6CCE70A, 0x46E58737, + 0x5639A785, 0xE610C7B8, 0x366A67FF, 0x864307C2, + 0x57D45740, 0xE7FD377D, 0x3787973A, 0x87AEF707, + 0x9772D7B5, 0x275BB788, 0xF72117CF, 0x470877F2, + 0x1408C710, 0xA421A72D, 0x745B076A, 0xC4726757, + 0xD4AE47E5, 0x648727D8, 0xB4FD879F, 0x04D4E7A2, + 0xD543B720, 0x656AD71D, 0xB510775A, 0x05391767, + 0x15E537D5, 0xA5CC57E8, 0x75B6F7AF, 0xC59F9792, + 0xDBE815E9, 0x6BC175D4, 0xBBBBD593, 0x0B92B5AE, + 0x1B4E951C, 0xAB67F521, 0x7B1D5566, 0xCB34355B, + 0x1AA365D9, 0xAA8A05E4, 0x7AF0A5A3, 0xCAD9C59E, + 0xDA05E52C, 0x6A2C8511, 0xBA562556, 0x0A7F456B, + 0x597FF589, 0xE95695B4, 0x392C35F3, 0x890555CE, + 0x99D9757C, 0x29F01541, 0xF98AB506, 0x49A3D53B, + 0x983485B9, 0x281DE584, 0xF86745C3, 0x484E25FE, + 0x5892054C, 0xE8BB6571, 0x38C1C536, 0x88E8A50B, + 0xDFC7D428, 0x6FEEB415, 0xBF941452, 0x0FBD746F, + 0x1F6154DD, 0xAF4834E0, 0x7F3294A7, 0xCF1BF49A, + 0x1E8CA418, 0xAEA5C425, 0x7EDF6462, 0xCEF6045F, + 0xDE2A24ED, 0x6E0344D0, 0xBE79E497, 0x0E5084AA, + 0x5D503448, 0xED795475, 0x3D03F432, 0x8D2A940F, + 0x9DF6B4BD, 0x2DDFD480, 0xFDA574C7, 
0x4D8C14FA, + 0x9C1B4478, 0x2C322445, 0xFC488402, 0x4C61E43F, + 0x5CBDC48D, 0xEC94A4B0, 0x3CEE04F7, 0x8CC764CA + }, { + 0x00000000, 0xA5D35CCB, 0x0BA1C84D, 0xAE729486, + 0x1642919B, 0xB391CD50, 0x1DE359D6, 0xB830051D, + 0x6D8253EC, 0xC8510F27, 0x66239BA1, 0xC3F0C76A, + 0x7BC0C277, 0xDE139EBC, 0x70610A3A, 0xD5B256F1, + 0x9B02D603, 0x3ED18AC8, 0x90A31E4E, 0x35704285, + 0x8D404798, 0x28931B53, 0x86E18FD5, 0x2332D31E, + 0xF68085EF, 0x5353D924, 0xFD214DA2, 0x58F21169, + 0xE0C21474, 0x451148BF, 0xEB63DC39, 0x4EB080F2, + 0x3605AC07, 0x93D6F0CC, 0x3DA4644A, 0x98773881, + 0x20473D9C, 0x85946157, 0x2BE6F5D1, 0x8E35A91A, + 0x5B87FFEB, 0xFE54A320, 0x502637A6, 0xF5F56B6D, + 0x4DC56E70, 0xE81632BB, 0x4664A63D, 0xE3B7FAF6, + 0xAD077A04, 0x08D426CF, 0xA6A6B249, 0x0375EE82, + 0xBB45EB9F, 0x1E96B754, 0xB0E423D2, 0x15377F19, + 0xC08529E8, 0x65567523, 0xCB24E1A5, 0x6EF7BD6E, + 0xD6C7B873, 0x7314E4B8, 0xDD66703E, 0x78B52CF5, + 0x6C0A580F, 0xC9D904C4, 0x67AB9042, 0xC278CC89, + 0x7A48C994, 0xDF9B955F, 0x71E901D9, 0xD43A5D12, + 0x01880BE3, 0xA45B5728, 0x0A29C3AE, 0xAFFA9F65, + 0x17CA9A78, 0xB219C6B3, 0x1C6B5235, 0xB9B80EFE, + 0xF7088E0C, 0x52DBD2C7, 0xFCA94641, 0x597A1A8A, + 0xE14A1F97, 0x4499435C, 0xEAEBD7DA, 0x4F388B11, + 0x9A8ADDE0, 0x3F59812B, 0x912B15AD, 0x34F84966, + 0x8CC84C7B, 0x291B10B0, 0x87698436, 0x22BAD8FD, + 0x5A0FF408, 0xFFDCA8C3, 0x51AE3C45, 0xF47D608E, + 0x4C4D6593, 0xE99E3958, 0x47ECADDE, 0xE23FF115, + 0x378DA7E4, 0x925EFB2F, 0x3C2C6FA9, 0x99FF3362, + 0x21CF367F, 0x841C6AB4, 0x2A6EFE32, 0x8FBDA2F9, + 0xC10D220B, 0x64DE7EC0, 0xCAACEA46, 0x6F7FB68D, + 0xD74FB390, 0x729CEF5B, 0xDCEE7BDD, 0x793D2716, + 0xAC8F71E7, 0x095C2D2C, 0xA72EB9AA, 0x02FDE561, + 0xBACDE07C, 0x1F1EBCB7, 0xB16C2831, 0x14BF74FA, + 0xD814B01E, 0x7DC7ECD5, 0xD3B57853, 0x76662498, + 0xCE562185, 0x6B857D4E, 0xC5F7E9C8, 0x6024B503, + 0xB596E3F2, 0x1045BF39, 0xBE372BBF, 0x1BE47774, + 0xA3D47269, 0x06072EA2, 0xA875BA24, 0x0DA6E6EF, + 0x4316661D, 0xE6C53AD6, 0x48B7AE50, 0xED64F29B, + 0x5554F786, 0xF087AB4D, 
0x5EF53FCB, 0xFB266300, + 0x2E9435F1, 0x8B47693A, 0x2535FDBC, 0x80E6A177, + 0x38D6A46A, 0x9D05F8A1, 0x33776C27, 0x96A430EC, + 0xEE111C19, 0x4BC240D2, 0xE5B0D454, 0x4063889F, + 0xF8538D82, 0x5D80D149, 0xF3F245CF, 0x56211904, + 0x83934FF5, 0x2640133E, 0x883287B8, 0x2DE1DB73, + 0x95D1DE6E, 0x300282A5, 0x9E701623, 0x3BA34AE8, + 0x7513CA1A, 0xD0C096D1, 0x7EB20257, 0xDB615E9C, + 0x63515B81, 0xC682074A, 0x68F093CC, 0xCD23CF07, + 0x189199F6, 0xBD42C53D, 0x133051BB, 0xB6E30D70, + 0x0ED3086D, 0xAB0054A6, 0x0572C020, 0xA0A19CEB, + 0xB41EE811, 0x11CDB4DA, 0xBFBF205C, 0x1A6C7C97, + 0xA25C798A, 0x078F2541, 0xA9FDB1C7, 0x0C2EED0C, + 0xD99CBBFD, 0x7C4FE736, 0xD23D73B0, 0x77EE2F7B, + 0xCFDE2A66, 0x6A0D76AD, 0xC47FE22B, 0x61ACBEE0, + 0x2F1C3E12, 0x8ACF62D9, 0x24BDF65F, 0x816EAA94, + 0x395EAF89, 0x9C8DF342, 0x32FF67C4, 0x972C3B0F, + 0x429E6DFE, 0xE74D3135, 0x493FA5B3, 0xECECF978, + 0x54DCFC65, 0xF10FA0AE, 0x5F7D3428, 0xFAAE68E3, + 0x821B4416, 0x27C818DD, 0x89BA8C5B, 0x2C69D090, + 0x9459D58D, 0x318A8946, 0x9FF81DC0, 0x3A2B410B, + 0xEF9917FA, 0x4A4A4B31, 0xE438DFB7, 0x41EB837C, + 0xF9DB8661, 0x5C08DAAA, 0xF27A4E2C, 0x57A912E7, + 0x19199215, 0xBCCACEDE, 0x12B85A58, 0xB76B0693, + 0x0F5B038E, 0xAA885F45, 0x04FACBC3, 0xA1299708, + 0x749BC1F9, 0xD1489D32, 0x7F3A09B4, 0xDAE9557F, + 0x62D95062, 0xC70A0CA9, 0x6978982F, 0xCCABC4E4 + }, { + 0x00000000, 0xB40B77A6, 0x29119F97, 0x9D1AE831, + 0x13244FF4, 0xA72F3852, 0x3A35D063, 0x8E3EA7C5, + 0x674EEF33, 0xD3459895, 0x4E5F70A4, 0xFA540702, + 0x746AA0C7, 0xC061D761, 0x5D7B3F50, 0xE97048F6, + 0xCE9CDE67, 0x7A97A9C1, 0xE78D41F0, 0x53863656, + 0xDDB89193, 0x69B3E635, 0xF4A90E04, 0x40A279A2, + 0xA9D23154, 0x1DD946F2, 0x80C3AEC3, 0x34C8D965, + 0xBAF67EA0, 0x0EFD0906, 0x93E7E137, 0x27EC9691, + 0x9C39BDCF, 0x2832CA69, 0xB5282258, 0x012355FE, + 0x8F1DF23B, 0x3B16859D, 0xA60C6DAC, 0x12071A0A, + 0xFB7752FC, 0x4F7C255A, 0xD266CD6B, 0x666DBACD, + 0xE8531D08, 0x5C586AAE, 0xC142829F, 0x7549F539, + 0x52A563A8, 0xE6AE140E, 0x7BB4FC3F, 0xCFBF8B99, + 0x41812C5C, 
0xF58A5BFA, 0x6890B3CB, 0xDC9BC46D, + 0x35EB8C9B, 0x81E0FB3D, 0x1CFA130C, 0xA8F164AA, + 0x26CFC36F, 0x92C4B4C9, 0x0FDE5CF8, 0xBBD52B5E, + 0x79750B44, 0xCD7E7CE2, 0x506494D3, 0xE46FE375, + 0x6A5144B0, 0xDE5A3316, 0x4340DB27, 0xF74BAC81, + 0x1E3BE477, 0xAA3093D1, 0x372A7BE0, 0x83210C46, + 0x0D1FAB83, 0xB914DC25, 0x240E3414, 0x900543B2, + 0xB7E9D523, 0x03E2A285, 0x9EF84AB4, 0x2AF33D12, + 0xA4CD9AD7, 0x10C6ED71, 0x8DDC0540, 0x39D772E6, + 0xD0A73A10, 0x64AC4DB6, 0xF9B6A587, 0x4DBDD221, + 0xC38375E4, 0x77880242, 0xEA92EA73, 0x5E999DD5, + 0xE54CB68B, 0x5147C12D, 0xCC5D291C, 0x78565EBA, + 0xF668F97F, 0x42638ED9, 0xDF7966E8, 0x6B72114E, + 0x820259B8, 0x36092E1E, 0xAB13C62F, 0x1F18B189, + 0x9126164C, 0x252D61EA, 0xB83789DB, 0x0C3CFE7D, + 0x2BD068EC, 0x9FDB1F4A, 0x02C1F77B, 0xB6CA80DD, + 0x38F42718, 0x8CFF50BE, 0x11E5B88F, 0xA5EECF29, + 0x4C9E87DF, 0xF895F079, 0x658F1848, 0xD1846FEE, + 0x5FBAC82B, 0xEBB1BF8D, 0x76AB57BC, 0xC2A0201A, + 0xF2EA1688, 0x46E1612E, 0xDBFB891F, 0x6FF0FEB9, + 0xE1CE597C, 0x55C52EDA, 0xC8DFC6EB, 0x7CD4B14D, + 0x95A4F9BB, 0x21AF8E1D, 0xBCB5662C, 0x08BE118A, + 0x8680B64F, 0x328BC1E9, 0xAF9129D8, 0x1B9A5E7E, + 0x3C76C8EF, 0x887DBF49, 0x15675778, 0xA16C20DE, + 0x2F52871B, 0x9B59F0BD, 0x0643188C, 0xB2486F2A, + 0x5B3827DC, 0xEF33507A, 0x7229B84B, 0xC622CFED, + 0x481C6828, 0xFC171F8E, 0x610DF7BF, 0xD5068019, + 0x6ED3AB47, 0xDAD8DCE1, 0x47C234D0, 0xF3C94376, + 0x7DF7E4B3, 0xC9FC9315, 0x54E67B24, 0xE0ED0C82, + 0x099D4474, 0xBD9633D2, 0x208CDBE3, 0x9487AC45, + 0x1AB90B80, 0xAEB27C26, 0x33A89417, 0x87A3E3B1, + 0xA04F7520, 0x14440286, 0x895EEAB7, 0x3D559D11, + 0xB36B3AD4, 0x07604D72, 0x9A7AA543, 0x2E71D2E5, + 0xC7019A13, 0x730AEDB5, 0xEE100584, 0x5A1B7222, + 0xD425D5E7, 0x602EA241, 0xFD344A70, 0x493F3DD6, + 0x8B9F1DCC, 0x3F946A6A, 0xA28E825B, 0x1685F5FD, + 0x98BB5238, 0x2CB0259E, 0xB1AACDAF, 0x05A1BA09, + 0xECD1F2FF, 0x58DA8559, 0xC5C06D68, 0x71CB1ACE, + 0xFFF5BD0B, 0x4BFECAAD, 0xD6E4229C, 0x62EF553A, + 0x4503C3AB, 0xF108B40D, 0x6C125C3C, 0xD8192B9A, + 0x56278C5F, 
0xE22CFBF9, 0x7F3613C8, 0xCB3D646E, + 0x224D2C98, 0x96465B3E, 0x0B5CB30F, 0xBF57C4A9, + 0x3169636C, 0x856214CA, 0x1878FCFB, 0xAC738B5D, + 0x17A6A003, 0xA3ADD7A5, 0x3EB73F94, 0x8ABC4832, + 0x0482EFF7, 0xB0899851, 0x2D937060, 0x999807C6, + 0x70E84F30, 0xC4E33896, 0x59F9D0A7, 0xEDF2A701, + 0x63CC00C4, 0xD7C77762, 0x4ADD9F53, 0xFED6E8F5, + 0xD93A7E64, 0x6D3109C2, 0xF02BE1F3, 0x44209655, + 0xCA1E3190, 0x7E154636, 0xE30FAE07, 0x5704D9A1, + 0xBE749157, 0x0A7FE6F1, 0x97650EC0, 0x236E7966, + 0xAD50DEA3, 0x195BA905, 0x84414134, 0x304A3692 + }, { + 0x00000000, 0x9E00AACC, 0x7D072542, 0xE3078F8E, + 0xFA0E4A84, 0x640EE048, 0x87096FC6, 0x1909C50A, + 0xB51BE5D3, 0x2B1B4F1F, 0xC81CC091, 0x561C6A5D, + 0x4F15AF57, 0xD115059B, 0x32128A15, 0xAC1220D9, + 0x2B31BB7C, 0xB53111B0, 0x56369E3E, 0xC83634F2, + 0xD13FF1F8, 0x4F3F5B34, 0xAC38D4BA, 0x32387E76, + 0x9E2A5EAF, 0x002AF463, 0xE32D7BED, 0x7D2DD121, + 0x6424142B, 0xFA24BEE7, 0x19233169, 0x87239BA5, + 0x566276F9, 0xC862DC35, 0x2B6553BB, 0xB565F977, + 0xAC6C3C7D, 0x326C96B1, 0xD16B193F, 0x4F6BB3F3, + 0xE379932A, 0x7D7939E6, 0x9E7EB668, 0x007E1CA4, + 0x1977D9AE, 0x87777362, 0x6470FCEC, 0xFA705620, + 0x7D53CD85, 0xE3536749, 0x0054E8C7, 0x9E54420B, + 0x875D8701, 0x195D2DCD, 0xFA5AA243, 0x645A088F, + 0xC8482856, 0x5648829A, 0xB54F0D14, 0x2B4FA7D8, + 0x324662D2, 0xAC46C81E, 0x4F414790, 0xD141ED5C, + 0xEDC29D29, 0x73C237E5, 0x90C5B86B, 0x0EC512A7, + 0x17CCD7AD, 0x89CC7D61, 0x6ACBF2EF, 0xF4CB5823, + 0x58D978FA, 0xC6D9D236, 0x25DE5DB8, 0xBBDEF774, + 0xA2D7327E, 0x3CD798B2, 0xDFD0173C, 0x41D0BDF0, + 0xC6F32655, 0x58F38C99, 0xBBF40317, 0x25F4A9DB, + 0x3CFD6CD1, 0xA2FDC61D, 0x41FA4993, 0xDFFAE35F, + 0x73E8C386, 0xEDE8694A, 0x0EEFE6C4, 0x90EF4C08, + 0x89E68902, 0x17E623CE, 0xF4E1AC40, 0x6AE1068C, + 0xBBA0EBD0, 0x25A0411C, 0xC6A7CE92, 0x58A7645E, + 0x41AEA154, 0xDFAE0B98, 0x3CA98416, 0xA2A92EDA, + 0x0EBB0E03, 0x90BBA4CF, 0x73BC2B41, 0xEDBC818D, + 0xF4B54487, 0x6AB5EE4B, 0x89B261C5, 0x17B2CB09, + 0x909150AC, 0x0E91FA60, 0xED9675EE, 0x7396DF22, + 
0x6A9F1A28, 0xF49FB0E4, 0x17983F6A, 0x899895A6, + 0x258AB57F, 0xBB8A1FB3, 0x588D903D, 0xC68D3AF1, + 0xDF84FFFB, 0x41845537, 0xA283DAB9, 0x3C837075, + 0xDA853B53, 0x4485919F, 0xA7821E11, 0x3982B4DD, + 0x208B71D7, 0xBE8BDB1B, 0x5D8C5495, 0xC38CFE59, + 0x6F9EDE80, 0xF19E744C, 0x1299FBC2, 0x8C99510E, + 0x95909404, 0x0B903EC8, 0xE897B146, 0x76971B8A, + 0xF1B4802F, 0x6FB42AE3, 0x8CB3A56D, 0x12B30FA1, + 0x0BBACAAB, 0x95BA6067, 0x76BDEFE9, 0xE8BD4525, + 0x44AF65FC, 0xDAAFCF30, 0x39A840BE, 0xA7A8EA72, + 0xBEA12F78, 0x20A185B4, 0xC3A60A3A, 0x5DA6A0F6, + 0x8CE74DAA, 0x12E7E766, 0xF1E068E8, 0x6FE0C224, + 0x76E9072E, 0xE8E9ADE2, 0x0BEE226C, 0x95EE88A0, + 0x39FCA879, 0xA7FC02B5, 0x44FB8D3B, 0xDAFB27F7, + 0xC3F2E2FD, 0x5DF24831, 0xBEF5C7BF, 0x20F56D73, + 0xA7D6F6D6, 0x39D65C1A, 0xDAD1D394, 0x44D17958, + 0x5DD8BC52, 0xC3D8169E, 0x20DF9910, 0xBEDF33DC, + 0x12CD1305, 0x8CCDB9C9, 0x6FCA3647, 0xF1CA9C8B, + 0xE8C35981, 0x76C3F34D, 0x95C47CC3, 0x0BC4D60F, + 0x3747A67A, 0xA9470CB6, 0x4A408338, 0xD44029F4, + 0xCD49ECFE, 0x53494632, 0xB04EC9BC, 0x2E4E6370, + 0x825C43A9, 0x1C5CE965, 0xFF5B66EB, 0x615BCC27, + 0x7852092D, 0xE652A3E1, 0x05552C6F, 0x9B5586A3, + 0x1C761D06, 0x8276B7CA, 0x61713844, 0xFF719288, + 0xE6785782, 0x7878FD4E, 0x9B7F72C0, 0x057FD80C, + 0xA96DF8D5, 0x376D5219, 0xD46ADD97, 0x4A6A775B, + 0x5363B251, 0xCD63189D, 0x2E649713, 0xB0643DDF, + 0x6125D083, 0xFF257A4F, 0x1C22F5C1, 0x82225F0D, + 0x9B2B9A07, 0x052B30CB, 0xE62CBF45, 0x782C1589, + 0xD43E3550, 0x4A3E9F9C, 0xA9391012, 0x3739BADE, + 0x2E307FD4, 0xB030D518, 0x53375A96, 0xCD37F05A, + 0x4A146BFF, 0xD414C133, 0x37134EBD, 0xA913E471, + 0xB01A217B, 0x2E1A8BB7, 0xCD1D0439, 0x531DAEF5, + 0xFF0F8E2C, 0x610F24E0, 0x8208AB6E, 0x1C0801A2, + 0x0501C4A8, 0x9B016E64, 0x7806E1EA, 0xE6064B26 + } +}; diff --git a/src/liblzma/check/crc32_table_le.h b/src/liblzma/check/crc32_table_le.h new file mode 100644 index 000000000000..25f4fc443537 --- /dev/null +++ b/src/liblzma/check/crc32_table_le.h @@ -0,0 +1,525 @@ +/* This file has been 
automatically generated by crc32_tablegen.c. */ + +const uint32_t lzma_crc32_table[8][256] = { + { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, + 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, + 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, + 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, + 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, + 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, + 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, + 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, + 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, + 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, + 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, + 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, + 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, + 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, + 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, + 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, + 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, + 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, + 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, + 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, + 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, + 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, + 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, + 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, + 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, + 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, + 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, + 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, + 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, + 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, 
+ 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, + 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, + 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, + 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, + 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, + 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, + 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, + 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, + 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, + 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, + 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, + 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, + 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, + 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, + 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, + 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, + 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, + 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, + 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, + 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, + 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, + 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D + }, { + 0x00000000, 0x191B3141, 0x32366282, 0x2B2D53C3, + 0x646CC504, 0x7D77F445, 0x565AA786, 0x4F4196C7, + 0xC8D98A08, 0xD1C2BB49, 0xFAEFE88A, 0xE3F4D9CB, + 0xACB54F0C, 0xB5AE7E4D, 0x9E832D8E, 0x87981CCF, + 0x4AC21251, 0x53D92310, 0x78F470D3, 0x61EF4192, + 0x2EAED755, 0x37B5E614, 0x1C98B5D7, 0x05838496, + 0x821B9859, 0x9B00A918, 0xB02DFADB, 0xA936CB9A, + 0xE6775D5D, 0xFF6C6C1C, 0xD4413FDF, 0xCD5A0E9E, + 0x958424A2, 0x8C9F15E3, 0xA7B24620, 0xBEA97761, + 0xF1E8E1A6, 0xE8F3D0E7, 0xC3DE8324, 0xDAC5B265, + 0x5D5DAEAA, 0x44469FEB, 0x6F6BCC28, 0x7670FD69, + 0x39316BAE, 0x202A5AEF, 0x0B07092C, 0x121C386D, + 0xDF4636F3, 0xC65D07B2, 0xED705471, 0xF46B6530, + 0xBB2AF3F7, 0xA231C2B6, 0x891C9175, 
0x9007A034, + 0x179FBCFB, 0x0E848DBA, 0x25A9DE79, 0x3CB2EF38, + 0x73F379FF, 0x6AE848BE, 0x41C51B7D, 0x58DE2A3C, + 0xF0794F05, 0xE9627E44, 0xC24F2D87, 0xDB541CC6, + 0x94158A01, 0x8D0EBB40, 0xA623E883, 0xBF38D9C2, + 0x38A0C50D, 0x21BBF44C, 0x0A96A78F, 0x138D96CE, + 0x5CCC0009, 0x45D73148, 0x6EFA628B, 0x77E153CA, + 0xBABB5D54, 0xA3A06C15, 0x888D3FD6, 0x91960E97, + 0xDED79850, 0xC7CCA911, 0xECE1FAD2, 0xF5FACB93, + 0x7262D75C, 0x6B79E61D, 0x4054B5DE, 0x594F849F, + 0x160E1258, 0x0F152319, 0x243870DA, 0x3D23419B, + 0x65FD6BA7, 0x7CE65AE6, 0x57CB0925, 0x4ED03864, + 0x0191AEA3, 0x188A9FE2, 0x33A7CC21, 0x2ABCFD60, + 0xAD24E1AF, 0xB43FD0EE, 0x9F12832D, 0x8609B26C, + 0xC94824AB, 0xD05315EA, 0xFB7E4629, 0xE2657768, + 0x2F3F79F6, 0x362448B7, 0x1D091B74, 0x04122A35, + 0x4B53BCF2, 0x52488DB3, 0x7965DE70, 0x607EEF31, + 0xE7E6F3FE, 0xFEFDC2BF, 0xD5D0917C, 0xCCCBA03D, + 0x838A36FA, 0x9A9107BB, 0xB1BC5478, 0xA8A76539, + 0x3B83984B, 0x2298A90A, 0x09B5FAC9, 0x10AECB88, + 0x5FEF5D4F, 0x46F46C0E, 0x6DD93FCD, 0x74C20E8C, + 0xF35A1243, 0xEA412302, 0xC16C70C1, 0xD8774180, + 0x9736D747, 0x8E2DE606, 0xA500B5C5, 0xBC1B8484, + 0x71418A1A, 0x685ABB5B, 0x4377E898, 0x5A6CD9D9, + 0x152D4F1E, 0x0C367E5F, 0x271B2D9C, 0x3E001CDD, + 0xB9980012, 0xA0833153, 0x8BAE6290, 0x92B553D1, + 0xDDF4C516, 0xC4EFF457, 0xEFC2A794, 0xF6D996D5, + 0xAE07BCE9, 0xB71C8DA8, 0x9C31DE6B, 0x852AEF2A, + 0xCA6B79ED, 0xD37048AC, 0xF85D1B6F, 0xE1462A2E, + 0x66DE36E1, 0x7FC507A0, 0x54E85463, 0x4DF36522, + 0x02B2F3E5, 0x1BA9C2A4, 0x30849167, 0x299FA026, + 0xE4C5AEB8, 0xFDDE9FF9, 0xD6F3CC3A, 0xCFE8FD7B, + 0x80A96BBC, 0x99B25AFD, 0xB29F093E, 0xAB84387F, + 0x2C1C24B0, 0x350715F1, 0x1E2A4632, 0x07317773, + 0x4870E1B4, 0x516BD0F5, 0x7A468336, 0x635DB277, + 0xCBFAD74E, 0xD2E1E60F, 0xF9CCB5CC, 0xE0D7848D, + 0xAF96124A, 0xB68D230B, 0x9DA070C8, 0x84BB4189, + 0x03235D46, 0x1A386C07, 0x31153FC4, 0x280E0E85, + 0x674F9842, 0x7E54A903, 0x5579FAC0, 0x4C62CB81, + 0x8138C51F, 0x9823F45E, 0xB30EA79D, 0xAA1596DC, + 0xE554001B, 0xFC4F315A, 0xD7626299, 
0xCE7953D8, + 0x49E14F17, 0x50FA7E56, 0x7BD72D95, 0x62CC1CD4, + 0x2D8D8A13, 0x3496BB52, 0x1FBBE891, 0x06A0D9D0, + 0x5E7EF3EC, 0x4765C2AD, 0x6C48916E, 0x7553A02F, + 0x3A1236E8, 0x230907A9, 0x0824546A, 0x113F652B, + 0x96A779E4, 0x8FBC48A5, 0xA4911B66, 0xBD8A2A27, + 0xF2CBBCE0, 0xEBD08DA1, 0xC0FDDE62, 0xD9E6EF23, + 0x14BCE1BD, 0x0DA7D0FC, 0x268A833F, 0x3F91B27E, + 0x70D024B9, 0x69CB15F8, 0x42E6463B, 0x5BFD777A, + 0xDC656BB5, 0xC57E5AF4, 0xEE530937, 0xF7483876, + 0xB809AEB1, 0xA1129FF0, 0x8A3FCC33, 0x9324FD72 + }, { + 0x00000000, 0x01C26A37, 0x0384D46E, 0x0246BE59, + 0x0709A8DC, 0x06CBC2EB, 0x048D7CB2, 0x054F1685, + 0x0E1351B8, 0x0FD13B8F, 0x0D9785D6, 0x0C55EFE1, + 0x091AF964, 0x08D89353, 0x0A9E2D0A, 0x0B5C473D, + 0x1C26A370, 0x1DE4C947, 0x1FA2771E, 0x1E601D29, + 0x1B2F0BAC, 0x1AED619B, 0x18ABDFC2, 0x1969B5F5, + 0x1235F2C8, 0x13F798FF, 0x11B126A6, 0x10734C91, + 0x153C5A14, 0x14FE3023, 0x16B88E7A, 0x177AE44D, + 0x384D46E0, 0x398F2CD7, 0x3BC9928E, 0x3A0BF8B9, + 0x3F44EE3C, 0x3E86840B, 0x3CC03A52, 0x3D025065, + 0x365E1758, 0x379C7D6F, 0x35DAC336, 0x3418A901, + 0x3157BF84, 0x3095D5B3, 0x32D36BEA, 0x331101DD, + 0x246BE590, 0x25A98FA7, 0x27EF31FE, 0x262D5BC9, + 0x23624D4C, 0x22A0277B, 0x20E69922, 0x2124F315, + 0x2A78B428, 0x2BBADE1F, 0x29FC6046, 0x283E0A71, + 0x2D711CF4, 0x2CB376C3, 0x2EF5C89A, 0x2F37A2AD, + 0x709A8DC0, 0x7158E7F7, 0x731E59AE, 0x72DC3399, + 0x7793251C, 0x76514F2B, 0x7417F172, 0x75D59B45, + 0x7E89DC78, 0x7F4BB64F, 0x7D0D0816, 0x7CCF6221, + 0x798074A4, 0x78421E93, 0x7A04A0CA, 0x7BC6CAFD, + 0x6CBC2EB0, 0x6D7E4487, 0x6F38FADE, 0x6EFA90E9, + 0x6BB5866C, 0x6A77EC5B, 0x68315202, 0x69F33835, + 0x62AF7F08, 0x636D153F, 0x612BAB66, 0x60E9C151, + 0x65A6D7D4, 0x6464BDE3, 0x662203BA, 0x67E0698D, + 0x48D7CB20, 0x4915A117, 0x4B531F4E, 0x4A917579, + 0x4FDE63FC, 0x4E1C09CB, 0x4C5AB792, 0x4D98DDA5, + 0x46C49A98, 0x4706F0AF, 0x45404EF6, 0x448224C1, + 0x41CD3244, 0x400F5873, 0x4249E62A, 0x438B8C1D, + 0x54F16850, 0x55330267, 0x5775BC3E, 0x56B7D609, + 0x53F8C08C, 0x523AAABB, 
0x507C14E2, 0x51BE7ED5, + 0x5AE239E8, 0x5B2053DF, 0x5966ED86, 0x58A487B1, + 0x5DEB9134, 0x5C29FB03, 0x5E6F455A, 0x5FAD2F6D, + 0xE1351B80, 0xE0F771B7, 0xE2B1CFEE, 0xE373A5D9, + 0xE63CB35C, 0xE7FED96B, 0xE5B86732, 0xE47A0D05, + 0xEF264A38, 0xEEE4200F, 0xECA29E56, 0xED60F461, + 0xE82FE2E4, 0xE9ED88D3, 0xEBAB368A, 0xEA695CBD, + 0xFD13B8F0, 0xFCD1D2C7, 0xFE976C9E, 0xFF5506A9, + 0xFA1A102C, 0xFBD87A1B, 0xF99EC442, 0xF85CAE75, + 0xF300E948, 0xF2C2837F, 0xF0843D26, 0xF1465711, + 0xF4094194, 0xF5CB2BA3, 0xF78D95FA, 0xF64FFFCD, + 0xD9785D60, 0xD8BA3757, 0xDAFC890E, 0xDB3EE339, + 0xDE71F5BC, 0xDFB39F8B, 0xDDF521D2, 0xDC374BE5, + 0xD76B0CD8, 0xD6A966EF, 0xD4EFD8B6, 0xD52DB281, + 0xD062A404, 0xD1A0CE33, 0xD3E6706A, 0xD2241A5D, + 0xC55EFE10, 0xC49C9427, 0xC6DA2A7E, 0xC7184049, + 0xC25756CC, 0xC3953CFB, 0xC1D382A2, 0xC011E895, + 0xCB4DAFA8, 0xCA8FC59F, 0xC8C97BC6, 0xC90B11F1, + 0xCC440774, 0xCD866D43, 0xCFC0D31A, 0xCE02B92D, + 0x91AF9640, 0x906DFC77, 0x922B422E, 0x93E92819, + 0x96A63E9C, 0x976454AB, 0x9522EAF2, 0x94E080C5, + 0x9FBCC7F8, 0x9E7EADCF, 0x9C381396, 0x9DFA79A1, + 0x98B56F24, 0x99770513, 0x9B31BB4A, 0x9AF3D17D, + 0x8D893530, 0x8C4B5F07, 0x8E0DE15E, 0x8FCF8B69, + 0x8A809DEC, 0x8B42F7DB, 0x89044982, 0x88C623B5, + 0x839A6488, 0x82580EBF, 0x801EB0E6, 0x81DCDAD1, + 0x8493CC54, 0x8551A663, 0x8717183A, 0x86D5720D, + 0xA9E2D0A0, 0xA820BA97, 0xAA6604CE, 0xABA46EF9, + 0xAEEB787C, 0xAF29124B, 0xAD6FAC12, 0xACADC625, + 0xA7F18118, 0xA633EB2F, 0xA4755576, 0xA5B73F41, + 0xA0F829C4, 0xA13A43F3, 0xA37CFDAA, 0xA2BE979D, + 0xB5C473D0, 0xB40619E7, 0xB640A7BE, 0xB782CD89, + 0xB2CDDB0C, 0xB30FB13B, 0xB1490F62, 0xB08B6555, + 0xBBD72268, 0xBA15485F, 0xB853F606, 0xB9919C31, + 0xBCDE8AB4, 0xBD1CE083, 0xBF5A5EDA, 0xBE9834ED + }, { + 0x00000000, 0xB8BC6765, 0xAA09C88B, 0x12B5AFEE, + 0x8F629757, 0x37DEF032, 0x256B5FDC, 0x9DD738B9, + 0xC5B428EF, 0x7D084F8A, 0x6FBDE064, 0xD7018701, + 0x4AD6BFB8, 0xF26AD8DD, 0xE0DF7733, 0x58631056, + 0x5019579F, 0xE8A530FA, 0xFA109F14, 0x42ACF871, + 0xDF7BC0C8, 
0x67C7A7AD, 0x75720843, 0xCDCE6F26, + 0x95AD7F70, 0x2D111815, 0x3FA4B7FB, 0x8718D09E, + 0x1ACFE827, 0xA2738F42, 0xB0C620AC, 0x087A47C9, + 0xA032AF3E, 0x188EC85B, 0x0A3B67B5, 0xB28700D0, + 0x2F503869, 0x97EC5F0C, 0x8559F0E2, 0x3DE59787, + 0x658687D1, 0xDD3AE0B4, 0xCF8F4F5A, 0x7733283F, + 0xEAE41086, 0x525877E3, 0x40EDD80D, 0xF851BF68, + 0xF02BF8A1, 0x48979FC4, 0x5A22302A, 0xE29E574F, + 0x7F496FF6, 0xC7F50893, 0xD540A77D, 0x6DFCC018, + 0x359FD04E, 0x8D23B72B, 0x9F9618C5, 0x272A7FA0, + 0xBAFD4719, 0x0241207C, 0x10F48F92, 0xA848E8F7, + 0x9B14583D, 0x23A83F58, 0x311D90B6, 0x89A1F7D3, + 0x1476CF6A, 0xACCAA80F, 0xBE7F07E1, 0x06C36084, + 0x5EA070D2, 0xE61C17B7, 0xF4A9B859, 0x4C15DF3C, + 0xD1C2E785, 0x697E80E0, 0x7BCB2F0E, 0xC377486B, + 0xCB0D0FA2, 0x73B168C7, 0x6104C729, 0xD9B8A04C, + 0x446F98F5, 0xFCD3FF90, 0xEE66507E, 0x56DA371B, + 0x0EB9274D, 0xB6054028, 0xA4B0EFC6, 0x1C0C88A3, + 0x81DBB01A, 0x3967D77F, 0x2BD27891, 0x936E1FF4, + 0x3B26F703, 0x839A9066, 0x912F3F88, 0x299358ED, + 0xB4446054, 0x0CF80731, 0x1E4DA8DF, 0xA6F1CFBA, + 0xFE92DFEC, 0x462EB889, 0x549B1767, 0xEC277002, + 0x71F048BB, 0xC94C2FDE, 0xDBF98030, 0x6345E755, + 0x6B3FA09C, 0xD383C7F9, 0xC1366817, 0x798A0F72, + 0xE45D37CB, 0x5CE150AE, 0x4E54FF40, 0xF6E89825, + 0xAE8B8873, 0x1637EF16, 0x048240F8, 0xBC3E279D, + 0x21E91F24, 0x99557841, 0x8BE0D7AF, 0x335CB0CA, + 0xED59B63B, 0x55E5D15E, 0x47507EB0, 0xFFEC19D5, + 0x623B216C, 0xDA874609, 0xC832E9E7, 0x708E8E82, + 0x28ED9ED4, 0x9051F9B1, 0x82E4565F, 0x3A58313A, + 0xA78F0983, 0x1F336EE6, 0x0D86C108, 0xB53AA66D, + 0xBD40E1A4, 0x05FC86C1, 0x1749292F, 0xAFF54E4A, + 0x322276F3, 0x8A9E1196, 0x982BBE78, 0x2097D91D, + 0x78F4C94B, 0xC048AE2E, 0xD2FD01C0, 0x6A4166A5, + 0xF7965E1C, 0x4F2A3979, 0x5D9F9697, 0xE523F1F2, + 0x4D6B1905, 0xF5D77E60, 0xE762D18E, 0x5FDEB6EB, + 0xC2098E52, 0x7AB5E937, 0x680046D9, 0xD0BC21BC, + 0x88DF31EA, 0x3063568F, 0x22D6F961, 0x9A6A9E04, + 0x07BDA6BD, 0xBF01C1D8, 0xADB46E36, 0x15080953, + 0x1D724E9A, 0xA5CE29FF, 0xB77B8611, 0x0FC7E174, + 0x9210D9CD, 
0x2AACBEA8, 0x38191146, 0x80A57623, + 0xD8C66675, 0x607A0110, 0x72CFAEFE, 0xCA73C99B, + 0x57A4F122, 0xEF189647, 0xFDAD39A9, 0x45115ECC, + 0x764DEE06, 0xCEF18963, 0xDC44268D, 0x64F841E8, + 0xF92F7951, 0x41931E34, 0x5326B1DA, 0xEB9AD6BF, + 0xB3F9C6E9, 0x0B45A18C, 0x19F00E62, 0xA14C6907, + 0x3C9B51BE, 0x842736DB, 0x96929935, 0x2E2EFE50, + 0x2654B999, 0x9EE8DEFC, 0x8C5D7112, 0x34E11677, + 0xA9362ECE, 0x118A49AB, 0x033FE645, 0xBB838120, + 0xE3E09176, 0x5B5CF613, 0x49E959FD, 0xF1553E98, + 0x6C820621, 0xD43E6144, 0xC68BCEAA, 0x7E37A9CF, + 0xD67F4138, 0x6EC3265D, 0x7C7689B3, 0xC4CAEED6, + 0x591DD66F, 0xE1A1B10A, 0xF3141EE4, 0x4BA87981, + 0x13CB69D7, 0xAB770EB2, 0xB9C2A15C, 0x017EC639, + 0x9CA9FE80, 0x241599E5, 0x36A0360B, 0x8E1C516E, + 0x866616A7, 0x3EDA71C2, 0x2C6FDE2C, 0x94D3B949, + 0x090481F0, 0xB1B8E695, 0xA30D497B, 0x1BB12E1E, + 0x43D23E48, 0xFB6E592D, 0xE9DBF6C3, 0x516791A6, + 0xCCB0A91F, 0x740CCE7A, 0x66B96194, 0xDE0506F1 + }, { + 0x00000000, 0x3D6029B0, 0x7AC05360, 0x47A07AD0, + 0xF580A6C0, 0xC8E08F70, 0x8F40F5A0, 0xB220DC10, + 0x30704BC1, 0x0D106271, 0x4AB018A1, 0x77D03111, + 0xC5F0ED01, 0xF890C4B1, 0xBF30BE61, 0x825097D1, + 0x60E09782, 0x5D80BE32, 0x1A20C4E2, 0x2740ED52, + 0x95603142, 0xA80018F2, 0xEFA06222, 0xD2C04B92, + 0x5090DC43, 0x6DF0F5F3, 0x2A508F23, 0x1730A693, + 0xA5107A83, 0x98705333, 0xDFD029E3, 0xE2B00053, + 0xC1C12F04, 0xFCA106B4, 0xBB017C64, 0x866155D4, + 0x344189C4, 0x0921A074, 0x4E81DAA4, 0x73E1F314, + 0xF1B164C5, 0xCCD14D75, 0x8B7137A5, 0xB6111E15, + 0x0431C205, 0x3951EBB5, 0x7EF19165, 0x4391B8D5, + 0xA121B886, 0x9C419136, 0xDBE1EBE6, 0xE681C256, + 0x54A11E46, 0x69C137F6, 0x2E614D26, 0x13016496, + 0x9151F347, 0xAC31DAF7, 0xEB91A027, 0xD6F18997, + 0x64D15587, 0x59B17C37, 0x1E1106E7, 0x23712F57, + 0x58F35849, 0x659371F9, 0x22330B29, 0x1F532299, + 0xAD73FE89, 0x9013D739, 0xD7B3ADE9, 0xEAD38459, + 0x68831388, 0x55E33A38, 0x124340E8, 0x2F236958, + 0x9D03B548, 0xA0639CF8, 0xE7C3E628, 0xDAA3CF98, + 0x3813CFCB, 0x0573E67B, 0x42D39CAB, 0x7FB3B51B, + 
0xCD93690B, 0xF0F340BB, 0xB7533A6B, 0x8A3313DB, + 0x0863840A, 0x3503ADBA, 0x72A3D76A, 0x4FC3FEDA, + 0xFDE322CA, 0xC0830B7A, 0x872371AA, 0xBA43581A, + 0x9932774D, 0xA4525EFD, 0xE3F2242D, 0xDE920D9D, + 0x6CB2D18D, 0x51D2F83D, 0x167282ED, 0x2B12AB5D, + 0xA9423C8C, 0x9422153C, 0xD3826FEC, 0xEEE2465C, + 0x5CC29A4C, 0x61A2B3FC, 0x2602C92C, 0x1B62E09C, + 0xF9D2E0CF, 0xC4B2C97F, 0x8312B3AF, 0xBE729A1F, + 0x0C52460F, 0x31326FBF, 0x7692156F, 0x4BF23CDF, + 0xC9A2AB0E, 0xF4C282BE, 0xB362F86E, 0x8E02D1DE, + 0x3C220DCE, 0x0142247E, 0x46E25EAE, 0x7B82771E, + 0xB1E6B092, 0x8C869922, 0xCB26E3F2, 0xF646CA42, + 0x44661652, 0x79063FE2, 0x3EA64532, 0x03C66C82, + 0x8196FB53, 0xBCF6D2E3, 0xFB56A833, 0xC6368183, + 0x74165D93, 0x49767423, 0x0ED60EF3, 0x33B62743, + 0xD1062710, 0xEC660EA0, 0xABC67470, 0x96A65DC0, + 0x248681D0, 0x19E6A860, 0x5E46D2B0, 0x6326FB00, + 0xE1766CD1, 0xDC164561, 0x9BB63FB1, 0xA6D61601, + 0x14F6CA11, 0x2996E3A1, 0x6E369971, 0x5356B0C1, + 0x70279F96, 0x4D47B626, 0x0AE7CCF6, 0x3787E546, + 0x85A73956, 0xB8C710E6, 0xFF676A36, 0xC2074386, + 0x4057D457, 0x7D37FDE7, 0x3A978737, 0x07F7AE87, + 0xB5D77297, 0x88B75B27, 0xCF1721F7, 0xF2770847, + 0x10C70814, 0x2DA721A4, 0x6A075B74, 0x576772C4, + 0xE547AED4, 0xD8278764, 0x9F87FDB4, 0xA2E7D404, + 0x20B743D5, 0x1DD76A65, 0x5A7710B5, 0x67173905, + 0xD537E515, 0xE857CCA5, 0xAFF7B675, 0x92979FC5, + 0xE915E8DB, 0xD475C16B, 0x93D5BBBB, 0xAEB5920B, + 0x1C954E1B, 0x21F567AB, 0x66551D7B, 0x5B3534CB, + 0xD965A31A, 0xE4058AAA, 0xA3A5F07A, 0x9EC5D9CA, + 0x2CE505DA, 0x11852C6A, 0x562556BA, 0x6B457F0A, + 0x89F57F59, 0xB49556E9, 0xF3352C39, 0xCE550589, + 0x7C75D999, 0x4115F029, 0x06B58AF9, 0x3BD5A349, + 0xB9853498, 0x84E51D28, 0xC34567F8, 0xFE254E48, + 0x4C059258, 0x7165BBE8, 0x36C5C138, 0x0BA5E888, + 0x28D4C7DF, 0x15B4EE6F, 0x521494BF, 0x6F74BD0F, + 0xDD54611F, 0xE03448AF, 0xA794327F, 0x9AF41BCF, + 0x18A48C1E, 0x25C4A5AE, 0x6264DF7E, 0x5F04F6CE, + 0xED242ADE, 0xD044036E, 0x97E479BE, 0xAA84500E, + 0x4834505D, 0x755479ED, 0x32F4033D, 0x0F942A8D, + 
0xBDB4F69D, 0x80D4DF2D, 0xC774A5FD, 0xFA148C4D, + 0x78441B9C, 0x4524322C, 0x028448FC, 0x3FE4614C, + 0x8DC4BD5C, 0xB0A494EC, 0xF704EE3C, 0xCA64C78C + }, { + 0x00000000, 0xCB5CD3A5, 0x4DC8A10B, 0x869472AE, + 0x9B914216, 0x50CD91B3, 0xD659E31D, 0x1D0530B8, + 0xEC53826D, 0x270F51C8, 0xA19B2366, 0x6AC7F0C3, + 0x77C2C07B, 0xBC9E13DE, 0x3A0A6170, 0xF156B2D5, + 0x03D6029B, 0xC88AD13E, 0x4E1EA390, 0x85427035, + 0x9847408D, 0x531B9328, 0xD58FE186, 0x1ED33223, + 0xEF8580F6, 0x24D95353, 0xA24D21FD, 0x6911F258, + 0x7414C2E0, 0xBF481145, 0x39DC63EB, 0xF280B04E, + 0x07AC0536, 0xCCF0D693, 0x4A64A43D, 0x81387798, + 0x9C3D4720, 0x57619485, 0xD1F5E62B, 0x1AA9358E, + 0xEBFF875B, 0x20A354FE, 0xA6372650, 0x6D6BF5F5, + 0x706EC54D, 0xBB3216E8, 0x3DA66446, 0xF6FAB7E3, + 0x047A07AD, 0xCF26D408, 0x49B2A6A6, 0x82EE7503, + 0x9FEB45BB, 0x54B7961E, 0xD223E4B0, 0x197F3715, + 0xE82985C0, 0x23755665, 0xA5E124CB, 0x6EBDF76E, + 0x73B8C7D6, 0xB8E41473, 0x3E7066DD, 0xF52CB578, + 0x0F580A6C, 0xC404D9C9, 0x4290AB67, 0x89CC78C2, + 0x94C9487A, 0x5F959BDF, 0xD901E971, 0x125D3AD4, + 0xE30B8801, 0x28575BA4, 0xAEC3290A, 0x659FFAAF, + 0x789ACA17, 0xB3C619B2, 0x35526B1C, 0xFE0EB8B9, + 0x0C8E08F7, 0xC7D2DB52, 0x4146A9FC, 0x8A1A7A59, + 0x971F4AE1, 0x5C439944, 0xDAD7EBEA, 0x118B384F, + 0xE0DD8A9A, 0x2B81593F, 0xAD152B91, 0x6649F834, + 0x7B4CC88C, 0xB0101B29, 0x36846987, 0xFDD8BA22, + 0x08F40F5A, 0xC3A8DCFF, 0x453CAE51, 0x8E607DF4, + 0x93654D4C, 0x58399EE9, 0xDEADEC47, 0x15F13FE2, + 0xE4A78D37, 0x2FFB5E92, 0xA96F2C3C, 0x6233FF99, + 0x7F36CF21, 0xB46A1C84, 0x32FE6E2A, 0xF9A2BD8F, + 0x0B220DC1, 0xC07EDE64, 0x46EAACCA, 0x8DB67F6F, + 0x90B34FD7, 0x5BEF9C72, 0xDD7BEEDC, 0x16273D79, + 0xE7718FAC, 0x2C2D5C09, 0xAAB92EA7, 0x61E5FD02, + 0x7CE0CDBA, 0xB7BC1E1F, 0x31286CB1, 0xFA74BF14, + 0x1EB014D8, 0xD5ECC77D, 0x5378B5D3, 0x98246676, + 0x852156CE, 0x4E7D856B, 0xC8E9F7C5, 0x03B52460, + 0xF2E396B5, 0x39BF4510, 0xBF2B37BE, 0x7477E41B, + 0x6972D4A3, 0xA22E0706, 0x24BA75A8, 0xEFE6A60D, + 0x1D661643, 0xD63AC5E6, 0x50AEB748, 
0x9BF264ED, + 0x86F75455, 0x4DAB87F0, 0xCB3FF55E, 0x006326FB, + 0xF135942E, 0x3A69478B, 0xBCFD3525, 0x77A1E680, + 0x6AA4D638, 0xA1F8059D, 0x276C7733, 0xEC30A496, + 0x191C11EE, 0xD240C24B, 0x54D4B0E5, 0x9F886340, + 0x828D53F8, 0x49D1805D, 0xCF45F2F3, 0x04192156, + 0xF54F9383, 0x3E134026, 0xB8873288, 0x73DBE12D, + 0x6EDED195, 0xA5820230, 0x2316709E, 0xE84AA33B, + 0x1ACA1375, 0xD196C0D0, 0x5702B27E, 0x9C5E61DB, + 0x815B5163, 0x4A0782C6, 0xCC93F068, 0x07CF23CD, + 0xF6999118, 0x3DC542BD, 0xBB513013, 0x700DE3B6, + 0x6D08D30E, 0xA65400AB, 0x20C07205, 0xEB9CA1A0, + 0x11E81EB4, 0xDAB4CD11, 0x5C20BFBF, 0x977C6C1A, + 0x8A795CA2, 0x41258F07, 0xC7B1FDA9, 0x0CED2E0C, + 0xFDBB9CD9, 0x36E74F7C, 0xB0733DD2, 0x7B2FEE77, + 0x662ADECF, 0xAD760D6A, 0x2BE27FC4, 0xE0BEAC61, + 0x123E1C2F, 0xD962CF8A, 0x5FF6BD24, 0x94AA6E81, + 0x89AF5E39, 0x42F38D9C, 0xC467FF32, 0x0F3B2C97, + 0xFE6D9E42, 0x35314DE7, 0xB3A53F49, 0x78F9ECEC, + 0x65FCDC54, 0xAEA00FF1, 0x28347D5F, 0xE368AEFA, + 0x16441B82, 0xDD18C827, 0x5B8CBA89, 0x90D0692C, + 0x8DD55994, 0x46898A31, 0xC01DF89F, 0x0B412B3A, + 0xFA1799EF, 0x314B4A4A, 0xB7DF38E4, 0x7C83EB41, + 0x6186DBF9, 0xAADA085C, 0x2C4E7AF2, 0xE712A957, + 0x15921919, 0xDECECABC, 0x585AB812, 0x93066BB7, + 0x8E035B0F, 0x455F88AA, 0xC3CBFA04, 0x089729A1, + 0xF9C19B74, 0x329D48D1, 0xB4093A7F, 0x7F55E9DA, + 0x6250D962, 0xA90C0AC7, 0x2F987869, 0xE4C4ABCC + }, { + 0x00000000, 0xA6770BB4, 0x979F1129, 0x31E81A9D, + 0xF44F2413, 0x52382FA7, 0x63D0353A, 0xC5A73E8E, + 0x33EF4E67, 0x959845D3, 0xA4705F4E, 0x020754FA, + 0xC7A06A74, 0x61D761C0, 0x503F7B5D, 0xF64870E9, + 0x67DE9CCE, 0xC1A9977A, 0xF0418DE7, 0x56368653, + 0x9391B8DD, 0x35E6B369, 0x040EA9F4, 0xA279A240, + 0x5431D2A9, 0xF246D91D, 0xC3AEC380, 0x65D9C834, + 0xA07EF6BA, 0x0609FD0E, 0x37E1E793, 0x9196EC27, + 0xCFBD399C, 0x69CA3228, 0x582228B5, 0xFE552301, + 0x3BF21D8F, 0x9D85163B, 0xAC6D0CA6, 0x0A1A0712, + 0xFC5277FB, 0x5A257C4F, 0x6BCD66D2, 0xCDBA6D66, + 0x081D53E8, 0xAE6A585C, 0x9F8242C1, 0x39F54975, + 0xA863A552, 0x0E14AEE6, 
0x3FFCB47B, 0x998BBFCF, + 0x5C2C8141, 0xFA5B8AF5, 0xCBB39068, 0x6DC49BDC, + 0x9B8CEB35, 0x3DFBE081, 0x0C13FA1C, 0xAA64F1A8, + 0x6FC3CF26, 0xC9B4C492, 0xF85CDE0F, 0x5E2BD5BB, + 0x440B7579, 0xE27C7ECD, 0xD3946450, 0x75E36FE4, + 0xB044516A, 0x16335ADE, 0x27DB4043, 0x81AC4BF7, + 0x77E43B1E, 0xD19330AA, 0xE07B2A37, 0x460C2183, + 0x83AB1F0D, 0x25DC14B9, 0x14340E24, 0xB2430590, + 0x23D5E9B7, 0x85A2E203, 0xB44AF89E, 0x123DF32A, + 0xD79ACDA4, 0x71EDC610, 0x4005DC8D, 0xE672D739, + 0x103AA7D0, 0xB64DAC64, 0x87A5B6F9, 0x21D2BD4D, + 0xE47583C3, 0x42028877, 0x73EA92EA, 0xD59D995E, + 0x8BB64CE5, 0x2DC14751, 0x1C295DCC, 0xBA5E5678, + 0x7FF968F6, 0xD98E6342, 0xE86679DF, 0x4E11726B, + 0xB8590282, 0x1E2E0936, 0x2FC613AB, 0x89B1181F, + 0x4C162691, 0xEA612D25, 0xDB8937B8, 0x7DFE3C0C, + 0xEC68D02B, 0x4A1FDB9F, 0x7BF7C102, 0xDD80CAB6, + 0x1827F438, 0xBE50FF8C, 0x8FB8E511, 0x29CFEEA5, + 0xDF879E4C, 0x79F095F8, 0x48188F65, 0xEE6F84D1, + 0x2BC8BA5F, 0x8DBFB1EB, 0xBC57AB76, 0x1A20A0C2, + 0x8816EAF2, 0x2E61E146, 0x1F89FBDB, 0xB9FEF06F, + 0x7C59CEE1, 0xDA2EC555, 0xEBC6DFC8, 0x4DB1D47C, + 0xBBF9A495, 0x1D8EAF21, 0x2C66B5BC, 0x8A11BE08, + 0x4FB68086, 0xE9C18B32, 0xD82991AF, 0x7E5E9A1B, + 0xEFC8763C, 0x49BF7D88, 0x78576715, 0xDE206CA1, + 0x1B87522F, 0xBDF0599B, 0x8C184306, 0x2A6F48B2, + 0xDC27385B, 0x7A5033EF, 0x4BB82972, 0xEDCF22C6, + 0x28681C48, 0x8E1F17FC, 0xBFF70D61, 0x198006D5, + 0x47ABD36E, 0xE1DCD8DA, 0xD034C247, 0x7643C9F3, + 0xB3E4F77D, 0x1593FCC9, 0x247BE654, 0x820CEDE0, + 0x74449D09, 0xD23396BD, 0xE3DB8C20, 0x45AC8794, + 0x800BB91A, 0x267CB2AE, 0x1794A833, 0xB1E3A387, + 0x20754FA0, 0x86024414, 0xB7EA5E89, 0x119D553D, + 0xD43A6BB3, 0x724D6007, 0x43A57A9A, 0xE5D2712E, + 0x139A01C7, 0xB5ED0A73, 0x840510EE, 0x22721B5A, + 0xE7D525D4, 0x41A22E60, 0x704A34FD, 0xD63D3F49, + 0xCC1D9F8B, 0x6A6A943F, 0x5B828EA2, 0xFDF58516, + 0x3852BB98, 0x9E25B02C, 0xAFCDAAB1, 0x09BAA105, + 0xFFF2D1EC, 0x5985DA58, 0x686DC0C5, 0xCE1ACB71, + 0x0BBDF5FF, 0xADCAFE4B, 0x9C22E4D6, 0x3A55EF62, + 0xABC30345, 0x0DB408F1, 
0x3C5C126C, 0x9A2B19D8, + 0x5F8C2756, 0xF9FB2CE2, 0xC813367F, 0x6E643DCB, + 0x982C4D22, 0x3E5B4696, 0x0FB35C0B, 0xA9C457BF, + 0x6C636931, 0xCA146285, 0xFBFC7818, 0x5D8B73AC, + 0x03A0A617, 0xA5D7ADA3, 0x943FB73E, 0x3248BC8A, + 0xF7EF8204, 0x519889B0, 0x6070932D, 0xC6079899, + 0x304FE870, 0x9638E3C4, 0xA7D0F959, 0x01A7F2ED, + 0xC400CC63, 0x6277C7D7, 0x539FDD4A, 0xF5E8D6FE, + 0x647E3AD9, 0xC209316D, 0xF3E12BF0, 0x55962044, + 0x90311ECA, 0x3646157E, 0x07AE0FE3, 0xA1D90457, + 0x579174BE, 0xF1E67F0A, 0xC00E6597, 0x66796E23, + 0xA3DE50AD, 0x05A95B19, 0x34414184, 0x92364A30 + }, { + 0x00000000, 0xCCAA009E, 0x4225077D, 0x8E8F07E3, + 0x844A0EFA, 0x48E00E64, 0xC66F0987, 0x0AC50919, + 0xD3E51BB5, 0x1F4F1B2B, 0x91C01CC8, 0x5D6A1C56, + 0x57AF154F, 0x9B0515D1, 0x158A1232, 0xD92012AC, + 0x7CBB312B, 0xB01131B5, 0x3E9E3656, 0xF23436C8, + 0xF8F13FD1, 0x345B3F4F, 0xBAD438AC, 0x767E3832, + 0xAF5E2A9E, 0x63F42A00, 0xED7B2DE3, 0x21D12D7D, + 0x2B142464, 0xE7BE24FA, 0x69312319, 0xA59B2387, + 0xF9766256, 0x35DC62C8, 0xBB53652B, 0x77F965B5, + 0x7D3C6CAC, 0xB1966C32, 0x3F196BD1, 0xF3B36B4F, + 0x2A9379E3, 0xE639797D, 0x68B67E9E, 0xA41C7E00, + 0xAED97719, 0x62737787, 0xECFC7064, 0x205670FA, + 0x85CD537D, 0x496753E3, 0xC7E85400, 0x0B42549E, + 0x01875D87, 0xCD2D5D19, 0x43A25AFA, 0x8F085A64, + 0x562848C8, 0x9A824856, 0x140D4FB5, 0xD8A74F2B, + 0xD2624632, 0x1EC846AC, 0x9047414F, 0x5CED41D1, + 0x299DC2ED, 0xE537C273, 0x6BB8C590, 0xA712C50E, + 0xADD7CC17, 0x617DCC89, 0xEFF2CB6A, 0x2358CBF4, + 0xFA78D958, 0x36D2D9C6, 0xB85DDE25, 0x74F7DEBB, + 0x7E32D7A2, 0xB298D73C, 0x3C17D0DF, 0xF0BDD041, + 0x5526F3C6, 0x998CF358, 0x1703F4BB, 0xDBA9F425, + 0xD16CFD3C, 0x1DC6FDA2, 0x9349FA41, 0x5FE3FADF, + 0x86C3E873, 0x4A69E8ED, 0xC4E6EF0E, 0x084CEF90, + 0x0289E689, 0xCE23E617, 0x40ACE1F4, 0x8C06E16A, + 0xD0EBA0BB, 0x1C41A025, 0x92CEA7C6, 0x5E64A758, + 0x54A1AE41, 0x980BAEDF, 0x1684A93C, 0xDA2EA9A2, + 0x030EBB0E, 0xCFA4BB90, 0x412BBC73, 0x8D81BCED, + 0x8744B5F4, 0x4BEEB56A, 0xC561B289, 0x09CBB217, + 0xAC509190, 
0x60FA910E, 0xEE7596ED, 0x22DF9673, + 0x281A9F6A, 0xE4B09FF4, 0x6A3F9817, 0xA6959889, + 0x7FB58A25, 0xB31F8ABB, 0x3D908D58, 0xF13A8DC6, + 0xFBFF84DF, 0x37558441, 0xB9DA83A2, 0x7570833C, + 0x533B85DA, 0x9F918544, 0x111E82A7, 0xDDB48239, + 0xD7718B20, 0x1BDB8BBE, 0x95548C5D, 0x59FE8CC3, + 0x80DE9E6F, 0x4C749EF1, 0xC2FB9912, 0x0E51998C, + 0x04949095, 0xC83E900B, 0x46B197E8, 0x8A1B9776, + 0x2F80B4F1, 0xE32AB46F, 0x6DA5B38C, 0xA10FB312, + 0xABCABA0B, 0x6760BA95, 0xE9EFBD76, 0x2545BDE8, + 0xFC65AF44, 0x30CFAFDA, 0xBE40A839, 0x72EAA8A7, + 0x782FA1BE, 0xB485A120, 0x3A0AA6C3, 0xF6A0A65D, + 0xAA4DE78C, 0x66E7E712, 0xE868E0F1, 0x24C2E06F, + 0x2E07E976, 0xE2ADE9E8, 0x6C22EE0B, 0xA088EE95, + 0x79A8FC39, 0xB502FCA7, 0x3B8DFB44, 0xF727FBDA, + 0xFDE2F2C3, 0x3148F25D, 0xBFC7F5BE, 0x736DF520, + 0xD6F6D6A7, 0x1A5CD639, 0x94D3D1DA, 0x5879D144, + 0x52BCD85D, 0x9E16D8C3, 0x1099DF20, 0xDC33DFBE, + 0x0513CD12, 0xC9B9CD8C, 0x4736CA6F, 0x8B9CCAF1, + 0x8159C3E8, 0x4DF3C376, 0xC37CC495, 0x0FD6C40B, + 0x7AA64737, 0xB60C47A9, 0x3883404A, 0xF42940D4, + 0xFEEC49CD, 0x32464953, 0xBCC94EB0, 0x70634E2E, + 0xA9435C82, 0x65E95C1C, 0xEB665BFF, 0x27CC5B61, + 0x2D095278, 0xE1A352E6, 0x6F2C5505, 0xA386559B, + 0x061D761C, 0xCAB77682, 0x44387161, 0x889271FF, + 0x825778E6, 0x4EFD7878, 0xC0727F9B, 0x0CD87F05, + 0xD5F86DA9, 0x19526D37, 0x97DD6AD4, 0x5B776A4A, + 0x51B26353, 0x9D1863CD, 0x1397642E, 0xDF3D64B0, + 0x83D02561, 0x4F7A25FF, 0xC1F5221C, 0x0D5F2282, + 0x079A2B9B, 0xCB302B05, 0x45BF2CE6, 0x89152C78, + 0x50353ED4, 0x9C9F3E4A, 0x121039A9, 0xDEBA3937, + 0xD47F302E, 0x18D530B0, 0x965A3753, 0x5AF037CD, + 0xFF6B144A, 0x33C114D4, 0xBD4E1337, 0x71E413A9, + 0x7B211AB0, 0xB78B1A2E, 0x39041DCD, 0xF5AE1D53, + 0x2C8E0FFF, 0xE0240F61, 0x6EAB0882, 0xA201081C, + 0xA8C40105, 0x646E019B, 0xEAE10678, 0x264B06E6 + } +}; diff --git a/src/liblzma/check/crc32_tablegen.c b/src/liblzma/check/crc32_tablegen.c new file mode 100644 index 000000000000..31a4d2751db2 --- /dev/null +++ b/src/liblzma/check/crc32_tablegen.c @@ -0,0 
+1,117 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc32_tablegen.c +/// \brief Generate crc32_table_le.h and crc32_table_be.h +/// +/// Compiling: gcc -std=c99 -o crc32_tablegen crc32_tablegen.c +/// Add -DWORDS_BIGENDIAN to generate big endian table. +/// Add -DLZ_HASH_TABLE to generate lz_encoder_hash_table.h (little endian). +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include <stdio.h> +#include "../../common/tuklib_integer.h" + + +static uint32_t crc32_table[8][256]; + + +static void +init_crc32_table(void) +{ + static const uint32_t poly32 = UINT32_C(0xEDB88320); + + for (size_t s = 0; s < 8; ++s) { + for (size_t b = 0; b < 256; ++b) { + uint32_t r = s == 0 ? b : crc32_table[s - 1][b]; + + for (size_t i = 0; i < 8; ++i) { + if (r & 1) + r = (r >> 1) ^ poly32; + else + r >>= 1; + } + + crc32_table[s][b] = r; + } + } + +#ifdef WORDS_BIGENDIAN + for (size_t s = 0; s < 8; ++s) + for (size_t b = 0; b < 256; ++b) + crc32_table[s][b] = bswap32(crc32_table[s][b]); +#endif + + return; +} + + +static void +print_crc32_table(void) +{ + printf("/* This file has been automatically generated by " + "crc32_tablegen.c. */\n\n" + "const uint32_t lzma_crc32_table[8][256] = {\n\t{"); + + for (size_t s = 0; s < 8; ++s) { + for (size_t b = 0; b < 256; ++b) { + if ((b % 4) == 0) + printf("\n\t\t"); + + printf("0x%08" PRIX32, crc32_table[s][b]); + + if (b != 255) + printf(",%s", (b+1) % 4 == 0 ? "" : " "); + } + + if (s == 7) + printf("\n\t}\n};\n"); + else + printf("\n\t}, {"); + } + + return; +} + + +static void +print_lz_table(void) +{ + printf("/* This file has been automatically generated by " + "crc32_tablegen.c. 
*/\n\n" + "const uint32_t lzma_lz_hash_table[256] = {"); + + for (size_t b = 0; b < 256; ++b) { + if ((b % 4) == 0) + printf("\n\t"); + + printf("0x%08" PRIX32, crc32_table[0][b]); + + if (b != 255) + printf(",%s", (b+1) % 4 == 0 ? "" : " "); + } + + printf("\n};\n"); + + return; +} + + +int +main(void) +{ + init_crc32_table(); + +#ifdef LZ_HASH_TABLE + print_lz_table(); +#else + print_crc32_table(); +#endif + + return 0; +} diff --git a/src/liblzma/check/crc32_x86.S b/src/liblzma/check/crc32_x86.S new file mode 100644 index 000000000000..67f68a4145f8 --- /dev/null +++ b/src/liblzma/check/crc32_x86.S @@ -0,0 +1,304 @@ +/* + * Speed-optimized CRC32 using slicing-by-eight algorithm + * + * This uses only i386 instructions, but it is optimized for i686 and later + * (including e.g. Pentium II/III/IV, Athlon XP, and Core 2). For i586 + * (e.g. Pentium), slicing-by-four would be better, and even the C version + * of slicing-by-eight built with gcc -march=i586 tends to be a little bit + * better than this. Very few probably run this code on i586 or older x86 + * so this shouldn't be a problem in practice. + * + * Authors: Igor Pavlov (original version) + * Lasse Collin (AT&T syntax, PIC support, better portability) + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * This code needs lzma_crc32_table, which can be created using the + * following C code: + +uint32_t lzma_crc32_table[8][256]; + +void +init_table(void) +{ + // IEEE-802.3 + static const uint32_t poly32 = UINT32_C(0xEDB88320); + + // Castagnoli + // static const uint32_t poly32 = UINT32_C(0x82F63B78); + + // Koopman + // static const uint32_t poly32 = UINT32_C(0xEB31D82E); + + for (size_t s = 0; s < 8; ++s) { + for (size_t b = 0; b < 256; ++b) { + uint32_t r = s == 0 ? 
b : lzma_crc32_table[s - 1][b]; + + for (size_t i = 0; i < 8; ++i) { + if (r & 1) + r = (r >> 1) ^ poly32; + else + r >>= 1; + } + + lzma_crc32_table[s][b] = r; + } + } +} + + * The prototype of the CRC32 function: + * extern uint32_t lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc); + */ + +/* + * On some systems, the functions need to be prefixed. The prefix is + * usually an underscore. + */ +#ifndef __USER_LABEL_PREFIX__ +# define __USER_LABEL_PREFIX__ +#endif +#define MAKE_SYM_CAT(prefix, sym) prefix ## sym +#define MAKE_SYM(prefix, sym) MAKE_SYM_CAT(prefix, sym) +#define LZMA_CRC32 MAKE_SYM(__USER_LABEL_PREFIX__, lzma_crc32) +#define LZMA_CRC32_TABLE MAKE_SYM(__USER_LABEL_PREFIX__, lzma_crc32_table) + +/* + * Solaris assembler doesn't have .p2align, and Darwin uses .align + * differently than GNU/Linux and Solaris. + */ +#if defined(__APPLE__) || defined(__MSDOS__) +# define ALIGN(pow2, abs) .align pow2 +#else +# define ALIGN(pow2, abs) .align abs +#endif + + .text + .globl LZMA_CRC32 + +#if !defined(__APPLE__) && !defined(_WIN32) && !defined(__CYGWIN__) \ + && !defined(__MSDOS__) + .type LZMA_CRC32, @function +#endif + + ALIGN(4, 16) +LZMA_CRC32: + /* + * Register usage: + * %eax crc + * %esi buf + * %edi size or buf + size + * %ebx lzma_crc32_table + * %ebp Table index + * %ecx Temporary + * %edx Temporary + */ + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + movl 0x14(%esp), %esi /* buf */ + movl 0x18(%esp), %edi /* size */ + movl 0x1C(%esp), %eax /* crc */ + + /* + * Store the address of lzma_crc32_table to %ebx. This is needed to + * get position-independent code (PIC). + * + * The PIC macro is defined by libtool, while __PIC__ is defined + * by GCC but only on some systems. Testing for both makes it simpler + * to test this code without libtool, and keeps the code working also + * when built with libtool but using something else than GCC. 
+ * + * I understood that libtool may define PIC on Windows even though + * the code in Windows DLLs is not PIC in the sense that it is in ELF + * binaries, so we need a separate check to always use the non-PIC + * code on Windows. + */ +#if (!defined(PIC) && !defined(__PIC__)) \ + || (defined(_WIN32) || defined(__CYGWIN__)) + /* Not PIC */ + movl $ LZMA_CRC32_TABLE, %ebx +#elif defined(__APPLE__) + /* Mach-O */ + call .L_get_pc +.L_pic: + leal .L_lzma_crc32_table$non_lazy_ptr-.L_pic(%ebx), %ebx + movl (%ebx), %ebx +#else + /* ELF */ + call .L_get_pc + addl $_GLOBAL_OFFSET_TABLE_, %ebx + movl LZMA_CRC32_TABLE@GOT(%ebx), %ebx +#endif + + /* Complement the initial value. */ + notl %eax + + ALIGN(4, 16) +.L_align: + /* + * Check if there is enough input to use slicing-by-eight. + * We need 16 bytes, because the loop pre-reads eight bytes. + */ + cmpl $16, %edi + jb .L_rest + + /* Check if we have reached alignment of eight bytes. */ + testl $7, %esi + jz .L_slice + + /* Calculate CRC of the next input byte. */ + movzbl (%esi), %ebp + incl %esi + movzbl %al, %ecx + xorl %ecx, %ebp + shrl $8, %eax + xorl (%ebx, %ebp, 4), %eax + decl %edi + jmp .L_align + + ALIGN(2, 4) +.L_slice: + /* + * If we get here, there are at least 16 bytes of aligned input + * available. Make %edi a multiple of eight bytes. Store the possible + * remainder over the "size" variable in the argument stack. + */ + movl %edi, 0x18(%esp) + andl $-8, %edi + subl %edi, 0x18(%esp) + + /* + * Let %edi be buf + size - 8 while running the main loop. This way + * we can compare for equality to determine when to exit the loop. + */ + addl %esi, %edi + subl $8, %edi + + /* Read in the first eight aligned bytes. 
*/ + xorl (%esi), %eax + movl 4(%esi), %ecx + movzbl %cl, %ebp + +.L_loop: + movl 0x0C00(%ebx, %ebp, 4), %edx + movzbl %ch, %ebp + xorl 0x0800(%ebx, %ebp, 4), %edx + shrl $16, %ecx + xorl 8(%esi), %edx + movzbl %cl, %ebp + xorl 0x0400(%ebx, %ebp, 4), %edx + movzbl %ch, %ebp + xorl (%ebx, %ebp, 4), %edx + movzbl %al, %ebp + + /* + * Read the next four bytes, for which the CRC is calculated + * on the next iteration of the loop. + */ + movl 12(%esi), %ecx + + xorl 0x1C00(%ebx, %ebp, 4), %edx + movzbl %ah, %ebp + shrl $16, %eax + xorl 0x1800(%ebx, %ebp, 4), %edx + movzbl %ah, %ebp + movzbl %al, %eax + movl 0x1400(%ebx, %eax, 4), %eax + addl $8, %esi + xorl %edx, %eax + xorl 0x1000(%ebx, %ebp, 4), %eax + + /* Check for end of aligned input. */ + cmpl %edi, %esi + movzbl %cl, %ebp + jne .L_loop + + /* + * Process the remaining eight bytes, which we have already + * copied to %ecx and %edx. + */ + movl 0x0C00(%ebx, %ebp, 4), %edx + movzbl %ch, %ebp + xorl 0x0800(%ebx, %ebp, 4), %edx + shrl $16, %ecx + movzbl %cl, %ebp + xorl 0x0400(%ebx, %ebp, 4), %edx + movzbl %ch, %ebp + xorl (%ebx, %ebp, 4), %edx + movzbl %al, %ebp + + xorl 0x1C00(%ebx, %ebp, 4), %edx + movzbl %ah, %ebp + shrl $16, %eax + xorl 0x1800(%ebx, %ebp, 4), %edx + movzbl %ah, %ebp + movzbl %al, %eax + movl 0x1400(%ebx, %eax, 4), %eax + addl $8, %esi + xorl %edx, %eax + xorl 0x1000(%ebx, %ebp, 4), %eax + + /* Copy the number of remaining bytes to %edi. */ + movl 0x18(%esp), %edi + +.L_rest: + /* Check for end of input. */ + testl %edi, %edi + jz .L_return + + /* Calculate CRC of the next input byte. */ + movzbl (%esi), %ebp + incl %esi + movzbl %al, %ecx + xorl %ecx, %ebp + shrl $8, %eax + xorl (%ebx, %ebp, 4), %eax + decl %edi + jmp .L_rest + +.L_return: + /* Complement the final value. 
*/ + notl %eax + + popl %ebp + popl %edi + popl %esi + popl %ebx + ret + +#if defined(PIC) || defined(__PIC__) + ALIGN(4, 16) +.L_get_pc: + movl (%esp), %ebx + ret +#endif + +#if defined(__APPLE__) && (defined(PIC) || defined(__PIC__)) + /* Mach-O PIC */ + .section __IMPORT,__pointers,non_lazy_symbol_pointers +.L_lzma_crc32_table$non_lazy_ptr: + .indirect_symbol LZMA_CRC32_TABLE + .long 0 + +#elif defined(_WIN32) || defined(__CYGWIN__) +# ifdef DLL_EXPORT + /* This is the equivalent of __declspec(dllexport). */ + .section .drectve + .ascii " -export:lzma_crc32" +# endif + +#elif !defined(__MSDOS__) + /* ELF */ + .size LZMA_CRC32, .-LZMA_CRC32 +#endif + +/* + * This is needed to support a non-executable stack. It's ugly to + * use __linux__ here, but I don't know a way to detect when + * we are using GNU assembler. + */ +#if defined(__ELF__) && defined(__linux__) + .section .note.GNU-stack,"",@progbits +#endif diff --git a/src/liblzma/check/crc64_fast.c b/src/liblzma/check/crc64_fast.c new file mode 100644 index 000000000000..52af29ed48d8 --- /dev/null +++ b/src/liblzma/check/crc64_fast.c @@ -0,0 +1,72 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc64_fast.c +/// \brief CRC64 calculation +/// +/// Calculate the CRC64 using the slice-by-four algorithm. This is the same +/// idea that is used in crc32_fast.c, but for CRC64 we use only four tables +/// instead of eight to avoid increasing CPU cache usage. +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "check.h" +#include "crc_macros.h" + + +#ifdef WORDS_BIGENDIAN +# define A1(x) ((x) >> 56) +#else +# define A1 A +#endif + + +// See the comments in crc32_fast.c. They aren't duplicated here. 
+extern LZMA_API(uint64_t) +lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc) +{ + crc = ~crc; + +#ifdef WORDS_BIGENDIAN + crc = bswap64(crc); +#endif + + if (size > 4) { + while ((uintptr_t)(buf) & 3) { + crc = lzma_crc64_table[0][*buf++ ^ A1(crc)] ^ S8(crc); + --size; + } + + const uint8_t *const limit = buf + (size & ~(size_t)(3)); + size &= (size_t)(3); + + while (buf < limit) { +#ifdef WORDS_BIGENDIAN + const uint32_t tmp = (crc >> 32) + ^ *(const uint32_t *)(buf); +#else + const uint32_t tmp = crc ^ *(const uint32_t *)(buf); +#endif + buf += 4; + + crc = lzma_crc64_table[3][A(tmp)] + ^ lzma_crc64_table[2][B(tmp)] + ^ S32(crc) + ^ lzma_crc64_table[1][C(tmp)] + ^ lzma_crc64_table[0][D(tmp)]; + } + } + + while (size-- != 0) + crc = lzma_crc64_table[0][*buf++ ^ A1(crc)] ^ S8(crc); + +#ifdef WORDS_BIGENDIAN + crc = bswap64(crc); +#endif + + return ~crc; +} diff --git a/src/liblzma/check/crc64_small.c b/src/liblzma/check/crc64_small.c new file mode 100644 index 000000000000..55d72316bce7 --- /dev/null +++ b/src/liblzma/check/crc64_small.c @@ -0,0 +1,53 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc64_small.c +/// \brief CRC64 calculation (size-optimized) +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "check.h" + + +static uint64_t crc64_table[256]; + + +static void +crc64_init(void) +{ + static const uint64_t poly64 = UINT64_C(0xC96C5795D7870F42); + + for (size_t b = 0; b < 256; ++b) { + uint64_t r = b; + for (size_t i = 0; i < 8; ++i) { + if (r & 1) + r = (r >> 1) ^ poly64; + else + r >>= 1; + } + + crc64_table[b] = r; + } + + return; +} + + +extern LZMA_API(uint64_t) +lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc) +{ + mythread_once(crc64_init); + + crc = ~crc; + + while (size != 0) { + crc = crc64_table[*buf++ ^ (crc & 0xFF)] ^ (crc >> 8); + --size; + } + + return ~crc; +} diff --git a/src/liblzma/check/crc64_table.c b/src/liblzma/check/crc64_table.c new file mode 100644 index 000000000000..1fbcd94703c7 --- /dev/null +++ b/src/liblzma/check/crc64_table.c @@ -0,0 +1,19 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc64_table.c +/// \brief Precalculated CRC64 table with correct endianness +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + +#ifdef WORDS_BIGENDIAN +# include "crc64_table_be.h" +#else +# include "crc64_table_le.h" +#endif diff --git a/src/liblzma/check/crc64_table_be.h b/src/liblzma/check/crc64_table_be.h new file mode 100644 index 000000000000..ea074f397a70 --- /dev/null +++ b/src/liblzma/check/crc64_table_be.h @@ -0,0 +1,521 @@ +/* This file has been automatically generated by crc64_tablegen.c. 
*/ + +const uint64_t lzma_crc64_table[4][256] = { + { + UINT64_C(0x0000000000000000), UINT64_C(0x6F5FA703BE4C2EB3), + UINT64_C(0x5BA040A8573684F4), UINT64_C(0x34FFE7ABE97AAA47), + UINT64_C(0x335E8FFF84C3D07B), UINT64_C(0x5C0128FC3A8FFEC8), + UINT64_C(0x68FECF57D3F5548F), UINT64_C(0x07A168546DB97A3C), + UINT64_C(0x66BC1EFF0987A1F7), UINT64_C(0x09E3B9FCB7CB8F44), + UINT64_C(0x3D1C5E575EB12503), UINT64_C(0x5243F954E0FD0BB0), + UINT64_C(0x55E291008D44718C), UINT64_C(0x3ABD360333085F3F), + UINT64_C(0x0E42D1A8DA72F578), UINT64_C(0x611D76AB643EDBCB), + UINT64_C(0x4966335138A19B7D), UINT64_C(0x2639945286EDB5CE), + UINT64_C(0x12C673F96F971F89), UINT64_C(0x7D99D4FAD1DB313A), + UINT64_C(0x7A38BCAEBC624B06), UINT64_C(0x15671BAD022E65B5), + UINT64_C(0x2198FC06EB54CFF2), UINT64_C(0x4EC75B055518E141), + UINT64_C(0x2FDA2DAE31263A8A), UINT64_C(0x40858AAD8F6A1439), + UINT64_C(0x747A6D066610BE7E), UINT64_C(0x1B25CA05D85C90CD), + UINT64_C(0x1C84A251B5E5EAF1), UINT64_C(0x73DB05520BA9C442), + UINT64_C(0x4724E2F9E2D36E05), UINT64_C(0x287B45FA5C9F40B6), + UINT64_C(0x92CC66A2704237FB), UINT64_C(0xFD93C1A1CE0E1948), + UINT64_C(0xC96C260A2774B30F), UINT64_C(0xA633810999389DBC), + UINT64_C(0xA192E95DF481E780), UINT64_C(0xCECD4E5E4ACDC933), + UINT64_C(0xFA32A9F5A3B76374), UINT64_C(0x956D0EF61DFB4DC7), + UINT64_C(0xF470785D79C5960C), UINT64_C(0x9B2FDF5EC789B8BF), + UINT64_C(0xAFD038F52EF312F8), UINT64_C(0xC08F9FF690BF3C4B), + UINT64_C(0xC72EF7A2FD064677), UINT64_C(0xA87150A1434A68C4), + UINT64_C(0x9C8EB70AAA30C283), UINT64_C(0xF3D11009147CEC30), + UINT64_C(0xDBAA55F348E3AC86), UINT64_C(0xB4F5F2F0F6AF8235), + UINT64_C(0x800A155B1FD52872), UINT64_C(0xEF55B258A19906C1), + UINT64_C(0xE8F4DA0CCC207CFD), UINT64_C(0x87AB7D0F726C524E), + UINT64_C(0xB3549AA49B16F809), UINT64_C(0xDC0B3DA7255AD6BA), + UINT64_C(0xBD164B0C41640D71), UINT64_C(0xD249EC0FFF2823C2), + UINT64_C(0xE6B60BA416528985), UINT64_C(0x89E9ACA7A81EA736), + UINT64_C(0x8E48C4F3C5A7DD0A), UINT64_C(0xE11763F07BEBF3B9), + 
UINT64_C(0xD5E8845B929159FE), UINT64_C(0xBAB723582CDD774D), + UINT64_C(0xA187C3EBCA2BB664), UINT64_C(0xCED864E8746798D7), + UINT64_C(0xFA2783439D1D3290), UINT64_C(0x9578244023511C23), + UINT64_C(0x92D94C144EE8661F), UINT64_C(0xFD86EB17F0A448AC), + UINT64_C(0xC9790CBC19DEE2EB), UINT64_C(0xA626ABBFA792CC58), + UINT64_C(0xC73BDD14C3AC1793), UINT64_C(0xA8647A177DE03920), + UINT64_C(0x9C9B9DBC949A9367), UINT64_C(0xF3C43ABF2AD6BDD4), + UINT64_C(0xF46552EB476FC7E8), UINT64_C(0x9B3AF5E8F923E95B), + UINT64_C(0xAFC512431059431C), UINT64_C(0xC09AB540AE156DAF), + UINT64_C(0xE8E1F0BAF28A2D19), UINT64_C(0x87BE57B94CC603AA), + UINT64_C(0xB341B012A5BCA9ED), UINT64_C(0xDC1E17111BF0875E), + UINT64_C(0xDBBF7F457649FD62), UINT64_C(0xB4E0D846C805D3D1), + UINT64_C(0x801F3FED217F7996), UINT64_C(0xEF4098EE9F335725), + UINT64_C(0x8E5DEE45FB0D8CEE), UINT64_C(0xE10249464541A25D), + UINT64_C(0xD5FDAEEDAC3B081A), UINT64_C(0xBAA209EE127726A9), + UINT64_C(0xBD0361BA7FCE5C95), UINT64_C(0xD25CC6B9C1827226), + UINT64_C(0xE6A3211228F8D861), UINT64_C(0x89FC861196B4F6D2), + UINT64_C(0x334BA549BA69819F), UINT64_C(0x5C14024A0425AF2C), + UINT64_C(0x68EBE5E1ED5F056B), UINT64_C(0x07B442E253132BD8), + UINT64_C(0x00152AB63EAA51E4), UINT64_C(0x6F4A8DB580E67F57), + UINT64_C(0x5BB56A1E699CD510), UINT64_C(0x34EACD1DD7D0FBA3), + UINT64_C(0x55F7BBB6B3EE2068), UINT64_C(0x3AA81CB50DA20EDB), + UINT64_C(0x0E57FB1EE4D8A49C), UINT64_C(0x61085C1D5A948A2F), + UINT64_C(0x66A93449372DF013), UINT64_C(0x09F6934A8961DEA0), + UINT64_C(0x3D0974E1601B74E7), UINT64_C(0x5256D3E2DE575A54), + UINT64_C(0x7A2D961882C81AE2), UINT64_C(0x1572311B3C843451), + UINT64_C(0x218DD6B0D5FE9E16), UINT64_C(0x4ED271B36BB2B0A5), + UINT64_C(0x497319E7060BCA99), UINT64_C(0x262CBEE4B847E42A), + UINT64_C(0x12D3594F513D4E6D), UINT64_C(0x7D8CFE4CEF7160DE), + UINT64_C(0x1C9188E78B4FBB15), UINT64_C(0x73CE2FE4350395A6), + UINT64_C(0x4731C84FDC793FE1), UINT64_C(0x286E6F4C62351152), + UINT64_C(0x2FCF07180F8C6B6E), UINT64_C(0x4090A01BB1C045DD), + 
UINT64_C(0x746F47B058BAEF9A), UINT64_C(0x1B30E0B3E6F6C129), + UINT64_C(0x420F87D795576CC9), UINT64_C(0x2D5020D42B1B427A), + UINT64_C(0x19AFC77FC261E83D), UINT64_C(0x76F0607C7C2DC68E), + UINT64_C(0x715108281194BCB2), UINT64_C(0x1E0EAF2BAFD89201), + UINT64_C(0x2AF1488046A23846), UINT64_C(0x45AEEF83F8EE16F5), + UINT64_C(0x24B399289CD0CD3E), UINT64_C(0x4BEC3E2B229CE38D), + UINT64_C(0x7F13D980CBE649CA), UINT64_C(0x104C7E8375AA6779), + UINT64_C(0x17ED16D718131D45), UINT64_C(0x78B2B1D4A65F33F6), + UINT64_C(0x4C4D567F4F2599B1), UINT64_C(0x2312F17CF169B702), + UINT64_C(0x0B69B486ADF6F7B4), UINT64_C(0x6436138513BAD907), + UINT64_C(0x50C9F42EFAC07340), UINT64_C(0x3F96532D448C5DF3), + UINT64_C(0x38373B79293527CF), UINT64_C(0x57689C7A9779097C), + UINT64_C(0x63977BD17E03A33B), UINT64_C(0x0CC8DCD2C04F8D88), + UINT64_C(0x6DD5AA79A4715643), UINT64_C(0x028A0D7A1A3D78F0), + UINT64_C(0x3675EAD1F347D2B7), UINT64_C(0x592A4DD24D0BFC04), + UINT64_C(0x5E8B258620B28638), UINT64_C(0x31D482859EFEA88B), + UINT64_C(0x052B652E778402CC), UINT64_C(0x6A74C22DC9C82C7F), + UINT64_C(0xD0C3E175E5155B32), UINT64_C(0xBF9C46765B597581), + UINT64_C(0x8B63A1DDB223DFC6), UINT64_C(0xE43C06DE0C6FF175), + UINT64_C(0xE39D6E8A61D68B49), UINT64_C(0x8CC2C989DF9AA5FA), + UINT64_C(0xB83D2E2236E00FBD), UINT64_C(0xD762892188AC210E), + UINT64_C(0xB67FFF8AEC92FAC5), UINT64_C(0xD920588952DED476), + UINT64_C(0xEDDFBF22BBA47E31), UINT64_C(0x8280182105E85082), + UINT64_C(0x8521707568512ABE), UINT64_C(0xEA7ED776D61D040D), + UINT64_C(0xDE8130DD3F67AE4A), UINT64_C(0xB1DE97DE812B80F9), + UINT64_C(0x99A5D224DDB4C04F), UINT64_C(0xF6FA752763F8EEFC), + UINT64_C(0xC205928C8A8244BB), UINT64_C(0xAD5A358F34CE6A08), + UINT64_C(0xAAFB5DDB59771034), UINT64_C(0xC5A4FAD8E73B3E87), + UINT64_C(0xF15B1D730E4194C0), UINT64_C(0x9E04BA70B00DBA73), + UINT64_C(0xFF19CCDBD43361B8), UINT64_C(0x90466BD86A7F4F0B), + UINT64_C(0xA4B98C738305E54C), UINT64_C(0xCBE62B703D49CBFF), + UINT64_C(0xCC47432450F0B1C3), UINT64_C(0xA318E427EEBC9F70), + 
UINT64_C(0x97E7038C07C63537), UINT64_C(0xF8B8A48FB98A1B84), + UINT64_C(0xE388443C5F7CDAAD), UINT64_C(0x8CD7E33FE130F41E), + UINT64_C(0xB8280494084A5E59), UINT64_C(0xD777A397B60670EA), + UINT64_C(0xD0D6CBC3DBBF0AD6), UINT64_C(0xBF896CC065F32465), + UINT64_C(0x8B768B6B8C898E22), UINT64_C(0xE4292C6832C5A091), + UINT64_C(0x85345AC356FB7B5A), UINT64_C(0xEA6BFDC0E8B755E9), + UINT64_C(0xDE941A6B01CDFFAE), UINT64_C(0xB1CBBD68BF81D11D), + UINT64_C(0xB66AD53CD238AB21), UINT64_C(0xD935723F6C748592), + UINT64_C(0xEDCA9594850E2FD5), UINT64_C(0x829532973B420166), + UINT64_C(0xAAEE776D67DD41D0), UINT64_C(0xC5B1D06ED9916F63), + UINT64_C(0xF14E37C530EBC524), UINT64_C(0x9E1190C68EA7EB97), + UINT64_C(0x99B0F892E31E91AB), UINT64_C(0xF6EF5F915D52BF18), + UINT64_C(0xC210B83AB428155F), UINT64_C(0xAD4F1F390A643BEC), + UINT64_C(0xCC5269926E5AE027), UINT64_C(0xA30DCE91D016CE94), + UINT64_C(0x97F2293A396C64D3), UINT64_C(0xF8AD8E3987204A60), + UINT64_C(0xFF0CE66DEA99305C), UINT64_C(0x9053416E54D51EEF), + UINT64_C(0xA4ACA6C5BDAFB4A8), UINT64_C(0xCBF301C603E39A1B), + UINT64_C(0x7144229E2F3EED56), UINT64_C(0x1E1B859D9172C3E5), + UINT64_C(0x2AE46236780869A2), UINT64_C(0x45BBC535C6444711), + UINT64_C(0x421AAD61ABFD3D2D), UINT64_C(0x2D450A6215B1139E), + UINT64_C(0x19BAEDC9FCCBB9D9), UINT64_C(0x76E54ACA4287976A), + UINT64_C(0x17F83C6126B94CA1), UINT64_C(0x78A79B6298F56212), + UINT64_C(0x4C587CC9718FC855), UINT64_C(0x2307DBCACFC3E6E6), + UINT64_C(0x24A6B39EA27A9CDA), UINT64_C(0x4BF9149D1C36B269), + UINT64_C(0x7F06F336F54C182E), UINT64_C(0x105954354B00369D), + UINT64_C(0x382211CF179F762B), UINT64_C(0x577DB6CCA9D35898), + UINT64_C(0x6382516740A9F2DF), UINT64_C(0x0CDDF664FEE5DC6C), + UINT64_C(0x0B7C9E30935CA650), UINT64_C(0x642339332D1088E3), + UINT64_C(0x50DCDE98C46A22A4), UINT64_C(0x3F83799B7A260C17), + UINT64_C(0x5E9E0F301E18D7DC), UINT64_C(0x31C1A833A054F96F), + UINT64_C(0x053E4F98492E5328), UINT64_C(0x6A61E89BF7627D9B), + UINT64_C(0x6DC080CF9ADB07A7), UINT64_C(0x029F27CC24972914), + 
UINT64_C(0x3660C067CDED8353), UINT64_C(0x593F676473A1ADE0) + }, { + UINT64_C(0x0000000000000000), UINT64_C(0x0DF1D05C9279E954), + UINT64_C(0x1AE2A1B924F3D2A9), UINT64_C(0x171371E5B68A3BFD), + UINT64_C(0xB1DA4DDC62497DC1), UINT64_C(0xBC2B9D80F0309495), + UINT64_C(0xAB38EC6546BAAF68), UINT64_C(0xA6C93C39D4C3463C), + UINT64_C(0xE7AB9517EE3D2210), UINT64_C(0xEA5A454B7C44CB44), + UINT64_C(0xFD4934AECACEF0B9), UINT64_C(0xF0B8E4F258B719ED), + UINT64_C(0x5671D8CB8C745FD1), UINT64_C(0x5B8008971E0DB685), + UINT64_C(0x4C937972A8878D78), UINT64_C(0x4162A92E3AFE642C), + UINT64_C(0xCE572B2FDC7B4420), UINT64_C(0xC3A6FB734E02AD74), + UINT64_C(0xD4B58A96F8889689), UINT64_C(0xD9445ACA6AF17FDD), + UINT64_C(0x7F8D66F3BE3239E1), UINT64_C(0x727CB6AF2C4BD0B5), + UINT64_C(0x656FC74A9AC1EB48), UINT64_C(0x689E171608B8021C), + UINT64_C(0x29FCBE3832466630), UINT64_C(0x240D6E64A03F8F64), + UINT64_C(0x331E1F8116B5B499), UINT64_C(0x3EEFCFDD84CC5DCD), + UINT64_C(0x9826F3E4500F1BF1), UINT64_C(0x95D723B8C276F2A5), + UINT64_C(0x82C4525D74FCC958), UINT64_C(0x8F358201E685200C), + UINT64_C(0x9CAF565EB8F78840), UINT64_C(0x915E86022A8E6114), + UINT64_C(0x864DF7E79C045AE9), UINT64_C(0x8BBC27BB0E7DB3BD), + UINT64_C(0x2D751B82DABEF581), UINT64_C(0x2084CBDE48C71CD5), + UINT64_C(0x3797BA3BFE4D2728), UINT64_C(0x3A666A676C34CE7C), + UINT64_C(0x7B04C34956CAAA50), UINT64_C(0x76F51315C4B34304), + UINT64_C(0x61E662F0723978F9), UINT64_C(0x6C17B2ACE04091AD), + UINT64_C(0xCADE8E953483D791), UINT64_C(0xC72F5EC9A6FA3EC5), + UINT64_C(0xD03C2F2C10700538), UINT64_C(0xDDCDFF708209EC6C), + UINT64_C(0x52F87D71648CCC60), UINT64_C(0x5F09AD2DF6F52534), + UINT64_C(0x481ADCC8407F1EC9), UINT64_C(0x45EB0C94D206F79D), + UINT64_C(0xE32230AD06C5B1A1), UINT64_C(0xEED3E0F194BC58F5), + UINT64_C(0xF9C0911422366308), UINT64_C(0xF4314148B04F8A5C), + UINT64_C(0xB553E8668AB1EE70), UINT64_C(0xB8A2383A18C80724), + UINT64_C(0xAFB149DFAE423CD9), UINT64_C(0xA24099833C3BD58D), + UINT64_C(0x0489A5BAE8F893B1), UINT64_C(0x097875E67A817AE5), + 
UINT64_C(0x1E6B0403CC0B4118), UINT64_C(0x139AD45F5E72A84C), + UINT64_C(0x385FADBC70EF1181), UINT64_C(0x35AE7DE0E296F8D5), + UINT64_C(0x22BD0C05541CC328), UINT64_C(0x2F4CDC59C6652A7C), + UINT64_C(0x8985E06012A66C40), UINT64_C(0x8474303C80DF8514), + UINT64_C(0x936741D93655BEE9), UINT64_C(0x9E969185A42C57BD), + UINT64_C(0xDFF438AB9ED23391), UINT64_C(0xD205E8F70CABDAC5), + UINT64_C(0xC5169912BA21E138), UINT64_C(0xC8E7494E2858086C), + UINT64_C(0x6E2E7577FC9B4E50), UINT64_C(0x63DFA52B6EE2A704), + UINT64_C(0x74CCD4CED8689CF9), UINT64_C(0x793D04924A1175AD), + UINT64_C(0xF6088693AC9455A1), UINT64_C(0xFBF956CF3EEDBCF5), + UINT64_C(0xECEA272A88678708), UINT64_C(0xE11BF7761A1E6E5C), + UINT64_C(0x47D2CB4FCEDD2860), UINT64_C(0x4A231B135CA4C134), + UINT64_C(0x5D306AF6EA2EFAC9), UINT64_C(0x50C1BAAA7857139D), + UINT64_C(0x11A3138442A977B1), UINT64_C(0x1C52C3D8D0D09EE5), + UINT64_C(0x0B41B23D665AA518), UINT64_C(0x06B06261F4234C4C), + UINT64_C(0xA0795E5820E00A70), UINT64_C(0xAD888E04B299E324), + UINT64_C(0xBA9BFFE10413D8D9), UINT64_C(0xB76A2FBD966A318D), + UINT64_C(0xA4F0FBE2C81899C1), UINT64_C(0xA9012BBE5A617095), + UINT64_C(0xBE125A5BECEB4B68), UINT64_C(0xB3E38A077E92A23C), + UINT64_C(0x152AB63EAA51E400), UINT64_C(0x18DB666238280D54), + UINT64_C(0x0FC817878EA236A9), UINT64_C(0x0239C7DB1CDBDFFD), + UINT64_C(0x435B6EF52625BBD1), UINT64_C(0x4EAABEA9B45C5285), + UINT64_C(0x59B9CF4C02D66978), UINT64_C(0x54481F1090AF802C), + UINT64_C(0xF2812329446CC610), UINT64_C(0xFF70F375D6152F44), + UINT64_C(0xE8638290609F14B9), UINT64_C(0xE59252CCF2E6FDED), + UINT64_C(0x6AA7D0CD1463DDE1), UINT64_C(0x67560091861A34B5), + UINT64_C(0x7045717430900F48), UINT64_C(0x7DB4A128A2E9E61C), + UINT64_C(0xDB7D9D11762AA020), UINT64_C(0xD68C4D4DE4534974), + UINT64_C(0xC19F3CA852D97289), UINT64_C(0xCC6EECF4C0A09BDD), + UINT64_C(0x8D0C45DAFA5EFFF1), UINT64_C(0x80FD9586682716A5), + UINT64_C(0x97EEE463DEAD2D58), UINT64_C(0x9A1F343F4CD4C40C), + UINT64_C(0x3CD6080698178230), UINT64_C(0x3127D85A0A6E6B64), + 
UINT64_C(0x2634A9BFBCE45099), UINT64_C(0x2BC579E32E9DB9CD), + UINT64_C(0xF5A054D6CA71FB90), UINT64_C(0xF851848A580812C4), + UINT64_C(0xEF42F56FEE822939), UINT64_C(0xE2B325337CFBC06D), + UINT64_C(0x447A190AA8388651), UINT64_C(0x498BC9563A416F05), + UINT64_C(0x5E98B8B38CCB54F8), UINT64_C(0x536968EF1EB2BDAC), + UINT64_C(0x120BC1C1244CD980), UINT64_C(0x1FFA119DB63530D4), + UINT64_C(0x08E9607800BF0B29), UINT64_C(0x0518B02492C6E27D), + UINT64_C(0xA3D18C1D4605A441), UINT64_C(0xAE205C41D47C4D15), + UINT64_C(0xB9332DA462F676E8), UINT64_C(0xB4C2FDF8F08F9FBC), + UINT64_C(0x3BF77FF9160ABFB0), UINT64_C(0x3606AFA5847356E4), + UINT64_C(0x2115DE4032F96D19), UINT64_C(0x2CE40E1CA080844D), + UINT64_C(0x8A2D32257443C271), UINT64_C(0x87DCE279E63A2B25), + UINT64_C(0x90CF939C50B010D8), UINT64_C(0x9D3E43C0C2C9F98C), + UINT64_C(0xDC5CEAEEF8379DA0), UINT64_C(0xD1AD3AB26A4E74F4), + UINT64_C(0xC6BE4B57DCC44F09), UINT64_C(0xCB4F9B0B4EBDA65D), + UINT64_C(0x6D86A7329A7EE061), UINT64_C(0x6077776E08070935), + UINT64_C(0x7764068BBE8D32C8), UINT64_C(0x7A95D6D72CF4DB9C), + UINT64_C(0x690F0288728673D0), UINT64_C(0x64FED2D4E0FF9A84), + UINT64_C(0x73EDA3315675A179), UINT64_C(0x7E1C736DC40C482D), + UINT64_C(0xD8D54F5410CF0E11), UINT64_C(0xD5249F0882B6E745), + UINT64_C(0xC237EEED343CDCB8), UINT64_C(0xCFC63EB1A64535EC), + UINT64_C(0x8EA4979F9CBB51C0), UINT64_C(0x835547C30EC2B894), + UINT64_C(0x94463626B8488369), UINT64_C(0x99B7E67A2A316A3D), + UINT64_C(0x3F7EDA43FEF22C01), UINT64_C(0x328F0A1F6C8BC555), + UINT64_C(0x259C7BFADA01FEA8), UINT64_C(0x286DABA6487817FC), + UINT64_C(0xA75829A7AEFD37F0), UINT64_C(0xAAA9F9FB3C84DEA4), + UINT64_C(0xBDBA881E8A0EE559), UINT64_C(0xB04B584218770C0D), + UINT64_C(0x1682647BCCB44A31), UINT64_C(0x1B73B4275ECDA365), + UINT64_C(0x0C60C5C2E8479898), UINT64_C(0x0191159E7A3E71CC), + UINT64_C(0x40F3BCB040C015E0), UINT64_C(0x4D026CECD2B9FCB4), + UINT64_C(0x5A111D096433C749), UINT64_C(0x57E0CD55F64A2E1D), + UINT64_C(0xF129F16C22896821), UINT64_C(0xFCD82130B0F08175), + 
UINT64_C(0xEBCB50D5067ABA88), UINT64_C(0xE63A8089940353DC), + UINT64_C(0xCDFFF96ABA9EEA11), UINT64_C(0xC00E293628E70345), + UINT64_C(0xD71D58D39E6D38B8), UINT64_C(0xDAEC888F0C14D1EC), + UINT64_C(0x7C25B4B6D8D797D0), UINT64_C(0x71D464EA4AAE7E84), + UINT64_C(0x66C7150FFC244579), UINT64_C(0x6B36C5536E5DAC2D), + UINT64_C(0x2A546C7D54A3C801), UINT64_C(0x27A5BC21C6DA2155), + UINT64_C(0x30B6CDC470501AA8), UINT64_C(0x3D471D98E229F3FC), + UINT64_C(0x9B8E21A136EAB5C0), UINT64_C(0x967FF1FDA4935C94), + UINT64_C(0x816C801812196769), UINT64_C(0x8C9D504480608E3D), + UINT64_C(0x03A8D24566E5AE31), UINT64_C(0x0E590219F49C4765), + UINT64_C(0x194A73FC42167C98), UINT64_C(0x14BBA3A0D06F95CC), + UINT64_C(0xB2729F9904ACD3F0), UINT64_C(0xBF834FC596D53AA4), + UINT64_C(0xA8903E20205F0159), UINT64_C(0xA561EE7CB226E80D), + UINT64_C(0xE403475288D88C21), UINT64_C(0xE9F2970E1AA16575), + UINT64_C(0xFEE1E6EBAC2B5E88), UINT64_C(0xF31036B73E52B7DC), + UINT64_C(0x55D90A8EEA91F1E0), UINT64_C(0x5828DAD278E818B4), + UINT64_C(0x4F3BAB37CE622349), UINT64_C(0x42CA7B6B5C1BCA1D), + UINT64_C(0x5150AF3402696251), UINT64_C(0x5CA17F6890108B05), + UINT64_C(0x4BB20E8D269AB0F8), UINT64_C(0x4643DED1B4E359AC), + UINT64_C(0xE08AE2E860201F90), UINT64_C(0xED7B32B4F259F6C4), + UINT64_C(0xFA68435144D3CD39), UINT64_C(0xF799930DD6AA246D), + UINT64_C(0xB6FB3A23EC544041), UINT64_C(0xBB0AEA7F7E2DA915), + UINT64_C(0xAC199B9AC8A792E8), UINT64_C(0xA1E84BC65ADE7BBC), + UINT64_C(0x072177FF8E1D3D80), UINT64_C(0x0AD0A7A31C64D4D4), + UINT64_C(0x1DC3D646AAEEEF29), UINT64_C(0x1032061A3897067D), + UINT64_C(0x9F07841BDE122671), UINT64_C(0x92F654474C6BCF25), + UINT64_C(0x85E525A2FAE1F4D8), UINT64_C(0x8814F5FE68981D8C), + UINT64_C(0x2EDDC9C7BC5B5BB0), UINT64_C(0x232C199B2E22B2E4), + UINT64_C(0x343F687E98A88919), UINT64_C(0x39CEB8220AD1604D), + UINT64_C(0x78AC110C302F0461), UINT64_C(0x755DC150A256ED35), + UINT64_C(0x624EB0B514DCD6C8), UINT64_C(0x6FBF60E986A53F9C), + UINT64_C(0xC9765CD0526679A0), UINT64_C(0xC4878C8CC01F90F4), + 
UINT64_C(0xD394FD697695AB09), UINT64_C(0xDE652D35E4EC425D) + }, { + UINT64_C(0x0000000000000000), UINT64_C(0xCB6D6A914AE10B3F), + UINT64_C(0x96DBD42295C2177E), UINT64_C(0x5DB6BEB3DF231C41), + UINT64_C(0x2CB7A9452A852FFC), UINT64_C(0xE7DAC3D4606424C3), + UINT64_C(0xBA6C7D67BF473882), UINT64_C(0x710117F6F5A633BD), + UINT64_C(0xDD705D247FA5876A), UINT64_C(0x161D37B535448C55), + UINT64_C(0x4BAB8906EA679014), UINT64_C(0x80C6E397A0869B2B), + UINT64_C(0xF1C7F4615520A896), UINT64_C(0x3AAA9EF01FC1A3A9), + UINT64_C(0x671C2043C0E2BFE8), UINT64_C(0xAC714AD28A03B4D7), + UINT64_C(0xBAE1BA48FE4A0FD5), UINT64_C(0x718CD0D9B4AB04EA), + UINT64_C(0x2C3A6E6A6B8818AB), UINT64_C(0xE75704FB21691394), + UINT64_C(0x9656130DD4CF2029), UINT64_C(0x5D3B799C9E2E2B16), + UINT64_C(0x008DC72F410D3757), UINT64_C(0xCBE0ADBE0BEC3C68), + UINT64_C(0x6791E76C81EF88BF), UINT64_C(0xACFC8DFDCB0E8380), + UINT64_C(0xF14A334E142D9FC1), UINT64_C(0x3A2759DF5ECC94FE), + UINT64_C(0x4B264E29AB6AA743), UINT64_C(0x804B24B8E18BAC7C), + UINT64_C(0xDDFD9A0B3EA8B03D), UINT64_C(0x1690F09A7449BB02), + UINT64_C(0xF1DD7B3ED73AC638), UINT64_C(0x3AB011AF9DDBCD07), + UINT64_C(0x6706AF1C42F8D146), UINT64_C(0xAC6BC58D0819DA79), + UINT64_C(0xDD6AD27BFDBFE9C4), UINT64_C(0x1607B8EAB75EE2FB), + UINT64_C(0x4BB10659687DFEBA), UINT64_C(0x80DC6CC8229CF585), + UINT64_C(0x2CAD261AA89F4152), UINT64_C(0xE7C04C8BE27E4A6D), + UINT64_C(0xBA76F2383D5D562C), UINT64_C(0x711B98A977BC5D13), + UINT64_C(0x001A8F5F821A6EAE), UINT64_C(0xCB77E5CEC8FB6591), + UINT64_C(0x96C15B7D17D879D0), UINT64_C(0x5DAC31EC5D3972EF), + UINT64_C(0x4B3CC1762970C9ED), UINT64_C(0x8051ABE76391C2D2), + UINT64_C(0xDDE71554BCB2DE93), UINT64_C(0x168A7FC5F653D5AC), + UINT64_C(0x678B683303F5E611), UINT64_C(0xACE602A24914ED2E), + UINT64_C(0xF150BC119637F16F), UINT64_C(0x3A3DD680DCD6FA50), + UINT64_C(0x964C9C5256D54E87), UINT64_C(0x5D21F6C31C3445B8), + UINT64_C(0x00974870C31759F9), UINT64_C(0xCBFA22E189F652C6), + UINT64_C(0xBAFB35177C50617B), UINT64_C(0x71965F8636B16A44), + 
UINT64_C(0x2C20E135E9927605), UINT64_C(0xE74D8BA4A3737D3A), + UINT64_C(0xE2BBF77CAE758C71), UINT64_C(0x29D69DEDE494874E), + UINT64_C(0x7460235E3BB79B0F), UINT64_C(0xBF0D49CF71569030), + UINT64_C(0xCE0C5E3984F0A38D), UINT64_C(0x056134A8CE11A8B2), + UINT64_C(0x58D78A1B1132B4F3), UINT64_C(0x93BAE08A5BD3BFCC), + UINT64_C(0x3FCBAA58D1D00B1B), UINT64_C(0xF4A6C0C99B310024), + UINT64_C(0xA9107E7A44121C65), UINT64_C(0x627D14EB0EF3175A), + UINT64_C(0x137C031DFB5524E7), UINT64_C(0xD811698CB1B42FD8), + UINT64_C(0x85A7D73F6E973399), UINT64_C(0x4ECABDAE247638A6), + UINT64_C(0x585A4D34503F83A4), UINT64_C(0x933727A51ADE889B), + UINT64_C(0xCE819916C5FD94DA), UINT64_C(0x05ECF3878F1C9FE5), + UINT64_C(0x74EDE4717ABAAC58), UINT64_C(0xBF808EE0305BA767), + UINT64_C(0xE2363053EF78BB26), UINT64_C(0x295B5AC2A599B019), + UINT64_C(0x852A10102F9A04CE), UINT64_C(0x4E477A81657B0FF1), + UINT64_C(0x13F1C432BA5813B0), UINT64_C(0xD89CAEA3F0B9188F), + UINT64_C(0xA99DB955051F2B32), UINT64_C(0x62F0D3C44FFE200D), + UINT64_C(0x3F466D7790DD3C4C), UINT64_C(0xF42B07E6DA3C3773), + UINT64_C(0x13668C42794F4A49), UINT64_C(0xD80BE6D333AE4176), + UINT64_C(0x85BD5860EC8D5D37), UINT64_C(0x4ED032F1A66C5608), + UINT64_C(0x3FD1250753CA65B5), UINT64_C(0xF4BC4F96192B6E8A), + UINT64_C(0xA90AF125C60872CB), UINT64_C(0x62679BB48CE979F4), + UINT64_C(0xCE16D16606EACD23), UINT64_C(0x057BBBF74C0BC61C), + UINT64_C(0x58CD05449328DA5D), UINT64_C(0x93A06FD5D9C9D162), + UINT64_C(0xE2A178232C6FE2DF), UINT64_C(0x29CC12B2668EE9E0), + UINT64_C(0x747AAC01B9ADF5A1), UINT64_C(0xBF17C690F34CFE9E), + UINT64_C(0xA987360A8705459C), UINT64_C(0x62EA5C9BCDE44EA3), + UINT64_C(0x3F5CE22812C752E2), UINT64_C(0xF43188B9582659DD), + UINT64_C(0x85309F4FAD806A60), UINT64_C(0x4E5DF5DEE761615F), + UINT64_C(0x13EB4B6D38427D1E), UINT64_C(0xD88621FC72A37621), + UINT64_C(0x74F76B2EF8A0C2F6), UINT64_C(0xBF9A01BFB241C9C9), + UINT64_C(0xE22CBF0C6D62D588), UINT64_C(0x2941D59D2783DEB7), + UINT64_C(0x5840C26BD225ED0A), UINT64_C(0x932DA8FA98C4E635), + 
UINT64_C(0xCE9B164947E7FA74), UINT64_C(0x05F67CD80D06F14B), + UINT64_C(0xC477EFF95CEB18E3), UINT64_C(0x0F1A8568160A13DC), + UINT64_C(0x52AC3BDBC9290F9D), UINT64_C(0x99C1514A83C804A2), + UINT64_C(0xE8C046BC766E371F), UINT64_C(0x23AD2C2D3C8F3C20), + UINT64_C(0x7E1B929EE3AC2061), UINT64_C(0xB576F80FA94D2B5E), + UINT64_C(0x1907B2DD234E9F89), UINT64_C(0xD26AD84C69AF94B6), + UINT64_C(0x8FDC66FFB68C88F7), UINT64_C(0x44B10C6EFC6D83C8), + UINT64_C(0x35B01B9809CBB075), UINT64_C(0xFEDD7109432ABB4A), + UINT64_C(0xA36BCFBA9C09A70B), UINT64_C(0x6806A52BD6E8AC34), + UINT64_C(0x7E9655B1A2A11736), UINT64_C(0xB5FB3F20E8401C09), + UINT64_C(0xE84D819337630048), UINT64_C(0x2320EB027D820B77), + UINT64_C(0x5221FCF4882438CA), UINT64_C(0x994C9665C2C533F5), + UINT64_C(0xC4FA28D61DE62FB4), UINT64_C(0x0F9742475707248B), + UINT64_C(0xA3E60895DD04905C), UINT64_C(0x688B620497E59B63), + UINT64_C(0x353DDCB748C68722), UINT64_C(0xFE50B62602278C1D), + UINT64_C(0x8F51A1D0F781BFA0), UINT64_C(0x443CCB41BD60B49F), + UINT64_C(0x198A75F26243A8DE), UINT64_C(0xD2E71F6328A2A3E1), + UINT64_C(0x35AA94C78BD1DEDB), UINT64_C(0xFEC7FE56C130D5E4), + UINT64_C(0xA37140E51E13C9A5), UINT64_C(0x681C2A7454F2C29A), + UINT64_C(0x191D3D82A154F127), UINT64_C(0xD2705713EBB5FA18), + UINT64_C(0x8FC6E9A03496E659), UINT64_C(0x44AB83317E77ED66), + UINT64_C(0xE8DAC9E3F47459B1), UINT64_C(0x23B7A372BE95528E), + UINT64_C(0x7E011DC161B64ECF), UINT64_C(0xB56C77502B5745F0), + UINT64_C(0xC46D60A6DEF1764D), UINT64_C(0x0F000A3794107D72), + UINT64_C(0x52B6B4844B336133), UINT64_C(0x99DBDE1501D26A0C), + UINT64_C(0x8F4B2E8F759BD10E), UINT64_C(0x4426441E3F7ADA31), + UINT64_C(0x1990FAADE059C670), UINT64_C(0xD2FD903CAAB8CD4F), + UINT64_C(0xA3FC87CA5F1EFEF2), UINT64_C(0x6891ED5B15FFF5CD), + UINT64_C(0x352753E8CADCE98C), UINT64_C(0xFE4A3979803DE2B3), + UINT64_C(0x523B73AB0A3E5664), UINT64_C(0x9956193A40DF5D5B), + UINT64_C(0xC4E0A7899FFC411A), UINT64_C(0x0F8DCD18D51D4A25), + UINT64_C(0x7E8CDAEE20BB7998), UINT64_C(0xB5E1B07F6A5A72A7), + 
UINT64_C(0xE8570ECCB5796EE6), UINT64_C(0x233A645DFF9865D9), + UINT64_C(0x26CC1885F29E9492), UINT64_C(0xEDA17214B87F9FAD), + UINT64_C(0xB017CCA7675C83EC), UINT64_C(0x7B7AA6362DBD88D3), + UINT64_C(0x0A7BB1C0D81BBB6E), UINT64_C(0xC116DB5192FAB051), + UINT64_C(0x9CA065E24DD9AC10), UINT64_C(0x57CD0F730738A72F), + UINT64_C(0xFBBC45A18D3B13F8), UINT64_C(0x30D12F30C7DA18C7), + UINT64_C(0x6D67918318F90486), UINT64_C(0xA60AFB1252180FB9), + UINT64_C(0xD70BECE4A7BE3C04), UINT64_C(0x1C668675ED5F373B), + UINT64_C(0x41D038C6327C2B7A), UINT64_C(0x8ABD5257789D2045), + UINT64_C(0x9C2DA2CD0CD49B47), UINT64_C(0x5740C85C46359078), + UINT64_C(0x0AF676EF99168C39), UINT64_C(0xC19B1C7ED3F78706), + UINT64_C(0xB09A0B882651B4BB), UINT64_C(0x7BF761196CB0BF84), + UINT64_C(0x2641DFAAB393A3C5), UINT64_C(0xED2CB53BF972A8FA), + UINT64_C(0x415DFFE973711C2D), UINT64_C(0x8A30957839901712), + UINT64_C(0xD7862BCBE6B30B53), UINT64_C(0x1CEB415AAC52006C), + UINT64_C(0x6DEA56AC59F433D1), UINT64_C(0xA6873C3D131538EE), + UINT64_C(0xFB31828ECC3624AF), UINT64_C(0x305CE81F86D72F90), + UINT64_C(0xD71163BB25A452AA), UINT64_C(0x1C7C092A6F455995), + UINT64_C(0x41CAB799B06645D4), UINT64_C(0x8AA7DD08FA874EEB), + UINT64_C(0xFBA6CAFE0F217D56), UINT64_C(0x30CBA06F45C07669), + UINT64_C(0x6D7D1EDC9AE36A28), UINT64_C(0xA610744DD0026117), + UINT64_C(0x0A613E9F5A01D5C0), UINT64_C(0xC10C540E10E0DEFF), + UINT64_C(0x9CBAEABDCFC3C2BE), UINT64_C(0x57D7802C8522C981), + UINT64_C(0x26D697DA7084FA3C), UINT64_C(0xEDBBFD4B3A65F103), + UINT64_C(0xB00D43F8E546ED42), UINT64_C(0x7B602969AFA7E67D), + UINT64_C(0x6DF0D9F3DBEE5D7F), UINT64_C(0xA69DB362910F5640), + UINT64_C(0xFB2B0DD14E2C4A01), UINT64_C(0x3046674004CD413E), + UINT64_C(0x414770B6F16B7283), UINT64_C(0x8A2A1A27BB8A79BC), + UINT64_C(0xD79CA49464A965FD), UINT64_C(0x1CF1CE052E486EC2), + UINT64_C(0xB08084D7A44BDA15), UINT64_C(0x7BEDEE46EEAAD12A), + UINT64_C(0x265B50F53189CD6B), UINT64_C(0xED363A647B68C654), + UINT64_C(0x9C372D928ECEF5E9), UINT64_C(0x575A4703C42FFED6), + 
UINT64_C(0x0AECF9B01B0CE297), UINT64_C(0xC181932151EDE9A8) + }, { + UINT64_C(0x0000000000000000), UINT64_C(0xDCA12C225E8AEE1D), + UINT64_C(0xB8435944BC14DD3B), UINT64_C(0x64E27566E29E3326), + UINT64_C(0x7087B2887829BA77), UINT64_C(0xAC269EAA26A3546A), + UINT64_C(0xC8C4EBCCC43D674C), UINT64_C(0x1465C7EE9AB78951), + UINT64_C(0xE00E6511F15274EF), UINT64_C(0x3CAF4933AFD89AF2), + UINT64_C(0x584D3C554D46A9D4), UINT64_C(0x84EC107713CC47C9), + UINT64_C(0x9089D799897BCE98), UINT64_C(0x4C28FBBBD7F12085), + UINT64_C(0x28CA8EDD356F13A3), UINT64_C(0xF46BA2FF6BE5FDBE), + UINT64_C(0x4503C48DC90A304C), UINT64_C(0x99A2E8AF9780DE51), + UINT64_C(0xFD409DC9751EED77), UINT64_C(0x21E1B1EB2B94036A), + UINT64_C(0x35847605B1238A3B), UINT64_C(0xE9255A27EFA96426), + UINT64_C(0x8DC72F410D375700), UINT64_C(0x5166036353BDB91D), + UINT64_C(0xA50DA19C385844A3), UINT64_C(0x79AC8DBE66D2AABE), + UINT64_C(0x1D4EF8D8844C9998), UINT64_C(0xC1EFD4FADAC67785), + UINT64_C(0xD58A13144071FED4), UINT64_C(0x092B3F361EFB10C9), + UINT64_C(0x6DC94A50FC6523EF), UINT64_C(0xB1686672A2EFCDF2), + UINT64_C(0x8A06881B93156098), UINT64_C(0x56A7A439CD9F8E85), + UINT64_C(0x3245D15F2F01BDA3), UINT64_C(0xEEE4FD7D718B53BE), + UINT64_C(0xFA813A93EB3CDAEF), UINT64_C(0x262016B1B5B634F2), + UINT64_C(0x42C263D7572807D4), UINT64_C(0x9E634FF509A2E9C9), + UINT64_C(0x6A08ED0A62471477), UINT64_C(0xB6A9C1283CCDFA6A), + UINT64_C(0xD24BB44EDE53C94C), UINT64_C(0x0EEA986C80D92751), + UINT64_C(0x1A8F5F821A6EAE00), UINT64_C(0xC62E73A044E4401D), + UINT64_C(0xA2CC06C6A67A733B), UINT64_C(0x7E6D2AE4F8F09D26), + UINT64_C(0xCF054C965A1F50D4), UINT64_C(0x13A460B40495BEC9), + UINT64_C(0x774615D2E60B8DEF), UINT64_C(0xABE739F0B88163F2), + UINT64_C(0xBF82FE1E2236EAA3), UINT64_C(0x6323D23C7CBC04BE), + UINT64_C(0x07C1A75A9E223798), UINT64_C(0xDB608B78C0A8D985), + UINT64_C(0x2F0B2987AB4D243B), UINT64_C(0xF3AA05A5F5C7CA26), + UINT64_C(0x974870C31759F900), UINT64_C(0x4BE95CE149D3171D), + UINT64_C(0x5F8C9B0FD3649E4C), UINT64_C(0x832DB72D8DEE7051), + 
UINT64_C(0xE7CFC24B6F704377), UINT64_C(0x3B6EEE6931FAAD6A), + UINT64_C(0x91131E980D8418A2), UINT64_C(0x4DB232BA530EF6BF), + UINT64_C(0x295047DCB190C599), UINT64_C(0xF5F16BFEEF1A2B84), + UINT64_C(0xE194AC1075ADA2D5), UINT64_C(0x3D3580322B274CC8), + UINT64_C(0x59D7F554C9B97FEE), UINT64_C(0x8576D976973391F3), + UINT64_C(0x711D7B89FCD66C4D), UINT64_C(0xADBC57ABA25C8250), + UINT64_C(0xC95E22CD40C2B176), UINT64_C(0x15FF0EEF1E485F6B), + UINT64_C(0x019AC90184FFD63A), UINT64_C(0xDD3BE523DA753827), + UINT64_C(0xB9D9904538EB0B01), UINT64_C(0x6578BC676661E51C), + UINT64_C(0xD410DA15C48E28EE), UINT64_C(0x08B1F6379A04C6F3), + UINT64_C(0x6C538351789AF5D5), UINT64_C(0xB0F2AF7326101BC8), + UINT64_C(0xA497689DBCA79299), UINT64_C(0x783644BFE22D7C84), + UINT64_C(0x1CD431D900B34FA2), UINT64_C(0xC0751DFB5E39A1BF), + UINT64_C(0x341EBF0435DC5C01), UINT64_C(0xE8BF93266B56B21C), + UINT64_C(0x8C5DE64089C8813A), UINT64_C(0x50FCCA62D7426F27), + UINT64_C(0x44990D8C4DF5E676), UINT64_C(0x983821AE137F086B), + UINT64_C(0xFCDA54C8F1E13B4D), UINT64_C(0x207B78EAAF6BD550), + UINT64_C(0x1B1596839E91783A), UINT64_C(0xC7B4BAA1C01B9627), + UINT64_C(0xA356CFC72285A501), UINT64_C(0x7FF7E3E57C0F4B1C), + UINT64_C(0x6B92240BE6B8C24D), UINT64_C(0xB7330829B8322C50), + UINT64_C(0xD3D17D4F5AAC1F76), UINT64_C(0x0F70516D0426F16B), + UINT64_C(0xFB1BF3926FC30CD5), UINT64_C(0x27BADFB03149E2C8), + UINT64_C(0x4358AAD6D3D7D1EE), UINT64_C(0x9FF986F48D5D3FF3), + UINT64_C(0x8B9C411A17EAB6A2), UINT64_C(0x573D6D38496058BF), + UINT64_C(0x33DF185EABFE6B99), UINT64_C(0xEF7E347CF5748584), + UINT64_C(0x5E16520E579B4876), UINT64_C(0x82B77E2C0911A66B), + UINT64_C(0xE6550B4AEB8F954D), UINT64_C(0x3AF42768B5057B50), + UINT64_C(0x2E91E0862FB2F201), UINT64_C(0xF230CCA471381C1C), + UINT64_C(0x96D2B9C293A62F3A), UINT64_C(0x4A7395E0CD2CC127), + UINT64_C(0xBE18371FA6C93C99), UINT64_C(0x62B91B3DF843D284), + UINT64_C(0x065B6E5B1ADDE1A2), UINT64_C(0xDAFA427944570FBF), + UINT64_C(0xCE9F8597DEE086EE), UINT64_C(0x123EA9B5806A68F3), + 
UINT64_C(0x76DCDCD362F45BD5), UINT64_C(0xAA7DF0F13C7EB5C8), + UINT64_C(0xA739329F30A7E9D6), UINT64_C(0x7B981EBD6E2D07CB), + UINT64_C(0x1F7A6BDB8CB334ED), UINT64_C(0xC3DB47F9D239DAF0), + UINT64_C(0xD7BE8017488E53A1), UINT64_C(0x0B1FAC351604BDBC), + UINT64_C(0x6FFDD953F49A8E9A), UINT64_C(0xB35CF571AA106087), + UINT64_C(0x4737578EC1F59D39), UINT64_C(0x9B967BAC9F7F7324), + UINT64_C(0xFF740ECA7DE14002), UINT64_C(0x23D522E8236BAE1F), + UINT64_C(0x37B0E506B9DC274E), UINT64_C(0xEB11C924E756C953), + UINT64_C(0x8FF3BC4205C8FA75), UINT64_C(0x535290605B421468), + UINT64_C(0xE23AF612F9ADD99A), UINT64_C(0x3E9BDA30A7273787), + UINT64_C(0x5A79AF5645B904A1), UINT64_C(0x86D883741B33EABC), + UINT64_C(0x92BD449A818463ED), UINT64_C(0x4E1C68B8DF0E8DF0), + UINT64_C(0x2AFE1DDE3D90BED6), UINT64_C(0xF65F31FC631A50CB), + UINT64_C(0x0234930308FFAD75), UINT64_C(0xDE95BF2156754368), + UINT64_C(0xBA77CA47B4EB704E), UINT64_C(0x66D6E665EA619E53), + UINT64_C(0x72B3218B70D61702), UINT64_C(0xAE120DA92E5CF91F), + UINT64_C(0xCAF078CFCCC2CA39), UINT64_C(0x165154ED92482424), + UINT64_C(0x2D3FBA84A3B2894E), UINT64_C(0xF19E96A6FD386753), + UINT64_C(0x957CE3C01FA65475), UINT64_C(0x49DDCFE2412CBA68), + UINT64_C(0x5DB8080CDB9B3339), UINT64_C(0x8119242E8511DD24), + UINT64_C(0xE5FB5148678FEE02), UINT64_C(0x395A7D6A3905001F), + UINT64_C(0xCD31DF9552E0FDA1), UINT64_C(0x1190F3B70C6A13BC), + UINT64_C(0x757286D1EEF4209A), UINT64_C(0xA9D3AAF3B07ECE87), + UINT64_C(0xBDB66D1D2AC947D6), UINT64_C(0x6117413F7443A9CB), + UINT64_C(0x05F5345996DD9AED), UINT64_C(0xD954187BC85774F0), + UINT64_C(0x683C7E096AB8B902), UINT64_C(0xB49D522B3432571F), + UINT64_C(0xD07F274DD6AC6439), UINT64_C(0x0CDE0B6F88268A24), + UINT64_C(0x18BBCC8112910375), UINT64_C(0xC41AE0A34C1BED68), + UINT64_C(0xA0F895C5AE85DE4E), UINT64_C(0x7C59B9E7F00F3053), + UINT64_C(0x88321B189BEACDED), UINT64_C(0x5493373AC56023F0), + UINT64_C(0x3071425C27FE10D6), UINT64_C(0xECD06E7E7974FECB), + UINT64_C(0xF8B5A990E3C3779A), UINT64_C(0x241485B2BD499987), + 
UINT64_C(0x40F6F0D45FD7AAA1), UINT64_C(0x9C57DCF6015D44BC), + UINT64_C(0x362A2C073D23F174), UINT64_C(0xEA8B002563A91F69), + UINT64_C(0x8E69754381372C4F), UINT64_C(0x52C85961DFBDC252), + UINT64_C(0x46AD9E8F450A4B03), UINT64_C(0x9A0CB2AD1B80A51E), + UINT64_C(0xFEEEC7CBF91E9638), UINT64_C(0x224FEBE9A7947825), + UINT64_C(0xD6244916CC71859B), UINT64_C(0x0A85653492FB6B86), + UINT64_C(0x6E671052706558A0), UINT64_C(0xB2C63C702EEFB6BD), + UINT64_C(0xA6A3FB9EB4583FEC), UINT64_C(0x7A02D7BCEAD2D1F1), + UINT64_C(0x1EE0A2DA084CE2D7), UINT64_C(0xC2418EF856C60CCA), + UINT64_C(0x7329E88AF429C138), UINT64_C(0xAF88C4A8AAA32F25), + UINT64_C(0xCB6AB1CE483D1C03), UINT64_C(0x17CB9DEC16B7F21E), + UINT64_C(0x03AE5A028C007B4F), UINT64_C(0xDF0F7620D28A9552), + UINT64_C(0xBBED03463014A674), UINT64_C(0x674C2F646E9E4869), + UINT64_C(0x93278D9B057BB5D7), UINT64_C(0x4F86A1B95BF15BCA), + UINT64_C(0x2B64D4DFB96F68EC), UINT64_C(0xF7C5F8FDE7E586F1), + UINT64_C(0xE3A03F137D520FA0), UINT64_C(0x3F01133123D8E1BD), + UINT64_C(0x5BE36657C146D29B), UINT64_C(0x87424A759FCC3C86), + UINT64_C(0xBC2CA41CAE3691EC), UINT64_C(0x608D883EF0BC7FF1), + UINT64_C(0x046FFD5812224CD7), UINT64_C(0xD8CED17A4CA8A2CA), + UINT64_C(0xCCAB1694D61F2B9B), UINT64_C(0x100A3AB68895C586), + UINT64_C(0x74E84FD06A0BF6A0), UINT64_C(0xA84963F2348118BD), + UINT64_C(0x5C22C10D5F64E503), UINT64_C(0x8083ED2F01EE0B1E), + UINT64_C(0xE4619849E3703838), UINT64_C(0x38C0B46BBDFAD625), + UINT64_C(0x2CA57385274D5F74), UINT64_C(0xF0045FA779C7B169), + UINT64_C(0x94E62AC19B59824F), UINT64_C(0x484706E3C5D36C52), + UINT64_C(0xF92F6091673CA1A0), UINT64_C(0x258E4CB339B64FBD), + UINT64_C(0x416C39D5DB287C9B), UINT64_C(0x9DCD15F785A29286), + UINT64_C(0x89A8D2191F151BD7), UINT64_C(0x5509FE3B419FF5CA), + UINT64_C(0x31EB8B5DA301C6EC), UINT64_C(0xED4AA77FFD8B28F1), + UINT64_C(0x19210580966ED54F), UINT64_C(0xC58029A2C8E43B52), + UINT64_C(0xA1625CC42A7A0874), UINT64_C(0x7DC370E674F0E669), + UINT64_C(0x69A6B708EE476F38), UINT64_C(0xB5079B2AB0CD8125), + 
UINT64_C(0xD1E5EE4C5253B203), UINT64_C(0x0D44C26E0CD95C1E) + } +}; diff --git a/src/liblzma/check/crc64_table_le.h b/src/liblzma/check/crc64_table_le.h new file mode 100644 index 000000000000..1196b31e1323 --- /dev/null +++ b/src/liblzma/check/crc64_table_le.h @@ -0,0 +1,521 @@ +/* This file has been automatically generated by crc64_tablegen.c. */ + +const uint64_t lzma_crc64_table[4][256] = { + { + UINT64_C(0x0000000000000000), UINT64_C(0xB32E4CBE03A75F6F), + UINT64_C(0xF4843657A840A05B), UINT64_C(0x47AA7AE9ABE7FF34), + UINT64_C(0x7BD0C384FF8F5E33), UINT64_C(0xC8FE8F3AFC28015C), + UINT64_C(0x8F54F5D357CFFE68), UINT64_C(0x3C7AB96D5468A107), + UINT64_C(0xF7A18709FF1EBC66), UINT64_C(0x448FCBB7FCB9E309), + UINT64_C(0x0325B15E575E1C3D), UINT64_C(0xB00BFDE054F94352), + UINT64_C(0x8C71448D0091E255), UINT64_C(0x3F5F08330336BD3A), + UINT64_C(0x78F572DAA8D1420E), UINT64_C(0xCBDB3E64AB761D61), + UINT64_C(0x7D9BA13851336649), UINT64_C(0xCEB5ED8652943926), + UINT64_C(0x891F976FF973C612), UINT64_C(0x3A31DBD1FAD4997D), + UINT64_C(0x064B62BCAEBC387A), UINT64_C(0xB5652E02AD1B6715), + UINT64_C(0xF2CF54EB06FC9821), UINT64_C(0x41E11855055BC74E), + UINT64_C(0x8A3A2631AE2DDA2F), UINT64_C(0x39146A8FAD8A8540), + UINT64_C(0x7EBE1066066D7A74), UINT64_C(0xCD905CD805CA251B), + UINT64_C(0xF1EAE5B551A2841C), UINT64_C(0x42C4A90B5205DB73), + UINT64_C(0x056ED3E2F9E22447), UINT64_C(0xB6409F5CFA457B28), + UINT64_C(0xFB374270A266CC92), UINT64_C(0x48190ECEA1C193FD), + UINT64_C(0x0FB374270A266CC9), UINT64_C(0xBC9D3899098133A6), + UINT64_C(0x80E781F45DE992A1), UINT64_C(0x33C9CD4A5E4ECDCE), + UINT64_C(0x7463B7A3F5A932FA), UINT64_C(0xC74DFB1DF60E6D95), + UINT64_C(0x0C96C5795D7870F4), UINT64_C(0xBFB889C75EDF2F9B), + UINT64_C(0xF812F32EF538D0AF), UINT64_C(0x4B3CBF90F69F8FC0), + UINT64_C(0x774606FDA2F72EC7), UINT64_C(0xC4684A43A15071A8), + UINT64_C(0x83C230AA0AB78E9C), UINT64_C(0x30EC7C140910D1F3), + UINT64_C(0x86ACE348F355AADB), UINT64_C(0x3582AFF6F0F2F5B4), + UINT64_C(0x7228D51F5B150A80), 
UINT64_C(0xC10699A158B255EF), + UINT64_C(0xFD7C20CC0CDAF4E8), UINT64_C(0x4E526C720F7DAB87), + UINT64_C(0x09F8169BA49A54B3), UINT64_C(0xBAD65A25A73D0BDC), + UINT64_C(0x710D64410C4B16BD), UINT64_C(0xC22328FF0FEC49D2), + UINT64_C(0x85895216A40BB6E6), UINT64_C(0x36A71EA8A7ACE989), + UINT64_C(0x0ADDA7C5F3C4488E), UINT64_C(0xB9F3EB7BF06317E1), + UINT64_C(0xFE5991925B84E8D5), UINT64_C(0x4D77DD2C5823B7BA), + UINT64_C(0x64B62BCAEBC387A1), UINT64_C(0xD7986774E864D8CE), + UINT64_C(0x90321D9D438327FA), UINT64_C(0x231C512340247895), + UINT64_C(0x1F66E84E144CD992), UINT64_C(0xAC48A4F017EB86FD), + UINT64_C(0xEBE2DE19BC0C79C9), UINT64_C(0x58CC92A7BFAB26A6), + UINT64_C(0x9317ACC314DD3BC7), UINT64_C(0x2039E07D177A64A8), + UINT64_C(0x67939A94BC9D9B9C), UINT64_C(0xD4BDD62ABF3AC4F3), + UINT64_C(0xE8C76F47EB5265F4), UINT64_C(0x5BE923F9E8F53A9B), + UINT64_C(0x1C4359104312C5AF), UINT64_C(0xAF6D15AE40B59AC0), + UINT64_C(0x192D8AF2BAF0E1E8), UINT64_C(0xAA03C64CB957BE87), + UINT64_C(0xEDA9BCA512B041B3), UINT64_C(0x5E87F01B11171EDC), + UINT64_C(0x62FD4976457FBFDB), UINT64_C(0xD1D305C846D8E0B4), + UINT64_C(0x96797F21ED3F1F80), UINT64_C(0x2557339FEE9840EF), + UINT64_C(0xEE8C0DFB45EE5D8E), UINT64_C(0x5DA24145464902E1), + UINT64_C(0x1A083BACEDAEFDD5), UINT64_C(0xA9267712EE09A2BA), + UINT64_C(0x955CCE7FBA6103BD), UINT64_C(0x267282C1B9C65CD2), + UINT64_C(0x61D8F8281221A3E6), UINT64_C(0xD2F6B4961186FC89), + UINT64_C(0x9F8169BA49A54B33), UINT64_C(0x2CAF25044A02145C), + UINT64_C(0x6B055FEDE1E5EB68), UINT64_C(0xD82B1353E242B407), + UINT64_C(0xE451AA3EB62A1500), UINT64_C(0x577FE680B58D4A6F), + UINT64_C(0x10D59C691E6AB55B), UINT64_C(0xA3FBD0D71DCDEA34), + UINT64_C(0x6820EEB3B6BBF755), UINT64_C(0xDB0EA20DB51CA83A), + UINT64_C(0x9CA4D8E41EFB570E), UINT64_C(0x2F8A945A1D5C0861), + UINT64_C(0x13F02D374934A966), UINT64_C(0xA0DE61894A93F609), + UINT64_C(0xE7741B60E174093D), UINT64_C(0x545A57DEE2D35652), + UINT64_C(0xE21AC88218962D7A), UINT64_C(0x5134843C1B317215), + UINT64_C(0x169EFED5B0D68D21), 
UINT64_C(0xA5B0B26BB371D24E), + UINT64_C(0x99CA0B06E7197349), UINT64_C(0x2AE447B8E4BE2C26), + UINT64_C(0x6D4E3D514F59D312), UINT64_C(0xDE6071EF4CFE8C7D), + UINT64_C(0x15BB4F8BE788911C), UINT64_C(0xA6950335E42FCE73), + UINT64_C(0xE13F79DC4FC83147), UINT64_C(0x521135624C6F6E28), + UINT64_C(0x6E6B8C0F1807CF2F), UINT64_C(0xDD45C0B11BA09040), + UINT64_C(0x9AEFBA58B0476F74), UINT64_C(0x29C1F6E6B3E0301B), + UINT64_C(0xC96C5795D7870F42), UINT64_C(0x7A421B2BD420502D), + UINT64_C(0x3DE861C27FC7AF19), UINT64_C(0x8EC62D7C7C60F076), + UINT64_C(0xB2BC941128085171), UINT64_C(0x0192D8AF2BAF0E1E), + UINT64_C(0x4638A2468048F12A), UINT64_C(0xF516EEF883EFAE45), + UINT64_C(0x3ECDD09C2899B324), UINT64_C(0x8DE39C222B3EEC4B), + UINT64_C(0xCA49E6CB80D9137F), UINT64_C(0x7967AA75837E4C10), + UINT64_C(0x451D1318D716ED17), UINT64_C(0xF6335FA6D4B1B278), + UINT64_C(0xB199254F7F564D4C), UINT64_C(0x02B769F17CF11223), + UINT64_C(0xB4F7F6AD86B4690B), UINT64_C(0x07D9BA1385133664), + UINT64_C(0x4073C0FA2EF4C950), UINT64_C(0xF35D8C442D53963F), + UINT64_C(0xCF273529793B3738), UINT64_C(0x7C0979977A9C6857), + UINT64_C(0x3BA3037ED17B9763), UINT64_C(0x888D4FC0D2DCC80C), + UINT64_C(0x435671A479AAD56D), UINT64_C(0xF0783D1A7A0D8A02), + UINT64_C(0xB7D247F3D1EA7536), UINT64_C(0x04FC0B4DD24D2A59), + UINT64_C(0x3886B22086258B5E), UINT64_C(0x8BA8FE9E8582D431), + UINT64_C(0xCC0284772E652B05), UINT64_C(0x7F2CC8C92DC2746A), + UINT64_C(0x325B15E575E1C3D0), UINT64_C(0x8175595B76469CBF), + UINT64_C(0xC6DF23B2DDA1638B), UINT64_C(0x75F16F0CDE063CE4), + UINT64_C(0x498BD6618A6E9DE3), UINT64_C(0xFAA59ADF89C9C28C), + UINT64_C(0xBD0FE036222E3DB8), UINT64_C(0x0E21AC88218962D7), + UINT64_C(0xC5FA92EC8AFF7FB6), UINT64_C(0x76D4DE52895820D9), + UINT64_C(0x317EA4BB22BFDFED), UINT64_C(0x8250E80521188082), + UINT64_C(0xBE2A516875702185), UINT64_C(0x0D041DD676D77EEA), + UINT64_C(0x4AAE673FDD3081DE), UINT64_C(0xF9802B81DE97DEB1), + UINT64_C(0x4FC0B4DD24D2A599), UINT64_C(0xFCEEF8632775FAF6), + UINT64_C(0xBB44828A8C9205C2), 
UINT64_C(0x086ACE348F355AAD), + UINT64_C(0x34107759DB5DFBAA), UINT64_C(0x873E3BE7D8FAA4C5), + UINT64_C(0xC094410E731D5BF1), UINT64_C(0x73BA0DB070BA049E), + UINT64_C(0xB86133D4DBCC19FF), UINT64_C(0x0B4F7F6AD86B4690), + UINT64_C(0x4CE50583738CB9A4), UINT64_C(0xFFCB493D702BE6CB), + UINT64_C(0xC3B1F050244347CC), UINT64_C(0x709FBCEE27E418A3), + UINT64_C(0x3735C6078C03E797), UINT64_C(0x841B8AB98FA4B8F8), + UINT64_C(0xADDA7C5F3C4488E3), UINT64_C(0x1EF430E13FE3D78C), + UINT64_C(0x595E4A08940428B8), UINT64_C(0xEA7006B697A377D7), + UINT64_C(0xD60ABFDBC3CBD6D0), UINT64_C(0x6524F365C06C89BF), + UINT64_C(0x228E898C6B8B768B), UINT64_C(0x91A0C532682C29E4), + UINT64_C(0x5A7BFB56C35A3485), UINT64_C(0xE955B7E8C0FD6BEA), + UINT64_C(0xAEFFCD016B1A94DE), UINT64_C(0x1DD181BF68BDCBB1), + UINT64_C(0x21AB38D23CD56AB6), UINT64_C(0x9285746C3F7235D9), + UINT64_C(0xD52F0E859495CAED), UINT64_C(0x6601423B97329582), + UINT64_C(0xD041DD676D77EEAA), UINT64_C(0x636F91D96ED0B1C5), + UINT64_C(0x24C5EB30C5374EF1), UINT64_C(0x97EBA78EC690119E), + UINT64_C(0xAB911EE392F8B099), UINT64_C(0x18BF525D915FEFF6), + UINT64_C(0x5F1528B43AB810C2), UINT64_C(0xEC3B640A391F4FAD), + UINT64_C(0x27E05A6E926952CC), UINT64_C(0x94CE16D091CE0DA3), + UINT64_C(0xD3646C393A29F297), UINT64_C(0x604A2087398EADF8), + UINT64_C(0x5C3099EA6DE60CFF), UINT64_C(0xEF1ED5546E415390), + UINT64_C(0xA8B4AFBDC5A6ACA4), UINT64_C(0x1B9AE303C601F3CB), + UINT64_C(0x56ED3E2F9E224471), UINT64_C(0xE5C372919D851B1E), + UINT64_C(0xA26908783662E42A), UINT64_C(0x114744C635C5BB45), + UINT64_C(0x2D3DFDAB61AD1A42), UINT64_C(0x9E13B115620A452D), + UINT64_C(0xD9B9CBFCC9EDBA19), UINT64_C(0x6A978742CA4AE576), + UINT64_C(0xA14CB926613CF817), UINT64_C(0x1262F598629BA778), + UINT64_C(0x55C88F71C97C584C), UINT64_C(0xE6E6C3CFCADB0723), + UINT64_C(0xDA9C7AA29EB3A624), UINT64_C(0x69B2361C9D14F94B), + UINT64_C(0x2E184CF536F3067F), UINT64_C(0x9D36004B35545910), + UINT64_C(0x2B769F17CF112238), UINT64_C(0x9858D3A9CCB67D57), + UINT64_C(0xDFF2A94067518263), 
UINT64_C(0x6CDCE5FE64F6DD0C), + UINT64_C(0x50A65C93309E7C0B), UINT64_C(0xE388102D33392364), + UINT64_C(0xA4226AC498DEDC50), UINT64_C(0x170C267A9B79833F), + UINT64_C(0xDCD7181E300F9E5E), UINT64_C(0x6FF954A033A8C131), + UINT64_C(0x28532E49984F3E05), UINT64_C(0x9B7D62F79BE8616A), + UINT64_C(0xA707DB9ACF80C06D), UINT64_C(0x14299724CC279F02), + UINT64_C(0x5383EDCD67C06036), UINT64_C(0xE0ADA17364673F59) + }, { + UINT64_C(0x0000000000000000), UINT64_C(0x54E979925CD0F10D), + UINT64_C(0xA9D2F324B9A1E21A), UINT64_C(0xFD3B8AB6E5711317), + UINT64_C(0xC17D4962DC4DDAB1), UINT64_C(0x959430F0809D2BBC), + UINT64_C(0x68AFBA4665EC38AB), UINT64_C(0x3C46C3D4393CC9A6), + UINT64_C(0x10223DEE1795ABE7), UINT64_C(0x44CB447C4B455AEA), + UINT64_C(0xB9F0CECAAE3449FD), UINT64_C(0xED19B758F2E4B8F0), + UINT64_C(0xD15F748CCBD87156), UINT64_C(0x85B60D1E9708805B), + UINT64_C(0x788D87A87279934C), UINT64_C(0x2C64FE3A2EA96241), + UINT64_C(0x20447BDC2F2B57CE), UINT64_C(0x74AD024E73FBA6C3), + UINT64_C(0x899688F8968AB5D4), UINT64_C(0xDD7FF16ACA5A44D9), + UINT64_C(0xE13932BEF3668D7F), UINT64_C(0xB5D04B2CAFB67C72), + UINT64_C(0x48EBC19A4AC76F65), UINT64_C(0x1C02B80816179E68), + UINT64_C(0x3066463238BEFC29), UINT64_C(0x648F3FA0646E0D24), + UINT64_C(0x99B4B516811F1E33), UINT64_C(0xCD5DCC84DDCFEF3E), + UINT64_C(0xF11B0F50E4F32698), UINT64_C(0xA5F276C2B823D795), + UINT64_C(0x58C9FC745D52C482), UINT64_C(0x0C2085E60182358F), + UINT64_C(0x4088F7B85E56AF9C), UINT64_C(0x14618E2A02865E91), + UINT64_C(0xE95A049CE7F74D86), UINT64_C(0xBDB37D0EBB27BC8B), + UINT64_C(0x81F5BEDA821B752D), UINT64_C(0xD51CC748DECB8420), + UINT64_C(0x28274DFE3BBA9737), UINT64_C(0x7CCE346C676A663A), + UINT64_C(0x50AACA5649C3047B), UINT64_C(0x0443B3C41513F576), + UINT64_C(0xF9783972F062E661), UINT64_C(0xAD9140E0ACB2176C), + UINT64_C(0x91D78334958EDECA), UINT64_C(0xC53EFAA6C95E2FC7), + UINT64_C(0x380570102C2F3CD0), UINT64_C(0x6CEC098270FFCDDD), + UINT64_C(0x60CC8C64717DF852), UINT64_C(0x3425F5F62DAD095F), + UINT64_C(0xC91E7F40C8DC1A48), 
UINT64_C(0x9DF706D2940CEB45), + UINT64_C(0xA1B1C506AD3022E3), UINT64_C(0xF558BC94F1E0D3EE), + UINT64_C(0x086336221491C0F9), UINT64_C(0x5C8A4FB0484131F4), + UINT64_C(0x70EEB18A66E853B5), UINT64_C(0x2407C8183A38A2B8), + UINT64_C(0xD93C42AEDF49B1AF), UINT64_C(0x8DD53B3C839940A2), + UINT64_C(0xB193F8E8BAA58904), UINT64_C(0xE57A817AE6757809), + UINT64_C(0x18410BCC03046B1E), UINT64_C(0x4CA8725E5FD49A13), + UINT64_C(0x8111EF70BCAD5F38), UINT64_C(0xD5F896E2E07DAE35), + UINT64_C(0x28C31C54050CBD22), UINT64_C(0x7C2A65C659DC4C2F), + UINT64_C(0x406CA61260E08589), UINT64_C(0x1485DF803C307484), + UINT64_C(0xE9BE5536D9416793), UINT64_C(0xBD572CA48591969E), + UINT64_C(0x9133D29EAB38F4DF), UINT64_C(0xC5DAAB0CF7E805D2), + UINT64_C(0x38E121BA129916C5), UINT64_C(0x6C0858284E49E7C8), + UINT64_C(0x504E9BFC77752E6E), UINT64_C(0x04A7E26E2BA5DF63), + UINT64_C(0xF99C68D8CED4CC74), UINT64_C(0xAD75114A92043D79), + UINT64_C(0xA15594AC938608F6), UINT64_C(0xF5BCED3ECF56F9FB), + UINT64_C(0x088767882A27EAEC), UINT64_C(0x5C6E1E1A76F71BE1), + UINT64_C(0x6028DDCE4FCBD247), UINT64_C(0x34C1A45C131B234A), + UINT64_C(0xC9FA2EEAF66A305D), UINT64_C(0x9D135778AABAC150), + UINT64_C(0xB177A9428413A311), UINT64_C(0xE59ED0D0D8C3521C), + UINT64_C(0x18A55A663DB2410B), UINT64_C(0x4C4C23F46162B006), + UINT64_C(0x700AE020585E79A0), UINT64_C(0x24E399B2048E88AD), + UINT64_C(0xD9D81304E1FF9BBA), UINT64_C(0x8D316A96BD2F6AB7), + UINT64_C(0xC19918C8E2FBF0A4), UINT64_C(0x9570615ABE2B01A9), + UINT64_C(0x684BEBEC5B5A12BE), UINT64_C(0x3CA2927E078AE3B3), + UINT64_C(0x00E451AA3EB62A15), UINT64_C(0x540D28386266DB18), + UINT64_C(0xA936A28E8717C80F), UINT64_C(0xFDDFDB1CDBC73902), + UINT64_C(0xD1BB2526F56E5B43), UINT64_C(0x85525CB4A9BEAA4E), + UINT64_C(0x7869D6024CCFB959), UINT64_C(0x2C80AF90101F4854), + UINT64_C(0x10C66C44292381F2), UINT64_C(0x442F15D675F370FF), + UINT64_C(0xB9149F60908263E8), UINT64_C(0xEDFDE6F2CC5292E5), + UINT64_C(0xE1DD6314CDD0A76A), UINT64_C(0xB5341A8691005667), + UINT64_C(0x480F903074714570), 
UINT64_C(0x1CE6E9A228A1B47D), + UINT64_C(0x20A02A76119D7DDB), UINT64_C(0x744953E44D4D8CD6), + UINT64_C(0x8972D952A83C9FC1), UINT64_C(0xDD9BA0C0F4EC6ECC), + UINT64_C(0xF1FF5EFADA450C8D), UINT64_C(0xA51627688695FD80), + UINT64_C(0x582DADDE63E4EE97), UINT64_C(0x0CC4D44C3F341F9A), + UINT64_C(0x308217980608D63C), UINT64_C(0x646B6E0A5AD82731), + UINT64_C(0x9950E4BCBFA93426), UINT64_C(0xCDB99D2EE379C52B), + UINT64_C(0x90FB71CAD654A0F5), UINT64_C(0xC41208588A8451F8), + UINT64_C(0x392982EE6FF542EF), UINT64_C(0x6DC0FB7C3325B3E2), + UINT64_C(0x518638A80A197A44), UINT64_C(0x056F413A56C98B49), + UINT64_C(0xF854CB8CB3B8985E), UINT64_C(0xACBDB21EEF686953), + UINT64_C(0x80D94C24C1C10B12), UINT64_C(0xD43035B69D11FA1F), + UINT64_C(0x290BBF007860E908), UINT64_C(0x7DE2C69224B01805), + UINT64_C(0x41A405461D8CD1A3), UINT64_C(0x154D7CD4415C20AE), + UINT64_C(0xE876F662A42D33B9), UINT64_C(0xBC9F8FF0F8FDC2B4), + UINT64_C(0xB0BF0A16F97FF73B), UINT64_C(0xE4567384A5AF0636), + UINT64_C(0x196DF93240DE1521), UINT64_C(0x4D8480A01C0EE42C), + UINT64_C(0x71C2437425322D8A), UINT64_C(0x252B3AE679E2DC87), + UINT64_C(0xD810B0509C93CF90), UINT64_C(0x8CF9C9C2C0433E9D), + UINT64_C(0xA09D37F8EEEA5CDC), UINT64_C(0xF4744E6AB23AADD1), + UINT64_C(0x094FC4DC574BBEC6), UINT64_C(0x5DA6BD4E0B9B4FCB), + UINT64_C(0x61E07E9A32A7866D), UINT64_C(0x350907086E777760), + UINT64_C(0xC8328DBE8B066477), UINT64_C(0x9CDBF42CD7D6957A), + UINT64_C(0xD073867288020F69), UINT64_C(0x849AFFE0D4D2FE64), + UINT64_C(0x79A1755631A3ED73), UINT64_C(0x2D480CC46D731C7E), + UINT64_C(0x110ECF10544FD5D8), UINT64_C(0x45E7B682089F24D5), + UINT64_C(0xB8DC3C34EDEE37C2), UINT64_C(0xEC3545A6B13EC6CF), + UINT64_C(0xC051BB9C9F97A48E), UINT64_C(0x94B8C20EC3475583), + UINT64_C(0x698348B826364694), UINT64_C(0x3D6A312A7AE6B799), + UINT64_C(0x012CF2FE43DA7E3F), UINT64_C(0x55C58B6C1F0A8F32), + UINT64_C(0xA8FE01DAFA7B9C25), UINT64_C(0xFC177848A6AB6D28), + UINT64_C(0xF037FDAEA72958A7), UINT64_C(0xA4DE843CFBF9A9AA), + UINT64_C(0x59E50E8A1E88BABD), 
UINT64_C(0x0D0C771842584BB0), + UINT64_C(0x314AB4CC7B648216), UINT64_C(0x65A3CD5E27B4731B), + UINT64_C(0x989847E8C2C5600C), UINT64_C(0xCC713E7A9E159101), + UINT64_C(0xE015C040B0BCF340), UINT64_C(0xB4FCB9D2EC6C024D), + UINT64_C(0x49C73364091D115A), UINT64_C(0x1D2E4AF655CDE057), + UINT64_C(0x216889226CF129F1), UINT64_C(0x7581F0B03021D8FC), + UINT64_C(0x88BA7A06D550CBEB), UINT64_C(0xDC53039489803AE6), + UINT64_C(0x11EA9EBA6AF9FFCD), UINT64_C(0x4503E72836290EC0), + UINT64_C(0xB8386D9ED3581DD7), UINT64_C(0xECD1140C8F88ECDA), + UINT64_C(0xD097D7D8B6B4257C), UINT64_C(0x847EAE4AEA64D471), + UINT64_C(0x794524FC0F15C766), UINT64_C(0x2DAC5D6E53C5366B), + UINT64_C(0x01C8A3547D6C542A), UINT64_C(0x5521DAC621BCA527), + UINT64_C(0xA81A5070C4CDB630), UINT64_C(0xFCF329E2981D473D), + UINT64_C(0xC0B5EA36A1218E9B), UINT64_C(0x945C93A4FDF17F96), + UINT64_C(0x6967191218806C81), UINT64_C(0x3D8E608044509D8C), + UINT64_C(0x31AEE56645D2A803), UINT64_C(0x65479CF41902590E), + UINT64_C(0x987C1642FC734A19), UINT64_C(0xCC956FD0A0A3BB14), + UINT64_C(0xF0D3AC04999F72B2), UINT64_C(0xA43AD596C54F83BF), + UINT64_C(0x59015F20203E90A8), UINT64_C(0x0DE826B27CEE61A5), + UINT64_C(0x218CD888524703E4), UINT64_C(0x7565A11A0E97F2E9), + UINT64_C(0x885E2BACEBE6E1FE), UINT64_C(0xDCB7523EB73610F3), + UINT64_C(0xE0F191EA8E0AD955), UINT64_C(0xB418E878D2DA2858), + UINT64_C(0x492362CE37AB3B4F), UINT64_C(0x1DCA1B5C6B7BCA42), + UINT64_C(0x5162690234AF5051), UINT64_C(0x058B1090687FA15C), + UINT64_C(0xF8B09A268D0EB24B), UINT64_C(0xAC59E3B4D1DE4346), + UINT64_C(0x901F2060E8E28AE0), UINT64_C(0xC4F659F2B4327BED), + UINT64_C(0x39CDD344514368FA), UINT64_C(0x6D24AAD60D9399F7), + UINT64_C(0x414054EC233AFBB6), UINT64_C(0x15A92D7E7FEA0ABB), + UINT64_C(0xE892A7C89A9B19AC), UINT64_C(0xBC7BDE5AC64BE8A1), + UINT64_C(0x803D1D8EFF772107), UINT64_C(0xD4D4641CA3A7D00A), + UINT64_C(0x29EFEEAA46D6C31D), UINT64_C(0x7D0697381A063210), + UINT64_C(0x712612DE1B84079F), UINT64_C(0x25CF6B4C4754F692), + UINT64_C(0xD8F4E1FAA225E585), 
UINT64_C(0x8C1D9868FEF51488), + UINT64_C(0xB05B5BBCC7C9DD2E), UINT64_C(0xE4B2222E9B192C23), + UINT64_C(0x1989A8987E683F34), UINT64_C(0x4D60D10A22B8CE39), + UINT64_C(0x61042F300C11AC78), UINT64_C(0x35ED56A250C15D75), + UINT64_C(0xC8D6DC14B5B04E62), UINT64_C(0x9C3FA586E960BF6F), + UINT64_C(0xA0796652D05C76C9), UINT64_C(0xF4901FC08C8C87C4), + UINT64_C(0x09AB957669FD94D3), UINT64_C(0x5D42ECE4352D65DE) + }, { + UINT64_C(0x0000000000000000), UINT64_C(0x3F0BE14A916A6DCB), + UINT64_C(0x7E17C29522D4DB96), UINT64_C(0x411C23DFB3BEB65D), + UINT64_C(0xFC2F852A45A9B72C), UINT64_C(0xC3246460D4C3DAE7), + UINT64_C(0x823847BF677D6CBA), UINT64_C(0xBD33A6F5F6170171), + UINT64_C(0x6A87A57F245D70DD), UINT64_C(0x558C4435B5371D16), + UINT64_C(0x149067EA0689AB4B), UINT64_C(0x2B9B86A097E3C680), + UINT64_C(0x96A8205561F4C7F1), UINT64_C(0xA9A3C11FF09EAA3A), + UINT64_C(0xE8BFE2C043201C67), UINT64_C(0xD7B4038AD24A71AC), + UINT64_C(0xD50F4AFE48BAE1BA), UINT64_C(0xEA04ABB4D9D08C71), + UINT64_C(0xAB18886B6A6E3A2C), UINT64_C(0x94136921FB0457E7), + UINT64_C(0x2920CFD40D135696), UINT64_C(0x162B2E9E9C793B5D), + UINT64_C(0x57370D412FC78D00), UINT64_C(0x683CEC0BBEADE0CB), + UINT64_C(0xBF88EF816CE79167), UINT64_C(0x80830ECBFD8DFCAC), + UINT64_C(0xC19F2D144E334AF1), UINT64_C(0xFE94CC5EDF59273A), + UINT64_C(0x43A76AAB294E264B), UINT64_C(0x7CAC8BE1B8244B80), + UINT64_C(0x3DB0A83E0B9AFDDD), UINT64_C(0x02BB49749AF09016), + UINT64_C(0x38C63AD73E7BDDF1), UINT64_C(0x07CDDB9DAF11B03A), + UINT64_C(0x46D1F8421CAF0667), UINT64_C(0x79DA19088DC56BAC), + UINT64_C(0xC4E9BFFD7BD26ADD), UINT64_C(0xFBE25EB7EAB80716), + UINT64_C(0xBAFE7D685906B14B), UINT64_C(0x85F59C22C86CDC80), + UINT64_C(0x52419FA81A26AD2C), UINT64_C(0x6D4A7EE28B4CC0E7), + UINT64_C(0x2C565D3D38F276BA), UINT64_C(0x135DBC77A9981B71), + UINT64_C(0xAE6E1A825F8F1A00), UINT64_C(0x9165FBC8CEE577CB), + UINT64_C(0xD079D8177D5BC196), UINT64_C(0xEF72395DEC31AC5D), + UINT64_C(0xEDC9702976C13C4B), UINT64_C(0xD2C29163E7AB5180), + UINT64_C(0x93DEB2BC5415E7DD), 
UINT64_C(0xACD553F6C57F8A16), + UINT64_C(0x11E6F50333688B67), UINT64_C(0x2EED1449A202E6AC), + UINT64_C(0x6FF1379611BC50F1), UINT64_C(0x50FAD6DC80D63D3A), + UINT64_C(0x874ED556529C4C96), UINT64_C(0xB845341CC3F6215D), + UINT64_C(0xF95917C370489700), UINT64_C(0xC652F689E122FACB), + UINT64_C(0x7B61507C1735FBBA), UINT64_C(0x446AB136865F9671), + UINT64_C(0x057692E935E1202C), UINT64_C(0x3A7D73A3A48B4DE7), + UINT64_C(0x718C75AE7CF7BBE2), UINT64_C(0x4E8794E4ED9DD629), + UINT64_C(0x0F9BB73B5E236074), UINT64_C(0x30905671CF490DBF), + UINT64_C(0x8DA3F084395E0CCE), UINT64_C(0xB2A811CEA8346105), + UINT64_C(0xF3B432111B8AD758), UINT64_C(0xCCBFD35B8AE0BA93), + UINT64_C(0x1B0BD0D158AACB3F), UINT64_C(0x2400319BC9C0A6F4), + UINT64_C(0x651C12447A7E10A9), UINT64_C(0x5A17F30EEB147D62), + UINT64_C(0xE72455FB1D037C13), UINT64_C(0xD82FB4B18C6911D8), + UINT64_C(0x9933976E3FD7A785), UINT64_C(0xA6387624AEBDCA4E), + UINT64_C(0xA4833F50344D5A58), UINT64_C(0x9B88DE1AA5273793), + UINT64_C(0xDA94FDC5169981CE), UINT64_C(0xE59F1C8F87F3EC05), + UINT64_C(0x58ACBA7A71E4ED74), UINT64_C(0x67A75B30E08E80BF), + UINT64_C(0x26BB78EF533036E2), UINT64_C(0x19B099A5C25A5B29), + UINT64_C(0xCE049A2F10102A85), UINT64_C(0xF10F7B65817A474E), + UINT64_C(0xB01358BA32C4F113), UINT64_C(0x8F18B9F0A3AE9CD8), + UINT64_C(0x322B1F0555B99DA9), UINT64_C(0x0D20FE4FC4D3F062), + UINT64_C(0x4C3CDD90776D463F), UINT64_C(0x73373CDAE6072BF4), + UINT64_C(0x494A4F79428C6613), UINT64_C(0x7641AE33D3E60BD8), + UINT64_C(0x375D8DEC6058BD85), UINT64_C(0x08566CA6F132D04E), + UINT64_C(0xB565CA530725D13F), UINT64_C(0x8A6E2B19964FBCF4), + UINT64_C(0xCB7208C625F10AA9), UINT64_C(0xF479E98CB49B6762), + UINT64_C(0x23CDEA0666D116CE), UINT64_C(0x1CC60B4CF7BB7B05), + UINT64_C(0x5DDA28934405CD58), UINT64_C(0x62D1C9D9D56FA093), + UINT64_C(0xDFE26F2C2378A1E2), UINT64_C(0xE0E98E66B212CC29), + UINT64_C(0xA1F5ADB901AC7A74), UINT64_C(0x9EFE4CF390C617BF), + UINT64_C(0x9C4505870A3687A9), UINT64_C(0xA34EE4CD9B5CEA62), + UINT64_C(0xE252C71228E25C3F), 
UINT64_C(0xDD592658B98831F4), + UINT64_C(0x606A80AD4F9F3085), UINT64_C(0x5F6161E7DEF55D4E), + UINT64_C(0x1E7D42386D4BEB13), UINT64_C(0x2176A372FC2186D8), + UINT64_C(0xF6C2A0F82E6BF774), UINT64_C(0xC9C941B2BF019ABF), + UINT64_C(0x88D5626D0CBF2CE2), UINT64_C(0xB7DE83279DD54129), + UINT64_C(0x0AED25D26BC24058), UINT64_C(0x35E6C498FAA82D93), + UINT64_C(0x74FAE74749169BCE), UINT64_C(0x4BF1060DD87CF605), + UINT64_C(0xE318EB5CF9EF77C4), UINT64_C(0xDC130A1668851A0F), + UINT64_C(0x9D0F29C9DB3BAC52), UINT64_C(0xA204C8834A51C199), + UINT64_C(0x1F376E76BC46C0E8), UINT64_C(0x203C8F3C2D2CAD23), + UINT64_C(0x6120ACE39E921B7E), UINT64_C(0x5E2B4DA90FF876B5), + UINT64_C(0x899F4E23DDB20719), UINT64_C(0xB694AF694CD86AD2), + UINT64_C(0xF7888CB6FF66DC8F), UINT64_C(0xC8836DFC6E0CB144), + UINT64_C(0x75B0CB09981BB035), UINT64_C(0x4ABB2A430971DDFE), + UINT64_C(0x0BA7099CBACF6BA3), UINT64_C(0x34ACE8D62BA50668), + UINT64_C(0x3617A1A2B155967E), UINT64_C(0x091C40E8203FFBB5), + UINT64_C(0x4800633793814DE8), UINT64_C(0x770B827D02EB2023), + UINT64_C(0xCA382488F4FC2152), UINT64_C(0xF533C5C265964C99), + UINT64_C(0xB42FE61DD628FAC4), UINT64_C(0x8B2407574742970F), + UINT64_C(0x5C9004DD9508E6A3), UINT64_C(0x639BE59704628B68), + UINT64_C(0x2287C648B7DC3D35), UINT64_C(0x1D8C270226B650FE), + UINT64_C(0xA0BF81F7D0A1518F), UINT64_C(0x9FB460BD41CB3C44), + UINT64_C(0xDEA84362F2758A19), UINT64_C(0xE1A3A228631FE7D2), + UINT64_C(0xDBDED18BC794AA35), UINT64_C(0xE4D530C156FEC7FE), + UINT64_C(0xA5C9131EE54071A3), UINT64_C(0x9AC2F254742A1C68), + UINT64_C(0x27F154A1823D1D19), UINT64_C(0x18FAB5EB135770D2), + UINT64_C(0x59E69634A0E9C68F), UINT64_C(0x66ED777E3183AB44), + UINT64_C(0xB15974F4E3C9DAE8), UINT64_C(0x8E5295BE72A3B723), + UINT64_C(0xCF4EB661C11D017E), UINT64_C(0xF045572B50776CB5), + UINT64_C(0x4D76F1DEA6606DC4), UINT64_C(0x727D1094370A000F), + UINT64_C(0x3361334B84B4B652), UINT64_C(0x0C6AD20115DEDB99), + UINT64_C(0x0ED19B758F2E4B8F), UINT64_C(0x31DA7A3F1E442644), + UINT64_C(0x70C659E0ADFA9019), 
UINT64_C(0x4FCDB8AA3C90FDD2), + UINT64_C(0xF2FE1E5FCA87FCA3), UINT64_C(0xCDF5FF155BED9168), + UINT64_C(0x8CE9DCCAE8532735), UINT64_C(0xB3E23D8079394AFE), + UINT64_C(0x64563E0AAB733B52), UINT64_C(0x5B5DDF403A195699), + UINT64_C(0x1A41FC9F89A7E0C4), UINT64_C(0x254A1DD518CD8D0F), + UINT64_C(0x9879BB20EEDA8C7E), UINT64_C(0xA7725A6A7FB0E1B5), + UINT64_C(0xE66E79B5CC0E57E8), UINT64_C(0xD96598FF5D643A23), + UINT64_C(0x92949EF28518CC26), UINT64_C(0xAD9F7FB81472A1ED), + UINT64_C(0xEC835C67A7CC17B0), UINT64_C(0xD388BD2D36A67A7B), + UINT64_C(0x6EBB1BD8C0B17B0A), UINT64_C(0x51B0FA9251DB16C1), + UINT64_C(0x10ACD94DE265A09C), UINT64_C(0x2FA73807730FCD57), + UINT64_C(0xF8133B8DA145BCFB), UINT64_C(0xC718DAC7302FD130), + UINT64_C(0x8604F9188391676D), UINT64_C(0xB90F185212FB0AA6), + UINT64_C(0x043CBEA7E4EC0BD7), UINT64_C(0x3B375FED7586661C), + UINT64_C(0x7A2B7C32C638D041), UINT64_C(0x45209D785752BD8A), + UINT64_C(0x479BD40CCDA22D9C), UINT64_C(0x789035465CC84057), + UINT64_C(0x398C1699EF76F60A), UINT64_C(0x0687F7D37E1C9BC1), + UINT64_C(0xBBB45126880B9AB0), UINT64_C(0x84BFB06C1961F77B), + UINT64_C(0xC5A393B3AADF4126), UINT64_C(0xFAA872F93BB52CED), + UINT64_C(0x2D1C7173E9FF5D41), UINT64_C(0x121790397895308A), + UINT64_C(0x530BB3E6CB2B86D7), UINT64_C(0x6C0052AC5A41EB1C), + UINT64_C(0xD133F459AC56EA6D), UINT64_C(0xEE3815133D3C87A6), + UINT64_C(0xAF2436CC8E8231FB), UINT64_C(0x902FD7861FE85C30), + UINT64_C(0xAA52A425BB6311D7), UINT64_C(0x9559456F2A097C1C), + UINT64_C(0xD44566B099B7CA41), UINT64_C(0xEB4E87FA08DDA78A), + UINT64_C(0x567D210FFECAA6FB), UINT64_C(0x6976C0456FA0CB30), + UINT64_C(0x286AE39ADC1E7D6D), UINT64_C(0x176102D04D7410A6), + UINT64_C(0xC0D5015A9F3E610A), UINT64_C(0xFFDEE0100E540CC1), + UINT64_C(0xBEC2C3CFBDEABA9C), UINT64_C(0x81C922852C80D757), + UINT64_C(0x3CFA8470DA97D626), UINT64_C(0x03F1653A4BFDBBED), + UINT64_C(0x42ED46E5F8430DB0), UINT64_C(0x7DE6A7AF6929607B), + UINT64_C(0x7F5DEEDBF3D9F06D), UINT64_C(0x40560F9162B39DA6), + UINT64_C(0x014A2C4ED10D2BFB), 
UINT64_C(0x3E41CD0440674630), + UINT64_C(0x83726BF1B6704741), UINT64_C(0xBC798ABB271A2A8A), + UINT64_C(0xFD65A96494A49CD7), UINT64_C(0xC26E482E05CEF11C), + UINT64_C(0x15DA4BA4D78480B0), UINT64_C(0x2AD1AAEE46EEED7B), + UINT64_C(0x6BCD8931F5505B26), UINT64_C(0x54C6687B643A36ED), + UINT64_C(0xE9F5CE8E922D379C), UINT64_C(0xD6FE2FC403475A57), + UINT64_C(0x97E20C1BB0F9EC0A), UINT64_C(0xA8E9ED51219381C1) + }, { + UINT64_C(0x0000000000000000), UINT64_C(0x1DEE8A5E222CA1DC), + UINT64_C(0x3BDD14BC445943B8), UINT64_C(0x26339EE26675E264), + UINT64_C(0x77BA297888B28770), UINT64_C(0x6A54A326AA9E26AC), + UINT64_C(0x4C673DC4CCEBC4C8), UINT64_C(0x5189B79AEEC76514), + UINT64_C(0xEF7452F111650EE0), UINT64_C(0xF29AD8AF3349AF3C), + UINT64_C(0xD4A9464D553C4D58), UINT64_C(0xC947CC137710EC84), + UINT64_C(0x98CE7B8999D78990), UINT64_C(0x8520F1D7BBFB284C), + UINT64_C(0xA3136F35DD8ECA28), UINT64_C(0xBEFDE56BFFA26BF4), + UINT64_C(0x4C300AC98DC40345), UINT64_C(0x51DE8097AFE8A299), + UINT64_C(0x77ED1E75C99D40FD), UINT64_C(0x6A03942BEBB1E121), + UINT64_C(0x3B8A23B105768435), UINT64_C(0x2664A9EF275A25E9), + UINT64_C(0x0057370D412FC78D), UINT64_C(0x1DB9BD5363036651), + UINT64_C(0xA34458389CA10DA5), UINT64_C(0xBEAAD266BE8DAC79), + UINT64_C(0x98994C84D8F84E1D), UINT64_C(0x8577C6DAFAD4EFC1), + UINT64_C(0xD4FE714014138AD5), UINT64_C(0xC910FB1E363F2B09), + UINT64_C(0xEF2365FC504AC96D), UINT64_C(0xF2CDEFA2726668B1), + UINT64_C(0x986015931B88068A), UINT64_C(0x858E9FCD39A4A756), + UINT64_C(0xA3BD012F5FD14532), UINT64_C(0xBE538B717DFDE4EE), + UINT64_C(0xEFDA3CEB933A81FA), UINT64_C(0xF234B6B5B1162026), + UINT64_C(0xD4072857D763C242), UINT64_C(0xC9E9A209F54F639E), + UINT64_C(0x771447620AED086A), UINT64_C(0x6AFACD3C28C1A9B6), + UINT64_C(0x4CC953DE4EB44BD2), UINT64_C(0x5127D9806C98EA0E), + UINT64_C(0x00AE6E1A825F8F1A), UINT64_C(0x1D40E444A0732EC6), + UINT64_C(0x3B737AA6C606CCA2), UINT64_C(0x269DF0F8E42A6D7E), + UINT64_C(0xD4501F5A964C05CF), UINT64_C(0xC9BE9504B460A413), + UINT64_C(0xEF8D0BE6D2154677), 
UINT64_C(0xF26381B8F039E7AB), + UINT64_C(0xA3EA36221EFE82BF), UINT64_C(0xBE04BC7C3CD22363), + UINT64_C(0x9837229E5AA7C107), UINT64_C(0x85D9A8C0788B60DB), + UINT64_C(0x3B244DAB87290B2F), UINT64_C(0x26CAC7F5A505AAF3), + UINT64_C(0x00F95917C3704897), UINT64_C(0x1D17D349E15CE94B), + UINT64_C(0x4C9E64D30F9B8C5F), UINT64_C(0x5170EE8D2DB72D83), + UINT64_C(0x7743706F4BC2CFE7), UINT64_C(0x6AADFA3169EE6E3B), + UINT64_C(0xA218840D981E1391), UINT64_C(0xBFF60E53BA32B24D), + UINT64_C(0x99C590B1DC475029), UINT64_C(0x842B1AEFFE6BF1F5), + UINT64_C(0xD5A2AD7510AC94E1), UINT64_C(0xC84C272B3280353D), + UINT64_C(0xEE7FB9C954F5D759), UINT64_C(0xF391339776D97685), + UINT64_C(0x4D6CD6FC897B1D71), UINT64_C(0x50825CA2AB57BCAD), + UINT64_C(0x76B1C240CD225EC9), UINT64_C(0x6B5F481EEF0EFF15), + UINT64_C(0x3AD6FF8401C99A01), UINT64_C(0x273875DA23E53BDD), + UINT64_C(0x010BEB384590D9B9), UINT64_C(0x1CE5616667BC7865), + UINT64_C(0xEE288EC415DA10D4), UINT64_C(0xF3C6049A37F6B108), + UINT64_C(0xD5F59A785183536C), UINT64_C(0xC81B102673AFF2B0), + UINT64_C(0x9992A7BC9D6897A4), UINT64_C(0x847C2DE2BF443678), + UINT64_C(0xA24FB300D931D41C), UINT64_C(0xBFA1395EFB1D75C0), + UINT64_C(0x015CDC3504BF1E34), UINT64_C(0x1CB2566B2693BFE8), + UINT64_C(0x3A81C88940E65D8C), UINT64_C(0x276F42D762CAFC50), + UINT64_C(0x76E6F54D8C0D9944), UINT64_C(0x6B087F13AE213898), + UINT64_C(0x4D3BE1F1C854DAFC), UINT64_C(0x50D56BAFEA787B20), + UINT64_C(0x3A78919E8396151B), UINT64_C(0x27961BC0A1BAB4C7), + UINT64_C(0x01A58522C7CF56A3), UINT64_C(0x1C4B0F7CE5E3F77F), + UINT64_C(0x4DC2B8E60B24926B), UINT64_C(0x502C32B8290833B7), + UINT64_C(0x761FAC5A4F7DD1D3), UINT64_C(0x6BF126046D51700F), + UINT64_C(0xD50CC36F92F31BFB), UINT64_C(0xC8E24931B0DFBA27), + UINT64_C(0xEED1D7D3D6AA5843), UINT64_C(0xF33F5D8DF486F99F), + UINT64_C(0xA2B6EA171A419C8B), UINT64_C(0xBF586049386D3D57), + UINT64_C(0x996BFEAB5E18DF33), UINT64_C(0x848574F57C347EEF), + UINT64_C(0x76489B570E52165E), UINT64_C(0x6BA611092C7EB782), + UINT64_C(0x4D958FEB4A0B55E6), 
UINT64_C(0x507B05B56827F43A), + UINT64_C(0x01F2B22F86E0912E), UINT64_C(0x1C1C3871A4CC30F2), + UINT64_C(0x3A2FA693C2B9D296), UINT64_C(0x27C12CCDE095734A), + UINT64_C(0x993CC9A61F3718BE), UINT64_C(0x84D243F83D1BB962), + UINT64_C(0xA2E1DD1A5B6E5B06), UINT64_C(0xBF0F57447942FADA), + UINT64_C(0xEE86E0DE97859FCE), UINT64_C(0xF3686A80B5A93E12), + UINT64_C(0xD55BF462D3DCDC76), UINT64_C(0xC8B57E3CF1F07DAA), + UINT64_C(0xD6E9A7309F3239A7), UINT64_C(0xCB072D6EBD1E987B), + UINT64_C(0xED34B38CDB6B7A1F), UINT64_C(0xF0DA39D2F947DBC3), + UINT64_C(0xA1538E481780BED7), UINT64_C(0xBCBD041635AC1F0B), + UINT64_C(0x9A8E9AF453D9FD6F), UINT64_C(0x876010AA71F55CB3), + UINT64_C(0x399DF5C18E573747), UINT64_C(0x24737F9FAC7B969B), + UINT64_C(0x0240E17DCA0E74FF), UINT64_C(0x1FAE6B23E822D523), + UINT64_C(0x4E27DCB906E5B037), UINT64_C(0x53C956E724C911EB), + UINT64_C(0x75FAC80542BCF38F), UINT64_C(0x6814425B60905253), + UINT64_C(0x9AD9ADF912F63AE2), UINT64_C(0x873727A730DA9B3E), + UINT64_C(0xA104B94556AF795A), UINT64_C(0xBCEA331B7483D886), + UINT64_C(0xED6384819A44BD92), UINT64_C(0xF08D0EDFB8681C4E), + UINT64_C(0xD6BE903DDE1DFE2A), UINT64_C(0xCB501A63FC315FF6), + UINT64_C(0x75ADFF0803933402), UINT64_C(0x6843755621BF95DE), + UINT64_C(0x4E70EBB447CA77BA), UINT64_C(0x539E61EA65E6D666), + UINT64_C(0x0217D6708B21B372), UINT64_C(0x1FF95C2EA90D12AE), + UINT64_C(0x39CAC2CCCF78F0CA), UINT64_C(0x24244892ED545116), + UINT64_C(0x4E89B2A384BA3F2D), UINT64_C(0x536738FDA6969EF1), + UINT64_C(0x7554A61FC0E37C95), UINT64_C(0x68BA2C41E2CFDD49), + UINT64_C(0x39339BDB0C08B85D), UINT64_C(0x24DD11852E241981), + UINT64_C(0x02EE8F674851FBE5), UINT64_C(0x1F0005396A7D5A39), + UINT64_C(0xA1FDE05295DF31CD), UINT64_C(0xBC136A0CB7F39011), + UINT64_C(0x9A20F4EED1867275), UINT64_C(0x87CE7EB0F3AAD3A9), + UINT64_C(0xD647C92A1D6DB6BD), UINT64_C(0xCBA943743F411761), + UINT64_C(0xED9ADD965934F505), UINT64_C(0xF07457C87B1854D9), + UINT64_C(0x02B9B86A097E3C68), UINT64_C(0x1F5732342B529DB4), + UINT64_C(0x3964ACD64D277FD0), 
UINT64_C(0x248A26886F0BDE0C), + UINT64_C(0x7503911281CCBB18), UINT64_C(0x68ED1B4CA3E01AC4), + UINT64_C(0x4EDE85AEC595F8A0), UINT64_C(0x53300FF0E7B9597C), + UINT64_C(0xEDCDEA9B181B3288), UINT64_C(0xF02360C53A379354), + UINT64_C(0xD610FE275C427130), UINT64_C(0xCBFE74797E6ED0EC), + UINT64_C(0x9A77C3E390A9B5F8), UINT64_C(0x879949BDB2851424), + UINT64_C(0xA1AAD75FD4F0F640), UINT64_C(0xBC445D01F6DC579C), + UINT64_C(0x74F1233D072C2A36), UINT64_C(0x691FA96325008BEA), + UINT64_C(0x4F2C37814375698E), UINT64_C(0x52C2BDDF6159C852), + UINT64_C(0x034B0A458F9EAD46), UINT64_C(0x1EA5801BADB20C9A), + UINT64_C(0x38961EF9CBC7EEFE), UINT64_C(0x257894A7E9EB4F22), + UINT64_C(0x9B8571CC164924D6), UINT64_C(0x866BFB923465850A), + UINT64_C(0xA05865705210676E), UINT64_C(0xBDB6EF2E703CC6B2), + UINT64_C(0xEC3F58B49EFBA3A6), UINT64_C(0xF1D1D2EABCD7027A), + UINT64_C(0xD7E24C08DAA2E01E), UINT64_C(0xCA0CC656F88E41C2), + UINT64_C(0x38C129F48AE82973), UINT64_C(0x252FA3AAA8C488AF), + UINT64_C(0x031C3D48CEB16ACB), UINT64_C(0x1EF2B716EC9DCB17), + UINT64_C(0x4F7B008C025AAE03), UINT64_C(0x52958AD220760FDF), + UINT64_C(0x74A614304603EDBB), UINT64_C(0x69489E6E642F4C67), + UINT64_C(0xD7B57B059B8D2793), UINT64_C(0xCA5BF15BB9A1864F), + UINT64_C(0xEC686FB9DFD4642B), UINT64_C(0xF186E5E7FDF8C5F7), + UINT64_C(0xA00F527D133FA0E3), UINT64_C(0xBDE1D8233113013F), + UINT64_C(0x9BD246C15766E35B), UINT64_C(0x863CCC9F754A4287), + UINT64_C(0xEC9136AE1CA42CBC), UINT64_C(0xF17FBCF03E888D60), + UINT64_C(0xD74C221258FD6F04), UINT64_C(0xCAA2A84C7AD1CED8), + UINT64_C(0x9B2B1FD69416ABCC), UINT64_C(0x86C59588B63A0A10), + UINT64_C(0xA0F60B6AD04FE874), UINT64_C(0xBD188134F26349A8), + UINT64_C(0x03E5645F0DC1225C), UINT64_C(0x1E0BEE012FED8380), + UINT64_C(0x383870E3499861E4), UINT64_C(0x25D6FABD6BB4C038), + UINT64_C(0x745F4D278573A52C), UINT64_C(0x69B1C779A75F04F0), + UINT64_C(0x4F82599BC12AE694), UINT64_C(0x526CD3C5E3064748), + UINT64_C(0xA0A13C6791602FF9), UINT64_C(0xBD4FB639B34C8E25), + UINT64_C(0x9B7C28DBD5396C41), 
UINT64_C(0x8692A285F715CD9D), + UINT64_C(0xD71B151F19D2A889), UINT64_C(0xCAF59F413BFE0955), + UINT64_C(0xECC601A35D8BEB31), UINT64_C(0xF1288BFD7FA74AED), + UINT64_C(0x4FD56E9680052119), UINT64_C(0x523BE4C8A22980C5), + UINT64_C(0x74087A2AC45C62A1), UINT64_C(0x69E6F074E670C37D), + UINT64_C(0x386F47EE08B7A669), UINT64_C(0x2581CDB02A9B07B5), + UINT64_C(0x03B253524CEEE5D1), UINT64_C(0x1E5CD90C6EC2440D) + } +}; diff --git a/src/liblzma/check/crc64_tablegen.c b/src/liblzma/check/crc64_tablegen.c new file mode 100644 index 000000000000..fddaa7ed1400 --- /dev/null +++ b/src/liblzma/check/crc64_tablegen.c @@ -0,0 +1,88 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc64_tablegen.c +/// \brief Generate crc64_table_le.h and crc64_table_be.h +/// +/// Compiling: gcc -std=c99 -o crc64_tablegen crc64_tablegen.c +/// Add -DWORDS_BIGENDIAN to generate big endian table. +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include +#include "../../common/tuklib_integer.h" + + +static uint64_t crc64_table[4][256]; + + +extern void +init_crc64_table(void) +{ + static const uint64_t poly64 = UINT64_C(0xC96C5795D7870F42); + + for (size_t s = 0; s < 4; ++s) { + for (size_t b = 0; b < 256; ++b) { + uint64_t r = s == 0 ? b : crc64_table[s - 1][b]; + + for (size_t i = 0; i < 8; ++i) { + if (r & 1) + r = (r >> 1) ^ poly64; + else + r >>= 1; + } + + crc64_table[s][b] = r; + } + } + +#ifdef WORDS_BIGENDIAN + for (size_t s = 0; s < 4; ++s) + for (size_t b = 0; b < 256; ++b) + crc64_table[s][b] = bswap64(crc64_table[s][b]); +#endif + + return; +} + + +static void +print_crc64_table(void) +{ + printf("/* This file has been automatically generated by " + "crc64_tablegen.c. 
*/\n\n" + "const uint64_t lzma_crc64_table[4][256] = {\n\t{"); + + for (size_t s = 0; s < 4; ++s) { + for (size_t b = 0; b < 256; ++b) { + if ((b % 2) == 0) + printf("\n\t\t"); + + printf("UINT64_C(0x%016" PRIX64 ")", + crc64_table[s][b]); + + if (b != 255) + printf(",%s", (b+1) % 2 == 0 ? "" : " "); + } + + if (s == 3) + printf("\n\t}\n};\n"); + else + printf("\n\t}, {"); + } + + return; +} + + +int +main(void) +{ + init_crc64_table(); + print_crc64_table(); + return 0; +} diff --git a/src/liblzma/check/crc64_x86.S b/src/liblzma/check/crc64_x86.S new file mode 100644 index 000000000000..f5bb84b97e0a --- /dev/null +++ b/src/liblzma/check/crc64_x86.S @@ -0,0 +1,287 @@ +/* + * Speed-optimized CRC64 using slicing-by-four algorithm + * + * This uses only i386 instructions, but it is optimized for i686 and later + * (including e.g. Pentium II/III/IV, Athlon XP, and Core 2). + * + * Authors: Igor Pavlov (original CRC32 assembly code) + * Lasse Collin (CRC64 adaptation of the modified CRC32 code) + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * This code needs lzma_crc64_table, which can be created using the + * following C code: + +uint64_t lzma_crc64_table[4][256]; + +void +init_table(void) +{ + // ECMA-182 + static const uint64_t poly64 = UINT64_C(0xC96C5795D7870F42); + + for (size_t s = 0; s < 4; ++s) { + for (size_t b = 0; b < 256; ++b) { + uint64_t r = s == 0 ? b : lzma_crc64_table[s - 1][b]; + + for (size_t i = 0; i < 8; ++i) { + if (r & 1) + r = (r >> 1) ^ poly64; + else + r >>= 1; + } + + lzma_crc64_table[s][b] = r; + } + } +} + + * The prototype of the CRC64 function: + * extern uint64_t lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc); + */ + +/* + * On some systems, the functions need to be prefixed. The prefix is + * usually an underscore. 
+ */ +#ifndef __USER_LABEL_PREFIX__ +# define __USER_LABEL_PREFIX__ +#endif +#define MAKE_SYM_CAT(prefix, sym) prefix ## sym +#define MAKE_SYM(prefix, sym) MAKE_SYM_CAT(prefix, sym) +#define LZMA_CRC64 MAKE_SYM(__USER_LABEL_PREFIX__, lzma_crc64) +#define LZMA_CRC64_TABLE MAKE_SYM(__USER_LABEL_PREFIX__, lzma_crc64_table) + +/* + * Solaris assembler doesn't have .p2align, and Darwin uses .align + * differently than GNU/Linux and Solaris. + */ +#if defined(__APPLE__) || defined(__MSDOS__) +# define ALIGN(pow2, abs) .align pow2 +#else +# define ALIGN(pow2, abs) .align abs +#endif + + .text + .globl LZMA_CRC64 + +#if !defined(__APPLE__) && !defined(_WIN32) && !defined(__CYGWIN__) \ + && !defined(__MSDOS__) + .type LZMA_CRC64, @function +#endif + + ALIGN(4, 16) +LZMA_CRC64: + /* + * Register usage: + * %eax crc LSB + * %edx crc MSB + * %esi buf + * %edi size or buf + size + * %ebx lzma_crc64_table + * %ebp Table index + * %ecx Temporary + */ + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + movl 0x14(%esp), %esi /* buf */ + movl 0x18(%esp), %edi /* size */ + movl 0x1C(%esp), %eax /* crc LSB */ + movl 0x20(%esp), %edx /* crc MSB */ + + /* + * Store the address of lzma_crc64_table to %ebx. This is needed to + * get position-independent code (PIC). + * + * The PIC macro is defined by libtool, while __PIC__ is defined + * by GCC but only on some systems. Testing for both makes it simpler + * to test this code without libtool, and keeps the code working also + * when built with libtool but using something else than GCC. + * + * I understood that libtool may define PIC on Windows even though + * the code in Windows DLLs is not PIC in sense that it is in ELF + * binaries, so we need a separate check to always use the non-PIC + * code on Windows. 
+ */ +#if (!defined(PIC) && !defined(__PIC__)) \ + || (defined(_WIN32) || defined(__CYGWIN__)) + /* Not PIC */ + movl $ LZMA_CRC64_TABLE, %ebx +#elif defined(__APPLE__) + /* Mach-O */ + call .L_get_pc +.L_pic: + leal .L_lzma_crc64_table$non_lazy_ptr-.L_pic(%ebx), %ebx + movl (%ebx), %ebx +#else + /* ELF */ + call .L_get_pc + addl $_GLOBAL_OFFSET_TABLE_, %ebx + movl LZMA_CRC64_TABLE@GOT(%ebx), %ebx +#endif + + /* Complement the initial value. */ + notl %eax + notl %edx + +.L_align: + /* + * Check if there is enough input to use slicing-by-four. + * We need eight bytes, because the loop pre-reads four bytes. + */ + cmpl $8, %edi + jb .L_rest + + /* Check if we have reached alignment of four bytes. */ + testl $3, %esi + jz .L_slice + + /* Calculate CRC of the next input byte. */ + movzbl (%esi), %ebp + incl %esi + movzbl %al, %ecx + xorl %ecx, %ebp + shrdl $8, %edx, %eax + xorl (%ebx, %ebp, 8), %eax + shrl $8, %edx + xorl 4(%ebx, %ebp, 8), %edx + decl %edi + jmp .L_align + +.L_slice: + /* + * If we get here, there's at least eight bytes of aligned input + * available. Make %edi multiple of four bytes. Store the possible + * remainder over the "size" variable in the argument stack. + */ + movl %edi, 0x18(%esp) + andl $-4, %edi + subl %edi, 0x18(%esp) + + /* + * Let %edi be buf + size - 4 while running the main loop. This way + * we can compare for equality to determine when exit the loop. + */ + addl %esi, %edi + subl $4, %edi + + /* Read in the first four aligned bytes. */ + movl (%esi), %ecx + +.L_loop: + xorl %eax, %ecx + movzbl %cl, %ebp + movl 0x1800(%ebx, %ebp, 8), %eax + xorl %edx, %eax + movl 0x1804(%ebx, %ebp, 8), %edx + movzbl %ch, %ebp + xorl 0x1000(%ebx, %ebp, 8), %eax + xorl 0x1004(%ebx, %ebp, 8), %edx + shrl $16, %ecx + movzbl %cl, %ebp + xorl 0x0800(%ebx, %ebp, 8), %eax + xorl 0x0804(%ebx, %ebp, 8), %edx + movzbl %ch, %ebp + addl $4, %esi + xorl (%ebx, %ebp, 8), %eax + xorl 4(%ebx, %ebp, 8), %edx + + /* Check for end of aligned input. 
*/ + cmpl %edi, %esi + + /* + * Copy the next input byte to %ecx. It is slightly faster to + * read it here than at the top of the loop. + */ + movl (%esi), %ecx + jb .L_loop + + /* + * Process the remaining four bytes, which we have already + * copied to %ecx. + */ + xorl %eax, %ecx + movzbl %cl, %ebp + movl 0x1800(%ebx, %ebp, 8), %eax + xorl %edx, %eax + movl 0x1804(%ebx, %ebp, 8), %edx + movzbl %ch, %ebp + xorl 0x1000(%ebx, %ebp, 8), %eax + xorl 0x1004(%ebx, %ebp, 8), %edx + shrl $16, %ecx + movzbl %cl, %ebp + xorl 0x0800(%ebx, %ebp, 8), %eax + xorl 0x0804(%ebx, %ebp, 8), %edx + movzbl %ch, %ebp + addl $4, %esi + xorl (%ebx, %ebp, 8), %eax + xorl 4(%ebx, %ebp, 8), %edx + + /* Copy the number of remaining bytes to %edi. */ + movl 0x18(%esp), %edi + +.L_rest: + /* Check for end of input. */ + testl %edi, %edi + jz .L_return + + /* Calculate CRC of the next input byte. */ + movzbl (%esi), %ebp + incl %esi + movzbl %al, %ecx + xorl %ecx, %ebp + shrdl $8, %edx, %eax + xorl (%ebx, %ebp, 8), %eax + shrl $8, %edx + xorl 4(%ebx, %ebp, 8), %edx + decl %edi + jmp .L_rest + +.L_return: + /* Complement the final value. */ + notl %eax + notl %edx + + popl %ebp + popl %edi + popl %esi + popl %ebx + ret + +#if defined(PIC) || defined(__PIC__) + ALIGN(4, 16) +.L_get_pc: + movl (%esp), %ebx + ret +#endif + +#if defined(__APPLE__) && (defined(PIC) || defined(__PIC__)) + /* Mach-O PIC */ + .section __IMPORT,__pointers,non_lazy_symbol_pointers +.L_lzma_crc64_table$non_lazy_ptr: + .indirect_symbol LZMA_CRC64_TABLE + .long 0 + +#elif defined(_WIN32) || defined(__CYGWIN__) +# ifdef DLL_EXPORT + /* This is equivalent of __declspec(dllexport). */ + .section .drectve + .ascii " -export:lzma_crc64" +# endif + +#elif !defined(__MSDOS__) + /* ELF */ + .size LZMA_CRC64, .-LZMA_CRC64 +#endif + +/* + * This is needed to support non-executable stack. It's ugly to + * use __linux__ here, but I don't know a way to detect when + * we are using GNU assembler. 
+ */ +#if defined(__ELF__) && defined(__linux__) + .section .note.GNU-stack,"",@progbits +#endif diff --git a/src/liblzma/check/crc_macros.h b/src/liblzma/check/crc_macros.h new file mode 100644 index 000000000000..a7c21b765dca --- /dev/null +++ b/src/liblzma/check/crc_macros.h @@ -0,0 +1,30 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc_macros.h +/// \brief Some endian-dependent macros for CRC32 and CRC64 +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifdef WORDS_BIGENDIAN +# define A(x) ((x) >> 24) +# define B(x) (((x) >> 16) & 0xFF) +# define C(x) (((x) >> 8) & 0xFF) +# define D(x) ((x) & 0xFF) + +# define S8(x) ((x) << 8) +# define S32(x) ((x) << 32) + +#else +# define A(x) ((x) & 0xFF) +# define B(x) (((x) >> 8) & 0xFF) +# define C(x) (((x) >> 16) & 0xFF) +# define D(x) ((x) >> 24) + +# define S8(x) ((x) >> 8) +# define S32(x) ((x) >> 32) +#endif diff --git a/src/liblzma/check/sha256.c b/src/liblzma/check/sha256.c new file mode 100644 index 000000000000..04231dbaef99 --- /dev/null +++ b/src/liblzma/check/sha256.c @@ -0,0 +1,201 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file sha256.c +/// \brief SHA-256 +/// +/// \todo Crypto++ has x86 ASM optimizations. They use SSE so if they +/// are imported to liblzma, SSE instructions need to be used +/// conditionally to keep the code working on older boxes. +/// We could also support using some external library for SHA-256. +// +// This code is based on the code found from 7-Zip, which has a modified +// version of the SHA-256 found from Crypto++ . +// The code was modified a little to fit into liblzma. +// +// Authors: Kevin Springle +// Wei Dai +// Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain.
+// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +// Avoid bogus warnings in transform(). +#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __GNUC__ > 4 +# pragma GCC diagnostic ignored "-Wuninitialized" +#endif + +#include "check.h" + +// At least on x86, GCC is able to optimize this to a rotate instruction. +#define rotr_32(num, amount) ((num) >> (amount) | (num) << (32 - (amount))) + +#define blk0(i) (W[i] = data[i]) +#define blk2(i) (W[i & 15] += s1(W[(i - 2) & 15]) + W[(i - 7) & 15] \ + + s0(W[(i - 15) & 15])) + +#define Ch(x, y, z) (z ^ (x & (y ^ z))) +#define Maj(x, y, z) ((x & y) | (z & (x | y))) + +#define a(i) T[(0 - i) & 7] +#define b(i) T[(1 - i) & 7] +#define c(i) T[(2 - i) & 7] +#define d(i) T[(3 - i) & 7] +#define e(i) T[(4 - i) & 7] +#define f(i) T[(5 - i) & 7] +#define g(i) T[(6 - i) & 7] +#define h(i) T[(7 - i) & 7] + +#define R(i) \ + h(i) += S1(e(i)) + Ch(e(i), f(i), g(i)) + SHA256_K[i + j] \ + + (j ?
blk2(i) : blk0(i)); \ + d(i) += h(i); \ + h(i) += S0(a(i)) + Maj(a(i), b(i), c(i)) + +#define S0(x) (rotr_32(x, 2) ^ rotr_32(x, 13) ^ rotr_32(x, 22)) +#define S1(x) (rotr_32(x, 6) ^ rotr_32(x, 11) ^ rotr_32(x, 25)) +#define s0(x) (rotr_32(x, 7) ^ rotr_32(x, 18) ^ (x >> 3)) +#define s1(x) (rotr_32(x, 17) ^ rotr_32(x, 19) ^ (x >> 10)) + + +static const uint32_t SHA256_K[64] = { + 0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, + 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5, + 0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, + 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174, + 0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, + 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA, + 0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, + 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967, + 0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, + 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85, + 0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, + 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070, + 0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, + 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3, + 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, + 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2, +}; + + +static void +transform(uint32_t state[static 8], const uint32_t data[static 16]) +{ + uint32_t W[16]; + uint32_t T[8]; + + // Copy state[] to working vars. + memcpy(T, state, sizeof(T)); + + // 64 operations, partially loop unrolled + for (unsigned int j = 0; j < 64; j += 16) { + R( 0); R( 1); R( 2); R( 3); + R( 4); R( 5); R( 6); R( 7); + R( 8); R( 9); R(10); R(11); + R(12); R(13); R(14); R(15); + } + + // Add the working vars back into state[].
+ state[0] += a(0); + state[1] += b(0); + state[2] += c(0); + state[3] += d(0); + state[4] += e(0); + state[5] += f(0); + state[6] += g(0); + state[7] += h(0); +} + + +static void +process(lzma_check_state *check) +{ +#ifdef WORDS_BIGENDIAN + transform(check->state.sha256.state, check->buffer.u32); + +#else + uint32_t data[16]; + + for (size_t i = 0; i < 16; ++i) + data[i] = bswap32(check->buffer.u32[i]); + + transform(check->state.sha256.state, data); +#endif + + return; +} + + +extern void +lzma_sha256_init(lzma_check_state *check) +{ + static const uint32_t s[8] = { + 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, + 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19, + }; + + memcpy(check->state.sha256.state, s, sizeof(s)); + check->state.sha256.size = 0; + + return; +} + + +extern void +lzma_sha256_update(const uint8_t *buf, size_t size, lzma_check_state *check) +{ + // Copy the input data into a properly aligned temporary buffer. + // This way we can be called with arbitrarily sized buffers + // (no need to be multiple of 64 bytes), and the code works also + // on architectures that don't allow unaligned memory access. + while (size > 0) { + const size_t copy_start = check->state.sha256.size & 0x3F; + size_t copy_size = 64 - copy_start; + if (copy_size > size) + copy_size = size; + + memcpy(check->buffer.u8 + copy_start, buf, copy_size); + + buf += copy_size; + size -= copy_size; + check->state.sha256.size += copy_size; + + if ((check->state.sha256.size & 0x3F) == 0) + process(check); + } + + return; +} + + +extern void +lzma_sha256_finish(lzma_check_state *check) +{ + // Add padding as described in RFC 3174 (it describes SHA-1 but + // the same padding style is used for SHA-256 too). + size_t pos = check->state.sha256.size & 0x3F; + check->buffer.u8[pos++] = 0x80; + + while (pos != 64 - 8) { + if (pos == 64) { + process(check); + pos = 0; + } + + check->buffer.u8[pos++] = 0x00; + } + + // Convert the message size from bytes to bits.
+ check->state.sha256.size *= 8; + + check->buffer.u64[(64 - 8) / 8] = conv64be(check->state.sha256.size); + + process(check); + + for (size_t i = 0; i < 8; ++i) + check->buffer.u32[i] = conv32be(check->state.sha256.state[i]); + + return; +} diff --git a/src/liblzma/common/alone_decoder.c b/src/liblzma/common/alone_decoder.c new file mode 100644 index 000000000000..039b42859546 --- /dev/null +++ b/src/liblzma/common/alone_decoder.c @@ -0,0 +1,232 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file alone_decoder.c +/// \brief Decoder for LZMA_Alone files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "alone_decoder.h" +#include "lzma_decoder.h" +#include "lz_decoder.h" + + +struct lzma_coder_s { + lzma_next_coder next; + + enum { + SEQ_PROPERTIES, + SEQ_DICTIONARY_SIZE, + SEQ_UNCOMPRESSED_SIZE, + SEQ_CODER_INIT, + SEQ_CODE, + } sequence; + + /// Position in the header fields + size_t pos; + + /// Uncompressed size decoded from the header + lzma_vli uncompressed_size; + + /// Memory usage limit + uint64_t memlimit; + + /// Amount of memory actually needed (only an estimate) + uint64_t memusage; + + /// Options decoded from the header needed to initialize + /// the LZMA decoder + lzma_options_lzma options; +}; + + +static lzma_ret +alone_decode(lzma_coder *coder, + lzma_allocator *allocator lzma_attribute((unused)), + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_action action) +{ + while (*out_pos < out_size + && (coder->sequence == SEQ_CODE || *in_pos < in_size)) + switch (coder->sequence) { + case SEQ_PROPERTIES: + if (lzma_lzma_lclppb_decode(&coder->options, in[*in_pos])) + return LZMA_FORMAT_ERROR; + + coder->sequence =
SEQ_DICTIONARY_SIZE; + ++*in_pos; + break; + + case SEQ_DICTIONARY_SIZE: + coder->options.dict_size + |= (size_t)(in[*in_pos]) << (coder->pos * 8); + + if (++coder->pos == 4) { + if (coder->options.dict_size != UINT32_MAX) { + // A hack to ditch tons of false positives: + // We allow only dictionary sizes that are + // 2^n or 2^n + 2^(n-1). LZMA_Alone created + // only files with 2^n, but accepts any + // dictionary size. If someone complains, this + // will be reconsidered. + uint32_t d = coder->options.dict_size - 1; + d |= d >> 2; + d |= d >> 3; + d |= d >> 4; + d |= d >> 8; + d |= d >> 16; + ++d; + + if (d != coder->options.dict_size) + return LZMA_FORMAT_ERROR; + } + + coder->pos = 0; + coder->sequence = SEQ_UNCOMPRESSED_SIZE; + } + + ++*in_pos; + break; + + case SEQ_UNCOMPRESSED_SIZE: + coder->uncompressed_size + |= (lzma_vli)(in[*in_pos]) << (coder->pos * 8); + ++*in_pos; + if (++coder->pos < 8) + break; + + // Another hack to ditch false positives: Assume that + // if the uncompressed size is known, it must be less + // than 256 GiB. Again, if someone complains, this + // will be reconsidered. + if (coder->uncompressed_size != LZMA_VLI_UNKNOWN + && coder->uncompressed_size + >= (LZMA_VLI_C(1) << 38)) + return LZMA_FORMAT_ERROR; + + // Calculate the memory usage so that it is ready + // for SEQ_CODER_INIT. + coder->memusage = lzma_lzma_decoder_memusage(&coder->options) + + LZMA_MEMUSAGE_BASE; + + coder->pos = 0; + coder->sequence = SEQ_CODER_INIT; + + // Fall through + + case SEQ_CODER_INIT: { + if (coder->memusage > coder->memlimit) + return LZMA_MEMLIMIT_ERROR; + + lzma_filter_info filters[2] = { + { + .init = &lzma_lzma_decoder_init, + .options = &coder->options, + }, { + .init = NULL, + } + }; + + const lzma_ret ret = lzma_next_filter_init(&coder->next, + allocator, filters); + if (ret != LZMA_OK) + return ret; + + // Use a hack to set the uncompressed size.
+ lzma_lz_decoder_uncompressed(coder->next.coder, + coder->uncompressed_size); + + coder->sequence = SEQ_CODE; + break; + } + + case SEQ_CODE: { + return coder->next.code(coder->next.coder, + allocator, in, in_pos, in_size, + out, out_pos, out_size, action); + } + + default: + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static void +alone_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +alone_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, + uint64_t *old_memlimit, uint64_t new_memlimit) +{ + *memusage = coder->memusage; + *old_memlimit = coder->memlimit; + + if (new_memlimit != 0) { + if (new_memlimit < coder->memusage) + return LZMA_MEMLIMIT_ERROR; + + coder->memlimit = new_memlimit; + } + + return LZMA_OK; +} + + +extern lzma_ret +lzma_alone_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + uint64_t memlimit) +{ + lzma_next_coder_init(&lzma_alone_decoder_init, next, allocator); + + if (memlimit == 0) + return LZMA_PROG_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &alone_decode; + next->end = &alone_decoder_end; + next->memconfig = &alone_decoder_memconfig; + next->coder->next = LZMA_NEXT_CODER_INIT; + } + + next->coder->sequence = SEQ_PROPERTIES; + next->coder->pos = 0; + next->coder->options.dict_size = 0; + next->coder->options.preset_dict = NULL; + next->coder->options.preset_dict_size = 0; + next->coder->uncompressed_size = 0; + next->coder->memlimit = memlimit; + next->coder->memusage = LZMA_MEMUSAGE_BASE; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_alone_decoder(lzma_stream *strm, uint64_t memlimit) +{ + lzma_next_strm_init(lzma_alone_decoder_init, strm, memlimit); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH]
= true; + + return LZMA_OK; +} diff --git a/src/liblzma/common/alone_decoder.h b/src/liblzma/common/alone_decoder.h new file mode 100644 index 000000000000..70d0d2a0ffa4 --- /dev/null +++ b/src/liblzma/common/alone_decoder.h @@ -0,0 +1,22 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file alone_decoder.h +/// \brief Decoder for LZMA_Alone files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_ALONE_DECODER_H +#define LZMA_ALONE_DECODER_H + +#include "common.h" + + +extern lzma_ret lzma_alone_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, uint64_t memlimit); + +#endif diff --git a/src/liblzma/common/alone_encoder.c b/src/liblzma/common/alone_encoder.c new file mode 100644 index 000000000000..d8c0170f0238 --- /dev/null +++ b/src/liblzma/common/alone_encoder.c @@ -0,0 +1,157 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file alone_encoder.c +/// \brief Encoder for LZMA_Alone files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file.
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "lzma_encoder.h" + + +#define ALONE_HEADER_SIZE (1 + 4 + 8) // 1-byte properties, 4-byte dictionary size, 8-byte uncompressed size + + +struct lzma_coder_s { + lzma_next_coder next; + + enum { + SEQ_HEADER, + SEQ_CODE, + } sequence; + + size_t header_pos; // Progress through header[] (updated via lzma_bufcpy()) + uint8_t header[ALONE_HEADER_SIZE]; // Encoded header, filled in by alone_encoder_init() +}; + + +static lzma_ret +alone_encode(lzma_coder *coder, + lzma_allocator *allocator lzma_attribute((unused)), + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_action action) +{ + while (*out_pos < out_size) + switch (coder->sequence) { + case SEQ_HEADER: + lzma_bufcpy(coder->header, &coder->header_pos, + ALONE_HEADER_SIZE, + out, out_pos, out_size); + if (coder->header_pos < ALONE_HEADER_SIZE) + return LZMA_OK; + + coder->sequence = SEQ_CODE; + break; + + case SEQ_CODE: + return coder->next.code(coder->next.coder, + allocator, in, in_pos, in_size, + out, out_pos, out_size, action); + + default: + assert(0); + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static void +alone_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +// At least for now, this is not used by any internal function.
+static lzma_ret +alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_options_lzma *options) +{ + lzma_next_coder_init(&alone_encoder_init, next, allocator); + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &alone_encode; + next->end = &alone_encoder_end; + next->coder->next = LZMA_NEXT_CODER_INIT; + } + + // Basic initializations + next->coder->sequence = SEQ_HEADER; + next->coder->header_pos = 0; + + // Encode the header: + // - Properties (1 byte) + if (lzma_lzma_lclppb_encode(options, next->coder->header)) + return LZMA_OPTIONS_ERROR; + + // - Dictionary size (4 bytes) + if (options->dict_size < LZMA_DICT_SIZE_MIN) + return LZMA_OPTIONS_ERROR; + + // Round up to the next 2^n or 2^n + 2^(n - 1) depending on which + // one is the next unless it is UINT32_MAX. While the header would + // allow any 32-bit integer, we do this to keep the decoder of liblzma + // accepting the resulting files. + uint32_t d = options->dict_size - 1; + d |= d >> 2; + d |= d >> 3; + d |= d >> 4; + d |= d >> 8; + d |= d >> 16; + if (d != UINT32_MAX) + ++d; + + unaligned_write32le(next->coder->header + 1, d); + + // - Uncompressed size (always unknown and using EOPM) + memset(next->coder->header + 1 + 4, 0xFF, 8); + + // Initialize the LZMA encoder.
+ const lzma_filter_info filters[2] = { + { + .init = &lzma_lzma_encoder_init, + .options = (void *)(options), + }, { + .init = NULL, + } + }; + + return lzma_next_filter_init(&next->coder->next, allocator, filters); +} + + +/* +extern lzma_ret +lzma_alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_options_alone *options) +{ + lzma_next_coder_init(&alone_encoder_init, next, allocator, options); +} +*/ + + +extern LZMA_API(lzma_ret) +lzma_alone_encoder(lzma_stream *strm, const lzma_options_lzma *options) +{ + lzma_next_strm_init(alone_encoder_init, strm, options); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} diff --git a/src/liblzma/common/auto_decoder.c b/src/liblzma/common/auto_decoder.c new file mode 100644 index 000000000000..ae6c3e78642c --- /dev/null +++ b/src/liblzma/common/auto_decoder.c @@ -0,0 +1,186 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file auto_decoder.c +/// \brief Autodetect between .xz Stream and .lzma (LZMA_Alone) formats +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file.
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "stream_decoder.h" +#include "alone_decoder.h" + + +struct lzma_coder_s { + /// Stream decoder or LZMA_Alone decoder + lzma_next_coder next; + + uint64_t memlimit; + uint32_t flags; + + enum { + SEQ_INIT, + SEQ_CODE, + SEQ_FINISH, + } sequence; +}; + + +static lzma_ret +auto_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + switch (coder->sequence) { + case SEQ_INIT: + if (*in_pos >= in_size) + return LZMA_OK; + + // Update the sequence now, because we want to continue from + // SEQ_CODE even if we return some LZMA_*_CHECK. + coder->sequence = SEQ_CODE; + + // Detect the file format. For now this is simple, since if + // it doesn't start with 0xFD (the first magic byte of the + // new format), it has to be LZMA_Alone, or something that + // we don't support at all. + if (in[*in_pos] == 0xFD) { + return_if_error(lzma_stream_decoder_init( + &coder->next, allocator, + coder->memlimit, coder->flags)); + } else { + return_if_error(lzma_alone_decoder_init(&coder->next, + allocator, coder->memlimit)); + + // If the application wants to know about missing + // integrity check or about the check in general, we + // need to handle it here, because LZMA_Alone decoder + // doesn't accept any flags.
+ if (coder->flags & LZMA_TELL_NO_CHECK) + return LZMA_NO_CHECK; + + if (coder->flags & LZMA_TELL_ANY_CHECK) + return LZMA_GET_CHECK; + } + + // Fall through + + case SEQ_CODE: { + const lzma_ret ret = coder->next.code( + coder->next.coder, allocator, + in, in_pos, in_size, + out, out_pos, out_size, action); + if (ret != LZMA_STREAM_END + || (coder->flags & LZMA_CONCATENATED) == 0) + return ret; + + coder->sequence = SEQ_FINISH; + } + + // Fall through + + case SEQ_FINISH: + // When LZMA_CONCATENATED was used and we were decoding + // an LZMA_Alone file, we need to check that there is no + // trailing garbage and wait for LZMA_FINISH. + if (*in_pos < in_size) + return LZMA_DATA_ERROR; + + return action == LZMA_FINISH ? LZMA_STREAM_END : LZMA_OK; + + default: + assert(0); + return LZMA_PROG_ERROR; + } +} + + +static void +auto_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_check +auto_decoder_get_check(const lzma_coder *coder) +{ + // It is LZMA_Alone if get_check is NULL. + return coder->next.get_check == NULL ? LZMA_CHECK_NONE + : coder->next.get_check(coder->next.coder); +} + + +static lzma_ret +auto_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, + uint64_t *old_memlimit, uint64_t new_memlimit) +{ + lzma_ret ret; + + if (coder->next.memconfig != NULL) { + ret = coder->next.memconfig(coder->next.coder, + memusage, old_memlimit, new_memlimit); + assert(*old_memlimit == coder->memlimit); + } else { + // No coder is configured yet. Use the base value as + // the current memory usage.
+ *memusage = LZMA_MEMUSAGE_BASE; + *old_memlimit = coder->memlimit; + ret = LZMA_OK; + } + + if (ret == LZMA_OK && new_memlimit != 0) + coder->memlimit = new_memlimit; + + return ret; +} + + +static lzma_ret +auto_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + uint64_t memlimit, uint32_t flags) +{ + lzma_next_coder_init(&auto_decoder_init, next, allocator); + + if (memlimit == 0) + return LZMA_PROG_ERROR; + + if (flags & ~LZMA_SUPPORTED_FLAGS) + return LZMA_OPTIONS_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &auto_decode; + next->end = &auto_decoder_end; + next->get_check = &auto_decoder_get_check; + next->memconfig = &auto_decoder_memconfig; + next->coder->next = LZMA_NEXT_CODER_INIT; + } + + next->coder->memlimit = memlimit; + next->coder->flags = flags; + next->coder->sequence = SEQ_INIT; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_auto_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags) +{ + lzma_next_strm_init(auto_decoder_init, strm, memlimit, flags); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} diff --git a/src/liblzma/common/block_buffer_decoder.c b/src/liblzma/common/block_buffer_decoder.c new file mode 100644 index 000000000000..ff27a11ccfe6 --- /dev/null +++ b/src/liblzma/common/block_buffer_decoder.c @@ -0,0 +1,80 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_buffer_decoder.c +/// \brief Single-call .xz Block decoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file.
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "block_decoder.h" + + +extern LZMA_API(lzma_ret) +lzma_block_buffer_decode(lzma_block *block, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + if (in_pos == NULL || (in == NULL && *in_pos != in_size) + || *in_pos > in_size || out_pos == NULL + || (out == NULL && *out_pos != out_size) + || *out_pos > out_size) + return LZMA_PROG_ERROR; + + // Initialize the Block decoder. + lzma_next_coder block_decoder = LZMA_NEXT_CODER_INIT; + lzma_ret ret = lzma_block_decoder_init( + &block_decoder, allocator, block); + + if (ret == LZMA_OK) { + // Save the positions so that we can restore them in case + // an error occurs. + const size_t in_start = *in_pos; + const size_t out_start = *out_pos; + + // Do the actual decoding. + ret = block_decoder.code(block_decoder.coder, allocator, + in, in_pos, in_size, out, out_pos, out_size, + LZMA_FINISH); + + if (ret == LZMA_STREAM_END) { + ret = LZMA_OK; + } else { + if (ret == LZMA_OK) { + // Either the input was truncated or the + // output buffer was too small. + assert(*in_pos == in_size + || *out_pos == out_size); + + // If all the input was consumed, then the + // input is truncated, even if the output + // buffer is also full. This is because + // processing the last byte of the Block + // never produces output. + // + // NOTE: This assumption may break when new + // filters are added, if the end marker of + // the filter doesn't consume at least one + // complete byte. + if (*in_pos == in_size) + ret = LZMA_DATA_ERROR; + else + ret = LZMA_BUF_ERROR; + } + + // Restore the positions. + *in_pos = in_start; + *out_pos = out_start; + } + } + + // Free the decoder memory. This needs to be done even if + // initialization fails, because the internal API doesn't + // require the initialization function to free its memory on error. 
+ lzma_next_end(&block_decoder, allocator); + + return ret; +} diff --git a/src/liblzma/common/block_buffer_encoder.c b/src/liblzma/common/block_buffer_encoder.c new file mode 100644 index 000000000000..4d90feef50bd --- /dev/null +++ b/src/liblzma/common/block_buffer_encoder.c @@ -0,0 +1,299 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_buffer_encoder.c +/// \brief Single-call .xz Block encoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "block_encoder.h" +#include "filter_encoder.h" +#include "lzma2_encoder.h" +#include "check.h" + + +/// Estimate the maximum size of the Block Header and Check fields for +/// a Block that uses LZMA2 uncompressed chunks. We could use +/// lzma_block_header_size() but this is simpler. +/// +/// Block Header Size + Block Flags + Compressed Size +/// + Uncompressed Size + Filter Flags for LZMA2 + CRC32 + Check +/// and round up to the next multiple of four to take Header Padding +/// into account. +#define HEADERS_BOUND ((1 + 1 + 2 * LZMA_VLI_BYTES_MAX + 3 + 4 \ + + LZMA_CHECK_SIZE_MAX + 3) & ~3) + + +static lzma_vli +lzma2_bound(lzma_vli uncompressed_size) +{ + // Prevent integer overflow in overhead calculation. + if (uncompressed_size > COMPRESSED_SIZE_MAX) + return 0; + + // Calculate the exact overhead of the LZMA2 headers: Round + // uncompressed_size up to the next multiple of LZMA2_CHUNK_MAX, + // multiply by the size of per-chunk header, and add one byte for + // the end marker. + const lzma_vli overhead = ((uncompressed_size + LZMA2_CHUNK_MAX - 1) + / LZMA2_CHUNK_MAX) + * LZMA2_HEADER_UNCOMPRESSED + 1; + + // Catch the possible integer overflow. 
+ if (COMPRESSED_SIZE_MAX - overhead < uncompressed_size) + return 0; + + return uncompressed_size + overhead; +} + + +extern LZMA_API(size_t) +lzma_block_buffer_bound(size_t uncompressed_size) +{ + // For now, if the data doesn't compress, we always use uncompressed + // chunks of LZMA2. In future we may use Subblock filter too, but + // for simplicity we probably will still use the same bound + // calculation even though Subblock filter would have slightly less + // overhead. + lzma_vli lzma2_size = lzma2_bound(uncompressed_size); + if (lzma2_size == 0) + return 0; + + // Take Block Padding into account. + lzma2_size = (lzma2_size + 3) & ~LZMA_VLI_C(3); + +#if SIZE_MAX < LZMA_VLI_MAX + // Catch the possible integer overflow on 32-bit systems. There's no + // overflow on 64-bit systems, because lzma2_bound() already takes + // into account the size of the headers in the Block. + if (SIZE_MAX - HEADERS_BOUND < lzma2_size) + return 0; +#endif + + return HEADERS_BOUND + lzma2_size; +} + + +static lzma_ret +block_encode_uncompressed(lzma_block *block, const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + // TODO: Figure out if the last filter is LZMA2 or Subblock and use + // that filter to encode the uncompressed chunks. + + // Use LZMA2 uncompressed chunks. We wouldn't need a dictionary at + // all, but LZMA2 always requires a dictionary, so use the minimum + // value to minimize memory usage of the decoder. + lzma_options_lzma lzma2 = { + .dict_size = LZMA_DICT_SIZE_MIN, + }; + + lzma_filter filters[2]; + filters[0].id = LZMA_FILTER_LZMA2; + filters[0].options = &lzma2; + filters[1].id = LZMA_VLI_UNKNOWN; + + // Set the above filter options to *block temporarily so that we can + // encode the Block Header. 
+ lzma_filter *filters_orig = block->filters; + block->filters = filters; + + if (lzma_block_header_size(block) != LZMA_OK) { + block->filters = filters_orig; + return LZMA_PROG_ERROR; + } + + // Check that there's enough output space. The caller has already + // set block->compressed_size to what lzma2_bound() has returned, + // so we can reuse that value. We know that compressed_size is a + // known valid VLI and header_size is a small value so their sum + // will never overflow. + assert(block->compressed_size == lzma2_bound(in_size)); + if (out_size - *out_pos + < block->header_size + block->compressed_size) { + block->filters = filters_orig; + return LZMA_BUF_ERROR; + } + + if (lzma_block_header_encode(block, out + *out_pos) != LZMA_OK) { + block->filters = filters_orig; + return LZMA_PROG_ERROR; + } + + block->filters = filters_orig; + *out_pos += block->header_size; + + // Encode the data using LZMA2 uncompressed chunks. + size_t in_pos = 0; + uint8_t control = 0x01; // Dictionary reset + + while (in_pos < in_size) { + // Control byte: Indicate uncompressed chunk, of which + // the first resets the dictionary. + out[(*out_pos)++] = control; + control = 0x02; // No dictionary reset + + // Size of the uncompressed chunk + const size_t copy_size + = MIN(in_size - in_pos, LZMA2_CHUNK_MAX); + out[(*out_pos)++] = (copy_size - 1) >> 8; + out[(*out_pos)++] = (copy_size - 1) & 0xFF; + + // The actual data + assert(*out_pos + copy_size <= out_size); + memcpy(out + *out_pos, in + in_pos, copy_size); + + in_pos += copy_size; + *out_pos += copy_size; + } + + // End marker + out[(*out_pos)++] = 0x00; + assert(*out_pos <= out_size); + + return LZMA_OK; +} + + +static lzma_ret +block_encode_normal(lzma_block *block, lzma_allocator *allocator, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + // Find out the size of the Block Header. 
+ block->compressed_size = lzma2_bound(in_size); + if (block->compressed_size == 0) + return LZMA_DATA_ERROR; + + block->uncompressed_size = in_size; + return_if_error(lzma_block_header_size(block)); + + // Reserve space for the Block Header and skip it for now. + if (out_size - *out_pos <= block->header_size) + return LZMA_BUF_ERROR; + + const size_t out_start = *out_pos; + *out_pos += block->header_size; + + // Limit out_size so that we stop encoding if the output would grow + // bigger than what uncompressed Block would be. + if (out_size - *out_pos > block->compressed_size) + out_size = *out_pos + block->compressed_size; + + // TODO: In many common cases this could be optimized to use + // significantly less memory. + lzma_next_coder raw_encoder = LZMA_NEXT_CODER_INIT; + lzma_ret ret = lzma_raw_encoder_init( + &raw_encoder, allocator, block->filters); + + if (ret == LZMA_OK) { + size_t in_pos = 0; + ret = raw_encoder.code(raw_encoder.coder, allocator, + in, &in_pos, in_size, out, out_pos, out_size, + LZMA_FINISH); + } + + // NOTE: This needs to be run even if lzma_raw_encoder_init() failed. + lzma_next_end(&raw_encoder, allocator); + + if (ret == LZMA_STREAM_END) { + // Compression was successful. Write the Block Header. + block->compressed_size + = *out_pos - (out_start + block->header_size); + ret = lzma_block_header_encode(block, out + out_start); + if (ret != LZMA_OK) + ret = LZMA_PROG_ERROR; + + } else if (ret == LZMA_OK) { + // Output buffer became full. + ret = LZMA_BUF_ERROR; + } + + // Reset *out_pos if something went wrong. 
+ if (ret != LZMA_OK) + *out_pos = out_start; + + return ret; +} + + +extern LZMA_API(lzma_ret) +lzma_block_buffer_encode(lzma_block *block, lzma_allocator *allocator, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + // Sanity checks + if (block == NULL || block->filters == NULL + || (in == NULL && in_size != 0) || out == NULL + || out_pos == NULL || *out_pos > out_size) + return LZMA_PROG_ERROR; + + // Check the version field. + if (block->version != 0) + return LZMA_OPTIONS_ERROR; + + // Size of a Block has to be a multiple of four, so limit the size + // here already. This way we don't need to check it again when adding + // Block Padding. + out_size -= (out_size - *out_pos) & 3; + + // Get the size of the Check field. + const size_t check_size = lzma_check_size(block->check); + if (check_size == UINT32_MAX) + return LZMA_PROG_ERROR; + + // Reserve space for the Check field. + if (out_size - *out_pos <= check_size) + return LZMA_BUF_ERROR; + + out_size -= check_size; + + // Do the actual compression. + const lzma_ret ret = block_encode_normal(block, allocator, + in, in_size, out, out_pos, out_size); + if (ret != LZMA_OK) { + // If the error was something else than output buffer + // becoming full, return the error now. + if (ret != LZMA_BUF_ERROR) + return ret; + + // The data was uncompressible (at least with the options + // given to us) or the output buffer was too small. Use the + // uncompressed chunks of LZMA2 to wrap the data into a valid + // Block. If we haven't been given enough output space, even + // this may fail. + return_if_error(block_encode_uncompressed(block, in, in_size, + out, out_pos, out_size)); + } + + assert(*out_pos <= out_size); + + // Block Padding. No buffer overflow here, because we already adjusted + // out_size so that (out_size - out_start) is a multiple of four. + // Thus, if the buffer is full, the loop body can never run. 
+ for (size_t i = (size_t)(block->compressed_size); i & 3; ++i) { + assert(*out_pos < out_size); + out[(*out_pos)++] = 0x00; + } + + // If there's no Check field, we are done now. + if (check_size > 0) { + // Calculate the integrity check. We reserved space for + // the Check field earlier so we don't need to check for + // available output space here. + lzma_check_state check; + lzma_check_init(&check, block->check); + lzma_check_update(&check, block->check, in, in_size); + lzma_check_finish(&check, block->check); + + memcpy(block->raw_check, check.buffer.u8, check_size); + memcpy(out + *out_pos, check.buffer.u8, check_size); + *out_pos += check_size; + } + + return LZMA_OK; +} diff --git a/src/liblzma/common/block_decoder.c b/src/liblzma/common/block_decoder.c new file mode 100644 index 000000000000..a3ce6f49500c --- /dev/null +++ b/src/liblzma/common/block_decoder.c @@ -0,0 +1,242 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_decoder.c +/// \brief Decodes .xz Blocks +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "block_decoder.h" +#include "filter_decoder.h" +#include "check.h" + + +struct lzma_coder_s { + enum { + SEQ_CODE, + SEQ_PADDING, + SEQ_CHECK, + } sequence; + + /// The filters in the chain; initialized with lzma_raw_decoder_init(). + lzma_next_coder next; + + /// Decoding options; we also write Compressed Size and Uncompressed + /// Size back to this structure when the decoding has been finished. 
+ lzma_block *block; + + /// Compressed Size calculated while decoding + lzma_vli compressed_size; + + /// Uncompressed Size calculated while decoding + lzma_vli uncompressed_size; + + /// Maximum allowed Compressed Size; this takes into account the + /// size of the Block Header and Check fields when Compressed Size + /// is unknown. + lzma_vli compressed_limit; + + /// Position when reading the Check field + size_t check_pos; + + /// Check of the uncompressed data + lzma_check_state check; +}; + + +static inline bool +update_size(lzma_vli *size, lzma_vli add, lzma_vli limit) +{ + if (limit > LZMA_VLI_MAX) + limit = LZMA_VLI_MAX; + + if (limit < *size || limit - *size < add) + return true; + + *size += add; + + return false; +} + + +static inline bool +is_size_valid(lzma_vli size, lzma_vli reference) +{ + return reference == LZMA_VLI_UNKNOWN || reference == size; +} + + +static lzma_ret +block_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + switch (coder->sequence) { + case SEQ_CODE: { + const size_t in_start = *in_pos; + const size_t out_start = *out_pos; + + const lzma_ret ret = coder->next.code(coder->next.coder, + allocator, in, in_pos, in_size, + out, out_pos, out_size, action); + + const size_t in_used = *in_pos - in_start; + const size_t out_used = *out_pos - out_start; + + // NOTE: We compare to compressed_limit here, which prevents + // the total size of the Block growing past LZMA_VLI_MAX. 
+ if (update_size(&coder->compressed_size, in_used, + coder->compressed_limit) + || update_size(&coder->uncompressed_size, + out_used, + coder->block->uncompressed_size)) + return LZMA_DATA_ERROR; + + lzma_check_update(&coder->check, coder->block->check, + out + out_start, out_used); + + if (ret != LZMA_STREAM_END) + return ret; + + // Compressed and Uncompressed Sizes are now at their final + // values. Verify that they match the values given to us. + if (!is_size_valid(coder->compressed_size, + coder->block->compressed_size) + || !is_size_valid(coder->uncompressed_size, + coder->block->uncompressed_size)) + return LZMA_DATA_ERROR; + + // Copy the values into coder->block. The caller + // may use this information to construct Index. + coder->block->compressed_size = coder->compressed_size; + coder->block->uncompressed_size = coder->uncompressed_size; + + coder->sequence = SEQ_PADDING; + } + + // Fall through + + case SEQ_PADDING: + // Compressed Data is padded to a multiple of four bytes. + while (coder->compressed_size & 3) { + if (*in_pos >= in_size) + return LZMA_OK; + + // We use compressed_size here just to get the Padding + // right. The actual Compressed Size was stored to + // coder->block already, and won't be modified by + // us anymore. + ++coder->compressed_size; + + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + } + + if (coder->block->check == LZMA_CHECK_NONE) + return LZMA_STREAM_END; + + lzma_check_finish(&coder->check, coder->block->check); + coder->sequence = SEQ_CHECK; + + // Fall through + + case SEQ_CHECK: { + const size_t check_size = lzma_check_size(coder->block->check); + lzma_bufcpy(in, in_pos, in_size, coder->block->raw_check, + &coder->check_pos, check_size); + if (coder->check_pos < check_size) + return LZMA_OK; + + // Validate the Check only if we support it. + // coder->check.buffer may be uninitialized + // when the Check ID is not supported. 
+ if (lzma_check_is_supported(coder->block->check) + && memcmp(coder->block->raw_check, + coder->check.buffer.u8, + check_size) != 0) + return LZMA_DATA_ERROR; + + return LZMA_STREAM_END; + } + } + + return LZMA_PROG_ERROR; +} + + +static void +block_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +extern lzma_ret +lzma_block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_block *block) +{ + lzma_next_coder_init(&lzma_block_decoder_init, next, allocator); + + // Validate the options. lzma_block_unpadded_size() does that for us + // except for Uncompressed Size and filters. Filters are validated + // by the raw decoder. + if (lzma_block_unpadded_size(block) == 0 + || !lzma_vli_is_valid(block->uncompressed_size)) + return LZMA_PROG_ERROR; + + // Allocate and initialize *next->coder if needed. + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &block_decode; + next->end = &block_decoder_end; + next->coder->next = LZMA_NEXT_CODER_INIT; + } + + // Basic initializations + next->coder->sequence = SEQ_CODE; + next->coder->block = block; + next->coder->compressed_size = 0; + next->coder->uncompressed_size = 0; + + // If Compressed Size is not known, we calculate the maximum allowed + // value so that encoded size of the Block (including Block Padding) + // is still a valid VLI and a multiple of four. + next->coder->compressed_limit + = block->compressed_size == LZMA_VLI_UNKNOWN + ? (LZMA_VLI_MAX & ~LZMA_VLI_C(3)) + - block->header_size + - lzma_check_size(block->check) + : block->compressed_size; + + // Initialize the check. It's caller's problem if the Check ID is not + // supported, and the Block decoder cannot verify the Check field. + // Caller can test lzma_check_is_supported(block->check). 
+ next->coder->check_pos = 0; + lzma_check_init(&next->coder->check, block->check); + + // Initialize the filter chain. + return lzma_raw_decoder_init(&next->coder->next, allocator, + block->filters); +} + + +extern LZMA_API(lzma_ret) +lzma_block_decoder(lzma_stream *strm, lzma_block *block) +{ + lzma_next_strm_init(lzma_block_decoder_init, strm, block); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} diff --git a/src/liblzma/common/block_decoder.h b/src/liblzma/common/block_decoder.h new file mode 100644 index 000000000000..7da9df63f767 --- /dev/null +++ b/src/liblzma/common/block_decoder.h @@ -0,0 +1,22 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_decoder.h +/// \brief Decodes .xz Blocks +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_BLOCK_DECODER_H +#define LZMA_BLOCK_DECODER_H + +#include "common.h" + + +extern lzma_ret lzma_block_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, lzma_block *block); + +#endif diff --git a/src/liblzma/common/block_encoder.c b/src/liblzma/common/block_encoder.c new file mode 100644 index 000000000000..ca5152357e7d --- /dev/null +++ b/src/liblzma/common/block_encoder.c @@ -0,0 +1,212 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_encoder.c +/// \brief Encodes .xz Blocks +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "block_encoder.h" +#include "filter_encoder.h" +#include "check.h" + + +struct lzma_coder_s { + /// The filters in the chain; initialized with lzma_raw_encoder_init(). + lzma_next_coder next; + + /// Encoding options; we also write Unpadded Size, Compressed Size, + /// and Uncompressed Size back to this structure when the encoding + /// has been finished. + lzma_block *block; + + enum { + SEQ_CODE, + SEQ_PADDING, + SEQ_CHECK, + } sequence; + + /// Compressed Size calculated while encoding + lzma_vli compressed_size; + + /// Uncompressed Size calculated while encoding + lzma_vli uncompressed_size; + + /// Position in the Check field + size_t pos; + + /// Check of the uncompressed data + lzma_check_state check; +}; + + +static lzma_ret +block_encode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + // Check that our amount of input stays in proper limits. + if (LZMA_VLI_MAX - coder->uncompressed_size < in_size - *in_pos) + return LZMA_DATA_ERROR; + + switch (coder->sequence) { + case SEQ_CODE: { + const size_t in_start = *in_pos; + const size_t out_start = *out_pos; + + const lzma_ret ret = coder->next.code(coder->next.coder, + allocator, in, in_pos, in_size, + out, out_pos, out_size, action); + + const size_t in_used = *in_pos - in_start; + const size_t out_used = *out_pos - out_start; + + if (COMPRESSED_SIZE_MAX - coder->compressed_size < out_used) + return LZMA_DATA_ERROR; + + coder->compressed_size += out_used; + + // No need to check for overflow because we have already + // checked it at the beginning of this function. 
+ coder->uncompressed_size += in_used; + + lzma_check_update(&coder->check, coder->block->check, + in + in_start, in_used); + + if (ret != LZMA_STREAM_END || action == LZMA_SYNC_FLUSH) + return ret; + + assert(*in_pos == in_size); + assert(action == LZMA_FINISH); + + // Copy the values into coder->block. The caller + // may use this information to construct Index. + coder->block->compressed_size = coder->compressed_size; + coder->block->uncompressed_size = coder->uncompressed_size; + + coder->sequence = SEQ_PADDING; + } + + // Fall through + + case SEQ_PADDING: + // Pad Compressed Data to a multiple of four bytes. We can + // use coder->compressed_size for this since we don't need + // it for anything else anymore. + while (coder->compressed_size & 3) { + if (*out_pos >= out_size) + return LZMA_OK; + + out[*out_pos] = 0x00; + ++*out_pos; + ++coder->compressed_size; + } + + if (coder->block->check == LZMA_CHECK_NONE) + return LZMA_STREAM_END; + + lzma_check_finish(&coder->check, coder->block->check); + + coder->sequence = SEQ_CHECK; + + // Fall through + + case SEQ_CHECK: { + const size_t check_size = lzma_check_size(coder->block->check); + lzma_bufcpy(coder->check.buffer.u8, &coder->pos, check_size, + out, out_pos, out_size); + if (coder->pos < check_size) + return LZMA_OK; + + memcpy(coder->block->raw_check, coder->check.buffer.u8, + check_size); + return LZMA_STREAM_END; + } + } + + return LZMA_PROG_ERROR; +} + + +static void +block_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +block_encoder_update(lzma_coder *coder, lzma_allocator *allocator, + const lzma_filter *filters lzma_attribute((unused)), + const lzma_filter *reversed_filters) +{ + if (coder->sequence != SEQ_CODE) + return LZMA_PROG_ERROR; + + return lzma_next_filter_update( + &coder->next, allocator, reversed_filters); +} + + +extern lzma_ret +lzma_block_encoder_init(lzma_next_coder 
*next, lzma_allocator *allocator, + lzma_block *block) +{ + lzma_next_coder_init(&lzma_block_encoder_init, next, allocator); + + if (block->version != 0) + return LZMA_OPTIONS_ERROR; + + // If the Check ID is not supported, we cannot calculate the check and + // thus not create a proper Block. + if ((unsigned int)(block->check) > LZMA_CHECK_ID_MAX) + return LZMA_PROG_ERROR; + + if (!lzma_check_is_supported(block->check)) + return LZMA_UNSUPPORTED_CHECK; + + // Allocate and initialize *next->coder if needed. + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &block_encode; + next->end = &block_encoder_end; + next->update = &block_encoder_update; + next->coder->next = LZMA_NEXT_CODER_INIT; + } + + // Basic initializations + next->coder->sequence = SEQ_CODE; + next->coder->block = block; + next->coder->compressed_size = 0; + next->coder->uncompressed_size = 0; + next->coder->pos = 0; + + // Initialize the check + lzma_check_init(&next->coder->check, block->check); + + // Initialize the requested filters. + return lzma_raw_encoder_init(&next->coder->next, allocator, + block->filters); +} + + +extern LZMA_API(lzma_ret) +lzma_block_encoder(lzma_stream *strm, lzma_block *block) +{ + lzma_next_strm_init(lzma_block_encoder_init, strm, block); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} diff --git a/src/liblzma/common/block_encoder.h b/src/liblzma/common/block_encoder.h new file mode 100644 index 000000000000..b9eff0be2736 --- /dev/null +++ b/src/liblzma/common/block_encoder.h @@ -0,0 +1,47 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_encoder.h +/// \brief Encodes .xz Blocks +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_BLOCK_ENCODER_H +#define LZMA_BLOCK_ENCODER_H + +#include "common.h" + + +/// \brief Biggest Compressed Size value that the Block encoder supports +/// +/// The maximum size of a single Block is limited by the maximum size of +/// a Stream, which in theory is 2^63 - 3 bytes (i.e. LZMA_VLI_MAX - 3). +/// While the size is really big and no one should hit it in practice, we +/// take it into account in some places anyway to catch some errors e.g. if +/// an application passes an insanely big value to some function. +/// +/// We could take into account the headers etc. to determine the exact +/// maximum size of the Compressed Data field, but the complexity would give +/// us nothing useful. Instead, limit the size of Compressed Data so that +/// even with the biggest possible Block Header and Check fields the total +/// encoded size of the Block stays a valid VLI. This doesn't guarantee +/// that the size of the Stream doesn't grow too big, but that problem is +/// taken care of outside the Block handling code. +/// +/// ~LZMA_VLI_C(3) is to guarantee that if we need padding at the end of +/// the Compressed Data field, it will still stay in the proper limit. +/// +/// This constant is in this file because it is needed in both +/// block_encoder.c and block_buffer_encoder.c. 
+#define COMPRESSED_SIZE_MAX ((LZMA_VLI_MAX - LZMA_BLOCK_HEADER_SIZE_MAX \ + - LZMA_CHECK_SIZE_MAX) & ~LZMA_VLI_C(3)) + + +extern lzma_ret lzma_block_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, lzma_block *block); + +#endif diff --git a/src/liblzma/common/block_header_decoder.c b/src/liblzma/common/block_header_decoder.c new file mode 100644 index 000000000000..2c9573ee204c --- /dev/null +++ b/src/liblzma/common/block_header_decoder.c @@ -0,0 +1,116 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_header_decoder.c +/// \brief Decodes Block Header from .xz files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "check.h" + + +static void +free_properties(lzma_block *block, lzma_allocator *allocator) +{ + // Free allocated filter options. The last array member is not + // touched after the initialization in the beginning of + // lzma_block_header_decode(), so we don't need to touch that here. + for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i) { + lzma_free(block->filters[i].options, allocator); + block->filters[i].id = LZMA_VLI_UNKNOWN; + block->filters[i].options = NULL; + } + + return; +} + + +extern LZMA_API(lzma_ret) +lzma_block_header_decode(lzma_block *block, + lzma_allocator *allocator, const uint8_t *in) +{ + // NOTE: We consider the header to be corrupt not only when the + // CRC32 doesn't match, but also when variable-length integers + // are invalid or over 63 bits, or if the header is too small + // to contain the claimed information. + + // Initialize the filter options array. This way the caller can + // safely free() the options even if an error occurs in this function. 
+ for (size_t i = 0; i <= LZMA_FILTERS_MAX; ++i) { + block->filters[i].id = LZMA_VLI_UNKNOWN; + block->filters[i].options = NULL; + } + + // Always zero for now. + block->version = 0; + + // Validate Block Header Size and Check type. The caller must have + // already set these, so it is a programming error if this test fails. + if (lzma_block_header_size_decode(in[0]) != block->header_size + || (unsigned int)(block->check) > LZMA_CHECK_ID_MAX) + return LZMA_PROG_ERROR; + + // Exclude the CRC32 field. + const size_t in_size = block->header_size - 4; + + // Verify CRC32 + if (lzma_crc32(in, in_size, 0) != unaligned_read32le(in + in_size)) + return LZMA_DATA_ERROR; + + // Check for unsupported flags. + if (in[1] & 0x3C) + return LZMA_OPTIONS_ERROR; + + // Start after the Block Header Size and Block Flags fields. + size_t in_pos = 2; + + // Compressed Size + if (in[1] & 0x40) { + return_if_error(lzma_vli_decode(&block->compressed_size, + NULL, in, &in_pos, in_size)); + + // Validate Compressed Size. This checks that it isn't zero + // and that the total size of the Block is a valid VLI. + if (lzma_block_unpadded_size(block) == 0) + return LZMA_DATA_ERROR; + } else { + block->compressed_size = LZMA_VLI_UNKNOWN; + } + + // Uncompressed Size + if (in[1] & 0x80) + return_if_error(lzma_vli_decode(&block->uncompressed_size, + NULL, in, &in_pos, in_size)); + else + block->uncompressed_size = LZMA_VLI_UNKNOWN; + + // Filter Flags + const size_t filter_count = (in[1] & 3) + 1; + for (size_t i = 0; i < filter_count; ++i) { + const lzma_ret ret = lzma_filter_flags_decode( + &block->filters[i], allocator, + in, &in_pos, in_size); + if (ret != LZMA_OK) { + free_properties(block, allocator); + return ret; + } + } + + // Padding + while (in_pos < in_size) { + if (in[in_pos++] != 0x00) { + free_properties(block, allocator); + + // Possibly some new field present so use + // LZMA_OPTIONS_ERROR instead of LZMA_DATA_ERROR. 
+			return LZMA_OPTIONS_ERROR;
+		}
+	}
+
+	return LZMA_OK;
+}
diff --git a/src/liblzma/common/block_header_encoder.c b/src/liblzma/common/block_header_encoder.c
new file mode 100644
index 000000000000..707dd0cb14a2
--- /dev/null
+++ b/src/liblzma/common/block_header_encoder.c
@@ -0,0 +1,145 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       block_header_encoder.c
+/// \brief      Encodes Block Header for .xz files
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+#include "check.h"
+
+
+/// Calculate the size of the Block Header that describes *block and store
+/// it in block->header_size, padded to a multiple of four bytes.
+///
+/// Returns LZMA_OPTIONS_ERROR if block->version isn't zero, and
+/// LZMA_PROG_ERROR if a size field is unusable or the filter chain is
+/// missing, empty, or too long. Errors from lzma_filter_flags_size()
+/// are passed through.
+extern LZMA_API(lzma_ret)
+lzma_block_header_size(lzma_block *block)
+{
+	if (block->version != 0)
+		return LZMA_OPTIONS_ERROR;
+
+	// The fixed fields: Block Header Size + Block Flags + CRC32.
+	uint32_t total = 1 + 1 + 4;
+
+	// Compressed Size is optional. If it is known, it must be
+	// non-zero and lzma_vli_size() must accept it.
+	const lzma_vli comp_size = block->compressed_size;
+	if (comp_size != LZMA_VLI_UNKNOWN) {
+		const uint32_t field_size = lzma_vli_size(comp_size);
+		if (comp_size == 0 || field_size == 0)
+			return LZMA_PROG_ERROR;
+
+		total += field_size;
+	}
+
+	// Uncompressed Size is optional too. Zero is fine here.
+	const lzma_vli uncomp_size = block->uncompressed_size;
+	if (uncomp_size != LZMA_VLI_UNKNOWN) {
+		const uint32_t field_size = lzma_vli_size(uncomp_size);
+		if (field_size == 0)
+			return LZMA_PROG_ERROR;
+
+		total += field_size;
+	}
+
+	// List of Filter Flags. At least one filter is required.
+	if (block->filters == NULL
+			|| block->filters[0].id == LZMA_VLI_UNKNOWN)
+		return LZMA_PROG_ERROR;
+
+	size_t count = 0;
+	while (block->filters[count].id != LZMA_VLI_UNKNOWN) {
+		// Don't allow too many filters.
+		if (count == LZMA_FILTERS_MAX)
+			return LZMA_PROG_ERROR;
+
+		uint32_t flags_size;
+		return_if_error(lzma_filter_flags_size(&flags_size,
+				&block->filters[count]));
+
+		total += flags_size;
+		++count;
+	}
+
+	// Pad to a multiple of four bytes.
+	block->header_size = (total + 3) & ~UINT32_C(3);
+
+	// NOTE: We don't verify that the encoded size of the Block stays
+	// within limits. This is because it is possible that we are called
+	// with exaggerated Compressed Size (e.g. LZMA_VLI_MAX) to reserve
+	// space for Block Header, and later called again with lower,
+	// real values.
+
+	return LZMA_OK;
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_block_header_encode(const lzma_block *block, uint8_t *out)
+{
+	// Validate everything but filters.
+	if (lzma_block_unpadded_size(block) == 0
+			|| !lzma_vli_is_valid(block->uncompressed_size))
+		return LZMA_PROG_ERROR;
+
+	// Indicate the size of the buffer _excluding_ the CRC32 field.
+	const size_t out_size = block->header_size - 4;
+
+	// Store the Block Header Size.
+	out[0] = out_size / 4;
+
+	// We write Block Flags in pieces.
+	out[1] = 0x00;
+	size_t out_pos = 2;
+
+	// Compressed Size
+	if (block->compressed_size != LZMA_VLI_UNKNOWN) {
+		return_if_error(lzma_vli_encode(block->compressed_size, NULL,
+				out, &out_pos, out_size));
+
+		out[1] |= 0x40;
+	}
+
+	// Uncompressed Size
+	if (block->uncompressed_size != LZMA_VLI_UNKNOWN) {
+		return_if_error(lzma_vli_encode(block->uncompressed_size, NULL,
+				out, &out_pos, out_size));
+
+		out[1] |= 0x80;
+	}
+
+	// Filter Flags
+	if (block->filters == NULL || block->filters[0].id == LZMA_VLI_UNKNOWN)
+		return LZMA_PROG_ERROR;
+
+	size_t filter_count = 0;
+	do {
+		// There can be a maximum of four filters.
+		if (filter_count == LZMA_FILTERS_MAX)
+			return LZMA_PROG_ERROR;
+
+		return_if_error(lzma_filter_flags_encode(
+				block->filters + filter_count,
+				out, &out_pos, out_size));
+
+	} while (block->filters[++filter_count].id != LZMA_VLI_UNKNOWN);
+
+	out[1] |= filter_count - 1;
+
+	// Padding
+	memzero(out + out_pos, out_size - out_pos);
+
+	// CRC32
+	unaligned_write32le(out + out_size, lzma_crc32(out, out_size, 0));
+
+	return LZMA_OK;
+}
diff --git a/src/liblzma/common/block_util.c b/src/liblzma/common/block_util.c
new file mode 100644
index 000000000000..cb9cde255e65
--- /dev/null
+++ b/src/liblzma/common/block_util.c
@@ -0,0 +1,99 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       block_util.c
+/// \brief      Utility functions to handle lzma_block
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+#include "index.h"
+
+
+/// Calculate Compressed Size from total_size, which is taken to cover
+/// Block Header, Compressed Data, and Check, and store the result in
+/// block->compressed_size.
+///
+/// Returns LZMA_PROG_ERROR if lzma_block_unpadded_size() rejects *block.
+/// Returns LZMA_DATA_ERROR if total_size leaves no room for Compressed
+/// Data or if the result differs from an already known Compressed Size.
+extern LZMA_API(lzma_ret)
+lzma_block_compressed_size(lzma_block *block, lzma_vli total_size)
+{
+	// Validate everything but Uncompressed Size and filters.
+	if (lzma_block_unpadded_size(block) == 0)
+		return LZMA_PROG_ERROR;
+
+	const uint32_t container_size = block->header_size
+			+ lzma_check_size(block->check);
+
+	// Validate that Compressed Size will be greater than zero, that is,
+	// that total_size is bigger than Block Header and Check together.
+	// Otherwise the subtraction below would give zero or wrap around.
+	if (total_size <= container_size)
+		return LZMA_DATA_ERROR;
+
+	// Calculate what Compressed Size is supposed to be.
+	// If Compressed Size was present in Block Header,
+	// compare that the new value matches it.
+	const lzma_vli compressed_size = total_size - container_size;
+	if (block->compressed_size != LZMA_VLI_UNKNOWN
+			&& block->compressed_size != compressed_size)
+		return LZMA_DATA_ERROR;
+
+	block->compressed_size = compressed_size;
+
+	return LZMA_OK;
+}
+
+
+extern LZMA_API(lzma_vli)
+lzma_block_unpadded_size(const lzma_block *block)
+{
+	// Validate the values that we are interested in i.e. all but
+	// Uncompressed Size and the filters.
+	//
+	// NOTE: This function is used for validation too, so it is
+	// essential that these checks are always done even if
+	// Compressed Size is unknown.
+	if (block == NULL || block->version != 0
+			|| block->header_size < LZMA_BLOCK_HEADER_SIZE_MIN
+			|| block->header_size > LZMA_BLOCK_HEADER_SIZE_MAX
+			|| (block->header_size & 3)
+			|| !lzma_vli_is_valid(block->compressed_size)
+			|| block->compressed_size == 0
+			|| (unsigned int)(block->check) > LZMA_CHECK_ID_MAX)
+		return 0;
+
+	// If Compressed Size is unknown, return that we cannot know
+	// size of the Block either.
+	if (block->compressed_size == LZMA_VLI_UNKNOWN)
+		return LZMA_VLI_UNKNOWN;
+
+	// Calculate Unpadded Size and validate it.
+	const lzma_vli unpadded_size = block->compressed_size
+				+ block->header_size
+				+ lzma_check_size(block->check);
+
+	assert(unpadded_size >= UNPADDED_SIZE_MIN);
+	if (unpadded_size > UNPADDED_SIZE_MAX)
+		return 0;
+
+	return unpadded_size;
+}
+
+
+extern LZMA_API(lzma_vli)
+lzma_block_total_size(const lzma_block *block)
+{
+	// An unknown Unpadded Size is returned as is; every other value,
+	// including the error value zero, goes through vli_ceil4().
+	const lzma_vli size = lzma_block_unpadded_size(block);
+	if (size == LZMA_VLI_UNKNOWN)
+		return size;
+	return vli_ceil4(size);
+}
diff --git a/src/liblzma/common/chunk_size.c b/src/liblzma/common/chunk_size.c
new file mode 100644
index 000000000000..363f07ece8bb
--- /dev/null
+++ b/src/liblzma/common/chunk_size.c
@@ -0,0 +1,67 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       chunk_size.c
+/// \brief      Finds out the minimal reasonable chunk size for a filter chain
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+
+
+/**
+ * \brief       Finds out the minimal reasonable chunk size for a filter chain
+ *
+ * This function helps determining the Uncompressed Sizes of the Blocks when
+ * doing multi-threaded encoding.
+ *
+ * When compressing a large file on a system having multiple CPUs or CPU
+ * cores, the file can be split into smaller chunks, that are compressed
+ * independently into separate Blocks in the same .lzma Stream.
+ *
+ * \return      Minimum reasonable Uncompressed Size of a Block. The
+ *              recommended minimum Uncompressed Size is between this value
+ *              and the value times two.
+ *              Zero is returned if the chain has an unknown Filter ID and
+ *              SIZE_MAX if splitting the data would be useless.
+ */
+extern LZMA_API(size_t)
+lzma_chunk_size(const lzma_options_filter *filters)
+{
+	while (filters->id != LZMA_VLI_UNKNOWN) {
+		switch (filters->id) {
+		// TODO LZMA_FILTER_SPARSE
+
+		case LZMA_FILTER_COPY:
+		case LZMA_FILTER_SUBBLOCK:
+		case LZMA_FILTER_X86:
+		case LZMA_FILTER_POWERPC:
+		case LZMA_FILTER_IA64:
+		case LZMA_FILTER_ARM:
+		case LZMA_FILTER_ARMTHUMB:
+		case LZMA_FILTER_SPARC:
+			// These are very fast, thus there is no point in
+			// splitting the data into smaller blocks.
+			break;
+
+		case LZMA_FILTER_LZMA1:
+			// The block sizes of the possible next filters in
+			// the chain are irrelevant after the LZMA filter.
+			return ((lzma_options_lzma *)(filters->options))
+					->dictionary_size;
+
+		default:
+			// Unknown filters
+			return 0;
+		}
+
+		++filters;
+	}
+
+	// Indicate that splitting would be useless.
+	return SIZE_MAX;
+}
diff --git a/src/liblzma/common/common.c b/src/liblzma/common/common.c
new file mode 100644
index 000000000000..2f185e490d48
--- /dev/null
+++ b/src/liblzma/common/common.c
@@ -0,0 +1,381 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       common.c
+/// \brief      Common functions needed in many places in liblzma
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+
+
+/////////////
+// Version //
+/////////////
+
+extern LZMA_API(uint32_t)
+lzma_version_number(void)
+{
+	return LZMA_VERSION;
+}
+
+
+extern LZMA_API(const char *)
+lzma_version_string(void)
+{
+	return LZMA_VERSION_STRING;
+}
+
+
+///////////////////////
+// Memory allocation //
+///////////////////////
+
+extern void * lzma_attribute((malloc))
+lzma_alloc(size_t size, lzma_allocator *allocator)
+{
+	// Some malloc() variants return NULL if called with size == 0.
+	if (size == 0)
+		size = 1;
+
+	void *ptr;
+
+	if (allocator != NULL && allocator->alloc != NULL)
+		ptr = allocator->alloc(allocator->opaque, 1, size);
+	else
+		ptr = malloc(size);
+
+	return ptr;
+}
+
+
+extern void
+lzma_free(void *ptr, lzma_allocator *allocator)
+{
+	if (allocator != NULL && allocator->free != NULL)
+		allocator->free(allocator->opaque, ptr);
+	else
+		free(ptr);
+
+	return;
+}
+
+
+//////////
+// Misc //
+//////////
+
+/// Copy as many bytes as fit from in[*in_pos..in_size) to
+/// out[*out_pos..out_size), advance both positions by that amount, and
+/// return the number of bytes copied.
+extern size_t
+lzma_bufcpy(const uint8_t *restrict in, size_t *restrict in_pos,
+		size_t in_size, uint8_t *restrict out,
+		size_t *restrict out_pos, size_t out_size)
+{
+	const size_t in_avail = in_size - *in_pos;
+	const size_t out_avail = out_size - *out_pos;
+	const size_t copy_size = MIN(in_avail, out_avail);
+
+	// Call memcpy() only if there is something to copy. With nothing
+	// to copy, in or out may be NULL, and passing NULL to memcpy()
+	// is undefined behavior even when the size is zero.
+	if (copy_size > 0)
+		memcpy(out + *out_pos, in + *in_pos, copy_size);
+
+	*in_pos += copy_size;
+	*out_pos += copy_size;
+
+	return copy_size;
+}
+
+
+extern lzma_ret
+lzma_next_filter_init(lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter_info *filters)
+{
+	lzma_next_coder_init(filters[0].init, next, allocator);
+	next->id = filters[0].id;
+	return filters[0].init == NULL
+			? LZMA_OK : filters[0].init(next, allocator, filters);
+}
+
+
+extern lzma_ret
+lzma_next_filter_update(lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter *reversed_filters)
+{
+	// Check that the application isn't trying to change the Filter ID.
+	// End of filters is indicated with LZMA_VLI_UNKNOWN in both
+	// reversed_filters[0].id and next->id.
+	if (reversed_filters[0].id != next->id)
+		return LZMA_PROG_ERROR;
+
+	if (reversed_filters[0].id == LZMA_VLI_UNKNOWN)
+		return LZMA_OK;
+
+	assert(next->update != NULL);
+	return next->update(next->coder, allocator, NULL, reversed_filters);
+}
+
+
+extern void
+lzma_next_end(lzma_next_coder *next, lzma_allocator *allocator)
+{
+	if (next->init != (uintptr_t)(NULL)) {
+		// To avoid tiny end functions that simply call
+		// lzma_free(coder, allocator), we allow leaving next->end
+		// NULL and call lzma_free() here.
+		if (next->end != NULL)
+			next->end(next->coder, allocator);
+		else
+			lzma_free(next->coder, allocator);
+
+		// Reset the variables so that we don't accidentally think
+		// that it is an already initialized coder.
+		*next = LZMA_NEXT_CODER_INIT;
+	}
+
+	return;
+}
+
+
+//////////////////////////////////////
+// External to internal API wrapper //
+//////////////////////////////////////
+
+extern lzma_ret
+lzma_strm_init(lzma_stream *strm)
+{
+	if (strm == NULL)
+		return LZMA_PROG_ERROR;
+
+	if (strm->internal == NULL) {
+		strm->internal = lzma_alloc(sizeof(lzma_internal),
+				strm->allocator);
+		if (strm->internal == NULL)
+			return LZMA_MEM_ERROR;
+
+		strm->internal->next = LZMA_NEXT_CODER_INIT;
+	}
+
+	strm->internal->supported_actions[LZMA_RUN] = false;
+	strm->internal->supported_actions[LZMA_SYNC_FLUSH] = false;
+	strm->internal->supported_actions[LZMA_FULL_FLUSH] = false;
+	strm->internal->supported_actions[LZMA_FINISH] = false;
+	strm->internal->sequence = ISEQ_RUN;
+	strm->internal->allow_buf_error = false;
+
+	strm->total_in = 0;
+	strm->total_out = 0;
+
+	return LZMA_OK;
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_code(lzma_stream *strm, lzma_action action)
+{
+	// Sanity checks
+	if ((strm->next_in == NULL && strm->avail_in != 0)
+			|| (strm->next_out == NULL && strm->avail_out != 0)
+			|| strm->internal == NULL
+			|| strm->internal->next.code == NULL
+			|| (unsigned int)(action) > LZMA_FINISH
+			|| !strm->internal->supported_actions[action])
+		return LZMA_PROG_ERROR;
+
+	switch (strm->internal->sequence) {
+	case ISEQ_RUN:
+		switch (action) {
+		case LZMA_RUN:
+			break;
+
+		case LZMA_SYNC_FLUSH:
+			strm->internal->sequence = ISEQ_SYNC_FLUSH;
+			break;
+
+		case LZMA_FULL_FLUSH:
+			strm->internal->sequence = ISEQ_FULL_FLUSH;
+			break;
+
+		case LZMA_FINISH:
+			strm->internal->sequence = ISEQ_FINISH;
+			break;
+		}
+
+		break;
+
+	case ISEQ_SYNC_FLUSH:
+		// The same action must be used until we return
+		// LZMA_STREAM_END, and the amount of input must not change.
+		if (action != LZMA_SYNC_FLUSH
+				|| strm->internal->avail_in != strm->avail_in)
+			return LZMA_PROG_ERROR;
+
+		break;
+
+	case ISEQ_FULL_FLUSH:
+		if (action != LZMA_FULL_FLUSH
+				|| strm->internal->avail_in != strm->avail_in)
+			return LZMA_PROG_ERROR;
+
+		break;
+
+	case ISEQ_FINISH:
+		if (action != LZMA_FINISH
+				|| strm->internal->avail_in != strm->avail_in)
+			return LZMA_PROG_ERROR;
+
+		break;
+
+	case ISEQ_END:
+		return LZMA_STREAM_END;
+
+	case ISEQ_ERROR:
+	default:
+		return LZMA_PROG_ERROR;
+	}
+
+	size_t in_pos = 0;
+	size_t out_pos = 0;
+	lzma_ret ret = strm->internal->next.code(
+			strm->internal->next.coder, strm->allocator,
+			strm->next_in, &in_pos, strm->avail_in,
+			strm->next_out, &out_pos, strm->avail_out, action);
+
+	strm->next_in += in_pos;
+	strm->avail_in -= in_pos;
+	strm->total_in += in_pos;
+
+	strm->next_out += out_pos;
+	strm->avail_out -= out_pos;
+	strm->total_out += out_pos;
+
+	strm->internal->avail_in = strm->avail_in;
+
+	switch (ret) {
+	case LZMA_OK:
+		// Don't return LZMA_BUF_ERROR when it happens the first time.
+		// This is to avoid returning LZMA_BUF_ERROR when avail_out
+		// was zero but still there was no more data left to be written
+		// to next_out.
+		if (out_pos == 0 && in_pos == 0) {
+			if (strm->internal->allow_buf_error)
+				ret = LZMA_BUF_ERROR;
+			else
+				strm->internal->allow_buf_error = true;
+		} else {
+			strm->internal->allow_buf_error = false;
+		}
+		break;
+
+	case LZMA_STREAM_END:
+		if (strm->internal->sequence == ISEQ_SYNC_FLUSH
+				|| strm->internal->sequence == ISEQ_FULL_FLUSH)
+			strm->internal->sequence = ISEQ_RUN;
+		else
+			strm->internal->sequence = ISEQ_END;
+
+		// Fall through
+
+	case LZMA_NO_CHECK:
+	case LZMA_UNSUPPORTED_CHECK:
+	case LZMA_GET_CHECK:
+	case LZMA_MEMLIMIT_ERROR:
+		// Something else than LZMA_OK, but not a fatal error,
+		// that is, coding may be continued (except if ISEQ_END).
+		strm->internal->allow_buf_error = false;
+		break;
+
+	default:
+		// All the other errors are fatal; coding cannot be continued.
+		assert(ret != LZMA_BUF_ERROR);
+		strm->internal->sequence = ISEQ_ERROR;
+		break;
+	}
+
+	return ret;
+}
+
+
+extern LZMA_API(void)
+lzma_end(lzma_stream *strm)
+{
+	if (strm != NULL && strm->internal != NULL) {
+		lzma_next_end(&strm->internal->next, strm->allocator);
+		lzma_free(strm->internal, strm->allocator);
+		strm->internal = NULL;
+	}
+
+	return;
+}
+
+
+extern LZMA_API(lzma_check)
+lzma_get_check(const lzma_stream *strm)
+{
+	// Return LZMA_CHECK_NONE if we cannot know the check type.
+	// It's a bug in the application if this happens.
+	if (strm->internal->next.get_check == NULL)
+		return LZMA_CHECK_NONE;
+
+	return strm->internal->next.get_check(strm->internal->next.coder);
+}
+
+
+extern LZMA_API(uint64_t)
+lzma_memusage(const lzma_stream *strm)
+{
+	if (strm == NULL || strm->internal == NULL
+			|| strm->internal->next.memconfig == NULL)
+		return 0;
+
+	// A new limit of zero only queries; both values are fetched but
+	// just the usage is reported.
+	uint64_t usage;
+	uint64_t limit;
+	const lzma_next_coder *const next = &strm->internal->next;
+	return next->memconfig(next->coder, &usage, &limit, 0) == LZMA_OK
+			? usage : 0;
+}
+
+
+extern LZMA_API(uint64_t)
+lzma_memlimit_get(const lzma_stream *strm)
+{
+	if (strm == NULL || strm->internal == NULL
+			|| strm->internal->next.memconfig == NULL)
+		return 0;
+
+	// Same query as in lzma_memusage(), but here the current limit
+	// is the value that gets reported.
+	uint64_t usage;
+	uint64_t limit;
+	const lzma_next_coder *const next = &strm->internal->next;
+	return next->memconfig(next->coder, &usage, &limit, 0) == LZMA_OK
+			? limit : 0;
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_memlimit_set(lzma_stream *strm, uint64_t new_memlimit)
+{
+	// Dummy variables to simplify memconfig functions
+	uint64_t old_memlimit;
+	uint64_t memusage;
+
+	if (strm == NULL || strm->internal == NULL
+			|| strm->internal->next.memconfig == NULL)
+		return LZMA_PROG_ERROR;
+
+	if (new_memlimit != 0 && new_memlimit < LZMA_MEMUSAGE_BASE)
+		return LZMA_MEMLIMIT_ERROR;
+
+	return strm->internal->next.memconfig(strm->internal->next.coder,
+			&memusage, &old_memlimit, new_memlimit);
+}
diff --git a/src/liblzma/common/common.h b/src/liblzma/common/common.h
new file mode 100644
index 000000000000..7b7fbb11c3d8
--- /dev/null
+++ b/src/liblzma/common/common.h
@@ -0,0 +1,290 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       common.h
+/// \brief      Definitions common to the whole liblzma library
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_COMMON_H
+#define LZMA_COMMON_H
+
+#include "sysdefs.h"
+#include "mythread.h"
+#include "tuklib_integer.h"
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+#	ifdef DLL_EXPORT
+#		define LZMA_API_EXPORT __declspec(dllexport)
+#	else
+#		define LZMA_API_EXPORT
+#	endif
+// Don't use ifdef or defined() below.
+#elif HAVE_VISIBILITY
+#	define LZMA_API_EXPORT __attribute__((__visibility__("default")))
+#else
+#	define LZMA_API_EXPORT
+#endif
+
+#define LZMA_API(type) LZMA_API_EXPORT type LZMA_API_CALL
+
+#include "lzma.h"
+
+// These allow helping the compiler in some often-executed branches, whose
+// result is almost always the same.
+#ifdef __GNUC__
+#	define likely(expr) __builtin_expect(expr, true)
+#	define unlikely(expr) __builtin_expect(expr, false)
+#else
+#	define likely(expr) (expr)
+#	define unlikely(expr) (expr)
+#endif
+
+
+/// Size of temporary buffers needed in some filters
+#define LZMA_BUFFER_SIZE 4096
+
+
+/// Starting value for memory usage estimates. Instead of calculating size
+/// of _every_ structure and taking into account malloc() overhead etc., we
+/// add a base size to all memory usage estimates. It's not very accurate
+/// but should be easily good enough.
+#define LZMA_MEMUSAGE_BASE (UINT64_C(1) << 15)
+
+/// Start of internal Filter ID space. These IDs must never be used
+/// in Streams.
+#define LZMA_FILTER_RESERVED_START (LZMA_VLI_C(1) << 62)
+
+
+/// Internal helper filter used by Subblock decoder. It is mapped to an
+/// otherwise invalid Filter ID, which is impossible to get from any input
+/// file (not even from a malicious one).
+#define LZMA_FILTER_SUBBLOCK_HELPER LZMA_VLI_C(0x7000000000000001)
+
+
+/// Supported flags that can be passed to lzma_stream_decoder()
+/// or lzma_auto_decoder().
+#define LZMA_SUPPORTED_FLAGS \
+	( LZMA_TELL_NO_CHECK \
+	| LZMA_TELL_UNSUPPORTED_CHECK \
+	| LZMA_TELL_ANY_CHECK \
+	| LZMA_CONCATENATED )
+
+
+/// Type of encoder/decoder specific data; the actual structure is defined
+/// differently in different coders.
+typedef struct lzma_coder_s lzma_coder;
+
+typedef struct lzma_next_coder_s lzma_next_coder;
+
+typedef struct lzma_filter_info_s lzma_filter_info;
+
+
+/// Type of a function used to initialize a filter encoder or decoder
+typedef lzma_ret (*lzma_init_function)(
+		lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter_info *filters);
+
+/// Type of a function to do some kind of coding work (filters, Stream,
+/// Block encoders/decoders etc.). Some special coders don't use both
+/// input and output buffers, but for simplicity they still use this same
+/// function prototype.
+typedef lzma_ret (*lzma_code_function)(
+		lzma_coder *coder, lzma_allocator *allocator,
+		const uint8_t *restrict in, size_t *restrict in_pos,
+		size_t in_size, uint8_t *restrict out,
+		size_t *restrict out_pos, size_t out_size,
+		lzma_action action);
+
+/// Type of a function to free the memory allocated for the coder
+typedef void (*lzma_end_function)(
+		lzma_coder *coder, lzma_allocator *allocator);
+
+
+/// Raw coder validates and converts an array of lzma_filter structures to
+/// an array of lzma_filter_info structures. This array is used with
+/// lzma_next_filter_init to initialize the filter chain.
+struct lzma_filter_info_s {
+	/// Filter ID. This is used only by the encoder
+	/// with lzma_filters_update().
+	lzma_vli id;
+
+	/// Pointer to function used to initialize the filter.
+	/// This is NULL to indicate end of array.
+	lzma_init_function init;
+
+	/// Pointer to filter's options structure
+	void *options;
+};
+
+
+/// Hold data and function pointers of the next filter in the chain.
+struct lzma_next_coder_s {
+	/// Pointer to coder-specific data
+	lzma_coder *coder;
+
+	/// Filter ID. This is LZMA_VLI_UNKNOWN when this structure doesn't
+	/// point to a filter coder.
+	lzma_vli id;
+
+	/// "Pointer" to init function. This is never called here.
+	/// We need only to detect if we are initializing a coder
+	/// that was allocated earlier. See lzma_next_coder_init and
+	/// lzma_next_strm_init macros in this file.
+	uintptr_t init;
+
+	/// Pointer to function to do the actual coding
+	lzma_code_function code;
+
+	/// Pointer to function to free lzma_next_coder.coder. This can
+	/// be NULL; in that case, lzma_free is called to free
+	/// lzma_next_coder.coder.
+	lzma_end_function end;
+
+	/// Pointer to function to return the type of the integrity check.
+	/// Most coders won't support this.
+	lzma_check (*get_check)(const lzma_coder *coder);
+
+	/// Pointer to function to get and/or change the memory usage limit.
+	/// If new_memlimit == 0, the limit is not changed.
+	lzma_ret (*memconfig)(lzma_coder *coder, uint64_t *memusage,
+			uint64_t *old_memlimit, uint64_t new_memlimit);
+
+	/// Update the filter-specific options or the whole filter chain
+	/// in the encoder.
+	lzma_ret (*update)(lzma_coder *coder, lzma_allocator *allocator,
+			const lzma_filter *filters,
+			const lzma_filter *reversed_filters);
+};
+
+
+/// Macro to initialize lzma_next_coder structure
+#define LZMA_NEXT_CODER_INIT \
+	(lzma_next_coder){ \
+		.coder = NULL, \
+		.init = (uintptr_t)(NULL), \
+		.id = LZMA_VLI_UNKNOWN, \
+		.code = NULL, \
+		.end = NULL, \
+		.get_check = NULL, \
+		.memconfig = NULL, \
+		.update = NULL, \
+	}
+
+
+/// Internal data for lzma_strm_init, lzma_code, and lzma_end. A pointer to
+/// this is stored in lzma_stream.
+struct lzma_internal_s {
+	/// The actual coder that should do something useful
+	lzma_next_coder next;
+
+	/// Track the state of the coder. This is used to validate arguments
+	/// so that the actual coders can rely on e.g. that LZMA_SYNC_FLUSH
+	/// is used on every call to lzma_code until next.code has returned
+	/// LZMA_STREAM_END.
+	enum {
+		ISEQ_RUN,
+		ISEQ_SYNC_FLUSH,
+		ISEQ_FULL_FLUSH,
+		ISEQ_FINISH,
+		ISEQ_END,
+		ISEQ_ERROR,
+	} sequence;
+
+	/// A copy of lzma_stream avail_in. This is used to verify that the
+	/// amount of input doesn't change once e.g. LZMA_FINISH has been
+	/// used.
+	size_t avail_in;
+
+	/// Indexed by lzma_action; true for the actions that next.code allows.
+	bool supported_actions[4];
+
+	/// If true, lzma_code will return LZMA_BUF_ERROR if no progress was
+	/// made (no input consumed and no output produced by next.code).
+	bool allow_buf_error;
+};
+
+
+/// Allocates memory
+extern void *lzma_alloc(size_t size, lzma_allocator *allocator)
+		lzma_attribute((malloc));
+
+/// Frees memory
+extern void lzma_free(void *ptr, lzma_allocator *allocator);
+
+
+/// Allocates strm->internal if it is NULL, and initializes *strm and
+/// strm->internal. This function is only called via lzma_next_strm_init macro.
+extern lzma_ret lzma_strm_init(lzma_stream *strm);
+
+/// Initializes the next filter in the chain, if any. This takes care of
+/// freeing the memory of previously initialized filter if it is different
+/// than the filter being initialized now. This way the actual filter
+/// initialization functions don't need to use lzma_next_coder_init macro.
+extern lzma_ret lzma_next_filter_init(lzma_next_coder *next,
+		lzma_allocator *allocator, const lzma_filter_info *filters);
+
+/// Update the next filter in the chain, if any. This checks that
+/// the application is not trying to change the Filter IDs.
+extern lzma_ret lzma_next_filter_update(
+		lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter *reversed_filters);
+
+/// Frees the memory allocated for next->coder either using next->end or,
+/// if next->end is NULL, using lzma_free.
+extern void lzma_next_end(lzma_next_coder *next, lzma_allocator *allocator);
+
+
+/// Copy as much data as possible from in[] to out[] and update *in_pos
+/// and *out_pos accordingly. Returns the number of bytes copied.
+extern size_t lzma_bufcpy(const uint8_t *restrict in, size_t *restrict in_pos,
+		size_t in_size, uint8_t *restrict out,
+		size_t *restrict out_pos, size_t out_size);
+
+
+/// \brief      Return if expression doesn't evaluate to LZMA_OK
+///
+/// There are several situations where we want to return immediately
+/// with the value of expr if it isn't LZMA_OK. This macro shortens
+/// the code a little.
+#define return_if_error(expr) \
+do { \
+	const lzma_ret ret_ = (expr); \
+	if (ret_ != LZMA_OK) \
+		return ret_; \
+} while (0)
+
+
+/// If next is initialized for some other coder, free that coder. Then mark
+/// that next is _possibly_ initialized for the coder using this macro.
+/// "Possibly" means that if e.g. allocation of next->coder fails, the
+/// structure isn't actually initialized for this coder, but leaving
+/// next->init to func is still OK.
+#define lzma_next_coder_init(func, next, allocator) \
+do { \
+	if ((uintptr_t)(func) != (next)->init) \
+		lzma_next_end(next, allocator); \
+	(next)->init = (uintptr_t)(func); \
+} while (0)
+
+
+/// Initializes lzma_strm and calls func() to initialize strm->internal->next.
+/// (The function being called will use lzma_next_coder_init()). If
+/// initialization fails, memory that wasn't freed by func() is freed
+/// along strm->internal.
+#define lzma_next_strm_init(func, strm, ...) \
+do { \
+	return_if_error(lzma_strm_init(strm)); \
+	const lzma_ret ret_ = func(&(strm)->internal->next, \
+			(strm)->allocator, __VA_ARGS__); \
+	if (ret_ != LZMA_OK) { \
+		lzma_end(strm); \
+		return ret_; \
+	} \
+} while (0)
+
+#endif
diff --git a/src/liblzma/common/easy_buffer_encoder.c b/src/liblzma/common/easy_buffer_encoder.c
new file mode 100644
index 000000000000..c4be34ccfa27
--- /dev/null
+++ b/src/liblzma/common/easy_buffer_encoder.c
@@ -0,0 +1,32 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       easy_buffer_encoder.c
+/// \brief      Easy single-call .xz Stream encoder
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "easy_preset.h"
+
+
+/// Single-call .xz Stream encoding with the filter chain of a preset.
+/// Returns LZMA_OPTIONS_ERROR if lzma_easy_preset() rejects the preset;
+/// otherwise the result of lzma_stream_buffer_encode().
+extern LZMA_API(lzma_ret)
+lzma_easy_buffer_encode(uint32_t preset, lzma_check check,
+		lzma_allocator *allocator, const uint8_t *in, size_t in_size,
+		uint8_t *out, size_t *out_pos, size_t out_size)
+{
+	// Map the preset to a filter chain (true means failure).
+	lzma_options_easy easy;
+	const bool bad_preset = lzma_easy_preset(&easy, preset);
+	if (bad_preset)
+		return LZMA_OPTIONS_ERROR;
+
+	return lzma_stream_buffer_encode(easy.filters, check, allocator,
+			in, in_size, out, out_pos, out_size);
+}
diff --git a/src/liblzma/common/easy_decoder_memusage.c b/src/liblzma/common/easy_decoder_memusage.c
new file mode 100644
index 000000000000..20bcd5b71758
--- /dev/null
+++ b/src/liblzma/common/easy_decoder_memusage.c
@@ -0,0 +1,24 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       easy_decoder_memusage.c
+/// \brief      Decoder memory usage calculation to match easy encoder presets
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "easy_preset.h"
+
+
+extern LZMA_API(uint64_t)
+lzma_easy_decoder_memusage(uint32_t preset)
+{
+	// lzma_easy_preset() returns true if the preset is unusable.
+	lzma_options_easy easy;
+	return lzma_easy_preset(&easy, preset)
+			? UINT32_MAX
+			: lzma_raw_decoder_memusage(easy.filters);
+}
diff --git a/src/liblzma/common/easy_encoder.c b/src/liblzma/common/easy_encoder.c
new file mode 100644
index 000000000000..d13ccd7351f1
--- /dev/null
+++ b/src/liblzma/common/easy_encoder.c
@@ -0,0 +1,28 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       easy_encoder.c
+/// \brief      Easy .xz Stream encoder initialization
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "easy_preset.h"
+#include "stream_encoder.h"
+
+
+/// Initialize strm as an .xz Stream encoder that uses the filter chain of
+/// the given preset. Returns LZMA_OPTIONS_ERROR if lzma_easy_preset()
+/// rejects the preset; otherwise the result of lzma_stream_encoder().
+extern LZMA_API(lzma_ret)
+lzma_easy_encoder(lzma_stream *strm, uint32_t preset, lzma_check check)
+{
+	lzma_options_easy easy;
+	const bool bad_preset = lzma_easy_preset(&easy, preset);
+
+	return bad_preset ? LZMA_OPTIONS_ERROR
+			: lzma_stream_encoder(strm, easy.filters, check);
+}
diff --git a/src/liblzma/common/easy_encoder_memusage.c b/src/liblzma/common/easy_encoder_memusage.c
new file mode 100644
index 000000000000..e91057584233
--- /dev/null
+++ b/src/liblzma/common/easy_encoder_memusage.c
@@ -0,0 +1,24 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       easy_encoder_memusage.c
+/// \brief      Easy .xz Stream encoder memory usage calculation
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "easy_preset.h"
+
+
+extern LZMA_API(uint64_t)
+lzma_easy_encoder_memusage(uint32_t preset)
+{
+	// lzma_easy_preset() returns true if the preset is unusable.
+	lzma_options_easy easy;
+	return lzma_easy_preset(&easy, preset)
+			? UINT32_MAX
+			: lzma_raw_encoder_memusage(easy.filters);
+}
diff --git a/src/liblzma/common/easy_preset.c b/src/liblzma/common/easy_preset.c
new file mode 100644
index 000000000000..2f9859860ad7
--- /dev/null
+++ b/src/liblzma/common/easy_preset.c
@@ -0,0 +1,34 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       easy_preset.c
+/// \brief      Preset handling for easy encoder and decoder
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "easy_preset.h"
+
+
+/// Fill *opt_easy with the LZMA2 options of the preset and a filter chain
+/// that consists of that single LZMA2 filter. Returns true on error,
+/// false on success.
+extern bool
+lzma_easy_preset(lzma_options_easy *opt_easy, uint32_t preset)
+{
+	// A true return value from lzma_lzma_preset() is passed on as is.
+	const bool failed = lzma_lzma_preset(&opt_easy->opt_lzma, preset);
+	if (failed)
+		return true;
+
+	// One LZMA2 filter followed by the end-of-chain marker.
+	lzma_filter *const chain = opt_easy->filters;
+	chain[0].id = LZMA_FILTER_LZMA2;
+	chain[0].options = &opt_easy->opt_lzma;
+	chain[1].id = LZMA_VLI_UNKNOWN;
+
+	return false;
+}
diff --git a/src/liblzma/common/easy_preset.h b/src/liblzma/common/easy_preset.h
new file mode 100644
index 000000000000..382ade894066
--- /dev/null
+++ b/src/liblzma/common/easy_preset.h
@@ -0,0 +1,32 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       easy_preset.h
+/// \brief      Preset handling for easy encoder and decoder
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+
+
+typedef struct {
+	/// We need to keep the filters array available in case
+	/// LZMA_FULL_FLUSH is used.
+	lzma_filter filters[LZMA_FILTERS_MAX + 1];
+
+	/// Options for LZMA2
+	lzma_options_lzma opt_lzma;
+
+	// Options for more filters can be added later, so this struct
+	// is not ready to be put into the public API.
+
+} lzma_options_easy;
+
+
+/// Set *easy to the settings given by the preset. Returns true on error,
+/// false on success.
+extern bool lzma_easy_preset(lzma_options_easy *easy, uint32_t preset);
diff --git a/src/liblzma/common/filter_buffer_decoder.c b/src/liblzma/common/filter_buffer_decoder.c
new file mode 100644
index 000000000000..2d35ef8e0af4
--- /dev/null
+++ b/src/liblzma/common/filter_buffer_decoder.c
@@ -0,0 +1,95 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       filter_buffer_decoder.c
+/// \brief      Single-call raw decoding
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "filter_decoder.h"
+
+
+/// Single-call raw decoding of in[*in_pos..in_size) into
+/// out[*out_pos..out_size) with the given filter chain.
+///
+/// On success LZMA_OK is returned and *in_pos and *out_pos have been
+/// advanced. On any failure both positions are left at their original
+/// values. When the decoder stops without finishing, LZMA_BUF_ERROR
+/// means that the output buffer was too small and LZMA_DATA_ERROR that
+/// the input was truncated.
+extern LZMA_API(lzma_ret)
+lzma_raw_buffer_decode(const lzma_filter *filters, lzma_allocator *allocator,
+		const uint8_t *in, size_t *in_pos, size_t in_size,
+		uint8_t *out, size_t *out_pos, size_t out_size)
+{
+	// Validate what isn't validated later in filter_common.c.
+	if (in == NULL || in_pos == NULL || *in_pos > in_size || out == NULL
+			|| out_pos == NULL || *out_pos > out_size)
+		return LZMA_PROG_ERROR;
+
+	// Initialize the decoder.
+	lzma_next_coder next = LZMA_NEXT_CODER_INIT;
+	return_if_error(lzma_raw_decoder_init(&next, allocator, filters));
+
+	// Store the positions so that we can restore them if something
+	// goes wrong.
+	const size_t in_start = *in_pos;
+	const size_t out_start = *out_pos;
+
+	// Do the actual decoding. The decoder is freed further below.
+	lzma_ret ret = next.code(next.coder, allocator, in, in_pos, in_size,
+			out, out_pos, out_size, LZMA_FINISH);
+
+	if (ret == LZMA_STREAM_END) {
+		ret = LZMA_OK;
+	} else {
+		if (ret == LZMA_OK) {
+			// Either the input was truncated or the
+			// output buffer was too small.
+			assert(*in_pos == in_size || *out_pos == out_size);
+
+			if (*in_pos != in_size) {
+				// Since input wasn't consumed completely,
+				// the output buffer became full and is
+				// too small.
+				ret = LZMA_BUF_ERROR;
+
+			} else if (*out_pos != out_size) {
+				// Since output didn't become full, the input
+				// has to be truncated.
+				ret = LZMA_DATA_ERROR;
+
+			} else {
+				// All the input was consumed and output
+				// buffer is full. Now we don't immediately
+				// know the reason for the error. Try
+				// decoding one more byte. If it succeeds,
+				// then the output buffer was too small. If
+				// we cannot get a new output byte, the input
+				// is truncated.
+				uint8_t tmp[1];
+				size_t tmp_pos = 0;
+				(void)next.code(next.coder, allocator,
+						in, in_pos, in_size,
+						tmp, &tmp_pos, 1, LZMA_FINISH);
+
+				if (tmp_pos == 1)
+					ret = LZMA_BUF_ERROR;
+				else
+					ret = LZMA_DATA_ERROR;
+			}
+		}
+
+		// Restore the positions.
+		*in_pos = in_start;
+		*out_pos = out_start;
+	}
+
+	lzma_next_end(&next, allocator);
+
+	return ret;
+}
diff --git a/src/liblzma/common/filter_buffer_encoder.c b/src/liblzma/common/filter_buffer_encoder.c
new file mode 100644
index 000000000000..646e1b30374e
--- /dev/null
+++ b/src/liblzma/common/filter_buffer_encoder.c
@@ -0,0 +1,54 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       filter_buffer_encoder.c
+/// \brief      Single-call raw encoding
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "filter_encoder.h"
+
+
+extern LZMA_API(lzma_ret)
+lzma_raw_buffer_encode(const lzma_filter *filters, lzma_allocator *allocator,
+		const uint8_t *in, size_t in_size, uint8_t *out,
+		size_t *out_pos, size_t out_size)
+{
+	// The filter chain is checked in filter_common.c; only the buffer
+	// arguments need to be checked here. NULL input is fine when empty.
+	if (in == NULL && in_size != 0)
+		return LZMA_PROG_ERROR;
+
+	if (out == NULL || out_pos == NULL || *out_pos > out_size)
+		return LZMA_PROG_ERROR;
+
+	// Set up the encoder chain.
+	lzma_next_coder encoder = LZMA_NEXT_CODER_INIT;
+	return_if_error(lzma_raw_encoder_init(&encoder, allocator, filters));
+
+	// Remember where the output began so that a failed call leaves
+	// *out_pos untouched from the caller's point of view.
+	const size_t saved_out_pos = *out_pos;
+
+	// Encode everything in one call. The coder isn't needed afterwards
+	// whether or not encoding succeeded.
+	size_t consumed = 0;
+	const lzma_ret status = encoder.code(encoder.coder, allocator,
+			in, &consumed, in_size,
+			out, out_pos, out_size, LZMA_FINISH);
+	lzma_next_end(&encoder, allocator);
+
+	if (status == LZMA_STREAM_END)
+		return LZMA_OK;
+
+	// Failure of some kind: undo the changes to the output position.
+	// LZMA_OK without LZMA_STREAM_END means the output buffer filled up.
+	assert(status != LZMA_OK || *out_pos == out_size);
+	*out_pos = saved_out_pos;
+
+	return status == LZMA_OK ? LZMA_BUF_ERROR : status;
+}
diff --git a/src/liblzma/common/filter_common.c b/src/liblzma/common/filter_common.c
new file mode 100644
index 000000000000..2322d7deec9a
--- /dev/null
+++ b/src/liblzma/common/filter_common.c
@@ -0,0 +1,346 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file filter_common.c
+/// \brief Filter-specific stuff common for both encoder and decoder
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "filter_common.h"
+
+
+static const struct {
+	/// Filter ID
+	lzma_vli id;
+
+	/// Size of the filter-specific options structure
+	size_t options_size;
+
+	/// True if it is OK to use this filter as non-last filter in
+	/// the chain.
+	bool non_last_ok;
+
+	/// True if it is OK to use this filter as the last filter in
+	/// the chain.
+	bool last_ok;
+
+	/// True if the filter may change the size of the data (that is, the
+	/// amount of encoded output can be different than the amount of
+	/// uncompressed input).
+	bool changes_size;
+
+} features[] = {
+#if defined(HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1)
+	{
+		.id = LZMA_FILTER_LZMA1,
+		.options_size = sizeof(lzma_options_lzma),
+		.non_last_ok = false,
+		.last_ok = true,
+		.changes_size = true,
+	},
+#endif
+#if defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2)
+	{
+		.id = LZMA_FILTER_LZMA2,
+		.options_size = sizeof(lzma_options_lzma),
+		.non_last_ok = false,
+		.last_ok = true,
+		.changes_size = true,
+	},
+#endif
+#if defined(HAVE_ENCODER_SUBBLOCK) || defined(HAVE_DECODER_SUBBLOCK)
+	{
+		.id = LZMA_FILTER_SUBBLOCK,
+		.options_size = sizeof(lzma_options_subblock),
+		.non_last_ok = true,
+		.last_ok = true,
+		.changes_size = true,
+	},
+#endif
+#if defined(HAVE_ENCODER_X86) || defined(HAVE_DECODER_X86)
+	{
+		.id = LZMA_FILTER_X86,
+		.options_size = sizeof(lzma_options_bcj),
+		.non_last_ok = true,
+		.last_ok = false,
+		.changes_size = false,
+	},
+#endif
+#if defined(HAVE_ENCODER_POWERPC) || defined(HAVE_DECODER_POWERPC)
+	{
+		.id = LZMA_FILTER_POWERPC,
+		.options_size = sizeof(lzma_options_bcj),
+		.non_last_ok = true,
+		.last_ok = false,
+		.changes_size = false,
+	},
+#endif
+#if defined(HAVE_ENCODER_IA64) || defined(HAVE_DECODER_IA64)
+	{
+		.id = LZMA_FILTER_IA64,
+		.options_size = sizeof(lzma_options_bcj),
+		.non_last_ok = true,
+		.last_ok = false,
+		.changes_size = false,
+	},
+#endif
+#if defined(HAVE_ENCODER_ARM) || defined(HAVE_DECODER_ARM)
+	{
+		.id = LZMA_FILTER_ARM,
+		.options_size = sizeof(lzma_options_bcj),
+		.non_last_ok = true,
+		.last_ok = false,
+		.changes_size = false,
+	},
+#endif
+#if defined(HAVE_ENCODER_ARMTHUMB) || defined(HAVE_DECODER_ARMTHUMB)
+	{
+		.id = LZMA_FILTER_ARMTHUMB,
+		.options_size = sizeof(lzma_options_bcj),
+		.non_last_ok = true,
+		.last_ok = false,
+		.changes_size = false,
+	},
+#endif
+#if defined(HAVE_ENCODER_SPARC) || defined(HAVE_DECODER_SPARC)
+	{
+		.id = LZMA_FILTER_SPARC,
+		.options_size = sizeof(lzma_options_bcj),
+		.non_last_ok = true,
+		.last_ok = false,
+		.changes_size = false,
+	},
+#endif
+#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
+	{
+		.id = LZMA_FILTER_DELTA,
+		.options_size = sizeof(lzma_options_delta),
+		.non_last_ok = true,
+		.last_ok = false,
+		.changes_size = false,
+	},
+#endif
+	{
+		.id = LZMA_VLI_UNKNOWN
+	}
+};
+
+
+extern LZMA_API(lzma_ret)
+lzma_filters_copy(const lzma_filter *src, lzma_filter *dest,
+		lzma_allocator *allocator)
+{
+	if (src == NULL || dest == NULL)
+		return LZMA_PROG_ERROR;
+
+	lzma_ret ret;
+	size_t i;
+	for (i = 0; src[i].id != LZMA_VLI_UNKNOWN; ++i) {
+		// There must be a maximum of four filters plus
+		// the array terminator.
+		if (i == LZMA_FILTERS_MAX) {
+			ret = LZMA_OPTIONS_ERROR;
+			goto error;
+		}
+
+		dest[i].id = src[i].id;
+
+		if (src[i].options == NULL) {
+			dest[i].options = NULL;
+		} else {
+			// See if the filter is supported only when the
+			// options is not NULL. This might be convenient
+			// sometimes if the app is actually copying only
+			// a partial filter chain with a place holder ID.
+			//
+			// When options is not NULL, the Filter ID must be
+			// supported by us, because otherwise we don't know
+			// how big the options are.
+			size_t j;
+			for (j = 0; src[i].id != features[j].id; ++j) {
+				if (features[j].id == LZMA_VLI_UNKNOWN) {
+					ret = LZMA_OPTIONS_ERROR;
+					goto error;
+				}
+			}
+
+			// Allocate and copy the options.
+			dest[i].options = lzma_alloc(features[j].options_size,
+					allocator);
+			if (dest[i].options == NULL) {
+				ret = LZMA_MEM_ERROR;
+				goto error;
+			}
+
+			memcpy(dest[i].options, src[i].options,
+					features[j].options_size);
+		}
+	}
+
+	// Terminate the filter array.
+	assert(i < LZMA_FILTERS_MAX + 1);
+	dest[i].id = LZMA_VLI_UNKNOWN;
+	dest[i].options = NULL;
+
+	return LZMA_OK;
+
+error:
+	// Free the options which we have already allocated.
+	while (i-- > 0) {
+		lzma_free(dest[i].options, allocator);
+		dest[i].options = NULL;
+	}
+
+	return ret;
+}
+
+
+static lzma_ret
+validate_chain(const lzma_filter *filters, size_t *count)
+{
+	// There must be at least one filter.
+	if (filters == NULL || filters[0].id == LZMA_VLI_UNKNOWN)
+		return LZMA_PROG_ERROR;
+
+	// Number of non-last filters that may change the size of the data
+	// significantly (that is, more than 1-2 % or so).
+	size_t changes_size_count = 0;
+
+	// True if it is OK to add a new filter after the current filter.
+	bool non_last_ok = true;
+
+	// True if the last filter in the given chain is actually usable as
+	// the last filter. Only filters that support embedding End of Payload
+	// Marker can be used as the last filter in the chain.
+	bool last_ok = false;
+
+	size_t i = 0;
+	do {
+		size_t j;
+		for (j = 0; filters[i].id != features[j].id; ++j)
+			if (features[j].id == LZMA_VLI_UNKNOWN)
+				return LZMA_OPTIONS_ERROR;
+
+		// If the previous filter in the chain cannot be a non-last
+		// filter, the chain is invalid.
+		if (!non_last_ok)
+			return LZMA_OPTIONS_ERROR;
+
+		non_last_ok = features[j].non_last_ok;
+		last_ok = features[j].last_ok;
+		changes_size_count += features[j].changes_size;
+
+	} while (filters[++i].id != LZMA_VLI_UNKNOWN);
+
+	// There must be 1-4 filters. The last filter must be usable as
+	// the last filter in the chain. A maximum of three filters are
+	// allowed to change the size of the data.
+	if (i > LZMA_FILTERS_MAX || !last_ok || changes_size_count > 3)
+		return LZMA_OPTIONS_ERROR;
+
+	*count = i;
+	return LZMA_OK;
+}
+
+
+extern lzma_ret
+lzma_raw_coder_init(lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter *options,
+		lzma_filter_find coder_find, bool is_encoder)
+{
+	// Do some basic validation and get the number of filters.
+	size_t count;
+	return_if_error(validate_chain(options, &count));
+
+	// Set the filter functions and copy the options pointer.
+	lzma_filter_info filters[LZMA_FILTERS_MAX + 1];
+	if (is_encoder) {
+		for (size_t i = 0; i < count; ++i) {
+			// The order of the filters is reversed in the
+			// encoder. It allows more efficient handling
+			// of the uncompressed data.
+			const size_t j = count - i - 1;
+
+			const lzma_filter_coder *const fc
+					= coder_find(options[i].id);
+			if (fc == NULL || fc->init == NULL)
+				return LZMA_OPTIONS_ERROR;
+
+			filters[j].id = options[i].id;
+			filters[j].init = fc->init;
+			filters[j].options = options[i].options;
+		}
+	} else {
+		for (size_t i = 0; i < count; ++i) {
+			const lzma_filter_coder *const fc
+					= coder_find(options[i].id);
+			if (fc == NULL || fc->init == NULL)
+				return LZMA_OPTIONS_ERROR;
+
+			filters[i].id = options[i].id;
+			filters[i].init = fc->init;
+			filters[i].options = options[i].options;
+		}
+	}
+
+	// Terminate the array.
+	filters[count].id = LZMA_VLI_UNKNOWN;
+	filters[count].init = NULL;
+
+	// Initialize the filters.
+	const lzma_ret ret = lzma_next_filter_init(next, allocator, filters);
+	if (ret != LZMA_OK)
+		lzma_next_end(next, allocator);
+
+	return ret;
+}
+
+
+extern uint64_t
+lzma_raw_coder_memusage(lzma_filter_find coder_find,
+		const lzma_filter *filters)
+{
+	// The chain has to have at least one filter.
+	{
+		size_t tmp;
+		if (validate_chain(filters, &tmp) != LZMA_OK)
+			return UINT64_MAX;
+	}
+
+	uint64_t total = 0;
+	size_t i = 0;
+
+	do {
+		const lzma_filter_coder *const fc
+				 = coder_find(filters[i].id);
+		if (fc == NULL)
+			return UINT64_MAX; // Unsupported Filter ID
+
+		if (fc->memusage == NULL) {
+			// This filter doesn't have a function to calculate
+			// the memory usage and validate the options. Such
+			// filters need only little memory, so we use 1 KiB
+			// as a good estimate. They also accept all possible
+			// options, so there's no need to worry about lack
+			// of validation.
+			total += 1024;
+		} else {
+			// Call the filter-specific memory usage calculation
+			// function.
+			const uint64_t usage
+					= fc->memusage(filters[i].options);
+			if (usage == UINT64_MAX)
+				return UINT64_MAX; // Invalid options
+
+			total += usage;
+		}
+	} while (filters[++i].id != LZMA_VLI_UNKNOWN);
+
+	// Add some fixed amount of extra. It's to compensate memory usage
+	// of Stream, Block etc. coders, malloc() overhead, stack etc.
+	return total + LZMA_MEMUSAGE_BASE;
+}
diff --git a/src/liblzma/common/filter_common.h b/src/liblzma/common/filter_common.h
new file mode 100644
index 000000000000..cd61fc0724f3
--- /dev/null
+++ b/src/liblzma/common/filter_common.h
@@ -0,0 +1,48 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file filter_common.h
+/// \brief Filter-specific stuff common for both encoder and decoder
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_FILTER_COMMON_H
+#define LZMA_FILTER_COMMON_H
+
+#include "common.h"
+
+
+/// Both lzma_filter_encoder and lzma_filter_decoder begin with these members.
+typedef struct {
+	/// Filter ID
+	lzma_vli id;
+
+	/// Initializes the filter coder and calls lzma_next_filter_init()
+	/// for filters + 1.
+	lzma_init_function init;
+
+	/// Calculates memory usage of the coder. If the options are
+	/// invalid, UINT64_MAX is returned.
+	uint64_t (*memusage)(const void *options);
+
+} lzma_filter_coder;
+
+
+typedef const lzma_filter_coder *(*lzma_filter_find)(lzma_vli id);
+
+
+extern lzma_ret lzma_raw_coder_init(
+		lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter *filters,
+		lzma_filter_find coder_find, bool is_encoder);
+
+
+extern uint64_t lzma_raw_coder_memusage(lzma_filter_find coder_find,
+		const lzma_filter *filters);
+
+
+#endif
diff --git a/src/liblzma/common/filter_decoder.c b/src/liblzma/common/filter_decoder.c
new file mode 100644
index 000000000000..95f77b76cf7a
--- /dev/null
+++ b/src/liblzma/common/filter_decoder.c
@@ -0,0 +1,199 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file filter_decoder.c
+/// \brief Filter ID mapping to filter-specific functions
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "filter_decoder.h"
+#include "filter_common.h"
+#include "lzma_decoder.h"
+#include "lzma2_decoder.h"
+#include "subblock_decoder.h"
+#include "subblock_decoder_helper.h"
+#include "simple_decoder.h"
+#include "delta_decoder.h"
+
+
+typedef struct {
+	/// Filter ID
+	lzma_vli id;
+
+	/// Initializes the filter decoder and calls lzma_next_filter_init()
+	/// for filters + 1.
+	lzma_init_function init;
+
+	/// Calculates memory usage of the decoder. If the options are
+	/// invalid, UINT64_MAX is returned.
+	uint64_t (*memusage)(const void *options);
+
+	/// Decodes Filter Properties.
+	///
+	/// \return - LZMA_OK: Properties decoded successfully.
+	/// - LZMA_OPTIONS_ERROR: Unsupported properties
+	/// - LZMA_MEM_ERROR: Memory allocation failed.
+	lzma_ret (*props_decode)(void **options, lzma_allocator *allocator,
+			const uint8_t *props, size_t props_size);
+
+} lzma_filter_decoder;
+
+
+static const lzma_filter_decoder decoders[] = {
+#ifdef HAVE_DECODER_LZMA1
+	{
+		.id = LZMA_FILTER_LZMA1,
+		.init = &lzma_lzma_decoder_init,
+		.memusage = &lzma_lzma_decoder_memusage,
+		.props_decode = &lzma_lzma_props_decode,
+	},
+#endif
+#ifdef HAVE_DECODER_LZMA2
+	{
+		.id = LZMA_FILTER_LZMA2,
+		.init = &lzma_lzma2_decoder_init,
+		.memusage = &lzma_lzma2_decoder_memusage,
+		.props_decode = &lzma_lzma2_props_decode,
+	},
+#endif
+#ifdef HAVE_DECODER_SUBBLOCK
+	{
+		.id = LZMA_FILTER_SUBBLOCK,
+		.init = &lzma_subblock_decoder_init,
+//		.memusage = &lzma_subblock_decoder_memusage,
+		.props_decode = NULL,
+	},
+	{
+		.id = LZMA_FILTER_SUBBLOCK_HELPER,
+		.init = &lzma_subblock_decoder_helper_init,
+		.memusage = NULL,
+		.props_decode = NULL,
+	},
+#endif
+#ifdef HAVE_DECODER_X86
+	{
+		.id = LZMA_FILTER_X86,
+		.init = &lzma_simple_x86_decoder_init,
+		.memusage = NULL,
+		.props_decode = &lzma_simple_props_decode,
+	},
+#endif
+#ifdef HAVE_DECODER_POWERPC
+	{
+		.id = LZMA_FILTER_POWERPC,
+		.init = &lzma_simple_powerpc_decoder_init,
+		.memusage = NULL,
+		.props_decode = &lzma_simple_props_decode,
+	},
+#endif
+#ifdef HAVE_DECODER_IA64
+	{
+		.id = LZMA_FILTER_IA64,
+		.init = &lzma_simple_ia64_decoder_init,
+		.memusage = NULL,
+		.props_decode = &lzma_simple_props_decode,
+	},
+#endif
+#ifdef HAVE_DECODER_ARM
+	{
+		.id = LZMA_FILTER_ARM,
+		.init = &lzma_simple_arm_decoder_init,
+		.memusage = NULL,
+		.props_decode = &lzma_simple_props_decode,
+	},
+#endif
+#ifdef HAVE_DECODER_ARMTHUMB
+	{
+		.id = LZMA_FILTER_ARMTHUMB,
+		.init = &lzma_simple_armthumb_decoder_init,
+		.memusage = NULL,
+		.props_decode = &lzma_simple_props_decode,
+	},
+#endif
+#ifdef HAVE_DECODER_SPARC
+	{
+		.id = LZMA_FILTER_SPARC,
+		.init = &lzma_simple_sparc_decoder_init,
+		.memusage = NULL,
+		.props_decode = &lzma_simple_props_decode,
+	},
+#endif
+#ifdef HAVE_DECODER_DELTA
+	{
+		.id = LZMA_FILTER_DELTA,
+		.init = &lzma_delta_decoder_init,
+		.memusage = &lzma_delta_coder_memusage,
+		.props_decode = &lzma_delta_props_decode,
+	},
+#endif
+};
+
+
+static const lzma_filter_decoder *
+decoder_find(lzma_vli id)
+{
+	for (size_t i = 0; i < ARRAY_SIZE(decoders); ++i)
+		if (decoders[i].id == id)
+			return decoders + i;
+
+	return NULL;
+}
+
+
+extern LZMA_API(lzma_bool)
+lzma_filter_decoder_is_supported(lzma_vli id)
+{
+	return decoder_find(id) != NULL;
+}
+
+
+extern lzma_ret
+lzma_raw_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter *options)
+{
+	return lzma_raw_coder_init(next, allocator,
+			options, (lzma_filter_find)(&decoder_find), false);
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_raw_decoder(lzma_stream *strm, const lzma_filter *options)
+{
+	lzma_next_strm_init(lzma_raw_decoder_init, strm, options);
+
+	strm->internal->supported_actions[LZMA_RUN] = true;
+	strm->internal->supported_actions[LZMA_FINISH] = true;
+
+	return LZMA_OK;
+}
+
+
+extern LZMA_API(uint64_t)
+lzma_raw_decoder_memusage(const lzma_filter *filters)
+{
+	return lzma_raw_coder_memusage(
+			(lzma_filter_find)(&decoder_find), filters);
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_properties_decode(lzma_filter *filter, lzma_allocator *allocator,
+		const uint8_t *props, size_t props_size)
+{
+	// Make it always NULL so that the caller can always safely free() it.
+	filter->options = NULL;
+
+	const lzma_filter_decoder *const fd = decoder_find(filter->id);
+	if (fd == NULL)
+		return LZMA_OPTIONS_ERROR;
+
+	if (fd->props_decode == NULL)
+		return props_size == 0 ? LZMA_OK : LZMA_OPTIONS_ERROR;
+
+	return fd->props_decode(
+			&filter->options, allocator, props, props_size);
+}
diff --git a/src/liblzma/common/filter_decoder.h b/src/liblzma/common/filter_decoder.h
new file mode 100644
index 000000000000..d5c68bdd4a68
--- /dev/null
+++ b/src/liblzma/common/filter_decoder.h
@@ -0,0 +1,23 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file filter_decoder.h
+/// \brief Filter ID mapping to filter-specific functions
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_FILTER_DECODER_H
+#define LZMA_FILTER_DECODER_H
+
+#include "common.h"
+
+
+extern lzma_ret lzma_raw_decoder_init(
+		lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter *options);
+
+#endif
diff --git a/src/liblzma/common/filter_encoder.c b/src/liblzma/common/filter_encoder.c
new file mode 100644
index 000000000000..ab3d3af15e23
--- /dev/null
+++ b/src/liblzma/common/filter_encoder.c
@@ -0,0 +1,304 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file filter_encoder.c
+/// \brief Filter ID mapping to filter-specific functions
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "filter_encoder.h"
+#include "filter_common.h"
+#include "lzma_encoder.h"
+#include "lzma2_encoder.h"
+#include "subblock_encoder.h"
+#include "simple_encoder.h"
+#include "delta_encoder.h"
+
+
+typedef struct {
+	/// Filter ID
+	lzma_vli id;
+
+	/// Initializes the filter encoder and calls lzma_next_filter_init()
+	/// for filters + 1.
+	lzma_init_function init;
+
+	/// Calculates memory usage of the encoder. If the options are
+	/// invalid, UINT64_MAX is returned.
+	uint64_t (*memusage)(const void *options);
+
+	/// Calculates the minimum sane size for Blocks (or other types of
+	/// chunks) to which the input data can be split to make
+	/// multithreaded encoding possible. If this is NULL, it is assumed
+	/// that the encoder is fast enough with single thread.
+	lzma_vli (*chunk_size)(const void *options);
+
+	/// Stores the size of the Filter Properties field to *size and
+	/// returns LZMA_OK on success; other return values indicate an
+	/// error. If this is NULL, props_size_fixed is used.
+	lzma_ret (*props_size_get)(uint32_t *size, const void *options);
+	uint32_t props_size_fixed;
+
+	/// Encodes Filter Properties.
+	///
+	/// \return - LZMA_OK: Properties encoded successfully.
+	/// - LZMA_OPTIONS_ERROR: Unsupported options
+	/// - LZMA_PROG_ERROR: Invalid options or not enough
+	/// output space
+	lzma_ret (*props_encode)(const void *options, uint8_t *out);
+
+} lzma_filter_encoder;
+
+
+static const lzma_filter_encoder encoders[] = {
+#ifdef HAVE_ENCODER_LZMA1
+	{
+		.id = LZMA_FILTER_LZMA1,
+		.init = &lzma_lzma_encoder_init,
+		.memusage = &lzma_lzma_encoder_memusage,
+		.chunk_size = NULL, // FIXME
+		.props_size_get = NULL,
+		.props_size_fixed = 5,
+		.props_encode = &lzma_lzma_props_encode,
+	},
+#endif
+#ifdef HAVE_ENCODER_LZMA2
+	{
+		.id = LZMA_FILTER_LZMA2,
+		.init = &lzma_lzma2_encoder_init,
+		.memusage = &lzma_lzma2_encoder_memusage,
+		.chunk_size = NULL, // FIXME
+		.props_size_get = NULL,
+		.props_size_fixed = 1,
+		.props_encode = &lzma_lzma2_props_encode,
+	},
+#endif
+#ifdef HAVE_ENCODER_SUBBLOCK
+	{
+		.id = LZMA_FILTER_SUBBLOCK,
+		.init = &lzma_subblock_encoder_init,
+//		.memusage = &lzma_subblock_encoder_memusage,
+		.chunk_size = NULL,
+		.props_size_get = NULL,
+		.props_size_fixed = 0,
+		.props_encode = NULL,
+	},
+#endif
+#ifdef HAVE_ENCODER_X86
+	{
+		.id = LZMA_FILTER_X86,
+		.init = &lzma_simple_x86_encoder_init,
+		.memusage = NULL,
+		.chunk_size = NULL,
+		.props_size_get = &lzma_simple_props_size,
+		.props_encode = &lzma_simple_props_encode,
+	},
+#endif
+#ifdef HAVE_ENCODER_POWERPC
+	{
+		.id = LZMA_FILTER_POWERPC,
+		.init = &lzma_simple_powerpc_encoder_init,
+		.memusage = NULL,
+		.chunk_size = NULL,
+		.props_size_get = &lzma_simple_props_size,
+		.props_encode = &lzma_simple_props_encode,
+	},
+#endif
+#ifdef HAVE_ENCODER_IA64
+	{
+		.id = LZMA_FILTER_IA64,
+		.init = &lzma_simple_ia64_encoder_init,
+		.memusage = NULL,
+		.chunk_size = NULL,
+		.props_size_get = &lzma_simple_props_size,
+		.props_encode = &lzma_simple_props_encode,
+	},
+#endif
+#ifdef HAVE_ENCODER_ARM
+	{
+		.id = LZMA_FILTER_ARM,
+		.init = &lzma_simple_arm_encoder_init,
+		.memusage = NULL,
+		.chunk_size = NULL,
+		.props_size_get = &lzma_simple_props_size,
+		.props_encode = &lzma_simple_props_encode,
+	},
+#endif
+#ifdef HAVE_ENCODER_ARMTHUMB
+	{
+		.id = LZMA_FILTER_ARMTHUMB,
+		.init = &lzma_simple_armthumb_encoder_init,
+		.memusage = NULL,
+		.chunk_size = NULL,
+		.props_size_get = &lzma_simple_props_size,
+		.props_encode = &lzma_simple_props_encode,
+	},
+#endif
+#ifdef HAVE_ENCODER_SPARC
+	{
+		.id = LZMA_FILTER_SPARC,
+		.init = &lzma_simple_sparc_encoder_init,
+		.memusage = NULL,
+		.chunk_size = NULL,
+		.props_size_get = &lzma_simple_props_size,
+		.props_encode = &lzma_simple_props_encode,
+	},
+#endif
+#ifdef HAVE_ENCODER_DELTA
+	{
+		.id = LZMA_FILTER_DELTA,
+		.init = &lzma_delta_encoder_init,
+		.memusage = &lzma_delta_coder_memusage,
+		.chunk_size = NULL,
+		.props_size_get = NULL,
+		.props_size_fixed = 1,
+		.props_encode = &lzma_delta_props_encode,
+	},
+#endif
+};
+
+
+static const lzma_filter_encoder *
+encoder_find(lzma_vli id)
+{
+	for (size_t i = 0; i < ARRAY_SIZE(encoders); ++i)
+		if (encoders[i].id == id)
+			return encoders + i;
+
+	return NULL;
+}
+
+
+extern LZMA_API(lzma_bool)
+lzma_filter_encoder_is_supported(lzma_vli id)
+{
+	return encoder_find(id) != NULL;
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_filters_update(lzma_stream *strm, const lzma_filter *filters)
+{
+	if (strm->internal->next.update == NULL)
+		return LZMA_PROG_ERROR;
+
+	// Validate the filter chain.
+	if (lzma_raw_encoder_memusage(filters) == UINT64_MAX)
+		return LZMA_OPTIONS_ERROR;
+
+	// The actual filter chain in the encoder is reversed. Some things
+	// still want the normal order chain, so we provide both.
+	size_t count = 1;
+	while (filters[count].id != LZMA_VLI_UNKNOWN)
+		++count;
+
+	lzma_filter reversed_filters[LZMA_FILTERS_MAX + 1];
+	for (size_t i = 0; i < count; ++i)
+		reversed_filters[count - i - 1] = filters[i];
+
+	reversed_filters[count].id = LZMA_VLI_UNKNOWN;
+
+	return strm->internal->next.update(strm->internal->next.coder,
+			strm->allocator, filters, reversed_filters);
+}
+
+
+extern lzma_ret
+lzma_raw_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter *options)
+{
+	return lzma_raw_coder_init(next, allocator,
+			options, (lzma_filter_find)(&encoder_find), true);
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_raw_encoder(lzma_stream *strm, const lzma_filter *options)
+{
+	lzma_next_strm_init(lzma_raw_coder_init, strm, options,
+			(lzma_filter_find)(&encoder_find), true);
+
+	strm->internal->supported_actions[LZMA_RUN] = true;
+	strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true;
+	strm->internal->supported_actions[LZMA_FINISH] = true;
+
+	return LZMA_OK;
+}
+
+
+extern LZMA_API(uint64_t)
+lzma_raw_encoder_memusage(const lzma_filter *filters)
+{
+	return lzma_raw_coder_memusage(
+			(lzma_filter_find)(&encoder_find), filters);
+}
+
+
+extern LZMA_API(lzma_vli)
+lzma_chunk_size(const lzma_filter *filters)
+{
+	lzma_vli max = 0;
+
+	for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) {
+		const lzma_filter_encoder *const fe
+				= encoder_find(filters[i].id);
+		if (fe == NULL) {
+			// No encoder for this Filter ID is available.
+			// Report it the same way as other errors.
+			return LZMA_VLI_UNKNOWN;
+		}
+
+		if (fe->chunk_size != NULL) {
+			const lzma_vli size
+					= fe->chunk_size(filters[i].options);
+			if (size == LZMA_VLI_UNKNOWN)
+				return LZMA_VLI_UNKNOWN;
+
+			if (size > max)
+				max = size;
+		}
+	}
+
+	return max;
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_properties_size(uint32_t *size, const lzma_filter *filter)
+{
+	const lzma_filter_encoder *const fe = encoder_find(filter->id);
+	if (fe == NULL) {
+		// Unknown filter - if the Filter ID is a proper VLI,
+		// return LZMA_OPTIONS_ERROR instead of LZMA_PROG_ERROR,
+		// because it's possible that we just don't have support
+		// compiled in for the requested filter.
+		return filter->id <= LZMA_VLI_MAX
+				? LZMA_OPTIONS_ERROR : LZMA_PROG_ERROR;
+	}
+
+	if (fe->props_size_get == NULL) {
+		// No props_size_get() function, use props_size_fixed.
+		*size = fe->props_size_fixed;
+		return LZMA_OK;
+	}
+
+	return fe->props_size_get(size, filter->options);
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_properties_encode(const lzma_filter *filter, uint8_t *props)
+{
+	const lzma_filter_encoder *const fe = encoder_find(filter->id);
+	if (fe == NULL)
+		return LZMA_PROG_ERROR;
+
+	if (fe->props_encode == NULL)
+		return LZMA_OK;
+
+	return fe->props_encode(filter->options, props);
+}
diff --git a/src/liblzma/common/filter_encoder.h b/src/liblzma/common/filter_encoder.h
new file mode 100644
index 000000000000..a978932def75
--- /dev/null
+++ b/src/liblzma/common/filter_encoder.h
@@ -0,0 +1,27 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file filter_encoder.h
+/// \brief Filter ID mapping to filter-specific functions
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_FILTER_ENCODER_H
+#define LZMA_FILTER_ENCODER_H
+
+#include "common.h"
+
+
+// FIXME !!! Public API
+extern lzma_vli lzma_chunk_size(const lzma_filter *filters);
+
+
+extern lzma_ret lzma_raw_encoder_init(
+		lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter *filters);
+
+#endif
diff --git a/src/liblzma/common/filter_flags_decoder.c b/src/liblzma/common/filter_flags_decoder.c
new file mode 100644
index 000000000000..caae10ce79a0
--- /dev/null
+++ b/src/liblzma/common/filter_flags_decoder.c
@@ -0,0 +1,46 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file filter_flags_decoder.c
+/// \brief Decodes a Filter Flags field
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "filter_decoder.h"
+
+
+extern LZMA_API(lzma_ret)
+lzma_filter_flags_decode(
+		lzma_filter *filter, lzma_allocator *allocator,
+		const uint8_t *in, size_t *in_pos, size_t in_size)
+{
+	// Set the pointer to NULL so the caller can always safely free it.
+	filter->options = NULL;
+
+	// Filter ID
+	return_if_error(lzma_vli_decode(&filter->id, NULL,
+			in, in_pos, in_size));
+
+	if (filter->id >= LZMA_FILTER_RESERVED_START)
+		return LZMA_DATA_ERROR;
+
+	// Size of Properties
+	lzma_vli props_size;
+	return_if_error(lzma_vli_decode(&props_size, NULL,
+			in, in_pos, in_size));
+
+	// Filter Properties
+	if (in_size - *in_pos < props_size)
+		return LZMA_DATA_ERROR;
+
+	const lzma_ret ret = lzma_properties_decode(
+			filter, allocator, in + *in_pos, props_size);
+
+	*in_pos += props_size;
+
+	return ret;
+}
diff --git a/src/liblzma/common/filter_flags_encoder.c b/src/liblzma/common/filter_flags_encoder.c
new file mode 100644
index 000000000000..d110566de99f
--- /dev/null
+++ b/src/liblzma/common/filter_flags_encoder.c
@@ -0,0 +1,56 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file filter_flags_encoder.c
+/// \brief Encodes a Filter Flags field
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "filter_encoder.h"
+
+
+extern LZMA_API(lzma_ret)
+lzma_filter_flags_size(uint32_t *size, const lzma_filter *filter)
+{
+	if (filter->id >= LZMA_FILTER_RESERVED_START)
+		return LZMA_PROG_ERROR;
+
+	// Properties plus the Filter ID and Size of Properties fields
+	return_if_error(lzma_properties_size(size, filter));
+	const uint32_t props_size = *size;
+	*size = props_size + lzma_vli_size(filter->id)
+			+ lzma_vli_size(props_size);
+	return LZMA_OK;
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_filter_flags_encode(const lzma_filter *filter,
+		uint8_t *out, size_t *out_pos, size_t out_size)
+{
+	// Filter ID
+	if (filter->id >= LZMA_FILTER_RESERVED_START)
+		return LZMA_PROG_ERROR;
+
+	return_if_error(lzma_vli_encode(filter->id, NULL,
+			out, out_pos, out_size));
+
+	// Size of Properties
+	uint32_t props_size;
+	return_if_error(lzma_properties_size(&props_size, filter));
+	return_if_error(lzma_vli_encode(props_size, NULL,
+			out, out_pos, out_size));
+
+	// Filter Properties; they must fit into what is left of out[].
+	const size_t out_avail = out_size - *out_pos;
+	if (props_size > out_avail)
+		return LZMA_PROG_ERROR;
+
+	return_if_error(lzma_properties_encode(filter, out + *out_pos));
+	*out_pos += props_size;
+	return LZMA_OK;
+}
diff --git a/src/liblzma/common/hardware_physmem.c b/src/liblzma/common/hardware_physmem.c
new file mode 100644
index 000000000000..7405b658af76
--- /dev/null
+++ b/src/liblzma/common/hardware_physmem.c
@@ -0,0 +1,25 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file hardware_physmem.c
+/// \brief Get the total amount of physical memory (RAM)
+//
+// Author: Jonathan Nieder
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + +#include "tuklib_physmem.h" + + +extern LZMA_API(uint64_t) +lzma_physmem(void) +{ + // It is simpler to make lzma_physmem() a wrapper for + // tuklib_physmem() than to hack appropriate symbol visibility + // support for the tuklib modules. + return tuklib_physmem(); +} diff --git a/src/liblzma/common/index.c b/src/liblzma/common/index.c new file mode 100644 index 000000000000..3941e28ba724 --- /dev/null +++ b/src/liblzma/common/index.c @@ -0,0 +1,1241 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index.c +/// \brief Handling of .xz Indexes and some other Stream information +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "index.h" +#include "stream_flags_common.h" + + +/// \brief How many Records to allocate at once +/// +/// This should be big enough to avoid making lots of tiny allocations +/// but small enough to avoid too much unused memory at once. 
+#define INDEX_GROUP_SIZE 500 + + +/// \brief How many Records can be allocated at once at maximum +#define PREALLOC_MAX ((SIZE_MAX - sizeof(index_group)) / sizeof(index_record)) + + +/// \brief Base structure for index_stream and index_group structures +typedef struct index_tree_node_s index_tree_node; +struct index_tree_node_s { + /// Uncompressed start offset of this Stream (relative to the + /// beginning of the file) or Block (relative to the beginning + /// of the Stream) + lzma_vli uncompressed_base; + + /// Compressed start offset of this Stream or Block + lzma_vli compressed_base; + + index_tree_node *parent; + index_tree_node *left; + index_tree_node *right; +}; + + +/// \brief AVL tree to hold index_stream or index_group structures +typedef struct { + /// Root node + index_tree_node *root; + + /// Leftmost node. Since the tree will be filled sequentially, + /// this won't change after the first node has been added to + /// the tree. + index_tree_node *leftmost; + + /// The rightmost node in the tree. Since the tree is filled + /// sequentially, this is always the node where to add the new data. + index_tree_node *rightmost; + + /// Number of nodes in the tree + uint32_t count; + +} index_tree; + + +typedef struct { + lzma_vli uncompressed_sum; + lzma_vli unpadded_sum; +} index_record; + + +typedef struct { + /// Every Record group is part of index_stream.groups tree. + index_tree_node node; + + /// Number of Blocks in this Stream before this group. + lzma_vli number_base; + + /// Number of Records that can be put in records[]. + size_t allocated; + + /// Index of the last Record in use. + size_t last; + + /// The sizes in this array are stored as cumulative sums relative + /// to the beginning of the Stream. This makes it possible to + /// use binary search in lzma_index_locate(). 
+ /// + /// Note that the cumulative summing is done specially for + /// unpadded_sum: The previous value is rounded up to the next + /// multiple of four before adding the Unpadded Size of the new + /// Block. The total encoded size of the Blocks in the Stream + /// is records[last].unpadded_sum in the last Record group of + /// the Stream. + /// + /// For example, if the Unpadded Sizes are 39, 57, and 81, the + /// stored values are 39, 97 (40 + 57), and 181 (100 + 81). + /// The total encoded size of these Blocks is 184. + /// + /// This is a flexible array, because it makes it easy to optimize + /// memory usage in case someone concatenates many Streams that + /// have only one or few Blocks. + index_record records[]; + +} index_group; + + +typedef struct { + /// Every index_stream is a node in the tree of Streams. + index_tree_node node; + + /// Number of this Stream (first one is 1) + uint32_t number; + + /// Total number of Blocks before this Stream + lzma_vli block_number_base; + + /// Record groups of this Stream are stored in a tree. + /// It's a T-tree with AVL-tree balancing. There are + /// INDEX_GROUP_SIZE Records per node by default. + /// This keeps the number of memory allocations reasonable + /// and finding a Record is fast. + index_tree groups; + + /// Number of Records in this Stream + lzma_vli record_count; + + /// Size of the List of Records field in this Stream. This is used + /// together with record_count to calculate the size of the Index + /// field and thus the total size of the Stream. + lzma_vli index_list_size; + + /// Stream Flags of this Stream. This is meaningful only if + /// the Stream Flags have been set with lzma_index_stream_flags(). + /// Initially stream_flags.version is set to UINT32_MAX to indicate + /// that the Stream Flags are unknown. + lzma_stream_flags stream_flags; + + /// Amount of Stream Padding after this Stream. This defaults to + /// zero and can be set with lzma_index_stream_padding(). 
+ lzma_vli stream_padding; + +} index_stream; + + +struct lzma_index_s { + /// AVL-tree containing the Stream(s). Often there is just one + /// Stream, but using a tree keeps lookups fast even when there + /// are many concatenated Streams. + index_tree streams; + + /// Uncompressed size of all the Blocks in the Stream(s) + lzma_vli uncompressed_size; + + /// Total size of all the Blocks in the Stream(s) + lzma_vli total_size; + + /// Total number of Records in all Streams in this lzma_index + lzma_vli record_count; + + /// Size of the List of Records field if all the Streams in this + /// lzma_index were packed into a single Stream (makes it simpler to + /// take many .xz files and combine them into a single Stream). + /// + /// This value together with record_count is needed to calculate + /// Backward Size that is stored into Stream Footer. + lzma_vli index_list_size; + + /// How many Records to allocate at once in lzma_index_append(). + /// This defaults to INDEX_GROUP_SIZE but can be overridden with + /// lzma_index_prealloc(). + size_t prealloc; + + /// Bitmask indicating what integrity check types have been used + /// as set by lzma_index_stream_flags(). The bit of the last Stream + /// is not included here, since it is possible to change it by + /// calling lzma_index_stream_flags() again. + uint32_t checks; +}; + + +/// Reset a tree to the empty state: no nodes and a node count of zero. +static void +index_tree_init(index_tree *tree) +{ + tree->root = NULL; + tree->leftmost = NULL; + tree->rightmost = NULL; + tree->count = 0; + return; +} + + +/// Helper for index_tree_end() +static void +index_tree_node_end(index_tree_node *node, lzma_allocator *allocator, + void (*free_func)(void *node, lzma_allocator *allocator)) +{ + // The tree won't ever be very huge, so recursion should be fine. + // 20 levels in the tree is likely quite a lot already in practice. 
+ if (node->left != NULL) + index_tree_node_end(node->left, allocator, free_func); + + if (node->right != NULL) + index_tree_node_end(node->right, allocator, free_func); + + if (free_func != NULL) + free_func(node, allocator); + + lzma_free(node, allocator); + return; +} + + +/// Free the memory allocated for a tree. If free_func is not NULL, +/// it is called on each node before freeing the node. This is used +/// to free the Record groups from each index_stream before freeing +/// the index_stream itself. +static void +index_tree_end(index_tree *tree, lzma_allocator *allocator, + void (*free_func)(void *node, lzma_allocator *allocator)) +{ + if (tree->root != NULL) + index_tree_node_end(tree->root, allocator, free_func); + + return; +} + + +/// Add a new node to the tree. node->uncompressed_base and +/// node->compressed_base must have been set by the caller already. +static void +index_tree_append(index_tree *tree, index_tree_node *node) +{ + node->parent = tree->rightmost; + node->left = NULL; + node->right = NULL; + + ++tree->count; + + // Handle the special case of adding the first node. + if (tree->root == NULL) { + tree->root = node; + tree->leftmost = node; + tree->rightmost = node; + return; + } + + // The tree is always filled sequentially. + assert(tree->rightmost->uncompressed_base <= node->uncompressed_base); + assert(tree->rightmost->compressed_base < node->compressed_base); + + // Add the new node after the rightmost node. It's the correct + // place due to the reason above. + tree->rightmost->right = node; + tree->rightmost = node; + + // Balance the AVL-tree if needed. We don't need to keep the balance + // factors in nodes, because we always fill the tree sequentially, + // and thus know the state of the tree just by looking at the node + // count. From the node count we can calculate how many steps to go + // up in the tree to find the rotation root. 
+ uint32_t up = tree->count ^ (UINT32_C(1) << bsr32(tree->count)); + if (up != 0) { + // Locate the root node for the rotation. + up = ctz32(tree->count) + 2; + do { + node = node->parent; + } while (--up > 0); + + // Rotate left using node as the rotation root. + index_tree_node *pivot = node->right; + + if (node->parent == NULL) { + tree->root = pivot; + } else { + assert(node->parent->right == node); + node->parent->right = pivot; + } + + pivot->parent = node->parent; + + node->right = pivot->left; + if (node->right != NULL) + node->right->parent = node; + + pivot->left = node; + node->parent = pivot; + } + + return; +} + + +/// Get the next node in the tree. Return NULL if there are no more nodes. +static void * +index_tree_next(const index_tree_node *node) +{ + if (node->right != NULL) { + node = node->right; + while (node->left != NULL) + node = node->left; + + return (void *)(node); + } + + while (node->parent != NULL && node->parent->right == node) + node = node->parent; + + return (void *)(node->parent); +} + + +/// Locate a node that contains the given uncompressed offset. It is +/// caller's job to check that target is not bigger than the uncompressed +/// size of the tree (the last node would be returned in that case still). +static void * +index_tree_locate(const index_tree *tree, lzma_vli target) +{ + const index_tree_node *result = NULL; + const index_tree_node *node = tree->root; + + assert(tree->leftmost == NULL + || tree->leftmost->uncompressed_base == 0); + + // Consecutive nodes may have the same uncompressed_base. + // We must pick the rightmost one. + while (node != NULL) { + if (node->uncompressed_base > target) { + node = node->left; + } else { + result = node; + node = node->right; + } + } + + return (void *)(result); +} + + +/// Allocate and initialize a new Stream using the given base offsets. 
+static index_stream * +index_stream_init(lzma_vli compressed_base, lzma_vli uncompressed_base, + lzma_vli stream_number, lzma_vli block_number_base, + lzma_allocator *allocator) +{ + index_stream *s = lzma_alloc(sizeof(index_stream), allocator); + if (s == NULL) + return NULL; + + s->node.uncompressed_base = uncompressed_base; + s->node.compressed_base = compressed_base; + s->node.parent = NULL; + s->node.left = NULL; + s->node.right = NULL; + + s->number = stream_number; + s->block_number_base = block_number_base; + + index_tree_init(&s->groups); + + s->record_count = 0; + s->index_list_size = 0; + s->stream_flags.version = UINT32_MAX; + s->stream_padding = 0; + + return s; +} + + +/// Free the Record groups of a Stream. The index_stream structure itself +/// is not freed here: index_tree_node_end() frees it after calling this +/// function, and other callers have to free it themselves. +static void +index_stream_end(void *node, lzma_allocator *allocator) +{ + index_stream *s = node; + index_tree_end(&s->groups, allocator, NULL); + return; +} + + +/// Allocate and initialize the lzma_index base structure without any +/// Streams. Returns NULL if memory allocation fails. +static lzma_index * +index_init_plain(lzma_allocator *allocator) +{ + lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator); + if (i != NULL) { + index_tree_init(&i->streams); + i->uncompressed_size = 0; + i->total_size = 0; + i->record_count = 0; + i->index_list_size = 0; + i->prealloc = INDEX_GROUP_SIZE; + i->checks = 0; + } + + return i; +} + + +/// Allocate a new lzma_index that contains one empty Stream (Stream +/// number 1 with both base offsets set to zero). Returns NULL if memory +/// allocation fails; nothing is left allocated in that case. +extern LZMA_API(lzma_index *) +lzma_index_init(lzma_allocator *allocator) +{ + lzma_index *i = index_init_plain(allocator); + if (i == NULL) + return NULL; + + // If the initial Stream cannot be allocated, free the base + // structure and return NULL instead of passing NULL pointers + // to index_tree_append(). + index_stream *s = index_stream_init(0, 0, 1, 0, allocator); + if (s == NULL) { + lzma_free(i, allocator); + return NULL; + } + + index_tree_append(&i->streams, &s->node); + + return i; +} + + +extern LZMA_API(void) +lzma_index_end(lzma_index *i, lzma_allocator *allocator) +{ + // NOTE: If you modify this function, check also the bottom + // of lzma_index_cat(). 
+ if (i != NULL) { + index_tree_end(&i->streams, allocator, &index_stream_end); + lzma_free(i, allocator); + } + + return; +} + + +extern void +lzma_index_prealloc(lzma_index *i, lzma_vli records) +{ + if (records > PREALLOC_MAX) + records = PREALLOC_MAX; + + i->prealloc = (size_t)(records); + return; +} + + +extern LZMA_API(uint64_t) +lzma_index_memusage(lzma_vli streams, lzma_vli blocks) +{ + // This calculates an upper bound that is only a little bit + // bigger than the exact maximum memory usage with the given + // parameters. + + // Typical malloc() overhead is 2 * sizeof(void *) but we take + // a little bit extra just in case. Using LZMA_MEMUSAGE_BASE + // instead would give too inaccurate estimate. + const size_t alloc_overhead = 4 * sizeof(void *); + + // Amount of memory needed for each Stream base structures. + // We assume that every Stream has at least one Block and + // thus at least one group. + const size_t stream_base = sizeof(index_stream) + + sizeof(index_group) + 2 * alloc_overhead; + + // Amount of memory needed per group. + const size_t group_base = sizeof(index_group) + + INDEX_GROUP_SIZE * sizeof(index_record) + + alloc_overhead; + + // Number of groups. There may actually be more, but that overhead + // has been taken into account in stream_base already. + const lzma_vli groups + = (blocks + INDEX_GROUP_SIZE - 1) / INDEX_GROUP_SIZE; + + // Memory used by index_stream and index_group structures. + const uint64_t streams_mem = streams * stream_base; + const uint64_t groups_mem = groups * group_base; + + // Memory used by the base structure. + const uint64_t index_base = sizeof(lzma_index) + alloc_overhead; + + // Validate the arguments and catch integer overflows. + // Maximum number of Streams is "only" UINT32_MAX, because + // that limit is used by the tree containing the Streams. 
+ const uint64_t limit = UINT64_MAX - index_base; + if (streams == 0 || streams > UINT32_MAX || blocks > LZMA_VLI_MAX + || streams > limit / stream_base + || groups > limit / group_base + || limit - streams_mem < groups_mem) + return UINT64_MAX; + + return index_base + streams_mem + groups_mem; +} + + +extern LZMA_API(uint64_t) +lzma_index_memused(const lzma_index *i) +{ + return lzma_index_memusage(i->streams.count, i->record_count); +} + + +extern LZMA_API(lzma_vli) +lzma_index_block_count(const lzma_index *i) +{ + return i->record_count; +} + + +extern LZMA_API(lzma_vli) +lzma_index_stream_count(const lzma_index *i) +{ + return i->streams.count; +} + + +extern LZMA_API(lzma_vli) +lzma_index_size(const lzma_index *i) +{ + return index_size(i->record_count, i->index_list_size); +} + + +extern LZMA_API(lzma_vli) +lzma_index_total_size(const lzma_index *i) +{ + return i->total_size; +} + + +extern LZMA_API(lzma_vli) +lzma_index_stream_size(const lzma_index *i) +{ + // Stream Header + Blocks + Index + Stream Footer + return LZMA_STREAM_HEADER_SIZE + i->total_size + + index_size(i->record_count, i->index_list_size) + + LZMA_STREAM_HEADER_SIZE; +} + + +static lzma_vli +index_file_size(lzma_vli compressed_base, lzma_vli unpadded_sum, + lzma_vli record_count, lzma_vli index_list_size, + lzma_vli stream_padding) +{ + // Earlier Streams and Stream Paddings + Stream Header + // + Blocks + Index + Stream Footer + Stream Padding + // + // This might go over LZMA_VLI_MAX due to too big unpadded_sum + // when this function is used in lzma_index_append(). + lzma_vli file_size = compressed_base + 2 * LZMA_STREAM_HEADER_SIZE + + stream_padding + vli_ceil4(unpadded_sum); + if (file_size > LZMA_VLI_MAX) + return LZMA_VLI_UNKNOWN; + + // The same applies here. 
+ file_size += index_size(record_count, index_list_size); + if (file_size > LZMA_VLI_MAX) + return LZMA_VLI_UNKNOWN; + + return file_size; +} + + +extern LZMA_API(lzma_vli) +lzma_index_file_size(const lzma_index *i) +{ + const index_stream *s = (const index_stream *)(i->streams.rightmost); + const index_group *g = (const index_group *)(s->groups.rightmost); + return index_file_size(s->node.compressed_base, + g == NULL ? 0 : g->records[g->last].unpadded_sum, + s->record_count, s->index_list_size, + s->stream_padding); +} + + +extern LZMA_API(lzma_vli) +lzma_index_uncompressed_size(const lzma_index *i) +{ + return i->uncompressed_size; +} + + +extern LZMA_API(uint32_t) +lzma_index_checks(const lzma_index *i) +{ + uint32_t checks = i->checks; + + // Get the type of the Check of the last Stream too. + const index_stream *s = (const index_stream *)(i->streams.rightmost); + if (s->stream_flags.version != UINT32_MAX) + checks |= UINT32_C(1) << s->stream_flags.check; + + return checks; +} + + +extern uint32_t +lzma_index_padding_size(const lzma_index *i) +{ + return (LZMA_VLI_C(4) - index_size_unpadded( + i->record_count, i->index_list_size)) & 3; +} + + +extern LZMA_API(lzma_ret) +lzma_index_stream_flags(lzma_index *i, const lzma_stream_flags *stream_flags) +{ + if (i == NULL || stream_flags == NULL) + return LZMA_PROG_ERROR; + + // Validate the Stream Flags. + return_if_error(lzma_stream_flags_compare( + stream_flags, stream_flags)); + + index_stream *s = (index_stream *)(i->streams.rightmost); + s->stream_flags = *stream_flags; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_index_stream_padding(lzma_index *i, lzma_vli stream_padding) +{ + if (i == NULL || stream_padding > LZMA_VLI_MAX + || (stream_padding & 3) != 0) + return LZMA_PROG_ERROR; + + index_stream *s = (index_stream *)(i->streams.rightmost); + + // Check that the new value won't make the file grow too big. 
+ const lzma_vli old_stream_padding = s->stream_padding; + s->stream_padding = 0; + if (lzma_index_file_size(i) + stream_padding > LZMA_VLI_MAX) { + s->stream_padding = old_stream_padding; + return LZMA_DATA_ERROR; + } + + s->stream_padding = stream_padding; + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_index_append(lzma_index *i, lzma_allocator *allocator, + lzma_vli unpadded_size, lzma_vli uncompressed_size) +{ + // Validate. + if (i == NULL || unpadded_size < UNPADDED_SIZE_MIN + || unpadded_size > UNPADDED_SIZE_MAX + || uncompressed_size > LZMA_VLI_MAX) + return LZMA_PROG_ERROR; + + index_stream *s = (index_stream *)(i->streams.rightmost); + index_group *g = (index_group *)(s->groups.rightmost); + + const lzma_vli compressed_base = g == NULL ? 0 + : vli_ceil4(g->records[g->last].unpadded_sum); + const lzma_vli uncompressed_base = g == NULL ? 0 + : g->records[g->last].uncompressed_sum; + const uint32_t index_list_size_add = lzma_vli_size(unpadded_size) + + lzma_vli_size(uncompressed_size); + + // Check that the file size will stay within limits. + if (index_file_size(s->node.compressed_base, + compressed_base + unpadded_size, s->record_count + 1, + s->index_list_size + index_list_size_add, + s->stream_padding) == LZMA_VLI_UNKNOWN) + return LZMA_DATA_ERROR; + + // The size of the Index field must not exceed the maximum value + // that can be stored in the Backward Size field. + if (index_size(i->record_count + 1, + i->index_list_size + index_list_size_add) + > LZMA_BACKWARD_SIZE_MAX) + return LZMA_DATA_ERROR; + + if (g != NULL && g->last + 1 < g->allocated) { + // There is space in the last group at least for one Record. + ++g->last; + } else { + // We need to allocate a new group. 
+ g = lzma_alloc(sizeof(index_group) + + i->prealloc * sizeof(index_record), + allocator); + if (g == NULL) + return LZMA_MEM_ERROR; + + g->last = 0; + g->allocated = i->prealloc; + + // Reset prealloc so that if the application happens to + // add new Records, the allocation size will be sane. + i->prealloc = INDEX_GROUP_SIZE; + + // Set the start offsets of this group. + g->node.uncompressed_base = uncompressed_base; + g->node.compressed_base = compressed_base; + g->number_base = s->record_count + 1; + + // Add the new group to the Stream. + index_tree_append(&s->groups, &g->node); + } + + // Add the new Record to the group. + g->records[g->last].uncompressed_sum + = uncompressed_base + uncompressed_size; + g->records[g->last].unpadded_sum + = compressed_base + unpadded_size; + + // Update the totals. + ++s->record_count; + s->index_list_size += index_list_size_add; + + i->total_size += vli_ceil4(unpadded_size); + i->uncompressed_size += uncompressed_size; + ++i->record_count; + i->index_list_size += index_list_size_add; + + return LZMA_OK; +} + + +/// Structure to pass info to index_cat_helper() +typedef struct { + /// Uncompressed size of the destination + lzma_vli uncompressed_size; + + /// Compressed file size of the destination + lzma_vli file_size; + + /// Same as above but for Block numbers + lzma_vli block_number_add; + + /// Number of Streams that were in the destination index before we + /// started appending new Streams from the source index. This is + /// used to fix the Stream numbering. + uint32_t stream_number_add; + + /// Destination index' Stream tree + index_tree *streams; + +} index_cat_info; + + +/// Add the Stream nodes from the source index to dest using recursion. +/// Simplest iterative traversal of the source tree wouldn't work, because +/// we update the pointers in nodes when moving them to the destination tree. 
+static void +index_cat_helper(const index_cat_info *info, index_stream *this) +{ + index_stream *left = (index_stream *)(this->node.left); + index_stream *right = (index_stream *)(this->node.right); + + if (left != NULL) + index_cat_helper(info, left); + + this->node.uncompressed_base += info->uncompressed_size; + this->node.compressed_base += info->file_size; + this->number += info->stream_number_add; + this->block_number_base += info->block_number_add; + index_tree_append(info->streams, &this->node); + + if (right != NULL) + index_cat_helper(info, right); + + return; +} + + +extern LZMA_API(lzma_ret) +lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, + lzma_allocator *allocator) +{ + const lzma_vli dest_file_size = lzma_index_file_size(dest); + + // Check that we don't exceed the file size limits. + if (dest_file_size + lzma_index_file_size(src) > LZMA_VLI_MAX + || dest->uncompressed_size + src->uncompressed_size + > LZMA_VLI_MAX) + return LZMA_DATA_ERROR; + + // Check that the encoded size of the combined lzma_indexes stays + // within limits. In theory, this should be done only if we know + // that the user plans to actually combine the Streams and thus + // construct a single Index (probably rare). However, exceeding + // this limit is quite theoretical, so we do this check always + // to simplify things elsewhere. + { + const lzma_vli dest_size = index_size_unpadded( + dest->record_count, dest->index_list_size); + const lzma_vli src_size = index_size_unpadded( + src->record_count, src->index_list_size); + if (vli_ceil4(dest_size + src_size) > LZMA_BACKWARD_SIZE_MAX) + return LZMA_DATA_ERROR; + } + + // Optimize the last group to minimize memory usage. Allocation has + // to be done before modifying dest or src. 
+ { + index_stream *s = (index_stream *)(dest->streams.rightmost); + index_group *g = (index_group *)(s->groups.rightmost); + if (g != NULL && g->last + 1 < g->allocated) { + assert(g->node.left == NULL); + assert(g->node.right == NULL); + + index_group *newg = lzma_alloc(sizeof(index_group) + + (g->last + 1) + * sizeof(index_record), + allocator); + if (newg == NULL) + return LZMA_MEM_ERROR; + + newg->node = g->node; + newg->allocated = g->last + 1; + newg->last = g->last; + newg->number_base = g->number_base; + + memcpy(newg->records, g->records, newg->allocated + * sizeof(index_record)); + + if (g->node.parent != NULL) { + assert(g->node.parent->right == &g->node); + g->node.parent->right = &newg->node; + } + + if (s->groups.leftmost == &g->node) { + assert(s->groups.root == &g->node); + s->groups.leftmost = &newg->node; + s->groups.root = &newg->node; + } + + if (s->groups.rightmost == &g->node) + s->groups.rightmost = &newg->node; + + lzma_free(g, allocator); + } + } + + // dest->checks doesn't include the check type of the last Stream + // in dest. That Stream won't be the last one anymore once the + // Streams from src have been appended, so its check type has to be + // folded into dest->checks now, while it still is the rightmost + // Stream that lzma_index_checks() looks at. + dest->checks = lzma_index_checks(dest); + + // Add all the Streams from src to dest. Update the base offsets + // of each Stream from src. + const index_cat_info info = { + .uncompressed_size = dest->uncompressed_size, + .file_size = dest_file_size, + .stream_number_add = dest->streams.count, + .block_number_add = dest->record_count, + .streams = &dest->streams, + }; + index_cat_helper(&info, (index_stream *)(src->streams.root)); + + // Update info about all the combined Streams. src->checks excludes + // the last Stream of src, which is now the last Stream of dest and + // thus must stay out of dest->checks too. + dest->uncompressed_size += src->uncompressed_size; + dest->total_size += src->total_size; + dest->record_count += src->record_count; + dest->index_list_size += src->index_list_size; + dest->checks |= src->checks; + + // There's nothing else left in src than the base structure. + lzma_free(src, allocator); + + return LZMA_OK; +} + + +/// Duplicate an index_stream. +static index_stream * +index_dup_stream(const index_stream *src, lzma_allocator *allocator) +{ + // Catch a somewhat theoretical integer overflow. 
+ if (src->record_count > PREALLOC_MAX) + return NULL; + + // Allocate and initialize a new Stream. + index_stream *dest = index_stream_init(src->node.compressed_base, + src->node.uncompressed_base, src->number, + src->block_number_base, allocator); + if (dest == NULL) + return NULL; + + // Copy the overall information. This has to be done also for + // Streams that have no Blocks; otherwise the Stream Flags and + // Stream Padding of empty Streams would be missing from the + // duplicate. + dest->record_count = src->record_count; + dest->index_list_size = src->index_list_size; + dest->stream_flags = src->stream_flags; + dest->stream_padding = src->stream_padding; + + // Return if there are no groups to duplicate. + if (src->groups.leftmost == NULL) + return dest; + + // Allocate memory for the Records. We put all the Records into + // a single group. It's simplest and also tends to make + // lzma_index_iter_locate() a little bit faster with very big Indexes. + index_group *destg = lzma_alloc(sizeof(index_group) + + src->record_count * sizeof(index_record), + allocator); + if (destg == NULL) { + // index_stream_end() frees only the Record groups, so the + // Stream structure itself has to be freed here to avoid + // leaking it. + index_stream_end(dest, allocator); + lzma_free(dest, allocator); + return NULL; + } + + // Initialize destg. + destg->node.uncompressed_base = 0; + destg->node.compressed_base = 0; + destg->number_base = 1; + destg->allocated = src->record_count; + destg->last = src->record_count - 1; + + // Go through all the groups in src and copy the Records into destg. + const index_group *srcg = (const index_group *)(src->groups.leftmost); + size_t i = 0; + do { + memcpy(destg->records + i, srcg->records, + (srcg->last + 1) * sizeof(index_record)); + i += srcg->last + 1; + srcg = index_tree_next(&srcg->node); + } while (srcg != NULL); + + assert(i == destg->allocated); + + // Add the group to the new Stream. + index_tree_append(&dest->groups, &destg->node); + + return dest; +} + + +extern LZMA_API(lzma_index *) +lzma_index_dup(const lzma_index *src, lzma_allocator *allocator) +{ + // Allocate the base structure (no initial Stream). + lzma_index *dest = index_init_plain(allocator); + if (dest == NULL) + return NULL; + + // Copy the totals. 
+ dest->uncompressed_size = src->uncompressed_size; + dest->total_size = src->total_size; + dest->record_count = src->record_count; + dest->index_list_size = src->index_list_size; + + // NOTE(review): src->checks is not copied and index_init_plain() + // leaves dest->checks at zero, so lzma_index_checks() on the + // duplicate would seem to report only the check type of the last + // Stream. Confirm whether that is intended. + + // Copy the Streams and the groups in them. + const index_stream *srcstream + = (const index_stream *)(src->streams.leftmost); + do { + index_stream *deststream = index_dup_stream( + srcstream, allocator); + if (deststream == NULL) { + lzma_index_end(dest, allocator); + return NULL; + } + + index_tree_append(&dest->streams, &deststream->node); + + srcstream = index_tree_next(&srcstream->node); + } while (srcstream != NULL); + + return dest; +} + + +/// Indexing for lzma_index_iter.internal[] +enum { + ITER_INDEX, + ITER_STREAM, + ITER_GROUP, + ITER_RECORD, + ITER_METHOD, +}; + + +/// Values for lzma_index_iter.internal[ITER_METHOD].s +enum { + ITER_METHOD_NORMAL, + ITER_METHOD_NEXT, + ITER_METHOD_LEFTMOST, +}; + + +/// Fill iter->stream and, when there is a current group, iter->block from +/// the position stored in iter->internal[]. The group position is stored +/// back in a form that doesn't point to the last group of the index. +static void +iter_set_info(lzma_index_iter *iter) +{ + const lzma_index *i = iter->internal[ITER_INDEX].p; + const index_stream *stream = iter->internal[ITER_STREAM].p; + const index_group *group = iter->internal[ITER_GROUP].p; + const size_t record = iter->internal[ITER_RECORD].s; + + // lzma_index_iter.internal must not contain a pointer to the last + // group in the index, because that may be reallocated by + // lzma_index_cat(). + if (group == NULL) { + // There are no groups. + assert(stream->groups.root == NULL); + iter->internal[ITER_METHOD].s = ITER_METHOD_LEFTMOST; + + } else if (i->streams.rightmost != &stream->node + || stream->groups.rightmost != &group->node) { + // The group is not the last group in the index. + iter->internal[ITER_METHOD].s = ITER_METHOD_NORMAL; + + } else if (stream->groups.leftmost != &group->node) { + // The group isn't the only group in the Stream, thus we + // know that it must have a parent group i.e. it's not + // the root node. 
+ assert(stream->groups.root != &group->node); + assert(group->node.parent->right == &group->node); + iter->internal[ITER_METHOD].s = ITER_METHOD_NEXT; + iter->internal[ITER_GROUP].p = group->node.parent; + + } else { + // The Stream has only one group. + assert(stream->groups.root == &group->node); + assert(group->node.parent == NULL); + iter->internal[ITER_METHOD].s = ITER_METHOD_LEFTMOST; + iter->internal[ITER_GROUP].p = NULL; + } + + iter->stream.number = stream->number; + iter->stream.block_count = stream->record_count; + iter->stream.compressed_offset = stream->node.compressed_base; + iter->stream.uncompressed_offset = stream->node.uncompressed_base; + + // iter->stream.flags will be NULL if the Stream Flags haven't been + // set with lzma_index_stream_flags(). + iter->stream.flags = stream->stream_flags.version == UINT32_MAX + ? NULL : &stream->stream_flags; + iter->stream.padding = stream->stream_padding; + + if (stream->groups.rightmost == NULL) { + // Stream has no Blocks. + iter->stream.compressed_size = index_size(0, 0) + + 2 * LZMA_STREAM_HEADER_SIZE; + iter->stream.uncompressed_size = 0; + } else { + const index_group *g = (const index_group *)( + stream->groups.rightmost); + + // Stream Header + Stream Footer + Index + Blocks + iter->stream.compressed_size = 2 * LZMA_STREAM_HEADER_SIZE + + index_size(stream->record_count, + stream->index_list_size) + + vli_ceil4(g->records[g->last].unpadded_sum); + iter->stream.uncompressed_size + = g->records[g->last].uncompressed_sum; + } + + if (group != NULL) { + iter->block.number_in_stream = group->number_base + record; + iter->block.number_in_file = iter->block.number_in_stream + + stream->block_number_base; + + iter->block.compressed_stream_offset + = record == 0 ? group->node.compressed_base + : vli_ceil4(group->records[ + record - 1].unpadded_sum); + iter->block.uncompressed_stream_offset + = record == 0 ? 
group->node.uncompressed_base + : group->records[record - 1].uncompressed_sum; + + iter->block.uncompressed_size + = group->records[record].uncompressed_sum + - iter->block.uncompressed_stream_offset; + iter->block.unpadded_size + = group->records[record].unpadded_sum + - iter->block.compressed_stream_offset; + iter->block.total_size = vli_ceil4(iter->block.unpadded_size); + + // The Stream Header size is added only now, because the + // subtraction above needs the offset without it. + iter->block.compressed_stream_offset + += LZMA_STREAM_HEADER_SIZE; + + iter->block.compressed_file_offset + = iter->block.compressed_stream_offset + + iter->stream.compressed_offset; + iter->block.uncompressed_file_offset + = iter->block.uncompressed_stream_offset + + iter->stream.uncompressed_offset; + } + + return; +} + + +extern LZMA_API(void) +lzma_index_iter_init(lzma_index_iter *iter, const lzma_index *i) +{ + iter->internal[ITER_INDEX].p = i; + lzma_index_iter_rewind(iter); + return; +} + + +extern LZMA_API(void) +lzma_index_iter_rewind(lzma_index_iter *iter) +{ + iter->internal[ITER_STREAM].p = NULL; + iter->internal[ITER_GROUP].p = NULL; + iter->internal[ITER_RECORD].s = 0; + iter->internal[ITER_METHOD].s = ITER_METHOD_NORMAL; + return; +} + + +extern LZMA_API(lzma_bool) +lzma_index_iter_next(lzma_index_iter *iter, lzma_index_iter_mode mode) +{ + // Catch unsupported mode values. + if ((unsigned int)(mode) > LZMA_INDEX_ITER_NONEMPTY_BLOCK) + return true; + + const lzma_index *i = iter->internal[ITER_INDEX].p; + const index_stream *stream = iter->internal[ITER_STREAM].p; + const index_group *group = NULL; + size_t record = iter->internal[ITER_RECORD].s; + + // If we are being asked for the next Stream, leave group as NULL + // so that the rest of this function thinks that this Stream + // has no groups and will thus go to the next Stream. + if (mode != LZMA_INDEX_ITER_STREAM) { + // Get the pointer to the current group. See iter_set_info() + // for explanation. 
+ switch (iter->internal[ITER_METHOD].s) { + case ITER_METHOD_NORMAL: + group = iter->internal[ITER_GROUP].p; + break; + + case ITER_METHOD_NEXT: + group = index_tree_next(iter->internal[ITER_GROUP].p); + break; + + case ITER_METHOD_LEFTMOST: + group = (const index_group *)( + stream->groups.leftmost); + break; + } + } + +again: + if (stream == NULL) { + // We are at the beginning of the lzma_index. + // Locate the first Stream. + stream = (const index_stream *)(i->streams.leftmost); + if (mode >= LZMA_INDEX_ITER_BLOCK) { + // Since we are being asked to return information + // about the first Block, skip Streams that have + // no Blocks. + while (stream->groups.leftmost == NULL) { + stream = index_tree_next(&stream->node); + if (stream == NULL) + return true; + } + } + + // Start from the first Record in the Stream. + group = (const index_group *)(stream->groups.leftmost); + record = 0; + + } else if (group != NULL && record < group->last) { + // The next Record is in the same group. + ++record; + + } else { + // This group has no more Records or this Stream has + // no Blocks at all. + record = 0; + + // If group is not NULL, this Stream has at least one Block + // and thus at least one group. Find the next group. + if (group != NULL) + group = index_tree_next(&group->node); + + if (group == NULL) { + // This Stream has no more Records. Find the next + // Stream. If we are being asked to return information + // about a Block, we skip empty Streams. + do { + stream = index_tree_next(&stream->node); + if (stream == NULL) + return true; + } while (mode >= LZMA_INDEX_ITER_BLOCK + && stream->groups.leftmost == NULL); + + group = (const index_group *)( + stream->groups.leftmost); + } + } + + if (mode == LZMA_INDEX_ITER_NONEMPTY_BLOCK) { + // We need to look for the next Block again if this Block + // is empty.
+ if (record == 0) { + if (group->node.uncompressed_base + == group->records[0].uncompressed_sum) + goto again; + } else if (group->records[record - 1].uncompressed_sum + == group->records[record].uncompressed_sum) { + goto again; + } + } + + iter->internal[ITER_STREAM].p = stream; + iter->internal[ITER_GROUP].p = group; + iter->internal[ITER_RECORD].s = record; + + iter_set_info(iter); + + return false; +} + + +extern LZMA_API(lzma_bool) +lzma_index_iter_locate(lzma_index_iter *iter, lzma_vli target) +{ + const lzma_index *i = iter->internal[ITER_INDEX].p; + + // If the target is past the end of the file, return immediately. + if (i->uncompressed_size <= target) + return true; + + // Locate the Stream containing the target offset. + const index_stream *stream = index_tree_locate(&i->streams, target); + assert(stream != NULL); + target -= stream->node.uncompressed_base; + + // Locate the group containing the target offset. + const index_group *group = index_tree_locate(&stream->groups, target); + assert(group != NULL); + + // Use binary search to locate the exact Record. It is the first + // Record whose uncompressed_sum is greater than target. + // This is because we want the rightmost Record that fulfills the + // search criterion. It is possible that there are empty Blocks; + // we don't want to return them.
+ size_t left = 0; + size_t right = group->last; + + while (left < right) { + const size_t pos = left + (right - left) / 2; + if (group->records[pos].uncompressed_sum <= target) + left = pos + 1; + else + right = pos; + } + + iter->internal[ITER_STREAM].p = stream; + iter->internal[ITER_GROUP].p = group; + iter->internal[ITER_RECORD].s = left; + + iter_set_info(iter); + + return false; +} diff --git a/src/liblzma/common/index.h b/src/liblzma/common/index.h new file mode 100644 index 000000000000..64e97247dd33 --- /dev/null +++ b/src/liblzma/common/index.h @@ -0,0 +1,73 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index.h +/// \brief Handling of Index +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_INDEX_H +#define LZMA_INDEX_H + +#include "common.h" + + +/// Minimum Unpadded Size +#define UNPADDED_SIZE_MIN LZMA_VLI_C(5) + +/// Maximum Unpadded Size +#define UNPADDED_SIZE_MAX (LZMA_VLI_MAX & ~LZMA_VLI_C(3)) + + +/// Get the size of the Index Padding field. This is needed by Index encoder +/// and decoder, but applications should have no use for this. +extern uint32_t lzma_index_padding_size(const lzma_index *i); + + +/// Set for how many Records to allocate memory the next time +/// lzma_index_append() needs to allocate space for a new Record. +/// This is used only by the Index decoder. +extern void lzma_index_prealloc(lzma_index *i, lzma_vli records); + + +/// Round the variable-length integer to the next multiple of four. 
+static inline lzma_vli +vli_ceil4(lzma_vli vli) +{ + assert(vli <= LZMA_VLI_MAX); + return (vli + 3) & ~LZMA_VLI_C(3); +} + + +/// Calculate the size of the Index field excluding Index Padding +static inline lzma_vli +index_size_unpadded(lzma_vli count, lzma_vli index_list_size) +{ + // Index Indicator + Number of Records + List of Records + CRC32 + return 1 + lzma_vli_size(count) + index_list_size + 4; +} + + +/// Calculate the size of the Index field including Index Padding +static inline lzma_vli +index_size(lzma_vli count, lzma_vli index_list_size) +{ + return vli_ceil4(index_size_unpadded(count, index_list_size)); +} + + +/// Calculate the total size of the Stream +static inline lzma_vli +index_stream_size(lzma_vli blocks_size, + lzma_vli count, lzma_vli index_list_size) +{ + return LZMA_STREAM_HEADER_SIZE + blocks_size + + index_size(count, index_list_size) + + LZMA_STREAM_HEADER_SIZE; +} + +#endif diff --git a/src/liblzma/common/index_decoder.c b/src/liblzma/common/index_decoder.c new file mode 100644 index 000000000000..86a22971b064 --- /dev/null +++ b/src/liblzma/common/index_decoder.c @@ -0,0 +1,343 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_decoder.c +/// \brief Decodes the Index field +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "index.h" +#include "check.h" + + +struct lzma_coder_s { + enum { + SEQ_INDICATOR, + SEQ_COUNT, + SEQ_MEMUSAGE, + SEQ_UNPADDED, + SEQ_UNCOMPRESSED, + SEQ_PADDING_INIT, + SEQ_PADDING, + SEQ_CRC32, + } sequence; + + /// Memory usage limit + uint64_t memlimit; + + /// Target Index + lzma_index *index; + + /// Pointer given by the application, which is set after + /// successful decoding. + lzma_index **index_ptr; + + /// Number of Records left to decode.
+ lzma_vli count; + + /// The most recent Unpadded Size field + lzma_vli unpadded_size; + + /// The most recent Uncompressed Size field + lzma_vli uncompressed_size; + + /// Position in integers + size_t pos; + + /// CRC32 of the List of Records field + uint32_t crc32; +}; + + +static lzma_ret +index_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out lzma_attribute((unused)), + size_t *restrict out_pos lzma_attribute((unused)), + size_t out_size lzma_attribute((unused)), + lzma_action action lzma_attribute((unused))) +{ + // Similar optimization as in index_encoder.c + const size_t in_start = *in_pos; + lzma_ret ret = LZMA_OK; + + while (*in_pos < in_size) + switch (coder->sequence) { + case SEQ_INDICATOR: + // Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or + // LZMA_FORMAT_ERROR, because a typical usage case for Index + // decoder is when parsing the Stream backwards. If seeking + // backward from the Stream Footer gives us something that + // doesn't begin with Index Indicator, the file is considered + // corrupt, not "programming error" or "unrecognized file + // format". One could argue that the application should + // verify the Index Indicator before trying to decode the + // Index, but well, I suppose it is simpler this way. + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + coder->sequence = SEQ_COUNT; + break; + + case SEQ_COUNT: + ret = lzma_vli_decode(&coder->count, &coder->pos, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + coder->pos = 0; + coder->sequence = SEQ_MEMUSAGE; + + // Fall through + + case SEQ_MEMUSAGE: + if (lzma_index_memusage(1, coder->count) > coder->memlimit) { + ret = LZMA_MEMLIMIT_ERROR; + goto out; + } + + // Tell the Index handling code how many Records this + // Index has to allow it to allocate memory more efficiently. 
+ lzma_index_prealloc(coder->index, coder->count); + + ret = LZMA_OK; + coder->sequence = coder->count == 0 + ? SEQ_PADDING_INIT : SEQ_UNPADDED; + break; + + case SEQ_UNPADDED: + case SEQ_UNCOMPRESSED: { + lzma_vli *size = coder->sequence == SEQ_UNPADDED + ? &coder->unpadded_size + : &coder->uncompressed_size; + + ret = lzma_vli_decode(size, &coder->pos, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + + if (coder->sequence == SEQ_UNPADDED) { + // Validate that encoded Unpadded Size isn't too small + // or too big. + if (coder->unpadded_size < UNPADDED_SIZE_MIN + || coder->unpadded_size + > UNPADDED_SIZE_MAX) + return LZMA_DATA_ERROR; + + coder->sequence = SEQ_UNCOMPRESSED; + } else { + // Add the decoded Record to the Index. + return_if_error(lzma_index_append( + coder->index, allocator, + coder->unpadded_size, + coder->uncompressed_size)); + + // Check if this was the last Record. + coder->sequence = --coder->count == 0 + ? SEQ_PADDING_INIT + : SEQ_UNPADDED; + } + + break; + } + + case SEQ_PADDING_INIT: + coder->pos = lzma_index_padding_size(coder->index); + coder->sequence = SEQ_PADDING; + + // Fall through + + case SEQ_PADDING: + if (coder->pos > 0) { + --coder->pos; + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + break; + } + + // Finish the CRC32 calculation. + coder->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, coder->crc32); + + coder->sequence = SEQ_CRC32; + + // Fall through + + case SEQ_CRC32: + do { + if (*in_pos == in_size) + return LZMA_OK; + + if (((coder->crc32 >> (coder->pos * 8)) & 0xFF) + != in[(*in_pos)++]) + return LZMA_DATA_ERROR; + + } while (++coder->pos < 4); + + // Decoding was successful, now we can let the application + // see the decoded Index. + *coder->index_ptr = coder->index; + + // Make index NULL so we don't free it unintentionally. 
+ coder->index = NULL; + + return LZMA_STREAM_END; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + +out: + // Update the CRC32. + coder->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, coder->crc32); + + return ret; +} + + +static void +index_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_index_end(coder->index, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +index_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, + uint64_t *old_memlimit, uint64_t new_memlimit) +{ + *memusage = lzma_index_memusage(1, coder->count); + *old_memlimit = coder->memlimit; + + if (new_memlimit != 0) { + if (new_memlimit < *memusage) + return LZMA_MEMLIMIT_ERROR; + + coder->memlimit = new_memlimit; + } + + return LZMA_OK; +} + + +static lzma_ret +index_decoder_reset(lzma_coder *coder, lzma_allocator *allocator, + lzma_index **i, uint64_t memlimit) +{ + // Remember the pointer given by the application. We will set it + // to point to the decoded Index only if decoding is successful. + // Before that, keep it NULL so that applications can always safely + // pass it to lzma_index_end() whether decoding succeeded or not. + coder->index_ptr = i; + *i = NULL; + + // We always allocate a new lzma_index. + coder->index = lzma_index_init(allocator); + if (coder->index == NULL) + return LZMA_MEM_ERROR; + + // Initialize the rest. + coder->sequence = SEQ_INDICATOR; + coder->memlimit = memlimit; + coder->count = 0; // Needs to be initialized due to _memconfig().
+ coder->pos = 0; + coder->crc32 = 0; + + return LZMA_OK; +} + + +static lzma_ret +index_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_index **i, uint64_t memlimit) +{ + lzma_next_coder_init(&index_decoder_init, next, allocator); + + if (i == NULL || memlimit == 0) + return LZMA_PROG_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &index_decode; + next->end = &index_decoder_end; + next->memconfig = &index_decoder_memconfig; + next->coder->index = NULL; + } else { + lzma_index_end(next->coder->index, allocator); + } + + return index_decoder_reset(next->coder, allocator, i, memlimit); +} + + +extern LZMA_API(lzma_ret) +lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit) +{ + lzma_next_strm_init(index_decoder_init, strm, i, memlimit); + + strm->internal->supported_actions[LZMA_RUN] = true; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_index_buffer_decode( + lzma_index **i, uint64_t *memlimit, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size) +{ + // Sanity checks + if (i == NULL || memlimit == NULL + || in == NULL || in_pos == NULL || *in_pos > in_size) + return LZMA_PROG_ERROR; + + // Initialize the decoder. + lzma_coder coder; + return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit)); + + // Store the input start position so that we can restore it in case + // of an error. + const size_t in_start = *in_pos; + + // Do the actual decoding. + lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size, + NULL, NULL, 0, LZMA_RUN); + + if (ret == LZMA_STREAM_END) { + ret = LZMA_OK; + } else { + // Something went wrong, free the Index structure and restore + // the input position. + lzma_index_end(coder.index, allocator); + *in_pos = in_start; + + if (ret == LZMA_OK) { + // The input is truncated or otherwise corrupt. 
+ // Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR + // like lzma_vli_decode() does in single-call mode. + ret = LZMA_DATA_ERROR; + + } else if (ret == LZMA_MEMLIMIT_ERROR) { + // Tell the caller how much memory would have + // been needed. + *memlimit = lzma_index_memusage(1, coder.count); + } + } + + return ret; +} diff --git a/src/liblzma/common/index_encoder.c b/src/liblzma/common/index_encoder.c new file mode 100644 index 000000000000..706f1fd79905 --- /dev/null +++ b/src/liblzma/common/index_encoder.c @@ -0,0 +1,252 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_encoder.c +/// \brief Encodes the Index field +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "index_encoder.h" +#include "index.h" +#include "check.h" + + +struct lzma_coder_s { + enum { + SEQ_INDICATOR, + SEQ_COUNT, + SEQ_UNPADDED, + SEQ_UNCOMPRESSED, + SEQ_NEXT, + SEQ_PADDING, + SEQ_CRC32, + } sequence; + + /// Index being encoded + const lzma_index *index; + + /// Iterator for the Index being encoded + lzma_index_iter iter; + + /// Position in integers + size_t pos; + + /// CRC32 of the List of Records field + uint32_t crc32; +}; + + +static lzma_ret +index_encode(lzma_coder *coder, + lzma_allocator *allocator lzma_attribute((unused)), + const uint8_t *restrict in lzma_attribute((unused)), + size_t *restrict in_pos lzma_attribute((unused)), + size_t in_size lzma_attribute((unused)), + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size, lzma_action action lzma_attribute((unused))) +{ + // Position where to start calculating CRC32. The idea is that we + // need to call lzma_crc32() only once per call to index_encode(). + const size_t out_start = *out_pos; + + // Return value to use if we return at the end of this function. 
+ // We use "goto out" to jump out of the while-switch construct + // instead of returning directly, because that way we don't need + // to copypaste the lzma_crc32() call to many places. + lzma_ret ret = LZMA_OK; + + while (*out_pos < out_size) + switch (coder->sequence) { + case SEQ_INDICATOR: + out[*out_pos] = 0x00; + ++*out_pos; + coder->sequence = SEQ_COUNT; + break; + + case SEQ_COUNT: { + const lzma_vli count = lzma_index_block_count(coder->index); + ret = lzma_vli_encode(count, &coder->pos, + out, out_pos, out_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + coder->sequence = SEQ_NEXT; + break; + } + + case SEQ_NEXT: + if (lzma_index_iter_next( + &coder->iter, LZMA_INDEX_ITER_BLOCK)) { + // Get the size of the Index Padding field. + coder->pos = lzma_index_padding_size(coder->index); + assert(coder->pos <= 3); + coder->sequence = SEQ_PADDING; + break; + } + + coder->sequence = SEQ_UNPADDED; + + // Fall through + + case SEQ_UNPADDED: + case SEQ_UNCOMPRESSED: { + const lzma_vli size = coder->sequence == SEQ_UNPADDED + ? coder->iter.block.unpadded_size + : coder->iter.block.uncompressed_size; + + ret = lzma_vli_encode(size, &coder->pos, + out, out_pos, out_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + + // Advance to SEQ_UNCOMPRESSED or SEQ_NEXT. + ++coder->sequence; + break; + } + + case SEQ_PADDING: + if (coder->pos > 0) { + --coder->pos; + out[(*out_pos)++] = 0x00; + break; + } + + // Finish the CRC32 calculation. + coder->crc32 = lzma_crc32(out + out_start, + *out_pos - out_start, coder->crc32); + + coder->sequence = SEQ_CRC32; + + // Fall through + + case SEQ_CRC32: + // We don't use the main loop, because we don't want + // coder->crc32 to be touched anymore. 
+ do { + if (*out_pos == out_size) + return LZMA_OK; + + out[*out_pos] = (coder->crc32 >> (coder->pos * 8)) + & 0xFF; + ++*out_pos; + + } while (++coder->pos < 4); + + return LZMA_STREAM_END; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + +out: + // Update the CRC32. + coder->crc32 = lzma_crc32(out + out_start, + *out_pos - out_start, coder->crc32); + + return ret; +} + + +static void +index_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_free(coder, allocator); + return; +} + + +static void +index_encoder_reset(lzma_coder *coder, const lzma_index *i) +{ + lzma_index_iter_init(&coder->iter, i); + + coder->sequence = SEQ_INDICATOR; + coder->index = i; + coder->pos = 0; + coder->crc32 = 0; + + return; +} + + +extern lzma_ret +lzma_index_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_index *i) +{ + lzma_next_coder_init(&lzma_index_encoder_init, next, allocator); + + if (i == NULL) + return LZMA_PROG_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &index_encode; + next->end = &index_encoder_end; + } + + index_encoder_reset(next->coder, i); + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_index_encoder(lzma_stream *strm, const lzma_index *i) +{ + lzma_next_strm_init(lzma_index_encoder_init, strm, i); + + strm->internal->supported_actions[LZMA_RUN] = true; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_index_buffer_encode(const lzma_index *i, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + // Validate the arguments. + if (i == NULL || out == NULL || out_pos == NULL || *out_pos > out_size) + return LZMA_PROG_ERROR; + + // Don't try to encode if there's not enough output space. + if (out_size - *out_pos < lzma_index_size(i)) + return LZMA_BUF_ERROR; + + // The Index encoder needs just one small data structure so we can + // allocate it on stack. 
+ lzma_coder coder; + index_encoder_reset(&coder, i); + + // Do the actual encoding. This should never fail, but store + // the original *out_pos just in case. + const size_t out_start = *out_pos; + lzma_ret ret = index_encode(&coder, NULL, NULL, NULL, 0, + out, out_pos, out_size, LZMA_RUN); + + if (ret == LZMA_STREAM_END) { + ret = LZMA_OK; + } else { + // We should never get here, but just in case, restore the + // output position and set the error accordingly if something + // goes wrong and debugging isn't enabled. + assert(0); + *out_pos = out_start; + ret = LZMA_PROG_ERROR; + } + + return ret; +} diff --git a/src/liblzma/common/index_encoder.h b/src/liblzma/common/index_encoder.h new file mode 100644 index 000000000000..a13c94dcd077 --- /dev/null +++ b/src/liblzma/common/index_encoder.h @@ -0,0 +1,23 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_encoder.h +/// \brief Encodes the Index field +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_INDEX_ENCODER_H +#define LZMA_INDEX_ENCODER_H + +#include "common.h" + + +extern lzma_ret lzma_index_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_index *i); + + +#endif diff --git a/src/liblzma/common/index_hash.c b/src/liblzma/common/index_hash.c new file mode 100644 index 000000000000..e3e9386ae8e3 --- /dev/null +++ b/src/liblzma/common/index_hash.c @@ -0,0 +1,332 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_hash.c +/// \brief Validates Index by using a hash function +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "index.h" +#include "check.h" + + +typedef struct { + /// Sum of the Block sizes (including Block Padding) + lzma_vli blocks_size; + + /// Sum of the Uncompressed Size fields + lzma_vli uncompressed_size; + + /// Number of Records + lzma_vli count; + + /// Size of the List of Index Records as bytes + lzma_vli index_list_size; + + /// Check calculated from Unpadded Sizes and Uncompressed Sizes. + lzma_check_state check; + +} lzma_index_hash_info; + + +struct lzma_index_hash_s { + enum { + SEQ_BLOCK, + SEQ_COUNT, + SEQ_UNPADDED, + SEQ_UNCOMPRESSED, + SEQ_PADDING_INIT, + SEQ_PADDING, + SEQ_CRC32, + } sequence; + + /// Information collected while decoding the actual Blocks. + lzma_index_hash_info blocks; + + /// Information collected from the Index field. + lzma_index_hash_info records; + + /// Number of Records not fully decoded + lzma_vli remaining; + + /// Unpadded Size currently being read from an Index Record. + lzma_vli unpadded_size; + + /// Uncompressed Size currently being read from an Index Record. + lzma_vli uncompressed_size; + + /// Position in variable-length integers when decoding them from + /// the List of Records. 
+ size_t pos; + + /// CRC32 of the Index + uint32_t crc32; +}; + + +extern LZMA_API(lzma_index_hash *) +lzma_index_hash_init(lzma_index_hash *index_hash, lzma_allocator *allocator) +{ + if (index_hash == NULL) { + index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator); + if (index_hash == NULL) + return NULL; + } + + index_hash->sequence = SEQ_BLOCK; + index_hash->blocks.blocks_size = 0; + index_hash->blocks.uncompressed_size = 0; + index_hash->blocks.count = 0; + index_hash->blocks.index_list_size = 0; + index_hash->records.blocks_size = 0; + index_hash->records.uncompressed_size = 0; + index_hash->records.count = 0; + index_hash->records.index_list_size = 0; + index_hash->unpadded_size = 0; + index_hash->uncompressed_size = 0; + index_hash->pos = 0; + index_hash->crc32 = 0; + + // These cannot fail because LZMA_CHECK_BEST is known to be supported. + (void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST); + (void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST); + + return index_hash; +} + + +extern LZMA_API(void) +lzma_index_hash_end(lzma_index_hash *index_hash, lzma_allocator *allocator) +{ + lzma_free(index_hash, allocator); + return; +} + + +extern LZMA_API(lzma_vli) +lzma_index_hash_size(const lzma_index_hash *index_hash) +{ + // Get the size of the Index from ->blocks instead of ->records for + // cases where application wants to know the Index Size before + // decoding the Index. + return index_size(index_hash->blocks.count, + index_hash->blocks.index_list_size); +} + + +/// Updates the sizes and the hash without any validation. 
+static lzma_ret +hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size, + lzma_vli uncompressed_size) +{ + info->blocks_size += vli_ceil4(unpadded_size); + info->uncompressed_size += uncompressed_size; + info->index_list_size += lzma_vli_size(unpadded_size) + + lzma_vli_size(uncompressed_size); + ++info->count; + + const lzma_vli sizes[2] = { unpadded_size, uncompressed_size }; + lzma_check_update(&info->check, LZMA_CHECK_BEST, + (const uint8_t *)(sizes), sizeof(sizes)); + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size, + lzma_vli uncompressed_size) +{ + // Validate the arguments. + if (index_hash->sequence != SEQ_BLOCK + || unpadded_size < UNPADDED_SIZE_MIN + || unpadded_size > UNPADDED_SIZE_MAX + || uncompressed_size > LZMA_VLI_MAX) + return LZMA_PROG_ERROR; + + // Update the hash. + return_if_error(hash_append(&index_hash->blocks, + unpadded_size, uncompressed_size)); + + // Validate the properties of *info are still in allowed limits. + if (index_hash->blocks.blocks_size > LZMA_VLI_MAX + || index_hash->blocks.uncompressed_size > LZMA_VLI_MAX + || index_size(index_hash->blocks.count, + index_hash->blocks.index_list_size) + > LZMA_BACKWARD_SIZE_MAX + || index_stream_size(index_hash->blocks.blocks_size, + index_hash->blocks.count, + index_hash->blocks.index_list_size) + > LZMA_VLI_MAX) + return LZMA_DATA_ERROR; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, + size_t *in_pos, size_t in_size) +{ + // Catch zero input buffer here, because in contrast to Index encoder + // and decoder functions, applications call this function directly + // instead of via lzma_code(), which does the buffer checking. + if (*in_pos >= in_size) + return LZMA_BUF_ERROR; + + // NOTE: This function has many similarities to index_encode() and + // index_decode() functions found from index_encoder.c and + // index_decoder.c. 
See the comments especially in index_encoder.c. + const size_t in_start = *in_pos; + lzma_ret ret = LZMA_OK; + + while (*in_pos < in_size) + switch (index_hash->sequence) { + case SEQ_BLOCK: + // Check the Index Indicator is present. + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + index_hash->sequence = SEQ_COUNT; + break; + + case SEQ_COUNT: { + ret = lzma_vli_decode(&index_hash->remaining, + &index_hash->pos, in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + // The count must match the count of the Blocks decoded. + if (index_hash->remaining != index_hash->blocks.count) + return LZMA_DATA_ERROR; + + ret = LZMA_OK; + index_hash->pos = 0; + + // Handle the special case when there are no Blocks. + index_hash->sequence = index_hash->remaining == 0 + ? SEQ_PADDING_INIT : SEQ_UNPADDED; + break; + } + + case SEQ_UNPADDED: + case SEQ_UNCOMPRESSED: { + lzma_vli *size = index_hash->sequence == SEQ_UNPADDED + ? &index_hash->unpadded_size + : &index_hash->uncompressed_size; + + ret = lzma_vli_decode(size, &index_hash->pos, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + index_hash->pos = 0; + + if (index_hash->sequence == SEQ_UNPADDED) { + if (index_hash->unpadded_size < UNPADDED_SIZE_MIN + || index_hash->unpadded_size + > UNPADDED_SIZE_MAX) + return LZMA_DATA_ERROR; + + index_hash->sequence = SEQ_UNCOMPRESSED; + } else { + // Update the hash. + return_if_error(hash_append(&index_hash->records, + index_hash->unpadded_size, + index_hash->uncompressed_size)); + + // Verify that we don't go over the known sizes. Note + // that this validation is simpler than the one used + // in lzma_index_hash_append(), because here we know + // that values in index_hash->blocks are already + // validated and we are fine as long as we don't + // exceed them in index_hash->records. 
+ if (index_hash->blocks.blocks_size + < index_hash->records.blocks_size + || index_hash->blocks.uncompressed_size + < index_hash->records.uncompressed_size + || index_hash->blocks.index_list_size + < index_hash->records.index_list_size) + return LZMA_DATA_ERROR; + + // Check if this was the last Record. + index_hash->sequence = --index_hash->remaining == 0 + ? SEQ_PADDING_INIT : SEQ_UNPADDED; + } + + break; + } + + case SEQ_PADDING_INIT: + index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded( + index_hash->records.count, + index_hash->records.index_list_size)) & 3; + index_hash->sequence = SEQ_PADDING; + + // Fall through + + case SEQ_PADDING: + if (index_hash->pos > 0) { + --index_hash->pos; + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + break; + } + + // Compare the sizes. + if (index_hash->blocks.blocks_size + != index_hash->records.blocks_size + || index_hash->blocks.uncompressed_size + != index_hash->records.uncompressed_size + || index_hash->blocks.index_list_size + != index_hash->records.index_list_size) + return LZMA_DATA_ERROR; + + // Finish the hashes and compare them. + lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST); + lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST); + if (memcmp(index_hash->blocks.check.buffer.u8, + index_hash->records.check.buffer.u8, + lzma_check_size(LZMA_CHECK_BEST)) != 0) + return LZMA_DATA_ERROR; + + // Finish the CRC32 calculation. 
+ index_hash->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, index_hash->crc32); + + index_hash->sequence = SEQ_CRC32; + + // Fall through + + case SEQ_CRC32: + do { + if (*in_pos == in_size) + return LZMA_OK; + + if (((index_hash->crc32 >> (index_hash->pos * 8)) + & 0xFF) != in[(*in_pos)++]) + return LZMA_DATA_ERROR; + + } while (++index_hash->pos < 4); + + return LZMA_STREAM_END; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + +out: + // Update the CRC32, + index_hash->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, index_hash->crc32); + + return ret; +}
diff --git a/src/liblzma/common/stream_buffer_decoder.c b/src/liblzma/common/stream_buffer_decoder.c
new file mode 100644
index 000000000000..ae753155180d
--- /dev/null
+++ b/src/liblzma/common/stream_buffer_decoder.c
@@ -0,0 +1,99 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file stream_buffer_decoder.c
+/// \brief Single-call .xz Stream decoder
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "stream_decoder.h"
+
+
+/// Decode a whole .xz Stream from in[] to out[] with a single call.
+///
+/// On LZMA_OK, *in_pos and *out_pos have been advanced by the decoder.
+/// On every other return value they hold the values they had on entry.
+/// With LZMA_MEMLIMIT_ERROR, *memlimit is overwritten with the memory
+/// usage reported by the decoder's memconfig callback.
+extern LZMA_API(lzma_ret)
+lzma_stream_buffer_decode(uint64_t *memlimit, uint32_t flags,
+		lzma_allocator *allocator,
+		const uint8_t *in, size_t *in_pos, size_t in_size,
+		uint8_t *out, size_t *out_pos, size_t out_size)
+{
+	// Sanity checks
+	if (in_pos == NULL || (in == NULL && *in_pos != in_size)
+			|| *in_pos > in_size || out_pos == NULL
+			|| (out == NULL && *out_pos != out_size)
+			|| *out_pos > out_size)
+		return LZMA_PROG_ERROR;
+
+	// Catch flags that are not allowed in buffer-to-buffer decoding.
+	if (flags & LZMA_TELL_ANY_CHECK)
+		return LZMA_PROG_ERROR;
+
+	// Initialize the Stream decoder.
+	// TODO: We need something to tell the decoder that it can use the
+	// output buffer as workspace, and thus save significant amount of RAM.
+	lzma_next_coder stream_decoder = LZMA_NEXT_CODER_INIT;
+	lzma_ret ret = lzma_stream_decoder_init(
+			&stream_decoder, allocator, *memlimit, flags);
+
+	if (ret == LZMA_OK) {
+		// Save the positions so that we can restore them in case
+		// an error occurs.
+		const size_t in_start = *in_pos;
+		const size_t out_start = *out_pos;
+
+		// Do the actual decoding.
+		ret = stream_decoder.code(stream_decoder.coder, allocator,
+				in, in_pos, in_size, out, out_pos, out_size,
+				LZMA_FINISH);
+
+		if (ret == LZMA_STREAM_END) {
+			ret = LZMA_OK;
+		} else {
+			if (ret == LZMA_OK) {
+				// Either the input was truncated or the
+				// output buffer was too small.
+				assert(*in_pos == in_size
+						|| *out_pos == out_size);
+
+				// If all the input was consumed, then the
+				// input is truncated, even if the output
+				// buffer is also full. This is because
+				// processing the last byte of the Stream
+				// never produces output.
+				if (*in_pos == in_size)
+					ret = LZMA_DATA_ERROR;
+				else
+					ret = LZMA_BUF_ERROR;
+
+			} else if (ret == LZMA_MEMLIMIT_ERROR) {
+				// Let the caller know how much memory would
+				// have been needed.
+				uint64_t memusage;
+				(void)stream_decoder.memconfig(
+						stream_decoder.coder,
+						memlimit, &memusage, 0);
+			}
+
+			// Something went wrong, restore the positions. This
+			// must come after the checks above, which look at
+			// how far the decoder actually got.
+			*in_pos = in_start;
+			*out_pos = out_start;
+		}
+	}
+
+	// Free the decoder memory. This needs to be done even if
+	// initialization fails, because the internal API doesn't
+	// require the initialization function to free its memory on error.
+	lzma_next_end(&stream_decoder, allocator);
+
+	return ret;
+}
diff --git a/src/liblzma/common/stream_buffer_encoder.c b/src/liblzma/common/stream_buffer_encoder.c
new file mode 100644
index 000000000000..bbafaa6d01d9
--- /dev/null
+++ b/src/liblzma/common/stream_buffer_encoder.c
@@ -0,0 +1,131 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file stream_buffer_encoder.c
+/// \brief Single-call .xz Stream encoder
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "index.h"
+
+
+/// Maximum size of Index that has exactly one Record.
+/// Index Indicator + Number of Records + Record + CRC32 rounded up to
+/// the next multiple of four.
+#define INDEX_BOUND ((1 + 1 + 2 * LZMA_VLI_BYTES_MAX + 4 + 3) & ~3)
+
+/// Stream Header, Stream Footer, and Index
+#define HEADERS_BOUND (2 * LZMA_STREAM_HEADER_SIZE + INDEX_BOUND)
+
+
+extern LZMA_API(size_t)
+lzma_stream_buffer_bound(size_t uncompressed_size)
+{
+	// Get the maximum possible size of a Block.
+	const size_t block_bound = lzma_block_buffer_bound(uncompressed_size);
+	if (block_bound == 0)
+		return 0;
+
+	// Catch the possible integer overflow and also prevent the size of
+	// the Stream exceeding LZMA_VLI_MAX (theoretically possible on
+	// 64-bit systems).
+	if (MIN(SIZE_MAX, LZMA_VLI_MAX) - block_bound < HEADERS_BOUND)
+		return 0;
+
+	return block_bound + HEADERS_BOUND;
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_stream_buffer_encode(lzma_filter *filters, lzma_check check,
+		lzma_allocator *allocator, const uint8_t *in, size_t in_size,
+		uint8_t *out, size_t *out_pos_ptr, size_t out_size)
+{
+	// Sanity checks
+	if (filters == NULL || (unsigned int)(check) > LZMA_CHECK_ID_MAX
+			|| (in == NULL && in_size != 0) || out == NULL
+			|| out_pos_ptr == NULL || *out_pos_ptr > out_size)
+		return LZMA_PROG_ERROR;
+
+	// Note for the paranoids: Index encoder prevents the Stream from
+	// getting too big and still being accepted with LZMA_OK, and Block
+	// encoder catches if the input is too big. So we don't need to
+	// separately check if the buffers are too big.
+
+	// Use a local copy. We update *out_pos_ptr only if everything
+	// succeeds.
+	size_t out_pos = *out_pos_ptr;
+
+	// Check that there's enough space for both Stream Header and
+	// Stream Footer.
+	if (out_size - out_pos <= 2 * LZMA_STREAM_HEADER_SIZE)
+		return LZMA_BUF_ERROR;
+
+	// Reserve space for Stream Footer so we don't need to check for
+	// available space again before encoding Stream Footer.
+	out_size -= LZMA_STREAM_HEADER_SIZE;
+
+	// Encode the Stream Header.
+	lzma_stream_flags stream_flags = {
+		.version = 0,
+		.check = check,
+	};
+
+	if (lzma_stream_header_encode(&stream_flags, out + out_pos)
+			!= LZMA_OK)
+		return LZMA_PROG_ERROR;
+
+	out_pos += LZMA_STREAM_HEADER_SIZE;
+
+	// Block
+	lzma_block block = {
+		.version = 0,
+		.check = check,
+		.filters = filters,
+	};
+
+	return_if_error(lzma_block_buffer_encode(&block, allocator,
+			in, in_size, out, &out_pos, out_size));
+
+	// Index
+	{
+		// Create an Index with one Record.
+		lzma_index *i = lzma_index_init(allocator);
+		if (i == NULL)
+			return LZMA_MEM_ERROR;
+
+		lzma_ret ret = lzma_index_append(i, allocator,
+				lzma_block_unpadded_size(&block),
+				block.uncompressed_size);
+
+		// If adding the Record was successful, encode the Index
+		// and get its size which will be stored into Stream Footer.
+		if (ret == LZMA_OK) {
+			ret = lzma_index_buffer_encode(
+					i, out, &out_pos, out_size);
+
+			stream_flags.backward_size = lzma_index_size(i);
+		}
+
+		lzma_index_end(i, allocator);
+
+		if (ret != LZMA_OK)
+			return ret;
+	}
+
+	// Stream Footer. We have already reserved space for this.
+	if (lzma_stream_footer_encode(&stream_flags, out + out_pos)
+			!= LZMA_OK)
+		return LZMA_PROG_ERROR;
+
+	out_pos += LZMA_STREAM_HEADER_SIZE;
+
+	// Everything went fine, make the new output position available
+	// to the application.
+	*out_pos_ptr = out_pos;
+	return LZMA_OK;
+}
diff --git a/src/liblzma/common/stream_decoder.c b/src/liblzma/common/stream_decoder.c
new file mode 100644
index 000000000000..37ea71edbd71
--- /dev/null
+++ b/src/liblzma/common/stream_decoder.c
@@ -0,0 +1,463 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file stream_decoder.c
+/// \brief Decodes .xz Streams
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "stream_decoder.h"
+#include "block_decoder.h"
+
+
+struct lzma_coder_s {
+	enum {
+		SEQ_STREAM_HEADER,
+		SEQ_BLOCK_HEADER,
+		SEQ_BLOCK,
+		SEQ_INDEX,
+		SEQ_STREAM_FOOTER,
+		SEQ_STREAM_PADDING,
+	} sequence;
+
+	/// Block or Metadata decoder. This takes little memory and the same
+	/// data structure can be used to decode every Block Header, so it's
+	/// a good idea to have a separate lzma_next_coder structure for it.
+	lzma_next_coder block_decoder;
+
+	/// Block options decoded by the Block Header decoder and used by
+	/// the Block decoder.
+	lzma_block block_options;
+
+	/// Stream Flags from Stream Header
+	lzma_stream_flags stream_flags;
+
+	/// Index is hashed so that it can be compared to the sizes of Blocks
+	/// with O(1) memory usage.
+	lzma_index_hash *index_hash;
+
+	/// Memory usage limit
+	uint64_t memlimit;
+
+	/// Amount of memory actually needed (only an estimate)
+	uint64_t memusage;
+
+	/// If true, LZMA_NO_CHECK is returned if the Stream has
+	/// no integrity check.
+	bool tell_no_check;
+
+	/// If true, LZMA_UNSUPPORTED_CHECK is returned if the Stream has
+	/// an integrity check that isn't supported by this liblzma build.
+	bool tell_unsupported_check;
+
+	/// If true, LZMA_GET_CHECK is returned after decoding Stream Header.
+	bool tell_any_check;
+
+	/// If true, we will decode concatenated Streams that possibly have
+	/// Stream Padding between or after them. LZMA_STREAM_END is returned
+	/// once the application isn't giving us any new input, and we aren't
+	/// in the middle of a Stream, and possible Stream Padding is a
+	/// multiple of four bytes.
+	bool concatenated;
+
+	/// When decoding concatenated Streams, this is true as long as we
+	/// are decoding the first Stream. This is needed to avoid misleading
+	/// LZMA_FORMAT_ERROR in case the later Streams don't have valid magic
+	/// bytes.
+	bool first_stream;
+
+	/// Write position in buffer[] and position in Stream Padding
+	size_t pos;
+
+	/// Buffer to hold Stream Header, Block Header, and Stream Footer.
+	/// Block Header has biggest maximum size.
+	uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX];
+};
+
+
+/// (Re)initialize the Index hash and go back to waiting for a Stream Header.
+/// Used both at initialization and before each concatenated Stream.
+static lzma_ret
+stream_decoder_reset(lzma_coder *coder, lzma_allocator *allocator)
+{
+	// Initialize the Index hash used to verify the Index.
+	coder->index_hash = lzma_index_hash_init(coder->index_hash, allocator);
+	if (coder->index_hash == NULL)
+		return LZMA_MEM_ERROR;
+
+	// Reset the rest of the variables.
+	coder->sequence = SEQ_STREAM_HEADER;
+	coder->pos = 0;
+
+	return LZMA_OK;
+}
+
+
+/// Decode .xz Streams as a state machine driven by coder->sequence. A case
+/// either returns (more input or output space is needed, the Stream ended,
+/// or an error occurred) or moves on to the next state.
+static lzma_ret
+stream_decode(lzma_coder *coder, lzma_allocator *allocator,
+		const uint8_t *restrict in, size_t *restrict in_pos,
+		size_t in_size, uint8_t *restrict out,
+		size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+	// When decoding the actual Block, it may be able to produce more
+	// output even if we don't give it any new input.
+	while (true)
+	switch (coder->sequence) {
+	case SEQ_STREAM_HEADER: {
+		// Copy the Stream Header to the internal buffer.
+		lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
+				LZMA_STREAM_HEADER_SIZE);
+
+		// Return if we didn't get the whole Stream Header yet.
+		if (coder->pos < LZMA_STREAM_HEADER_SIZE)
+			return LZMA_OK;
+
+		coder->pos = 0;
+
+		// Decode the Stream Header.
+		const lzma_ret ret = lzma_stream_header_decode(
+				&coder->stream_flags, coder->buffer);
+		if (ret != LZMA_OK)
+			return ret == LZMA_FORMAT_ERROR && !coder->first_stream
+					? LZMA_DATA_ERROR : ret;
+
+		// If we are decoding concatenated Streams, and the later
+		// Streams have invalid Header Magic Bytes, we give
+		// LZMA_DATA_ERROR instead of LZMA_FORMAT_ERROR.
+		coder->first_stream = false;
+
+		// Copy the type of the Check so that Block Header and Block
+		// decoders see it.
+		coder->block_options.check = coder->stream_flags.check;
+
+		// Even if we return LZMA_*_CHECK below, we want
+		// to continue from Block Header decoding.
+		coder->sequence = SEQ_BLOCK_HEADER;
+
+		// Detect if there's no integrity check or if it is
+		// unsupported if those were requested by the application.
+		if (coder->tell_no_check && coder->stream_flags.check
+				== LZMA_CHECK_NONE)
+			return LZMA_NO_CHECK;
+
+		if (coder->tell_unsupported_check
+				&& !lzma_check_is_supported(
+					coder->stream_flags.check))
+			return LZMA_UNSUPPORTED_CHECK;
+
+		if (coder->tell_any_check)
+			return LZMA_GET_CHECK;
+	}
+
+	// Fall through
+
+	case SEQ_BLOCK_HEADER: {
+		if (*in_pos >= in_size)
+			return LZMA_OK;
+
+		if (coder->pos == 0) {
+			// Detect if it's Index.
+			if (in[*in_pos] == 0x00) {
+				coder->sequence = SEQ_INDEX;
+				break;
+			}
+
+			// Calculate the size of the Block Header. Note that
+			// Block Header decoder wants to see this byte too
+			// so don't advance *in_pos.
+			coder->block_options.header_size
+					= lzma_block_header_size_decode(
+						in[*in_pos]);
+		}
+
+		// Copy the Block Header to the internal buffer.
+		lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
+				coder->block_options.header_size);
+
+		// Return if we didn't get the whole Block Header yet.
+		if (coder->pos < coder->block_options.header_size)
+			return LZMA_OK;
+
+		coder->pos = 0;
+
+		// Version 0 is currently the only possible version.
+		coder->block_options.version = 0;
+
+		// Set up a buffer to hold the filter chain. Block Header
+		// decoder will initialize all members of this array so
+		// we don't need to do it here.
+		lzma_filter filters[LZMA_FILTERS_MAX + 1];
+		coder->block_options.filters = filters;
+
+		// Decode the Block Header.
+		return_if_error(lzma_block_header_decode(&coder->block_options,
+				allocator, coder->buffer));
+
+		// Check the memory usage limit.
+		const uint64_t memusage = lzma_raw_decoder_memusage(filters);
+		lzma_ret ret;
+
+		if (memusage == UINT64_MAX) {
+			// One or more unknown Filter IDs.
+			ret = LZMA_OPTIONS_ERROR;
+		} else {
+			// Now we can set coder->memusage since we know that
+			// the filter chain is valid. We don't want
+			// lzma_memusage() to return UINT64_MAX in case of
+			// invalid filter chain.
+			coder->memusage = memusage;
+
+			if (memusage > coder->memlimit) {
+				// The chain would need too much memory.
+				ret = LZMA_MEMLIMIT_ERROR;
+			} else {
+				// Memory usage is OK.
+				// Initialize the Block decoder.
+				ret = lzma_block_decoder_init(
+						&coder->block_decoder,
+						allocator,
+						&coder->block_options);
+			}
+		}
+
+		// Free the allocated filter options since they are needed
+		// only to initialize the Block decoder.
+		for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i)
+			lzma_free(filters[i].options, allocator);
+
+		coder->block_options.filters = NULL;
+
+		// Check if memory usage calculation and Block decoder
+		// initialization succeeded.
+		if (ret != LZMA_OK)
+			return ret;
+
+		coder->sequence = SEQ_BLOCK;
+	}
+
+	// Fall through
+
+	case SEQ_BLOCK: {
+		const lzma_ret ret = coder->block_decoder.code(
+				coder->block_decoder.coder, allocator,
+				in, in_pos, in_size, out, out_pos, out_size,
+				action);
+
+		if (ret != LZMA_STREAM_END)
+			return ret;
+
+		// Block decoded successfully. Add the new size pair to
+		// the Index hash.
+		return_if_error(lzma_index_hash_append(coder->index_hash,
+				lzma_block_unpadded_size(
+					&coder->block_options),
+				coder->block_options.uncompressed_size));
+
+		coder->sequence = SEQ_BLOCK_HEADER;
+		break;
+	}
+
+	case SEQ_INDEX: {
+		// If we don't have any input, don't call
+		// lzma_index_hash_decode() since it would return
+		// LZMA_BUF_ERROR, which we must not do here.
+		if (*in_pos >= in_size)
+			return LZMA_OK;
+
+		// Decode the Index and compare it to the hash calculated
+		// from the sizes of the Blocks (if any).
+		const lzma_ret ret = lzma_index_hash_decode(coder->index_hash,
+				in, in_pos, in_size);
+		if (ret != LZMA_STREAM_END)
+			return ret;
+
+		coder->sequence = SEQ_STREAM_FOOTER;
+	}
+
+	// Fall through
+
+	case SEQ_STREAM_FOOTER: {
+		// Copy the Stream Footer to the internal buffer.
+		lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
+				LZMA_STREAM_HEADER_SIZE);
+
+		// Return if we didn't get the whole Stream Footer yet.
+		if (coder->pos < LZMA_STREAM_HEADER_SIZE)
+			return LZMA_OK;
+
+		coder->pos = 0;
+
+		// Decode the Stream Footer. The decoder gives
+		// LZMA_FORMAT_ERROR if the magic bytes don't match,
+		// so convert that return code to LZMA_DATA_ERROR.
+		lzma_stream_flags footer_flags;
+		const lzma_ret ret = lzma_stream_footer_decode(
+				&footer_flags, coder->buffer);
+		if (ret != LZMA_OK)
+			return ret == LZMA_FORMAT_ERROR
+					? LZMA_DATA_ERROR : ret;
+
+		// Check that Index Size stored in the Stream Footer matches
+		// the real size of the Index field.
+		if (lzma_index_hash_size(coder->index_hash)
+				!= footer_flags.backward_size)
+			return LZMA_DATA_ERROR;
+
+		// Compare that the Stream Flags fields are identical in
+		// both Stream Header and Stream Footer.
+		return_if_error(lzma_stream_flags_compare(
+				&coder->stream_flags, &footer_flags));
+
+		if (!coder->concatenated)
+			return LZMA_STREAM_END;
+
+		coder->sequence = SEQ_STREAM_PADDING;
+	}
+
+	// Fall through
+
+	case SEQ_STREAM_PADDING:
+		assert(coder->concatenated);
+
+		// Skip over possible Stream Padding.
+		while (true) {
+			if (*in_pos >= in_size) {
+				// Unless LZMA_FINISH was used, we cannot
+				// know if there's more input coming later.
+				if (action != LZMA_FINISH)
+					return LZMA_OK;
+
+				// Stream Padding must be a multiple of
+				// four bytes.
+				return coder->pos == 0
+						? LZMA_STREAM_END
+						: LZMA_DATA_ERROR;
+			}
+
+			// If the byte is not zero, it probably indicates
+			// beginning of a new Stream (or the file is corrupt).
+			if (in[*in_pos] != 0x00)
+				break;
+
+			++*in_pos;
+			coder->pos = (coder->pos + 1) & 3;
+		}
+
+		// Stream Padding must be a multiple of four bytes (empty
+		// Stream Padding is OK).
+		if (coder->pos != 0) {
+			++*in_pos;
+			return LZMA_DATA_ERROR;
+		}
+
+		// Prepare to decode the next Stream.
+		return_if_error(stream_decoder_reset(coder, allocator));
+		break;
+
+	default:
+		assert(0);
+		return LZMA_PROG_ERROR;
+	}
+
+	// Never reached
+}
+
+
+/// Free the Block decoder, the Index hash, and the coder structure itself.
+static void
+stream_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
+{
+	lzma_next_end(&coder->block_decoder, allocator);
+	lzma_index_hash_end(coder->index_hash, allocator);
+	lzma_free(coder, allocator);
+	return;
+}
+
+
+/// Return the Check ID stored in the decoded Stream Flags.
+static lzma_check
+stream_decoder_get_check(const lzma_coder *coder)
+{
+	return coder->stream_flags.check;
+}
+
+
+/// Store the current memory usage and limit to *memusage and *old_memlimit.
+/// A non-zero new_memlimit replaces the limit unless it is smaller than the
+/// current usage, in which case LZMA_MEMLIMIT_ERROR is returned.
+static lzma_ret
+stream_decoder_memconfig(lzma_coder *coder, uint64_t *memusage,
+		uint64_t *old_memlimit, uint64_t new_memlimit)
+{
+	*memusage = coder->memusage;
+	*old_memlimit = coder->memlimit;
+
+	if (new_memlimit != 0) {
+		if (new_memlimit < coder->memusage)
+			return LZMA_MEMLIMIT_ERROR;
+
+		coder->memlimit = new_memlimit;
+	}
+
+	return LZMA_OK;
+}
+
+
+/// Initialize *next to decode .xz Streams. memlimit must be non-zero and
+/// flags must not contain bits outside LZMA_SUPPORTED_FLAGS.
+extern lzma_ret
+lzma_stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+		uint64_t memlimit, uint32_t flags)
+{
+	lzma_next_coder_init(&lzma_stream_decoder_init, next, allocator);
+
+	if (memlimit == 0)
+		return LZMA_PROG_ERROR;
+
+	if (flags & ~LZMA_SUPPORTED_FLAGS)
+		return LZMA_OPTIONS_ERROR;
+
+	if (next->coder == NULL) {
+		next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
+		if (next->coder == NULL)
+			return LZMA_MEM_ERROR;
+
+		next->code = &stream_decode;
+		next->end = &stream_decoder_end;
+		next->get_check = &stream_decoder_get_check;
+		next->memconfig = &stream_decoder_memconfig;
+
+		next->coder->block_decoder = LZMA_NEXT_CODER_INIT;
+		next->coder->index_hash = NULL;
+	}
+
+	next->coder->memlimit = memlimit;
+	next->coder->memusage = LZMA_MEMUSAGE_BASE;
+	next->coder->tell_no_check = (flags & LZMA_TELL_NO_CHECK) != 0;
+	next->coder->tell_unsupported_check
+			= (flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0;
+	next->coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0;
+	next->coder->concatenated = (flags & LZMA_CONCATENATED) != 0;
+	next->coder->first_stream = true;
+
+	return stream_decoder_reset(next->coder, allocator);
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_stream_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags)
+{
+	lzma_next_strm_init(lzma_stream_decoder_init, strm, memlimit, flags);
+
+	strm->internal->supported_actions[LZMA_RUN] = true;
+	strm->internal->supported_actions[LZMA_FINISH] = true;
+
+	return LZMA_OK;
+}
diff --git a/src/liblzma/common/stream_decoder.h b/src/liblzma/common/stream_decoder.h
new file mode 100644
index 000000000000..e54ac28f44a5
--- /dev/null
+++ b/src/liblzma/common/stream_decoder.h
@@ -0,0 +1,21 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file stream_decoder.h
+/// \brief Decodes .xz Streams
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_STREAM_DECODER_H
+#define LZMA_STREAM_DECODER_H
+
+#include "common.h"
+
+extern lzma_ret lzma_stream_decoder_init(lzma_next_coder *next,
+		lzma_allocator *allocator, uint64_t memlimit, uint32_t flags);
+
+#endif
diff --git a/src/liblzma/common/stream_encoder.c b/src/liblzma/common/stream_encoder.c
new file mode 100644
index 000000000000..48d91da7933c
--- /dev/null
+++ b/src/liblzma/common/stream_encoder.c
@@ -0,0 +1,331 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file stream_encoder.c
+/// \brief Encodes .xz Streams
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "stream_encoder.h"
+#include "block_encoder.h"
+#include "index_encoder.h"
+
+
+struct lzma_coder_s {
+	enum {
+		SEQ_STREAM_HEADER,
+		SEQ_BLOCK_INIT,
+		SEQ_BLOCK_HEADER,
+		SEQ_BLOCK_ENCODE,
+		SEQ_INDEX_ENCODE,
+		SEQ_STREAM_FOOTER,
+	} sequence;
+
+	/// True if Block encoder has been initialized by
+	/// lzma_stream_encoder_init() or stream_encoder_update()
+	/// and thus doesn't need to be initialized in stream_encode().
+	bool block_encoder_is_initialized;
+
+	/// Block
+	lzma_next_coder block_encoder;
+
+	/// Options for the Block encoder
+	lzma_block block_options;
+
+	/// The filter chain currently in use
+	lzma_filter filters[LZMA_FILTERS_MAX + 1];
+
+	/// Index encoder. This is separate from Block encoder, because this
+	/// doesn't take much memory, and when encoding multiple Streams
+	/// with the same encoding options we avoid reallocating memory.
+	lzma_next_coder index_encoder;
+
+	/// Index to hold sizes of the Blocks
+	lzma_index *index;
+
+	/// Read position in buffer[]
+	size_t buffer_pos;
+
+	/// Total number of bytes in buffer[]
+	size_t buffer_size;
+
+	/// Buffer to hold Stream Header, Block Header, and Stream Footer.
+	/// Block Header has biggest maximum size.
+	uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX];
+};
+
+
+static lzma_ret
+block_encoder_init(lzma_coder *coder, lzma_allocator *allocator)
+{
+	// Prepare the Block options. Even though Block encoder doesn't need
+	// compressed_size, uncompressed_size, and header_size to be
+	// initialized, it is a good idea to do it here, because this way
+	// we catch if someone gave us Filter ID that cannot be used in
+	// Blocks/Streams.
+	coder->block_options.compressed_size = LZMA_VLI_UNKNOWN;
+	coder->block_options.uncompressed_size = LZMA_VLI_UNKNOWN;
+
+	return_if_error(lzma_block_header_size(&coder->block_options));
+
+	// Initialize the actual Block encoder.
+	return lzma_block_encoder_init(&coder->block_encoder, allocator,
+			&coder->block_options);
+}
+
+
+static lzma_ret
+stream_encode(lzma_coder *coder, lzma_allocator *allocator,
+		const uint8_t *restrict in, size_t *restrict in_pos,
+		size_t in_size, uint8_t *restrict out,
+		size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+	// Main loop
+	while (*out_pos < out_size)
+	switch (coder->sequence) {
+	case SEQ_STREAM_HEADER:
+	case SEQ_BLOCK_HEADER:
+	case SEQ_STREAM_FOOTER:
+		lzma_bufcpy(coder->buffer, &coder->buffer_pos,
+				coder->buffer_size, out, out_pos, out_size);
+		if (coder->buffer_pos < coder->buffer_size)
+			return LZMA_OK;
+
+		if (coder->sequence == SEQ_STREAM_FOOTER)
+			return LZMA_STREAM_END;
+
+		coder->buffer_pos = 0;
+		++coder->sequence;
+		break;
+
+	case SEQ_BLOCK_INIT: {
+		if (*in_pos == in_size) {
+			// If we are requested to flush or finish the current
+			// Block, return LZMA_STREAM_END immediately since
+			// there's nothing to do.
+			if (action != LZMA_FINISH)
+				return action == LZMA_RUN
+						? LZMA_OK : LZMA_STREAM_END;
+
+			// The application had used LZMA_FULL_FLUSH to finish
+			// the previous Block, but now wants to finish without
+			// encoding new data, or it is simply creating an
+			// empty Stream with no Blocks.
+			//
+			// Initialize the Index encoder, and continue to
+			// actually encoding the Index.
+			return_if_error(lzma_index_encoder_init(
+					&coder->index_encoder, allocator,
+					coder->index));
+			coder->sequence = SEQ_INDEX_ENCODE;
+			break;
+		}
+
+		// Initialize the Block encoder unless it was already
+		// initialized by lzma_stream_encoder_init() or
+		// stream_encoder_update().
+		if (!coder->block_encoder_is_initialized)
+			return_if_error(block_encoder_init(coder, allocator));
+
+		// Make it false so that we don't skip the initialization
+		// with the next Block.
+		coder->block_encoder_is_initialized = false;
+
+		// Encode the Block Header. This shouldn't fail since we have
+		// already initialized the Block encoder.
+		if (lzma_block_header_encode(&coder->block_options,
+				coder->buffer) != LZMA_OK)
+			return LZMA_PROG_ERROR;
+
+		coder->buffer_size = coder->block_options.header_size;
+		coder->sequence = SEQ_BLOCK_HEADER;
+		break;
+	}
+
+	case SEQ_BLOCK_ENCODE: {
+		static const lzma_action convert[4] = {
+			LZMA_RUN,
+			LZMA_SYNC_FLUSH,
+			LZMA_FINISH,
+			LZMA_FINISH,
+		};
+
+		const lzma_ret ret = coder->block_encoder.code(
+				coder->block_encoder.coder, allocator,
+				in, in_pos, in_size,
+				out, out_pos, out_size, convert[action]);
+		if (ret != LZMA_STREAM_END || action == LZMA_SYNC_FLUSH)
+			return ret;
+
+		// Add a new Index Record.
+		const lzma_vli unpadded_size = lzma_block_unpadded_size(
+				&coder->block_options);
+		assert(unpadded_size != 0);
+		return_if_error(lzma_index_append(coder->index, allocator,
+				unpadded_size,
+				coder->block_options.uncompressed_size));
+
+		coder->sequence = SEQ_BLOCK_INIT;
+		break;
+	}
+
+	case SEQ_INDEX_ENCODE: {
+		// Call the Index encoder. It doesn't take any input, so
+		// those pointers can be NULL.
+		const lzma_ret ret = coder->index_encoder.code(
+				coder->index_encoder.coder, allocator,
+				NULL, NULL, 0,
+				out, out_pos, out_size, LZMA_RUN);
+		if (ret != LZMA_STREAM_END)
+			return ret;
+
+		// Encode the Stream Footer into coder->buffer.
+		const lzma_stream_flags stream_flags = {
+			.version = 0,
+			.backward_size = lzma_index_size(coder->index),
+			.check = coder->block_options.check,
+		};
+
+		if (lzma_stream_footer_encode(&stream_flags, coder->buffer)
+				!= LZMA_OK)
+			return LZMA_PROG_ERROR;
+
+		coder->buffer_size = LZMA_STREAM_HEADER_SIZE;
+		coder->sequence = SEQ_STREAM_FOOTER;
+		break;
+	}
+
+	default:
+		assert(0);
+		return LZMA_PROG_ERROR;
+	}
+
+	return LZMA_OK;
+}
+
+
+static void
+stream_encoder_end(lzma_coder *coder, lzma_allocator *allocator)
+{
+	lzma_next_end(&coder->block_encoder, allocator);
+	lzma_next_end(&coder->index_encoder, allocator);
+	lzma_index_end(coder->index, allocator);
+
+	for (size_t i = 0; coder->filters[i].id != LZMA_VLI_UNKNOWN; ++i)
+		lzma_free(coder->filters[i].options, allocator);
+
+	lzma_free(coder, allocator);
+}
+
+
+static lzma_ret
+stream_encoder_update(lzma_coder *coder, lzma_allocator *allocator,
+		const lzma_filter *filters,
+		const lzma_filter *reversed_filters)
+{
+	if (coder->sequence <= SEQ_BLOCK_INIT) {
+		// There is no incomplete Block waiting to be finished,
+		// thus we can change the whole filter chain. Start by
+		// trying to initialize the Block encoder with the new
+		// chain. This way we detect if the chain is valid.
+		coder->block_encoder_is_initialized = false;
+		coder->block_options.filters = (lzma_filter *)(filters);
+		const lzma_ret ret = block_encoder_init(coder, allocator);
+		coder->block_options.filters = coder->filters;
+		if (ret != LZMA_OK)
+			return ret;
+
+		coder->block_encoder_is_initialized = true;
+
+	} else if (coder->sequence <= SEQ_BLOCK_ENCODE) {
+		// We are in the middle of a Block. Try to update only
+		// the filter-specific options.
+		return_if_error(coder->block_encoder.update(
+				coder->block_encoder.coder, allocator,
+				filters, reversed_filters));
+	} else {
+		// Trying to update the filter chain when we are already
+		// encoding Index or Stream Footer.
+		return LZMA_PROG_ERROR;
+	}
+
+	// Free the copy of the old chain and make a copy of the new chain.
+	for (size_t i = 0; coder->filters[i].id != LZMA_VLI_UNKNOWN; ++i)
+		lzma_free(coder->filters[i].options, allocator);
+
+	return lzma_filters_copy(filters, coder->filters, allocator);
+}
+
+
+extern lzma_ret
+lzma_stream_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter *filters, lzma_check check)
+{
+	lzma_next_coder_init(&lzma_stream_encoder_init, next, allocator);
+
+	if (filters == NULL)
+		return LZMA_PROG_ERROR;
+
+	if (next->coder == NULL) {
+		next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
+		if (next->coder == NULL)
+			return LZMA_MEM_ERROR;
+
+		next->code = &stream_encode;
+		next->end = &stream_encoder_end;
+		next->update = &stream_encoder_update;
+
+		// Only a newly allocated coder has no filter chain to free.
+		next->coder->filters[0].id = LZMA_VLI_UNKNOWN;
+		next->coder->block_encoder = LZMA_NEXT_CODER_INIT;
+		next->coder->index_encoder = LZMA_NEXT_CODER_INIT;
+		next->coder->index = NULL;
+	}
+
+	// Basic initializations; the update below frees any old filters[].
+	next->coder->sequence = SEQ_STREAM_HEADER;
+	next->coder->block_options.version = 0;
+	next->coder->block_options.check = check;
+
+	// Initialize the Index
+	lzma_index_end(next->coder->index, allocator);
+	next->coder->index = lzma_index_init(allocator);
+	if (next->coder->index == NULL)
+		return LZMA_MEM_ERROR;
+
+	// Encode the Stream Header
+	lzma_stream_flags stream_flags = {
+		.version = 0,
+		.check = check,
+	};
+	return_if_error(lzma_stream_header_encode(
+			&stream_flags, next->coder->buffer));
+
+	next->coder->buffer_pos = 0;
+	next->coder->buffer_size = LZMA_STREAM_HEADER_SIZE;
+
+	// Initialize the Block encoder. This way we detect unsupported
+	// filter chains when initializing the Stream encoder instead of
+	// giving an error after Stream Header has already written out.
+	return stream_encoder_update(
+			next->coder, allocator, filters, NULL);
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_stream_encoder(lzma_stream *strm,
+		const lzma_filter *filters, lzma_check check)
+{
+	lzma_next_strm_init(lzma_stream_encoder_init, strm, filters, check);
+
+	strm->internal->supported_actions[LZMA_RUN] = true;
+	strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true;
+	strm->internal->supported_actions[LZMA_FULL_FLUSH] = true;
+	strm->internal->supported_actions[LZMA_FINISH] = true;
+
+	return LZMA_OK;
+}
diff --git a/src/liblzma/common/stream_encoder.h b/src/liblzma/common/stream_encoder.h
new file mode 100644
index 000000000000..46a7aed72eaa
--- /dev/null
+++ b/src/liblzma/common/stream_encoder.h
@@ -0,0 +1,23 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file stream_encoder.h
+/// \brief Encodes .xz Streams
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_STREAM_ENCODER_H
+#define LZMA_STREAM_ENCODER_H
+
+#include "common.h"
+
+
+extern lzma_ret lzma_stream_encoder_init(
+		lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter *filters, lzma_check check);
+
+#endif
diff --git a/src/liblzma/common/stream_flags_common.c b/src/liblzma/common/stream_flags_common.c
new file mode 100644
index 000000000000..fbe8eb8abda2
--- /dev/null
+++ b/src/liblzma/common/stream_flags_common.c
@@ -0,0 +1,47 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file stream_flags_common.c
+/// \brief Common stuff for Stream flags coders
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "stream_flags_common.h"
+
+
+const uint8_t lzma_header_magic[6] = { 0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00 };
+const uint8_t lzma_footer_magic[2] = { 0x59, 0x5A };
+
+
+extern LZMA_API(lzma_ret)
+lzma_stream_flags_compare(
+		const lzma_stream_flags *a, const lzma_stream_flags *b)
+{
+	// Structures of any version other than 0 cannot be compared.
+	if (!(a->version == 0 && b->version == 0))
+		return LZMA_OPTIONS_ERROR;
+
+	// Both Check IDs must be valid, and they must match each other.
+	const bool a_check_ok = (unsigned int)(a->check) <= LZMA_CHECK_ID_MAX;
+	const bool b_check_ok = (unsigned int)(b->check) <= LZMA_CHECK_ID_MAX;
+	if (!(a_check_ok && b_check_ok))
+		return LZMA_PROG_ERROR;
+
+	if (a->check != b->check)
+		return LZMA_DATA_ERROR;
+
+	// Backward Sizes are compared only when known in both structures.
+	if (a->backward_size == LZMA_VLI_UNKNOWN
+			|| b->backward_size == LZMA_VLI_UNKNOWN)
+		return LZMA_OK;
+
+	if (!(is_backward_size_valid(a) && is_backward_size_valid(b)))
+		return LZMA_PROG_ERROR;
+
+	return a->backward_size == b->backward_size
+			? LZMA_OK : LZMA_DATA_ERROR;
+}
diff --git a/src/liblzma/common/stream_flags_common.h b/src/liblzma/common/stream_flags_common.h
new file mode 100644
index 000000000000..9f3122a3b1e5
--- /dev/null
+++ b/src/liblzma/common/stream_flags_common.h
@@ -0,0 +1,33 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file stream_flags_common.h
+/// \brief Common stuff for Stream flags coders
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_STREAM_FLAGS_COMMON_H
+#define LZMA_STREAM_FLAGS_COMMON_H
+
+#include "common.h"
+
+/// Size of the Stream Flags field
+#define LZMA_STREAM_FLAGS_SIZE 2
+
+extern const uint8_t lzma_header_magic[6];
+extern const uint8_t lzma_footer_magic[2];
+
+
+static inline bool
+is_backward_size_valid(const lzma_stream_flags *options)
+{
+	return (options->backward_size & 3) == 0
+			&& options->backward_size >= LZMA_BACKWARD_SIZE_MIN
+			&& options->backward_size <= LZMA_BACKWARD_SIZE_MAX;
+}
+
+#endif
diff --git a/src/liblzma/common/stream_flags_decoder.c b/src/liblzma/common/stream_flags_decoder.c
new file mode 100644
index 000000000000..1bc2f97c5190
--- /dev/null
+++ b/src/liblzma/common/stream_flags_decoder.c
@@ -0,0 +1,97 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file stream_flags_decoder.c
+/// \brief Decodes Stream Header and Stream Footer from .xz files
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "stream_flags_common.h"
+
+
+/// Decode the two-byte Stream Flags field at in[] into *options.
+/// Returns true if any reserved bit is set, false on success.
+static bool
+stream_flags_decode(lzma_stream_flags *options, const uint8_t *in)
+{
+	// The whole first byte and the high nibble of the second byte
+	// are reserved and must be unset.
+	const bool reserved_set = in[0] != 0x00 || (in[1] & 0xF0) != 0;
+	if (reserved_set)
+		return true;
+
+	options->version = 0;
+	options->check = in[1] & 0x0F;
+
+	return false;
+}
+
+
+/// Decode the Stream Header at in[] into *options.
+extern LZMA_API(lzma_ret)
+lzma_stream_header_decode(lzma_stream_flags *options, const uint8_t *in)
+{
+	// Magic
+	if (memcmp(in, lzma_header_magic, sizeof(lzma_header_magic)) != 0)
+		return LZMA_FORMAT_ERROR;
+
+	// Stream Flags follow the magic bytes, and the stored CRC32
+	// follows the Stream Flags.
+	const uint8_t *const flags = in + sizeof(lzma_header_magic);
+	const uint8_t *const stored_crc = flags + LZMA_STREAM_FLAGS_SIZE;
+
+	// Verify the CRC32 so we can distinguish between corrupt
+	// and unsupported files.
+	const uint32_t computed_crc = lzma_crc32(
+			flags, LZMA_STREAM_FLAGS_SIZE, 0);
+	if (computed_crc != unaligned_read32le(stored_crc))
+		return LZMA_DATA_ERROR;
+
+	// Stream Flags
+	if (stream_flags_decode(options, flags))
+		return LZMA_OPTIONS_ERROR;
+
+	// Mark Backward Size as unknown. That way
+	// lzma_stream_flags_compare() can be used to compare Stream Header
+	// and Stream Footer while keeping it useful also for comparing
+	// two Stream Footers.
+	options->backward_size = LZMA_VLI_UNKNOWN;
+
+	return LZMA_OK;
+}
+
+
+/// Decode the Stream Footer at in[] into *options.
+extern LZMA_API(lzma_ret)
+lzma_stream_footer_decode(lzma_stream_flags *options, const uint8_t *in)
+{
+	// Layout: CRC32, Backward Size, Stream Flags, Magic Bytes.
+	const uint8_t *const size_field = in + sizeof(uint32_t);
+	const uint8_t *const flags = in + sizeof(uint32_t) * 2;
+	const uint8_t *const magic = flags + LZMA_STREAM_FLAGS_SIZE;
+
+	// Magic
+	if (memcmp(magic, lzma_footer_magic, sizeof(lzma_footer_magic)) != 0)
+		return LZMA_FORMAT_ERROR;
+
+	// CRC32 covers the Backward Size and Stream Flags fields.
+	const uint32_t computed_crc = lzma_crc32(size_field,
+			sizeof(uint32_t) + LZMA_STREAM_FLAGS_SIZE, 0);
+	if (computed_crc != unaligned_read32le(in))
+		return LZMA_DATA_ERROR;
+
+	// Stream Flags
+	if (stream_flags_decode(options, flags))
+		return LZMA_OPTIONS_ERROR;
+
+	// Backward Size: the stored value is first copied into the
+	// structure member and then converted in place with (x + 1) * 4.
+	options->backward_size = unaligned_read32le(size_field);
+	options->backward_size = (options->backward_size + 1) * 4;
+
+	return LZMA_OK;
+}
diff --git a/src/liblzma/common/stream_flags_encoder.c b/src/liblzma/common/stream_flags_encoder.c
new file mode 100644
index 000000000000..4e717159f1e7
--- /dev/null
+++ b/src/liblzma/common/stream_flags_encoder.c
@@ -0,0 +1,86 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file stream_flags_encoder.c
+/// \brief Encodes Stream Header and Stream Footer for .xz files
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "stream_flags_common.h"
+
+
+static bool
+stream_flags_encode(const lzma_stream_flags *options, uint8_t *out)
+{
+	if ((unsigned int)(options->check) > LZMA_CHECK_ID_MAX)
+		return true;
+
+	out[0] = 0x00;
+	out[1] = options->check;
+
+	return false;
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_stream_header_encode(const lzma_stream_flags *options, uint8_t *out)
+{
+	assert(sizeof(lzma_header_magic) + LZMA_STREAM_FLAGS_SIZE
+			+ 4 == LZMA_STREAM_HEADER_SIZE);
+
+	if (options->version != 0)
+		return LZMA_OPTIONS_ERROR;
+
+	// Magic
+	memcpy(out, lzma_header_magic, sizeof(lzma_header_magic));
+
+	// Stream Flags
+	if (stream_flags_encode(options, out + sizeof(lzma_header_magic)))
+		return LZMA_PROG_ERROR;
+
+	// CRC32 of the Stream Header
+	const uint32_t crc = lzma_crc32(out + sizeof(lzma_header_magic),
+			LZMA_STREAM_FLAGS_SIZE, 0);
+
+	unaligned_write32le(out + sizeof(lzma_header_magic)
+			+ LZMA_STREAM_FLAGS_SIZE, crc);
+
+	return LZMA_OK;
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_stream_footer_encode(const lzma_stream_flags *options, uint8_t *out)
+{
+	assert(2 * 4 + LZMA_STREAM_FLAGS_SIZE + sizeof(lzma_footer_magic)
+			== LZMA_STREAM_HEADER_SIZE);
+
+	if (options->version != 0)
+		return LZMA_OPTIONS_ERROR;
+
+	// Backward Size
+	if (!is_backward_size_valid(options))
+		return LZMA_PROG_ERROR;
+
+	unaligned_write32le(out + 4, options->backward_size / 4 - 1);
+
+	// Stream Flags
+	if (stream_flags_encode(options, out + 2 * 4))
+		return LZMA_PROG_ERROR;
+
+	// CRC32
+	const uint32_t crc = lzma_crc32(
+			out + 4, 4 + LZMA_STREAM_FLAGS_SIZE, 0);
+
+	unaligned_write32le(out, crc);
+
+	// Magic
+	memcpy(out + 2 * 4 + LZMA_STREAM_FLAGS_SIZE,
+			lzma_footer_magic, sizeof(lzma_footer_magic));
+
+	return LZMA_OK;
+}
diff --git a/src/liblzma/common/vli_decoder.c b/src/liblzma/common/vli_decoder.c
new file mode 100644
index 000000000000..c181828bf564
--- /dev/null
+++ b/src/liblzma/common/vli_decoder.c
@@ -0,0 +1,86 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file vli_decoder.c
+/// \brief Decodes variable-length integers
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+
+
+extern LZMA_API(lzma_ret)
+lzma_vli_decode(lzma_vli *restrict vli, size_t *vli_pos,
+		const uint8_t *restrict in, size_t *restrict in_pos,
+		size_t in_size)
+{
+	// If we haven't been given vli_pos, work in single-call mode.
+	size_t vli_pos_internal = 0;
+	if (vli_pos == NULL) {
+		vli_pos = &vli_pos_internal;
+		*vli = 0;
+
+		// If there's no input, use LZMA_DATA_ERROR. This way it is
+		// easy to decode VLIs from buffers that have known size,
+		// and get the correct error code in case the buffer is
+		// too short.
+		if (*in_pos >= in_size)
+			return LZMA_DATA_ERROR;
+
+	} else {
+		// Initialize *vli when starting to decode a new integer.
+		if (*vli_pos == 0)
+			*vli = 0;
+
+		// Validate the arguments.
+		if (*vli_pos >= LZMA_VLI_BYTES_MAX
+				|| (*vli >> (*vli_pos * 7)) != 0)
+			return LZMA_PROG_ERROR;;
+
+		if (*in_pos >= in_size)
+			return LZMA_BUF_ERROR;
+	}
+
+	do {
+		// Read the next byte. Use a temporary variable so that we
+		// can update *in_pos immediately.
+		const uint8_t byte = in[*in_pos];
+		++*in_pos;
+
+		// Add the newly read byte to *vli.
+		*vli += (lzma_vli)(byte & 0x7F) << (*vli_pos * 7);
+		++*vli_pos;
+
+		// Check if this is the last byte of a multibyte integer.
+		if ((byte & 0x80) == 0) {
+			// We don't allow using variable-length integers as
+			// padding i.e. the encoding must use the most
+			// compact form.
+			if (byte == 0x00 && *vli_pos > 1)
+				return LZMA_DATA_ERROR;
+
+			return vli_pos == &vli_pos_internal
+					? LZMA_OK : LZMA_STREAM_END;
+		}
+
+		// There is at least one more byte coming. If we have already
+		// read maximum number of bytes, the integer is considered
+		// corrupt.
+		//
+		// If we need bigger integers in future, old versions of liblzma
+		// will confusingly indicate the file being corrupt instead of
+		// unsupported. I suppose it's still better this way, because
+		// in the foreseeable future (writing this in 2008) the only
+		// reason why files would appear having over 63-bit integers
+		// is that the files are simply corrupt.
+		if (*vli_pos == LZMA_VLI_BYTES_MAX)
+			return LZMA_DATA_ERROR;
+
+	} while (*in_pos < in_size);
+
+	return vli_pos == &vli_pos_internal ? LZMA_DATA_ERROR : LZMA_OK;
+}
diff --git a/src/liblzma/common/vli_encoder.c b/src/liblzma/common/vli_encoder.c
new file mode 100644
index 000000000000..f8642694e291
--- /dev/null
+++ b/src/liblzma/common/vli_encoder.c
@@ -0,0 +1,69 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file vli_encoder.c
+/// \brief Encodes variable-length integers
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+
+
+extern LZMA_API(lzma_ret)
+lzma_vli_encode(lzma_vli vli, size_t *vli_pos,
+		uint8_t *restrict out, size_t *restrict out_pos,
+		size_t out_size)
+{
+	// If we haven't been given vli_pos, work in single-call mode.
+	size_t vli_pos_internal = 0;
+	if (vli_pos == NULL) {
+		vli_pos = &vli_pos_internal;
+
+		// In single-call mode, we expect that the caller has
+		// reserved enough output space.
+		if (*out_pos >= out_size)
+			return LZMA_PROG_ERROR;
+	} else {
+		// This never happens when we are called by liblzma, but
+		// may happen if called directly from an application.
+		if (*out_pos >= out_size)
+			return LZMA_BUF_ERROR;
+	}
+
+	// Validate the arguments.
+	if (*vli_pos >= LZMA_VLI_BYTES_MAX || vli > LZMA_VLI_MAX)
+		return LZMA_PROG_ERROR;
+
+	// Shift vli so that the next bits to encode are the lowest. In
+	// single-call mode this never changes vli since *vli_pos is zero.
+	vli >>= *vli_pos * 7;
+
+	// Write the non-last bytes in a loop.
+	while (vli >= 0x80) {
+		// We don't need *vli_pos during this function call anymore,
+		// but update it here so that it is ready if we need to
+		// return before the whole integer has been encoded.
+		++*vli_pos;
+		assert(*vli_pos < LZMA_VLI_BYTES_MAX);
+
+		// Write the next byte.
+		out[*out_pos] = (uint8_t)(vli) | 0x80;
+		vli >>= 7;
+
+		if (++*out_pos == out_size)
+			return vli_pos == &vli_pos_internal
+					? LZMA_PROG_ERROR : LZMA_OK;
+	}
+
+	// Write the last byte.
+	out[*out_pos] = (uint8_t)(vli);
+	++*out_pos;
+	++*vli_pos;
+
+	return vli_pos == &vli_pos_internal ? LZMA_OK : LZMA_STREAM_END;
+
+}
diff --git a/src/liblzma/common/vli_size.c b/src/liblzma/common/vli_size.c
new file mode 100644
index 000000000000..ec1b4fa488b6
--- /dev/null
+++ b/src/liblzma/common/vli_size.c
@@ -0,0 +1,30 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file vli_size.c
+/// \brief Calculates the encoded size of a variable-length integer
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+
+
+extern LZMA_API(uint32_t)
+lzma_vli_size(lzma_vli vli)
+{
+	if (vli > LZMA_VLI_MAX)
+		return 0;
+
+	uint32_t i = 0;
+	do {
+		vli >>= 7;
+		++i;
+	} while (vli != 0);
+
+	assert(i <= LZMA_VLI_BYTES_MAX);
+	return i;
+}
diff --git a/src/liblzma/delta/delta_common.c b/src/liblzma/delta/delta_common.c
new file mode 100644
index 000000000000..930ad215131c
--- /dev/null
+++ b/src/liblzma/delta/delta_common.c
@@ -0,0 +1,70 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file delta_common.c
+/// \brief Common stuff for Delta encoder and decoder
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "delta_common.h"
+#include "delta_private.h"
+
+
+static void
+delta_coder_end(lzma_coder *coder, lzma_allocator *allocator)
+{
+	lzma_next_end(&coder->next, allocator);
+	lzma_free(coder, allocator);
+	return;
+}
+
+
+extern lzma_ret
+lzma_delta_coder_init(lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter_info *filters)
+{
+	// Allocate memory for the decoder if needed.
+	if (next->coder == NULL) {
+		next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
+		if (next->coder == NULL)
+			return LZMA_MEM_ERROR;
+
+		// End function is the same for encoder and decoder.
+		next->end = &delta_coder_end;
+		next->coder->next = LZMA_NEXT_CODER_INIT;
+	}
+
+	// Validate the options.
+	if (lzma_delta_coder_memusage(filters[0].options) == UINT64_MAX)
+		return LZMA_OPTIONS_ERROR;
+
+	// Set the delta distance.
+	const lzma_options_delta *opt = filters[0].options;
+	next->coder->distance = opt->dist;
+
+	// Initialize the rest of the variables.
+	next->coder->pos = 0;
+	memzero(next->coder->history, LZMA_DELTA_DIST_MAX);
+
+	// Initialize the next decoder in the chain, if any.
+	return lzma_next_filter_init(&next->coder->next,
+			allocator, filters + 1);
+}
+
+
+extern uint64_t
+lzma_delta_coder_memusage(const void *options)
+{
+	const lzma_options_delta *opt = options;
+
+	if (opt == NULL || opt->type != LZMA_DELTA_TYPE_BYTE
+			|| opt->dist < LZMA_DELTA_DIST_MIN
+			|| opt->dist > LZMA_DELTA_DIST_MAX)
+		return UINT64_MAX;
+
+	return sizeof(lzma_coder);
+}
diff --git a/src/liblzma/delta/delta_common.h b/src/liblzma/delta/delta_common.h
new file mode 100644
index 000000000000..7e7e1baaf680
--- /dev/null
+++ b/src/liblzma/delta/delta_common.h
@@ -0,0 +1,20 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file delta_common.h
+/// \brief Common stuff for Delta encoder and decoder
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_DELTA_COMMON_H
+#define LZMA_DELTA_COMMON_H
+
+#include "common.h"
+
+extern uint64_t lzma_delta_coder_memusage(const void *options);
+
+#endif
diff --git a/src/liblzma/delta/delta_decoder.c b/src/liblzma/delta/delta_decoder.c
new file mode 100644
index 000000000000..2cf60d5bdc7c
--- /dev/null
+++ b/src/liblzma/delta/delta_decoder.c
@@ -0,0 +1,76 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file delta_decoder.c
+/// \brief Delta filter decoder
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "delta_decoder.h"
+#include "delta_private.h"
+
+
+static void
+decode_buffer(lzma_coder *coder, uint8_t *buffer, size_t size)
+{
+	const size_t distance = coder->distance;
+
+	for (size_t i = 0; i < size; ++i) {
+		buffer[i] += coder->history[(distance + coder->pos) & 0xFF];
+		coder->history[coder->pos-- & 0xFF] = buffer[i];
+	}
+}
+
+
+static lzma_ret
+delta_decode(lzma_coder *coder, lzma_allocator *allocator,
+		const uint8_t *restrict in, size_t *restrict in_pos,
+		size_t in_size, uint8_t *restrict out,
+		size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+	assert(coder->next.code != NULL);
+
+	const size_t out_start = *out_pos;
+
+	const lzma_ret ret = coder->next.code(coder->next.coder, allocator,
+			in, in_pos, in_size, out, out_pos, out_size,
+			action);
+
+	decode_buffer(coder, out + out_start, *out_pos - out_start);
+
+	return ret;
+}
+
+
+extern lzma_ret
+lzma_delta_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter_info *filters)
+{
+	next->code = &delta_decode;
+	return lzma_delta_coder_init(next, allocator, filters);
+}
+
+
+extern lzma_ret
+lzma_delta_props_decode(void **options, lzma_allocator *allocator,
+		const uint8_t *props, size_t props_size)
+{
+	if (props_size != 1)
+		return LZMA_OPTIONS_ERROR;
+
+	lzma_options_delta *opt
+			= lzma_alloc(sizeof(lzma_options_delta), allocator);
+	if (opt == NULL)
+		return LZMA_MEM_ERROR;
+
+	opt->type = LZMA_DELTA_TYPE_BYTE;
+	opt->dist = props[0] + 1;
+
+	*options = opt;
+
+	return LZMA_OK;
+}
diff --git a/src/liblzma/delta/delta_decoder.h b/src/liblzma/delta/delta_decoder.h
new file mode 100644
index 000000000000..ae89acc59f8c
--- /dev/null
+++ b/src/liblzma/delta/delta_decoder.h
@@ -0,0 +1,25 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file delta_decoder.h
+/// \brief Delta filter decoder
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_DELTA_DECODER_H
+#define LZMA_DELTA_DECODER_H
+
+#include "delta_common.h"
+
+extern lzma_ret lzma_delta_decoder_init(lzma_next_coder *next,
+		lzma_allocator *allocator, const lzma_filter_info *filters);
+
+extern lzma_ret lzma_delta_props_decode(
+		void **options, lzma_allocator *allocator,
+		const uint8_t *props, size_t props_size);
+
+#endif
diff --git a/src/liblzma/delta/delta_encoder.c b/src/liblzma/delta/delta_encoder.c
new file mode 100644
index 000000000000..80d0d1764ddc
--- /dev/null
+++ b/src/liblzma/delta/delta_encoder.c
@@ -0,0 +1,121 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file delta_encoder.c
+/// \brief Delta filter encoder
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "delta_encoder.h"
+#include "delta_private.h"
+
+
+/// Copies and encodes the data at the same time. This is used when Delta
+/// is the first filter in the chain (and thus the last filter in the
+/// encoder's filter stack).
+static void
+copy_and_encode(lzma_coder *coder,
+		const uint8_t *restrict in, uint8_t *restrict out, size_t size)
+{
+	const size_t distance = coder->distance;
+
+	for (size_t i = 0; i < size; ++i) {
+		const uint8_t tmp = coder->history[
+				(distance + coder->pos) & 0xFF];
+		coder->history[coder->pos-- & 0xFF] = in[i];
+		out[i] = in[i] - tmp;
+	}
+}
+
+
+/// Encodes the data in place. This is used when we are the last filter
+/// in the chain (and thus non-last filter in the encoder's filter stack).
+static void
+encode_in_place(lzma_coder *coder, uint8_t *buffer, size_t size)
+{
+	const size_t distance = coder->distance;
+
+	for (size_t i = 0; i < size; ++i) {
+		const uint8_t tmp = coder->history[
+				(distance + coder->pos) & 0xFF];
+		coder->history[coder->pos-- & 0xFF] = buffer[i];
+		buffer[i] -= tmp;
+	}
+}
+
+
+static lzma_ret
+delta_encode(lzma_coder *coder, lzma_allocator *allocator,
+		const uint8_t *restrict in, size_t *restrict in_pos,
+		size_t in_size, uint8_t *restrict out,
+		size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+	lzma_ret ret;
+
+	if (coder->next.code == NULL) {
+		const size_t in_avail = in_size - *in_pos;
+		const size_t out_avail = out_size - *out_pos;
+		const size_t size = MIN(in_avail, out_avail);
+
+		copy_and_encode(coder, in + *in_pos, out + *out_pos, size);
+
+		*in_pos += size;
+		*out_pos += size;
+
+		ret = action != LZMA_RUN && *in_pos == in_size
+				? LZMA_STREAM_END : LZMA_OK;
+
+	} else {
+		const size_t out_start = *out_pos;
+
+		ret = coder->next.code(coder->next.coder, allocator,
+				in, in_pos, in_size, out, out_pos, out_size,
+				action);
+
+		encode_in_place(coder, out + out_start, *out_pos - out_start);
+	}
+
+	return ret;
+}
+
+
+static lzma_ret
+delta_encoder_update(lzma_coder *coder, lzma_allocator *allocator,
+		const lzma_filter *filters_null lzma_attribute((unused)),
+		const lzma_filter *reversed_filters)
+{
+	// Delta doesn't and will never support changing the options in
+	// the middle of encoding. If the app tries to change them, we
+	// simply ignore them.
+	return lzma_next_filter_update(
+			&coder->next, allocator, reversed_filters + 1);
+}
+
+
+extern lzma_ret
+lzma_delta_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter_info *filters)
+{
+	next->code = &delta_encode;
+	next->update = &delta_encoder_update;
+	return lzma_delta_coder_init(next, allocator, filters);
+}
+
+
+extern lzma_ret
+lzma_delta_props_encode(const void *options, uint8_t *out)
+{
+	// The caller must have already validated the options, so it's
+	// LZMA_PROG_ERROR if they are invalid.
+	if (lzma_delta_coder_memusage(options) == UINT64_MAX)
+		return LZMA_PROG_ERROR;
+
+	const lzma_options_delta *opt = options;
+	out[0] = opt->dist - LZMA_DELTA_DIST_MIN;
+
+	return LZMA_OK;
+}
diff --git a/src/liblzma/delta/delta_encoder.h b/src/liblzma/delta/delta_encoder.h
new file mode 100644
index 000000000000..a447862f205b
--- /dev/null
+++ b/src/liblzma/delta/delta_encoder.h
@@ -0,0 +1,23 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file delta_encoder.h
+/// \brief Delta filter encoder
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_DELTA_ENCODER_H
+#define LZMA_DELTA_ENCODER_H
+
+#include "delta_common.h"
+
+extern lzma_ret lzma_delta_encoder_init(lzma_next_coder *next,
+		lzma_allocator *allocator, const lzma_filter_info *filters);
+
+extern lzma_ret lzma_delta_props_encode(const void *options, uint8_t *out);
+
+#endif
diff --git a/src/liblzma/delta/delta_private.h b/src/liblzma/delta/delta_private.h
new file mode 100644
index 000000000000..62b7fed86e45
--- /dev/null
+++ b/src/liblzma/delta/delta_private.h
@@ -0,0 +1,37 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file delta_private.h
+/// \brief Private common stuff for Delta encoder and decoder
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_DELTA_PRIVATE_H
+#define LZMA_DELTA_PRIVATE_H
+
+#include "delta_common.h"
+
+struct lzma_coder_s {
+	/// Next coder in the chain
+	lzma_next_coder next;
+
+	/// Delta distance
+	size_t distance;
+
+	/// Position in history[]
+	uint8_t pos;
+
+	/// Buffer to hold history of the original data
+	uint8_t history[LZMA_DELTA_DIST_MAX];
+};
+
+
+extern lzma_ret lzma_delta_coder_init(
+		lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter_info *filters);
+
+#endif
diff --git a/src/liblzma/lz/lz_decoder.c b/src/liblzma/lz/lz_decoder.c
new file mode 100644
index 000000000000..350b1f898125
--- /dev/null
+++ b/src/liblzma/lz/lz_decoder.c
@@ -0,0 +1,299 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file lz_decoder.c
+/// \brief LZ out window
+///
+// Authors: Igor Pavlov
+// Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+// liblzma supports multiple LZ77-based filters. The LZ part is shared
+// between these filters. The LZ code takes care of dictionary handling
+// and passing the data between filters in the chain. The filter-specific
+// part decodes from the input buffer to the dictionary.
+
+
+#include "lz_decoder.h"
+
+
+struct lzma_coder_s {
+	/// Dictionary (history buffer)
+	lzma_dict dict;
+
+	/// The actual LZ-based decoder e.g. LZMA
+	lzma_lz_decoder lz;
+
+	/// Next filter in the chain, if any. Note that LZMA and LZMA2 are
+	/// only allowed as the last filter, but the long-range filter in
+	/// future can be in the middle of the chain.
+	lzma_next_coder next;
+
+	/// True if the next filter in the chain has returned LZMA_STREAM_END.
+	bool next_finished;
+
+	/// True if the LZ decoder (e.g. LZMA) has detected end of payload
+	/// marker. This may become true before next_finished becomes true.
+	bool this_finished;
+
+	/// Temporary buffer needed when the LZ-based filter is not the last
+	/// filter in the chain. The output of the next filter is first
+	/// decoded into buffer[], which is then used as input for the actual
+	/// LZ-based decoder.
+	struct {
+		size_t pos;
+		size_t size;
+		uint8_t buffer[LZMA_BUFFER_SIZE];
+	} temp;
+};
+
+
+static void
+lz_decoder_reset(lzma_coder *coder)
+{
+	coder->dict.pos = 0;
+	coder->dict.full = 0;
+	coder->dict.buf[coder->dict.size - 1] = '\0';
+	coder->dict.need_reset = false;
+	return;
+}
+
+
+static lzma_ret
+decode_buffer(lzma_coder *coder,
+		const uint8_t *restrict in, size_t *restrict in_pos,
+		size_t in_size, uint8_t *restrict out,
+		size_t *restrict out_pos, size_t out_size)
+{
+	while (true) {
+		// Wrap the dictionary if needed.
+		if (coder->dict.pos == coder->dict.size)
+			coder->dict.pos = 0;
+
+		// Store the current dictionary position. It is needed to know
+		// where to start copying to the out[] buffer.
+		const size_t dict_start = coder->dict.pos;
+
+		// Calculate how much we allow coder->lz.code() to decode.
+		// It must not decode past the end of the dictionary
+		// buffer, and we don't want it to decode more than is
+		// actually needed to fill the out[] buffer.
+		coder->dict.limit = coder->dict.pos + MIN(out_size - *out_pos,
+				coder->dict.size - coder->dict.pos);
+
+		// Call the coder->lz.code() to do the actual decoding.
+		const lzma_ret ret = coder->lz.code(
+				coder->lz.coder, &coder->dict,
+				in, in_pos, in_size);
+
+		// Copy the decoded data from the dictionary to the out[]
+		// buffer.
+		const size_t copy_size = coder->dict.pos - dict_start;
+		assert(copy_size <= out_size - *out_pos);
+		memcpy(out + *out_pos, coder->dict.buf + dict_start,
+				copy_size);
+		*out_pos += copy_size;
+
+		// Reset the dictionary if so requested by coder->lz.code().
+		if (coder->dict.need_reset) {
+			lz_decoder_reset(coder);
+
+			// Since we reset dictionary, we don't check if
+			// dictionary became full.
+			if (ret != LZMA_OK || *out_pos == out_size)
+				return ret;
+		} else {
+			// Return if everything got decoded or an error
+			// occurred, or if there's no more data to decode.
+			//
+			// Note that detecting if there's something to decode
+			// is done by looking if dictionary becomes full
+			// instead of looking if *in_pos == in_size. This
+			// is because it is possible that all the input was
+			// consumed already but some data is pending to be
+			// written to the dictionary.
+			if (ret != LZMA_OK || *out_pos == out_size
+					|| coder->dict.pos < coder->dict.size)
+				return ret;
+		}
+	}
+}
+
+
+static lzma_ret
+lz_decode(lzma_coder *coder,
+		lzma_allocator *allocator lzma_attribute((unused)),
+		const uint8_t *restrict in, size_t *restrict in_pos,
+		size_t in_size, uint8_t *restrict out,
+		size_t *restrict out_pos, size_t out_size,
+		lzma_action action)
+{
+	if (coder->next.code == NULL)
+		return decode_buffer(coder, in, in_pos, in_size,
+				out, out_pos, out_size);
+
+	// We aren't the last coder in the chain, we need to decode
+	// our input to a temporary buffer.
+	while (*out_pos < out_size) {
+		// Fill the temporary buffer if it is empty.
+		if (!coder->next_finished
+				&& coder->temp.pos == coder->temp.size) {
+			coder->temp.pos = 0;
+			coder->temp.size = 0;
+
+			const lzma_ret ret = coder->next.code(
+					coder->next.coder,
+					allocator, in, in_pos, in_size,
+					coder->temp.buffer, &coder->temp.size,
+					LZMA_BUFFER_SIZE, action);
+
+			if (ret == LZMA_STREAM_END)
+				coder->next_finished = true;
+			else if (ret != LZMA_OK || coder->temp.size == 0)
+				return ret;
+		}
+
+		if (coder->this_finished) {
+			if (coder->temp.size != 0)
+				return LZMA_DATA_ERROR;
+
+			if (coder->next_finished)
+				return LZMA_STREAM_END;
+
+			return LZMA_OK;
+		}
+
+		const lzma_ret ret = decode_buffer(coder, coder->temp.buffer,
+				&coder->temp.pos, coder->temp.size,
+				out, out_pos, out_size);
+
+		if (ret == LZMA_STREAM_END)
+			coder->this_finished = true;
+		else if (ret != LZMA_OK)
+			return ret;
+		else if (coder->next_finished && *out_pos < out_size)
+			return LZMA_DATA_ERROR;
+	}
+
+	return LZMA_OK;
+}
+
+
+static void
+lz_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
+{
+	lzma_next_end(&coder->next, allocator);
+	lzma_free(coder->dict.buf, allocator);
+
+	if (coder->lz.end != NULL)
+		coder->lz.end(coder->lz.coder, allocator);
+	else
+		lzma_free(coder->lz.coder, allocator);
+
+	lzma_free(coder, allocator);
+	return;
+}
+
+
+extern lzma_ret
+lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter_info *filters,
+		lzma_ret (*lz_init)(lzma_lz_decoder *lz,
+			lzma_allocator *allocator, const void *options,
+			lzma_lz_options *lz_options))
+{
+	// Allocate the base structure if it isn't already allocated.
+	if (next->coder == NULL) {
+		next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
+		if (next->coder == NULL)
+			return LZMA_MEM_ERROR;
+
+		next->code = &lz_decode;
+		next->end = &lz_decoder_end;
+
+		next->coder->dict.buf = NULL;
+		next->coder->dict.size = 0;
+		next->coder->lz = LZMA_LZ_DECODER_INIT;
+		next->coder->next = LZMA_NEXT_CODER_INIT;
+	}
+
+	// Allocate and initialize the LZ-based decoder. It will also give
+	// us the dictionary size.
+	lzma_lz_options lz_options;
+	return_if_error(lz_init(&next->coder->lz, allocator,
+			filters[0].options, &lz_options));
+
+	// If the dictionary size is very small, increase it to 4096 bytes.
+	// This is to prevent constant wrapping of the dictionary, which
+	// would slow things down. The downside is that since we don't check
+	// separately for the real dictionary size, we may happily accept
+	// corrupt files.
+	if (lz_options.dict_size < 4096)
+		lz_options.dict_size = 4096;
+
+	// Make dictionary size a multiple of 16. Some LZ-based decoders like
+	// LZMA use the lowest bits of lzma_dict.pos to know the alignment of the
+	// data. Aligned buffer is also good when memcpying from the
+	// dictionary to the output buffer, since applications are
+	// recommended to give aligned buffers to liblzma.
+	//
+	// Avoid integer overflow.
+	if (lz_options.dict_size > SIZE_MAX - 15)
+		return LZMA_MEM_ERROR;
+
+	lz_options.dict_size = (lz_options.dict_size + 15) & ~((size_t)(15));
+
+	// Allocate and initialize the dictionary.
+	if (next->coder->dict.size != lz_options.dict_size) {
+		lzma_free(next->coder->dict.buf, allocator);
+		next->coder->dict.buf
+				= lzma_alloc(lz_options.dict_size, allocator);
+		if (next->coder->dict.buf == NULL)
+			return LZMA_MEM_ERROR;
+
+		next->coder->dict.size = lz_options.dict_size;
+	}
+
+	lz_decoder_reset(next->coder);
+
+	// Use the preset dictionary if it was given to us.
+	if (lz_options.preset_dict != NULL
+			&& lz_options.preset_dict_size > 0) {
+		// If the preset dictionary is bigger than the actual
+		// dictionary, copy only the tail.
+		const size_t copy_size = MIN(lz_options.preset_dict_size,
+				lz_options.dict_size);
+		const size_t offset = lz_options.preset_dict_size - copy_size;
+		memcpy(next->coder->dict.buf, lz_options.preset_dict + offset,
+				copy_size);
+		next->coder->dict.pos = copy_size;
+		next->coder->dict.full = copy_size;
+	}
+
+	// Miscellaneous initializations
+	next->coder->next_finished = false;
+	next->coder->this_finished = false;
+	next->coder->temp.pos = 0;
+	next->coder->temp.size = 0;
+
+	// Initialize the next filter in the chain, if any.
+	return lzma_next_filter_init(&next->coder->next, allocator,
+			filters + 1);
+}
+
+
+extern uint64_t
+lzma_lz_decoder_memusage(size_t dictionary_size)
+{
+	return sizeof(lzma_coder) + (uint64_t)(dictionary_size);
+}
+
+
+extern void
+lzma_lz_decoder_uncompressed(lzma_coder *coder, lzma_vli uncompressed_size)
+{
+	coder->lz.set_uncompressed(coder->lz.coder, uncompressed_size);
+}
diff --git a/src/liblzma/lz/lz_decoder.h b/src/liblzma/lz/lz_decoder.h
new file mode 100644
index 000000000000..bf1609dbedf9
--- /dev/null
+++ b/src/liblzma/lz/lz_decoder.h
@@ -0,0 +1,234 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file lz_decoder.h
+/// \brief LZ out window
+///
+// Authors: Igor Pavlov
+// Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZ_DECODER_H +#define LZMA_LZ_DECODER_H + +#include "common.h" + + +typedef struct { + /// Pointer to the dictionary buffer. It can be an allocated buffer + /// internal to liblzma, or it can a be a buffer given by the + /// application when in single-call mode (not implemented yet). + uint8_t *buf; + + /// Write position in dictionary. The next byte will be written to + /// buf[pos]. + size_t pos; + + /// Indicates how full the dictionary is. This is used by + /// dict_is_distance_valid() to detect corrupt files that would + /// read beyond the beginning of the dictionary. + size_t full; + + /// Write limit + size_t limit; + + /// Size of the dictionary + size_t size; + + /// True when dictionary should be reset before decoding more data. + bool need_reset; + +} lzma_dict; + + +typedef struct { + size_t dict_size; + const uint8_t *preset_dict; + size_t preset_dict_size; +} lzma_lz_options; + + +typedef struct { + /// Data specific to the LZ-based decoder + lzma_coder *coder; + + /// Function to decode from in[] to *dict + lzma_ret (*code)(lzma_coder *restrict coder, + lzma_dict *restrict dict, const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size); + + void (*reset)(lzma_coder *coder, const void *options); + + /// Set the uncompressed size + void (*set_uncompressed)(lzma_coder *coder, + lzma_vli uncompressed_size); + + /// Free allocated resources + void (*end)(lzma_coder *coder, lzma_allocator *allocator); + +} lzma_lz_decoder; + + +#define LZMA_LZ_DECODER_INIT \ + (lzma_lz_decoder){ \ + .coder = NULL, \ + .code = NULL, \ + .reset = NULL, \ + .set_uncompressed = NULL, \ + .end = NULL, \ + } + + +extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters, + lzma_ret (*lz_init)(lzma_lz_decoder *lz, + lzma_allocator *allocator, const void *options, + lzma_lz_options *lz_options)); + +extern 
uint64_t lzma_lz_decoder_memusage(size_t dictionary_size); + +extern void lzma_lz_decoder_uncompressed( + lzma_coder *coder, lzma_vli uncompressed_size); + + +////////////////////// +// Inline functions // +////////////////////// + +/// Get a byte from the history buffer. +static inline uint8_t +dict_get(const lzma_dict *const dict, const uint32_t distance) +{ + return dict->buf[dict->pos - distance - 1 + + (distance < dict->pos ? 0 : dict->size)]; +} + + +/// Test if dictionary is empty. +static inline bool +dict_is_empty(const lzma_dict *const dict) +{ + return dict->full == 0; +} + + +/// Validate the match distance +static inline bool +dict_is_distance_valid(const lzma_dict *const dict, const size_t distance) +{ + return dict->full > distance; +} + + +/// Repeat *len bytes at distance. +static inline bool +dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len) +{ + // Don't write past the end of the dictionary. + const size_t dict_avail = dict->limit - dict->pos; + uint32_t left = MIN(dict_avail, *len); + *len -= left; + + // Repeat a block of data from the history. Because memcpy() is faster + // than copying byte by byte in a loop, the copying process gets split + // into three cases. + if (distance < left) { + // Source and target areas overlap, thus we can't use + // memcpy() nor even memmove() safely. + do { + dict->buf[dict->pos] = dict_get(dict, distance); + ++dict->pos; + } while (--left > 0); + + } else if (distance < dict->pos) { + // The easiest and fastest case + memcpy(dict->buf + dict->pos, + dict->buf + dict->pos - distance - 1, + left); + dict->pos += left; + + } else { + // The bigger the dictionary, the more rare this + // case occurs. We need to "wrap" the dict, thus + // we might need two memcpy() to copy all the data. 
+ assert(dict->full == dict->size); + const uint32_t copy_pos + = dict->pos - distance - 1 + dict->size; + uint32_t copy_size = dict->size - copy_pos; + + if (copy_size < left) { + memmove(dict->buf + dict->pos, dict->buf + copy_pos, + copy_size); + dict->pos += copy_size; + copy_size = left - copy_size; + memcpy(dict->buf + dict->pos, dict->buf, copy_size); + dict->pos += copy_size; + } else { + memmove(dict->buf + dict->pos, dict->buf + copy_pos, + left); + dict->pos += left; + } + } + + // Update how full the dictionary is. + if (dict->full < dict->pos) + dict->full = dict->pos; + + return unlikely(*len != 0); +} + + +/// Puts one byte into the dictionary. Returns true if the dictionary was +/// already full and the byte couldn't be added. +static inline bool +dict_put(lzma_dict *dict, uint8_t byte) +{ + if (unlikely(dict->pos == dict->limit)) + return true; + + dict->buf[dict->pos++] = byte; + + if (dict->pos > dict->full) + dict->full = dict->pos; + + return false; +} + + +/// Copies arbitrary amount of data into the dictionary. +static inline void +dict_write(lzma_dict *restrict dict, const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size, + size_t *restrict left) +{ + // NOTE: If we are being given more data than the size of the + // dictionary, it could be possible to optimize the LZ decoder + // so that not everything needs to go through the dictionary. + // This shouldn't be very common thing in practice though, and + // the slowdown of one extra memcpy() isn't bad compared to how + // much time it would have taken if the data were compressed. 
+ + if (in_size - *in_pos > *left) + in_size = *in_pos + *left; + + *left -= lzma_bufcpy(in, in_pos, in_size, + dict->buf, &dict->pos, dict->limit); + + if (dict->pos > dict->full) + dict->full = dict->pos; + + return; +} + + +static inline void +dict_reset(lzma_dict *dict) +{ + dict->need_reset = true; + return; +} + +#endif diff --git a/src/liblzma/lz/lz_encoder.c b/src/liblzma/lz/lz_encoder.c new file mode 100644 index 000000000000..757e53749c4d --- /dev/null +++ b/src/liblzma/lz/lz_encoder.c @@ -0,0 +1,578 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_encoder.c +/// \brief LZ in window +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lz_encoder.h" +#include "lz_encoder_hash.h" + +// See lz_encoder_hash.h. This is a bit hackish but avoids making +// endianness a conditional in makefiles. +#if defined(WORDS_BIGENDIAN) && !defined(HAVE_SMALL) +# include "lz_encoder_hash_table.h" +#endif + + +struct lzma_coder_s { + /// LZ-based encoder e.g. LZMA + lzma_lz_encoder lz; + + /// History buffer and match finder + lzma_mf mf; + + /// Next coder in the chain + lzma_next_coder next; +}; + + +/// \brief Moves the data in the input window to free space for new data +/// +/// mf->buffer is a sliding input window, which keeps mf->keep_size_before +/// bytes of input history available all the time. Now and then we need to +/// "slide" the buffer to make space for the new data to the end of the +/// buffer. At the same time, data older than keep_size_before is dropped. +/// +static void +move_window(lzma_mf *mf) +{ + // Align the move to a multiple of 16 bytes. Some LZ-based encoders + // like LZMA use the lowest bits of mf->read_pos to know the + // alignment of the uncompressed data. 
We also get better speed + // for memmove() with aligned buffers. + assert(mf->read_pos > mf->keep_size_before); + const uint32_t move_offset + = (mf->read_pos - mf->keep_size_before) & ~UINT32_C(15); + + assert(mf->write_pos > move_offset); + const size_t move_size = mf->write_pos - move_offset; + + assert(move_offset + move_size <= mf->size); + + memmove(mf->buffer, mf->buffer + move_offset, move_size); + + mf->offset += move_offset; + mf->read_pos -= move_offset; + mf->read_limit -= move_offset; + mf->write_pos -= move_offset; + + return; +} + + +/// \brief Tries to fill the input window (mf->buffer) +/// +/// If we are the last encoder in the chain, our input data is in in[]. +/// Otherwise we call the next filter in the chain to process in[] and +/// write its output to mf->buffer. +/// +/// This function must not be called once it has returned LZMA_STREAM_END. +/// +static lzma_ret +fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in, + size_t *in_pos, size_t in_size, lzma_action action) +{ + assert(coder->mf.read_pos <= coder->mf.write_pos); + + // Move the sliding window if needed. + if (coder->mf.read_pos >= coder->mf.size - coder->mf.keep_size_after) + move_window(&coder->mf); + + // Maybe this is ugly, but lzma_mf uses uint32_t for most things + // (which I find cleanest), but we need size_t here when filling + // the history window. + size_t write_pos = coder->mf.write_pos; + lzma_ret ret; + if (coder->next.code == NULL) { + // Not using a filter, simply memcpy() as much as possible. + lzma_bufcpy(in, in_pos, in_size, coder->mf.buffer, + &write_pos, coder->mf.size); + + ret = action != LZMA_RUN && *in_pos == in_size + ? 
LZMA_STREAM_END : LZMA_OK; + + } else { + ret = coder->next.code(coder->next.coder, allocator, + in, in_pos, in_size, + coder->mf.buffer, &write_pos, + coder->mf.size, action); + } + + coder->mf.write_pos = write_pos; + + // If end of stream has been reached or flushing completed, we allow + // the encoder to process all the input (that is, read_pos is allowed + // to reach write_pos). Otherwise we keep keep_size_after bytes + // available as prebuffer. + if (ret == LZMA_STREAM_END) { + assert(*in_pos == in_size); + ret = LZMA_OK; + coder->mf.action = action; + coder->mf.read_limit = coder->mf.write_pos; + + } else if (coder->mf.write_pos > coder->mf.keep_size_after) { + // This needs to be done conditionally, because if we got + // only little new input, there may be too little input + // to do any encoding yet. + coder->mf.read_limit = coder->mf.write_pos + - coder->mf.keep_size_after; + } + + // Restart the match finder after finished LZMA_SYNC_FLUSH. + if (coder->mf.pending > 0 + && coder->mf.read_pos < coder->mf.read_limit) { + // Match finder may update coder->pending and expects it to + // start from zero, so use a temporary variable. + const size_t pending = coder->mf.pending; + coder->mf.pending = 0; + + // Rewind read_pos so that the match finder can hash + // the pending bytes. + assert(coder->mf.read_pos >= pending); + coder->mf.read_pos -= pending; + + // Call the skip function directly instead of using + // mf_skip(), since we don't want to touch mf->read_ahead. + coder->mf.skip(&coder->mf, pending); + } + + return ret; +} + + +static lzma_ret +lz_encode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size, lzma_action action) +{ + while (*out_pos < out_size + && (*in_pos < in_size || action != LZMA_RUN)) { + // Read more data to coder->mf.buffer if needed. 
+ if (coder->mf.action == LZMA_RUN && coder->mf.read_pos + >= coder->mf.read_limit) + return_if_error(fill_window(coder, allocator, + in, in_pos, in_size, action)); + + // Encode + const lzma_ret ret = coder->lz.code(coder->lz.coder, + &coder->mf, out, out_pos, out_size); + if (ret != LZMA_OK) { + // Setting this to LZMA_RUN for cases when we are + // flushing. It doesn't matter when finishing or if + // an error occurred. + coder->mf.action = LZMA_RUN; + return ret; + } + } + + return LZMA_OK; +} + + +static bool +lz_encoder_prepare(lzma_mf *mf, lzma_allocator *allocator, + const lzma_lz_options *lz_options) +{ + // For now, the dictionary size is limited to 1.5 GiB. This may grow + // in the future if needed, but it needs a little more work than just + // changing this check. + if (lz_options->dict_size < LZMA_DICT_SIZE_MIN + || lz_options->dict_size + > (UINT32_C(1) << 30) + (UINT32_C(1) << 29) + || lz_options->nice_len > lz_options->match_len_max) + return true; + + mf->keep_size_before = lz_options->before_size + lz_options->dict_size; + + mf->keep_size_after = lz_options->after_size + + lz_options->match_len_max; + + // To avoid constant memmove()s, allocate some extra space. Since + // memmove()s become more expensive when the size of the buffer + // increases, we reserve more space when a large dictionary is + // used to make the memmove() calls rarer. + // + // This works with dictionaries up to about 3 GiB. If bigger + // dictionary is wanted, some extra work is needed: + // - Several variables in lzma_mf have to be changed from uint32_t + // to size_t. + // - Memory usage calculation needs something too, e.g. use uint64_t + // for mf->size. 
+ uint32_t reserve = lz_options->dict_size / 2; + if (reserve > (UINT32_C(1) << 30)) + reserve /= 2; + + reserve += (lz_options->before_size + lz_options->match_len_max + + lz_options->after_size) / 2 + (UINT32_C(1) << 19); + + const uint32_t old_size = mf->size; + mf->size = mf->keep_size_before + reserve + mf->keep_size_after; + + // Deallocate the old history buffer if it exists but has different + // size than what is needed now. + if (mf->buffer != NULL && old_size != mf->size) { + lzma_free(mf->buffer, allocator); + mf->buffer = NULL; + } + + // Match finder options + mf->match_len_max = lz_options->match_len_max; + mf->nice_len = lz_options->nice_len; + + // cyclic_size has to stay smaller than 2 Gi. Note that this doesn't + // mean limiting dictionary size to less than 2 GiB. With a match + // finder that uses multibyte resolution (hashes start at e.g. every + // fourth byte), cyclic_size would stay below 2 Gi even when + // dictionary size is greater than 2 GiB. + // + // It would be possible to allow cyclic_size >= 2 Gi, but then we + // would need to be careful to use 64-bit types in various places + // (size_t could do since we would need bigger than 32-bit address + // space anyway). It would also require either zeroing a multigigabyte + // buffer at initialization (waste of time and RAM) or allow + // normalization in lz_encoder_mf.c to access uninitialized + // memory to keep the code simpler. The current way is simple and + // still allows pretty big dictionaries, so I don't expect these + // limits to change. + mf->cyclic_size = lz_options->dict_size + 1; + + // Validate the match finder ID and setup the function pointers. 
+ switch (lz_options->match_finder) { +#ifdef HAVE_MF_HC3 + case LZMA_MF_HC3: + mf->find = &lzma_mf_hc3_find; + mf->skip = &lzma_mf_hc3_skip; + break; +#endif +#ifdef HAVE_MF_HC4 + case LZMA_MF_HC4: + mf->find = &lzma_mf_hc4_find; + mf->skip = &lzma_mf_hc4_skip; + break; +#endif +#ifdef HAVE_MF_BT2 + case LZMA_MF_BT2: + mf->find = &lzma_mf_bt2_find; + mf->skip = &lzma_mf_bt2_skip; + break; +#endif +#ifdef HAVE_MF_BT3 + case LZMA_MF_BT3: + mf->find = &lzma_mf_bt3_find; + mf->skip = &lzma_mf_bt3_skip; + break; +#endif +#ifdef HAVE_MF_BT4 + case LZMA_MF_BT4: + mf->find = &lzma_mf_bt4_find; + mf->skip = &lzma_mf_bt4_skip; + break; +#endif + + default: + return true; + } + + // Calculate the sizes of mf->hash and mf->son and check that + // nice_len is big enough for the selected match finder. + const uint32_t hash_bytes = lz_options->match_finder & 0x0F; + if (hash_bytes > mf->nice_len) + return true; + + const bool is_bt = (lz_options->match_finder & 0x10) != 0; + uint32_t hs; + + if (hash_bytes == 2) { + hs = 0xFFFF; + } else { + // Round dictionary size up to the next 2^n - 1 so it can + // be used as a hash mask. + hs = lz_options->dict_size - 1; + hs |= hs >> 1; + hs |= hs >> 2; + hs |= hs >> 4; + hs |= hs >> 8; + hs >>= 1; + hs |= 0xFFFF; + + if (hs > (UINT32_C(1) << 24)) { + if (hash_bytes == 3) + hs = (UINT32_C(1) << 24) - 1; + else + hs >>= 1; + } + } + + mf->hash_mask = hs; + + ++hs; + if (hash_bytes > 2) + hs += HASH_2_SIZE; + if (hash_bytes > 3) + hs += HASH_3_SIZE; +/* + No match finder uses this at the moment. + if (mf->hash_bytes > 4) + hs += HASH_4_SIZE; +*/ + + // If the above code calculating hs is modified, make sure that + // this assertion stays valid (UINT32_MAX / 5 is not strictly the + // exact limit). If it doesn't, you need to calculate that + // hash_size_sum + sons_count cannot overflow. 
+ assert(hs < UINT32_MAX / 5); + + const uint32_t old_count = mf->hash_size_sum + mf->sons_count; + mf->hash_size_sum = hs; + mf->sons_count = mf->cyclic_size; + if (is_bt) + mf->sons_count *= 2; + + const uint32_t new_count = mf->hash_size_sum + mf->sons_count; + + // Deallocate the old hash array if it exists and has different size + // than what is needed now. + if (mf->hash != NULL && old_count != new_count) { + lzma_free(mf->hash, allocator); + mf->hash = NULL; + } + + // Maximum number of match finder cycles + mf->depth = lz_options->depth; + if (mf->depth == 0) { + mf->depth = 16 + (mf->nice_len / 2); + if (!is_bt) + mf->depth /= 2; + } + + return false; +} + + +static bool +lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator, + const lzma_lz_options *lz_options) +{ + // Allocate the history buffer. + if (mf->buffer == NULL) { + mf->buffer = lzma_alloc(mf->size, allocator); + if (mf->buffer == NULL) + return true; + } + + // Use cyclic_size as initial mf->offset. This allows + // avoiding a few branches in the match finders. The downside is + // that match finder needs to be normalized more often, which may + // hurt performance with huge dictionaries. + mf->offset = mf->cyclic_size; + mf->read_pos = 0; + mf->read_ahead = 0; + mf->read_limit = 0; + mf->write_pos = 0; + mf->pending = 0; + + // Allocate match finder's hash array. + const size_t alloc_count = mf->hash_size_sum + mf->sons_count; + +#if UINT32_MAX >= SIZE_MAX / 4 + // Check for integer overflow. (Huge dictionaries are not + // possible on 32-bit CPU.) + if (alloc_count > SIZE_MAX / sizeof(uint32_t)) + return true; +#endif + + if (mf->hash == NULL) { + mf->hash = lzma_alloc(alloc_count * sizeof(uint32_t), + allocator); + if (mf->hash == NULL) + return true; + } + + mf->son = mf->hash + mf->hash_size_sum; + mf->cyclic_pos = 0; + + // Initialize the hash table. Since EMPTY_HASH_VALUE is zero, we + // can use memset(). 
+/* + for (uint32_t i = 0; i < hash_size_sum; ++i) + mf->hash[i] = EMPTY_HASH_VALUE; +*/ + memzero(mf->hash, (size_t)(mf->hash_size_sum) * sizeof(uint32_t)); + + // We don't need to initialize mf->son, but not doing that will + // make Valgrind complain in normalization (see normalize() in + // lz_encoder_mf.c). + // + // Skipping this initialization is *very* good when big dictionary is + // used but only small amount of data gets actually compressed: most + // of the mf->hash won't get actually allocated by the kernel, so + // we avoid wasting RAM and improve initialization speed a lot. + //memzero(mf->son, (size_t)(mf->sons_count) * sizeof(uint32_t)); + + // Handle preset dictionary. + if (lz_options->preset_dict != NULL + && lz_options->preset_dict_size > 0) { + // If the preset dictionary is bigger than the actual + // dictionary, use only the tail. + mf->write_pos = MIN(lz_options->preset_dict_size, mf->size); + memcpy(mf->buffer, lz_options->preset_dict + + lz_options->preset_dict_size - mf->write_pos, + mf->write_pos); + mf->action = LZMA_SYNC_FLUSH; + mf->skip(mf, mf->write_pos); + } + + mf->action = LZMA_RUN; + + return false; +} + + +extern uint64_t +lzma_lz_encoder_memusage(const lzma_lz_options *lz_options) +{ + // Old buffers must not exist when calling lz_encoder_prepare(). + lzma_mf mf = { + .buffer = NULL, + .hash = NULL, + }; + + // Setup the size information into mf. + if (lz_encoder_prepare(&mf, NULL, lz_options)) + return UINT64_MAX; + + // Calculate the memory usage. 
+ return (uint64_t)(mf.hash_size_sum + mf.sons_count) + * sizeof(uint32_t) + + (uint64_t)(mf.size) + sizeof(lzma_coder); +} + + +static void +lz_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->next, allocator); + + lzma_free(coder->mf.hash, allocator); + lzma_free(coder->mf.buffer, allocator); + + if (coder->lz.end != NULL) + coder->lz.end(coder->lz.coder, allocator); + else + lzma_free(coder->lz.coder, allocator); + + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +lz_encoder_update(lzma_coder *coder, lzma_allocator *allocator, + const lzma_filter *filters_null lzma_attribute((unused)), + const lzma_filter *reversed_filters) +{ + if (coder->lz.options_update == NULL) + return LZMA_PROG_ERROR; + + return_if_error(coder->lz.options_update( + coder->lz.coder, reversed_filters)); + + return lzma_next_filter_update( + &coder->next, allocator, reversed_filters + 1); +} + + +extern lzma_ret +lzma_lz_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, + lzma_ret (*lz_init)(lzma_lz_encoder *lz, + lzma_allocator *allocator, const void *options, + lzma_lz_options *lz_options)) +{ +#ifdef HAVE_SMALL + // We need that the CRC32 table has been initialized. + lzma_crc32_init(); +#endif + + // Allocate and initialize the base data structure. + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &lz_encode; + next->end = &lz_encoder_end; + next->update = &lz_encoder_update; + + next->coder->lz.coder = NULL; + next->coder->lz.code = NULL; + next->coder->lz.end = NULL; + + next->coder->mf.buffer = NULL; + next->coder->mf.hash = NULL; + + next->coder->next = LZMA_NEXT_CODER_INIT; + } + + // Initialize the LZ-based encoder. 
+ lzma_lz_options lz_options; + return_if_error(lz_init(&next->coder->lz, allocator, + filters[0].options, &lz_options)); + + // Setup the size information into next->coder->mf and deallocate + // old buffers if they have wrong size. + if (lz_encoder_prepare(&next->coder->mf, allocator, &lz_options)) + return LZMA_OPTIONS_ERROR; + + // Allocate new buffers if needed, and do the rest of + // the initialization. + if (lz_encoder_init(&next->coder->mf, allocator, &lz_options)) + return LZMA_MEM_ERROR; + + // Initialize the next filter in the chain, if any. + return lzma_next_filter_init(&next->coder->next, allocator, + filters + 1); +} + + +extern LZMA_API(lzma_bool) +lzma_mf_is_supported(lzma_match_finder mf) +{ + bool ret = false; + +#ifdef HAVE_MF_HC3 + if (mf == LZMA_MF_HC3) + ret = true; +#endif + +#ifdef HAVE_MF_HC4 + if (mf == LZMA_MF_HC4) + ret = true; +#endif + +#ifdef HAVE_MF_BT2 + if (mf == LZMA_MF_BT2) + ret = true; +#endif + +#ifdef HAVE_MF_BT3 + if (mf == LZMA_MF_BT3) + ret = true; +#endif + +#ifdef HAVE_MF_BT4 + if (mf == LZMA_MF_BT4) + ret = true; +#endif + + return ret; +} diff --git a/src/liblzma/lz/lz_encoder.h b/src/liblzma/lz/lz_encoder.h new file mode 100644 index 000000000000..f6352a47227e --- /dev/null +++ b/src/liblzma/lz/lz_encoder.h @@ -0,0 +1,328 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_encoder.h +/// \brief LZ in window and match finder API +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZ_ENCODER_H +#define LZMA_LZ_ENCODER_H + +#include "common.h" + + +/// A table of these is used by the LZ-based encoder to hold +/// the length-distance pairs found by the match finder. 
+typedef struct { + uint32_t len; + uint32_t dist; +} lzma_match; + + +typedef struct lzma_mf_s lzma_mf; +struct lzma_mf_s { + /////////////// + // In Window // + /////////////// + + /// Pointer to buffer with data to be compressed + uint8_t *buffer; + + /// Total size of the allocated buffer (that is, including all + /// the extra space) + uint32_t size; + + /// Number of bytes that must be kept available in our input history. + /// That is, once keep_size_before bytes have been processed, + /// buffer[read_pos - keep_size_before] is the oldest byte that + /// must be available for reading. + uint32_t keep_size_before; + + /// Number of bytes that must be kept in buffer after read_pos. + /// That is, read_pos <= write_pos - keep_size_after as long as + /// action is LZMA_RUN; when action != LZMA_RUN, read_pos is allowed + /// to reach write_pos so that the last bytes get encoded too. + uint32_t keep_size_after; + + /// Match finders store locations of matches using 32-bit integers. + /// To avoid adjusting several megabytes of integers every time the + /// input window is moved with move_window, we only adjust the + /// offset of the buffer. Thus, buffer[value_in_hash_table - offset] + /// is the byte pointed to by value_in_hash_table. + uint32_t offset; + + /// buffer[read_pos] is the next byte to run through the match + /// finder. This is incremented in the match finder once the byte + /// has been processed. + uint32_t read_pos; + + /// Number of bytes that have been run through the match finder, but + /// which haven't been encoded by the LZ-based encoder yet. + uint32_t read_ahead; + + /// As long as read_pos is less than read_limit, there is enough + /// input available in buffer for at least one encoding loop. + /// + /// Because of the stateful API, read_limit may and will get greater + /// than read_pos quite often. This is taken into account when + /// calculating the value for keep_size_after.
+ uint32_t read_limit; + + /// buffer[write_pos] is the first byte that doesn't contain valid + /// uncompressed data; that is, the next input byte will be copied + /// to buffer[write_pos]. + uint32_t write_pos; + + /// Number of bytes not hashed before read_pos. This is needed to + /// restart the match finder after LZMA_SYNC_FLUSH. + uint32_t pending; + + ////////////////// + // Match Finder // + ////////////////// + + /// Find matches. Returns the number of distance-length pairs written + /// to the matches array. This is called only via lzma_mf_find(). + uint32_t (*find)(lzma_mf *mf, lzma_match *matches); + + /// Skips num bytes. This is like find() but doesn't make the + /// distance-length pairs available, thus being a little faster. + /// This is called only via mf_skip(). + void (*skip)(lzma_mf *mf, uint32_t num); + + uint32_t *hash; + uint32_t *son; + uint32_t cyclic_pos; + uint32_t cyclic_size; // Must be dictionary size + 1. + uint32_t hash_mask; + + /// Maximum number of loops in the match finder + uint32_t depth; + + /// Maximum length of a match that the match finder will try to find. + uint32_t nice_len; + + /// Maximum length of a match supported by the LZ-based encoder. + /// If the longest match found by the match finder is nice_len, + /// mf_find() tries to expand it up to match_len_max bytes. + uint32_t match_len_max; + + /// When running out of input, binary tree match finders need to know + /// if it is due to flushing or finishing. The action is used also + /// by the LZ-based encoders themselves. + lzma_action action; + + /// Number of elements in hash[] + uint32_t hash_size_sum; + + /// Number of elements in son[] + uint32_t sons_count; +}; + + +typedef struct { + /// Extra amount of data to keep available before the "actual" + /// dictionary. + size_t before_size; + + /// Size of the history buffer + size_t dict_size; + + /// Extra amount of data to keep available after the "actual" + /// dictionary. 
+ size_t after_size; + + /// Maximum length of a match that the LZ-based encoder can accept. + /// This is used to extend matches of length nice_len to the + /// maximum possible length. + size_t match_len_max; + + /// Match finder will search matches up to this length. + /// This must be less than or equal to match_len_max. + size_t nice_len; + + /// Type of the match finder to use + lzma_match_finder match_finder; + + /// Maximum search depth + uint32_t depth; + + /// Optional preset dictionary (NULL or zero size means not used) + const uint8_t *preset_dict; + + uint32_t preset_dict_size; + +} lzma_lz_options; + + +// The total usable buffer space at any moment outside the match finder: +// before_size + dict_size + after_size + match_len_max +// +// In reality, there's some extra space allocated to keep the number of +// memmove() calls reasonable. The bigger the dict_size is, the bigger +// this extra buffer will be since with bigger dictionaries memmove() would +// also take longer. +// +// A single encoder loop in the LZ-based encoder may call the match finder +// (mf_find() or mf_skip()) at most after_size times. In other words, +// a single encoder loop may increment lzma_mf.read_pos at most after_size +// times. Since matches are looked up to +// lzma_mf.buffer[lzma_mf.read_pos + match_len_max - 1], the total +// amount of extra buffer needed after dict_size becomes +// after_size + match_len_max. +// +// before_size has two uses. The first one is to keep literals available +// in cases when the LZ-based encoder has made some read ahead. +// TODO: Maybe this could be changed by making the LZ-based encoders to +// store the actual literals as they do with length-distance pairs. +// +// Algorithms such as LZMA2 first try to compress a chunk, and then check +// if the encoded result is smaller than the uncompressed one. If the chunk +// was uncompressible, it is better to store it in uncompressed form in +// the output stream.
To do this, the whole uncompressed chunk has to be +// still available in the history buffer. before_size achieves that. + + +typedef struct { + /// Data specific to the LZ-based encoder + lzma_coder *coder; + + /// Function to encode from *mf to out[] + lzma_ret (*code)(lzma_coder *restrict coder, + lzma_mf *restrict mf, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size); + + /// Free allocated resources + void (*end)(lzma_coder *coder, lzma_allocator *allocator); + + /// Update the options in the middle of the encoding. + lzma_ret (*options_update)(lzma_coder *coder, + const lzma_filter *filter); + +} lzma_lz_encoder; + + +// Basic steps: +// 1. Input gets copied into the dictionary. +// 2. Data in dictionary gets run through the match finder byte by byte. +// 3. The literals and matches are encoded using e.g. LZMA. +// +// The bytes that have been run through the match finder, but not encoded yet, +// are called `read ahead'. + + +/// Get pointer to the first byte not run through the match finder +static inline const uint8_t * +mf_ptr(const lzma_mf *mf) +{ + return mf->buffer + mf->read_pos; +} + + +/// Get the number of bytes that haven't been run through the match finder yet. +static inline uint32_t +mf_avail(const lzma_mf *mf) +{ + return mf->write_pos - mf->read_pos; +} + + +/// Get the number of bytes that haven't been encoded yet (some of these +/// bytes may have been run through the match finder though). +static inline uint32_t +mf_unencoded(const lzma_mf *mf) +{ + return mf->write_pos - mf->read_pos + mf->read_ahead; +} + + +/// Calculate the absolute offset from the beginning of the most recent +/// dictionary reset. Only the lowest four bits are important, so there's no +/// problem that we don't know the 64-bit size of the data encoded so far. +/// +/// NOTE: When moving the input window, we need to do it so that the lowest +/// bits of mf->read_pos are not modified to keep this function working +/// as intended.
+static inline uint32_t +mf_position(const lzma_mf *mf) +{ + return mf->read_pos - mf->read_ahead; +} + + +/// Since everything else begins with mf_, use it also for lzma_mf_find(). +#define mf_find lzma_mf_find + + +/// Skip the given number of bytes. This is used when a good match was found. +/// For example, if mf_find() finds a match of 200 bytes long, the first byte +/// of that match was already consumed by mf_find(), and the rest 199 bytes +/// have to be skipped with mf_skip(mf, 199). +static inline void +mf_skip(lzma_mf *mf, uint32_t amount) +{ + if (amount != 0) { + mf->skip(mf, amount); + mf->read_ahead += amount; + } +} + + +/// Copies at most *left number of bytes from the history buffer +/// to out[]. This is needed by LZMA2 to encode uncompressed chunks. +static inline void +mf_read(lzma_mf *mf, uint8_t *out, size_t *out_pos, size_t out_size, + size_t *left) +{ + const size_t out_avail = out_size - *out_pos; + const size_t copy_size = MIN(out_avail, *left); + + assert(mf->read_ahead == 0); + assert(mf->read_pos >= *left); + + memcpy(out + *out_pos, mf->buffer + mf->read_pos - *left, + copy_size); + + *out_pos += copy_size; + *left -= copy_size; + return; +} + + +extern lzma_ret lzma_lz_encoder_init( + lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, + lzma_ret (*lz_init)(lzma_lz_encoder *lz, + lzma_allocator *allocator, const void *options, + lzma_lz_options *lz_options)); + + +extern uint64_t lzma_lz_encoder_memusage(const lzma_lz_options *lz_options); + + +// These are only for LZ encoder's internal use. 
+extern uint32_t lzma_mf_find( + lzma_mf *mf, uint32_t *count, lzma_match *matches); + +extern uint32_t lzma_mf_hc3_find(lzma_mf *dict, lzma_match *matches); +extern void lzma_mf_hc3_skip(lzma_mf *dict, uint32_t amount); + +extern uint32_t lzma_mf_hc4_find(lzma_mf *dict, lzma_match *matches); +extern void lzma_mf_hc4_skip(lzma_mf *dict, uint32_t amount); + +extern uint32_t lzma_mf_bt2_find(lzma_mf *dict, lzma_match *matches); +extern void lzma_mf_bt2_skip(lzma_mf *dict, uint32_t amount); + +extern uint32_t lzma_mf_bt3_find(lzma_mf *dict, lzma_match *matches); +extern void lzma_mf_bt3_skip(lzma_mf *dict, uint32_t amount); + +extern uint32_t lzma_mf_bt4_find(lzma_mf *dict, lzma_match *matches); +extern void lzma_mf_bt4_skip(lzma_mf *dict, uint32_t amount); + +#endif diff --git a/src/liblzma/lz/lz_encoder_hash.h b/src/liblzma/lz/lz_encoder_hash.h new file mode 100644 index 000000000000..c398d7d05e4d --- /dev/null +++ b/src/liblzma/lz/lz_encoder_hash.h @@ -0,0 +1,108 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_encoder_hash.h +/// \brief Hash macros for match finders +// +// Author: Igor Pavlov +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZ_ENCODER_HASH_H +#define LZMA_LZ_ENCODER_HASH_H + +#if defined(WORDS_BIGENDIAN) && !defined(HAVE_SMALL) + // This is to make liblzma produce the same output on big endian + // systems that it does on little endian systems. lz_encoder.c + // takes care of including the actual table. 
+	extern const uint32_t lzma_lz_hash_table[256];
+#	define hash_table lzma_lz_hash_table
+#else
+#	include "check.h"
+#	define hash_table lzma_crc32_table[0]
+#endif
+
+// Number of entries in the 2-, 3- and 4-byte hash areas.
+#define HASH_2_SIZE (UINT32_C(1) << 10)
+#define HASH_3_SIZE (UINT32_C(1) << 16)
+#define HASH_4_SIZE (UINT32_C(1) << 20)
+
+// The sizes are powers of two, so size - 1 works as an index mask.
+#define HASH_2_MASK (HASH_2_SIZE - 1)
+#define HASH_3_MASK (HASH_3_SIZE - 1)
+#define HASH_4_MASK (HASH_4_SIZE - 1)
+
+// Offsets into lzma_mf.hash at which the main hash of a 3-, 4- or 5-byte
+// match finder begins; the smaller fixed-size hashes sit in front of it.
+#define FIX_3_HASH_SIZE (HASH_2_SIZE)
+#define FIX_4_HASH_SIZE (HASH_2_SIZE + HASH_3_SIZE)
+#define FIX_5_HASH_SIZE (HASH_2_SIZE + HASH_3_SIZE + HASH_4_SIZE)
+
+// All hash_*_calc() macros expect `cur` (pointer to the current byte) to be
+// in scope and declare their results as local const variables. None of them
+// ends with a semicolon: the call site supplies it, as in "hash_2_calc();".
+
+// Endianness doesn't matter in hash_2_calc() (no effect on the output).
+// The first variant reads both bytes with one 16-bit load and is selected
+// only when TUKLIB_FAST_UNALIGNED_ACCESS is defined.
+#ifdef TUKLIB_FAST_UNALIGNED_ACCESS
+#	define hash_2_calc() \
+		const uint32_t hash_value = *(const uint16_t *)(cur)
+#else
+#	define hash_2_calc() \
+		const uint32_t hash_value \
+			= (uint32_t)(cur[0]) | ((uint32_t)(cur[1]) << 8)
+#endif
+
+// Declares temp, hash_2_value and hash_value from cur[0..2]. hash_value is
+// masked with mf->hash_mask, so `mf` must be in scope too.
+#define hash_3_calc() \
+	const uint32_t temp = hash_table[cur[0]] ^ cur[1]; \
+	const uint32_t hash_2_value = temp & HASH_2_MASK; \
+	const uint32_t hash_value \
+			= (temp ^ ((uint32_t)(cur[2]) << 8)) & mf->hash_mask
+
+// Declares temp, hash_2_value, hash_3_value and hash_value from cur[0..3].
+// hash_value is masked with mf->hash_mask, so `mf` must be in scope too.
+#define hash_4_calc() \
+	const uint32_t temp = hash_table[cur[0]] ^ cur[1]; \
+	const uint32_t hash_2_value = temp & HASH_2_MASK; \
+	const uint32_t hash_3_value \
+			= (temp ^ ((uint32_t)(cur[2]) << 8)) & HASH_3_MASK; \
+	const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8) \
+			^ (hash_table[cur[3]] << 5)) & mf->hash_mask
+
+
+// The following are not currently used.
+
+// Declares temp, hash_2_value, hash_3_value, hash_4_value and hash_value
+// from cur[0..4]. hash_value is derived from the unmasked 4-byte hash, which
+// is why hash_4_value is masked only in the last step. Needs `cur` and `mf`
+// in scope.
+#define hash_5_calc() \
+	const uint32_t temp = hash_table[cur[0]] ^ cur[1]; \
+	const uint32_t hash_2_value = temp & HASH_2_MASK; \
+	const uint32_t hash_3_value \
+			= (temp ^ ((uint32_t)(cur[2]) << 8)) & HASH_3_MASK; \
+	uint32_t hash_4_value = (temp ^ ((uint32_t)(cur[2]) << 8) \
+			^ (hash_table[cur[3]] << 5)); \
+	const uint32_t hash_value \
+			= (hash_4_value ^ (hash_table[cur[4]] << 3)) \
+				& mf->hash_mask; \
+	hash_4_value &= HASH_4_MASK
+
+// Disabled variant of hash_zip_calc() that combines the bytes in a
+// different order.
+/*
+#define hash_zip_calc() \
+	const uint32_t hash_value \
+			= (((uint32_t)(cur[0]) | ((uint32_t)(cur[1]) << 8)) \
+				^ hash_table[cur[2]]) & 0xFFFF
+*/
+
+// Declares a 16-bit hash_value from cur[0..2].
+#define hash_zip_calc() \
+	const uint32_t hash_value \
+			= (((uint32_t)(cur[2]) | ((uint32_t)(cur[0]) << 8)) \
+				^ hash_table[cur[1]]) & 0xFFFF
+
+// The mt_hash_*_calc() macros declare only the fixed-size hashes
+// (hash_2_value, hash_3_value, hash_4_value); they do not use mf->hash_mask.
+#define mt_hash_2_calc() \
+	const uint32_t hash_2_value \
+			= (hash_table[cur[0]] ^ cur[1]) & HASH_2_MASK
+
+#define mt_hash_3_calc() \
+	const uint32_t temp = hash_table[cur[0]] ^ cur[1]; \
+	const uint32_t hash_2_value = temp & HASH_2_MASK; \
+	const uint32_t hash_3_value \
+			= (temp ^ ((uint32_t)(cur[2]) << 8)) & HASH_3_MASK
+
+#define mt_hash_4_calc() \
+	const uint32_t temp = hash_table[cur[0]] ^ cur[1]; \
+	const uint32_t hash_2_value = temp & HASH_2_MASK; \
+	const uint32_t hash_3_value \
+			= (temp ^ ((uint32_t)(cur[2]) << 8)) & HASH_3_MASK; \
+	const uint32_t hash_4_value = (temp ^ ((uint32_t)(cur[2]) << 8) ^ \
+			(hash_table[cur[3]] << 5)) & HASH_4_MASK
+
+#endif
diff --git a/src/liblzma/lz/lz_encoder_hash_table.h b/src/liblzma/lz/lz_encoder_hash_table.h
new file mode 100644
index 000000000000..8c51717d704f
--- /dev/null
+++ b/src/liblzma/lz/lz_encoder_hash_table.h
@@ -0,0 +1,68 @@
+/* This file has been automatically generated by crc32_tablegen.c.
*/ + +const uint32_t lzma_lz_hash_table[256] = { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, + 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, + 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, + 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, + 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, + 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, + 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, + 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, + 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, + 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, + 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, + 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, + 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, + 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, + 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, + 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, + 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, + 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, + 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, + 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, + 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, + 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, + 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, + 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, + 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, + 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, + 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, + 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, + 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, + 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, + 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 
+ 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, + 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, + 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, + 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, + 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, + 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, + 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, + 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, + 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, + 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, + 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, + 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, + 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, + 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, + 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, + 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, + 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, + 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, + 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, + 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, + 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D +}; diff --git a/src/liblzma/lz/lz_encoder_mf.c b/src/liblzma/lz/lz_encoder_mf.c new file mode 100644 index 000000000000..b31b08578d45 --- /dev/null +++ b/src/liblzma/lz/lz_encoder_mf.c @@ -0,0 +1,753 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_encoder_mf.c +/// \brief Match finders +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "lz_encoder.h"
+#include "lz_encoder_hash.h"
+
+
+/// \brief      Find matches starting from the current byte
+///
+/// \param      mf          Match finder state; mf->find() does the search
+///                         and mf->read_ahead is incremented by one.
+/// \param      count_ptr   Receives the number of length-distance pairs
+///                         stored in matches[].
+/// \param      matches     Output array for the length-distance pairs.
+///
+/// \return     The length of the longest match found
+extern uint32_t
+lzma_mf_find(lzma_mf *mf, uint32_t *count_ptr, lzma_match *matches)
+{
+	// Call the match finder. It returns the number of length-distance
+	// pairs found.
+	// FIXME: Minimum count is zero, what _exactly_ is the maximum?
+	const uint32_t count = mf->find(mf, matches);
+
+	// Length of the longest match; assume that no matches were found
+	// and thus the maximum length is zero.
+	uint32_t len_best = 0;
+
+	if (count > 0) {
+#ifndef NDEBUG
+		// Validate the matches.
+		for (uint32_t i = 0; i < count; ++i) {
+			assert(matches[i].len <= mf->nice_len);
+			assert(matches[i].dist < mf->read_pos);
+			assert(memcmp(mf_ptr(mf) - 1,
+				mf_ptr(mf) - matches[i].dist - 2,
+				matches[i].len) == 0);
+		}
+#endif
+
+		// The last used element in the array contains
+		// the longest match.
+		len_best = matches[count - 1].len;
+
+		// If a match of maximum search length was found, try to
+		// extend the match to maximum possible length.
+		if (len_best == mf->nice_len) {
+			// The limit for the match length is either the
+			// maximum match length supported by the LZ-based
+			// encoder or the number of bytes left in the
+			// dictionary, whichever is smaller.
+			uint32_t limit = mf_avail(mf) + 1;
+			if (limit > mf->match_len_max)
+				limit = mf->match_len_max;
+
+			// Pointer to the byte we just ran through
+			// the match finder.
+			const uint8_t *p1 = mf_ptr(mf) - 1;
+
+			// Pointer to the beginning of the match. We need -1
+			// here because the match distances are zero based.
+			const uint8_t *p2 = p1 - matches[count - 1].dist - 1;
+
+			while (len_best < limit
+					&& p1[len_best] == p2[len_best])
+				++len_best;
+		}
+	}
+
+	*count_ptr = count;
+
+	// Finally update the read position to indicate that match finder was
+	// run for this dictionary offset.
+	++mf->read_ahead;
+
+	return len_best;
+}
+
+
+/// Hash value to indicate unused element in the hash. Since we start the
+/// positions from dict_size + 1, zero is always too far to qualify
+/// as usable match position.
+#define EMPTY_HASH_VALUE 0
+
+
+/// Normalization must be done when lzma_mf.offset + lzma_mf.read_pos
+/// reaches MUST_NORMALIZE_POS.
+#define MUST_NORMALIZE_POS UINT32_MAX
+
+
+/// \brief      Normalizes hash values
+///
+/// The hash arrays store positions of match candidates. The positions are
+/// relative to an arbitrary offset that is not the same as the absolute
+/// offset in the input stream. The relative position of the current byte
+/// is lzma_mf.offset + lzma_mf.read_pos. The distances of the matches are
+/// the differences of the current read position and the position found from
+/// the hash.
+///
+/// To prevent integer overflows of the offsets stored in the hash arrays,
+/// we need to "normalize" the stored values now and then. During the
+/// normalization, we drop values that indicate distance greater than the
+/// dictionary size, thus making space for new values.
+static void
+normalize(lzma_mf *mf)
+{
+	assert(mf->read_pos + mf->offset == MUST_NORMALIZE_POS);
+
+	// In future we may not want to touch the lowest bits, because there
+	// may be match finders that use larger resolution than one byte.
+	const uint32_t subvalue
+			= (MUST_NORMALIZE_POS - mf->cyclic_size);
+				// & (~(UINT32_C(1) << 10) - 1);
+
+	// One pass covers hash_size_sum + sons_count elements starting at
+	// mf->hash.
+	const uint32_t count = mf->hash_size_sum + mf->sons_count;
+	uint32_t *hash = mf->hash;
+
+	for (uint32_t i = 0; i < count; ++i) {
+		// If the distance is greater than the dictionary size,
+		// we can simply mark the hash element as empty.
+		//
+		// NOTE: Only the first mf->hash_size_sum elements are
+		// initialized for sure. There may be uninitialized elements
+		// in mf->son. Since we go through both mf->hash and
+		// mf->son here in normalization, Valgrind may complain
+		// that the "if" below depends on uninitialized value. In
+		// this case it is safe to ignore the warning. See also the
+		// comments in lz_encoder_init() in lz_encoder.c.
+		if (hash[i] <= subvalue)
+			hash[i] = EMPTY_HASH_VALUE;
+		else
+			hash[i] -= subvalue;
+	}
+
+	// Update offset to match the new locations.
+	mf->offset -= subvalue;
+
+	return;
+}
+
+
+/// Mark the current byte as processed from point of view of the match finder.
+/// Advances cyclic_pos (wrapping at cyclic_size) and read_pos, and runs
+/// normalize() once read_pos + offset reaches MUST_NORMALIZE_POS.
+static void
+move_pos(lzma_mf *mf)
+{
+	if (++mf->cyclic_pos == mf->cyclic_size)
+		mf->cyclic_pos = 0;
+
+	++mf->read_pos;
+	assert(mf->read_pos <= mf->write_pos);
+
+	// Use the named threshold so this test cannot drift apart from the
+	// assertion at the top of normalize().
+	if (unlikely(mf->read_pos + mf->offset == MUST_NORMALIZE_POS))
+		normalize(mf);
+}
+
+
+/// When flushing, we cannot run the match finder unless there is nice_len
+/// bytes available in the dictionary. Instead, we skip running the match
+/// finder (indicating that no match was found), and count how many bytes we
+/// have ignored this way.
+///
+/// When new data is given after the flushing was completed, the match finder
+/// is restarted by rewinding mf->read_pos backwards by mf->pending. Then
+/// the missed bytes are added to the hash using the match finder's skip
+/// function (with small amount of input, it may start using mf->pending
+/// again if flushing).
+///
+/// Due to this rewinding, we don't touch cyclic_pos or test for
+/// normalization. It will be done when the match finder's skip function
+/// catches up after a flush.
+static void
+move_pending(lzma_mf *mf)
+{
+	++mf->read_pos;
+	assert(mf->read_pos <= mf->write_pos);
+	++mf->pending;
+}
+
+
+/// Calculate len_limit and determine if there is enough input to run
+/// the actual match finder code. Sets up "cur" and "pos".
This macro
+/// is used by all find functions and binary tree skip functions. Hash
+/// chain skip function doesn't need len_limit so a simpler code is used
+/// in them.
+///
+/// \param      is_bt       True for the binary tree match finders; with it
+///                         the bail-out below is also taken whenever
+///                         mf->action is LZMA_SYNC_FLUSH and fewer than
+///                         nice_len bytes are available.
+/// \param      len_min     Smallest number of available bytes with which
+///                         the match finder can run.
+/// \param      ret_op      Statement executed after move_pending() when
+///                         bailing out ("return 0" or "continue").
+#define header(is_bt, len_min, ret_op) \
+	uint32_t len_limit = mf_avail(mf); \
+	if (mf->nice_len <= len_limit) { \
+		len_limit = mf->nice_len; \
+	} else if (len_limit < (len_min) \
+			|| (is_bt && mf->action == LZMA_SYNC_FLUSH)) { \
+		assert(mf->action != LZMA_RUN); \
+		move_pending(mf); \
+		ret_op; \
+	} \
+	const uint8_t *cur = mf_ptr(mf); \
+	const uint32_t pos = mf->read_pos + mf->offset
+
+
+/// Header for find functions. "return 0" indicates that zero matches
+/// were found.
+#define header_find(is_bt, len_min) \
+	header(is_bt, len_min, return 0); \
+	uint32_t matches_count = 0
+
+
+/// Header for a loop in a skip function. "continue" tells to skip the rest
+/// of the code in the loop.
+#define header_skip(is_bt, len_min) \
+	header(is_bt, len_min, continue)
+
+
+/// Calls hc_find_func() or bt_find_func() and calculates the total number
+/// of matches found. Updates the dictionary position and returns the number
+/// of matches found.
+///
+/// NOTE: The expansion contains a "return", so this must be the last
+/// statement executed in the calling find function.
+#define call_find(func, len_best) \
+do { \
+	matches_count = func(len_limit, pos, cur, cur_match, mf->depth, \
+				mf->son, mf->cyclic_pos, mf->cyclic_size, \
+				matches + matches_count, len_best) \
+			- matches; \
+	move_pos(mf); \
+	return matches_count; \
+} while (0)
+
+
+////////////////
+// Hash Chain //
+////////////////
+
+#if defined(HAVE_MF_HC3) || defined(HAVE_MF_HC4)
+/// \brief      Follow a hash chain and store every match that is longer
+///             than the best one found so far
+///
+/// \param       len_limit       Don't look for matches longer than len_limit.
+/// \param       pos             lzma_mf.read_pos + lzma_mf.offset
+/// \param       cur             Pointer to current byte (mf_ptr(mf))
+/// \param       cur_match       Start position of the current match candidate
+/// \param       depth           Maximum length of the hash chain
+/// \param       son             lzma_mf.son (contains the hash chain)
+/// \param       cyclic_pos      Index in son[] of the current position;
+///                              son[cyclic_pos] is set to cur_match.
+/// \param       cyclic_size     Candidates whose distance from pos is
+///                              cyclic_size or more end the search; also
+///                              used to wrap indexes into son[].
+/// \param       matches         Array to hold the matches.
+/// \param       len_best        The length of the longest match found so far.
+///
+/// \return      Pointer to the element after the last match stored.
+static lzma_match *
+hc_find_func(
+		const uint32_t len_limit,
+		const uint32_t pos,
+		const uint8_t *const cur,
+		uint32_t cur_match,
+		uint32_t depth,
+		uint32_t *const son,
+		const uint32_t cyclic_pos,
+		const uint32_t cyclic_size,
+		lzma_match *matches,
+		uint32_t len_best)
+{
+	son[cyclic_pos] = cur_match;
+
+	while (true) {
+		const uint32_t delta = pos - cur_match;
+		if (depth-- == 0 || delta >= cyclic_size)
+			return matches;
+
+		const uint8_t *const pb = cur - delta;
+		// Fetch the next candidate in the chain; the index wraps
+		// around when delta reaches past the start of son[].
+		cur_match = son[cyclic_pos - delta
+				+ (delta > cyclic_pos ? cyclic_size : 0)];
+
+		// Cheap rejection: a longer match must agree at offset
+		// len_best as well as at the first byte.
+		if (pb[len_best] == cur[len_best] && pb[0] == cur[0]) {
+			uint32_t len = 0;
+			while (++len != len_limit)
+				if (pb[len] != cur[len])
+					break;
+
+			if (len_best < len) {
+				len_best = len;
+				matches->len = len;
+				matches->dist = delta - 1;
+				++matches;
+
+				if (len == len_limit)
+					return matches;
+			}
+		}
+	}
+}
+
+
+/// Runs hc_find_func() through call_find(); returns from the caller.
+#define hc_find(len_best) \
+	call_find(hc_find_func, len_best)
+
+
+/// Links the current position into the chain without searching it.
+#define hc_skip() \
+do { \
+	mf->son[mf->cyclic_pos] = cur_match; \
+	move_pos(mf); \
+} while (0)
+
+#endif
+
+
+#ifdef HAVE_MF_HC3
+/// Hash chain match finder using the 2-byte and 3-byte hashes.
+/// Returns the number of matches stored in matches[].
+extern uint32_t
+lzma_mf_hc3_find(lzma_mf *mf, lzma_match *matches)
+{
+	header_find(false, 3);
+
+	hash_3_calc();
+
+	const uint32_t delta2 = pos - mf->hash[hash_2_value];
+	const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value];
+
+	mf->hash[hash_2_value] = pos;
+	mf->hash[FIX_3_HASH_SIZE + hash_value] = pos;
+
+	uint32_t len_best = 2;
+
+	// Try the candidate from the 2-byte hash first and extend it as far
+	// as len_limit allows.
+	if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) {
+		for ( ; len_best != len_limit; ++len_best)
+			if (*(cur + len_best - delta2) != cur[len_best])
+				break;
+
+		matches[0].len = len_best;
+		matches[0].dist = delta2 - 1;
+		matches_count = 1;
+
+		if (len_best == len_limit) {
+			hc_skip();
+			return 1; // matches_count
+		}
+	}
+
+	hc_find(len_best);
+}
+
+
+/// Feeds `amount` bytes into the HC3 hashes and chain without searching.
+/// The loop body runs at least once, so amount must be non-zero.
+extern void
+lzma_mf_hc3_skip(lzma_mf *mf, uint32_t amount)
+{
+	do {
+		if (mf_avail(mf) < 3) {
+			move_pending(mf);
+			continue;
+		}
+
+		const uint8_t *cur = mf_ptr(mf);
+		const uint32_t pos = mf->read_pos + mf->offset;
+
+		hash_3_calc();
+
+		const uint32_t cur_match
+				= mf->hash[FIX_3_HASH_SIZE + hash_value];
+
+		mf->hash[hash_2_value] = pos;
+		mf->hash[FIX_3_HASH_SIZE + hash_value] = pos;
+
+		hc_skip();
+
+	} while (--amount != 0);
+}
+#endif
+
+
+#ifdef HAVE_MF_HC4
+/// Hash chain match finder using the 2-, 3- and 4-byte hashes.
+/// Returns the number of matches stored in matches[].
+extern uint32_t
+lzma_mf_hc4_find(lzma_mf *mf, lzma_match *matches)
+{
+	header_find(false, 4);
+
+	hash_4_calc();
+
+	uint32_t delta2 = pos - mf->hash[hash_2_value];
+	const uint32_t delta3
+			= pos - mf->hash[FIX_3_HASH_SIZE + hash_3_value];
+	const uint32_t cur_match = mf->hash[FIX_4_HASH_SIZE + hash_value];
+
+	mf->hash[hash_2_value ] = pos;
+	mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos;
+	mf->hash[FIX_4_HASH_SIZE + hash_value] = pos;
+
+	uint32_t len_best = 1;
+
+	if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) {
+		len_best = 2;
+		matches[0].len = 2;
+		matches[0].dist = delta2 - 1;
+		matches_count = 1;
+	}
+
+	// Only the distance is stored here; the length of this entry is
+	// filled in below once the match has been extended. delta2 is
+	// reused to hold the distance that gets extended.
+	if (delta2 != delta3 && delta3 < mf->cyclic_size
+			&& *(cur - delta3) == *cur) {
+		len_best = 3;
+		matches[matches_count++].dist = delta3 - 1;
+		delta2 = delta3;
+	}
+
+	if (matches_count != 0) {
+		for ( ; len_best != len_limit; ++len_best)
+			if (*(cur + len_best - delta2) != cur[len_best])
+				break;
+
+		matches[matches_count - 1].len = len_best;
+
+		if (len_best == len_limit) {
+			hc_skip();
+			return matches_count;
+		}
+	}
+
+	if (len_best < 3)
+		len_best = 3;
+
+	hc_find(len_best);
+}
+
+
+/// Feeds `amount` bytes into the HC4 hashes and chain without searching.
+/// The loop body runs at least once, so amount must be non-zero.
+extern void
+lzma_mf_hc4_skip(lzma_mf *mf, uint32_t amount)
+{
+	do {
+		if (mf_avail(mf) < 4) {
+			move_pending(mf);
+			continue;
+		}
+
+		const uint8_t *cur = mf_ptr(mf);
+		const uint32_t pos = mf->read_pos + mf->offset;
+
+		hash_4_calc();
+
+		const uint32_t cur_match
+				= mf->hash[FIX_4_HASH_SIZE + hash_value];
+
+		mf->hash[hash_2_value] = pos;
+		mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos;
+		mf->hash[FIX_4_HASH_SIZE + hash_value] = pos;
+
+		hc_skip();
+
+	} while (--amount != 0);
+}
+#endif
+
+
+/////////////////
+// 
Binary Tree //
+/////////////////
+
+#if defined(HAVE_MF_BT2) || defined(HAVE_MF_BT3) || defined(HAVE_MF_BT4)
+/// \brief      Walk the binary tree from cur_match, storing every match
+///             longer than len_best
+///
+/// Each cyclic position owns two consecutive uint32_t slots in son[]
+/// (index cyclic_pos << 1 and the one after it). While walking, the visited
+/// candidates are written into the slots tracked by ptr0/ptr1, which start
+/// out as the two slots of the current position. The parameters have the
+/// same meaning as in the call made by call_find().
+///
+/// \return     Pointer to the element after the last match stored.
+static lzma_match *
+bt_find_func(
+		const uint32_t len_limit,
+		const uint32_t pos,
+		const uint8_t *const cur,
+		uint32_t cur_match,
+		uint32_t depth,
+		uint32_t *const son,
+		const uint32_t cyclic_pos,
+		const uint32_t cyclic_size,
+		lzma_match *matches,
+		uint32_t len_best)
+{
+	uint32_t *ptr0 = son + (cyclic_pos << 1) + 1;
+	uint32_t *ptr1 = son + (cyclic_pos << 1);
+
+	uint32_t len0 = 0;
+	uint32_t len1 = 0;
+
+	while (true) {
+		const uint32_t delta = pos - cur_match;
+		// Search depth exhausted or candidate too far away:
+		// terminate both pending slots and stop.
+		if (depth-- == 0 || delta >= cyclic_size) {
+			*ptr0 = EMPTY_HASH_VALUE;
+			*ptr1 = EMPTY_HASH_VALUE;
+			return matches;
+		}
+
+		// The two slots that belong to the candidate position.
+		uint32_t *const pair = son + ((cyclic_pos - delta
+				+ (delta > cyclic_pos ? cyclic_size : 0))
+				<< 1);
+
+		const uint8_t *const pb = cur - delta;
+		// Comparison resumes at the smaller of the lengths recorded
+		// for the two sides.
+		uint32_t len = MIN(len0, len1);
+
+		if (pb[len] == cur[len]) {
+			while (++len != len_limit)
+				if (pb[len] != cur[len])
+					break;
+
+			if (len_best < len) {
+				len_best = len;
+				matches->len = len;
+				matches->dist = delta - 1;
+				++matches;
+
+				if (len == len_limit) {
+					*ptr1 = pair[0];
+					*ptr0 = pair[1];
+					return matches;
+				}
+			}
+		}
+
+		// Continue via pair[1] when the candidate's byte at the
+		// first mismatch is smaller, otherwise via pair[0].
+		if (pb[len] < cur[len]) {
+			*ptr1 = cur_match;
+			ptr1 = pair + 1;
+			cur_match = *ptr1;
+			len1 = len;
+		} else {
+			*ptr0 = cur_match;
+			ptr0 = pair;
+			cur_match = *ptr0;
+			len0 = len;
+		}
+	}
+}
+
+
+/// Same walk and slot updates as bt_find_func(), but no matches are
+/// recorded; the walk stops early only when a candidate matches for
+/// len_limit bytes.
+static void
+bt_skip_func(
+		const uint32_t len_limit,
+		const uint32_t pos,
+		const uint8_t *const cur,
+		uint32_t cur_match,
+		uint32_t depth,
+		uint32_t *const son,
+		const uint32_t cyclic_pos,
+		const uint32_t cyclic_size)
+{
+	uint32_t *ptr0 = son + (cyclic_pos << 1) + 1;
+	uint32_t *ptr1 = son + (cyclic_pos << 1);
+
+	uint32_t len0 = 0;
+	uint32_t len1 = 0;
+
+	while (true) {
+		const uint32_t delta = pos - cur_match;
+		if (depth-- == 0 || delta >= cyclic_size) {
+			*ptr0 = EMPTY_HASH_VALUE;
+			*ptr1 = EMPTY_HASH_VALUE;
+			return;
+		}
+
+		uint32_t *pair = son + ((cyclic_pos - delta
+				+ (delta > cyclic_pos ? cyclic_size : 0))
+				<< 1);
+		const uint8_t *pb = cur - delta;
+		uint32_t len = MIN(len0, len1);
+
+		if (pb[len] == cur[len]) {
+			while (++len != len_limit)
+				if (pb[len] != cur[len])
+					break;
+
+			if (len == len_limit) {
+				*ptr1 = pair[0];
+				*ptr0 = pair[1];
+				return;
+			}
+		}
+
+		if (pb[len] < cur[len]) {
+			*ptr1 = cur_match;
+			ptr1 = pair + 1;
+			cur_match = *ptr1;
+			len1 = len;
+		} else {
+			*ptr0 = cur_match;
+			ptr0 = pair;
+			cur_match = *ptr0;
+			len0 = len;
+		}
+	}
+}
+
+
+/// Runs bt_find_func() through call_find(); returns from the caller.
+#define bt_find(len_best) \
+	call_find(bt_find_func, len_best)
+
+/// Runs bt_skip_func() for the current position and then advances it.
+#define bt_skip() \
+do { \
+	bt_skip_func(len_limit, pos, cur, cur_match, mf->depth, \
+			mf->son, mf->cyclic_pos, \
+			mf->cyclic_size); \
+	move_pos(mf); \
+} while (0)
+
+#endif
+
+
+#ifdef HAVE_MF_BT2
+/// Binary tree match finder using only the 2-byte hash.
+/// Returns the number of matches stored in matches[].
+extern uint32_t
+lzma_mf_bt2_find(lzma_mf *mf, lzma_match *matches)
+{
+	header_find(true, 2);
+
+	hash_2_calc();
+
+	const uint32_t cur_match = mf->hash[hash_value];
+	mf->hash[hash_value] = pos;
+
+	bt_find(1);
+}
+
+
+/// Feeds `amount` bytes into the BT2 hash and tree without reporting
+/// matches. The loop body runs at least once, so amount must be non-zero.
+extern void
+lzma_mf_bt2_skip(lzma_mf *mf, uint32_t amount)
+{
+	do {
+		header_skip(true, 2);
+
+		hash_2_calc();
+
+		const uint32_t cur_match = mf->hash[hash_value];
+		mf->hash[hash_value] = pos;
+
+		bt_skip();
+
+	} while (--amount != 0);
+}
+#endif
+
+
+#ifdef HAVE_MF_BT3
+/// Binary tree match finder using the 2-byte and 3-byte hashes.
+/// Returns the number of matches stored in matches[].
+extern uint32_t
+lzma_mf_bt3_find(lzma_mf *mf, lzma_match *matches)
+{
+	header_find(true, 3);
+
+	hash_3_calc();
+
+	const uint32_t delta2 = pos - mf->hash[hash_2_value];
+	const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value];
+
+	mf->hash[hash_2_value] = pos;
+	mf->hash[FIX_3_HASH_SIZE + hash_value] = pos;
+
+	uint32_t len_best = 2;
+
+	// Try the candidate from the 2-byte hash first and extend it as far
+	// as len_limit allows.
+	if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) {
+		for ( ; len_best != len_limit; ++len_best)
+			if (*(cur + len_best - delta2) != cur[len_best])
+				break;
+
+		matches[0].len = len_best;
+		matches[0].dist = delta2 - 1;
+		matches_count = 1;
+
+		if (len_best == len_limit) {
+			bt_skip();
+			return 1; // matches_count
+		}
+	}
+
+	bt_find(len_best);
+}
+
+
+/// Feeds `amount` bytes into the BT3 hashes and tree without reporting
+/// matches. The loop body runs at least once, so amount must be non-zero.
+extern void
+lzma_mf_bt3_skip(lzma_mf *mf, uint32_t amount)
+{
+	do {
+		header_skip(true, 3);
+
+		hash_3_calc();
+
+		const uint32_t cur_match
+				= mf->hash[FIX_3_HASH_SIZE + hash_value];
+
+		mf->hash[hash_2_value] = pos;
+		mf->hash[FIX_3_HASH_SIZE + hash_value] = pos;
+
+		bt_skip();
+
+	} while (--amount != 0);
+}
+#endif
+
+
+#ifdef HAVE_MF_BT4
+/// Binary tree match finder using the 2-, 3- and 4-byte hashes.
+/// Returns the number of matches stored in matches[].
+extern uint32_t
+lzma_mf_bt4_find(lzma_mf *mf, lzma_match *matches)
+{
+	header_find(true, 4);
+
+	hash_4_calc();
+
+	uint32_t delta2 = pos - mf->hash[hash_2_value];
+	const uint32_t delta3
+			= pos - mf->hash[FIX_3_HASH_SIZE + hash_3_value];
+	const uint32_t cur_match = mf->hash[FIX_4_HASH_SIZE + hash_value];
+
+	mf->hash[hash_2_value] = pos;
+	mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos;
+	mf->hash[FIX_4_HASH_SIZE + hash_value] = pos;
+
+	uint32_t len_best = 1;
+
+	if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) {
+		len_best = 2;
+		matches[0].len = 2;
+		matches[0].dist = delta2 - 1;
+		matches_count = 1;
+	}
+
+	// Only the distance is stored here; the length of this entry is
+	// filled in below once the match has been extended. delta2 is
+	// reused to hold the distance that gets extended.
+	if (delta2 != delta3 && delta3 < mf->cyclic_size
+			&& *(cur - delta3) == *cur) {
+		len_best = 3;
+		matches[matches_count++].dist = delta3 - 1;
+		delta2 = delta3;
+	}
+
+	if (matches_count != 0) {
+		for ( ; len_best != len_limit; ++len_best)
+			if (*(cur + len_best - delta2) != cur[len_best])
+				break;
+
+		matches[matches_count - 1].len = len_best;
+
+		if (len_best == len_limit) {
+			bt_skip();
+			return matches_count;
+		}
+	}
+
+	if (len_best < 3)
+		len_best = 3;
+
+	bt_find(len_best);
+}
+
+
+/// Feeds `amount` bytes into the BT4 hashes and tree without reporting
+/// matches. The loop body runs at least once, so amount must be non-zero.
+extern void
+lzma_mf_bt4_skip(lzma_mf *mf, uint32_t amount)
+{
+	do {
+		header_skip(true, 4);
+
+		hash_4_calc();
+
+		const uint32_t cur_match
+				= mf->hash[FIX_4_HASH_SIZE + hash_value];
+
+		mf->hash[hash_2_value] = pos;
+		mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos;
+		mf->hash[FIX_4_HASH_SIZE + hash_value] = pos;
+
+		bt_skip();
+
+	} while (--amount != 0);
+}
+#endif
diff --git a/src/liblzma/lzma/fastpos.h b/src/liblzma/lzma/fastpos.h
new file mode 100644
index 000000000000..4aea23181ab6
--- /dev/null
+++
b/src/liblzma/lzma/fastpos.h @@ -0,0 +1,140 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file fastpos.h +/// \brief Kind of two-bit version of bit scan reverse +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_FASTPOS_H +#define LZMA_FASTPOS_H + +// LZMA encodes match distances (positions) by storing the highest two +// bits using a six-bit value [0, 63], and then the missing lower bits. +// Dictionary size is also stored using this encoding in the new .lzma +// file format header. +// +// fastpos.h provides a way to quickly find out the correct six-bit +// values. The following table gives some examples of this encoding: +// +// pos return +// 0 0 +// 1 1 +// 2 2 +// 3 3 +// 4 4 +// 5 4 +// 6 5 +// 7 5 +// 8 6 +// 11 6 +// 12 7 +// ... ... +// 15 7 +// 16 8 +// 17 8 +// ... ... +// 23 8 +// 24 9 +// 25 9 +// ... ... +// +// +// Provided functions or macros +// ---------------------------- +// +// get_pos_slot(pos) is the basic version. get_pos_slot_2(pos) +// assumes that pos >= FULL_DISTANCES, thus the result is at least +// FULL_DISTANCES_BITS * 2. Using get_pos_slot(pos) instead of +// get_pos_slot_2(pos) would give the same result, but get_pos_slot_2(pos) +// should be tiny bit faster due to the assumption being made. +// +// +// Size vs. speed +// -------------- +// +// With some CPUs that have fast BSR (bit scan reverse) instruction, the +// size optimized version is slightly faster than the bigger table based +// approach. Such CPUs include Intel Pentium Pro, Pentium II, Pentium III +// and Core 2 (possibly others). AMD K7 seems to have slower BSR, but that +// would still have speed roughly comparable to the table version. 
Older
+// x86 CPUs like the original Pentium have very slow BSR; on those systems
+// the table version is a lot faster.
+//
+// On some CPUs, the table version is a lot faster when using position
+// dependent code, but with position independent code the size optimized
+// version is slightly faster. This occurs at least on 32-bit SPARC (no
+// ASM optimizations).
+//
+// I'm making the table version the default, because that has good speed
+// on all systems I have tried. The size optimized version is sometimes
+// slightly faster, but sometimes it is a lot slower.
+
+#ifdef HAVE_SMALL
+// Positions 0-4 map to themselves; everything else goes through
+// get_pos_slot_2(). NOTE: pos is evaluated more than once.
+#	define get_pos_slot(pos) ((pos) <= 4 ? (pos) : get_pos_slot_2(pos))
+
+/// Size-optimized slot calculation: twice the index returned by bsr32(pos)
+/// plus the bit just below it. The shift by (i - 1) requires bsr32(pos) to
+/// be at least 1, which the get_pos_slot() macro guarantees by passing only
+/// pos > 4.
+static inline uint32_t
+get_pos_slot_2(uint32_t pos)
+{
+	const uint32_t i = bsr32(pos);
+	return (i + i) + ((pos >> (i - 1)) & 1);
+}
+
+
+#else
+
+// The lookup table covers positions below 1 << FASTPOS_BITS directly.
+#define FASTPOS_BITS 13
+
+extern const uint8_t lzma_fastpos[1 << FASTPOS_BITS];
+
+
+// Number of low bits dropped from pos before the table lookup at step n.
+#define fastpos_shift(extra, n) \
+	((extra) + (n) * (FASTPOS_BITS - 1))
+
+// First position that is too big for the lookup at step n.
+#define fastpos_limit(extra, n) \
+	(UINT32_C(1) << (FASTPOS_BITS + fastpos_shift(extra, n)))
+
+// Slot for pos at step n: look up the shifted position and add two slots
+// for every bit shifted out. The whole expansion is parenthesized so that
+// it stays a single operand wherever it is used.
+#define fastpos_result(pos, extra, n) \
+	(lzma_fastpos[(pos) >> fastpos_shift(extra, n)] \
+			+ 2 * fastpos_shift(extra, n))
+
+
+/// Table-based slot calculation valid for any 32-bit pos.
+static inline uint32_t
+get_pos_slot(uint32_t pos)
+{
+	// If it is small enough, we can pick the result directly from
+	// the precalculated table.
+	if (pos < fastpos_limit(0, 0))
+		return lzma_fastpos[pos];
+
+	if (pos < fastpos_limit(0, 1))
+		return fastpos_result(pos, 0, 1);
+
+	return fastpos_result(pos, 0, 2);
+}
+
+
+#ifdef FULL_DISTANCES_BITS
+/// Variant for callers that already know pos >= FULL_DISTANCES; the lookups
+/// start with FULL_DISTANCES_BITS - 1 bits already shifted out.
+static inline uint32_t
+get_pos_slot_2(uint32_t pos)
+{
+	assert(pos >= FULL_DISTANCES);
+
+	if (pos < fastpos_limit(FULL_DISTANCES_BITS - 1, 0))
+		return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 0);
+
+	if (pos < fastpos_limit(FULL_DISTANCES_BITS - 1, 1))
+		return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 1);
+
+	return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 2);
+}
+#endif
+
+#endif
+
+#endif
diff --git a/src/liblzma/lzma/fastpos_table.c b/src/liblzma/lzma/fastpos_table.c
new file mode 100644
index 000000000000..6a3ceac0e90a
--- /dev/null
+++ b/src/liblzma/lzma/fastpos_table.c
@@ -0,0 +1,519 @@
+/* This file has been automatically generated by fastpos_tablegen.c. */
+
+#include "common.h"
+#include "fastpos.h"
+
+const uint8_t lzma_fastpos[1 << FASTPOS_BITS] = {
+	0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
+	8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
+	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+	11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+	12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+	12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+	13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+	13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+	14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+	14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+	14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+	14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 
+ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 
21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 
22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 
22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 
23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 
23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 
24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 
24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 
25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 
25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25 +}; diff --git a/src/liblzma/lzma/fastpos_tablegen.c b/src/liblzma/lzma/fastpos_tablegen.c new file mode 100644 index 000000000000..c97e6f411c27 --- /dev/null +++ b/src/liblzma/lzma/fastpos_tablegen.c @@ -0,0 +1,56 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file fastpos_tablegen.c +/// \brief Generates the lzma_fastpos[] lookup table +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include "fastpos.h" + + +int +main(void) +{ + uint8_t fastpos[1 << FASTPOS_BITS]; + + const uint8_t fast_slots = 2 * FASTPOS_BITS; + uint32_t c = 2; + + fastpos[0] = 0; + fastpos[1] = 1; + + for (uint8_t slot_fast = 2; slot_fast < fast_slots; ++slot_fast) { + const uint32_t k = 1 << ((slot_fast >> 1) - 1); + for (uint32_t j = 0; j < k; ++j, ++c) + fastpos[c] = slot_fast; + } + + printf("/* This file has been automatically generated " + "by fastpos_tablegen.c. 
*/\n\n" + "#include \"common.h\"\n" + "#include \"fastpos.h\"\n\n" + "const uint8_t lzma_fastpos[1 << FASTPOS_BITS] = {"); + + for (size_t i = 0; i < (1 << FASTPOS_BITS); ++i) { + if (i % 16 == 0) + printf("\n\t"); + + printf("%3u", (unsigned int)(fastpos[i])); + + if (i != (1 << FASTPOS_BITS) - 1) + printf(","); + } + + printf("\n};\n"); + + return 0; +} diff --git a/src/liblzma/lzma/lzma2_decoder.c b/src/liblzma/lzma/lzma2_decoder.c new file mode 100644 index 000000000000..b4c2f2d5ba70 --- /dev/null +++ b/src/liblzma/lzma/lzma2_decoder.c @@ -0,0 +1,305 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma2_decoder.c +/// \brief LZMA2 decoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lzma2_decoder.h" +#include "lz_decoder.h" +#include "lzma_decoder.h" + + +struct lzma_coder_s { + enum sequence { + SEQ_CONTROL, + SEQ_UNCOMPRESSED_1, + SEQ_UNCOMPRESSED_2, + SEQ_COMPRESSED_0, + SEQ_COMPRESSED_1, + SEQ_PROPERTIES, + SEQ_LZMA, + SEQ_COPY, + } sequence; + + /// Sequence after the size fields have been decoded. + enum sequence next_sequence; + + /// LZMA decoder + lzma_lz_decoder lzma; + + /// Uncompressed size of LZMA chunk + size_t uncompressed_size; + + /// Compressed size of the chunk (naturally equals to uncompressed + /// size of uncompressed chunk) + size_t compressed_size; + + /// True if properties are needed. This is false before the + /// first LZMA chunk. + bool need_properties; + + /// True if dictionary reset is needed. This is false before the + /// first chunk (LZMA or uncompressed). 
+ bool need_dictionary_reset; + + lzma_options_lzma options; +}; + + +static lzma_ret +lzma2_decode(lzma_coder *restrict coder, lzma_dict *restrict dict, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size) +{ + // With SEQ_LZMA it is possible that no new input is needed to do + // some progress. The rest of the sequences assume that there is + // at least one byte of input. + while (*in_pos < in_size || coder->sequence == SEQ_LZMA) + switch (coder->sequence) { + case SEQ_CONTROL: { + const uint32_t control = in[*in_pos]; + ++*in_pos; + + if (control >= 0xE0 || control == 1) { + // Dictionary reset implies that next LZMA chunk has + // to set new properties. + coder->need_properties = true; + coder->need_dictionary_reset = true; + } else if (coder->need_dictionary_reset) { + return LZMA_DATA_ERROR; + } + + if (control >= 0x80) { + // LZMA chunk. The highest five bits of the + // uncompressed size are taken from the control byte. + coder->uncompressed_size = (control & 0x1F) << 16; + coder->sequence = SEQ_UNCOMPRESSED_1; + + // See if there are new properties or if we need to + // reset the state. + if (control >= 0xC0) { + // When there are new properties, state reset + // is done at SEQ_PROPERTIES. + coder->need_properties = false; + coder->next_sequence = SEQ_PROPERTIES; + + } else if (coder->need_properties) { + return LZMA_DATA_ERROR; + + } else { + coder->next_sequence = SEQ_LZMA; + + // If only state reset is wanted with old + // properties, do the resetting here for + // simplicity. 
+ if (control >= 0xA0) + coder->lzma.reset(coder->lzma.coder, + &coder->options); + } + } else { + // End marker + if (control == 0x00) + return LZMA_STREAM_END; + + // Invalid control values + if (control > 2) + return LZMA_DATA_ERROR; + + // It's uncompressed chunk + coder->sequence = SEQ_COMPRESSED_0; + coder->next_sequence = SEQ_COPY; + } + + if (coder->need_dictionary_reset) { + // Finish the dictionary reset and let the caller + // flush the dictionary to the actual output buffer. + coder->need_dictionary_reset = false; + dict_reset(dict); + return LZMA_OK; + } + + break; + } + + case SEQ_UNCOMPRESSED_1: + coder->uncompressed_size += (uint32_t)(in[(*in_pos)++]) << 8; + coder->sequence = SEQ_UNCOMPRESSED_2; + break; + + case SEQ_UNCOMPRESSED_2: + coder->uncompressed_size += in[(*in_pos)++] + 1; + coder->sequence = SEQ_COMPRESSED_0; + coder->lzma.set_uncompressed(coder->lzma.coder, + coder->uncompressed_size); + break; + + case SEQ_COMPRESSED_0: + coder->compressed_size = (uint32_t)(in[(*in_pos)++]) << 8; + coder->sequence = SEQ_COMPRESSED_1; + break; + + case SEQ_COMPRESSED_1: + coder->compressed_size += in[(*in_pos)++] + 1; + coder->sequence = coder->next_sequence; + break; + + case SEQ_PROPERTIES: + if (lzma_lzma_lclppb_decode(&coder->options, in[(*in_pos)++])) + return LZMA_DATA_ERROR; + + coder->lzma.reset(coder->lzma.coder, &coder->options); + + coder->sequence = SEQ_LZMA; + break; + + case SEQ_LZMA: { + // Store the start offset so that we can update + // coder->compressed_size later. + const size_t in_start = *in_pos; + + // Decode from in[] to *dict. + const lzma_ret ret = coder->lzma.code(coder->lzma.coder, + dict, in, in_pos, in_size); + + // Validate and update coder->compressed_size. + const size_t in_used = *in_pos - in_start; + if (in_used > coder->compressed_size) + return LZMA_DATA_ERROR; + + coder->compressed_size -= in_used; + + // Return if we didn't finish the chunk, or an error occurred. 
+ if (ret != LZMA_STREAM_END) + return ret; + + // The LZMA decoder must have consumed the whole chunk now. + // We don't need to worry about uncompressed size since it + // is checked by the LZMA decoder. + if (coder->compressed_size != 0) + return LZMA_DATA_ERROR; + + coder->sequence = SEQ_CONTROL; + break; + } + + case SEQ_COPY: { + // Copy from input to the dictionary as is. + // FIXME Can copy too much? + dict_write(dict, in, in_pos, in_size, &coder->compressed_size); + if (coder->compressed_size != 0) + return LZMA_OK; + + coder->sequence = SEQ_CONTROL; + break; + } + + default: + assert(0); + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static void +lzma2_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + assert(coder->lzma.end == NULL); + lzma_free(coder->lzma.coder, allocator); + + lzma_free(coder, allocator); + + return; +} + + +static lzma_ret +lzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator, + const void *opt, lzma_lz_options *lz_options) +{ + if (lz->coder == NULL) { + lz->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (lz->coder == NULL) + return LZMA_MEM_ERROR; + + lz->code = &lzma2_decode; + lz->end = &lzma2_decoder_end; + + lz->coder->lzma = LZMA_LZ_DECODER_INIT; + } + + const lzma_options_lzma *options = opt; + + lz->coder->sequence = SEQ_CONTROL; + lz->coder->need_properties = true; + lz->coder->need_dictionary_reset = options->preset_dict == NULL + || options->preset_dict_size == 0; + + return lzma_lzma_decoder_create(&lz->coder->lzma, + allocator, options, lz_options); +} + + +extern lzma_ret +lzma_lzma2_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + // LZMA2 can only be the last filter in the chain. This is enforced + // by the raw_decoder initialization. 
+ assert(filters[1].init == NULL); + + return lzma_lz_decoder_init(next, allocator, filters, + &lzma2_decoder_init); +} + + +extern uint64_t +lzma_lzma2_decoder_memusage(const void *options) +{ + return sizeof(lzma_coder) + + lzma_lzma_decoder_memusage_nocheck(options); +} + + +extern lzma_ret +lzma_lzma2_props_decode(void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size) +{ + if (props_size != 1) + return LZMA_OPTIONS_ERROR; + + // Check that reserved bits are unset. + if (props[0] & 0xC0) + return LZMA_OPTIONS_ERROR; + + // Decode the dictionary size. + if (props[0] > 40) + return LZMA_OPTIONS_ERROR; + + lzma_options_lzma *opt = lzma_alloc( + sizeof(lzma_options_lzma), allocator); + if (opt == NULL) + return LZMA_MEM_ERROR; + + if (props[0] == 40) { + opt->dict_size = UINT32_MAX; + } else { + opt->dict_size = 2 | (props[0] & 1); + opt->dict_size <<= props[0] / 2 + 11; + } + + opt->preset_dict = NULL; + opt->preset_dict_size = 0; + + *options = opt; + + return LZMA_OK; +} diff --git a/src/liblzma/lzma/lzma2_decoder.h b/src/liblzma/lzma/lzma2_decoder.h new file mode 100644 index 000000000000..fac4ac487b07 --- /dev/null +++ b/src/liblzma/lzma/lzma2_decoder.h @@ -0,0 +1,28 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma2_decoder.h +/// \brief LZMA2 decoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZMA2_DECODER_H +#define LZMA_LZMA2_DECODER_H + +#include "common.h" + +extern lzma_ret lzma_lzma2_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern uint64_t lzma_lzma2_decoder_memusage(const void *options); + +extern lzma_ret lzma_lzma2_props_decode( + void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size); + +#endif diff --git a/src/liblzma/lzma/lzma2_encoder.c b/src/liblzma/lzma/lzma2_encoder.c new file mode 100644 index 000000000000..1e0569a4a956 --- /dev/null +++ b/src/liblzma/lzma/lzma2_encoder.c @@ -0,0 +1,393 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma2_encoder.c +/// \brief LZMA2 encoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lz_encoder.h" +#include "lzma_encoder.h" +#include "fastpos.h" +#include "lzma2_encoder.h" + + +struct lzma_coder_s { + enum { + SEQ_INIT, + SEQ_LZMA_ENCODE, + SEQ_LZMA_COPY, + SEQ_UNCOMPRESSED_HEADER, + SEQ_UNCOMPRESSED_COPY, + } sequence; + + /// LZMA encoder + lzma_coder *lzma; + + /// LZMA options currently in use. + lzma_options_lzma opt_cur; + + bool need_properties; + bool need_state_reset; + bool need_dictionary_reset; + + /// Uncompressed size of a chunk + size_t uncompressed_size; + + /// Compressed size of a chunk (excluding headers); this is also used + /// to indicate the end of buf[] in SEQ_LZMA_COPY. 
+ size_t compressed_size; + + /// Read position in buf[] + size_t buf_pos; + + /// Buffer to hold the chunk header and LZMA compressed data + uint8_t buf[LZMA2_HEADER_MAX + LZMA2_CHUNK_MAX]; +}; + + +static void +lzma2_header_lzma(lzma_coder *coder) +{ + assert(coder->uncompressed_size > 0); + assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX); + assert(coder->compressed_size > 0); + assert(coder->compressed_size <= LZMA2_CHUNK_MAX); + + size_t pos; + + if (coder->need_properties) { + pos = 0; + + if (coder->need_dictionary_reset) + coder->buf[pos] = 0x80 + (3 << 5); + else + coder->buf[pos] = 0x80 + (2 << 5); + } else { + pos = 1; + + if (coder->need_state_reset) + coder->buf[pos] = 0x80 + (1 << 5); + else + coder->buf[pos] = 0x80; + } + + // Set the start position for copying. + coder->buf_pos = pos; + + // Uncompressed size + size_t size = coder->uncompressed_size - 1; + coder->buf[pos++] += size >> 16; + coder->buf[pos++] = (size >> 8) & 0xFF; + coder->buf[pos++] = size & 0xFF; + + // Compressed size + size = coder->compressed_size - 1; + coder->buf[pos++] = size >> 8; + coder->buf[pos++] = size & 0xFF; + + // Properties, if needed + if (coder->need_properties) + lzma_lzma_lclppb_encode(&coder->opt_cur, coder->buf + pos); + + coder->need_properties = false; + coder->need_state_reset = false; + coder->need_dictionary_reset = false; + + // The copying code uses coder->compressed_size to indicate the end + // of coder->buf[], so we need to add the maximum size of the header here. + coder->compressed_size += LZMA2_HEADER_MAX; + + return; +} + + +static void +lzma2_header_uncompressed(lzma_coder *coder) +{ + assert(coder->uncompressed_size > 0); + assert(coder->uncompressed_size <= LZMA2_CHUNK_MAX); + + // If this is the first chunk, we need to include dictionary + // reset indicator.
+ if (coder->need_dictionary_reset) + coder->buf[0] = 1; + else + coder->buf[0] = 2; + + coder->need_dictionary_reset = false; + + // "Compressed" size + coder->buf[1] = (coder->uncompressed_size - 1) >> 8; + coder->buf[2] = (coder->uncompressed_size - 1) & 0xFF; + + // Set the start position for copying. + coder->buf_pos = 0; + return; +} + + +static lzma_ret +lzma2_encode(lzma_coder *restrict coder, lzma_mf *restrict mf, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size) +{ + while (*out_pos < out_size) + switch (coder->sequence) { + case SEQ_INIT: + // If there's no input left and we are flushing or finishing, + // don't start a new chunk. + if (mf_unencoded(mf) == 0) { + // Write end of payload marker if finishing. + if (mf->action == LZMA_FINISH) + out[(*out_pos)++] = 0; + + return mf->action == LZMA_RUN + ? LZMA_OK : LZMA_STREAM_END; + } + + if (coder->need_state_reset) + return_if_error(lzma_lzma_encoder_reset( + coder->lzma, &coder->opt_cur)); + + coder->uncompressed_size = 0; + coder->compressed_size = 0; + coder->sequence = SEQ_LZMA_ENCODE; + + // Fall through + + case SEQ_LZMA_ENCODE: { + // Calculate how much more uncompressed data this chunk + // could accept. + const uint32_t left = LZMA2_UNCOMPRESSED_MAX + - coder->uncompressed_size; + uint32_t limit; + + if (left < mf->match_len_max) { + // Must flush immediately since the next LZMA symbol + // could make the uncompressed size of the chunk too + // big. + limit = 0; + } else { + // Calculate maximum read_limit that is OK from point + // of view of LZMA2 chunk size. + limit = mf->read_pos - mf->read_ahead + + left - mf->match_len_max; + } + + // Save the start position so that we can update + // coder->uncompressed_size. + const uint32_t read_start = mf->read_pos - mf->read_ahead; + + // Call the LZMA encoder until the chunk is finished. 
+ const lzma_ret ret = lzma_lzma_encode(coder->lzma, mf, + coder->buf + LZMA2_HEADER_MAX, + &coder->compressed_size, + LZMA2_CHUNK_MAX, limit); + + coder->uncompressed_size += mf->read_pos - mf->read_ahead + - read_start; + + assert(coder->compressed_size <= LZMA2_CHUNK_MAX); + assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX); + + if (ret != LZMA_STREAM_END) + return LZMA_OK; + + // See if the chunk compressed. If it didn't, we encode it + // as uncompressed chunk. This saves a few bytes of space + // and makes decoding faster. + if (coder->compressed_size >= coder->uncompressed_size) { + coder->uncompressed_size += mf->read_ahead; + assert(coder->uncompressed_size + <= LZMA2_UNCOMPRESSED_MAX); + mf->read_ahead = 0; + lzma2_header_uncompressed(coder); + coder->need_state_reset = true; + coder->sequence = SEQ_UNCOMPRESSED_HEADER; + break; + } + + // The chunk did compress at least by one byte, so we store + // the chunk as LZMA. + lzma2_header_lzma(coder); + + coder->sequence = SEQ_LZMA_COPY; + } + + // Fall through + + case SEQ_LZMA_COPY: + // Copy the compressed chunk along its headers to the + // output buffer. + lzma_bufcpy(coder->buf, &coder->buf_pos, + coder->compressed_size, + out, out_pos, out_size); + if (coder->buf_pos != coder->compressed_size) + return LZMA_OK; + + coder->sequence = SEQ_INIT; + break; + + case SEQ_UNCOMPRESSED_HEADER: + // Copy the three-byte header to indicate uncompressed chunk. + lzma_bufcpy(coder->buf, &coder->buf_pos, + LZMA2_HEADER_UNCOMPRESSED, + out, out_pos, out_size); + if (coder->buf_pos != LZMA2_HEADER_UNCOMPRESSED) + return LZMA_OK; + + coder->sequence = SEQ_UNCOMPRESSED_COPY; + + // Fall through + + case SEQ_UNCOMPRESSED_COPY: + // Copy the uncompressed data as is from the dictionary + // to the output buffer. 
+ mf_read(mf, out, out_pos, out_size, &coder->uncompressed_size); + if (coder->uncompressed_size != 0) + return LZMA_OK; + + coder->sequence = SEQ_INIT; + break; + } + + return LZMA_OK; +} + + +static void +lzma2_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_free(coder->lzma, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +lzma2_encoder_options_update(lzma_coder *coder, const lzma_filter *filter) +{ + // New options can be set only when there is no incomplete chunk. + // This is the case at the beginning of the raw stream and right + // after LZMA_SYNC_FLUSH. + if (filter->options == NULL || coder->sequence != SEQ_INIT) + return LZMA_PROG_ERROR; + + // Look if there are new options. At least for now, + // only lc/lp/pb can be changed. + const lzma_options_lzma *opt = filter->options; + if (coder->opt_cur.lc != opt->lc || coder->opt_cur.lp != opt->lp + || coder->opt_cur.pb != opt->pb) { + // Validate the options. + if (opt->lc > LZMA_LCLP_MAX || opt->lp > LZMA_LCLP_MAX + || opt->lc + opt->lp > LZMA_LCLP_MAX + || opt->pb > LZMA_PB_MAX) + return LZMA_OPTIONS_ERROR; + + // The new options will be used when the encoder starts + // a new LZMA2 chunk. 
+ coder->opt_cur.lc = opt->lc; + coder->opt_cur.lp = opt->lp; + coder->opt_cur.pb = opt->pb; + coder->need_properties = true; + coder->need_state_reset = true; + } + + return LZMA_OK; +} + + +static lzma_ret +lzma2_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator, + const void *options, lzma_lz_options *lz_options) +{ + if (options == NULL) + return LZMA_PROG_ERROR; + + if (lz->coder == NULL) { + lz->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (lz->coder == NULL) + return LZMA_MEM_ERROR; + + lz->code = &lzma2_encode; + lz->end = &lzma2_encoder_end; + lz->options_update = &lzma2_encoder_options_update; + + lz->coder->lzma = NULL; + } + + lz->coder->opt_cur = *(const lzma_options_lzma *)(options); + + lz->coder->sequence = SEQ_INIT; + lz->coder->need_properties = true; + lz->coder->need_state_reset = false; + lz->coder->need_dictionary_reset + = lz->coder->opt_cur.preset_dict == NULL + || lz->coder->opt_cur.preset_dict_size == 0; + + // Initialize LZMA encoder + return_if_error(lzma_lzma_encoder_create(&lz->coder->lzma, allocator, + &lz->coder->opt_cur, lz_options)); + + // Make sure that we will always have enough history available in + // case we need to use uncompressed chunks. They are used when the + // compressed size of a chunk is not smaller than the uncompressed + // size, so we need to have at least LZMA2_CHUNK_MAX bytes + // of history available.
+ if (lz_options->before_size + lz_options->dict_size < LZMA2_CHUNK_MAX) + lz_options->before_size + = LZMA2_CHUNK_MAX - lz_options->dict_size; + + return LZMA_OK; +} + + +extern lzma_ret +lzma_lzma2_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return lzma_lz_encoder_init( + next, allocator, filters, &lzma2_encoder_init); +} + + +extern uint64_t +lzma_lzma2_encoder_memusage(const void *options) +{ + const uint64_t lzma_mem = lzma_lzma_encoder_memusage(options); + if (lzma_mem == UINT64_MAX) + return UINT64_MAX; + + return sizeof(lzma_coder) + lzma_mem; +} + + +extern lzma_ret +lzma_lzma2_props_encode(const void *options, uint8_t *out) +{ + const lzma_options_lzma *const opt = options; + uint32_t d = MAX(opt->dict_size, LZMA_DICT_SIZE_MIN); + + // Round up to the next 2^n - 1 or 2^n + 2^(n - 1) - 1 depending + // on which one is the next: + --d; + d |= d >> 2; + d |= d >> 3; + d |= d >> 4; + d |= d >> 8; + d |= d >> 16; + + // Get the highest two bits using the proper encoding: + if (d == UINT32_MAX) + out[0] = 40; + else + out[0] = get_pos_slot(d + 1) - 24; + + return LZMA_OK; +} diff --git a/src/liblzma/lzma/lzma2_encoder.h b/src/liblzma/lzma/lzma2_encoder.h new file mode 100644 index 000000000000..ca19ef4691cc --- /dev/null +++ b/src/liblzma/lzma/lzma2_encoder.h @@ -0,0 +1,41 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma2_encoder.h +/// \brief LZMA2 encoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file.
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZMA2_ENCODER_H +#define LZMA_LZMA2_ENCODER_H + +#include "common.h" + + +/// Maximum number of bytes of actual data per chunk (no headers) +#define LZMA2_CHUNK_MAX (UINT32_C(1) << 16) + +/// Maximum uncompressed size of LZMA chunk (no headers) +#define LZMA2_UNCOMPRESSED_MAX (UINT32_C(1) << 21) + +/// Maximum size of LZMA2 headers +#define LZMA2_HEADER_MAX 6 + +/// Size of a header for uncompressed chunk +#define LZMA2_HEADER_UNCOMPRESSED 3 + + +extern lzma_ret lzma_lzma2_encoder_init( + lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters); + +extern uint64_t lzma_lzma2_encoder_memusage(const void *options); + +extern lzma_ret lzma_lzma2_props_encode(const void *options, uint8_t *out); + +#endif diff --git a/src/liblzma/lzma/lzma_common.h b/src/liblzma/lzma/lzma_common.h new file mode 100644 index 000000000000..e31e285f9a52 --- /dev/null +++ b/src/liblzma/lzma/lzma_common.h @@ -0,0 +1,223 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_common.h +/// \brief Private definitions common to LZMA encoder and decoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZMA_COMMON_H +#define LZMA_LZMA_COMMON_H + +#include "common.h" +#include "range_common.h" + + +/////////////////// +// Miscellaneous // +/////////////////// + +/// Maximum number of position states. A position state is the lowest pos bits +/// number of bits of the current uncompressed offset. In some places there +/// are different sets of probabilities for different pos states. +#define POS_STATES_MAX (1 << LZMA_PB_MAX) + + +/// Validates lc, lp, and pb. 
+static inline bool +is_lclppb_valid(const lzma_options_lzma *options) +{ + return options->lc <= LZMA_LCLP_MAX && options->lp <= LZMA_LCLP_MAX + && options->lc + options->lp <= LZMA_LCLP_MAX + && options->pb <= LZMA_PB_MAX; +} + + +/////////// +// State // +/////////// + +/// This enum is used to track which events have occurred most recently and +/// in which order. This information is used to predict the next event. +/// +/// Events: +/// - Literal: One 8-bit byte +/// - Match: Repeat a chunk of data at some distance +/// - Long repeat: Multi-byte match at a recently seen distance +/// - Short repeat: One-byte repeat at a recently seen distance +/// +/// The event names are in the form STATE_oldest_older_previous. REP means +/// either short or long repeated match, and NONLIT means any non-literal. +typedef enum { + STATE_LIT_LIT, + STATE_MATCH_LIT_LIT, + STATE_REP_LIT_LIT, + STATE_SHORTREP_LIT_LIT, + STATE_MATCH_LIT, + STATE_REP_LIT, + STATE_SHORTREP_LIT, + STATE_LIT_MATCH, + STATE_LIT_LONGREP, + STATE_LIT_SHORTREP, + STATE_NONLIT_MATCH, + STATE_NONLIT_REP, +} lzma_lzma_state; + + +/// Total number of states +#define STATES 12 + +/// The lowest 7 states indicate that the previous state was a literal. +#define LIT_STATES 7 + + +/// Indicate that the latest state was a literal. +#define update_literal(state) \ + state = ((state) <= STATE_SHORTREP_LIT_LIT \ + ? STATE_LIT_LIT \ + : ((state) <= STATE_LIT_SHORTREP \ + ? (state) - 3 \ + : (state) - 6)) + +/// Indicate that the latest state was a match. +#define update_match(state) \ + state = ((state) < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH) + +/// Indicate that the latest state was a long repeated match. +#define update_long_rep(state) \ + state = ((state) < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP) + +/// Indicate that the latest state was a short match. +#define update_short_rep(state) \ + state = ((state) < LIT_STATES ?
STATE_LIT_SHORTREP : STATE_NONLIT_REP) + +/// Test if the previous state was a literal. +#define is_literal_state(state) \ + ((state) < LIT_STATES) + + +///////////// +// Literal // +///////////// + +/// Each literal coder is divided in three sections: +/// - 0x001-0x0FF: Without match byte +/// - 0x101-0x1FF: With match byte; match bit is 0 +/// - 0x201-0x2FF: With match byte; match bit is 1 +/// +/// Match byte is used when the previous LZMA symbol was something else than +/// a literal (that is, it was some kind of match). +#define LITERAL_CODER_SIZE 0x300 + +/// Maximum number of literal coders +#define LITERAL_CODERS_MAX (1 << LZMA_LCLP_MAX) + +/// Locate the literal coder for the next literal byte. The choice depends on +/// - the lowest literal_pos_bits bits of the position of the current +/// byte; and +/// - the highest literal_context_bits bits of the previous byte. +#define literal_subcoder(probs, lc, lp_mask, pos, prev_byte) \ + ((probs)[(((pos) & lp_mask) << lc) + ((prev_byte) >> (8 - lc))]) + + +static inline void +literal_init(probability (*probs)[LITERAL_CODER_SIZE], + uint32_t lc, uint32_t lp) +{ + assert(lc + lp <= LZMA_LCLP_MAX); + + const uint32_t coders = 1U << (lc + lp); + + for (uint32_t i = 0; i < coders; ++i) + for (uint32_t j = 0; j < LITERAL_CODER_SIZE; ++j) + bit_reset(probs[i][j]); + + return; +} + + +////////////////// +// Match length // +////////////////// + +// Minimum length of a match is two bytes. +#define MATCH_LEN_MIN 2 + +// Match length is encoded with 4, 5, or 10 bits. 
+// +// Length Bits +// 2-9 4 = Choice=0 + 3 bits +// 10-17 5 = Choice=1 + Choice2=0 + 3 bits +// 18-273 10 = Choice=1 + Choice2=1 + 8 bits +#define LEN_LOW_BITS 3 +#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS) +#define LEN_MID_BITS 3 +#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS) +#define LEN_HIGH_BITS 8 +#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS) +#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS) + +// Maximum length of a match is 273 which is a result of the encoding +// described above. +#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1) + + +//////////////////// +// Match distance // +//////////////////// + +// Different set of probabilities is used for match distances that have very +// short match length: Lengths of 2, 3, and 4 bytes have a separate set of +// probabilities for each length. The matches with longer length use a shared +// set of probabilities. +#define LEN_TO_POS_STATES 4 + +// Macro to get the index of the appropriate probability array. +#define get_len_to_pos_state(len) \ + ((len) < LEN_TO_POS_STATES + MATCH_LEN_MIN \ + ? (len) - MATCH_LEN_MIN \ + : LEN_TO_POS_STATES - 1) + +// The highest two bits of a match distance (pos slot) are encoded using six +// bits. See fastpos.h for more explanation. +#define POS_SLOT_BITS 6 +#define POS_SLOTS (1 << POS_SLOT_BITS) + +// Match distances up to 127 are fully encoded using probabilities. Since +// the highest two bits (pos slot) are always encoded using six bits, the +// distances 0-3 don't need any additional bits to encode, since the pos +// slot itself is the same as the actual distance. START_POS_MODEL_INDEX +// indicates the first pos slot where at least one additional bit is needed. +#define START_POS_MODEL_INDEX 4 + +// Match distances greater than 127 are encoded in three pieces: +// - pos slot: the highest two bits +// - direct bits: 2-26 bits below the highest two bits +// - alignment bits: four lowest bits +// +// Direct bits don't use any probabilities. 
+// +// The pos slot value of 14 is for distances 128-191 (see the table in +// fastpos.h to understand why). +#define END_POS_MODEL_INDEX 14 + +// Pos slots that indicate a distance <= 127. +#define FULL_DISTANCES_BITS (END_POS_MODEL_INDEX / 2) +#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS) + +// For match distances greater than 127, only the highest two bits and the +// lowest four bits (alignment) is encoded using probabilities. +#define ALIGN_BITS 4 +#define ALIGN_TABLE_SIZE (1 << ALIGN_BITS) +#define ALIGN_MASK (ALIGN_TABLE_SIZE - 1) + +// LZMA remembers the four most recent match distances. Reusing these distances +// tends to take less space than re-encoding the actual distance value. +#define REP_DISTANCES 4 + +#endif diff --git a/src/liblzma/lzma/lzma_decoder.c b/src/liblzma/lzma/lzma_decoder.c new file mode 100644 index 000000000000..4329e0199273 --- /dev/null +++ b/src/liblzma/lzma/lzma_decoder.c @@ -0,0 +1,1057 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_decoder.c +/// \brief LZMA decoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lz_decoder.h" +#include "lzma_common.h" +#include "lzma_decoder.h" +#include "range_decoder.h" + + +#ifdef HAVE_SMALL + +// Macros for (somewhat) size-optimized code. 
+#define seq_4(seq) seq + +#define seq_6(seq) seq + +#define seq_8(seq) seq + +#define seq_len(seq) \ + seq ## _CHOICE, \ + seq ## _CHOICE2, \ + seq ## _BITTREE + +#define len_decode(target, ld, pos_state, seq) \ +do { \ +case seq ## _CHOICE: \ + rc_if_0(ld.choice, seq ## _CHOICE) { \ + rc_update_0(ld.choice); \ + probs = ld.low[pos_state];\ + limit = LEN_LOW_SYMBOLS; \ + target = MATCH_LEN_MIN; \ + } else { \ + rc_update_1(ld.choice); \ +case seq ## _CHOICE2: \ + rc_if_0(ld.choice2, seq ## _CHOICE2) { \ + rc_update_0(ld.choice2); \ + probs = ld.mid[pos_state]; \ + limit = LEN_MID_SYMBOLS; \ + target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \ + } else { \ + rc_update_1(ld.choice2); \ + probs = ld.high; \ + limit = LEN_HIGH_SYMBOLS; \ + target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS \ + + LEN_MID_SYMBOLS; \ + } \ + } \ + symbol = 1; \ +case seq ## _BITTREE: \ + do { \ + rc_bit(probs[symbol], , , seq ## _BITTREE); \ + } while (symbol < limit); \ + target += symbol - limit; \ +} while (0) + +#else // HAVE_SMALL + +// Unrolled versions +#define seq_4(seq) \ + seq ## 0, \ + seq ## 1, \ + seq ## 2, \ + seq ## 3 + +#define seq_6(seq) \ + seq ## 0, \ + seq ## 1, \ + seq ## 2, \ + seq ## 3, \ + seq ## 4, \ + seq ## 5 + +#define seq_8(seq) \ + seq ## 0, \ + seq ## 1, \ + seq ## 2, \ + seq ## 3, \ + seq ## 4, \ + seq ## 5, \ + seq ## 6, \ + seq ## 7 + +#define seq_len(seq) \ + seq ## _CHOICE, \ + seq ## _LOW0, \ + seq ## _LOW1, \ + seq ## _LOW2, \ + seq ## _CHOICE2, \ + seq ## _MID0, \ + seq ## _MID1, \ + seq ## _MID2, \ + seq ## _HIGH0, \ + seq ## _HIGH1, \ + seq ## _HIGH2, \ + seq ## _HIGH3, \ + seq ## _HIGH4, \ + seq ## _HIGH5, \ + seq ## _HIGH6, \ + seq ## _HIGH7 + +#define len_decode(target, ld, pos_state, seq) \ +do { \ + symbol = 1; \ +case seq ## _CHOICE: \ + rc_if_0(ld.choice, seq ## _CHOICE) { \ + rc_update_0(ld.choice); \ + rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW0); \ + rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW1); \ + 
rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW2); \ + target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \ + } else { \ + rc_update_1(ld.choice); \ +case seq ## _CHOICE2: \ + rc_if_0(ld.choice2, seq ## _CHOICE2) { \ + rc_update_0(ld.choice2); \ + rc_bit_case(ld.mid[pos_state][symbol], , , \ + seq ## _MID0); \ + rc_bit_case(ld.mid[pos_state][symbol], , , \ + seq ## _MID1); \ + rc_bit_case(ld.mid[pos_state][symbol], , , \ + seq ## _MID2); \ + target = symbol - LEN_MID_SYMBOLS \ + + MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \ + } else { \ + rc_update_1(ld.choice2); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH0); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH1); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH2); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH3); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH4); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH5); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH6); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH7); \ + target = symbol - LEN_HIGH_SYMBOLS \ + + MATCH_LEN_MIN \ + + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \ + } \ + } \ +} while (0) + +#endif // HAVE_SMALL + + +/// Length decoder probabilities; see comments in lzma_common.h. +typedef struct { + probability choice; + probability choice2; + probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS]; + probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS]; + probability high[LEN_HIGH_SYMBOLS]; +} lzma_length_decoder; + + +struct lzma_coder_s { + /////////////////// + // Probabilities // + /////////////////// + + /// Literals; see comments in lzma_common.h. + probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE]; + + /// If 1, it's a match. Otherwise it's a single 8-bit literal. + probability is_match[STATES][POS_STATES_MAX]; + + /// If 1, it's a repeated match. The distance is one of rep0 .. rep3. + probability is_rep[STATES]; + + /// If 0, distance of a repeated match is rep0. + /// Otherwise check is_rep1. 
+ probability is_rep0[STATES]; + + /// If 0, distance of a repeated match is rep1. + /// Otherwise check is_rep2. + probability is_rep1[STATES]; + + /// If 0, distance of a repeated match is rep2. Otherwise it is rep3. + probability is_rep2[STATES]; + + /// If 1, the repeated match has length of one byte. Otherwise + /// the length is decoded from rep_len_decoder. + probability is_rep0_long[STATES][POS_STATES_MAX]; + + /// Probability tree for the highest two bits of the match distance. + /// There is a separate probability tree for match lengths of + /// 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273]. + probability pos_slot[LEN_TO_POS_STATES][POS_SLOTS]; + + /// Probability trees for additional bits for match distance when the + /// distance is in the range [4, 127]. + probability pos_special[FULL_DISTANCES - END_POS_MODEL_INDEX]; + + /// Probability tree for the lowest four bits of a match distance + /// that is equal to or greater than 128. + probability pos_align[ALIGN_TABLE_SIZE]; + + /// Length of a normal match + lzma_length_decoder match_len_decoder; + + /// Length of a repeated match + lzma_length_decoder rep_len_decoder; + + /////////////////// + // Decoder state // + /////////////////// + + // Range coder + lzma_range_decoder rc; + + // Types of the most recently seen LZMA symbols + lzma_lzma_state state; + + uint32_t rep0; ///< Distance of the latest match + uint32_t rep1; ///< Distance of second latest match + uint32_t rep2; ///< Distance of third latest match + uint32_t rep3; ///< Distance of fourth latest match + + uint32_t pos_mask; // (1U << pb) - 1 + uint32_t literal_context_bits; + uint32_t literal_pos_mask; + + /// Uncompressed size as bytes, or LZMA_VLI_UNKNOWN if end of + /// payload marker is expected. 
+ lzma_vli uncompressed_size; + + //////////////////////////////// + // State of incomplete symbol // + //////////////////////////////// + + /// Position where to continue the decoder loop + enum { + SEQ_NORMALIZE, + SEQ_IS_MATCH, + seq_8(SEQ_LITERAL), + seq_8(SEQ_LITERAL_MATCHED), + SEQ_LITERAL_WRITE, + SEQ_IS_REP, + seq_len(SEQ_MATCH_LEN), + seq_6(SEQ_POS_SLOT), + SEQ_POS_MODEL, + SEQ_DIRECT, + seq_4(SEQ_ALIGN), + SEQ_EOPM, + SEQ_IS_REP0, + SEQ_SHORTREP, + SEQ_IS_REP0_LONG, + SEQ_IS_REP1, + SEQ_IS_REP2, + seq_len(SEQ_REP_LEN), + SEQ_COPY, + } sequence; + + /// Base of the current probability tree + probability *probs; + + /// Symbol being decoded. This is also used as an index variable in + /// bittree decoders: probs[symbol] + uint32_t symbol; + + /// Used as a loop termination condition on bittree decoders and + /// direct bits decoder. + uint32_t limit; + + /// Matched literal decoder: 0x100 or 0 to help avoiding branches. + /// Bittree reverse decoders: Offset of the next bit: 1 << offset + uint32_t offset; + + /// If decoding a literal: match byte. + /// If decoding a match: length of the match. + uint32_t len; +}; + + +static lzma_ret +lzma_decode(lzma_coder *restrict coder, lzma_dict *restrict dictptr, + const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size) +{ + //////////////////// + // Initialization // + //////////////////// + + if (!rc_read_init(&coder->rc, in, in_pos, in_size)) + return LZMA_OK; + + /////////////// + // Variables // + /////////////// + + // Making local copies of often-used variables improves both + // speed and readability. 
+ + lzma_dict dict = *dictptr; + + const size_t dict_start = dict.pos; + + // Range decoder + rc_to_local(coder->rc, *in_pos); + + // State + uint32_t state = coder->state; + uint32_t rep0 = coder->rep0; + uint32_t rep1 = coder->rep1; + uint32_t rep2 = coder->rep2; + uint32_t rep3 = coder->rep3; + + const uint32_t pos_mask = coder->pos_mask; + + // These variables are actually needed only if we last time ran + // out of input in the middle of the decoder loop. + probability *probs = coder->probs; + uint32_t symbol = coder->symbol; + uint32_t limit = coder->limit; + uint32_t offset = coder->offset; + uint32_t len = coder->len; + + const uint32_t literal_pos_mask = coder->literal_pos_mask; + const uint32_t literal_context_bits = coder->literal_context_bits; + + // Temporary variables + uint32_t pos_state = dict.pos & pos_mask; + + lzma_ret ret = LZMA_OK; + + // If uncompressed size is known, there must be no end of payload + // marker. + const bool no_eopm = coder->uncompressed_size + != LZMA_VLI_UNKNOWN; + if (no_eopm && coder->uncompressed_size < dict.limit - dict.pos) + dict.limit = dict.pos + (size_t)(coder->uncompressed_size); + + // The main decoder loop. The "switch" is used to restart the decoder at + // correct location. Once restarted, the "switch" is no longer used. + switch (coder->sequence) + while (true) { + // Calculate new pos_state. This is skipped on the first loop + // since we already calculated it when setting up the local + // variables. + pos_state = dict.pos & pos_mask; + + case SEQ_NORMALIZE: + case SEQ_IS_MATCH: + if (unlikely(no_eopm && dict.pos == dict.limit)) + break; + + rc_if_0(coder->is_match[state][pos_state], SEQ_IS_MATCH) { + rc_update_0(coder->is_match[state][pos_state]); + + // It's a literal i.e. a single 8-bit byte. + + probs = literal_subcoder(coder->literal, + literal_context_bits, literal_pos_mask, + dict.pos, dict_get(&dict, 0)); + symbol = 1; + + if (is_literal_state(state)) { + // Decode literal without match byte. 
+#ifdef HAVE_SMALL + case SEQ_LITERAL: + do { + rc_bit(probs[symbol], , , SEQ_LITERAL); + } while (symbol < (1 << 8)); +#else + rc_bit_case(probs[symbol], , , SEQ_LITERAL0); + rc_bit_case(probs[symbol], , , SEQ_LITERAL1); + rc_bit_case(probs[symbol], , , SEQ_LITERAL2); + rc_bit_case(probs[symbol], , , SEQ_LITERAL3); + rc_bit_case(probs[symbol], , , SEQ_LITERAL4); + rc_bit_case(probs[symbol], , , SEQ_LITERAL5); + rc_bit_case(probs[symbol], , , SEQ_LITERAL6); + rc_bit_case(probs[symbol], , , SEQ_LITERAL7); +#endif + } else { + // Decode literal with match byte. + // + // We store the byte we compare against + // ("match byte") to "len" to minimize the + // number of variables we need to store + // between decoder calls. + len = dict_get(&dict, rep0) << 1; + + // The usage of "offset" allows omitting some + // branches, which should give tiny speed + // improvement on some CPUs. "offset" gets + // set to zero if match_bit didn't match. + offset = 0x100; + +#ifdef HAVE_SMALL + case SEQ_LITERAL_MATCHED: + do { + const uint32_t match_bit + = len & offset; + const uint32_t subcoder_index + = offset + match_bit + + symbol; + + rc_bit(probs[subcoder_index], + offset &= ~match_bit, + offset &= match_bit, + SEQ_LITERAL_MATCHED); + + // It seems to be faster to do this + // here instead of putting it to the + // beginning of the loop and then + // putting the "case" in the middle + // of the loop. + len <<= 1; + + } while (symbol < (1 << 8)); +#else + // Unroll the loop. 
+ uint32_t match_bit; + uint32_t subcoder_index; + +# define d(seq) \ + case seq: \ + match_bit = len & offset; \ + subcoder_index = offset + match_bit + symbol; \ + rc_bit(probs[subcoder_index], \ + offset &= ~match_bit, \ + offset &= match_bit, \ + seq) + + d(SEQ_LITERAL_MATCHED0); + len <<= 1; + d(SEQ_LITERAL_MATCHED1); + len <<= 1; + d(SEQ_LITERAL_MATCHED2); + len <<= 1; + d(SEQ_LITERAL_MATCHED3); + len <<= 1; + d(SEQ_LITERAL_MATCHED4); + len <<= 1; + d(SEQ_LITERAL_MATCHED5); + len <<= 1; + d(SEQ_LITERAL_MATCHED6); + len <<= 1; + d(SEQ_LITERAL_MATCHED7); +# undef d +#endif + } + + //update_literal(state); + // Use a lookup table to update to literal state, + // since compared to other state updates, this would + // need two branches. + static const lzma_lzma_state next_state[] = { + STATE_LIT_LIT, + STATE_LIT_LIT, + STATE_LIT_LIT, + STATE_LIT_LIT, + STATE_MATCH_LIT_LIT, + STATE_REP_LIT_LIT, + STATE_SHORTREP_LIT_LIT, + STATE_MATCH_LIT, + STATE_REP_LIT, + STATE_SHORTREP_LIT, + STATE_MATCH_LIT, + STATE_REP_LIT + }; + state = next_state[state]; + + case SEQ_LITERAL_WRITE: + if (unlikely(dict_put(&dict, symbol))) { + coder->sequence = SEQ_LITERAL_WRITE; + goto out; + } + + continue; + } + + // Instead of a new byte we are going to get a byte range + // (distance and length) which will be repeated from our + // output history. + + rc_update_1(coder->is_match[state][pos_state]); + + case SEQ_IS_REP: + rc_if_0(coder->is_rep[state], SEQ_IS_REP) { + // Not a repeated match + rc_update_0(coder->is_rep[state]); + update_match(state); + + // The latest three match distances are kept in + // memory in case there are repeated matches. + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + + // Decode the length of the match. + len_decode(len, coder->match_len_decoder, + pos_state, SEQ_MATCH_LEN); + + // Prepare to decode the highest two bits of the + // match distance. 
+ probs = coder->pos_slot[get_len_to_pos_state(len)]; + symbol = 1; + +#ifdef HAVE_SMALL + case SEQ_POS_SLOT: + do { + rc_bit(probs[symbol], , , SEQ_POS_SLOT); + } while (symbol < POS_SLOTS); +#else + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT0); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT1); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT2); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT3); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT4); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT5); +#endif + // Get rid of the highest bit that was needed for + // indexing of the probability array. + symbol -= POS_SLOTS; + assert(symbol <= 63); + + if (symbol < START_POS_MODEL_INDEX) { + // Match distances [0, 3] have only two bits. + rep0 = symbol; + } else { + // Decode the lowest [1, 30] bits of + // the match distance. + limit = (symbol >> 1) - 1; + assert(limit >= 1 && limit <= 30); + rep0 = 2 + (symbol & 1); + + if (symbol < END_POS_MODEL_INDEX) { + // Prepare to decode the low bits for + // a distance of [4, 127]. + assert(limit <= 5); + rep0 <<= limit; + assert(rep0 <= 96); + // -1 is fine, because we start + // decoding at probs[1], not probs[0]. + // NOTE: This violates the C standard, + // since we are doing pointer + // arithmetic past the beginning of + // the array. 
+ assert((int32_t)(rep0 - symbol - 1) + >= -1); + assert((int32_t)(rep0 - symbol - 1) + <= 82); + probs = coder->pos_special + rep0 + - symbol - 1; + symbol = 1; + offset = 0; + case SEQ_POS_MODEL: +#ifdef HAVE_SMALL + do { + rc_bit(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + } while (++offset < limit); +#else + switch (limit) { + case 5: + assert(offset == 0); + rc_bit(probs[symbol], , + rep0 += 1, + SEQ_POS_MODEL); + ++offset; + --limit; + case 4: + rc_bit(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + ++offset; + --limit; + case 3: + rc_bit(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + ++offset; + --limit; + case 2: + rc_bit(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + ++offset; + --limit; + case 1: + // We need "symbol" only for + // indexing the probability + // array, thus we can use + // rc_bit_last() here to omit + // the unneeded updating of + // "symbol". + rc_bit_last(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + } +#endif + } else { + // The distance is >= 128. Decode the + // lower bits without probabilities + // except the lowest four bits. + assert(symbol >= 14); + assert(limit >= 6); + limit -= ALIGN_BITS; + assert(limit >= 2); + case SEQ_DIRECT: + // Not worth manual unrolling + do { + rc_direct(rep0, SEQ_DIRECT); + } while (--limit > 0); + + // Decode the lowest four bits using + // probabilities. 
+ rep0 <<= ALIGN_BITS; + symbol = 1; +#ifdef HAVE_SMALL + offset = 0; + case SEQ_ALIGN: + do { + rc_bit(coder->pos_align[ + symbol], , + rep0 += 1 << offset, + SEQ_ALIGN); + } while (++offset < ALIGN_BITS); +#else + case SEQ_ALIGN0: + rc_bit(coder->pos_align[symbol], , + rep0 += 1, SEQ_ALIGN0); + case SEQ_ALIGN1: + rc_bit(coder->pos_align[symbol], , + rep0 += 2, SEQ_ALIGN1); + case SEQ_ALIGN2: + rc_bit(coder->pos_align[symbol], , + rep0 += 4, SEQ_ALIGN2); + case SEQ_ALIGN3: + // Like in SEQ_POS_MODEL, we don't + // need "symbol" for anything else + // than indexing the probability array. + rc_bit_last(coder->pos_align[symbol], , + rep0 += 8, SEQ_ALIGN3); +#endif + + if (rep0 == UINT32_MAX) { + // End of payload marker was + // found. It must not be + // present if uncompressed + // size is known. + if (coder->uncompressed_size + != LZMA_VLI_UNKNOWN) { + ret = LZMA_DATA_ERROR; + goto out; + } + + case SEQ_EOPM: + // TODO Comment + rc_normalize(SEQ_EOPM); + ret = LZMA_STREAM_END; + goto out; + } + } + } + + // Validate the distance we just decoded. + if (unlikely(!dict_is_distance_valid(&dict, rep0))) { + ret = LZMA_DATA_ERROR; + goto out; + } + + } else { + rc_update_1(coder->is_rep[state]); + + // Repeated match + // + // The match distance is a value that we have had + // earlier. The latest four match distances are + // available as rep0, rep1, rep2 and rep3. We will + // now decode which of them is the new distance. + // + // There cannot be a match if we haven't produced + // any output, so check that first. + if (unlikely(!dict_is_distance_valid(&dict, 0))) { + ret = LZMA_DATA_ERROR; + goto out; + } + + case SEQ_IS_REP0: + rc_if_0(coder->is_rep0[state], SEQ_IS_REP0) { + rc_update_0(coder->is_rep0[state]); + // The distance is rep0. 
+ + case SEQ_IS_REP0_LONG: + rc_if_0(coder->is_rep0_long[state][pos_state], + SEQ_IS_REP0_LONG) { + rc_update_0(coder->is_rep0_long[ + state][pos_state]); + + update_short_rep(state); + + case SEQ_SHORTREP: + if (unlikely(dict_put(&dict, dict_get( + &dict, rep0)))) { + coder->sequence = SEQ_SHORTREP; + goto out; + } + + continue; + } + + // Repeating more than one byte at + // distance of rep0. + rc_update_1(coder->is_rep0_long[ + state][pos_state]); + + } else { + rc_update_1(coder->is_rep0[state]); + + case SEQ_IS_REP1: + // The distance is rep1, rep2 or rep3. Once + // we find out which one of these three, it + // is stored to rep0 and rep1, rep2 and rep3 + // are updated accordingly. + rc_if_0(coder->is_rep1[state], SEQ_IS_REP1) { + rc_update_0(coder->is_rep1[state]); + + const uint32_t distance = rep1; + rep1 = rep0; + rep0 = distance; + + } else { + rc_update_1(coder->is_rep1[state]); + case SEQ_IS_REP2: + rc_if_0(coder->is_rep2[state], + SEQ_IS_REP2) { + rc_update_0(coder->is_rep2[ + state]); + + const uint32_t distance = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance; + + } else { + rc_update_1(coder->is_rep2[ + state]); + + const uint32_t distance = rep3; + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance; + } + } + } + + update_long_rep(state); + + // Decode the length of the repeated match. + len_decode(len, coder->rep_len_decoder, + pos_state, SEQ_REP_LEN); + } + + ///////////////////////////////// + // Repeat from history buffer. // + ///////////////////////////////// + + // The length is always between these limits. There is no way + // to trigger the algorithm to set len outside this range. + assert(len >= MATCH_LEN_MIN); + assert(len <= MATCH_LEN_MAX); + + case SEQ_COPY: + // Repeat len bytes from distance of rep0. 
+ if (unlikely(dict_repeat(&dict, rep0, &len))) { + coder->sequence = SEQ_COPY; + goto out; + } + } + + rc_normalize(SEQ_NORMALIZE); + coder->sequence = SEQ_IS_MATCH; + +out: + // Save state + + // NOTE: Must not copy dict.limit. + dictptr->pos = dict.pos; + dictptr->full = dict.full; + + rc_from_local(coder->rc, *in_pos); + + coder->state = state; + coder->rep0 = rep0; + coder->rep1 = rep1; + coder->rep2 = rep2; + coder->rep3 = rep3; + + coder->probs = probs; + coder->symbol = symbol; + coder->limit = limit; + coder->offset = offset; + coder->len = len; + + // Update the remaining amount of uncompressed data if uncompressed + // size was known. + if (coder->uncompressed_size != LZMA_VLI_UNKNOWN) { + coder->uncompressed_size -= dict.pos - dict_start; + + // Since there cannot be end of payload marker if the + // uncompressed size was known, we check here if we + // finished decoding. + if (coder->uncompressed_size == 0 && ret == LZMA_OK + && coder->sequence != SEQ_NORMALIZE) + ret = coder->sequence == SEQ_IS_MATCH + ? LZMA_STREAM_END : LZMA_DATA_ERROR; + } + + // We can do an additional check in the range decoder to catch some + // corrupted files. + if (ret == LZMA_STREAM_END) { + if (!rc_is_finished(coder->rc)) + ret = LZMA_DATA_ERROR; + + // Reset the range decoder so that it is ready to reinitialize + // for a new LZMA2 chunk. + rc_reset(coder->rc); + } + + return ret; +} + + + +static void +lzma_decoder_uncompressed(lzma_coder *coder, lzma_vli uncompressed_size) +{ + coder->uncompressed_size = uncompressed_size; +} + +/* +extern void +lzma_lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size) +{ + // This is hack. + (*(lzma_coder **)(coder))->uncompressed_size = uncompressed_size; +} +*/ + +static void +lzma_decoder_reset(lzma_coder *coder, const void *opt) +{ + const lzma_options_lzma *options = opt; + + // NOTE: We assume that lc/lp/pb are valid since they were + // successfully decoded with lzma_lzma_decode_properties(). + // FIXME? 
+ + // Calculate pos_mask. We don't need pos_bits as is for anything. + coder->pos_mask = (1U << options->pb) - 1; + + // Initialize the literal decoder. + literal_init(coder->literal, options->lc, options->lp); + + coder->literal_context_bits = options->lc; + coder->literal_pos_mask = (1U << options->lp) - 1; + + // State + coder->state = STATE_LIT_LIT; + coder->rep0 = 0; + coder->rep1 = 0; + coder->rep2 = 0; + coder->rep3 = 0; + coder->pos_mask = (1U << options->pb) - 1; + + // Range decoder + rc_reset(coder->rc); + + // Bit and bittree decoders + for (uint32_t i = 0; i < STATES; ++i) { + for (uint32_t j = 0; j <= coder->pos_mask; ++j) { + bit_reset(coder->is_match[i][j]); + bit_reset(coder->is_rep0_long[i][j]); + } + + bit_reset(coder->is_rep[i]); + bit_reset(coder->is_rep0[i]); + bit_reset(coder->is_rep1[i]); + bit_reset(coder->is_rep2[i]); + } + + for (uint32_t i = 0; i < LEN_TO_POS_STATES; ++i) + bittree_reset(coder->pos_slot[i], POS_SLOT_BITS); + + for (uint32_t i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i) + bit_reset(coder->pos_special[i]); + + bittree_reset(coder->pos_align, ALIGN_BITS); + + // Len decoders (also bit/bittree) + const uint32_t num_pos_states = 1U << options->pb; + bit_reset(coder->match_len_decoder.choice); + bit_reset(coder->match_len_decoder.choice2); + bit_reset(coder->rep_len_decoder.choice); + bit_reset(coder->rep_len_decoder.choice2); + + for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) { + bittree_reset(coder->match_len_decoder.low[pos_state], + LEN_LOW_BITS); + bittree_reset(coder->match_len_decoder.mid[pos_state], + LEN_MID_BITS); + + bittree_reset(coder->rep_len_decoder.low[pos_state], + LEN_LOW_BITS); + bittree_reset(coder->rep_len_decoder.mid[pos_state], + LEN_MID_BITS); + } + + bittree_reset(coder->match_len_decoder.high, LEN_HIGH_BITS); + bittree_reset(coder->rep_len_decoder.high, LEN_HIGH_BITS); + + coder->sequence = SEQ_IS_MATCH; + coder->probs = NULL; + coder->symbol = 0; + coder->limit = 0; + 
coder->offset = 0; + coder->len = 0; + + return; +} + + +extern lzma_ret +lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator, + const void *opt, lzma_lz_options *lz_options) +{ + if (lz->coder == NULL) { + lz->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (lz->coder == NULL) + return LZMA_MEM_ERROR; + + lz->code = &lzma_decode; + lz->reset = &lzma_decoder_reset; + lz->set_uncompressed = &lzma_decoder_uncompressed; + } + + // All dictionary sizes are OK here. LZ decoder will take care of + // the special cases. + const lzma_options_lzma *options = opt; + lz_options->dict_size = options->dict_size; + lz_options->preset_dict = options->preset_dict; + lz_options->preset_dict_size = options->preset_dict_size; + + return LZMA_OK; +} + + +/// Allocate and initialize LZMA decoder. This is used only via LZ +/// initialization (lzma_lzma_decoder_init() passes function pointer to +/// the LZ initialization). +static lzma_ret +lzma_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator, + const void *options, lzma_lz_options *lz_options) +{ + if (!is_lclppb_valid(options)) + return LZMA_PROG_ERROR; + + return_if_error(lzma_lzma_decoder_create( + lz, allocator, options, lz_options)); + + lzma_decoder_reset(lz->coder, options); + lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN); + + return LZMA_OK; +} + + +extern lzma_ret +lzma_lzma_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + // LZMA can only be the last filter in the chain. This is enforced + // by the raw_decoder initialization. + assert(filters[1].init == NULL); + + return lzma_lz_decoder_init(next, allocator, filters, + &lzma_decoder_init); +} + + +extern bool +lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte) +{ + if (byte > (4 * 5 + 4) * 9 + 8) + return true; + + // See the file format specification to understand this. 
+ options->pb = byte / (9 * 5); + byte -= options->pb * 9 * 5; + options->lp = byte / 9; + options->lc = byte - options->lp * 9; + + return options->lc + options->lp > LZMA_LCLP_MAX; +} + + +extern uint64_t +lzma_lzma_decoder_memusage_nocheck(const void *options) +{ + const lzma_options_lzma *const opt = options; + return sizeof(lzma_coder) + lzma_lz_decoder_memusage(opt->dict_size); +} + + +extern uint64_t +lzma_lzma_decoder_memusage(const void *options) +{ + if (!is_lclppb_valid(options)) + return UINT64_MAX; + + return lzma_lzma_decoder_memusage_nocheck(options); +} + + +extern lzma_ret +lzma_lzma_props_decode(void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size) +{ + if (props_size != 5) + return LZMA_OPTIONS_ERROR; + + lzma_options_lzma *opt + = lzma_alloc(sizeof(lzma_options_lzma), allocator); + if (opt == NULL) + return LZMA_MEM_ERROR; + + if (lzma_lzma_lclppb_decode(opt, props[0])) + goto error; + + // All dictionary sizes are accepted, including zero. LZ decoder + // will automatically use a dictionary at least a few KiB even if + // a smaller dictionary is requested. + opt->dict_size = unaligned_read32le(props + 1); + + opt->preset_dict = NULL; + opt->preset_dict_size = 0; + + *options = opt; + + return LZMA_OK; + +error: + lzma_free(opt, allocator); + return LZMA_OPTIONS_ERROR; +} diff --git a/src/liblzma/lzma/lzma_decoder.h b/src/liblzma/lzma/lzma_decoder.h new file mode 100644 index 000000000000..a463a76fc694 --- /dev/null +++ b/src/liblzma/lzma/lzma_decoder.h @@ -0,0 +1,52 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_decoder.h +/// \brief LZMA decoder API +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZMA_DECODER_H +#define LZMA_LZMA_DECODER_H + +#include "common.h" + + +/// Allocates and initializes LZMA decoder +extern lzma_ret lzma_lzma_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern uint64_t lzma_lzma_decoder_memusage(const void *options); + +extern lzma_ret lzma_lzma_props_decode( + void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size); + + +/// \brief Decodes the LZMA Properties byte (lc/lp/pb) +/// +/// \return true if error occurred, false on success +/// +extern bool lzma_lzma_lclppb_decode( + lzma_options_lzma *options, uint8_t byte); + + +#ifdef LZMA_LZ_DECODER_H +/// Allocate and setup function pointers only. This is used by LZMA1 and +/// LZMA2 decoders. +extern lzma_ret lzma_lzma_decoder_create( + lzma_lz_decoder *lz, lzma_allocator *allocator, + const void *opt, lzma_lz_options *lz_options); + +/// Gets memory usage without validating lc/lp/pb. This is used by LZMA2 +/// decoder, because raw LZMA2 decoding doesn't need lc/lp/pb. +extern uint64_t lzma_lzma_decoder_memusage_nocheck(const void *options); + +#endif + +#endif diff --git a/src/liblzma/lzma/lzma_encoder.c b/src/liblzma/lzma/lzma_encoder.c new file mode 100644 index 000000000000..0fe992d510a1 --- /dev/null +++ b/src/liblzma/lzma/lzma_encoder.c @@ -0,0 +1,675 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder.c +/// \brief LZMA encoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "lzma2_encoder.h" +#include "lzma_encoder_private.h" +#include "fastpos.h" + + +///////////// +// Literal // +///////////// + +static inline void +literal_matched(lzma_range_encoder *rc, probability *subcoder, + uint32_t match_byte, uint32_t symbol) +{ + uint32_t offset = 0x100; + symbol += UINT32_C(1) << 8; + + do { + match_byte <<= 1; + const uint32_t match_bit = match_byte & offset; + const uint32_t subcoder_index + = offset + match_bit + (symbol >> 8); + const uint32_t bit = (symbol >> 7) & 1; + rc_bit(rc, &subcoder[subcoder_index], bit); + + symbol <<= 1; + offset &= ~(match_byte ^ symbol); + + } while (symbol < (UINT32_C(1) << 16)); +} + + +static inline void +literal(lzma_coder *coder, lzma_mf *mf, uint32_t position) +{ + // Locate the literal byte to be encoded and the subcoder. + const uint8_t cur_byte = mf->buffer[ + mf->read_pos - mf->read_ahead]; + probability *subcoder = literal_subcoder(coder->literal, + coder->literal_context_bits, coder->literal_pos_mask, + position, mf->buffer[mf->read_pos - mf->read_ahead - 1]); + + if (is_literal_state(coder->state)) { + // Previous LZMA-symbol was a literal. Encode a normal + // literal without a match byte. + rc_bittree(&coder->rc, subcoder, 8, cur_byte); + } else { + // Previous LZMA-symbol was a match. Use the last byte of + // the match as a "match byte". That is, compare the bits + // of the current literal and the match byte. 
+ const uint8_t match_byte = mf->buffer[ + mf->read_pos - coder->reps[0] - 1 + - mf->read_ahead]; + literal_matched(&coder->rc, subcoder, match_byte, cur_byte); + } + + update_literal(coder->state); +} + + +////////////////// +// Match length // +////////////////// + +static void +length_update_prices(lzma_length_encoder *lc, const uint32_t pos_state) +{ + const uint32_t table_size = lc->table_size; + lc->counters[pos_state] = table_size; + + const uint32_t a0 = rc_bit_0_price(lc->choice); + const uint32_t a1 = rc_bit_1_price(lc->choice); + const uint32_t b0 = a1 + rc_bit_0_price(lc->choice2); + const uint32_t b1 = a1 + rc_bit_1_price(lc->choice2); + uint32_t *const prices = lc->prices[pos_state]; + + uint32_t i; + for (i = 0; i < table_size && i < LEN_LOW_SYMBOLS; ++i) + prices[i] = a0 + rc_bittree_price(lc->low[pos_state], + LEN_LOW_BITS, i); + + for (; i < table_size && i < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; ++i) + prices[i] = b0 + rc_bittree_price(lc->mid[pos_state], + LEN_MID_BITS, i - LEN_LOW_SYMBOLS); + + for (; i < table_size; ++i) + prices[i] = b1 + rc_bittree_price(lc->high, LEN_HIGH_BITS, + i - LEN_LOW_SYMBOLS - LEN_MID_SYMBOLS); + + return; +} + + +static inline void +length(lzma_range_encoder *rc, lzma_length_encoder *lc, + const uint32_t pos_state, uint32_t len, const bool fast_mode) +{ + assert(len <= MATCH_LEN_MAX); + len -= MATCH_LEN_MIN; + + if (len < LEN_LOW_SYMBOLS) { + rc_bit(rc, &lc->choice, 0); + rc_bittree(rc, lc->low[pos_state], LEN_LOW_BITS, len); + } else { + rc_bit(rc, &lc->choice, 1); + len -= LEN_LOW_SYMBOLS; + + if (len < LEN_MID_SYMBOLS) { + rc_bit(rc, &lc->choice2, 0); + rc_bittree(rc, lc->mid[pos_state], LEN_MID_BITS, len); + } else { + rc_bit(rc, &lc->choice2, 1); + len -= LEN_MID_SYMBOLS; + rc_bittree(rc, lc->high, LEN_HIGH_BITS, len); + } + } + + // Only getoptimum uses the prices so don't update the table when + // in fast mode. 
+ if (!fast_mode) + if (--lc->counters[pos_state] == 0) + length_update_prices(lc, pos_state); +} + + +/////////// +// Match // +/////////// + +static inline void +match(lzma_coder *coder, const uint32_t pos_state, + const uint32_t distance, const uint32_t len) +{ + update_match(coder->state); + + length(&coder->rc, &coder->match_len_encoder, pos_state, len, + coder->fast_mode); + + const uint32_t pos_slot = get_pos_slot(distance); + const uint32_t len_to_pos_state = get_len_to_pos_state(len); + rc_bittree(&coder->rc, coder->pos_slot[len_to_pos_state], + POS_SLOT_BITS, pos_slot); + + if (pos_slot >= START_POS_MODEL_INDEX) { + const uint32_t footer_bits = (pos_slot >> 1) - 1; + const uint32_t base = (2 | (pos_slot & 1)) << footer_bits; + const uint32_t pos_reduced = distance - base; + + if (pos_slot < END_POS_MODEL_INDEX) { + // Careful here: base - pos_slot - 1 can be -1, but + // rc_bittree_reverse starts at probs[1], not probs[0]. + rc_bittree_reverse(&coder->rc, + coder->pos_special + base - pos_slot - 1, + footer_bits, pos_reduced); + } else { + rc_direct(&coder->rc, pos_reduced >> ALIGN_BITS, + footer_bits - ALIGN_BITS); + rc_bittree_reverse( + &coder->rc, coder->pos_align, + ALIGN_BITS, pos_reduced & ALIGN_MASK); + ++coder->align_price_count; + } + } + + coder->reps[3] = coder->reps[2]; + coder->reps[2] = coder->reps[1]; + coder->reps[1] = coder->reps[0]; + coder->reps[0] = distance; + ++coder->match_price_count; +} + + +//////////////////// +// Repeated match // +//////////////////// + +static inline void +rep_match(lzma_coder *coder, const uint32_t pos_state, + const uint32_t rep, const uint32_t len) +{ + if (rep == 0) { + rc_bit(&coder->rc, &coder->is_rep0[coder->state], 0); + rc_bit(&coder->rc, + &coder->is_rep0_long[coder->state][pos_state], + len != 1); + } else { + const uint32_t distance = coder->reps[rep]; + rc_bit(&coder->rc, &coder->is_rep0[coder->state], 1); + + if (rep == 1) { + rc_bit(&coder->rc, &coder->is_rep1[coder->state], 0); + } else { + 
rc_bit(&coder->rc, &coder->is_rep1[coder->state], 1); + rc_bit(&coder->rc, &coder->is_rep2[coder->state], + rep - 2); + + if (rep == 3) + coder->reps[3] = coder->reps[2]; + + coder->reps[2] = coder->reps[1]; + } + + coder->reps[1] = coder->reps[0]; + coder->reps[0] = distance; + } + + if (len == 1) { + update_short_rep(coder->state); + } else { + length(&coder->rc, &coder->rep_len_encoder, pos_state, len, + coder->fast_mode); + update_long_rep(coder->state); + } +} + + +////////// +// Main // +////////// + +static void +encode_symbol(lzma_coder *coder, lzma_mf *mf, + uint32_t back, uint32_t len, uint32_t position) +{ + const uint32_t pos_state = position & coder->pos_mask; + + if (back == UINT32_MAX) { + // Literal i.e. eight-bit byte + assert(len == 1); + rc_bit(&coder->rc, + &coder->is_match[coder->state][pos_state], 0); + literal(coder, mf, position); + } else { + // Some type of match + rc_bit(&coder->rc, + &coder->is_match[coder->state][pos_state], 1); + + if (back < REP_DISTANCES) { + // It's a repeated match i.e. the same distance + // has been used earlier. + rc_bit(&coder->rc, &coder->is_rep[coder->state], 1); + rep_match(coder, pos_state, back, len); + } else { + // Normal match + rc_bit(&coder->rc, &coder->is_rep[coder->state], 0); + match(coder, pos_state, back - REP_DISTANCES, len); + } + } + + assert(mf->read_ahead >= len); + mf->read_ahead -= len; +} + + +static bool +encode_init(lzma_coder *coder, lzma_mf *mf) +{ + assert(mf_position(mf) == 0); + + if (mf->read_pos == mf->read_limit) { + if (mf->action == LZMA_RUN) + return false; // We cannot do anything. + + // We are finishing (we cannot get here when flushing). + assert(mf->write_pos == mf->read_pos); + assert(mf->action == LZMA_FINISH); + } else { + // Do the actual initialization. The first LZMA symbol must + // always be a literal. 
+ mf_skip(mf, 1); + mf->read_ahead = 0; + rc_bit(&coder->rc, &coder->is_match[0][0], 0); + rc_bittree(&coder->rc, coder->literal[0], 8, mf->buffer[0]); + } + + // Initialization is done (except if empty file). + coder->is_initialized = true; + + return true; +} + + +static void +encode_eopm(lzma_coder *coder, uint32_t position) +{ + const uint32_t pos_state = position & coder->pos_mask; + rc_bit(&coder->rc, &coder->is_match[coder->state][pos_state], 1); + rc_bit(&coder->rc, &coder->is_rep[coder->state], 0); + match(coder, pos_state, UINT32_MAX, MATCH_LEN_MIN); +} + + +/// Number of bytes that a single encoding loop in lzma_lzma_encode() can +/// consume from the dictionary. This limit comes from lzma_lzma_optimum() +/// and may need to be updated if that function is significantly modified. +#define LOOP_INPUT_MAX (OPTS + 1) + + +extern lzma_ret +lzma_lzma_encode(lzma_coder *restrict coder, lzma_mf *restrict mf, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size, uint32_t limit) +{ + // Initialize the stream if no data has been encoded yet. + if (!coder->is_initialized && !encode_init(coder, mf)) + return LZMA_OK; + + // Get the lowest bits of the uncompressed offset from the LZ layer. + uint32_t position = mf_position(mf); + + while (true) { + // Encode pending bits, if any. Calling this before encoding + // the next symbol is needed only with plain LZMA, since + // LZMA2 always provides big enough buffer to flush + // everything out from the range encoder. For the same reason, + // rc_encode() never returns true when this function is used + // as part of LZMA2 encoder. + if (rc_encode(&coder->rc, out, out_pos, out_size)) { + assert(limit == UINT32_MAX); + return LZMA_OK; + } + + // With LZMA2 we need to take care that compressed size of + // a chunk doesn't get too big. 
+ // TODO + if (limit != UINT32_MAX + && (mf->read_pos - mf->read_ahead >= limit + || *out_pos + rc_pending(&coder->rc) + >= LZMA2_CHUNK_MAX + - LOOP_INPUT_MAX)) + break; + + // Check that there is some input to process. + if (mf->read_pos >= mf->read_limit) { + if (mf->action == LZMA_RUN) + return LZMA_OK; + + if (mf->read_ahead == 0) + break; + } + + // Get optimal match (repeat position and length). + // Value ranges for pos: + // - [0, REP_DISTANCES): repeated match + // - [REP_DISTANCES, UINT32_MAX): + // match at (pos - REP_DISTANCES) + // - UINT32_MAX: not a match but a literal + // Value ranges for len: + // - [MATCH_LEN_MIN, MATCH_LEN_MAX] + uint32_t len; + uint32_t back; + + if (coder->fast_mode) + lzma_lzma_optimum_fast(coder, mf, &back, &len); + else + lzma_lzma_optimum_normal( + coder, mf, &back, &len, position); + + encode_symbol(coder, mf, back, len, position); + + position += len; + } + + if (!coder->is_flushed) { + coder->is_flushed = true; + + // We don't support encoding plain LZMA streams without EOPM, + // and LZMA2 doesn't use EOPM at LZMA level. + if (limit == UINT32_MAX) + encode_eopm(coder, position); + + // Flush the remaining bytes from the range encoder. + rc_flush(&coder->rc); + + // Copy the remaining bytes to the output buffer. If there + // isn't enough output space, we will copy out the remaining + // bytes on the next call to this function by using + // the rc_encode() call in the encoding loop above. + if (rc_encode(&coder->rc, out, out_pos, out_size)) { + assert(limit == UINT32_MAX); + return LZMA_OK; + } + } + + // Make it ready for the next LZMA2 chunk. + coder->is_flushed = false; + + return LZMA_STREAM_END; +} + + +static lzma_ret +lzma_encode(lzma_coder *restrict coder, lzma_mf *restrict mf, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size) +{ + // Plain LZMA has no support for sync-flushing. 
+ if (unlikely(mf->action == LZMA_SYNC_FLUSH)) + return LZMA_OPTIONS_ERROR; + + return lzma_lzma_encode(coder, mf, out, out_pos, out_size, UINT32_MAX); +} + + +//////////////////// +// Initialization // +//////////////////// + +static bool +is_options_valid(const lzma_options_lzma *options) +{ + // Validate some of the options. LZ encoder validates nice_len too + // but we need a valid value here earlier. + return is_lclppb_valid(options) + && options->nice_len >= MATCH_LEN_MIN + && options->nice_len <= MATCH_LEN_MAX + && (options->mode == LZMA_MODE_FAST + || options->mode == LZMA_MODE_NORMAL); +} + + +static void +set_lz_options(lzma_lz_options *lz_options, const lzma_options_lzma *options) +{ + // LZ encoder initialization does the validation for these so we + // don't need to validate here. + lz_options->before_size = OPTS; + lz_options->dict_size = options->dict_size; + lz_options->after_size = LOOP_INPUT_MAX; + lz_options->match_len_max = MATCH_LEN_MAX; + lz_options->nice_len = options->nice_len; + lz_options->match_finder = options->mf; + lz_options->depth = options->depth; + lz_options->preset_dict = options->preset_dict; + lz_options->preset_dict_size = options->preset_dict_size; + return; +} + + +static void +length_encoder_reset(lzma_length_encoder *lencoder, + const uint32_t num_pos_states, const bool fast_mode) +{ + bit_reset(lencoder->choice); + bit_reset(lencoder->choice2); + + for (size_t pos_state = 0; pos_state < num_pos_states; ++pos_state) { + bittree_reset(lencoder->low[pos_state], LEN_LOW_BITS); + bittree_reset(lencoder->mid[pos_state], LEN_MID_BITS); + } + + bittree_reset(lencoder->high, LEN_HIGH_BITS); + + if (!fast_mode) + for (size_t pos_state = 0; pos_state < num_pos_states; + ++pos_state) + length_update_prices(lencoder, pos_state); + + return; +} + + +extern lzma_ret +lzma_lzma_encoder_reset(lzma_coder *coder, const lzma_options_lzma *options) +{ + if (!is_options_valid(options)) + return LZMA_OPTIONS_ERROR; + + coder->pos_mask = (1U << 
options->pb) - 1; + coder->literal_context_bits = options->lc; + coder->literal_pos_mask = (1U << options->lp) - 1; + + // Range coder + rc_reset(&coder->rc); + + // State + coder->state = STATE_LIT_LIT; + for (size_t i = 0; i < REP_DISTANCES; ++i) + coder->reps[i] = 0; + + literal_init(coder->literal, options->lc, options->lp); + + // Bit encoders + for (size_t i = 0; i < STATES; ++i) { + for (size_t j = 0; j <= coder->pos_mask; ++j) { + bit_reset(coder->is_match[i][j]); + bit_reset(coder->is_rep0_long[i][j]); + } + + bit_reset(coder->is_rep[i]); + bit_reset(coder->is_rep0[i]); + bit_reset(coder->is_rep1[i]); + bit_reset(coder->is_rep2[i]); + } + + for (size_t i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i) + bit_reset(coder->pos_special[i]); + + // Bit tree encoders + for (size_t i = 0; i < LEN_TO_POS_STATES; ++i) + bittree_reset(coder->pos_slot[i], POS_SLOT_BITS); + + bittree_reset(coder->pos_align, ALIGN_BITS); + + // Length encoders + length_encoder_reset(&coder->match_len_encoder, + 1U << options->pb, coder->fast_mode); + + length_encoder_reset(&coder->rep_len_encoder, + 1U << options->pb, coder->fast_mode); + + // Price counts are incremented every time appropriate probabilities + // are changed. Price counts are set to zero when the price tables + // are updated, which is done when the appropriate price counts have + // big enough value, and lzma_mf.read_ahead == 0 which happens at + // least every OPTS (a few thousand) possible price count increments. + // + // By resetting price counts to UINT32_MAX / 2, we make sure that the + // price tables will be initialized before they will be used (since + // the value is definitely big enough), and that it is OK to increment + // price counts without risk of integer overflow (since UINT32_MAX / 2 + // is small enough). The current code doesn't increment price counts + // before initializing price tables, but it may be done in the future if + // we add support for saving the state between LZMA2 chunks.
+ coder->match_price_count = UINT32_MAX / 2; + coder->align_price_count = UINT32_MAX / 2; + + coder->opts_end_index = 0; + coder->opts_current_index = 0; + + return LZMA_OK; +} + + +extern lzma_ret +lzma_lzma_encoder_create(lzma_coder **coder_ptr, lzma_allocator *allocator, + const lzma_options_lzma *options, lzma_lz_options *lz_options) +{ + // Allocate lzma_coder if it wasn't already allocated. + if (*coder_ptr == NULL) { + *coder_ptr = lzma_alloc(sizeof(lzma_coder), allocator); + if (*coder_ptr == NULL) + return LZMA_MEM_ERROR; + } + + lzma_coder *coder = *coder_ptr; + + // Set compression mode. We haven't validated the options yet, + // but it's OK here, since nothing bad happens with invalid + // options in the code below, and they will get rejected by + // lzma_lzma_encoder_reset() call at the end of this function. + switch (options->mode) { + case LZMA_MODE_FAST: + coder->fast_mode = true; + break; + + case LZMA_MODE_NORMAL: { + coder->fast_mode = false; + + // Set dist_table_size. + // Round the dictionary size up to the next 2^n. + uint32_t log_size = 0; + while ((UINT32_C(1) << log_size) < options->dict_size) + ++log_size; + + coder->dist_table_size = log_size * 2; + + // Length encoders' price table size + coder->match_len_encoder.table_size + = options->nice_len + 1 - MATCH_LEN_MIN; + coder->rep_len_encoder.table_size + = options->nice_len + 1 - MATCH_LEN_MIN; + break; + } + + default: + return LZMA_OPTIONS_ERROR; + } + + // We don't need to write the first byte as literal if there is + // a non-empty preset dictionary. encode_init() wouldn't even work + // if there is a non-empty preset dictionary, because encode_init() + // assumes that position is zero and previous byte is also zero.
+ coder->is_initialized = options->preset_dict != NULL + && options->preset_dict_size > 0; + coder->is_flushed = false; + + set_lz_options(lz_options, options); + + return lzma_lzma_encoder_reset(coder, options); +} + + +static lzma_ret +lzma_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator, + const void *options, lzma_lz_options *lz_options) +{ + lz->code = &lzma_encode; + return lzma_lzma_encoder_create( + &lz->coder, allocator, options, lz_options); +} + + +extern lzma_ret +lzma_lzma_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return lzma_lz_encoder_init( + next, allocator, filters, &lzma_encoder_init); +} + + +extern uint64_t +lzma_lzma_encoder_memusage(const void *options) +{ + if (!is_options_valid(options)) + return UINT64_MAX; + + lzma_lz_options lz_options; + set_lz_options(&lz_options, options); + + const uint64_t lz_memusage = lzma_lz_encoder_memusage(&lz_options); + if (lz_memusage == UINT64_MAX) + return UINT64_MAX; + + return (uint64_t)(sizeof(lzma_coder)) + lz_memusage; +} + + +extern bool +lzma_lzma_lclppb_encode(const lzma_options_lzma *options, uint8_t *byte) +{ + if (!is_lclppb_valid(options)) + return true; + + *byte = (options->pb * 5 + options->lp) * 9 + options->lc; + assert(*byte <= (4 * 5 + 4) * 9 + 8); + + return false; +} + + +#ifdef HAVE_ENCODER_LZMA1 +extern lzma_ret +lzma_lzma_props_encode(const void *options, uint8_t *out) +{ + const lzma_options_lzma *const opt = options; + + if (lzma_lzma_lclppb_encode(opt, out)) + return LZMA_PROG_ERROR; + + unaligned_write32le(out + 1, opt->dict_size); + + return LZMA_OK; +} +#endif + + +extern LZMA_API(lzma_bool) +lzma_mode_is_supported(lzma_mode mode) +{ + return mode == LZMA_MODE_FAST || mode == LZMA_MODE_NORMAL; +} diff --git a/src/liblzma/lzma/lzma_encoder.h b/src/liblzma/lzma/lzma_encoder.h new file mode 100644 index 000000000000..835e1f583304 --- /dev/null +++ b/src/liblzma/lzma/lzma_encoder.h @@ -0,0 +1,54 @@ 
+/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder.h +/// \brief LZMA encoder API +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZMA_ENCODER_H +#define LZMA_LZMA_ENCODER_H + +#include "common.h" + + +extern lzma_ret lzma_lzma_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + + +extern uint64_t lzma_lzma_encoder_memusage(const void *options); + +extern lzma_ret lzma_lzma_props_encode(const void *options, uint8_t *out); + + +/// Encodes lc/lp/pb into one byte. Returns false on success and true on error. +extern bool lzma_lzma_lclppb_encode( + const lzma_options_lzma *options, uint8_t *byte); + + +#ifdef LZMA_LZ_ENCODER_H + +/// Initializes raw LZMA encoder; this is used by LZMA2. +extern lzma_ret lzma_lzma_encoder_create( + lzma_coder **coder_ptr, lzma_allocator *allocator, + const lzma_options_lzma *options, lzma_lz_options *lz_options); + + +/// Resets an already initialized LZMA encoder; this is used by LZMA2. +extern lzma_ret lzma_lzma_encoder_reset( + lzma_coder *coder, const lzma_options_lzma *options); + + +extern lzma_ret lzma_lzma_encode(lzma_coder *restrict coder, + lzma_mf *restrict mf, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + uint32_t read_limit); + +#endif + +#endif diff --git a/src/liblzma/lzma/lzma_encoder_optimum_fast.c b/src/liblzma/lzma/lzma_encoder_optimum_fast.c new file mode 100644 index 000000000000..4ca55b60028f --- /dev/null +++ b/src/liblzma/lzma/lzma_encoder_optimum_fast.c @@ -0,0 +1,179 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder_optimum_fast.c +// +// Author: Igor Pavlov +// +// This file has been put into the public domain. 
+// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lzma_encoder_private.h" + + +#define change_pair(small_dist, big_dist) \ + (((big_dist) >> 7) > (small_dist)) + + +extern void +lzma_lzma_optimum_fast(lzma_coder *restrict coder, lzma_mf *restrict mf, + uint32_t *restrict back_res, uint32_t *restrict len_res) +{ + const uint32_t nice_len = mf->nice_len; + + uint32_t len_main; + uint32_t matches_count; + if (mf->read_ahead == 0) { + len_main = mf_find(mf, &matches_count, coder->matches); + } else { + assert(mf->read_ahead == 1); + len_main = coder->longest_match_length; + matches_count = coder->matches_count; + } + + const uint8_t *buf = mf_ptr(mf) - 1; + const uint32_t buf_avail = MIN(mf_avail(mf) + 1, MATCH_LEN_MAX); + + if (buf_avail < 2) { + // There's not enough input left to encode a match. + *back_res = UINT32_MAX; + *len_res = 1; + return; + } + + // Look for repeated matches; scan the previous four match distances + uint32_t rep_len = 0; + uint32_t rep_index = 0; + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) { + // Pointer to the beginning of the match candidate + const uint8_t *const buf_back = buf - coder->reps[i] - 1; + + // If the first two bytes (2 == MATCH_LEN_MIN) do not match, + // this rep is not useful. + if (not_equal_16(buf, buf_back)) + continue; + + // The first two bytes matched. + // Calculate the length of the match. + uint32_t len; + for (len = 2; len < buf_avail + && buf[len] == buf_back[len]; ++len) ; + + // If we have found a repeated match that is at least + // nice_len long, return it immediately. + if (len >= nice_len) { + *back_res = i; + *len_res = len; + mf_skip(mf, len - 1); + return; + } + + if (len > rep_len) { + rep_index = i; + rep_len = len; + } + } + + // We didn't find a long enough repeated match. Encode it as a normal + // match if the match length is at least nice_len. 
+ if (len_main >= nice_len) { + *back_res = coder->matches[matches_count - 1].dist + + REP_DISTANCES; + *len_res = len_main; + mf_skip(mf, len_main - 1); + return; + } + + uint32_t back_main = 0; + if (len_main >= 2) { + back_main = coder->matches[matches_count - 1].dist; + + while (matches_count > 1 && len_main == + coder->matches[matches_count - 2].len + 1) { + if (!change_pair(coder->matches[ + matches_count - 2].dist, + back_main)) + break; + + --matches_count; + len_main = coder->matches[matches_count - 1].len; + back_main = coder->matches[matches_count - 1].dist; + } + + if (len_main == 2 && back_main >= 0x80) + len_main = 1; + } + + if (rep_len >= 2) { + if (rep_len + 1 >= len_main + || (rep_len + 2 >= len_main + && back_main > (UINT32_C(1) << 9)) + || (rep_len + 3 >= len_main + && back_main > (UINT32_C(1) << 15))) { + *back_res = rep_index; + *len_res = rep_len; + mf_skip(mf, rep_len - 1); + return; + } + } + + if (len_main < 2 || buf_avail <= 2) { + *back_res = UINT32_MAX; + *len_res = 1; + return; + } + + // Get the matches for the next byte. If we find a better match, + // the current byte is encoded as a literal. + coder->longest_match_length = mf_find(mf, + &coder->matches_count, coder->matches); + + if (coder->longest_match_length >= 2) { + const uint32_t new_dist = coder->matches[ + coder->matches_count - 1].dist; + + if ((coder->longest_match_length >= len_main + && new_dist < back_main) + || (coder->longest_match_length == len_main + 1 + && !change_pair(back_main, new_dist)) + || (coder->longest_match_length > len_main + 1) + || (coder->longest_match_length + 1 >= len_main + && len_main >= 3 + && change_pair(new_dist, back_main))) { + *back_res = UINT32_MAX; + *len_res = 1; + return; + } + } + + // In contrast to LZMA SDK, dictionary could not have been moved + // between mf_find() calls, thus it is safe to just increment + // the old buf pointer instead of recalculating it with mf_ptr(). 
+ ++buf; + + const uint32_t limit = len_main - 1; + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) { + const uint8_t *const buf_back = buf - coder->reps[i] - 1; + + if (not_equal_16(buf, buf_back)) + continue; + + uint32_t len; + for (len = 2; len < limit + && buf[len] == buf_back[len]; ++len) ; + + if (len >= limit) { + *back_res = UINT32_MAX; + *len_res = 1; + return; + } + } + + *back_res = back_main + REP_DISTANCES; + *len_res = len_main; + mf_skip(mf, len_main - 2); + return; +} diff --git a/src/liblzma/lzma/lzma_encoder_optimum_normal.c b/src/liblzma/lzma/lzma_encoder_optimum_normal.c new file mode 100644 index 000000000000..9284c8a2896f --- /dev/null +++ b/src/liblzma/lzma/lzma_encoder_optimum_normal.c @@ -0,0 +1,868 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder_optimum_normal.c +// +// Author: Igor Pavlov +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lzma_encoder_private.h" +#include "fastpos.h" + + +//////////// +// Prices // +//////////// + +static uint32_t +get_literal_price(const lzma_coder *const coder, const uint32_t pos, + const uint32_t prev_byte, const bool match_mode, + uint32_t match_byte, uint32_t symbol) +{ + const probability *const subcoder = literal_subcoder(coder->literal, + coder->literal_context_bits, coder->literal_pos_mask, + pos, prev_byte); + + uint32_t price = 0; + + if (!match_mode) { + price = rc_bittree_price(subcoder, 8, symbol); + } else { + uint32_t offset = 0x100; + symbol += UINT32_C(1) << 8; + + do { + match_byte <<= 1; + + const uint32_t match_bit = match_byte & offset; + const uint32_t subcoder_index + = offset + match_bit + (symbol >> 8); + const uint32_t bit = (symbol >> 7) & 1; + price += rc_bit_price(subcoder[subcoder_index], bit); + + symbol <<= 1; + offset &= ~(match_byte ^ symbol); + + } while 
(symbol < (UINT32_C(1) << 16)); + } + + return price; +} + + +static inline uint32_t +get_len_price(const lzma_length_encoder *const lencoder, + const uint32_t len, const uint32_t pos_state) +{ + // NOTE: Unlike the other price tables, length prices are updated + // in lzma_encoder.c + return lencoder->prices[pos_state][len - MATCH_LEN_MIN]; +} + + +static inline uint32_t +get_short_rep_price(const lzma_coder *const coder, + const lzma_lzma_state state, const uint32_t pos_state) +{ + return rc_bit_0_price(coder->is_rep0[state]) + + rc_bit_0_price(coder->is_rep0_long[state][pos_state]); +} + + +static inline uint32_t +get_pure_rep_price(const lzma_coder *const coder, const uint32_t rep_index, + const lzma_lzma_state state, uint32_t pos_state) +{ + uint32_t price; + + if (rep_index == 0) { + price = rc_bit_0_price(coder->is_rep0[state]); + price += rc_bit_1_price(coder->is_rep0_long[state][pos_state]); + } else { + price = rc_bit_1_price(coder->is_rep0[state]); + + if (rep_index == 1) { + price += rc_bit_0_price(coder->is_rep1[state]); + } else { + price += rc_bit_1_price(coder->is_rep1[state]); + price += rc_bit_price(coder->is_rep2[state], + rep_index - 2); + } + } + + return price; +} + + +static inline uint32_t +get_rep_price(const lzma_coder *const coder, const uint32_t rep_index, + const uint32_t len, const lzma_lzma_state state, + const uint32_t pos_state) +{ + return get_len_price(&coder->rep_len_encoder, len, pos_state) + + get_pure_rep_price(coder, rep_index, state, pos_state); +} + + +static inline uint32_t +get_pos_len_price(const lzma_coder *const coder, const uint32_t pos, + const uint32_t len, const uint32_t pos_state) +{ + const uint32_t len_to_pos_state = get_len_to_pos_state(len); + uint32_t price; + + if (pos < FULL_DISTANCES) { + price = coder->distances_prices[len_to_pos_state][pos]; + } else { + const uint32_t pos_slot = get_pos_slot_2(pos); + price = coder->pos_slot_prices[len_to_pos_state][pos_slot] + + coder->align_prices[pos & ALIGN_MASK]; + 
} + + price += get_len_price(&coder->match_len_encoder, len, pos_state); + + return price; +} + + +static void +fill_distances_prices(lzma_coder *coder) +{ + for (uint32_t len_to_pos_state = 0; + len_to_pos_state < LEN_TO_POS_STATES; + ++len_to_pos_state) { + + uint32_t *const pos_slot_prices + = coder->pos_slot_prices[len_to_pos_state]; + + // Price to encode the pos_slot. + for (uint32_t pos_slot = 0; + pos_slot < coder->dist_table_size; ++pos_slot) + pos_slot_prices[pos_slot] = rc_bittree_price( + coder->pos_slot[len_to_pos_state], + POS_SLOT_BITS, pos_slot); + + // For matches with distance >= FULL_DISTANCES, add the price + // of the direct bits part of the match distance. (Align bits + // are handled by fill_align_prices()). + for (uint32_t pos_slot = END_POS_MODEL_INDEX; + pos_slot < coder->dist_table_size; ++pos_slot) + pos_slot_prices[pos_slot] += rc_direct_price( + ((pos_slot >> 1) - 1) - ALIGN_BITS); + + // Distances in the range [0, 3] are fully encoded with + // pos_slot, so they are used for coder->distances_prices + // as is. + for (uint32_t i = 0; i < START_POS_MODEL_INDEX; ++i) + coder->distances_prices[len_to_pos_state][i] + = pos_slot_prices[i]; + } + + // Distances in the range [4, 127] depend on pos_slot and pos_special. + // We do this in a loop separate from the above loop to avoid + // redundant calls to get_pos_slot(). 
+ for (uint32_t i = START_POS_MODEL_INDEX; i < FULL_DISTANCES; ++i) { + const uint32_t pos_slot = get_pos_slot(i); + const uint32_t footer_bits = ((pos_slot >> 1) - 1); + const uint32_t base = (2 | (pos_slot & 1)) << footer_bits; + const uint32_t price = rc_bittree_reverse_price( + coder->pos_special + base - pos_slot - 1, + footer_bits, i - base); + + for (uint32_t len_to_pos_state = 0; + len_to_pos_state < LEN_TO_POS_STATES; + ++len_to_pos_state) + coder->distances_prices[len_to_pos_state][i] + = price + coder->pos_slot_prices[ + len_to_pos_state][pos_slot]; + } + + coder->match_price_count = 0; + return; +} + + +static void +fill_align_prices(lzma_coder *coder) +{ + for (uint32_t i = 0; i < ALIGN_TABLE_SIZE; ++i) + coder->align_prices[i] = rc_bittree_reverse_price( + coder->pos_align, ALIGN_BITS, i); + + coder->align_price_count = 0; + return; +} + + +///////////// +// Optimal // +///////////// + +static inline void +make_literal(lzma_optimal *optimal) +{ + optimal->back_prev = UINT32_MAX; + optimal->prev_1_is_literal = false; +} + + +static inline void +make_short_rep(lzma_optimal *optimal) +{ + optimal->back_prev = 0; + optimal->prev_1_is_literal = false; +} + + +#define is_short_rep(optimal) \ + ((optimal).back_prev == 0) + + +static void +backward(lzma_coder *restrict coder, uint32_t *restrict len_res, + uint32_t *restrict back_res, uint32_t cur) +{ + coder->opts_end_index = cur; + + uint32_t pos_mem = coder->opts[cur].pos_prev; + uint32_t back_mem = coder->opts[cur].back_prev; + + do { + if (coder->opts[cur].prev_1_is_literal) { + make_literal(&coder->opts[pos_mem]); + coder->opts[pos_mem].pos_prev = pos_mem - 1; + + if (coder->opts[cur].prev_2) { + coder->opts[pos_mem - 1].prev_1_is_literal + = false; + coder->opts[pos_mem - 1].pos_prev + = coder->opts[cur].pos_prev_2; + coder->opts[pos_mem - 1].back_prev + = coder->opts[cur].back_prev_2; + } + } + + const uint32_t pos_prev = pos_mem; + const uint32_t back_cur = back_mem; + + back_mem = 
coder->opts[pos_prev].back_prev; + pos_mem = coder->opts[pos_prev].pos_prev; + + coder->opts[pos_prev].back_prev = back_cur; + coder->opts[pos_prev].pos_prev = cur; + cur = pos_prev; + + } while (cur != 0); + + coder->opts_current_index = coder->opts[0].pos_prev; + *len_res = coder->opts[0].pos_prev; + *back_res = coder->opts[0].back_prev; + + return; +} + + +////////// +// Main // +////////// + +static inline uint32_t +helper1(lzma_coder *restrict coder, lzma_mf *restrict mf, + uint32_t *restrict back_res, uint32_t *restrict len_res, + uint32_t position) +{ + const uint32_t nice_len = mf->nice_len; + + uint32_t len_main; + uint32_t matches_count; + + if (mf->read_ahead == 0) { + len_main = mf_find(mf, &matches_count, coder->matches); + } else { + assert(mf->read_ahead == 1); + len_main = coder->longest_match_length; + matches_count = coder->matches_count; + } + + const uint32_t buf_avail = MIN(mf_avail(mf) + 1, MATCH_LEN_MAX); + if (buf_avail < 2) { + *back_res = UINT32_MAX; + *len_res = 1; + return UINT32_MAX; + } + + const uint8_t *const buf = mf_ptr(mf) - 1; + + uint32_t rep_lens[REP_DISTANCES]; + uint32_t rep_max_index = 0; + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) { + const uint8_t *const buf_back = buf - coder->reps[i] - 1; + + if (not_equal_16(buf, buf_back)) { + rep_lens[i] = 0; + continue; + } + + uint32_t len_test; + for (len_test = 2; len_test < buf_avail + && buf[len_test] == buf_back[len_test]; + ++len_test) ; + + rep_lens[i] = len_test; + if (len_test > rep_lens[rep_max_index]) + rep_max_index = i; + } + + if (rep_lens[rep_max_index] >= nice_len) { + *back_res = rep_max_index; + *len_res = rep_lens[rep_max_index]; + mf_skip(mf, *len_res - 1); + return UINT32_MAX; + } + + + if (len_main >= nice_len) { + *back_res = coder->matches[matches_count - 1].dist + + REP_DISTANCES; + *len_res = len_main; + mf_skip(mf, len_main - 1); + return UINT32_MAX; + } + + const uint8_t current_byte = *buf; + const uint8_t match_byte = *(buf - coder->reps[0] - 1); + 
+ if (len_main < 2 && current_byte != match_byte + && rep_lens[rep_max_index] < 2) { + *back_res = UINT32_MAX; + *len_res = 1; + return UINT32_MAX; + } + + coder->opts[0].state = coder->state; + + const uint32_t pos_state = position & coder->pos_mask; + + coder->opts[1].price = rc_bit_0_price( + coder->is_match[coder->state][pos_state]) + + get_literal_price(coder, position, buf[-1], + !is_literal_state(coder->state), + match_byte, current_byte); + + make_literal(&coder->opts[1]); + + const uint32_t match_price = rc_bit_1_price( + coder->is_match[coder->state][pos_state]); + const uint32_t rep_match_price = match_price + + rc_bit_1_price(coder->is_rep[coder->state]); + + if (match_byte == current_byte) { + const uint32_t short_rep_price = rep_match_price + + get_short_rep_price( + coder, coder->state, pos_state); + + if (short_rep_price < coder->opts[1].price) { + coder->opts[1].price = short_rep_price; + make_short_rep(&coder->opts[1]); + } + } + + const uint32_t len_end = MAX(len_main, rep_lens[rep_max_index]); + + if (len_end < 2) { + *back_res = coder->opts[1].back_prev; + *len_res = 1; + return UINT32_MAX; + } + + coder->opts[1].pos_prev = 0; + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) + coder->opts[0].backs[i] = coder->reps[i]; + + uint32_t len = len_end; + do { + coder->opts[len].price = RC_INFINITY_PRICE; + } while (--len >= 2); + + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) { + uint32_t rep_len = rep_lens[i]; + if (rep_len < 2) + continue; + + const uint32_t price = rep_match_price + get_pure_rep_price( + coder, i, coder->state, pos_state); + + do { + const uint32_t cur_and_len_price = price + + get_len_price( + &coder->rep_len_encoder, + rep_len, pos_state); + + if (cur_and_len_price < coder->opts[rep_len].price) { + coder->opts[rep_len].price = cur_and_len_price; + coder->opts[rep_len].pos_prev = 0; + coder->opts[rep_len].back_prev = i; + coder->opts[rep_len].prev_1_is_literal = false; + } + } while (--rep_len >= 2); + } + + + const uint32_t 
normal_match_price = match_price + + rc_bit_0_price(coder->is_rep[coder->state]); + + len = rep_lens[0] >= 2 ? rep_lens[0] + 1 : 2; + if (len <= len_main) { + uint32_t i = 0; + while (len > coder->matches[i].len) + ++i; + + for(; ; ++len) { + const uint32_t dist = coder->matches[i].dist; + const uint32_t cur_and_len_price = normal_match_price + + get_pos_len_price(coder, + dist, len, pos_state); + + if (cur_and_len_price < coder->opts[len].price) { + coder->opts[len].price = cur_and_len_price; + coder->opts[len].pos_prev = 0; + coder->opts[len].back_prev + = dist + REP_DISTANCES; + coder->opts[len].prev_1_is_literal = false; + } + + if (len == coder->matches[i].len) + if (++i == matches_count) + break; + } + } + + return len_end; +} + + +static inline uint32_t +helper2(lzma_coder *coder, uint32_t *reps, const uint8_t *buf, + uint32_t len_end, uint32_t position, const uint32_t cur, + const uint32_t nice_len, const uint32_t buf_avail_full) +{ + uint32_t matches_count = coder->matches_count; + uint32_t new_len = coder->longest_match_length; + uint32_t pos_prev = coder->opts[cur].pos_prev; + lzma_lzma_state state; + + if (coder->opts[cur].prev_1_is_literal) { + --pos_prev; + + if (coder->opts[cur].prev_2) { + state = coder->opts[coder->opts[cur].pos_prev_2].state; + + if (coder->opts[cur].back_prev_2 < REP_DISTANCES) + update_long_rep(state); + else + update_match(state); + + } else { + state = coder->opts[pos_prev].state; + } + + update_literal(state); + + } else { + state = coder->opts[pos_prev].state; + } + + if (pos_prev == cur - 1) { + if (is_short_rep(coder->opts[cur])) + update_short_rep(state); + else + update_literal(state); + } else { + uint32_t pos; + if (coder->opts[cur].prev_1_is_literal + && coder->opts[cur].prev_2) { + pos_prev = coder->opts[cur].pos_prev_2; + pos = coder->opts[cur].back_prev_2; + update_long_rep(state); + } else { + pos = coder->opts[cur].back_prev; + if (pos < REP_DISTANCES) + update_long_rep(state); + else + update_match(state); + } + 
+ if (pos < REP_DISTANCES) { + reps[0] = coder->opts[pos_prev].backs[pos]; + + uint32_t i; + for (i = 1; i <= pos; ++i) + reps[i] = coder->opts[pos_prev].backs[i - 1]; + + for (; i < REP_DISTANCES; ++i) + reps[i] = coder->opts[pos_prev].backs[i]; + + } else { + reps[0] = pos - REP_DISTANCES; + + for (uint32_t i = 1; i < REP_DISTANCES; ++i) + reps[i] = coder->opts[pos_prev].backs[i - 1]; + } + } + + coder->opts[cur].state = state; + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) + coder->opts[cur].backs[i] = reps[i]; + + const uint32_t cur_price = coder->opts[cur].price; + + const uint8_t current_byte = *buf; + const uint8_t match_byte = *(buf - reps[0] - 1); + + const uint32_t pos_state = position & coder->pos_mask; + + const uint32_t cur_and_1_price = cur_price + + rc_bit_0_price(coder->is_match[state][pos_state]) + + get_literal_price(coder, position, buf[-1], + !is_literal_state(state), match_byte, current_byte); + + bool next_is_literal = false; + + if (cur_and_1_price < coder->opts[cur + 1].price) { + coder->opts[cur + 1].price = cur_and_1_price; + coder->opts[cur + 1].pos_prev = cur; + make_literal(&coder->opts[cur + 1]); + next_is_literal = true; + } + + const uint32_t match_price = cur_price + + rc_bit_1_price(coder->is_match[state][pos_state]); + const uint32_t rep_match_price = match_price + + rc_bit_1_price(coder->is_rep[state]); + + if (match_byte == current_byte + && !(coder->opts[cur + 1].pos_prev < cur + && coder->opts[cur + 1].back_prev == 0)) { + + const uint32_t short_rep_price = rep_match_price + + get_short_rep_price(coder, state, pos_state); + + if (short_rep_price <= coder->opts[cur + 1].price) { + coder->opts[cur + 1].price = short_rep_price; + coder->opts[cur + 1].pos_prev = cur; + make_short_rep(&coder->opts[cur + 1]); + next_is_literal = true; + } + } + + if (buf_avail_full < 2) + return len_end; + + const uint32_t buf_avail = MIN(buf_avail_full, nice_len); + + if (!next_is_literal && match_byte != current_byte) { // speed optimization + 
// try literal + rep0 + const uint8_t *const buf_back = buf - reps[0] - 1; + const uint32_t limit = MIN(buf_avail_full, nice_len + 1); + + uint32_t len_test = 1; + while (len_test < limit && buf[len_test] == buf_back[len_test]) + ++len_test; + + --len_test; + + if (len_test >= 2) { + lzma_lzma_state state_2 = state; + update_literal(state_2); + + const uint32_t pos_state_next = (position + 1) & coder->pos_mask; + const uint32_t next_rep_match_price = cur_and_1_price + + rc_bit_1_price(coder->is_match[state_2][pos_state_next]) + + rc_bit_1_price(coder->is_rep[state_2]); + + //for (; len_test >= 2; --len_test) { + const uint32_t offset = cur + 1 + len_test; + + while (len_end < offset) + coder->opts[++len_end].price = RC_INFINITY_PRICE; + + const uint32_t cur_and_len_price = next_rep_match_price + + get_rep_price(coder, 0, len_test, + state_2, pos_state_next); + + if (cur_and_len_price < coder->opts[offset].price) { + coder->opts[offset].price = cur_and_len_price; + coder->opts[offset].pos_prev = cur + 1; + coder->opts[offset].back_prev = 0; + coder->opts[offset].prev_1_is_literal = true; + coder->opts[offset].prev_2 = false; + } + //} + } + } + + + uint32_t start_len = 2; // speed optimization + + for (uint32_t rep_index = 0; rep_index < REP_DISTANCES; ++rep_index) { + const uint8_t *const buf_back = buf - reps[rep_index] - 1; + if (not_equal_16(buf, buf_back)) + continue; + + uint32_t len_test; + for (len_test = 2; len_test < buf_avail + && buf[len_test] == buf_back[len_test]; + ++len_test) ; + + while (len_end < cur + len_test) + coder->opts[++len_end].price = RC_INFINITY_PRICE; + + const uint32_t len_test_temp = len_test; + const uint32_t price = rep_match_price + get_pure_rep_price( + coder, rep_index, state, pos_state); + + do { + const uint32_t cur_and_len_price = price + + get_len_price(&coder->rep_len_encoder, + len_test, pos_state); + + if (cur_and_len_price < coder->opts[cur + len_test].price) { + coder->opts[cur + len_test].price = cur_and_len_price; + 
coder->opts[cur + len_test].pos_prev = cur; + coder->opts[cur + len_test].back_prev = rep_index; + coder->opts[cur + len_test].prev_1_is_literal = false; + } + } while (--len_test >= 2); + + len_test = len_test_temp; + + if (rep_index == 0) + start_len = len_test + 1; + + + uint32_t len_test_2 = len_test + 1; + const uint32_t limit = MIN(buf_avail_full, + len_test_2 + nice_len); + for (; len_test_2 < limit + && buf[len_test_2] == buf_back[len_test_2]; + ++len_test_2) ; + + len_test_2 -= len_test + 1; + + if (len_test_2 >= 2) { + lzma_lzma_state state_2 = state; + update_long_rep(state_2); + + uint32_t pos_state_next = (position + len_test) & coder->pos_mask; + + const uint32_t cur_and_len_literal_price = price + + get_len_price(&coder->rep_len_encoder, + len_test, pos_state) + + rc_bit_0_price(coder->is_match[state_2][pos_state_next]) + + get_literal_price(coder, position + len_test, + buf[len_test - 1], true, + buf_back[len_test], buf[len_test]); + + update_literal(state_2); + + pos_state_next = (position + len_test + 1) & coder->pos_mask; + + const uint32_t next_rep_match_price = cur_and_len_literal_price + + rc_bit_1_price(coder->is_match[state_2][pos_state_next]) + + rc_bit_1_price(coder->is_rep[state_2]); + + //for(; len_test_2 >= 2; len_test_2--) { + const uint32_t offset = cur + len_test + 1 + len_test_2; + + while (len_end < offset) + coder->opts[++len_end].price = RC_INFINITY_PRICE; + + const uint32_t cur_and_len_price = next_rep_match_price + + get_rep_price(coder, 0, len_test_2, + state_2, pos_state_next); + + if (cur_and_len_price < coder->opts[offset].price) { + coder->opts[offset].price = cur_and_len_price; + coder->opts[offset].pos_prev = cur + len_test + 1; + coder->opts[offset].back_prev = 0; + coder->opts[offset].prev_1_is_literal = true; + coder->opts[offset].prev_2 = true; + coder->opts[offset].pos_prev_2 = cur; + coder->opts[offset].back_prev_2 = rep_index; + } + //} + } + } + + + //for (uint32_t len_test = 2; len_test <= new_len; ++len_test) + 
if (new_len > buf_avail) { + new_len = buf_avail; + + matches_count = 0; + while (new_len > coder->matches[matches_count].len) + ++matches_count; + + coder->matches[matches_count++].len = new_len; + } + + + if (new_len >= start_len) { + const uint32_t normal_match_price = match_price + + rc_bit_0_price(coder->is_rep[state]); + + while (len_end < cur + new_len) + coder->opts[++len_end].price = RC_INFINITY_PRICE; + + uint32_t i = 0; + while (start_len > coder->matches[i].len) + ++i; + + for (uint32_t len_test = start_len; ; ++len_test) { + const uint32_t cur_back = coder->matches[i].dist; + uint32_t cur_and_len_price = normal_match_price + + get_pos_len_price(coder, + cur_back, len_test, pos_state); + + if (cur_and_len_price < coder->opts[cur + len_test].price) { + coder->opts[cur + len_test].price = cur_and_len_price; + coder->opts[cur + len_test].pos_prev = cur; + coder->opts[cur + len_test].back_prev + = cur_back + REP_DISTANCES; + coder->opts[cur + len_test].prev_1_is_literal = false; + } + + if (len_test == coder->matches[i].len) { + // Try Match + Literal + Rep0 + const uint8_t *const buf_back = buf - cur_back - 1; + uint32_t len_test_2 = len_test + 1; + const uint32_t limit = MIN(buf_avail_full, + len_test_2 + nice_len); + + for (; len_test_2 < limit && + buf[len_test_2] == buf_back[len_test_2]; + ++len_test_2) ; + + len_test_2 -= len_test + 1; + + if (len_test_2 >= 2) { + lzma_lzma_state state_2 = state; + update_match(state_2); + uint32_t pos_state_next + = (position + len_test) & coder->pos_mask; + + const uint32_t cur_and_len_literal_price = cur_and_len_price + + rc_bit_0_price( + coder->is_match[state_2][pos_state_next]) + + get_literal_price(coder, + position + len_test, + buf[len_test - 1], + true, + buf_back[len_test], + buf[len_test]); + + update_literal(state_2); + pos_state_next = (pos_state_next + 1) & coder->pos_mask; + + const uint32_t next_rep_match_price + = cur_and_len_literal_price + + rc_bit_1_price( + 
coder->is_match[state_2][pos_state_next]) + + rc_bit_1_price(coder->is_rep[state_2]); + + // for(; len_test_2 >= 2; --len_test_2) { + const uint32_t offset = cur + len_test + 1 + len_test_2; + + while (len_end < offset) + coder->opts[++len_end].price = RC_INFINITY_PRICE; + + cur_and_len_price = next_rep_match_price + + get_rep_price(coder, 0, len_test_2, + state_2, pos_state_next); + + if (cur_and_len_price < coder->opts[offset].price) { + coder->opts[offset].price = cur_and_len_price; + coder->opts[offset].pos_prev = cur + len_test + 1; + coder->opts[offset].back_prev = 0; + coder->opts[offset].prev_1_is_literal = true; + coder->opts[offset].prev_2 = true; + coder->opts[offset].pos_prev_2 = cur; + coder->opts[offset].back_prev_2 + = cur_back + REP_DISTANCES; + } + //} + } + + if (++i == matches_count) + break; + } + } + } + + return len_end; +} + + +extern void +lzma_lzma_optimum_normal(lzma_coder *restrict coder, lzma_mf *restrict mf, + uint32_t *restrict back_res, uint32_t *restrict len_res, + uint32_t position) +{ + // If we have symbols pending, return the next pending symbol. + if (coder->opts_end_index != coder->opts_current_index) { + assert(mf->read_ahead > 0); + *len_res = coder->opts[coder->opts_current_index].pos_prev + - coder->opts_current_index; + *back_res = coder->opts[coder->opts_current_index].back_prev; + coder->opts_current_index = coder->opts[ + coder->opts_current_index].pos_prev; + return; + } + + // Update the price tables. In LZMA SDK <= 4.60 (and possibly later) + // this was done in both initialization function and in the main loop. + // In liblzma they were moved into this single place. + if (mf->read_ahead == 0) { + if (coder->match_price_count >= (1 << 7)) + fill_distances_prices(coder); + + if (coder->align_price_count >= ALIGN_TABLE_SIZE) + fill_align_prices(coder); + } + + // TODO: This needs quite a bit of cleaning still. 
But splitting + // the original function into two pieces makes it at least a little + // more readable, since those two parts don't share many variables. + + uint32_t len_end = helper1(coder, mf, back_res, len_res, position); + if (len_end == UINT32_MAX) + return; + + uint32_t reps[REP_DISTANCES]; + memcpy(reps, coder->reps, sizeof(reps)); + + uint32_t cur; + for (cur = 1; cur < len_end; ++cur) { + assert(cur < OPTS); + + coder->longest_match_length = mf_find( + mf, &coder->matches_count, coder->matches); + + if (coder->longest_match_length >= mf->nice_len) + break; + + len_end = helper2(coder, reps, mf_ptr(mf) - 1, len_end, + position + cur, cur, mf->nice_len, + MIN(mf_avail(mf) + 1, OPTS - 1 - cur)); + } + + backward(coder, len_res, back_res, cur); + return; +} diff --git a/src/liblzma/lzma/lzma_encoder_presets.c b/src/liblzma/lzma/lzma_encoder_presets.c new file mode 100644 index 000000000000..c4c9c146f559 --- /dev/null +++ b/src/liblzma/lzma/lzma_encoder_presets.c @@ -0,0 +1,52 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder_presets.c +/// \brief Encoder presets +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +extern LZMA_API(lzma_bool) +lzma_lzma_preset(lzma_options_lzma *options, uint32_t preset) +{ + const uint32_t level = preset & LZMA_PRESET_LEVEL_MASK; + const uint32_t flags = preset & ~LZMA_PRESET_LEVEL_MASK; + const uint32_t supported_flags = LZMA_PRESET_EXTREME; + + if (level > 9 || (flags & ~supported_flags)) + return true; + + const uint32_t dict_shift = level <= 1 ? 
16 : level + 17; + options->dict_size = UINT32_C(1) << dict_shift; + + options->preset_dict = NULL; + options->preset_dict_size = 0; + + options->lc = LZMA_LC_DEFAULT; + options->lp = LZMA_LP_DEFAULT; + options->pb = LZMA_PB_DEFAULT; + + options->mode = level <= 2 ? LZMA_MODE_FAST : LZMA_MODE_NORMAL; + + options->nice_len = level == 0 ? 8 : level <= 5 ? 32 : 64; + options->mf = level <= 1 ? LZMA_MF_HC3 : level <= 2 ? LZMA_MF_HC4 + : LZMA_MF_BT4; + options->depth = 0; + + if (flags & LZMA_PRESET_EXTREME) { + options->lc = 4; // FIXME? + options->mode = LZMA_MODE_NORMAL; + options->mf = LZMA_MF_BT4; + options->nice_len = 273; + options->depth = 512; + } + + return false; +} diff --git a/src/liblzma/lzma/lzma_encoder_private.h b/src/liblzma/lzma/lzma_encoder_private.h new file mode 100644 index 000000000000..684745236c82 --- /dev/null +++ b/src/liblzma/lzma/lzma_encoder_private.h @@ -0,0 +1,148 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder_private.h +/// \brief Private definitions for LZMA encoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZMA_ENCODER_PRIVATE_H +#define LZMA_LZMA_ENCODER_PRIVATE_H + +#include "lz_encoder.h" +#include "range_encoder.h" +#include "lzma_common.h" +#include "lzma_encoder.h" + + +// Macro to compare if the first two bytes in two buffers differ. This is +// needed in lzma_lzma_optimum_*() to test if the match is at least +// MATCH_LEN_MIN bytes. Unaligned access gives tiny gain so there's no +// reason to not use it when it is supported. 
+#ifdef TUKLIB_FAST_UNALIGNED_ACCESS +# define not_equal_16(a, b) \ + (*(const uint16_t *)(a) != *(const uint16_t *)(b)) +#else +# define not_equal_16(a, b) \ + ((a)[0] != (b)[0] || (a)[1] != (b)[1]) +#endif + + +// Optimal - Number of entries in the optimum array. +#define OPTS (1 << 12) + + +typedef struct { + probability choice; + probability choice2; + probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS]; + probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS]; + probability high[LEN_HIGH_SYMBOLS]; + + uint32_t prices[POS_STATES_MAX][LEN_SYMBOLS]; + uint32_t table_size; + uint32_t counters[POS_STATES_MAX]; + +} lzma_length_encoder; + + +typedef struct { + lzma_lzma_state state; + + bool prev_1_is_literal; + bool prev_2; + + uint32_t pos_prev_2; + uint32_t back_prev_2; + + uint32_t price; + uint32_t pos_prev; // pos_next; + uint32_t back_prev; + + uint32_t backs[REP_DISTANCES]; + +} lzma_optimal; + + +struct lzma_coder_s { + /// Range encoder + lzma_range_encoder rc; + + /// State + lzma_lzma_state state; + + /// The four most recent match distances + uint32_t reps[REP_DISTANCES]; + + /// Array of match candidates + lzma_match matches[MATCH_LEN_MAX + 1]; + + /// Number of match candidates in matches[] + uint32_t matches_count; + + /// Variable to hold the length of the longest match between calls + /// to lzma_lzma_optimum_*(). + uint32_t longest_match_length; + + /// True if using getoptimumfast + bool fast_mode; + + /// True if the encoder has been initialized by encoding the first + /// byte as a literal. + bool is_initialized; + + /// True if the range encoder has been flushed, but not all bytes + /// have been written to the output buffer yet. + bool is_flushed; + + uint32_t pos_mask; ///< (1 << pos_bits) - 1 + uint32_t literal_context_bits; + uint32_t literal_pos_mask; + + // These are the same as in lzma_decoder.c. See comments there. 
+ probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE]; + probability is_match[STATES][POS_STATES_MAX]; + probability is_rep[STATES]; + probability is_rep0[STATES]; + probability is_rep1[STATES]; + probability is_rep2[STATES]; + probability is_rep0_long[STATES][POS_STATES_MAX]; + probability pos_slot[LEN_TO_POS_STATES][POS_SLOTS]; + probability pos_special[FULL_DISTANCES - END_POS_MODEL_INDEX]; + probability pos_align[ALIGN_TABLE_SIZE]; + + // These are the same as in lzma_decoder.c except that the encoders + // include also price tables. + lzma_length_encoder match_len_encoder; + lzma_length_encoder rep_len_encoder; + + // Price tables + uint32_t pos_slot_prices[LEN_TO_POS_STATES][POS_SLOTS]; + uint32_t distances_prices[LEN_TO_POS_STATES][FULL_DISTANCES]; + uint32_t dist_table_size; + uint32_t match_price_count; + + uint32_t align_prices[ALIGN_TABLE_SIZE]; + uint32_t align_price_count; + + // Optimal + uint32_t opts_end_index; + uint32_t opts_current_index; + lzma_optimal opts[OPTS]; +}; + + +extern void lzma_lzma_optimum_fast( + lzma_coder *restrict coder, lzma_mf *restrict mf, + uint32_t *restrict back_res, uint32_t *restrict len_res); + +extern void lzma_lzma_optimum_normal(lzma_coder *restrict coder, + lzma_mf *restrict mf, uint32_t *restrict back_res, + uint32_t *restrict len_res, uint32_t position); + +#endif diff --git a/src/liblzma/rangecoder/price.h b/src/liblzma/rangecoder/price.h new file mode 100644 index 000000000000..8ae02ca7474e --- /dev/null +++ b/src/liblzma/rangecoder/price.h @@ -0,0 +1,92 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file price.h +/// \brief Probability price calculation +// +// Author: Igor Pavlov +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_PRICE_H +#define LZMA_PRICE_H + + +#define RC_MOVE_REDUCING_BITS 4 +#define RC_BIT_PRICE_SHIFT_BITS 4 +#define RC_PRICE_TABLE_SIZE (RC_BIT_MODEL_TOTAL >> RC_MOVE_REDUCING_BITS) + +#define RC_INFINITY_PRICE (UINT32_C(1) << 30) + + +/// Lookup table for the inline functions defined in this file. +extern const uint8_t lzma_rc_prices[RC_PRICE_TABLE_SIZE]; + + +static inline uint32_t +rc_bit_price(const probability prob, const uint32_t bit) +{ + return lzma_rc_prices[(prob ^ ((UINT32_C(0) - bit) + & (RC_BIT_MODEL_TOTAL - 1))) >> RC_MOVE_REDUCING_BITS]; +} + + +static inline uint32_t +rc_bit_0_price(const probability prob) +{ + return lzma_rc_prices[prob >> RC_MOVE_REDUCING_BITS]; +} + + +static inline uint32_t +rc_bit_1_price(const probability prob) +{ + return lzma_rc_prices[(prob ^ (RC_BIT_MODEL_TOTAL - 1)) + >> RC_MOVE_REDUCING_BITS]; +} + + +static inline uint32_t +rc_bittree_price(const probability *const probs, + const uint32_t bit_levels, uint32_t symbol) +{ + uint32_t price = 0; + symbol += UINT32_C(1) << bit_levels; + + do { + const uint32_t bit = symbol & 1; + symbol >>= 1; + price += rc_bit_price(probs[symbol], bit); + } while (symbol != 1); + + return price; +} + + +static inline uint32_t +rc_bittree_reverse_price(const probability *const probs, + uint32_t bit_levels, uint32_t symbol) +{ + uint32_t price = 0; + uint32_t model_index = 1; + + do { + const uint32_t bit = symbol & 1; + symbol >>= 1; + price += rc_bit_price(probs[model_index], bit); + model_index = (model_index << 1) + bit; + } while (--bit_levels != 0); + + return price; +} + + +static inline uint32_t +rc_direct_price(const uint32_t bits) +{ + return bits << RC_BIT_PRICE_SHIFT_BITS; +} + +#endif diff --git a/src/liblzma/rangecoder/price_table.c b/src/liblzma/rangecoder/price_table.c new file mode 100644 index 000000000000..ac64bf62c767 --- /dev/null +++ b/src/liblzma/rangecoder/price_table.c @@ 
-0,0 +1,22 @@ +/* This file has been automatically generated by price_tablegen.c. */ + +#include "range_encoder.h" + +const uint8_t lzma_rc_prices[RC_PRICE_TABLE_SIZE] = { + 128, 103, 91, 84, 78, 73, 69, 66, + 63, 61, 58, 56, 54, 52, 51, 49, + 48, 46, 45, 44, 43, 42, 41, 40, + 39, 38, 37, 36, 35, 34, 34, 33, + 32, 31, 31, 30, 29, 29, 28, 28, + 27, 26, 26, 25, 25, 24, 24, 23, + 23, 22, 22, 22, 21, 21, 20, 20, + 19, 19, 19, 18, 18, 17, 17, 17, + 16, 16, 16, 15, 15, 15, 14, 14, + 14, 13, 13, 13, 12, 12, 12, 11, + 11, 11, 11, 10, 10, 10, 10, 9, + 9, 9, 9, 8, 8, 8, 8, 7, + 7, 7, 7, 6, 6, 6, 6, 5, + 5, 5, 5, 5, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 2, 2, 2, + 2, 2, 2, 1, 1, 1, 1, 1 +}; diff --git a/src/liblzma/rangecoder/price_tablegen.c b/src/liblzma/rangecoder/price_tablegen.c new file mode 100644 index 000000000000..bf08ce39d7e5 --- /dev/null +++ b/src/liblzma/rangecoder/price_tablegen.c @@ -0,0 +1,87 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file price_tablegen.c +/// \brief Probability price table generator +/// +/// Compiling: gcc -std=c99 -o price_tablegen price_tablegen.c +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include <inttypes.h> +#include <stdio.h> +#include "range_common.h" +#include "price.h" + + +static uint32_t rc_prices[RC_PRICE_TABLE_SIZE]; + + +static void +init_price_table(void) +{ + for (uint32_t i = (UINT32_C(1) << RC_MOVE_REDUCING_BITS) / 2; + i < RC_BIT_MODEL_TOTAL; + i += (UINT32_C(1) << RC_MOVE_REDUCING_BITS)) { + const uint32_t cycles_bits = RC_BIT_PRICE_SHIFT_BITS; + uint32_t w = i; + uint32_t bit_count = 0; + + for (uint32_t j = 0; j < cycles_bits; ++j) { + w *= w; + bit_count <<= 1; + + while (w >= (UINT32_C(1) << 16)) { + w >>= 1; + ++bit_count; + } + } + + rc_prices[i >> RC_MOVE_REDUCING_BITS] + = (RC_BIT_MODEL_TOTAL_BITS << cycles_bits) + - 15 - bit_count; + } + + return; +} + + +static void +print_price_table(void) +{ + printf("/* This file has been automatically generated by " + "price_tablegen.c. */\n\n" + "#include \"range_encoder.h\"\n\n" + "const uint8_t lzma_rc_prices[" + "RC_PRICE_TABLE_SIZE] = {"); + + const size_t array_size = sizeof(lzma_rc_prices) + / sizeof(lzma_rc_prices[0]); + for (size_t i = 0; i < array_size; ++i) { + if (i % 8 == 0) + printf("\n\t"); + + printf("%4" PRIu32, rc_prices[i]); + + if (i != array_size - 1) + printf(","); + } + + printf("\n};\n"); + + return; +} + + +int +main(void) +{ + init_price_table(); + print_price_table(); + return 0; +} diff --git a/src/liblzma/rangecoder/range_common.h b/src/liblzma/rangecoder/range_common.h new file mode 100644 index 000000000000..0e6424198d13 --- /dev/null +++ b/src/liblzma/rangecoder/range_common.h @@ -0,0 +1,73 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file range_common.h +/// \brief Common things for range encoder and decoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file.
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_RANGE_COMMON_H +#define LZMA_RANGE_COMMON_H + +#ifdef HAVE_CONFIG_H +# include "common.h" +#endif + + +/////////////// +// Constants // +/////////////// + +#define RC_SHIFT_BITS 8 +#define RC_TOP_BITS 24 +#define RC_TOP_VALUE (UINT32_C(1) << RC_TOP_BITS) +#define RC_BIT_MODEL_TOTAL_BITS 11 +#define RC_BIT_MODEL_TOTAL (UINT32_C(1) << RC_BIT_MODEL_TOTAL_BITS) +#define RC_MOVE_BITS 5 + + +//////////// +// Macros // +//////////// + +// Resets the probability so that both 0 and 1 have probability of 50 % +#define bit_reset(prob) \ + prob = RC_BIT_MODEL_TOTAL >> 1 + +// This does the same for a complete bit tree. +// (A tree represented as an array.) +#define bittree_reset(probs, bit_levels) \ + for (uint32_t bt_i = 0; bt_i < (1 << (bit_levels)); ++bt_i) \ + bit_reset((probs)[bt_i]) + + +////////////////////// +// Type definitions // +////////////////////// + +/// \brief Type of probabilities used with range coder +/// +/// This needs to be at least a 12-bit integer, so uint16_t is a logical choice. +/// However, on some architecture and compiler combinations, a bigger type +/// may give better speed, because the probability variables are accessed +/// a lot. On the other hand, a bigger probability type increases cache +/// footprint, since there are 2 to 14 thousand probability variables in +/// LZMA (assuming the limit of lc + lp <= 4; with lc + lp <= 12 there +/// would be about 1.5 million variables). +/// +/// With malicious files, the initialization speed of the LZMA decoder can +/// become important. In that case, smaller probability variables mean that +/// there are fewer bytes to write to RAM, which makes initialization faster. +/// With a big probability type, the initialization can become so slow that it +/// can be a problem e.g. for email servers doing virus scanning.
+/// +/// I will be sticking to uint16_t unless some specific architectures +/// are *much* faster (20-50 %) with uint32_t. +typedef uint16_t probability; + +#endif diff --git a/src/liblzma/rangecoder/range_decoder.h b/src/liblzma/rangecoder/range_decoder.h new file mode 100644 index 000000000000..fb96180fb399 --- /dev/null +++ b/src/liblzma/rangecoder/range_decoder.h @@ -0,0 +1,179 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file range_decoder.h +/// \brief Range Decoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_RANGE_DECODER_H +#define LZMA_RANGE_DECODER_H + +#include "range_common.h" + + +typedef struct { + uint32_t range; + uint32_t code; + uint32_t init_bytes_left; +} lzma_range_decoder; + + +/// Reads the first five bytes to initialize the range decoder. +static inline bool +rc_read_init(lzma_range_decoder *rc, const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size) +{ + while (rc->init_bytes_left > 0) { + if (*in_pos == in_size) + return false; + + rc->code = (rc->code << 8) | in[*in_pos]; + ++*in_pos; + --rc->init_bytes_left; + } + + return true; +} + + +/// Makes local copies of range decoder and *in_pos variables. Doing this +/// improves speed significantly. The range decoder macros also expect the +/// variables `in' and `in_size' to be defined. +#define rc_to_local(range_decoder, in_pos) \ + lzma_range_decoder rc = range_decoder; \ + size_t rc_in_pos = (in_pos); \ + uint32_t rc_bound + + +/// Stores the local copies back to the range decoder structure. +#define rc_from_local(range_decoder, in_pos) \ +do { \ + range_decoder = rc; \ + in_pos = rc_in_pos; \ +} while (0) + + +/// Resets the range decoder structure.
+#define rc_reset(range_decoder) \ +do { \ + (range_decoder).range = UINT32_MAX; \ + (range_decoder).code = 0; \ + (range_decoder).init_bytes_left = 5; \ +} while (0) + + +/// When decoding has been properly finished, rc.code is always zero unless +/// the input stream is corrupt. So checking this can catch some corrupt +/// files especially if they don't have any other integrity check. +#define rc_is_finished(range_decoder) \ + ((range_decoder).code == 0) + + +/// Read the next input byte if needed. If more input is needed but there is +/// no more input available, "goto out" is used to jump out of the main +/// decoder loop. +#define rc_normalize(seq) \ +do { \ + if (rc.range < RC_TOP_VALUE) { \ + if (unlikely(rc_in_pos == in_size)) { \ + coder->sequence = seq; \ + goto out; \ + } \ + rc.range <<= RC_SHIFT_BITS; \ + rc.code = (rc.code << RC_SHIFT_BITS) | in[rc_in_pos++]; \ + } \ +} while (0) + + +/// Start decoding a bit. This must be used together with rc_update_0() +/// and rc_update_1(): +/// +/// rc_if_0(prob, seq) { +/// rc_update_0(prob); +/// // Do something +/// } else { +/// rc_update_1(prob); +/// // Do something else +/// } +/// +#define rc_if_0(prob, seq) \ + rc_normalize(seq); \ + rc_bound = (rc.range >> RC_BIT_MODEL_TOTAL_BITS) * (prob); \ + if (rc.code < rc_bound) + + +/// Update the range decoder state and the used probability variable to +/// match a decoded bit of 0. +#define rc_update_0(prob) \ +do { \ + rc.range = rc_bound; \ + prob += (RC_BIT_MODEL_TOTAL - (prob)) >> RC_MOVE_BITS; \ +} while (0) + + +/// Update the range decoder state and the used probability variable to +/// match a decoded bit of 1. +#define rc_update_1(prob) \ +do { \ + rc.range -= rc_bound; \ + rc.code -= rc_bound; \ + prob -= (prob) >> RC_MOVE_BITS; \ +} while (0) + + +/// Decodes one bit and runs action0 or action1 depending on the decoded bit. 
+/// This macro is used as the last step in bittree reverse decoders since +/// those don't use "symbol" for anything other than indexing the probability +/// arrays. +#define rc_bit_last(prob, action0, action1, seq) \ +do { \ + rc_if_0(prob, seq) { \ + rc_update_0(prob); \ + action0; \ + } else { \ + rc_update_1(prob); \ + action1; \ + } \ +} while (0) + + +/// Decodes one bit, updates "symbol", and runs action0 or action1 depending +/// on the decoded bit. +#define rc_bit(prob, action0, action1, seq) \ + rc_bit_last(prob, \ + symbol <<= 1; action0, \ + symbol = (symbol << 1) + 1; action1, \ + seq); + + +/// Like rc_bit() but adds "case seq:" as a prefix. This makes the unrolled +/// loops more readable because the code isn't littered with "case" +/// statements. On the other hand this also makes it less readable, since +/// spotting the places where the decoder loop may be restarted is less +/// obvious. +#define rc_bit_case(prob, action0, action1, seq) \ + case seq: rc_bit(prob, action0, action1, seq) + + +/// Decode a bit without using a probability. +#define rc_direct(dest, seq) \ +do { \ + rc_normalize(seq); \ + rc.range >>= 1; \ + rc.code -= rc.range; \ + rc_bound = UINT32_C(0) - (rc.code >> 31); \ + rc.code += rc.range & rc_bound; \ + dest = (dest << 1) + (rc_bound + 1); \ +} while (0) + + +// NOTE: No macros are provided for bittree decoding. It seems to be simpler +// to just write them out in the code. + +#endif diff --git a/src/liblzma/rangecoder/range_encoder.h b/src/liblzma/rangecoder/range_encoder.h new file mode 100644 index 000000000000..1e1c36995b63 --- /dev/null +++ b/src/liblzma/rangecoder/range_encoder.h @@ -0,0 +1,231 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file range_encoder.h +/// \brief Range Encoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file.
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_RANGE_ENCODER_H +#define LZMA_RANGE_ENCODER_H + +#include "range_common.h" +#include "price.h" + + +/// Maximum number of symbols that can be put pending into lzma_range_encoder +/// structure between calls to lzma_rc_encode(). For LZMA, 52+5 is enough +/// (match with big distance and length followed by range encoder flush). +#define RC_SYMBOLS_MAX 58 + + +typedef struct { + uint64_t low; + uint64_t cache_size; + uint32_t range; + uint8_t cache; + + /// Number of symbols in the tables + size_t count; + + /// rc_encode()'s position in the tables + size_t pos; + + /// Symbols to encode + enum { + RC_BIT_0, + RC_BIT_1, + RC_DIRECT_0, + RC_DIRECT_1, + RC_FLUSH, + } symbols[RC_SYMBOLS_MAX]; + + /// Probabilities associated with RC_BIT_0 or RC_BIT_1 + probability *probs[RC_SYMBOLS_MAX]; + +} lzma_range_encoder; + + +static inline void +rc_reset(lzma_range_encoder *rc) +{ + rc->low = 0; + rc->cache_size = 1; + rc->range = UINT32_MAX; + rc->cache = 0; + rc->count = 0; + rc->pos = 0; +} + + +static inline void +rc_bit(lzma_range_encoder *rc, probability *prob, uint32_t bit) +{ + rc->symbols[rc->count] = bit; + rc->probs[rc->count] = prob; + ++rc->count; +} + + +static inline void +rc_bittree(lzma_range_encoder *rc, probability *probs, + uint32_t bit_count, uint32_t symbol) +{ + uint32_t model_index = 1; + + do { + const uint32_t bit = (symbol >> --bit_count) & 1; + rc_bit(rc, &probs[model_index], bit); + model_index = (model_index << 1) + bit; + } while (bit_count != 0); +} + + +static inline void +rc_bittree_reverse(lzma_range_encoder *rc, probability *probs, + uint32_t bit_count, uint32_t symbol) +{ + uint32_t model_index = 1; + + do { + const uint32_t bit = symbol & 1; + symbol >>= 1; + rc_bit(rc, &probs[model_index], bit); + model_index = (model_index << 1) + bit; + } while (--bit_count != 0); +} + + +static inline void +rc_direct(lzma_range_encoder *rc, + uint32_t 
value, uint32_t bit_count) +{ + do { + rc->symbols[rc->count++] + = RC_DIRECT_0 + ((value >> --bit_count) & 1); + } while (bit_count != 0); +} + + +static inline void +rc_flush(lzma_range_encoder *rc) +{ + for (size_t i = 0; i < 5; ++i) + rc->symbols[rc->count++] = RC_FLUSH; +} + + +static inline bool +rc_shift_low(lzma_range_encoder *rc, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + if ((uint32_t)(rc->low) < (uint32_t)(0xFF000000) + || (uint32_t)(rc->low >> 32) != 0) { + do { + if (*out_pos == out_size) + return true; + + out[*out_pos] = rc->cache + (uint8_t)(rc->low >> 32); + ++*out_pos; + rc->cache = 0xFF; + + } while (--rc->cache_size != 0); + + rc->cache = (rc->low >> 24) & 0xFF; + } + + ++rc->cache_size; + rc->low = (rc->low & 0x00FFFFFF) << RC_SHIFT_BITS; + + return false; +} + + +static inline bool +rc_encode(lzma_range_encoder *rc, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + assert(rc->count <= RC_SYMBOLS_MAX); + + while (rc->pos < rc->count) { + // Normalize + if (rc->range < RC_TOP_VALUE) { + if (rc_shift_low(rc, out, out_pos, out_size)) + return true; + + rc->range <<= RC_SHIFT_BITS; + } + + // Encode a bit + switch (rc->symbols[rc->pos]) { + case RC_BIT_0: { + probability prob = *rc->probs[rc->pos]; + rc->range = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) + * prob; + prob += (RC_BIT_MODEL_TOTAL - prob) >> RC_MOVE_BITS; + *rc->probs[rc->pos] = prob; + break; + } + + case RC_BIT_1: { + probability prob = *rc->probs[rc->pos]; + const uint32_t bound = prob * (rc->range + >> RC_BIT_MODEL_TOTAL_BITS); + rc->low += bound; + rc->range -= bound; + prob -= prob >> RC_MOVE_BITS; + *rc->probs[rc->pos] = prob; + break; + } + + case RC_DIRECT_0: + rc->range >>= 1; + break; + + case RC_DIRECT_1: + rc->range >>= 1; + rc->low += rc->range; + break; + + case RC_FLUSH: + // Prevent further normalizations. + rc->range = UINT32_MAX; + + // Flush the last five bytes (see rc_flush()). 
+ do { + if (rc_shift_low(rc, out, out_pos, out_size)) + return true; + } while (++rc->pos < rc->count); + + // Reset the range encoder so we are ready to continue + // encoding if we weren't finishing the stream. + rc_reset(rc); + return false; + + default: + assert(0); + break; + } + + ++rc->pos; + } + + rc->count = 0; + rc->pos = 0; + + return false; +} + + +static inline uint64_t +rc_pending(const lzma_range_encoder *rc) +{ + return rc->cache_size + 5 - 1; +} + +#endif diff --git a/src/liblzma/simple/arm.c b/src/liblzma/simple/arm.c new file mode 100644 index 000000000000..8fcf64374914 --- /dev/null +++ b/src/liblzma/simple/arm.c @@ -0,0 +1,69 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file arm.c +/// \brief Filter for ARM binaries +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +static size_t +arm_code(lzma_simple *simple lzma_attribute((unused)), + uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + size_t i; + for (i = 0; i + 4 <= size; i += 4) { + if (buffer[i + 3] == 0xEB) { + uint32_t src = (buffer[i + 2] << 16) + | (buffer[i + 1] << 8) + | (buffer[i + 0]); + src <<= 2; + + uint32_t dest; + if (is_encoder) + dest = now_pos + (uint32_t)(i) + 8 + src; + else + dest = src - (now_pos + (uint32_t)(i) + 8); + + dest >>= 2; + buffer[i + 2] = (dest >> 16); + buffer[i + 1] = (dest >> 8); + buffer[i + 0] = dest; + } + } + + return i; +} + + +static lzma_ret +arm_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + return lzma_simple_coder_init(next, allocator, filters, + &arm_code, 0, 4, 4, is_encoder); +} + + +extern lzma_ret +lzma_simple_arm_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const 
lzma_filter_info *filters) +{ + return arm_coder_init(next, allocator, filters, true); +} + + +extern lzma_ret +lzma_simple_arm_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return arm_coder_init(next, allocator, filters, false); +} diff --git a/src/liblzma/simple/armthumb.c b/src/liblzma/simple/armthumb.c new file mode 100644 index 000000000000..eb6a69d1282a --- /dev/null +++ b/src/liblzma/simple/armthumb.c @@ -0,0 +1,74 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file armthumb.c +/// \brief Filter for ARM-Thumb binaries +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +static size_t +armthumb_code(lzma_simple *simple lzma_attribute((unused)), + uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + size_t i; + for (i = 0; i + 4 <= size; i += 2) { + if ((buffer[i + 1] & 0xF8) == 0xF0 + && (buffer[i + 3] & 0xF8) == 0xF8) { + uint32_t src = ((buffer[i + 1] & 0x7) << 19) + | (buffer[i + 0] << 11) + | ((buffer[i + 3] & 0x7) << 8) + | (buffer[i + 2]); + + src <<= 1; + + uint32_t dest; + if (is_encoder) + dest = now_pos + (uint32_t)(i) + 4 + src; + else + dest = src - (now_pos + (uint32_t)(i) + 4); + + dest >>= 1; + buffer[i + 1] = 0xF0 | ((dest >> 19) & 0x7); + buffer[i + 0] = (dest >> 11); + buffer[i + 3] = 0xF8 | ((dest >> 8) & 0x7); + buffer[i + 2] = (dest); + i += 2; + } + } + + return i; +} + + +static lzma_ret +armthumb_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + return lzma_simple_coder_init(next, allocator, filters, + &armthumb_code, 0, 4, 2, is_encoder); +} + + +extern lzma_ret +lzma_simple_armthumb_encoder_init(lzma_next_coder *next, + 
lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return armthumb_coder_init(next, allocator, filters, true); +} + + +extern lzma_ret +lzma_simple_armthumb_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return armthumb_coder_init(next, allocator, filters, false); +} diff --git a/src/liblzma/simple/ia64.c b/src/liblzma/simple/ia64.c new file mode 100644 index 000000000000..fd263d4aea0d --- /dev/null +++ b/src/liblzma/simple/ia64.c @@ -0,0 +1,110 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file ia64.c +/// \brief Filter for IA64 (Itanium) binaries +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +static size_t +ia64_code(lzma_simple *simple lzma_attribute((unused)), + uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + static const uint32_t BRANCH_TABLE[32] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 6, 6, 0, 0, 7, 7, + 4, 4, 0, 0, 4, 4, 0, 0 + }; + + size_t i; + for (i = 0; i + 16 <= size; i += 16) { + const uint32_t instr_template = buffer[i] & 0x1F; + const uint32_t mask = BRANCH_TABLE[instr_template]; + uint32_t bit_pos = 5; + + for (size_t slot = 0; slot < 3; ++slot, bit_pos += 41) { + if (((mask >> slot) & 1) == 0) + continue; + + const size_t byte_pos = (bit_pos >> 3); + const uint32_t bit_res = bit_pos & 0x7; + uint64_t instruction = 0; + + for (size_t j = 0; j < 6; ++j) + instruction += (uint64_t)( + buffer[i + j + byte_pos]) + << (8 * j); + + uint64_t inst_norm = instruction >> bit_res; + + if (((inst_norm >> 37) & 0xF) == 0x5 + && ((inst_norm >> 9) & 0x7) == 0 + /* && (inst_norm & 0x3F)== 0 */ + ) { + uint32_t src = (uint32_t)( + (inst_norm >> 13) & 0xFFFFF); + src |= ((inst_norm 
>> 36) & 1) << 20; + + src <<= 4; + + uint32_t dest; + if (is_encoder) + dest = now_pos + (uint32_t)(i) + src; + else + dest = src - (now_pos + (uint32_t)(i)); + + dest >>= 4; + + inst_norm &= ~((uint64_t)(0x8FFFFF) << 13); + inst_norm |= (uint64_t)(dest & 0xFFFFF) << 13; + inst_norm |= (uint64_t)(dest & 0x100000) + << (36 - 20); + + instruction &= (1 << bit_res) - 1; + instruction |= (inst_norm << bit_res); + + for (size_t j = 0; j < 6; j++) + buffer[i + j + byte_pos] = (uint8_t)( + instruction + >> (8 * j)); + } + } + } + + return i; +} + + +static lzma_ret +ia64_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + return lzma_simple_coder_init(next, allocator, filters, + &ia64_code, 0, 16, 16, is_encoder); +} + + +extern lzma_ret +lzma_simple_ia64_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return ia64_coder_init(next, allocator, filters, true); +} + + +extern lzma_ret +lzma_simple_ia64_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return ia64_coder_init(next, allocator, filters, false); +} diff --git a/src/liblzma/simple/powerpc.c b/src/liblzma/simple/powerpc.c new file mode 100644 index 000000000000..aaa14f221977 --- /dev/null +++ b/src/liblzma/simple/powerpc.c @@ -0,0 +1,73 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file powerpc.c +/// \brief Filter for PowerPC (big endian) binaries +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +static size_t +powerpc_code(lzma_simple *simple lzma_attribute((unused)), + uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + size_t i; + for (i = 0; i + 4 <= size; i += 4) { + // PowerPC branch 6(48) 24(Offset) 1(Abs) 1(Link) + if ((buffer[i] >> 2) == 0x12 + && ((buffer[i + 3] & 3) == 1)) { + + const uint32_t src = ((buffer[i + 0] & 3) << 24) + | (buffer[i + 1] << 16) + | (buffer[i + 2] << 8) + | (buffer[i + 3] & (~3)); + + uint32_t dest; + if (is_encoder) + dest = now_pos + (uint32_t)(i) + src; + else + dest = src - (now_pos + (uint32_t)(i)); + + buffer[i + 0] = 0x48 | ((dest >> 24) & 0x03); + buffer[i + 1] = (dest >> 16); + buffer[i + 2] = (dest >> 8); + buffer[i + 3] &= 0x03; + buffer[i + 3] |= dest; + } + } + + return i; +} + + +static lzma_ret +powerpc_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + return lzma_simple_coder_init(next, allocator, filters, + &powerpc_code, 0, 4, 4, is_encoder); +} + + +extern lzma_ret +lzma_simple_powerpc_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return powerpc_coder_init(next, allocator, filters, true); +} + + +extern lzma_ret +lzma_simple_powerpc_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return powerpc_coder_init(next, allocator, filters, false); +} diff --git a/src/liblzma/simple/simple_coder.c b/src/liblzma/simple/simple_coder.c new file mode 100644 index 000000000000..06db86ec2d0a --- /dev/null +++ b/src/liblzma/simple/simple_coder.c @@ -0,0 +1,280 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file simple_coder.c +/// \brief Wrapper for simple filters +/// +/// Simple filters don't change the size of the data i.e. 
number of bytes +/// in equals the number of bytes out. +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +/// Copies or encodes/decodes more data to out[]. +static lzma_ret +copy_or_code(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + assert(!coder->end_was_reached); + + if (coder->next.code == NULL) { + lzma_bufcpy(in, in_pos, in_size, out, out_pos, out_size); + + // Check if end of stream was reached. + if (coder->is_encoder && action == LZMA_FINISH + && *in_pos == in_size) + coder->end_was_reached = true; + + } else { + // Call the next coder in the chain to provide us some data. + // We don't care about uncompressed_size here, because + // the next filter in the chain will do it for us (since + // we don't change the size of the data). + const lzma_ret ret = coder->next.code( + coder->next.coder, allocator, + in, in_pos, in_size, + out, out_pos, out_size, action); + + if (ret == LZMA_STREAM_END) { + assert(!coder->is_encoder + || action == LZMA_FINISH); + coder->end_was_reached = true; + + } else if (ret != LZMA_OK) { + return ret; + } + } + + return LZMA_OK; +} + + +static size_t +call_filter(lzma_coder *coder, uint8_t *buffer, size_t size) +{ + const size_t filtered = coder->filter(coder->simple, + coder->now_pos, coder->is_encoder, + buffer, size); + coder->now_pos += filtered; + return filtered; +} + + +static lzma_ret +simple_code(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + // TODO: Add partial support for LZMA_SYNC_FLUSH.
We can support it + // in cases when the filter is able to filter everything. With most + // simple filters it can be done at offset that is a multiple of 2, + // 4, or 16. With x86 filter, it needs good luck, and thus cannot + // be made to work predictably. + if (action == LZMA_SYNC_FLUSH) + return LZMA_OPTIONS_ERROR; + + // Flush already filtered data from coder->buffer[] to out[]. + if (coder->pos < coder->filtered) { + lzma_bufcpy(coder->buffer, &coder->pos, coder->filtered, + out, out_pos, out_size); + + // If we couldn't flush all the filtered data, return to + // application immediately. + if (coder->pos < coder->filtered) + return LZMA_OK; + + if (coder->end_was_reached) { + assert(coder->filtered == coder->size); + return LZMA_STREAM_END; + } + } + + // If we get here, there is no filtered data left in the buffer. + coder->filtered = 0; + + assert(!coder->end_was_reached); + + // If there is more output space left than there is unfiltered data + // in coder->buffer[], flush coder->buffer[] to out[], and copy/code + // more data to out[] hopefully filling it completely. Then filter + // the data in out[]. This step is where most of the data gets + // filtered if the buffer sizes used by the application are reasonable. + const size_t out_avail = out_size - *out_pos; + const size_t buf_avail = coder->size - coder->pos; + if (out_avail > buf_avail) { + // Store the old position so that we know from which byte + // to start filtering. + const size_t out_start = *out_pos; + + // Flush data from coder->buffer[] to out[], but don't reset + // coder->pos and coder->size yet. This way the coder can be + // restarted if the next filter in the chain returns e.g. + // LZMA_MEM_ERROR. + memcpy(out + *out_pos, coder->buffer + coder->pos, buf_avail); + *out_pos += buf_avail; + + // Copy/Encode/Decode more data to out[]. 
+ { + const lzma_ret ret = copy_or_code(coder, allocator, + in, in_pos, in_size, + out, out_pos, out_size, action); + assert(ret != LZMA_STREAM_END); + if (ret != LZMA_OK) + return ret; + } + + // Filter out[]. + const size_t size = *out_pos - out_start; + const size_t filtered = call_filter( + coder, out + out_start, size); + + const size_t unfiltered = size - filtered; + assert(unfiltered <= coder->allocated / 2); + + // Now we can update coder->pos and coder->size, because + // the next coder in the chain (if any) was successful. + coder->pos = 0; + coder->size = unfiltered; + + if (coder->end_was_reached) { + // The last bytes have been copied to out[] already. + // They are left as is. + coder->size = 0; + + } else if (unfiltered > 0) { + // There is unfiltered data left in out[]. Copy it to + // coder->buffer[] and rewind *out_pos appropriately. + *out_pos -= unfiltered; + memcpy(coder->buffer, out + *out_pos, unfiltered); + } + } else if (coder->pos > 0) { + memmove(coder->buffer, coder->buffer + coder->pos, buf_avail); + coder->size -= coder->pos; + coder->pos = 0; + } + + assert(coder->pos == 0); + + // If coder->buffer[] isn't empty, try to fill it by copying/decoding + // more data. Then filter coder->buffer[] and copy the successfully + // filtered data to out[]. It is probable, that some filtered and + // unfiltered data will be left to coder->buffer[]. + if (coder->size > 0) { + { + const lzma_ret ret = copy_or_code(coder, allocator, + in, in_pos, in_size, + coder->buffer, &coder->size, + coder->allocated, action); + assert(ret != LZMA_STREAM_END); + if (ret != LZMA_OK) + return ret; + } + + coder->filtered = call_filter( + coder, coder->buffer, coder->size); + + // Everything is considered to be filtered if coder->buffer[] + // contains the last bytes of the data. + if (coder->end_was_reached) + coder->filtered = coder->size; + + // Flush as much as possible.
+ lzma_bufcpy(coder->buffer, &coder->pos, coder->filtered, + out, out_pos, out_size); + } + + // Check if we got everything done. + if (coder->end_was_reached && coder->pos == coder->size) + return LZMA_STREAM_END; + + return LZMA_OK; +} + + +static void +simple_coder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->next, allocator); + lzma_free(coder->simple, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +simple_coder_update(lzma_coder *coder, lzma_allocator *allocator, + const lzma_filter *filters_null lzma_attribute((unused)), + const lzma_filter *reversed_filters) +{ + // No update support, just call the next filter in the chain. + return lzma_next_filter_update( + &coder->next, allocator, reversed_filters + 1); +} + + +extern lzma_ret +lzma_simple_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, + size_t (*filter)(lzma_simple *simple, uint32_t now_pos, + bool is_encoder, uint8_t *buffer, size_t size), + size_t simple_size, size_t unfiltered_max, + uint32_t alignment, bool is_encoder) +{ + // Allocate memory for the lzma_coder structure if needed. + if (next->coder == NULL) { + // Here we allocate space also for the temporary buffer. We + // need twice the size of unfiltered_max, because then it + // is always possible to filter at least unfiltered_max bytes + // more data in coder->buffer[] if it can be filled completely. + next->coder = lzma_alloc(sizeof(lzma_coder) + + 2 * unfiltered_max, allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &simple_code; + next->end = &simple_coder_end; + next->update = &simple_coder_update; + + next->coder->next = LZMA_NEXT_CODER_INIT; + next->coder->filter = filter; + next->coder->allocated = 2 * unfiltered_max; + + // Allocate memory for filter-specific data structure. 
+ if (simple_size > 0) { + next->coder->simple = lzma_alloc( + simple_size, allocator); + if (next->coder->simple == NULL) + return LZMA_MEM_ERROR; + } else { + next->coder->simple = NULL; + } + } + + if (filters[0].options != NULL) { + const lzma_options_bcj *simple = filters[0].options; + next->coder->now_pos = simple->start_offset; + if (next->coder->now_pos & (alignment - 1)) + return LZMA_OPTIONS_ERROR; + } else { + next->coder->now_pos = 0; + } + + // Reset variables. + next->coder->is_encoder = is_encoder; + next->coder->end_was_reached = false; + next->coder->pos = 0; + next->coder->filtered = 0; + next->coder->size = 0; + + return lzma_next_filter_init( + &next->coder->next, allocator, filters + 1); +} diff --git a/src/liblzma/simple/simple_coder.h b/src/liblzma/simple/simple_coder.h new file mode 100644 index 000000000000..0952fad33b34 --- /dev/null +++ b/src/liblzma/simple/simple_coder.h @@ -0,0 +1,60 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file simple_coder.h +/// \brief Wrapper for simple filters +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SIMPLE_CODER_H +#define LZMA_SIMPLE_CODER_H + +#include "common.h" + + +extern lzma_ret lzma_simple_x86_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_x86_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + + +extern lzma_ret lzma_simple_powerpc_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_powerpc_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + + +extern lzma_ret lzma_simple_ia64_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_ia64_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + + +extern lzma_ret lzma_simple_arm_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_arm_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + + +extern lzma_ret lzma_simple_armthumb_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_armthumb_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + + +extern lzma_ret lzma_simple_sparc_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_sparc_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +#endif diff --git a/src/liblzma/simple/simple_decoder.c b/src/liblzma/simple/simple_decoder.c new file mode 100644 index 000000000000..0beccd32a7d8 --- /dev/null +++ 
b/src/liblzma/simple/simple_decoder.c @@ -0,0 +1,40 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file simple_decoder.c +/// \brief Properties decoder for simple filters +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_decoder.h" + + +extern lzma_ret +lzma_simple_props_decode(void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size) +{ + if (props_size == 0) + return LZMA_OK; + + if (props_size != 4) + return LZMA_OPTIONS_ERROR; + + lzma_options_bcj *opt = lzma_alloc( + sizeof(lzma_options_bcj), allocator); + if (opt == NULL) + return LZMA_MEM_ERROR; + + opt->start_offset = unaligned_read32le(props); + + // Don't leave an options structure allocated if start_offset is zero. + if (opt->start_offset == 0) + lzma_free(opt, allocator); + else + *options = opt; + + return LZMA_OK; +} diff --git a/src/liblzma/simple/simple_decoder.h b/src/liblzma/simple/simple_decoder.h new file mode 100644 index 000000000000..b8bf590f7681 --- /dev/null +++ b/src/liblzma/simple/simple_decoder.h @@ -0,0 +1,22 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file simple_decoder.h +/// \brief Properties decoder for simple filters +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SIMPLE_DECODER_H +#define LZMA_SIMPLE_DECODER_H + +#include "simple_coder.h" + +extern lzma_ret lzma_simple_props_decode( + void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size); + +#endif diff --git a/src/liblzma/simple/simple_encoder.c b/src/liblzma/simple/simple_encoder.c new file mode 100644 index 000000000000..8aa463bed220 --- /dev/null +++ b/src/liblzma/simple/simple_encoder.c @@ -0,0 +1,38 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file simple_encoder.c +/// \brief Properties encoder for simple filters +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_encoder.h" + + +extern lzma_ret +lzma_simple_props_size(uint32_t *size, const void *options) +{ + const lzma_options_bcj *const opt = options; + *size = (opt == NULL || opt->start_offset == 0) ? 0 : 4; + return LZMA_OK; +} + + +extern lzma_ret +lzma_simple_props_encode(const void *options, uint8_t *out) +{ + const lzma_options_bcj *const opt = options; + + // The default start offset is zero, so we don't need to store any + // options unless the start offset is non-zero. 
+ if (opt == NULL || opt->start_offset == 0) + return LZMA_OK; + + unaligned_write32le(out, opt->start_offset); + + return LZMA_OK; +} diff --git a/src/liblzma/simple/simple_encoder.h b/src/liblzma/simple/simple_encoder.h new file mode 100644 index 000000000000..1cee4823a4ed --- /dev/null +++ b/src/liblzma/simple/simple_encoder.h @@ -0,0 +1,23 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file simple_encoder.h +/// \brief Properties encoder for simple filters +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SIMPLE_ENCODER_H +#define LZMA_SIMPLE_ENCODER_H + +#include "simple_coder.h" + + +extern lzma_ret lzma_simple_props_size(uint32_t *size, const void *options); + +extern lzma_ret lzma_simple_props_encode(const void *options, uint8_t *out); + +#endif diff --git a/src/liblzma/simple/simple_private.h b/src/liblzma/simple/simple_private.h new file mode 100644 index 000000000000..a69f82785fba --- /dev/null +++ b/src/liblzma/simple/simple_private.h @@ -0,0 +1,76 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file simple_private.h +/// \brief Private definitions for so called simple filters +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SIMPLE_PRIVATE_H +#define LZMA_SIMPLE_PRIVATE_H + +#include "simple_coder.h" + + +typedef struct lzma_simple_s lzma_simple; + +struct lzma_coder_s { + /// Next filter in the chain + lzma_next_coder next; + + /// True if the next coder in the chain has returned LZMA_STREAM_END + /// or if we have processed uncompressed_size bytes.
+ bool end_was_reached; + + /// True if filter() should encode the data; false to decode. + /// Currently all simple filters use the same function for encoding + /// and decoding, because the difference between encoders and decoders + /// is very small. + bool is_encoder; + + /// Pointer to filter-specific function, which does + /// the actual filtering. + size_t (*filter)(lzma_simple *simple, uint32_t now_pos, + bool is_encoder, uint8_t *buffer, size_t size); + + /// Pointer to filter-specific data, or NULL if filter doesn't need + /// any extra data. + lzma_simple *simple; + + /// The lowest 32 bits of the current position in the data. Most + /// filters need this to do conversions between absolute and relative + /// addresses. + uint32_t now_pos; + + /// Size of the memory allocated for the buffer. + size_t allocated; + + /// Flushing position in the temporary buffer. buffer[pos] is the + /// next byte to be copied to out[]. + size_t pos; + + /// buffer[filtered] is the first unfiltered byte. When pos is smaller + /// than filtered, there is unflushed filtered data in the buffer. + size_t filtered; + + /// Total number of bytes (both filtered and unfiltered) currently + /// in the temporary buffer. 
+ size_t size; + + /// Temporary buffer + uint8_t buffer[]; +}; + + +extern lzma_ret lzma_simple_coder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters, + size_t (*filter)(lzma_simple *simple, uint32_t now_pos, + bool is_encoder, uint8_t *buffer, size_t size), + size_t simple_size, size_t unfiltered_max, + uint32_t alignment, bool is_encoder); + +#endif diff --git a/src/liblzma/simple/sparc.c b/src/liblzma/simple/sparc.c new file mode 100644 index 000000000000..808a59aaac78 --- /dev/null +++ b/src/liblzma/simple/sparc.c @@ -0,0 +1,81 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file sparc.c +/// \brief Filter for SPARC binaries +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +static size_t +sparc_code(lzma_simple *simple lzma_attribute((unused)), + uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + size_t i; + for (i = 0; i + 4 <= size; i += 4) { + + if ((buffer[i] == 0x40 && (buffer[i + 1] & 0xC0) == 0x00) + || (buffer[i] == 0x7F + && (buffer[i + 1] & 0xC0) == 0xC0)) { + + uint32_t src = ((uint32_t)buffer[i + 0] << 24) + | ((uint32_t)buffer[i + 1] << 16) + | ((uint32_t)buffer[i + 2] << 8) + | ((uint32_t)buffer[i + 3]); + + src <<= 2; + + uint32_t dest; + if (is_encoder) + dest = now_pos + (uint32_t)(i) + src; + else + dest = src - (now_pos + (uint32_t)(i)); + + dest >>= 2; + + dest = (((0 - ((dest >> 22) & 1)) << 22) & 0x3FFFFFFF) + | (dest & 0x3FFFFF) + | 0x40000000; + + buffer[i + 0] = (uint8_t)(dest >> 24); + buffer[i + 1] = (uint8_t)(dest >> 16); + buffer[i + 2] = (uint8_t)(dest >> 8); + buffer[i + 3] = (uint8_t)(dest); + } + } + + return i; +} + + +static lzma_ret +sparc_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + 
const lzma_filter_info *filters, bool is_encoder) +{ + return lzma_simple_coder_init(next, allocator, filters, + &sparc_code, 0, 4, 4, is_encoder); +} + + +extern lzma_ret +lzma_simple_sparc_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return sparc_coder_init(next, allocator, filters, true); +} + + +extern lzma_ret +lzma_simple_sparc_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return sparc_coder_init(next, allocator, filters, false); +} diff --git a/src/liblzma/simple/x86.c b/src/liblzma/simple/x86.c new file mode 100644 index 000000000000..5d1509bb2278 --- /dev/null +++ b/src/liblzma/simple/x86.c @@ -0,0 +1,154 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file x86.c +/// \brief Filter for x86 binaries (BCJ filter) +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +#define Test86MSByte(b) ((b) == 0 || (b) == 0xFF) + + +struct lzma_simple_s { + uint32_t prev_mask; + uint32_t prev_pos; +}; + + +static size_t +x86_code(lzma_simple *simple, uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + static const bool MASK_TO_ALLOWED_STATUS[8] + = { true, true, true, false, true, false, false, false }; + + static const uint32_t MASK_TO_BIT_NUMBER[8] + = { 0, 1, 2, 2, 3, 3, 3, 3 }; + + uint32_t prev_mask = simple->prev_mask; + uint32_t prev_pos = simple->prev_pos; + + if (size < 5) + return 0; + + if (now_pos - prev_pos > 5) + prev_pos = now_pos - 5; + + const size_t limit = size - 5; + size_t buffer_pos = 0; + + while (buffer_pos <= limit) { + uint8_t b = buffer[buffer_pos]; + if (b != 0xE8 && b != 0xE9) { + ++buffer_pos; + continue; + } + + const uint32_t offset = now_pos + (uint32_t)(buffer_pos) + - prev_pos; + prev_pos = now_pos + (uint32_t)(buffer_pos); + + if (offset > 5) { + prev_mask = 0; + } else { + for (uint32_t i = 0; i < offset; ++i) { + prev_mask &= 0x77; + prev_mask <<= 1; + } + } + + b = buffer[buffer_pos + 4]; + + if (Test86MSByte(b) + && MASK_TO_ALLOWED_STATUS[(prev_mask >> 1) & 0x7] + && (prev_mask >> 1) < 0x10) { + + uint32_t src = ((uint32_t)(b) << 24) + | ((uint32_t)(buffer[buffer_pos + 3]) << 16) + | ((uint32_t)(buffer[buffer_pos + 2]) << 8) + | (buffer[buffer_pos + 1]); + + uint32_t dest; + while (true) { + if (is_encoder) + dest = src + (now_pos + (uint32_t)( + buffer_pos) + 5); + else + dest = src - (now_pos + (uint32_t)( + buffer_pos) + 5); + + if (prev_mask == 0) + break; + + const uint32_t i = MASK_TO_BIT_NUMBER[ + prev_mask >> 1]; + + b = (uint8_t)(dest >> (24 - i * 8)); + + if (!Test86MSByte(b)) + break; + + src = dest ^ ((1 << (32 - i * 8)) - 1); + } + + buffer[buffer_pos + 4] + = (uint8_t)(~(((dest >> 24) & 1) - 1)); + buffer[buffer_pos + 3] = (uint8_t)(dest >> 16); + 
buffer[buffer_pos + 2] = (uint8_t)(dest >> 8); + buffer[buffer_pos + 1] = (uint8_t)(dest); + buffer_pos += 5; + prev_mask = 0; + + } else { + ++buffer_pos; + prev_mask |= 1; + if (Test86MSByte(b)) + prev_mask |= 0x10; + } + } + + simple->prev_mask = prev_mask; + simple->prev_pos = prev_pos; + + return buffer_pos; +} + + +static lzma_ret +x86_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + const lzma_ret ret = lzma_simple_coder_init(next, allocator, filters, + &x86_code, sizeof(lzma_simple), 5, 1, is_encoder); + + if (ret == LZMA_OK) { + next->coder->simple->prev_mask = 0; + next->coder->simple->prev_pos = (uint32_t)(-5); + } + + return ret; +} + + +extern lzma_ret +lzma_simple_x86_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return x86_coder_init(next, allocator, filters, true); +} + + +extern lzma_ret +lzma_simple_x86_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return x86_coder_init(next, allocator, filters, false); +} diff --git a/src/liblzma/subblock/subblock_decoder.c b/src/liblzma/subblock/subblock_decoder.c new file mode 100644 index 000000000000..e055cee3ea78 --- /dev/null +++ b/src/liblzma/subblock/subblock_decoder.c @@ -0,0 +1,630 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file subblock_decoder.c +/// \brief Decoder of the Subblock filter +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "subblock_decoder.h" +#include "subblock_decoder_helper.h" +#include "filter_decoder.h" + + +/// Maximum number of consecutive Subblocks with Subblock Type Padding +#define PADDING_MAX 31 + + +struct lzma_coder_s { + lzma_next_coder next; + + enum { + // These require that there is at least one input + // byte available. + SEQ_FLAGS, + SEQ_FILTER_FLAGS, + SEQ_FILTER_END, + SEQ_REPEAT_COUNT_1, + SEQ_REPEAT_COUNT_2, + SEQ_REPEAT_COUNT_3, + SEQ_REPEAT_SIZE, + SEQ_REPEAT_READ_DATA, + SEQ_SIZE_1, + SEQ_SIZE_2, + SEQ_SIZE_3, // This must be right before SEQ_DATA. + + // These don't require any input to be available. + SEQ_DATA, + SEQ_REPEAT_FAST, + SEQ_REPEAT_NORMAL, + } sequence; + + /// Number of bytes left in the current Subblock Data field. + size_t size; + + /// Number of consecutive Subblocks with Subblock Type Padding + uint32_t padding; + + /// True when .next.code() has returned LZMA_STREAM_END. + bool next_finished; + + /// True when the Subblock decoder has detected End of Payload Marker. + /// This may become true before next_finished becomes true. + bool this_finished; + + /// True if Subfilters are allowed. + bool allow_subfilters; + + /// Indicates if at least one byte of decoded output has been + /// produced after enabling Subfilter. + bool got_output_with_subfilter; + + /// Possible subfilter + lzma_next_coder subfilter; + + /// Filter Flags decoder is needed to parse the ID and Properties + /// of the subfilter. + lzma_next_coder filter_flags_decoder; + + /// The filter_flags_decoder stores its results here. + lzma_filter filter_flags; + + /// Options for the Subblock decoder helper. This is used to tell + /// the helper when it should return LZMA_STREAM_END to the subfilter. 
+ lzma_options_subblock_helper helper; + + struct { + /// How many times buffer should be repeated + size_t count; + + /// Size of the buffer + size_t size; + + /// Position in the buffer + size_t pos; + + /// Buffer to hold the data to be repeated + uint8_t buffer[LZMA_SUBBLOCK_RLE_MAX]; + } repeat; + + /// Temporary buffer needed when the Subblock filter is not the last + /// filter in the chain. The output of the next filter is first + /// decoded into buffer[], which is then used as input for the actual + /// Subblock decoder. + struct { + size_t pos; + size_t size; + uint8_t buffer[LZMA_BUFFER_SIZE]; + } temp; +}; + + +/// Values of valid Subblock Flags +enum { + FLAG_PADDING, + FLAG_EOPM, + FLAG_DATA, + FLAG_REPEAT, + FLAG_SET_SUBFILTER, + FLAG_END_SUBFILTER, +}; + + +/// Calls the subfilter and returns its result unchanged. +static lzma_ret +subfilter_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + assert(coder->subfilter.code != NULL); + + // Call the subfilter. + const lzma_ret ret = coder->subfilter.code( + coder->subfilter.coder, allocator, + in, in_pos, in_size, out, out_pos, out_size, action); + + return ret; +} + + +static lzma_ret +decode_buffer(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + while (*out_pos < out_size && (*in_pos < in_size + || coder->sequence >= SEQ_DATA)) + switch (coder->sequence) { + case SEQ_FLAGS: { + // Do the correct action depending on the Subblock Type. + switch (in[*in_pos] >> 4) { + case FLAG_PADDING: + // Only check that reserved bits are zero. + if (++coder->padding > PADDING_MAX + || in[*in_pos] & 0x0F) + return LZMA_DATA_ERROR; + ++*in_pos; + break; + + case FLAG_EOPM: + // There must be no Padding before EOPM.
+ if (coder->padding != 0) + return LZMA_DATA_ERROR; + + // Check that reserved bits are zero. + if (in[*in_pos] & 0x0F) + return LZMA_DATA_ERROR; + + // There must be no Subfilter enabled. + if (coder->subfilter.code != NULL) + return LZMA_DATA_ERROR; + + ++*in_pos; + return LZMA_STREAM_END; + + case FLAG_DATA: + // First four bits of the Subblock Data size. + coder->size = in[*in_pos] & 0x0F; + ++*in_pos; + coder->got_output_with_subfilter = true; + coder->sequence = SEQ_SIZE_1; + break; + + case FLAG_REPEAT: + // First four bits of the Repeat Count. We use + // coder->size as a temporary place for it. + coder->size = in[*in_pos] & 0x0F; + ++*in_pos; + coder->got_output_with_subfilter = true; + coder->sequence = SEQ_REPEAT_COUNT_1; + break; + + case FLAG_SET_SUBFILTER: { + if (coder->padding != 0 || (in[*in_pos] & 0x0F) + || coder->subfilter.code != NULL + || !coder->allow_subfilters) + return LZMA_DATA_ERROR; + + assert(coder->filter_flags.options == NULL); + abort(); +// return_if_error(lzma_filter_flags_decoder_init( +// &coder->filter_flags_decoder, +// allocator, &coder->filter_flags)); + + coder->got_output_with_subfilter = false; + + ++*in_pos; + coder->sequence = SEQ_FILTER_FLAGS; + break; + } + + case FLAG_END_SUBFILTER: { + if (coder->padding != 0 || (in[*in_pos] & 0x0F) + || coder->subfilter.code == NULL + || !coder->got_output_with_subfilter) + return LZMA_DATA_ERROR; + + // Tell the helper filter to indicate End of Input + // to our subfilter. + coder->helper.end_was_reached = true; + + size_t dummy = 0; + const lzma_ret ret = subfilter_decode(coder, allocator, + NULL, &dummy, 0, out, out_pos,out_size, + action); + + // If we didn't reach the end of the subfilter's output + // yet, return to the application. On the next call we + // will get to this same switch-case again, because we + // haven't updated *in_pos yet. + if (ret != LZMA_STREAM_END) + return ret; + + // Free Subfilter's memory. 
This is a bit debatable, + // since we could avoid some malloc()/free() calls + // if the same Subfilter gets used soon again. But + // if Subfilter isn't used again, we could leave + // a memory-hogging filter dangling until someone + // frees Subblock filter itself. + lzma_next_end(&coder->subfilter, allocator); + + // Free memory used for subfilter options. This is + // safe, because we don't support any Subfilter that + // would allow pointers in the options structure. + lzma_free(coder->filter_flags.options, allocator); + coder->filter_flags.options = NULL; + + ++*in_pos; + + break; + } + + default: + return LZMA_DATA_ERROR; + } + + break; + } + + case SEQ_FILTER_FLAGS: { + const lzma_ret ret = coder->filter_flags_decoder.code( + coder->filter_flags_decoder.coder, allocator, + in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN); + if (ret != LZMA_STREAM_END) + return ret == LZMA_OPTIONS_ERROR + ? LZMA_DATA_ERROR : ret; + + // Don't free the filter_flags_decoder. It doesn't take much + // memory and we may need it again. + + // Initialize the Subfilter. Subblock and Copy filters are + // not allowed. + if (coder->filter_flags.id == LZMA_FILTER_SUBBLOCK) + return LZMA_DATA_ERROR; + + coder->helper.end_was_reached = false; + + lzma_filter filters[3] = { + { + .id = coder->filter_flags.id, + .options = coder->filter_flags.options, + }, { + .id = LZMA_FILTER_SUBBLOCK_HELPER, + .options = &coder->helper, + }, { + .id = LZMA_VLI_UNKNOWN, + .options = NULL, + } + }; + + // Optimization: We know that LZMA uses End of Payload Marker + // (not End of Input), so we can omit the helper filter. + if (filters[0].id == LZMA_FILTER_LZMA1) + filters[1].id = LZMA_VLI_UNKNOWN; + + return_if_error(lzma_raw_decoder_init( + &coder->subfilter, allocator, filters)); + + coder->sequence = SEQ_FLAGS; + break; + } + + case SEQ_FILTER_END: + // We are in the beginning of a Subblock. The next Subblock + // whose type is not Padding, must indicate end of Subfilter. 
+ if (in[*in_pos] == (FLAG_PADDING << 4)) { + ++*in_pos; + break; + } + + if (in[*in_pos] != (FLAG_END_SUBFILTER << 4)) + return LZMA_DATA_ERROR; + + coder->sequence = SEQ_FLAGS; + break; + + case SEQ_REPEAT_COUNT_1: + case SEQ_SIZE_1: + // We use the same code to parse + // - the Size (28 bits) in Subblocks of type Data; and + // - the Repeat count (28 bits) in Subblocks of type + // Repeating Data. + coder->size |= (size_t)(in[*in_pos]) << 4; + ++*in_pos; + ++coder->sequence; + break; + + case SEQ_REPEAT_COUNT_2: + case SEQ_SIZE_2: + coder->size |= (size_t)(in[*in_pos]) << 12; + ++*in_pos; + ++coder->sequence; + break; + + case SEQ_REPEAT_COUNT_3: + case SEQ_SIZE_3: + coder->size |= (size_t)(in[*in_pos]) << 20; + ++*in_pos; + + // The real value is the stored value plus one. + ++coder->size; + + // This moves to SEQ_REPEAT_SIZE or SEQ_DATA. That's why + // SEQ_DATA must be right after SEQ_SIZE_3 in coder->sequence. + ++coder->sequence; + break; + + case SEQ_REPEAT_SIZE: + // Move the Repeat Count to the correct variable and parse + // the Size of the Data to be repeated. + coder->repeat.count = coder->size; + coder->repeat.size = (size_t)(in[*in_pos]) + 1; + coder->repeat.pos = 0; + + // The size of the Data field must be bigger than the number + // of Padding bytes before this Subblock. + if (coder->repeat.size <= coder->padding) + return LZMA_DATA_ERROR; + + ++*in_pos; + coder->padding = 0; + coder->sequence = SEQ_REPEAT_READ_DATA; + break; + + case SEQ_REPEAT_READ_DATA: { + // Fill coder->repeat.buffer[]. 
+ const size_t in_avail = in_size - *in_pos; + const size_t out_avail + = coder->repeat.size - coder->repeat.pos; + const size_t copy_size = MIN(in_avail, out_avail); + + memcpy(coder->repeat.buffer + coder->repeat.pos, + in + *in_pos, copy_size); + *in_pos += copy_size; + coder->repeat.pos += copy_size; + + if (coder->repeat.pos == coder->repeat.size) { + coder->repeat.pos = 0; + + if (coder->repeat.size == 1 + && coder->subfilter.code == NULL) + coder->sequence = SEQ_REPEAT_FAST; + else + coder->sequence = SEQ_REPEAT_NORMAL; + } + + break; + } + + case SEQ_DATA: { + // The size of the Data field must be bigger than the number + // of Padding bytes before this Subblock. + assert(coder->size > 0); + if (coder->size <= coder->padding) + return LZMA_DATA_ERROR; + + coder->padding = 0; + + // Limit the amount of input to match the available + // Subblock Data size. + size_t in_limit; + if (in_size - *in_pos > coder->size) + in_limit = *in_pos + coder->size; + else + in_limit = in_size; + + if (coder->subfilter.code == NULL) { + const size_t copy_size = lzma_bufcpy( + in, in_pos, in_limit, + out, out_pos, out_size); + + coder->size -= copy_size; + } else { + const size_t in_start = *in_pos; + const lzma_ret ret = subfilter_decode( + coder, allocator, + in, in_pos, in_limit, + out, out_pos, out_size, + action); + + // Update the number of unprocessed bytes left in + // this Subblock. This assert() is true because + // in_limit prevents *in_pos getting too big. + assert(*in_pos - in_start <= coder->size); + coder->size -= *in_pos - in_start; + + if (ret == LZMA_STREAM_END) { + // End of Subfilter can occur only at + // a Subblock boundary. + if (coder->size != 0) + return LZMA_DATA_ERROR; + + // We need a Subblock with Unset + // Subfilter before more data. + coder->sequence = SEQ_FILTER_END; + break; + } + + if (ret != LZMA_OK) + return ret; + } + + // If we couldn't process the whole Subblock Data yet, return. 
+ if (coder->size > 0) + return LZMA_OK; + + coder->sequence = SEQ_FLAGS; + break; + } + + case SEQ_REPEAT_FAST: { + // Optimization for cases when there is only one byte to + // repeat and no Subfilter. + const size_t out_avail = out_size - *out_pos; + const size_t copy_size = MIN(coder->repeat.count, out_avail); + + memset(out + *out_pos, coder->repeat.buffer[0], copy_size); + + *out_pos += copy_size; + coder->repeat.count -= copy_size; + + if (coder->repeat.count != 0) + return LZMA_OK; + + coder->sequence = SEQ_FLAGS; + break; + } + + case SEQ_REPEAT_NORMAL: + do { + // Cycle the repeat buffer if needed. + if (coder->repeat.pos == coder->repeat.size) { + if (--coder->repeat.count == 0) { + coder->sequence = SEQ_FLAGS; + break; + } + + coder->repeat.pos = 0; + } + + if (coder->subfilter.code == NULL) { + lzma_bufcpy(coder->repeat.buffer, + &coder->repeat.pos, + coder->repeat.size, + out, out_pos, out_size); + } else { + const lzma_ret ret = subfilter_decode( + coder, allocator, + coder->repeat.buffer, + &coder->repeat.pos, + coder->repeat.size, + out, out_pos, out_size, + action); + + if (ret == LZMA_STREAM_END) { + // End of Subfilter can occur only at + // a Subblock boundary. + if (coder->repeat.pos + != coder->repeat.size + || --coder->repeat + .count != 0) + return LZMA_DATA_ERROR; + + // We need a Subblock with Unset + // Subfilter before more data. 
+ coder->sequence = SEQ_FILTER_END; + break; + + } else if (ret != LZMA_OK) { + return ret; + } + } + } while (*out_pos < out_size); + + break; + + default: + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static lzma_ret +subblock_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + if (coder->next.code == NULL) + return decode_buffer(coder, allocator, in, in_pos, in_size, + out, out_pos, out_size, action); + + while (*out_pos < out_size) { + if (!coder->next_finished + && coder->temp.pos == coder->temp.size) { + coder->temp.pos = 0; + coder->temp.size = 0; + + const lzma_ret ret = coder->next.code( + coder->next.coder, + allocator, in, in_pos, in_size, + coder->temp.buffer, &coder->temp.size, + LZMA_BUFFER_SIZE, action); + + if (ret == LZMA_STREAM_END) + coder->next_finished = true; + else if (coder->temp.size == 0 || ret != LZMA_OK) + return ret; + } + + if (coder->this_finished) { + if (coder->temp.pos != coder->temp.size) + return LZMA_DATA_ERROR; + + if (coder->next_finished) + return LZMA_STREAM_END; + + return LZMA_OK; + } + + const lzma_ret ret = decode_buffer(coder, allocator, + coder->temp.buffer, &coder->temp.pos, + coder->temp.size, + out, out_pos, out_size, action); + + if (ret == LZMA_STREAM_END) + // The next coder in the chain hasn't finished + // yet. If the input data is valid, there + // must be no more output coming, but the + // next coder may still need a little more + // input to detect End of Payload Marker.
+ coder->this_finished = true; + else if (ret != LZMA_OK) + return ret; + else if (coder->next_finished && *out_pos < out_size) + return LZMA_DATA_ERROR; + } + + return LZMA_OK; +} + + +static void +subblock_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->next, allocator); + lzma_next_end(&coder->subfilter, allocator); + lzma_next_end(&coder->filter_flags_decoder, allocator); + lzma_free(coder->filter_flags.options, allocator); + lzma_free(coder, allocator); + return; +} + + +extern lzma_ret +lzma_subblock_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &subblock_decode; + next->end = &subblock_decoder_end; + + next->coder->next = LZMA_NEXT_CODER_INIT; + next->coder->subfilter = LZMA_NEXT_CODER_INIT; + next->coder->filter_flags_decoder = LZMA_NEXT_CODER_INIT; + + } else { + lzma_next_end(&next->coder->subfilter, allocator); + lzma_free(next->coder->filter_flags.options, allocator); + } + + next->coder->filter_flags.options = NULL; + + next->coder->sequence = SEQ_FLAGS; + next->coder->padding = 0; + next->coder->next_finished = false; + next->coder->this_finished = false; + next->coder->temp.pos = 0; + next->coder->temp.size = 0; + + if (filters[0].options != NULL) + next->coder->allow_subfilters = ((lzma_options_subblock *)( + filters[0].options))->allow_subfilters; + else + next->coder->allow_subfilters = false; + + return lzma_next_filter_init( + &next->coder->next, allocator, filters + 1); +} diff --git a/src/liblzma/subblock/subblock_decoder.h b/src/liblzma/subblock/subblock_decoder.h new file mode 100644 index 000000000000..d1030b2b94f5 --- /dev/null +++ b/src/liblzma/subblock/subblock_decoder.h @@ -0,0 +1,22 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file 
subblock_decoder.h +/// \brief Decoder of the Subblock filter +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SUBBLOCK_DECODER_H +#define LZMA_SUBBLOCK_DECODER_H + +#include "common.h" + + +extern lzma_ret lzma_subblock_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +#endif diff --git a/src/liblzma/subblock/subblock_decoder_helper.c b/src/liblzma/subblock/subblock_decoder_helper.c new file mode 100644 index 000000000000..2a864eddad9f --- /dev/null +++ b/src/liblzma/subblock/subblock_decoder_helper.c @@ -0,0 +1,70 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file subblock_decoder_helper.c +/// \brief Helper filter for the Subblock decoder +/// +/// This filter is used to indicate End of Input for subfilters needing it. +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "subblock_decoder_helper.h" + + +struct lzma_coder_s { + const lzma_options_subblock_helper *options; +}; + + +static lzma_ret +helper_decode(lzma_coder *coder, + lzma_allocator *allocator lzma_attribute((unused)), + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_action action lzma_attribute((unused))) +{ + // If end_was_reached is true, we cannot have any input. + assert(!coder->options->end_was_reached || *in_pos == in_size); + + // We can safely copy as much as possible, because we are never + // given more data than a single Subblock Data field. 
+ lzma_bufcpy(in, in_pos, in_size, out, out_pos, out_size); + + // Return LZMA_STREAM_END when instructed so by the Subblock decoder. + return coder->options->end_was_reached ? LZMA_STREAM_END : LZMA_OK; +} + + +static void +helper_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_free(coder, allocator); + return; +} + + +extern lzma_ret +lzma_subblock_decoder_helper_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + // This is always the last filter in the chain. + assert(filters[1].init == NULL); + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &helper_decode; + next->end = &helper_end; + } + + next->coder->options = filters[0].options; + + return LZMA_OK; +} diff --git a/src/liblzma/subblock/subblock_decoder_helper.h b/src/liblzma/subblock/subblock_decoder_helper.h new file mode 100644 index 000000000000..18dcbb39a397 --- /dev/null +++ b/src/liblzma/subblock/subblock_decoder_helper.h @@ -0,0 +1,29 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file subblock_decoder_helper.h +/// \brief Helper filter for the Subblock decoder +/// +/// This filter is used to indicate End of Input for subfilters needing it. +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SUBBLOCK_DECODER_HELPER_H +#define LZMA_SUBBLOCK_DECODER_HELPER_H + +#include "common.h" + + +typedef struct { + bool end_was_reached; +} lzma_options_subblock_helper; + + +extern lzma_ret lzma_subblock_decoder_helper_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +#endif diff --git a/src/liblzma/subblock/subblock_encoder.c b/src/liblzma/subblock/subblock_encoder.c new file mode 100644 index 000000000000..4f71f99cc639 --- /dev/null +++ b/src/liblzma/subblock/subblock_encoder.c @@ -0,0 +1,984 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file subblock_encoder.c +/// \brief Encoder of the Subblock filter +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "subblock_encoder.h" +#include "filter_encoder.h" + + +/// Maximum number of repeats that a single Repeating Data can indicate. +/// This is directly from the file format specification. +#define REPEAT_COUNT_MAX (1U << 28) + +/// Number of bytes the data chunk (not including the header part) must be +/// before we care about alignment. This is somewhat arbitrary. It just +/// doesn't make sense to waste bytes for alignment when the data chunk +/// is very small. +#define MIN_CHUNK_SIZE_FOR_ALIGN 4 + +/// Number of bytes of the header part of Subblock Type `Data'. This is +/// used as the `skew' argument for subblock_align(). +#define ALIGN_SKEW_DATA 4 + +/// Like above but for Repeating Data. +#define ALIGN_SKEW_REPEATING_DATA 5 + +/// Writes one byte to output buffer and updates the alignment counter. 
+#define write_byte(b) \ +do { \ + assert(*out_pos < out_size); \ + out[*out_pos] = b; \ + ++*out_pos; \ + ++coder->alignment.out_pos; \ +} while (0) + + +struct lzma_coder_s { + lzma_next_coder next; + bool next_finished; + + enum { + SEQ_FILL, + SEQ_FLUSH, + SEQ_RLE_COUNT_0, + SEQ_RLE_COUNT_1, + SEQ_RLE_COUNT_2, + SEQ_RLE_COUNT_3, + SEQ_RLE_SIZE, + SEQ_RLE_DATA, + SEQ_DATA_SIZE_0, + SEQ_DATA_SIZE_1, + SEQ_DATA_SIZE_2, + SEQ_DATA_SIZE_3, + SEQ_DATA, + SEQ_SUBFILTER_INIT, + SEQ_SUBFILTER_FLAGS, + } sequence; + + /// Pointer to the options given by the application. This is used + /// for two-way communication with the application. + lzma_options_subblock *options; + + /// Position in various arrays. + size_t pos; + + /// Holds subblock.size - 1 or rle.size - 1 when encoding size + /// of Data or Repeat Count. + uint32_t tmp; + + struct { + /// This is a copy of options->alignment, or + /// LZMA_SUBBLOCK_ALIGNMENT_DEFAULT if options is NULL. + uint32_t multiple; + + /// Number of input bytes which we have processed and started + /// writing out. 32-bit integer is enough since we care only + /// about the lowest bits when fixing alignment. + uint32_t in_pos; + + /// Number of bytes written out. + uint32_t out_pos; + } alignment; + + struct { + /// Pointer to allocated buffer holding the Data field + /// of Subblock Type "Data". + uint8_t *data; + + /// Number of bytes in the buffer. + size_t size; + + /// Allocated size of the buffer. + size_t limit; + + /// Number of input bytes that we have already read but + /// not yet started writing out. This can be different + /// to `size' when using Subfilter. That's why we track + /// in_pending separately for RLE (see below). + uint32_t in_pending; + } subblock; + + struct { + /// Buffer to hold the data that may be coded with + /// Subblock Type `Repeating Data'. + uint8_t buffer[LZMA_SUBBLOCK_RLE_MAX]; + + /// Number of bytes in buffer[]. 
+ size_t size; + + /// Number of times the first `size' bytes of buffer[] + /// will be repeated. + uint64_t count; + + /// Like subblock.in_pending above, but for RLE. + uint32_t in_pending; + } rle; + + struct { + enum { + SUB_NONE, + SUB_SET, + SUB_RUN, + SUB_FLUSH, + SUB_FINISH, + SUB_END_MARKER, + } mode; + + /// This is a copy of options->allow_subfilters. We use + /// this to verify that the application doesn't change + /// the value of allow_subfilters. + bool allow; + + /// When this is true, application is not allowed to modify + /// options->subfilter_mode. We may still modify it here. + bool mode_locked; + + /// True if we have encoded at least one byte of data with + /// the Subfilter. + bool got_input; + + /// Track the amount of input available once + /// LZMA_SUBFILTER_FINISH has been enabled. + /// This is needed for sanity checking (kind + /// of duplicating what common/code.c does). + size_t in_avail; + + /// Buffer for the Filter Flags field written after + /// the `Set Subfilter' indicator. + uint8_t *flags; + + /// Size of Filter Flags field. + uint32_t flags_size; + + /// Pointers to Subfilter. + lzma_next_coder subcoder; + + } subfilter; + + /// Temporary buffer used when we are not the last filter in the chain. + struct { + size_t pos; + size_t size; + uint8_t buffer[LZMA_BUFFER_SIZE]; + } temp; +}; + + +/// \brief Aligns the output buffer +/// +/// Aligns the output buffer so that after skew bytes the output position is +/// a multiple of coder->alignment.multiple. +static bool +subblock_align(lzma_coder *coder, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + size_t chunk_size, uint32_t skew) +{ + assert(*out_pos < out_size); + + // Fix the alignment only if it makes sense at least a little.
+ if (chunk_size >= MIN_CHUNK_SIZE_FOR_ALIGN) { + const uint32_t target = coder->alignment.in_pos + % coder->alignment.multiple; + + while ((coder->alignment.out_pos + skew) + % coder->alignment.multiple != target) { + // Zero indicates padding. + write_byte(0x00); + + // Check if output buffer got full and indicate it to + // the caller. + if (*out_pos == out_size) + return true; + } + } + + // Output buffer is not full. + return false; +} + + +/// \brief Checks if buffer contains repeated data +/// +/// \param needle Buffer containing a single repeat chunk +/// \param needle_size Size of needle in bytes +/// \param buf Buffer to search for repeated needles +/// \param buf_chunks Buffer size is buf_chunks * needle_size. +/// +/// \return True if the whole buf is filled with repeated needles. +/// +static bool +is_repeating(const uint8_t *restrict needle, size_t needle_size, + const uint8_t *restrict buf, size_t buf_chunks) +{ + while (buf_chunks-- != 0) { + if (memcmp(buf, needle, needle_size) != 0) + return false; + + buf += needle_size; + } + + return true; +} + + +/// \brief Optimizes the repeating style and updates coder->sequence +static void +subblock_rle_flush(lzma_coder *coder) +{ + // The Subblock decoder can use memset() when the size of the data + // being repeated is one byte, so we check if the RLE buffer is + // filled with a single repeating byte. + if (coder->rle.size > 1) { + const uint8_t b = coder->rle.buffer[0]; + size_t i = 0; + while (true) { + if (coder->rle.buffer[i] != b) + break; + + if (++i == coder->rle.size) { + // TODO Integer overflow check maybe, + // although this needs at least 2**63 bytes + // of input until it gets triggered... + coder->rle.count *= coder->rle.size; + coder->rle.size = 1; + break; + } + } + } + + if (coder->rle.count == 1) { + // The buffer should be repeated only once. It is + // waste of space to use Repeating Data. Instead, + // write a regular Data Subblock. 
See SEQ_RLE_COUNT_0 + // in subblock_buffer() for more info. + coder->tmp = coder->rle.size - 1; + } else if (coder->rle.count > REPEAT_COUNT_MAX) { + // There's so much to repeat that it doesn't fit into + // 28-bit integer. We will write two or more Subblocks + // of type Repeating Data. + coder->tmp = REPEAT_COUNT_MAX - 1; + } else { + coder->tmp = coder->rle.count - 1; + } + + coder->sequence = SEQ_RLE_COUNT_0; + + return; +} + + +/// \brief Resizes coder->subblock.data for a new size limit +static lzma_ret +subblock_data_size(lzma_coder *coder, lzma_allocator *allocator, + size_t new_limit) +{ + // Verify that the new limit is valid. + if (new_limit < LZMA_SUBBLOCK_DATA_SIZE_MIN + || new_limit > LZMA_SUBBLOCK_DATA_SIZE_MAX) + return LZMA_OPTIONS_ERROR; + + // If the new limit is different than the previous one, we need + // to reallocate the data buffer. + if (new_limit != coder->subblock.limit) { + lzma_free(coder->subblock.data, allocator); + coder->subblock.data = lzma_alloc(new_limit, allocator); + if (coder->subblock.data == NULL) + return LZMA_MEM_ERROR; + } + + coder->subblock.limit = new_limit; + + return LZMA_OK; +} + + +static lzma_ret +subblock_buffer(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + // Changing allow_subfilters is not allowed. + if (coder->options != NULL && coder->subfilter.allow + != coder->options->allow_subfilters) + return LZMA_PROG_ERROR; + + // Check if we need to do something special with the Subfilter. + if (coder->subfilter.allow) { + assert(coder->options != NULL); + + // See if subfilter_mode has been changed.
+ switch (coder->options->subfilter_mode) { + case LZMA_SUBFILTER_NONE: + if (coder->subfilter.mode != SUB_NONE) + return LZMA_PROG_ERROR; + break; + + case LZMA_SUBFILTER_SET: + if (coder->subfilter.mode_locked + || coder->subfilter.mode != SUB_NONE) + return LZMA_PROG_ERROR; + + coder->subfilter.mode = SUB_SET; + coder->subfilter.got_input = false; + + if (coder->sequence == SEQ_FILL) + coder->sequence = SEQ_FLUSH; + + break; + + case LZMA_SUBFILTER_RUN: + if (coder->subfilter.mode != SUB_RUN) + return LZMA_PROG_ERROR; + + break; + + case LZMA_SUBFILTER_FINISH: { + const size_t in_avail = in_size - *in_pos; + + if (coder->subfilter.mode == SUB_RUN) { + if (coder->subfilter.mode_locked) + return LZMA_PROG_ERROR; + + coder->subfilter.mode = SUB_FINISH; + coder->subfilter.in_avail = in_avail; + + } else if (coder->subfilter.mode != SUB_FINISH + || coder->subfilter.in_avail + != in_avail) { + return LZMA_PROG_ERROR; + } + + break; + } + + default: + return LZMA_OPTIONS_ERROR; + } + + // If we are sync-flushing or finishing, the application may + // no longer change subfilter_mode. Note that this check is + // done after checking the new subfilter_mode above; this + // way the application may e.g. set LZMA_SUBFILTER_SET and + // LZMA_SYNC_FLUSH at the same time, but it cannot modify + // subfilter_mode on the later lzma_code() calls before + // we have returned LZMA_STREAM_END. + if (action != LZMA_RUN) + coder->subfilter.mode_locked = true; + } + + // Main loop + while (*out_pos < out_size) + switch (coder->sequence) { + case SEQ_FILL: + // Grab the new Subblock Data Size and reallocate the buffer. 
+ if (coder->subblock.size == 0 && coder->options != NULL + && coder->options->subblock_data_size + != coder->subblock.limit) + return_if_error(subblock_data_size(coder, + allocator, coder->options + ->subblock_data_size)); + + if (coder->subfilter.mode == SUB_NONE) { + assert(coder->subfilter.subcoder.code == NULL); + + // No Subfilter is enabled, just copy the data as is. + coder->subblock.in_pending += lzma_bufcpy( + in, in_pos, in_size, + coder->subblock.data, + &coder->subblock.size, + coder->subblock.limit); + + // If we ran out of input before the whole buffer + // was filled, return to application. + if (coder->subblock.size < coder->subblock.limit + && action == LZMA_RUN) + return LZMA_OK; + + } else { + assert(coder->options->subfilter_mode + != LZMA_SUBFILTER_SET); + + // Using LZMA_FINISH automatically toggles + // LZMA_SUBFILTER_FINISH. + // + // NOTE: It is possible that application had set + // LZMA_SUBFILTER_SET and LZMA_FINISH at the same + // time. In that case it is possible that we will + // cycle to LZMA_SUBFILTER_RUN, LZMA_SUBFILTER_FINISH, + // and back to LZMA_SUBFILTER_NONE in a single + // Subblock encoder function call. + if (action == LZMA_FINISH) { + coder->options->subfilter_mode + = LZMA_SUBFILTER_FINISH; + coder->subfilter.mode = SUB_FINISH; + } + + const size_t in_start = *in_pos; + + const lzma_ret ret = coder->subfilter.subcoder.code( + coder->subfilter.subcoder.coder, + allocator, in, in_pos, in_size, + coder->subblock.data, + &coder->subblock.size, + coder->subblock.limit, + coder->subfilter.mode == SUB_FINISH + ? LZMA_FINISH : action); + + const size_t in_used = *in_pos - in_start; + coder->subblock.in_pending += in_used; + if (in_used > 0) + coder->subfilter.got_input = true; + + coder->subfilter.in_avail = in_size - *in_pos; + + if (ret == LZMA_STREAM_END) { + // All currently available input must have + // been processed. + assert(*in_pos == in_size); + + // Flush now. 
Even if coder->subblock.size + // happened to be zero, we still need to go + // to SEQ_FLUSH to possibly finish RLE or + // write the Subfilter Unset indicator. + coder->sequence = SEQ_FLUSH; + + if (coder->subfilter.mode == SUB_RUN) { + // Flushing with Subfilter enabled. + assert(action == LZMA_SYNC_FLUSH); + coder->subfilter.mode = SUB_FLUSH; + break; + } + + // Subfilter finished its job. + assert(coder->subfilter.mode == SUB_FINISH + || action == LZMA_FINISH); + + // At least one byte of input must have been + // encoded with the Subfilter. This is + // required by the file format specification. + if (!coder->subfilter.got_input) + return LZMA_PROG_ERROR; + + // We don't strictly need to do this, but + // doing it sounds like a good idea, because + // otherwise the Subfilter's memory could be + // left allocated for long time, and would + // just waste memory. + lzma_next_end(&coder->subfilter.subcoder, + allocator); + + // We need to flush the currently buffered + // data and write Unset Subfilter marker. + // Note that we cannot set + // coder->options->subfilter_mode to + // LZMA_SUBFILTER_NONE yet, because we + // haven't written the Unset Subfilter + // marker yet. + coder->subfilter.mode = SUB_END_MARKER; + coder->sequence = SEQ_FLUSH; + break; + } + + // Return if we couldn't fill the buffer or + // if an error occurred. + if (coder->subblock.size < coder->subblock.limit + || ret != LZMA_OK) + return ret; + } + + coder->sequence = SEQ_FLUSH; + + // SEQ_FILL doesn't produce any output so falling through + // to SEQ_FLUSH is safe. + assert(*out_pos < out_size); + + // Fall through + + case SEQ_FLUSH: + if (coder->options != NULL) { + // Update the alignment variable. 
+ coder->alignment.multiple = coder->options->alignment; + if (coder->alignment.multiple + < LZMA_SUBBLOCK_ALIGNMENT_MIN + || coder->alignment.multiple + > LZMA_SUBBLOCK_ALIGNMENT_MAX) + return LZMA_OPTIONS_ERROR; + + // Run-length encoder + // + // First check if there is some data pending and we + // have an obvious need to flush it immediately. + if (coder->rle.count > 0 + && (coder->rle.size + != coder->options->rle + || coder->subblock.size + % coder->rle.size)) { + subblock_rle_flush(coder); + break; + } + + // Grab the (possibly new) RLE chunk size and + // validate it. + coder->rle.size = coder->options->rle; + if (coder->rle.size > LZMA_SUBBLOCK_RLE_MAX) + return LZMA_OPTIONS_ERROR; + + if (coder->subblock.size != 0 + && coder->rle.size + != LZMA_SUBBLOCK_RLE_OFF + && coder->subblock.size + % coder->rle.size == 0) { + + // Initialize coder->rle.buffer if we don't + // have RLE already running. + if (coder->rle.count == 0) + memcpy(coder->rle.buffer, + coder->subblock.data, + coder->rle.size); + + // Test if coder->subblock.data is repeating. + // If coder->rle.count would overflow, we + // force flushing. Forced flushing shouldn't + // really happen in real-world situations. + const size_t count = coder->subblock.size + / coder->rle.size; + if (UINT64_MAX - count > coder->rle.count + && is_repeating( + coder->rle.buffer, + coder->rle.size, + coder->subblock.data, + count)) { + coder->rle.count += count; + coder->rle.in_pending += coder + ->subblock.in_pending; + coder->subblock.in_pending = 0; + coder->subblock.size = 0; + + } else if (coder->rle.count > 0) { + // It's not repeating or at least not + // with the same byte sequence as the + // earlier Subblock Data buffers. We + // have some data pending in the RLE + // buffer already, so do a flush. + // Once flushed, we will check again + // if the Subblock Data happens to + // contain a different repeating + // sequence. 
+ subblock_rle_flush(coder); + break; + } + } + } + + // If we now have some data left in coder->subblock, the RLE + // buffer is empty and we must write a regular Subblock Data. + if (coder->subblock.size > 0) { + assert(coder->rle.count == 0); + coder->tmp = coder->subblock.size - 1; + coder->sequence = SEQ_DATA_SIZE_0; + break; + } + + // Check if we should enable Subfilter. + if (coder->subfilter.mode == SUB_SET) { + if (coder->rle.count > 0) + subblock_rle_flush(coder); + else + coder->sequence = SEQ_SUBFILTER_INIT; + break; + } + + // Check if we have just finished Subfiltering. + if (coder->subfilter.mode == SUB_END_MARKER) { + if (coder->rle.count > 0) { + subblock_rle_flush(coder); + break; + } + + coder->options->subfilter_mode = LZMA_SUBFILTER_NONE; + coder->subfilter.mode = SUB_NONE; + + write_byte(0x50); + if (*out_pos == out_size) + return LZMA_OK; + } + + // Check if we have already written everything. + if (action != LZMA_RUN && *in_pos == in_size + && (coder->subfilter.mode == SUB_NONE + || coder->subfilter.mode == SUB_FLUSH)) { + if (coder->rle.count > 0) { + subblock_rle_flush(coder); + break; + } + + if (action == LZMA_SYNC_FLUSH) { + if (coder->subfilter.mode == SUB_FLUSH) + coder->subfilter.mode = SUB_RUN; + + coder->subfilter.mode_locked = false; + coder->sequence = SEQ_FILL; + + } else { + assert(action == LZMA_FINISH); + + // Write EOPM. + // NOTE: No need to use write_byte() here + // since we are finishing. + out[*out_pos] = 0x10; + ++*out_pos; + } + + return LZMA_STREAM_END; + } + + // Otherwise we have more work to do. + coder->sequence = SEQ_FILL; + break; + + case SEQ_RLE_COUNT_0: + assert(coder->rle.count > 0); + + if (coder->rle.count == 1) { + // The buffer should be repeated only once. Fix + // the alignment and write the first byte of + // Subblock Type `Data'. 
+ if (subblock_align(coder, out, out_pos, out_size, + coder->rle.size, ALIGN_SKEW_DATA)) + return LZMA_OK; + + write_byte(0x20 | (coder->tmp & 0x0F)); + + } else { + // We have something to actually repeat, which should + // mean that it takes less space with run-length + // encoding. + if (subblock_align(coder, out, out_pos, out_size, + coder->rle.size, + ALIGN_SKEW_REPEATING_DATA)) + return LZMA_OK; + + write_byte(0x30 | (coder->tmp & 0x0F)); + } + + // NOTE: If we have to write more than one Repeating Data + // due to rle.count > REPEAT_COUNT_MAX, the subsequent + // Repeating Data Subblocks may get wrong alignment, because + // we add rle.in_pending to alignment.in_pos at once instead + // of adding only as much as this particular Repeating Data + // consumed input data. Correct alignment is always restored + // after all the required Repeating Data Subblocks have been + // written. This problem occurs in such a weird cases that + // it's not worth fixing. + coder->alignment.out_pos += coder->rle.size; + coder->alignment.in_pos += coder->rle.in_pending; + coder->rle.in_pending = 0; + + coder->sequence = SEQ_RLE_COUNT_1; + break; + + case SEQ_RLE_COUNT_1: + write_byte(coder->tmp >> 4); + coder->sequence = SEQ_RLE_COUNT_2; + break; + + case SEQ_RLE_COUNT_2: + write_byte(coder->tmp >> 12); + coder->sequence = SEQ_RLE_COUNT_3; + break; + + case SEQ_RLE_COUNT_3: + write_byte(coder->tmp >> 20); + + // Again, see if we are writing regular Data or Repeating Data. + // In the former case, we skip SEQ_RLE_SIZE. 
+ if (coder->rle.count == 1) + coder->sequence = SEQ_RLE_DATA; + else + coder->sequence = SEQ_RLE_SIZE; + + if (coder->rle.count > REPEAT_COUNT_MAX) + coder->rle.count -= REPEAT_COUNT_MAX; + else + coder->rle.count = 0; + + break; + + case SEQ_RLE_SIZE: + assert(coder->rle.size >= LZMA_SUBBLOCK_RLE_MIN); + assert(coder->rle.size <= LZMA_SUBBLOCK_RLE_MAX); + write_byte(coder->rle.size - 1); + coder->sequence = SEQ_RLE_DATA; + break; + + case SEQ_RLE_DATA: + lzma_bufcpy(coder->rle.buffer, &coder->pos, coder->rle.size, + out, out_pos, out_size); + if (coder->pos < coder->rle.size) + return LZMA_OK; + + coder->pos = 0; + coder->sequence = SEQ_FLUSH; + break; + + case SEQ_DATA_SIZE_0: + // We need four bytes for the Size field. + if (subblock_align(coder, out, out_pos, out_size, + coder->subblock.size, ALIGN_SKEW_DATA)) + return LZMA_OK; + + coder->alignment.out_pos += coder->subblock.size; + coder->alignment.in_pos += coder->subblock.in_pending; + coder->subblock.in_pending = 0; + + write_byte(0x20 | (coder->tmp & 0x0F)); + coder->sequence = SEQ_DATA_SIZE_1; + break; + + case SEQ_DATA_SIZE_1: + write_byte(coder->tmp >> 4); + coder->sequence = SEQ_DATA_SIZE_2; + break; + + case SEQ_DATA_SIZE_2: + write_byte(coder->tmp >> 12); + coder->sequence = SEQ_DATA_SIZE_3; + break; + + case SEQ_DATA_SIZE_3: + write_byte(coder->tmp >> 20); + coder->sequence = SEQ_DATA; + break; + + case SEQ_DATA: + lzma_bufcpy(coder->subblock.data, &coder->pos, + coder->subblock.size, out, out_pos, out_size); + if (coder->pos < coder->subblock.size) + return LZMA_OK; + + coder->subblock.size = 0; + coder->pos = 0; + coder->sequence = SEQ_FLUSH; + break; + + case SEQ_SUBFILTER_INIT: { + assert(coder->subblock.size == 0); + assert(coder->subblock.in_pending == 0); + assert(coder->rle.count == 0); + assert(coder->rle.in_pending == 0); + assert(coder->subfilter.mode == SUB_SET); + assert(coder->options != NULL); + + // There must be a filter specified. 
+ if (coder->options->subfilter_options.id == LZMA_VLI_UNKNOWN) + return LZMA_OPTIONS_ERROR; + + // Initialize a raw encoder to work as a Subfilter. + lzma_filter options[2]; + options[0] = coder->options->subfilter_options; + options[1].id = LZMA_VLI_UNKNOWN; + + return_if_error(lzma_raw_encoder_init( + &coder->subfilter.subcoder, allocator, + options)); + + // Encode the Filter Flags field into a buffer. This should + // never fail since we have already successfully initialized + // the Subfilter itself. Check it still, and return + // LZMA_PROG_ERROR instead of whatever the ret would say. + lzma_ret ret = lzma_filter_flags_size( + &coder->subfilter.flags_size, options); + assert(ret == LZMA_OK); + if (ret != LZMA_OK) + return LZMA_PROG_ERROR; + + coder->subfilter.flags = lzma_alloc( + coder->subfilter.flags_size, allocator); + if (coder->subfilter.flags == NULL) + return LZMA_MEM_ERROR; + + // Now we have a big-enough buffer. Encode the Filter Flags. + // Like above, this should never fail. + size_t dummy = 0; + ret = lzma_filter_flags_encode(options, coder->subfilter.flags, + &dummy, coder->subfilter.flags_size); + assert(ret == LZMA_OK); + assert(dummy == coder->subfilter.flags_size); + if (ret != LZMA_OK || dummy != coder->subfilter.flags_size) + return LZMA_PROG_ERROR; + + // Write a Subblock indicating a new Subfilter. + write_byte(0x40); + + coder->options->subfilter_mode = LZMA_SUBFILTER_RUN; + coder->subfilter.mode = SUB_RUN; + coder->alignment.out_pos += coder->subfilter.flags_size; + coder->sequence = SEQ_SUBFILTER_FLAGS; + + // It is safe to fall through because SEQ_SUBFILTER_FLAGS + // uses lzma_bufcpy() which doesn't write unless there is + // output space. + } + + // Fall through + + case SEQ_SUBFILTER_FLAGS: + // Copy the Filter Flags to the output stream. 
+ lzma_bufcpy(coder->subfilter.flags, &coder->pos, + coder->subfilter.flags_size, + out, out_pos, out_size); + if (coder->pos < coder->subfilter.flags_size) + return LZMA_OK; + + lzma_free(coder->subfilter.flags, allocator); + coder->subfilter.flags = NULL; + + coder->pos = 0; + coder->sequence = SEQ_FILL; + break; + + default: + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static lzma_ret +subblock_encode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + if (coder->next.code == NULL) + return subblock_buffer(coder, allocator, in, in_pos, in_size, + out, out_pos, out_size, action); + + while (*out_pos < out_size + && (*in_pos < in_size || action != LZMA_RUN)) { + if (!coder->next_finished + && coder->temp.pos == coder->temp.size) { + coder->temp.pos = 0; + coder->temp.size = 0; + + const lzma_ret ret = coder->next.code(coder->next.coder, + allocator, in, in_pos, in_size, + coder->temp.buffer, &coder->temp.size, + LZMA_BUFFER_SIZE, action); + if (ret == LZMA_STREAM_END) { + assert(action != LZMA_RUN); + coder->next_finished = true; + } else if (coder->temp.size == 0 || ret != LZMA_OK) { + return ret; + } + } + + const lzma_ret ret = subblock_buffer(coder, allocator, + coder->temp.buffer, &coder->temp.pos, + coder->temp.size, out, out_pos, out_size, + coder->next_finished ? 
LZMA_FINISH : LZMA_RUN); + if (ret == LZMA_STREAM_END) { + assert(action != LZMA_RUN); + assert(coder->next_finished); + return LZMA_STREAM_END; + } + + if (ret != LZMA_OK) + return ret; + } + + return LZMA_OK; +} + + +static void +subblock_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->next, allocator); + lzma_next_end(&coder->subfilter.subcoder, allocator); + lzma_free(coder->subblock.data, allocator); + lzma_free(coder->subfilter.flags, allocator); + lzma_free(coder, allocator); + return; +} + + +extern lzma_ret +lzma_subblock_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &subblock_encode; + next->end = &subblock_encoder_end; + + next->coder->next = LZMA_NEXT_CODER_INIT; + next->coder->subblock.data = NULL; + next->coder->subblock.limit = 0; + next->coder->subfilter.subcoder = LZMA_NEXT_CODER_INIT; + } else { + lzma_next_end(&next->coder->subfilter.subcoder, + allocator); + lzma_free(next->coder->subfilter.flags, allocator); + } + + next->coder->subfilter.flags = NULL; + + next->coder->next_finished = false; + next->coder->sequence = SEQ_FILL; + next->coder->options = filters[0].options; + next->coder->pos = 0; + + next->coder->alignment.in_pos = 0; + next->coder->alignment.out_pos = 0; + next->coder->subblock.size = 0; + next->coder->subblock.in_pending = 0; + next->coder->rle.count = 0; + next->coder->rle.in_pending = 0; + next->coder->subfilter.mode = SUB_NONE; + next->coder->subfilter.mode_locked = false; + + next->coder->temp.pos = 0; + next->coder->temp.size = 0; + + // Grab some values from the options structure if it is available. 
+ size_t subblock_size_limit; + if (next->coder->options != NULL) { + if (next->coder->options->alignment + < LZMA_SUBBLOCK_ALIGNMENT_MIN + || next->coder->options->alignment + > LZMA_SUBBLOCK_ALIGNMENT_MAX) { + subblock_encoder_end(next->coder, allocator); + return LZMA_OPTIONS_ERROR; + } + next->coder->alignment.multiple + = next->coder->options->alignment; + next->coder->subfilter.allow + = next->coder->options->allow_subfilters; + subblock_size_limit = next->coder->options->subblock_data_size; + } else { + next->coder->alignment.multiple + = LZMA_SUBBLOCK_ALIGNMENT_DEFAULT; + next->coder->subfilter.allow = false; + subblock_size_limit = LZMA_SUBBLOCK_DATA_SIZE_DEFAULT; + } + + return_if_error(subblock_data_size(next->coder, allocator, + subblock_size_limit)); + + return lzma_next_filter_init( + &next->coder->next, allocator, filters + 1); +} diff --git a/src/liblzma/subblock/subblock_encoder.h b/src/liblzma/subblock/subblock_encoder.h new file mode 100644 index 000000000000..ddbfe64bce87 --- /dev/null +++ b/src/liblzma/subblock/subblock_encoder.h @@ -0,0 +1,21 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file subblock_encoder.h +/// \brief Encoder of the Subblock filter +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SUBBLOCK_ENCODER_H +#define LZMA_SUBBLOCK_ENCODER_H + +#include "common.h" + +extern lzma_ret lzma_subblock_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +#endif diff --git a/src/lzmainfo/lzmainfo.1 b/src/lzmainfo/lzmainfo.1 new file mode 100644 index 000000000000..ef736a6c6622 --- /dev/null +++ b/src/lzmainfo/lzmainfo.1 @@ -0,0 +1,55 @@ +.\" +.\" Author: Lasse Collin +.\" +.\" This file has been put into the public domain. 
+.\" You can do whatever you want with this file. +.\" +.TH LZMAINFO 1 "2009-08-13" "Tukaani" "XZ Utils" +.SH NAME +lzmainfo \- show information stored in the .lzma file header +.SH SYNOPSIS +.B lzmainfo +.RB [ \-\-help ] +.RB [ \-\-version ] +.RI [ file ]... +.SH DESCRIPTION +.B lzmainfo +shows information stored in the +.B .lzma +file header. It reads the first 13 bytes from the specified +.IR file , +decodes the header, and prints it to standard output in human +readable format. If no +.I files +are given or +.I file +is +.BR \- , +standard input is read. +.PP +Usually the most interesting information is the uncompressed size and +the dictionary size. Uncompressed size can be shown only if the file is +in the non-streamed +.B .lzma +format variant. The amount of memory required to decompress the file is +a few dozen kilobytes plus the dictionary size. +.PP +.B lzmainfo +is included in XZ Utils primarily for backward compatibility with LZMA Utils. +.SH EXIT STATUS +.TP +.B 0 +All is good. +.TP +.B 1 +An error occurred. +.SH BUGS +.B lzmainfo +uses +.B MB +while the correct suffix would be +.B MiB +(2^20 bytes). +This is to keep the output compatible with LZMA Utils. +.SH SEE ALSO +.BR xz (1) diff --git a/src/lzmainfo/lzmainfo.c b/src/lzmainfo/lzmainfo.c new file mode 100644 index 000000000000..af8e66cc8f99 --- /dev/null +++ b/src/lzmainfo/lzmainfo.c @@ -0,0 +1,210 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzmainfo.c +/// \brief lzmainfo tool for compatibility with LZMA Utils +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file.
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "sysdefs.h" +#include +#include + +#include "lzma.h" +#include "getopt.h" +#include "tuklib_gettext.h" +#include "tuklib_progname.h" +#include "tuklib_exit.h" + + +static void lzma_attribute((noreturn)) +help(void) +{ + printf( +_("Usage: %s [--help] [--version] [FILE]...\n" +"Show information stored in the .lzma file header"), progname); + + printf(_( +"\nWith no FILE, or when FILE is -, read standard input.\n")); + printf("\n"); + + printf(_("Report bugs to <%s> (in English or Finnish).\n"), + PACKAGE_BUGREPORT); + printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL); + + tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, true); +} + + +static void lzma_attribute((noreturn)) +version(void) +{ + puts("lzmainfo (" PACKAGE_NAME ") " PACKAGE_VERSION); + tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, true); +} + + +/// Parse command line options. +static void +parse_args(int argc, char **argv) +{ + enum { + OPT_HELP, + OPT_VERSION, + }; + + static const struct option long_opts[] = { + { "help", no_argument, NULL, OPT_HELP }, + { "version", no_argument, NULL, OPT_VERSION }, + { NULL, 0, NULL, 0 } + }; + + int c; + while ((c = getopt_long(argc, argv, "", long_opts, NULL)) != -1) { + switch (c) { + case OPT_HELP: + help(); + + case OPT_VERSION: + version(); + + default: + exit(EXIT_FAILURE); + } + } + + return; +} + + +/// Primitive base-2 logarithm for integers +static uint32_t +my_log2(uint32_t n) +{ + uint32_t e; + for (e = 0; n > 1; ++e, n /= 2) ; + return e; +} + + +/// Parse the .lzma header and display information about it. +static bool +lzmainfo(const char *name, FILE *f) +{ + uint8_t buf[13]; + const size_t size = fread(buf, 1, sizeof(buf), f); + if (size != 13) { + fprintf(stderr, "%s: %s: %s\n", progname, name, + ferror(f) ? 
strerror(errno) + : _("File is too small to be a .lzma file")); + return true; + } + + lzma_filter filter = { .id = LZMA_FILTER_LZMA1 }; + + // Parse the first five bytes. + switch (lzma_properties_decode(&filter, NULL, buf, 5)) { + case LZMA_OK: + break; + + case LZMA_OPTIONS_ERROR: + fprintf(stderr, "%s: %s: %s\n", progname, name, + _("Not a .lzma file")); + return true; + + case LZMA_MEM_ERROR: + fprintf(stderr, "%s: %s\n", progname, strerror(ENOMEM)); + exit(EXIT_FAILURE); + + default: + fprintf(stderr, "%s: %s\n", progname, + _("Internal error (bug)")); + exit(EXIT_FAILURE); + } + + // Uncompressed size + uint64_t uncompressed_size = 0; + for (size_t i = 0; i < 8; ++i) + uncompressed_size |= (uint64_t)(buf[5 + i]) << (i * 8); + + // Display the results. We don't want to translate these and also + // will use MB instead of MiB, because someone could be parsing + // this output and we don't want to break that when people move + // from LZMA Utils to XZ Utils. + if (f != stdin) + printf("%s\n", name); + + printf("Uncompressed size: "); + if (uncompressed_size == UINT64_MAX) + printf("Unknown"); + else + printf("%" PRIu64 " MB (%" PRIu64 " bytes)", + (uncompressed_size + 512 * 1024) + / (1024 * 1024), + uncompressed_size); + + lzma_options_lzma *opt = filter.options; + + printf("\nDictionary size: " + "%u MB (2^%u bytes)\n" + "Literal context bits (lc): %" PRIu32 "\n" + "Literal pos bits (lp): %" PRIu32 "\n" + "Number of pos bits (pb): %" PRIu32 "\n", + (opt->dict_size + 512 * 1024) / (1024 * 1024), + my_log2(opt->dict_size), opt->lc, opt->lp, opt->pb); + + free(opt); + + return false; +} + + +extern int +main(int argc, char **argv) +{ + tuklib_progname_init(argv); + tuklib_gettext_init(PACKAGE, LOCALEDIR); + + parse_args(argc, argv); + + int ret = EXIT_SUCCESS; + + // We print empty lines around the output only when reading from + // files specified on the command line. This is due to how + // LZMA Utils did it. 
+ if (optind == argc) { + if (lzmainfo("(stdin)", stdin)) + ret = EXIT_FAILURE; + } else { + printf("\n"); + + do { + if (strcmp(argv[optind], "-") == 0) { + if (lzmainfo("(stdin)", stdin)) + ret = EXIT_FAILURE; + } else { + FILE *f = fopen(argv[optind], "r"); + if (f == NULL) { + ret = EXIT_FAILURE; + fprintf(stderr, "%s: %s: %s\n", + progname, + argv[optind], + strerror(errno)); + continue; + } + + if (lzmainfo(argv[optind], f)) + ret = EXIT_FAILURE; + + printf("\n"); + fclose(f); + } + } while (++optind < argc); + } + + tuklib_exit(ret, EXIT_FAILURE, true); +} diff --git a/src/xz/args.c b/src/xz/args.c new file mode 100644 index 000000000000..f37f80244b4e --- /dev/null +++ b/src/xz/args.c @@ -0,0 +1,549 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file args.c +/// \brief Argument parsing +/// +/// \note Filter-specific options parsing is in options.c. +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + +#include "getopt.h" +#include + + +bool opt_stdout = false; +bool opt_force = false; +bool opt_keep_original = false; +bool opt_robot = false; + +// We don't modify or free() this, but we need to assign it in some +// non-const pointers. 
+const char *stdin_filename = "(stdin)"; + + +static void +parse_real(args_info *args, int argc, char **argv) +{ + enum { + OPT_SUBBLOCK = INT_MIN, + OPT_X86, + OPT_POWERPC, + OPT_IA64, + OPT_ARM, + OPT_ARMTHUMB, + OPT_SPARC, + OPT_DELTA, + OPT_LZMA1, + OPT_LZMA2, + + OPT_NO_SPARSE, + OPT_FILES, + OPT_FILES0, + OPT_INFO_MEMORY, + OPT_ROBOT, + }; + + static const char short_opts[] + = "cC:defF:hHlkM:qQrS:tT:vVz0123456789"; + + static const struct option long_opts[] = { + // Operation mode + { "compress", no_argument, NULL, 'z' }, + { "decompress", no_argument, NULL, 'd' }, + { "uncompress", no_argument, NULL, 'd' }, + { "test", no_argument, NULL, 't' }, + { "list", no_argument, NULL, 'l' }, + + // Operation modifiers + { "keep", no_argument, NULL, 'k' }, + { "force", no_argument, NULL, 'f' }, + { "stdout", no_argument, NULL, 'c' }, + { "to-stdout", no_argument, NULL, 'c' }, + { "no-sparse", no_argument, NULL, OPT_NO_SPARSE }, + { "suffix", required_argument, NULL, 'S' }, + // { "recursive", no_argument, NULL, 'r' }, // TODO + { "files", optional_argument, NULL, OPT_FILES }, + { "files0", optional_argument, NULL, OPT_FILES0 }, + + // Basic compression settings + { "format", required_argument, NULL, 'F' }, + { "check", required_argument, NULL, 'C' }, + { "memory", required_argument, NULL, 'M' }, + { "threads", required_argument, NULL, 'T' }, + + { "extreme", no_argument, NULL, 'e' }, + { "fast", no_argument, NULL, '0' }, + { "best", no_argument, NULL, '9' }, + + // Filters + { "lzma1", optional_argument, NULL, OPT_LZMA1 }, + { "lzma2", optional_argument, NULL, OPT_LZMA2 }, + { "x86", optional_argument, NULL, OPT_X86 }, + { "powerpc", optional_argument, NULL, OPT_POWERPC }, + { "ia64", optional_argument, NULL, OPT_IA64 }, + { "arm", optional_argument, NULL, OPT_ARM }, + { "armthumb", optional_argument, NULL, OPT_ARMTHUMB }, + { "sparc", optional_argument, NULL, OPT_SPARC }, + { "delta", optional_argument, NULL, OPT_DELTA }, + { "subblock", optional_argument, NULL, 
OPT_SUBBLOCK }, + + // Other options + { "quiet", no_argument, NULL, 'q' }, + { "verbose", no_argument, NULL, 'v' }, + { "no-warn", no_argument, NULL, 'Q' }, + { "robot", no_argument, NULL, OPT_ROBOT }, + { "info-memory", no_argument, NULL, OPT_INFO_MEMORY }, + { "help", no_argument, NULL, 'h' }, + { "long-help", no_argument, NULL, 'H' }, + { "version", no_argument, NULL, 'V' }, + + { NULL, 0, NULL, 0 } + }; + + int c; + + while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) + != -1) { + switch (c) { + // Compression preset (also for decompression if --format=raw) + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + coder_set_preset(c - '0'); + break; + + // --memory + case 'M': { + // Support specifying the limit as a percentage of + // installed physical RAM. + size_t len = strlen(optarg); + if (len > 0 && optarg[len - 1] == '%') { + optarg[len - 1] = '\0'; + hardware_memlimit_set_percentage( + str_to_uint64( + "memory%", optarg, 1, 100)); + } else { + // On 32-bit systems, SIZE_MAX would make more + // sense than UINT64_MAX. But use UINT64_MAX + // still so that scripts that assume > 4 GiB + // values don't break. + hardware_memlimit_set(str_to_uint64( + "memory", optarg, + 0, UINT64_MAX)); + } + + break; + } + + // --suffix + case 'S': + suffix_set(optarg); + break; + + case 'T': + hardware_threadlimit_set(str_to_uint64( + "threads", optarg, 0, UINT32_MAX)); + break; + + // --version + case 'V': + // This doesn't return. + message_version(); + + // --stdout + case 'c': + opt_stdout = true; + break; + + // --decompress + case 'd': + opt_mode = MODE_DECOMPRESS; + break; + + // --extreme + case 'e': + coder_set_extreme(); + break; + + // --force + case 'f': + opt_force = true; + break; + + // --info-memory + case OPT_INFO_MEMORY: + // This doesn't return. + message_memlimit(); + + // --help + case 'h': + // This doesn't return. 
+ message_help(false); + + // --long-help + case 'H': + // This doesn't return. + message_help(true); + + // --list + case 'l': + opt_mode = MODE_LIST; + break; + + // --keep + case 'k': + opt_keep_original = true; + break; + + // --quiet + case 'q': + message_verbosity_decrease(); + break; + + case 'Q': + set_exit_no_warn(); + break; + + case 't': + opt_mode = MODE_TEST; + break; + + // --verbose + case 'v': + message_verbosity_increase(); + break; + + // --robot + case OPT_ROBOT: + opt_robot = true; + + // This is to make sure that floating point numbers + // always have a dot as decimal separator. + setlocale(LC_NUMERIC, "C"); + break; + + case 'z': + opt_mode = MODE_COMPRESS; + break; + + // Filter setup + + case OPT_SUBBLOCK: + coder_add_filter(LZMA_FILTER_SUBBLOCK, + options_subblock(optarg)); + break; + + case OPT_X86: + coder_add_filter(LZMA_FILTER_X86, + options_bcj(optarg)); + break; + + case OPT_POWERPC: + coder_add_filter(LZMA_FILTER_POWERPC, + options_bcj(optarg)); + break; + + case OPT_IA64: + coder_add_filter(LZMA_FILTER_IA64, + options_bcj(optarg)); + break; + + case OPT_ARM: + coder_add_filter(LZMA_FILTER_ARM, + options_bcj(optarg)); + break; + + case OPT_ARMTHUMB: + coder_add_filter(LZMA_FILTER_ARMTHUMB, + options_bcj(optarg)); + break; + + case OPT_SPARC: + coder_add_filter(LZMA_FILTER_SPARC, + options_bcj(optarg)); + break; + + case OPT_DELTA: + coder_add_filter(LZMA_FILTER_DELTA, + options_delta(optarg)); + break; + + case OPT_LZMA1: + coder_add_filter(LZMA_FILTER_LZMA1, + options_lzma(optarg)); + break; + + case OPT_LZMA2: + coder_add_filter(LZMA_FILTER_LZMA2, + options_lzma(optarg)); + break; + + // Other + + // --format + case 'F': { + // Just in case, support both "lzma" and "alone" since + // the latter was used for forward compatibility in + // LZMA Utils 4.32.x. 
+ static const struct { + char str[8]; + enum format_type format; + } types[] = { + { "auto", FORMAT_AUTO }, + { "xz", FORMAT_XZ }, + { "lzma", FORMAT_LZMA }, + { "alone", FORMAT_LZMA }, + // { "gzip", FORMAT_GZIP }, + // { "gz", FORMAT_GZIP }, + { "raw", FORMAT_RAW }, + }; + + size_t i = 0; + while (strcmp(types[i].str, optarg) != 0) + if (++i == ARRAY_SIZE(types)) + message_fatal(_("%s: Unknown file " + "format type"), + optarg); + + opt_format = types[i].format; + break; + } + + // --check + case 'C': { + static const struct { + char str[8]; + lzma_check check; + } types[] = { + { "none", LZMA_CHECK_NONE }, + { "crc32", LZMA_CHECK_CRC32 }, + { "crc64", LZMA_CHECK_CRC64 }, + { "sha256", LZMA_CHECK_SHA256 }, + }; + + size_t i = 0; + while (strcmp(types[i].str, optarg) != 0) { + if (++i == ARRAY_SIZE(types)) + message_fatal(_("%s: Unsupported " + "integrity " + "check type"), optarg); + } + + // Use a separate check in case we are using different + // liblzma than what was used to compile us. + if (!lzma_check_is_supported(types[i].check)) + message_fatal(_("%s: Unsupported integrity " + "check type"), optarg); + + coder_set_check(types[i].check); + break; + } + + case OPT_NO_SPARSE: + io_no_sparse(); + break; + + case OPT_FILES: + args->files_delim = '\n'; + + // Fall through + + case OPT_FILES0: + if (args->files_name != NULL) + message_fatal(_("Only one file can be " + "specified with `--files' " + "or `--files0'.")); + + if (optarg == NULL) { + args->files_name = (char *)stdin_filename; + args->files_file = stdin; + } else { + args->files_name = optarg; + args->files_file = fopen(optarg, + c == OPT_FILES ?
"r" : "rb"); + if (args->files_file == NULL) + message_fatal("%s: %s", optarg, + strerror(errno)); + } + + break; + + default: + message_try_help(); + tuklib_exit(E_ERROR, E_ERROR, false); + } + } + + return; +} + + +static void +parse_environment(args_info *args, char *argv0) +{ + char *env = getenv("XZ_OPT"); + if (env == NULL) + return; + + // We modify the string, so make a copy of it. + env = xstrdup(env); + + // Calculate the number of arguments in env. argc starts at one + // to include space for the program name. + int argc = 1; + bool prev_was_space = true; + for (size_t i = 0; env[i] != '\0'; ++i) { + // NOTE: Cast to unsigned char is needed so that correct + // value gets passed to isspace(), which expects + // unsigned char cast to int. Casting to int is done + // automatically due to integer promotion, but we need to + // force char to unsigned char manually. Otherwise 8-bit + // characters would get promoted to wrong value if + // char is signed. + if (isspace((unsigned char)env[i])) { + prev_was_space = true; + } else if (prev_was_space) { + prev_was_space = false; + + // Keep argc small enough to fit into a signed int + // and to keep it usable for memory allocation. + if (++argc == MIN(INT_MAX, SIZE_MAX / sizeof(char *))) + message_fatal(_("The environment variable " + "XZ_OPT contains too many " + "arguments")); + } + } + + // Allocate memory to hold pointers to the arguments. Add one to get + // space for the terminating NULL (if some systems happen to need it). + char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *)); + argv[0] = argv0; + argv[argc] = NULL; + + // Go through the string again. Split the arguments using '\0' + // characters and add pointers to the resulting strings to argv.
+ argc = 1; + prev_was_space = true; + for (size_t i = 0; env[i] != '\0'; ++i) { + if (isspace((unsigned char)env[i])) { + prev_was_space = true; + env[i] = '\0'; + } else if (prev_was_space) { + prev_was_space = false; + argv[argc++] = env + i; + } + } + + // Parse the argument list we got from the environment. All non-option + // arguments i.e. filenames are ignored. + parse_real(args, argc, argv); + + // Reset the state of the getopt_long() so that we can parse the + // command line options too. There are two incompatible ways to + // do it. +#ifdef HAVE_OPTRESET + // BSD + optind = 1; + optreset = 1; +#else + // GNU, Solaris + optind = 0; +#endif + + // We don't need the argument list from environment anymore. + free(argv); + free(env); + + return; +} + + +extern void +args_parse(args_info *args, int argc, char **argv) +{ + // Initialize those parts of *args that we need later. + args->files_name = NULL; + args->files_file = NULL; + args->files_delim = '\0'; + + // Check how we were called. + { + // Remove the leading path name, if any. + const char *name = strrchr(argv[0], '/'); + if (name == NULL) + name = argv[0]; + else + ++name; + + // NOTE: It's possible that name[0] is now '\0' if argv[0] + // is weird, but it doesn't matter here. + + // Look for full command names instead of substrings like + // "un", "cat", and "lz" to reduce possibility of false + // positives when the programs have been renamed. 
+ if (strstr(name, "xzcat") != NULL) { + opt_mode = MODE_DECOMPRESS; + opt_stdout = true; + } else if (strstr(name, "unxz") != NULL) { + opt_mode = MODE_DECOMPRESS; + } else if (strstr(name, "lzcat") != NULL) { + opt_format = FORMAT_LZMA; + opt_mode = MODE_DECOMPRESS; + opt_stdout = true; + } else if (strstr(name, "unlzma") != NULL) { + opt_format = FORMAT_LZMA; + opt_mode = MODE_DECOMPRESS; + } else if (strstr(name, "lzma") != NULL) { + opt_format = FORMAT_LZMA; + } + } + + // First the flags from environment + parse_environment(args, argv[0]); + + // Then from the command line + parse_real(args, argc, argv); + + // Never remove the source file when the destination is not on disk. + // In test mode the data is written nowhere, but setting opt_stdout + // will make the rest of the code behave well. + if (opt_stdout || opt_mode == MODE_TEST) { + opt_keep_original = true; + opt_stdout = true; + } + + // When compressing, if no --format flag was used, or it + // was --format=auto, we compress to the .xz format. + if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO) + opt_format = FORMAT_XZ; + + // Compression settings need to be validated (options themselves and + // their memory usage) when compressing to any file format. It has to + // be done also when uncompressing raw data, since for raw decoding + // the options given on the command line are used to know what kind + // of raw data we are supposed to decode. + if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW) + coder_set_compression_settings(); + + // If no filenames are given, use stdin. + if (argv[optind] == NULL && args->files_name == NULL) { + // We don't modify or free() the "-" constant. The caller + // modifies this so don't make the struct itself const. + static char *names_stdin[2] = { (char *)"-", NULL }; + args->arg_names = names_stdin; + args->arg_count = 1; + } else { + // We got at least one filename from the command line, or + // --files or --files0 was specified. 
+ args->arg_names = argv + optind; + args->arg_count = argc - optind; + } + + return; +} diff --git a/src/xz/args.h b/src/xz/args.h new file mode 100644 index 000000000000..ac5959b52bad --- /dev/null +++ b/src/xz/args.h @@ -0,0 +1,42 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file args.h +/// \brief Argument parsing +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +typedef struct { + /// Filenames from command line + char **arg_names; + + /// Number of filenames from command line + size_t arg_count; + + /// Name of the file from which to read filenames. This is NULL + /// if --files or --files0 was not used. + char *files_name; + + /// File opened for reading from which filenames are read. This is + /// non-NULL only if files_name is non-NULL. + FILE *files_file; + + /// Delimiter for filenames read from files_file + char files_delim; + +} args_info; + + +extern bool opt_stdout; +extern bool opt_force; +extern bool opt_keep_original; +// extern bool opt_recursive; +extern bool opt_robot; + +extern const char *stdin_filename; + +extern void args_parse(args_info *args, int argc, char **argv); diff --git a/src/xz/coder.c b/src/xz/coder.c new file mode 100644 index 000000000000..2ee0c704b2cf --- /dev/null +++ b/src/xz/coder.c @@ -0,0 +1,659 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file coder.c +/// \brief Compresses or uncompresses a file +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + + +/// Return value type for coder_init(). 
+enum coder_init_ret { + CODER_INIT_NORMAL, + CODER_INIT_PASSTHRU, + CODER_INIT_ERROR, +}; + + +enum operation_mode opt_mode = MODE_COMPRESS; + +enum format_type opt_format = FORMAT_AUTO; + + +/// Stream used to communicate with liblzma +static lzma_stream strm = LZMA_STREAM_INIT; + +/// Filters needed for encoding in all formats, and also for decoding raw data +static lzma_filter filters[LZMA_FILTERS_MAX + 1]; + +/// Input and output buffers +static io_buf in_buf; +static io_buf out_buf; + +/// Number of filters. Zero indicates that we are using a preset. +static size_t filters_count = 0; + +/// Number of the preset (0-9) +static size_t preset_number = 6; + +/// True if we should auto-adjust the compression settings to use less memory +/// if memory usage limit is too low for the original settings. +static bool auto_adjust = true; + +/// Indicate if no preset has been explicitly given. In that case, if we need +/// to auto-adjust for lower memory usage, we won't print a warning. +static bool preset_default = true; + +/// If a preset is used (no custom filter chain) and preset_extreme is true, +/// a significantly slower compression is used to achieve slightly better +/// compression ratio. +static bool preset_extreme = false; + +/// Integrity check type +static lzma_check check; + +/// This becomes false if the --check=CHECK option is used.
+static bool check_default = true; + + +extern void +coder_set_check(lzma_check new_check) +{ + check = new_check; + check_default = false; + return; +} + + +extern void +coder_set_preset(size_t new_preset) +{ + preset_number = new_preset; + preset_default = false; + return; +} + + +extern void +coder_set_extreme(void) +{ + preset_extreme = true; + return; +} + + +extern void +coder_add_filter(lzma_vli id, void *options) +{ + if (filters_count == LZMA_FILTERS_MAX) + message_fatal(_("Maximum number of filters is four")); + + filters[filters_count].id = id; + filters[filters_count].options = options; + ++filters_count; + + return; +} + + +static void lzma_attribute((noreturn)) +memlimit_too_small(uint64_t memory_usage) +{ + message(V_ERROR, _("Memory usage limit is too low for the given " + "filter setup.")); + message_mem_needed(V_ERROR, memory_usage); + tuklib_exit(E_ERROR, E_ERROR, false); +} + + +extern void +coder_set_compression_settings(void) +{ + // Options for LZMA1 or LZMA2 in case we are using a preset. + static lzma_options_lzma opt_lzma; + + if (filters_count == 0) { + // We are using a preset. This is not a good idea in raw mode + // except when playing around with things. Different versions + // of this software may use different options in presets, and + // thus make uncompressing the raw data difficult. + if (opt_format == FORMAT_RAW) { + // The message is shown only if warnings are allowed + // but the exit status isn't changed. + message(V_WARNING, _("Using a preset in raw mode " + "is discouraged.")); + message(V_WARNING, _("The exact options of the " + "presets may vary between software " + "versions.")); + } + + // Get the preset for LZMA1 or LZMA2. + if (preset_extreme) + preset_number |= LZMA_PRESET_EXTREME; + + if (lzma_lzma_preset(&opt_lzma, preset_number)) + message_bug(); + + // Use LZMA2 except with --format=lzma we use LZMA1. + filters[0].id = opt_format == FORMAT_LZMA + ? 
LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2; + filters[0].options = &opt_lzma; + filters_count = 1; + } else { + preset_default = false; + } + + // Terminate the filter options array. + filters[filters_count].id = LZMA_VLI_UNKNOWN; + + // If we are using the .lzma format, allow exactly one filter + // which has to be LZMA1. + if (opt_format == FORMAT_LZMA && (filters_count != 1 + || filters[0].id != LZMA_FILTER_LZMA1)) + message_fatal(_("The .lzma format supports only " + "the LZMA1 filter")); + + // If we are using the .xz format, make sure that there is no LZMA1 + // filter to prevent LZMA_PROG_ERROR. + if (opt_format == FORMAT_XZ) + for (size_t i = 0; i < filters_count; ++i) + if (filters[i].id == LZMA_FILTER_LZMA1) + message_fatal(_("LZMA1 cannot be used " + "with the .xz format")); + + // Print the selected filter chain. + message_filters(V_DEBUG, filters); + + // If using --format=raw, we can be decoding. The memusage function + // also validates the filter chain and the options used for the + // filters. + const uint64_t memory_limit = hardware_memlimit_get(); + uint64_t memory_usage; + if (opt_mode == MODE_COMPRESS) + memory_usage = lzma_raw_encoder_memusage(filters); + else + memory_usage = lzma_raw_decoder_memusage(filters); + + if (memory_usage == UINT64_MAX) + message_fatal(_("Unsupported filter chain or filter options")); + + // Print memory usage info before possible dictionary + // size auto-adjusting. + message_mem_needed(V_DEBUG, memory_usage); + + if (memory_usage > memory_limit) { + // If --no-auto-adjust was used or we didn't find LZMA1 or + // LZMA2 as the last filter, give an error immediately. + // --format=raw implies --no-auto-adjust. + if (!auto_adjust || opt_format == FORMAT_RAW) + memlimit_too_small(memory_usage); + + assert(opt_mode == MODE_COMPRESS); + + // Look for the last filter if it is LZMA2 or LZMA1, so + // we can make it use less RAM. With other filters we don't + // know what to do. 
+ size_t i = 0; + while (filters[i].id != LZMA_FILTER_LZMA2 + && filters[i].id != LZMA_FILTER_LZMA1) { + if (filters[i].id == LZMA_VLI_UNKNOWN) + memlimit_too_small(memory_usage); + + ++i; + } + + // Decrease the dictionary size until we meet the memory + // usage limit. First round down to full mebibytes. + lzma_options_lzma *opt = filters[i].options; + const uint32_t orig_dict_size = opt->dict_size; + opt->dict_size &= ~((UINT32_C(1) << 20) - 1); + while (true) { + // If it is below 1 MiB, auto-adjusting failed. We + // could be more sophisticated and scale it down even + // more, but let's see if many complain about this + // version. + // + // FIXME: Displays the scaled memory usage instead + // of the original. + if (opt->dict_size < (UINT32_C(1) << 20)) + memlimit_too_small(memory_usage); + + memory_usage = lzma_raw_encoder_memusage(filters); + if (memory_usage == UINT64_MAX) + message_bug(); + + // Accept it if it is low enough. + if (memory_usage <= memory_limit) + break; + + // Otherwise 1 MiB down and try again. I hope this + // isn't too slow method for cases where the original + // dict_size is very big. + opt->dict_size -= UINT32_C(1) << 20; + } + + // Tell the user that we decreased the dictionary size. + // However, omit the message if no preset or custom chain + // was given. FIXME: Always warn? + if (!preset_default) + message(V_WARNING, _("Adjusted LZMA%c dictionary size " + "from %s MiB to %s MiB to not exceed " + "the memory usage limit of %s MiB"), + filters[i].id == LZMA_FILTER_LZMA2 + ? '2' : '1', + uint64_to_str(orig_dict_size >> 20, 0), + uint64_to_str(opt->dict_size >> 20, 1), + uint64_to_str(round_up_to_mib( + memory_limit), 2)); + } + +/* + // Limit the number of worker threads so that memory usage + // limit isn't exceeded. 
+ assert(memory_usage > 0); + size_t thread_limit = memory_limit / memory_usage; + if (thread_limit == 0) + thread_limit = 1; + + if (opt_threads > thread_limit) + opt_threads = thread_limit; +*/ + + if (check_default) { + // The default check type is CRC64, but fallback to CRC32 + // if CRC64 isn't supported by the copy of liblzma we are + // using. CRC32 is always supported. + check = LZMA_CHECK_CRC64; + if (!lzma_check_is_supported(check)) + check = LZMA_CHECK_CRC32; + } + + return; +} + + +/// Return true if the data in in_buf seems to be in the .xz format. +static bool +is_format_xz(void) +{ + return strm.avail_in >= 6 && memcmp(in_buf.u8, "\3757zXZ", 6) == 0; +} + + +/// Return true if the data in in_buf seems to be in the .lzma format. +static bool +is_format_lzma(void) +{ + // The .lzma header is 13 bytes. + if (strm.avail_in < 13) + return false; + + // Decode the LZMA1 properties. + lzma_filter filter = { .id = LZMA_FILTER_LZMA1 }; + if (lzma_properties_decode(&filter, NULL, in_buf.u8, 5) != LZMA_OK) + return false; + + // A hack to ditch tons of false positives: We allow only dictionary + // sizes that are 2^n or 2^n + 2^(n-1) or UINT32_MAX. LZMA_Alone + // created only files with 2^n, but accepts any dictionary size. + // If someone complains, this will be reconsidered. + lzma_options_lzma *opt = filter.options; + const uint32_t dict_size = opt->dict_size; + free(opt); + + if (dict_size != UINT32_MAX) { + uint32_t d = dict_size - 1; + d |= d >> 2; + d |= d >> 3; + d |= d >> 4; + d |= d >> 8; + d |= d >> 16; + ++d; + if (d != dict_size || dict_size == 0) + return false; + } + + // Another hack to ditch false positives: Assume that if the + // uncompressed size is known, it must be less than 256 GiB. + // Again, if someone complains, this will be reconsidered. 
+ uint64_t uncompressed_size = 0; + for (size_t i = 0; i < 8; ++i) + uncompressed_size |= (uint64_t)(in_buf.u8[5 + i]) << (i * 8); + + if (uncompressed_size != UINT64_MAX + && uncompressed_size > (UINT64_C(1) << 38)) + return false; + + return true; +} + + +/// Detect the input file type (for now, this is done only when decompressing), +/// and initialize an appropriate coder. Return value indicates if a normal +/// liblzma-based coder was initialized (CODER_INIT_NORMAL), if passthru +/// mode should be used (CODER_INIT_PASSTHRU), or if an error occurred +/// (CODER_INIT_ERROR). +static enum coder_init_ret +coder_init(file_pair *pair) +{ + lzma_ret ret = LZMA_PROG_ERROR; + + if (opt_mode == MODE_COMPRESS) { + switch (opt_format) { + case FORMAT_AUTO: + // args_parse() replaces FORMAT_AUTO with FORMAT_XZ + // when compressing, so this cannot be reached. + assert(0); + break; + + case FORMAT_XZ: + ret = lzma_stream_encoder(&strm, filters, check); + break; + + case FORMAT_LZMA: + ret = lzma_alone_encoder(&strm, filters[0].options); + break; + + case FORMAT_RAW: + ret = lzma_raw_encoder(&strm, filters); + break; + } + } else { + const uint32_t flags = LZMA_TELL_UNSUPPORTED_CHECK + | LZMA_CONCATENATED; + + // We abuse FORMAT_AUTO to indicate unknown file format, + // for which we may consider passthru mode. + enum format_type init_format = FORMAT_AUTO; + + switch (opt_format) { + case FORMAT_AUTO: + if (is_format_xz()) + init_format = FORMAT_XZ; + else if (is_format_lzma()) + init_format = FORMAT_LZMA; + break; + + case FORMAT_XZ: + if (is_format_xz()) + init_format = FORMAT_XZ; + break; + + case FORMAT_LZMA: + if (is_format_lzma()) + init_format = FORMAT_LZMA; + break; + + case FORMAT_RAW: + init_format = FORMAT_RAW; + break; + } + + switch (init_format) { + case FORMAT_AUTO: + // Unknown file format. If --decompress --stdout + // --force have been given, then we copy the input + // as is to stdout. Checking for MODE_DECOMPRESS + // is needed, because we don't want to use + // passthru mode with --test.
+ if (opt_mode == MODE_DECOMPRESS + && opt_stdout && opt_force) + return CODER_INIT_PASSTHRU; + + ret = LZMA_FORMAT_ERROR; + break; + + case FORMAT_XZ: + ret = lzma_stream_decoder(&strm, + hardware_memlimit_get(), flags); + break; + + case FORMAT_LZMA: + ret = lzma_alone_decoder(&strm, + hardware_memlimit_get()); + break; + + case FORMAT_RAW: + // Memory usage has already been checked in + // coder_set_compression_settings(). + ret = lzma_raw_decoder(&strm, filters); + break; + } + + // Try to decode the headers. This will catch too low + // memory usage limit in case it happens in the first + // Block of the first Stream, which is where it very + // probably will happen if it is going to happen. + if (ret == LZMA_OK && init_format != FORMAT_RAW) { + strm.next_out = NULL; + strm.avail_out = 0; + ret = lzma_code(&strm, LZMA_RUN); + } + } + + if (ret != LZMA_OK) { + message_error("%s: %s", pair->src_name, message_strm(ret)); + if (ret == LZMA_MEMLIMIT_ERROR) + message_mem_needed(V_ERROR, lzma_memusage(&strm)); + + return CODER_INIT_ERROR; + } + + return CODER_INIT_NORMAL; +} + + +/// Compress or decompress using liblzma. +static bool +coder_normal(file_pair *pair) +{ + // Encoder needs to know when we have given all the input to it. + // The decoders need to know it too when we are using + // LZMA_CONCATENATED. We need to check for src_eof here, because + // the first input chunk has been already read, and that may + // have been the only chunk we will read. + lzma_action action = pair->src_eof ? LZMA_FINISH : LZMA_RUN; + + lzma_ret ret; + + // Assume that something goes wrong. + bool success = false; + + strm.next_out = out_buf.u8; + strm.avail_out = IO_BUFFER_SIZE; + + while (!user_abort) { + // Fill the input buffer if it is empty and we haven't reached + // end of file yet. 
+ if (strm.avail_in == 0 && !pair->src_eof) { + strm.next_in = in_buf.u8; + strm.avail_in = io_read( + pair, &in_buf, IO_BUFFER_SIZE); + + if (strm.avail_in == SIZE_MAX) + break; + + if (pair->src_eof) + action = LZMA_FINISH; + } + + // Let liblzma do the actual work. + ret = lzma_code(&strm, action); + + // Write out if the output buffer became full. + if (strm.avail_out == 0) { + if (opt_mode != MODE_TEST && io_write(pair, &out_buf, + IO_BUFFER_SIZE - strm.avail_out)) + break; + + strm.next_out = out_buf.u8; + strm.avail_out = IO_BUFFER_SIZE; + } + + if (ret != LZMA_OK) { + // Determine if the return value indicates that we + // won't continue coding. + const bool stop = ret != LZMA_NO_CHECK + && ret != LZMA_UNSUPPORTED_CHECK; + + if (stop) { + // Write the remaining bytes even if something + // went wrong, because that way the user gets + // as much data as possible, which can be good + // when trying to get at least some useful + // data out of damaged files. + if (opt_mode != MODE_TEST && io_write(pair, + &out_buf, IO_BUFFER_SIZE + - strm.avail_out)) + break; + } + + if (ret == LZMA_STREAM_END) { + // Check that there is no trailing garbage. + // This is needed for LZMA_Alone and raw + // streams. + if (strm.avail_in == 0 && !pair->src_eof) { + // Try reading one more byte. + // Hopefully we don't get any more + // input, and thus pair->src_eof + // becomes true. + strm.avail_in = io_read( + pair, &in_buf, 1); + if (strm.avail_in == SIZE_MAX) + break; + + assert(strm.avail_in == 0 + || strm.avail_in == 1); + } + + if (strm.avail_in == 0) { + assert(pair->src_eof); + success = true; + break; + } + + // We hadn't reached the end of the file. + ret = LZMA_DATA_ERROR; + assert(stop); + } + + // If we get here and stop is true, something went + // wrong and we print an error. Otherwise it's just + // a warning and coding can continue. 
+ if (stop) { + message_error("%s: %s", pair->src_name, + message_strm(ret)); + } else { + message_warning("%s: %s", pair->src_name, + message_strm(ret)); + + // When compressing, all possible errors set + // stop to true. + assert(opt_mode != MODE_COMPRESS); + } + + if (ret == LZMA_MEMLIMIT_ERROR) { + // Display how much memory it would have + // actually needed. + message_mem_needed(V_ERROR, + lzma_memusage(&strm)); + } + + if (stop) + break; + } + + // Show progress information under certain conditions. + message_progress_update(); + } + + return success; +} + + +/// Copy from input file to output file without processing the data in any +/// way. This is used only when trying to decompress unrecognized files +/// with --decompress --stdout --force, so the output is always stdout. +static bool +coder_passthru(file_pair *pair) +{ + while (strm.avail_in != 0) { + if (user_abort) + return false; + + if (io_write(pair, &in_buf, strm.avail_in)) + return false; + + strm.total_in += strm.avail_in; + strm.total_out = strm.total_in; + message_progress_update(); + + strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE); + if (strm.avail_in == SIZE_MAX) + return false; + } + + return true; +} + + +extern void +coder_run(const char *filename) +{ + // Set and possibly print the filename for the progress message. + message_filename(filename); + + // Try to open the input file. + file_pair *pair = io_open_src(filename); + if (pair == NULL) + return; + + // Assume that something goes wrong. + bool success = false; + + // Read the first chunk of input data. This is needed to detect + // the input file type (for now, only for decompression). + strm.next_in = in_buf.u8; + strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE); + + if (strm.avail_in != SIZE_MAX) { + // Initialize the coder. This will detect the file format + // and, in decompression or testing mode, check the memory + // usage of the first Block too. 
This way we don't try to + // open the destination file if we see that coding wouldn't + // work at all anyway. This also avoids deleting the old + // "target" file if --force was used. + const enum coder_init_ret init_ret = coder_init(pair); + + if (init_ret != CODER_INIT_ERROR && !user_abort) { + // Don't open the destination file when --test + // is used. + if (opt_mode == MODE_TEST || !io_open_dest(pair)) { + // Initialize the progress indicator. + const uint64_t in_size + = pair->src_st.st_size <= 0 + ? 0 : pair->src_st.st_size; + message_progress_start(&strm, in_size); + + // Do the actual coding or passthru. + if (init_ret == CODER_INIT_NORMAL) + success = coder_normal(pair); + else + success = coder_passthru(pair); + + message_progress_end(success); + } + } + } + + // Close the file pair. It needs to know if coding was successful to + // know if the source or target file should be unlinked. + io_close(pair, success); + + return; +} diff --git a/src/xz/coder.h b/src/xz/coder.h new file mode 100644 index 000000000000..0d3af6ad4494 --- /dev/null +++ b/src/xz/coder.h @@ -0,0 +1,57 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file coder.h +/// \brief Compresses or uncompresses a file +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +enum operation_mode { + MODE_COMPRESS, + MODE_DECOMPRESS, + MODE_TEST, + MODE_LIST, +}; + + +// NOTE: The order of these is significant in suffix.c. +enum format_type { + FORMAT_AUTO, + FORMAT_XZ, + FORMAT_LZMA, + // HEADER_GZIP, + FORMAT_RAW, +}; + + +/// Operation mode of the command line tool. This is set in args.c and read +/// in several files. +extern enum operation_mode opt_mode; + +/// File format to use when encoding or what format(s) to accept when +/// decoding. 
This is a global because it's needed also in suffix.c. +/// This is set in args.c. +extern enum format_type opt_format; + + +/// Set the integrity check type used when compressing +extern void coder_set_check(lzma_check check); + +/// Set preset number +extern void coder_set_preset(size_t new_preset); + +/// Enable extreme mode +extern void coder_set_extreme(void); + +/// Add a filter to the custom filter chain +extern void coder_add_filter(lzma_vli id, void *options); + +/// Validate the filter chain and its memory usage, and finalize the settings +extern void coder_set_compression_settings(void); + +/// Compress or decompress the given file +extern void coder_run(const char *filename); diff --git a/src/xz/file_io.c b/src/xz/file_io.c new file mode 100644 index 000000000000..a78002eba559 --- /dev/null +++ b/src/xz/file_io.c @@ -0,0 +1,957 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file file_io.c +/// \brief File opening, unlinking, and closing +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + +#include + +#ifdef TUKLIB_DOSLIKE +# include +#else +static bool warn_fchown; +#endif + +#if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) +# include +#elif defined(HAVE_UTIME) +# include +#endif + +#include "tuklib_open_stdxxx.h" + +#ifndef O_BINARY +# define O_BINARY 0 +#endif + +#ifndef O_NOCTTY +# define O_NOCTTY 0 +#endif + + +/// If true, try to create sparse files when decompressing. +static bool try_sparse = true; + +#ifndef TUKLIB_DOSLIKE +/// File status flags of standard output. This is used by io_open_dest() +/// and io_close_dest().
+static int stdout_flags = 0; +#endif + + +static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size); + + +extern void +io_init(void) +{ + // Make sure that stdin, stdout, and stderr are connected to + // a valid file descriptor. Exit immediately with exit code E_ERROR + // if we cannot make the file descriptors valid. Maybe we should + // print an error message, but our stderr could be screwed anyway. + tuklib_open_stdxxx(E_ERROR); + +#ifndef TUKLIB_DOSLIKE + // If fchown() fails setting the owner, we warn about it only if + // we are root. + warn_fchown = geteuid() == 0; +#endif + +#ifdef __DJGPP__ + // Avoid doing useless things when statting files. + // This isn't important but doesn't hurt. + _djstat_flags = _STAT_INODE | _STAT_EXEC_EXT + | _STAT_EXEC_MAGIC | _STAT_DIRSIZE; +#endif + + return; +} + + +/// Disable trying to create sparse files when decompressing. +extern void +io_no_sparse(void) +{ + try_sparse = false; + return; +} + + +/// \brief Unlink a file +/// +/// This tries to verify that the file being unlinked really is the file that +/// we want to unlink by verifying device and inode numbers. There's still +/// a small unavoidable race, but this is much better than nothing (the file +/// could have been moved/replaced even hours earlier). +static void +io_unlink(const char *name, const struct stat *known_st) +{ +#if defined(TUKLIB_DOSLIKE) + // On DOS-like systems, st_ino is meaningless, so don't bother + // testing it. Just silence a compiler warning. + (void)known_st; +#else + struct stat new_st; + + // If --force was used, use stat() instead of lstat(). This way + // (de)compressing symlinks works correctly. However, it also means + // that xz cannot detect if a regular file foo is renamed to bar + // and then a symlink foo -> bar is created. Because of stat() + // instead of lstat(), xz will think that foo hasn't been replaced + // with another file. Thus, xz will remove foo even though it no + // longer is the same file that xz used when it started compressing.
+ // Probably it's not too bad though, so this doesn't need a more + // complex fix. + const int stat_ret = opt_force + ? stat(name, &new_st) : lstat(name, &new_st); + + if (stat_ret +# ifdef __VMS + // st_ino is an array, and we don't want to + // compare st_dev at all. + || memcmp(&new_st.st_ino, &known_st->st_ino, + sizeof(new_st.st_ino)) != 0 +# else + // Typical POSIX-like system + || new_st.st_dev != known_st->st_dev + || new_st.st_ino != known_st->st_ino +# endif + ) + // TRANSLATORS: When compression or decompression finishes, + // and xz is going to remove the source file, xz first checks + // if the source file still exists, and if it does, does its + // device and inode numbers match what xz saw when it opened + // the source file. If these checks fail, this message is + // shown, %s being the filename, and the file is not deleted. + // The check for device and inode numbers is there, because + // it is possible that the user has put a new file in place + // of the original file, and in that case it obviously + // shouldn't be removed. + message_error(_("%s: File seems to have been moved, " + "not removing"), name); + else +#endif + // There's a race condition between lstat() and unlink() + // but at least we have tried to avoid removing wrong file. + if (unlink(name)) + message_error(_("%s: Cannot remove: %s"), + name, strerror(errno)); + + return; +} + + +/// \brief Copies owner/group and permissions +/// +/// \todo ACL and EA support +/// +static void +io_copy_attrs(const file_pair *pair) +{ + // Skip chown and chmod on Windows. +#ifndef TUKLIB_DOSLIKE + // This function is more tricky than you may think at first. + // Blindly copying permissions may permit users to access the + // destination file who didn't have permission to access the + // source file. + + // Try changing the owner of the file. If we aren't root or the owner + // isn't already us, fchown() probably doesn't succeed. We warn + // about failing fchown() only if we are root. 
+ if (fchown(pair->dest_fd, pair->src_st.st_uid, -1) && warn_fchown) + message_warning(_("%s: Cannot set the file owner: %s"), + pair->dest_name, strerror(errno)); + + mode_t mode; + + if (fchown(pair->dest_fd, -1, pair->src_st.st_gid)) { + message_warning(_("%s: Cannot set the file group: %s"), + pair->dest_name, strerror(errno)); + // We can still safely copy some additional permissions: + // `group' must be at least as strict as `other' and + // also vice versa. + // + // NOTE: After this, the owner of the source file may + // get additional permissions. This shouldn't be too bad, + // because the owner would have had permission to chmod + // the original file anyway. + mode = ((pair->src_st.st_mode & 0070) >> 3) + & (pair->src_st.st_mode & 0007); + mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode; + } else { + // Drop the setuid, setgid, and sticky bits. + mode = pair->src_st.st_mode & 0777; + } + + if (fchmod(pair->dest_fd, mode)) + message_warning(_("%s: Cannot set the file permissions: %s"), + pair->dest_name, strerror(errno)); +#endif + + // Copy the timestamps. We have several possible ways to do this, of + // which some are better in both security and precision. + // + // First, get the nanosecond part of the timestamps. As of writing, + // it's not standardized by POSIX, and there are several names for + // the same thing in struct stat. 
+ long atime_nsec; + long mtime_nsec; + +# if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC) + // GNU and Solaris + atime_nsec = pair->src_st.st_atim.tv_nsec; + mtime_nsec = pair->src_st.st_mtim.tv_nsec; + +# elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC) + // BSD + atime_nsec = pair->src_st.st_atimespec.tv_nsec; + mtime_nsec = pair->src_st.st_mtimespec.tv_nsec; + +# elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC) + // GNU and BSD without extensions + atime_nsec = pair->src_st.st_atimensec; + mtime_nsec = pair->src_st.st_mtimensec; + +# elif defined(HAVE_STRUCT_STAT_ST_UATIME) + // Tru64 + atime_nsec = pair->src_st.st_uatime * 1000; + mtime_nsec = pair->src_st.st_umtime * 1000; + +# elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC) + // UnixWare + atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec; + mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec; + +# else + // Safe fallback + atime_nsec = 0; + mtime_nsec = 0; +# endif + + // Construct a structure to hold the timestamps and call appropriate + // function to set the timestamps. +#if defined(HAVE_FUTIMENS) + // Use nanosecond precision. + struct timespec tv[2]; + tv[0].tv_sec = pair->src_st.st_atime; + tv[0].tv_nsec = atime_nsec; + tv[1].tv_sec = pair->src_st.st_mtime; + tv[1].tv_nsec = mtime_nsec; + + (void)futimens(pair->dest_fd, tv); + +#elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) + // Use microsecond precision. + struct timeval tv[2]; + tv[0].tv_sec = pair->src_st.st_atime; + tv[0].tv_usec = atime_nsec / 1000; + tv[1].tv_sec = pair->src_st.st_mtime; + tv[1].tv_usec = mtime_nsec / 1000; + +# if defined(HAVE_FUTIMES) + (void)futimes(pair->dest_fd, tv); +# elif defined(HAVE_FUTIMESAT) + (void)futimesat(pair->dest_fd, NULL, tv); +# else + // Argh, no function to use a file descriptor to set the timestamp. + (void)utimes(pair->dest_name, tv); +# endif + +#elif defined(HAVE_UTIME) + // Use one-second precision. utime() doesn't support using file + // descriptor either. 
Some systems have broken utime() prototype + // so don't make this const. + struct utimbuf buf = { + .actime = pair->src_st.st_atime, + .modtime = pair->src_st.st_mtime, + }; + + // Avoid warnings. + (void)atime_nsec; + (void)mtime_nsec; + + (void)utime(pair->dest_name, &buf); +#endif + + return; +} + + +/// Opens the source file. Returns false on success, true on error. +static bool +io_open_src_real(file_pair *pair) +{ + // There's nothing to open when reading from stdin. + if (pair->src_name == stdin_filename) { + pair->src_fd = STDIN_FILENO; +#ifdef TUKLIB_DOSLIKE + setmode(STDIN_FILENO, O_BINARY); +#endif + return false; + } + + // Symlinks are not followed unless writing to stdout or --force + // was used. + const bool follow_symlinks = opt_stdout || opt_force; + + // We accept only regular files if we are writing the output + // to disk too. bzip2 allows overriding this with --force but + // gzip and xz don't. + const bool reg_files_only = !opt_stdout; + + // Flags for open() + int flags = O_RDONLY | O_BINARY | O_NOCTTY; + +#ifndef TUKLIB_DOSLIKE + // If we accept only regular files, we need to be careful to avoid + // problems with special files like devices and FIFOs. O_NONBLOCK + // prevents blocking when opening such files. When we want to accept + // special files, we must not use O_NONBLOCK, or otherwise we won't + // block waiting e.g. FIFOs to become readable. + if (reg_files_only) + flags |= O_NONBLOCK; +#endif + +#if defined(O_NOFOLLOW) + if (!follow_symlinks) + flags |= O_NOFOLLOW; +#elif !defined(TUKLIB_DOSLIKE) + // Some POSIX-like systems lack O_NOFOLLOW (it's not required + // by POSIX). Check for symlinks with a separate lstat() on + // these systems. 
+ if (!follow_symlinks) { + struct stat st; + if (lstat(pair->src_name, &st)) { + message_error("%s: %s", pair->src_name, + strerror(errno)); + return true; + + } else if (S_ISLNK(st.st_mode)) { + message_warning(_("%s: Is a symbolic link, " + "skipping"), pair->src_name); + return true; + } + } +#else + // Avoid warnings. + (void)follow_symlinks; +#endif + + // Try to open the file. If we are accepting non-regular files, + // unblock the caught signals so that open() can be interrupted + // if it blocks e.g. due to a FIFO file. + if (!reg_files_only) + signals_unblock(); + + // Maybe this wouldn't need a loop, since all the signal handlers for + // which we don't use SA_RESTART set user_abort to true. But it + // doesn't hurt to have it just in case. + do { + pair->src_fd = open(pair->src_name, flags); + } while (pair->src_fd == -1 && errno == EINTR && !user_abort); + + if (!reg_files_only) + signals_block(); + + if (pair->src_fd == -1) { + // If we were interrupted, don't display any error message. + if (errno == EINTR) { + // All the signals that don't have SA_RESTART + // set user_abort. + assert(user_abort); + return true; + } + +#ifdef O_NOFOLLOW + // Give an understandable error message if the reason + // for failing was that the file was a symbolic link. + // + // Note that at least Linux, OpenBSD, Solaris, and Darwin + // use ELOOP to indicate if O_NOFOLLOW was the reason + // that open() failed. Because there may be + // directories in the pathname, ELOOP may occur also + // because of a symlink loop in the directory part. + // So ELOOP doesn't tell us what actually went wrong. + // + // FreeBSD associates EMLINK with O_NOFOLLOW and + // Tru64 uses ENOTSUP. We use these directly here + // and skip the lstat() call and the associated race. + // I want to hear if there are other kernels that + // fail with something else than ELOOP with O_NOFOLLOW.
+ bool was_symlink = false; + +# if defined(__FreeBSD__) || defined(__DragonFly__) + if (errno == EMLINK) + was_symlink = true; + +# elif defined(__digital__) && defined(__unix__) + if (errno == ENOTSUP) + was_symlink = true; + +# elif defined(__NetBSD__) + // FIXME? As of 2008-11-20, NetBSD doesn't document what + // errno is used with O_NOFOLLOW. It seems to be EFTYPE, + // but since it isn't documented, it may be wrong to rely + // on it here. + if (errno == EFTYPE) + was_symlink = true; + +# else + if (errno == ELOOP && !follow_symlinks) { + const int saved_errno = errno; + struct stat st; + if (lstat(pair->src_name, &st) == 0 + && S_ISLNK(st.st_mode)) + was_symlink = true; + + errno = saved_errno; + } +# endif + + if (was_symlink) + message_warning(_("%s: Is a symbolic link, " + "skipping"), pair->src_name); + else +#endif + // Something else than O_NOFOLLOW failing + // (assuming that the race conditions didn't + // confuse us). + message_error("%s: %s", pair->src_name, + strerror(errno)); + + return true; + } + +#ifndef TUKLIB_DOSLIKE + // Drop O_NONBLOCK, which is used only when we are accepting only + // regular files. After the open() call, we want things to block + // instead of giving EAGAIN. + if (reg_files_only) { + flags = fcntl(pair->src_fd, F_GETFL); + if (flags == -1) + goto error_msg; + + flags &= ~O_NONBLOCK; + + if (fcntl(pair->src_fd, F_SETFL, flags)) + goto error_msg; + } +#endif + + // Stat the source file. We need the result also when we copy + // the permissions, and when unlinking. + if (fstat(pair->src_fd, &pair->src_st)) + goto error_msg; + + if (S_ISDIR(pair->src_st.st_mode)) { + message_warning(_("%s: Is a directory, skipping"), + pair->src_name); + goto error; + } + + if (reg_files_only) { + if (!S_ISREG(pair->src_st.st_mode)) { + message_warning(_("%s: Not a regular file, " + "skipping"), pair->src_name); + goto error; + } + + // These are meaningless on Windows. 
+#ifndef TUKLIB_DOSLIKE + if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) { + // gzip rejects setuid and setgid files even + // when --force was used. bzip2 doesn't check + // for them, but calls fchown() after fchmod(), + // and many systems automatically drop setuid + // and setgid bits there. + // + // We accept setuid and setgid files if + // --force was used. We drop these bits + // explicitly in io_copy_attrs(). + // + // NOTE(review): none of the three checks in this + // #ifndef block tests opt_force, so as written + // --force does not bypass them. Confirm which of + // the comment and the code is intended. + message_warning(_("%s: File has setuid or " + "setgid bit set, skipping"), + pair->src_name); + goto error; + } + + if (pair->src_st.st_mode & S_ISVTX) { + message_warning(_("%s: File has sticky bit " + "set, skipping"), + pair->src_name); + goto error; + } + + if (pair->src_st.st_nlink > 1) { + message_warning(_("%s: Input file has more " + "than one hard link, " + "skipping"), pair->src_name); + goto error; + } +#endif + } + + return false; + +error_msg: + message_error("%s: %s", pair->src_name, strerror(errno)); +error: + (void)close(pair->src_fd); + return true; +} + + +/// Opens src_name for reading. Returns a pointer to a statically allocated +/// file_pair, which is overwritten by every call, or NULL if the filename +/// is empty or opening the file fails. +extern file_pair * +io_open_src(const char *src_name) +{ + if (is_empty_filename(src_name)) + return NULL; + + // Since we have only one file open at a time, we can use + // a statically allocated structure. + static file_pair pair; + + pair = (file_pair){ + .src_name = src_name, + .dest_name = NULL, + .src_fd = -1, + .dest_fd = -1, + .src_eof = false, + .dest_try_sparse = false, + .dest_pending_sparse = 0, + }; + + // Block the signals, for which we have a custom signal handler, so + // that we don't need to worry about EINTR. + signals_block(); + const bool error = io_open_src_real(&pair); + signals_unblock(); + + return error ? NULL : &pair; +} + + +/// \brief Closes source file of the file_pair structure +/// +/// \param pair File whose src_fd should be closed +/// \param success If true, the file will be removed from the disk +/// unless --keep has been used. The result of close() +/// is ignored.
+static void +io_close_src(file_pair *pair, bool success) +{ + if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) { +#ifdef TUKLIB_DOSLIKE + (void)close(pair->src_fd); +#endif + + // If we are going to unlink(), do it before closing the file. + // This way there's no risk that someone replaces the file and + // happens to get same inode number, which would make us + // unlink() wrong file. + // + // NOTE: DOS-like systems are an exception to this, because + // they don't allow unlinking files that are open. *sigh* + if (success && !opt_keep_original) + io_unlink(pair->src_name, &pair->src_st); + +#ifndef TUKLIB_DOSLIKE + (void)close(pair->src_fd); +#endif + } + + return; +} + + +/// Opens the destination file, or selects standard output when opt_stdout +/// is set or the source is standard input. Returns false on success and +/// true on error. On the error paths below, an allocated dest_name is +/// freed before returning. +static bool +io_open_dest_real(file_pair *pair) +{ + if (opt_stdout || pair->src_fd == STDIN_FILENO) { + // We don't modify or free() this. + pair->dest_name = (char *)"(stdout)"; + pair->dest_fd = STDOUT_FILENO; +#ifdef TUKLIB_DOSLIKE + setmode(STDOUT_FILENO, O_BINARY); +#endif + } else { + pair->dest_name = suffix_get_dest_name(pair->src_name); + if (pair->dest_name == NULL) + return true; + + // If --force was used, unlink the target file first. + if (opt_force && unlink(pair->dest_name) && errno != ENOENT) { + message_error(_("%s: Cannot remove: %s"), + pair->dest_name, strerror(errno)); + free(pair->dest_name); + return true; + } + + // Open the file. + const int flags = O_WRONLY | O_BINARY | O_NOCTTY + | O_CREAT | O_EXCL; + const mode_t mode = S_IRUSR | S_IWUSR; + pair->dest_fd = open(pair->dest_name, flags, mode); + + if (pair->dest_fd == -1) { + message_error("%s: %s", pair->dest_name, + strerror(errno)); + free(pair->dest_name); + return true; + } + } + + // If this really fails... well, we have a safe fallback.
+ if (fstat(pair->dest_fd, &pair->dest_st)) { +#if defined(__VMS) + pair->dest_st.st_ino[0] = 0; + pair->dest_st.st_ino[1] = 0; + pair->dest_st.st_ino[2] = 0; +#elif !defined(TUKLIB_DOSLIKE) + pair->dest_st.st_dev = 0; + pair->dest_st.st_ino = 0; +#endif +#ifndef TUKLIB_DOSLIKE + } else if (try_sparse && opt_mode == MODE_DECOMPRESS) { + // When writing to standard output, we need to be extra + // careful: + // - It may be connected to something else than + // a regular file. + // - We aren't necessarily writing to a new empty file + // or to the end of an existing file. + // - O_APPEND may be active. + // + // TODO: I'm keeping this disabled for DOS-like systems + // for now. FAT doesn't support sparse files, but NTFS + // does, so maybe this should be enabled on Windows after + // some testing. + if (pair->dest_fd == STDOUT_FILENO) { + if (!S_ISREG(pair->dest_st.st_mode)) + return false; + + const int flags = fcntl(STDOUT_FILENO, F_GETFL); + if (flags == -1) + return false; + + if (flags & O_APPEND) { + // Creating a sparse file is not possible + // when O_APPEND is active (it's used by + // shell's >> redirection). As I understand + // it, it is safe to temporarily disable + // O_APPEND in xz, because if someone + // happened to write to the same file at the + // same time, results would be bad anyway + // (users shouldn't assume that xz uses any + // specific block size when writing data). + // + // The write position may be something else + // than the end of the file, so we must fix + // it to start writing at the end of the file + // to imitate O_APPEND. + if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1) + return false; + + // Clear only O_APPEND and keep every other + // status flag returned by F_GETFL above. + // stdout_flags must not be used here: it is + // not assigned until after this call. + if (fcntl(STDOUT_FILENO, F_SETFL, + flags & ~O_APPEND)) + return false; + + // Remember the flags so that io_close_dest() + // can restore them. + stdout_flags = flags; + + } else if (lseek(STDOUT_FILENO, 0, SEEK_CUR) + != pair->dest_st.st_size) { + // Writing won't start exactly at the end + // of the file.
We cannot use sparse output, + // because it would probably corrupt the file. + return false; + } + } + + pair->dest_try_sparse = true; +#endif + } + + return false; +} + + +extern bool +io_open_dest(file_pair *pair) +{ + signals_block(); + const bool ret = io_open_dest_real(pair); + signals_unblock(); + return ret; +} + + +/// \brief Closes destination file of the file_pair structure +/// +/// \param pair File whose dest_fd should be closed +/// \param success If false, the file will be removed from the disk. +/// +/// \return False if closing succeeds. On error, true is returned and +/// an error message is printed. +static bool +io_close_dest(file_pair *pair, bool success) +{ +#ifndef TUKLIB_DOSLIKE + // If io_open_dest() has disabled O_APPEND, restore it here. + if (stdout_flags != 0) { + assert(pair->dest_fd == STDOUT_FILENO); + + const int fail = fcntl(STDOUT_FILENO, F_SETFL, stdout_flags); + stdout_flags = 0; + + if (fail) { + message_error(_("Error restoring the O_APPEND flag " + "to standard output: %s"), + strerror(errno)); + return true; + } + } +#endif + + if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO) + return false; + + if (close(pair->dest_fd)) { + message_error(_("%s: Closing the file failed: %s"), + pair->dest_name, strerror(errno)); + + // Closing destination file failed, so we cannot trust its + // contents. Get rid of junk: + io_unlink(pair->dest_name, &pair->dest_st); + free(pair->dest_name); + return true; + } + + // If the operation using this file wasn't successful, we get rid + // of the junk file. + if (!success) + io_unlink(pair->dest_name, &pair->dest_st); + + free(pair->dest_name); + + return false; +} + + +extern void +io_close(file_pair *pair, bool success) +{ + // Take care of sparseness at the end of the output file. + if (success && pair->dest_try_sparse + && pair->dest_pending_sparse > 0) { + // Seek forward one byte less than the size of the pending + // hole, then write one zero-byte.
This way the file grows + // to its correct size. An alternative would be to use + // ftruncate() but that isn't portable enough (e.g. it + // doesn't work with FAT on Linux; FAT isn't that important + // since it doesn't support sparse files anyway, but we don't + // want to create corrupt files on it). + if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1, + SEEK_CUR) == -1) { + message_error(_("%s: Seeking failed when trying " + "to create a sparse file: %s"), + pair->dest_name, strerror(errno)); + success = false; + } else { + const uint8_t zero[1] = { '\0' }; + if (io_write_buf(pair, zero, 1)) + success = false; + } + } + + signals_block(); + + // Copy the file attributes. We need to skip this if destination + // file isn't open or it is standard output. + if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO) + io_copy_attrs(pair); + + // Close the destination first. If it fails, we must not remove + // the source file! + if (io_close_dest(pair, success)) + success = false; + + // Close the source file, and unlink it if the operation using this + // file pair was successful and we haven't requested to keep the + // source file. + io_close_src(pair, success); + + signals_unblock(); + + return; +} + + +extern size_t +io_read(file_pair *pair, io_buf *buf_union, size_t size) +{ + // We use small buffers here. + assert(size < SSIZE_MAX); + + uint8_t *buf = buf_union->u8; + size_t left = size; + + while (left > 0) { + const ssize_t amount = read(pair->src_fd, buf, left); + + if (amount == 0) { + pair->src_eof = true; + break; + } + + if (amount == -1) { + if (errno == EINTR) { + if (user_abort) + return SIZE_MAX; + + continue; + } + + message_error(_("%s: Read error: %s"), + pair->src_name, strerror(errno)); + + // FIXME Is this needed? 
+ pair->src_eof = true; + + return SIZE_MAX; + } + + buf += (size_t)(amount); + left -= (size_t)(amount); + } + + return size - left; +} + + +extern bool +io_pread(file_pair *pair, io_buf *buf, size_t size, off_t pos) +{ + // Using lseek() and read() is more portable than pread() and + // for us it is as good as real pread(). + if (lseek(pair->src_fd, pos, SEEK_SET) != pos) { + message_error(_("%s: Error seeking the file: %s"), + pair->src_name, strerror(errno)); + return true; + } + + const size_t amount = io_read(pair, buf, size); + if (amount == SIZE_MAX) + return true; + + if (amount != size) { + message_error(_("%s: Unexpected end of file"), + pair->src_name); + return true; + } + + return false; +} + + +static bool +is_sparse(const io_buf *buf) +{ + assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0); + + for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i) + if (buf->u64[i] != 0) + return false; + + return true; +} + + +static bool +io_write_buf(file_pair *pair, const uint8_t *buf, size_t size) +{ + assert(size < SSIZE_MAX); + + while (size > 0) { + const ssize_t amount = write(pair->dest_fd, buf, size); + if (amount == -1) { + if (errno == EINTR) { + if (user_abort) + return -1; + + continue; + } + + // Handle broken pipe specially. gzip and bzip2 + // don't print anything on SIGPIPE. In addition, + // gzip --quiet uses exit status 2 (warning) on + // broken pipe instead of whatever raise(SIGPIPE) + // would make it return. It is there to hide "Broken + // pipe" message on some old shells (probably old + // GNU bash). + // + // We don't do anything special with --quiet, which + // is what bzip2 does too. If we get SIGPIPE, we + // will handle it like other signals by setting + // user_abort, and get EPIPE here. 
+ if (errno != EPIPE) + message_error(_("%s: Write error: %s"), + pair->dest_name, strerror(errno)); + + return true; + } + + buf += (size_t)(amount); + size -= (size_t)(amount); + } + + return false; +} + + +extern bool +io_write(file_pair *pair, const io_buf *buf, size_t size) +{ + assert(size <= IO_BUFFER_SIZE); + + if (pair->dest_try_sparse) { + // Check if the block is sparse (contains only zeros). If it + // is sparse, we just store the amount and return. We will take + // care of actually skipping over the hole when we hit the + // next data block or close the file. + // + // Since io_close() requires that dest_pending_sparse > 0 + // if the file ends with sparse block, we must also return + // if size == 0 to avoid doing the lseek(). + if (size == IO_BUFFER_SIZE) { + if (is_sparse(buf)) { + pair->dest_pending_sparse += size; + return false; + } + } else if (size == 0) { + return false; + } + + // This is not a sparse block. If we have a pending hole, + // skip it now. + if (pair->dest_pending_sparse > 0) { + if (lseek(pair->dest_fd, pair->dest_pending_sparse, + SEEK_CUR) == -1) { + message_error(_("%s: Seeking failed when " + "trying to create a sparse " + "file: %s"), pair->dest_name, + strerror(errno)); + return true; + } + + pair->dest_pending_sparse = 0; + } + } + + return io_write_buf(pair, buf->u8, size); +} diff --git a/src/xz/file_io.h b/src/xz/file_io.h new file mode 100644 index 000000000000..967da868b079 --- /dev/null +++ b/src/xz/file_io.h @@ -0,0 +1,129 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file file_io.h +/// \brief I/O types and functions +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +// Some systems have suboptimal BUFSIZ. Use a bit bigger value on them.
+// We also need that IO_BUFFER_SIZE is a multiple of 8 (sizeof(uint64_t)) +#if BUFSIZ <= 1024 +# define IO_BUFFER_SIZE 8192 +#else +# define IO_BUFFER_SIZE (BUFSIZ & ~7U) +#endif + + +/// is_sparse() accesses the buffer as uint64_t for maximum speed. +/// Use an union to make sure that the buffer is properly aligned. +typedef union { + uint8_t u8[IO_BUFFER_SIZE]; + uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)]; + uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)]; +} io_buf; + + +typedef struct { + /// Name of the source filename (as given on the command line) or + /// pointer to static "(stdin)" when reading from standard input. + const char *src_name; + + /// Destination filename converted from src_name or pointer to static + /// "(stdout)" when writing to standard output. + char *dest_name; + + /// File descriptor of the source file + int src_fd; + + /// File descriptor of the target file + int dest_fd; + + /// True once end of the source file has been detected. + bool src_eof; + + /// If true, we look for long chunks of zeros and try to create + /// a sparse file. + bool dest_try_sparse; + + /// This is used only if dest_try_sparse is true. This holds the + /// number of zero bytes we haven't written out, because we plan + /// to make that byte range a sparse chunk. + off_t dest_pending_sparse; + + /// Stat of the source file. + struct stat src_st; + + /// Stat of the destination file. 
+ struct stat dest_st; + +} file_pair; + + +/// \brief Initialize the I/O module +extern void io_init(void); + + +/// \brief Disable creation of sparse files when decompressing +extern void io_no_sparse(void); + + +/// \brief Open the source file +extern file_pair *io_open_src(const char *src_name); + + +/// \brief Open the destination file +extern bool io_open_dest(file_pair *pair); + + +/// \brief Closes the file descriptors and frees possible allocated memory +/// +/// The success argument determines if source or destination file gets +/// unlinked: +/// - false: The destination file is unlinked. +/// - true: The source file is unlinked unless writing to stdout or --keep +/// was used. +extern void io_close(file_pair *pair, bool success); + + +/// \brief Reads from the source file to a buffer +/// +/// \param pair File pair having the source file open for reading +/// \param buf Destination buffer to hold the read data +/// \param size Size of the buffer; assumed be smaller than SSIZE_MAX +/// +/// \return On success, number of bytes read is returned. On end of +/// file zero is returned and pair->src_eof set to true. +/// On error, SIZE_MAX is returned and error message printed. +extern size_t io_read(file_pair *pair, io_buf *buf, size_t size); + + +/// \brief Read from source file from given offset to a buffer +/// +/// This is remotely similar to standard pread(). This uses lseek() though, +/// so the read offset is changed on each call. +/// +/// \param pair Seekable source file +/// \param buf Destination buffer +/// \param size Amount of data to read +/// \param pos Offset relative to the beginning of the file, +/// from which the data should be read. +/// +/// \return On success, false is returned. On error, error message +/// is printed and true is returned. 
+extern bool io_pread(file_pair *pair, io_buf *buf, size_t size, off_t pos); + + +/// \brief Writes a buffer to the destination file +/// +/// \param pair File pair having the destination file open for writing +/// \param buf Buffer containing the data to be written +/// \param size Number of bytes to write from buf; must not exceed +/// IO_BUFFER_SIZE +/// +/// \return On success, false is returned. On error, true is returned; +/// an error message is printed unless the error was EPIPE +/// or the write was interrupted by a user abort. +extern bool io_write(file_pair *pair, const io_buf *buf, size_t size); diff --git a/src/xz/hardware.c b/src/xz/hardware.c new file mode 100644 index 000000000000..74742fcec870 --- /dev/null +++ b/src/xz/hardware.c @@ -0,0 +1,112 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file hardware.c +/// \brief Detection of available hardware resources +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" +#include "tuklib_cpucores.h" + + +/// Maximum number of free *coder* threads. This can be set with +/// the --threads=NUM command line option. +static uint32_t threadlimit; + +/// Memory usage limit +static uint64_t memlimit; + +/// Total amount of physical RAM +static uint64_t total_ram; + + +extern void +hardware_threadlimit_set(uint32_t new_threadlimit) +{ + if (new_threadlimit == 0) { + // The default is the number of available CPU cores.
+ threadlimit = tuklib_cpucores(); + if (threadlimit == 0) + threadlimit = 1; + } else { + threadlimit = new_threadlimit; + } + + return; +} + + +extern uint32_t +hardware_threadlimit_get(void) +{ + return threadlimit; +} + + +extern void +hardware_memlimit_set(uint64_t new_memlimit) +{ + if (new_memlimit != 0) { + memlimit = new_memlimit; + } else { + // The default depends on the amount of RAM but so that + // on "low-memory" systems the relative limit is higher + // to make it more likely that files created with "xz -9" + // will still decompress without overriding the limit + // manually. + // + // If 40 % of RAM is 80 MiB or more, use 40 % of RAM as + // the limit. + memlimit = 40 * total_ram / 100; + if (memlimit < UINT64_C(80) * 1024 * 1024) { + // If 80 % of RAM is less than 80 MiB, + // use 80 % of RAM as the limit. + memlimit = 80 * total_ram / 100; + if (memlimit > UINT64_C(80) * 1024 * 1024) { + // Otherwise use 80 MiB as the limit. + memlimit = UINT64_C(80) * 1024 * 1024; + } + } + } + + return; +} + + +extern void +hardware_memlimit_set_percentage(uint32_t percentage) +{ + assert(percentage > 0); + assert(percentage <= 100); + + memlimit = percentage * total_ram / 100; + return; +} + + +extern uint64_t +hardware_memlimit_get(void) +{ + return memlimit; +} + + +extern void +hardware_init(void) +{ + // Get the amount of RAM. If we cannot determine it, + // use the assumption defined by the configure script. + total_ram = lzma_physmem(); + if (total_ram == 0) + total_ram = (uint64_t)(ASSUME_RAM) * 1024 * 1024; + + // Set the defaults. 
+ hardware_memlimit_set(0); + hardware_threadlimit_set(0); + return; +} diff --git a/src/xz/hardware.h b/src/xz/hardware.h new file mode 100644 index 000000000000..b2cf34cbc8cd --- /dev/null +++ b/src/xz/hardware.h @@ -0,0 +1,35 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file hardware.h +/// \brief Detection of available hardware resources +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +/// Initialize some hardware-specific variables, which are needed by other +/// hardware_* functions. +extern void hardware_init(void); + + +/// Set custom value for maximum number of coder threads. Zero selects the +/// default, which is the number of available CPU cores (or one if that +/// cannot be determined). +extern void hardware_threadlimit_set(uint32_t threadlimit); + +/// Get the maximum number of coder threads. (Some additional helper threads +/// are allowed on top of this.) +extern uint32_t hardware_threadlimit_get(void); + + +/// Set custom memory usage limit. This is used for both encoding and +/// decoding. Zero indicates resetting the limit back to defaults. +extern void hardware_memlimit_set(uint64_t memlimit); + +/// Set custom memory usage limit as a percentage of installed RAM. +/// The percentage must be in the range [1, 100]. +extern void hardware_memlimit_set_percentage(uint32_t percentage); + +/// Get the current memory usage limit. +extern uint64_t hardware_memlimit_get(void); diff --git a/src/xz/list.c b/src/xz/list.c new file mode 100644 index 000000000000..91707b918b47 --- /dev/null +++ b/src/xz/list.c @@ -0,0 +1,742 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file list.c +/// \brief Listing information about .xz files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file.
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" +#include "tuklib_integer.h" + + +/// Totals that are displayed if there was more than one file. +/// The "files" counter is also used in print_info_adv() to show +/// the file number. +static struct { + uint64_t files; + uint64_t streams; + uint64_t blocks; + uint64_t compressed_size; + uint64_t uncompressed_size; + uint32_t checks; +} totals = { 0, 0, 0, 0, 0, 0 }; + + +/// \brief Parse the Index(es) from the given .xz file +/// +/// \param idx If decoding is successful, *idx will be set to point +/// to lzma_index containing the decoded information. +/// On error, *idx is not modified. +/// \param pair Input file +/// +/// \return On success, false is returned. On error, true is returned. +/// +// TODO: This function is pretty big. liblzma should have a function that +// takes a callback function to parse the Index(es) from a .xz file to make +// it easy for applications. +static bool +parse_indexes(lzma_index **idx, file_pair *pair) +{ + if (pair->src_st.st_size <= 0) { + message_error(_("%s: File is empty"), pair->src_name); + return true; + } + + if (pair->src_st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) { + message_error(_("%s: Too small to be a valid .xz file"), + pair->src_name); + return true; + } + + io_buf buf; + lzma_stream_flags header_flags; + lzma_stream_flags footer_flags; + lzma_ret ret; + + // lzma_stream for the Index decoder + lzma_stream strm = LZMA_STREAM_INIT; + + // All Indexes decoded so far + lzma_index *combined_index = NULL; + + // The Index currently being decoded + lzma_index *this_index = NULL; + + // Current position in the file. We parse the file backwards so + // initialize it to point to the end of the file. + off_t pos = pair->src_st.st_size; + + // Each loop iteration decodes one Index. + do { + // Check that there is enough data left to contain at least + // the Stream Header and Stream Footer. 
This check cannot + // fail in the first pass of this loop. + if (pos < 2 * LZMA_STREAM_HEADER_SIZE) { + message_error("%s: %s", pair->src_name, + message_strm(LZMA_DATA_ERROR)); + goto error; + } + + pos -= LZMA_STREAM_HEADER_SIZE; + lzma_vli stream_padding = 0; + + // Locate the Stream Footer. There may be Stream Padding which + // we must skip when reading backwards. + while (true) { + if (pos < LZMA_STREAM_HEADER_SIZE) { + message_error("%s: %s", pair->src_name, + message_strm( + LZMA_DATA_ERROR)); + goto error; + } + + if (io_pread(pair, &buf, + LZMA_STREAM_HEADER_SIZE, pos)) + goto error; + + // Stream Padding is always a multiple of four bytes. + int i = 2; + if (buf.u32[i] != 0) + break; + + // To avoid calling io_pread() for every four bytes + // of Stream Padding, take advantage that we read + // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and + // check them too before calling io_pread() again. + do { + stream_padding += 4; + pos -= 4; + --i; + } while (i >= 0 && buf.u32[i] == 0); + } + + // Decode the Stream Footer. + ret = lzma_stream_footer_decode(&footer_flags, buf.u8); + if (ret != LZMA_OK) { + message_error("%s: %s", pair->src_name, + message_strm(ret)); + goto error; + } + + // Check that the size of the Index field looks sane. + lzma_vli index_size = footer_flags.backward_size; + if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) { + message_error("%s: %s", pair->src_name, + message_strm(LZMA_DATA_ERROR)); + goto error; + } + + // Set pos to the beginning of the Index. + pos -= index_size; + + // See how much memory we can use for decoding this Index. + uint64_t memlimit = hardware_memlimit_get(); + uint64_t memused = 0; + if (combined_index != NULL) { + memused = lzma_index_memused(combined_index); + if (memused > memlimit) + message_bug(); + + memlimit -= memused; + } + + // Decode the Index. 
+ ret = lzma_index_decoder(&strm, &this_index, memlimit); + if (ret != LZMA_OK) { + message_error("%s: %s", pair->src_name, + message_strm(ret)); + goto error; + } + + do { + // Don't give the decoder more input than the + // Index size. + strm.avail_in = MIN(IO_BUFFER_SIZE, index_size); + if (io_pread(pair, &buf, strm.avail_in, pos)) + goto error; + + pos += strm.avail_in; + index_size -= strm.avail_in; + + strm.next_in = buf.u8; + ret = lzma_code(&strm, LZMA_RUN); + + } while (ret == LZMA_OK); + + // If the decoding seems to be successful, check also that + // the Index decoder consumed as much input as indicated + // by the Backward Size field. + if (ret == LZMA_STREAM_END) + if (index_size != 0 || strm.avail_in != 0) + ret = LZMA_DATA_ERROR; + + if (ret != LZMA_STREAM_END) { + // LZMA_BUF_ERROR means that the Index decoder + // would have liked more input than what the Index + // size should be according to Stream Footer. + // The message for LZMA_DATA_ERROR makes more + // sense in that case. + if (ret == LZMA_BUF_ERROR) + ret = LZMA_DATA_ERROR; + + message_error("%s: %s", pair->src_name, + message_strm(ret)); + + // If the error was too low memory usage limit, + // show also how much memory would have been needed. + if (ret == LZMA_MEMLIMIT_ERROR) { + uint64_t needed = lzma_memusage(&strm); + if (UINT64_MAX - needed < memused) + needed = UINT64_MAX; + else + needed += memused; + + message_mem_needed(V_ERROR, needed); + } + + goto error; + } + + // Decode the Stream Header and check that its Stream Flags + // match the Stream Footer.
+ pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE; + if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) { + message_error("%s: %s", pair->src_name, + message_strm(LZMA_DATA_ERROR)); + goto error; + } + + pos -= lzma_index_total_size(this_index); + if (io_pread(pair, &buf, LZMA_STREAM_HEADER_SIZE, pos)) + goto error; + + ret = lzma_stream_header_decode(&header_flags, buf.u8); + if (ret != LZMA_OK) { + message_error("%s: %s", pair->src_name, + message_strm(ret)); + goto error; + } + + ret = lzma_stream_flags_compare(&header_flags, &footer_flags); + if (ret != LZMA_OK) { + message_error("%s: %s", pair->src_name, + message_strm(ret)); + goto error; + } + + // Store the decoded Stream Flags into this_index. This is + // needed so that we can print which Check is used in each + // Stream. + ret = lzma_index_stream_flags(this_index, &footer_flags); + if (ret != LZMA_OK) + message_bug(); + + // Store also the size of the Stream Padding field. It is + // needed to show the offsets of the Streams correctly. + ret = lzma_index_stream_padding(this_index, stream_padding); + if (ret != LZMA_OK) + message_bug(); + + if (combined_index != NULL) { + // Append the earlier decoded Indexes + // after this_index. + ret = lzma_index_cat( + this_index, combined_index, NULL); + if (ret != LZMA_OK) { + message_error("%s: %s", pair->src_name, + message_strm(ret)); + goto error; + } + } + + combined_index = this_index; + this_index = NULL; + + } while (pos > 0); + + lzma_end(&strm); + + // All OK. Make combined_index available to the caller. + *idx = combined_index; + return false; + +error: + // Something went wrong, free the allocated memory. + lzma_end(&strm); + lzma_index_end(combined_index, NULL); + lzma_index_end(this_index, NULL); + return true; +} + + +/// \brief Get the compression ratio +/// +/// This has slightly different format than that is used by in message.c. 
+static const char * +get_ratio(uint64_t compressed_size, uint64_t uncompressed_size) +{ + if (uncompressed_size == 0) + return "---"; + + const double ratio = (double)(compressed_size) + / (double)(uncompressed_size); + if (ratio > 9.999) + return "---"; + + static char buf[6]; + snprintf(buf, sizeof(buf), "%.3f", ratio); + return buf; +} + + +static const char check_names[LZMA_CHECK_ID_MAX + 1][12] = { + "None", + "CRC32", + "Unknown-2", + "Unknown-3", + "CRC64", + "Unknown-5", + "Unknown-6", + "Unknown-7", + "Unknown-8", + "Unknown-9", + "SHA-256", + "Unknown-11", + "Unknown-12", + "Unknown-13", + "Unknown-14", + "Unknown-15", +}; + + +/// \brief Get a comma-separated list of Check names +/// +/// \param checks Bit mask of Checks to print +/// \param space_after_comma +/// It's better to not use spaces in table-like listings, +/// but in more verbose formats a space after a comma +/// is good for readability. +static const char * +get_check_names(uint32_t checks, bool space_after_comma) +{ + assert(checks != 0); + + static char buf[sizeof(check_names)]; + char *pos = buf; + size_t left = sizeof(buf); + + const char *sep = space_after_comma ? ", " : ","; + bool comma = false; + + for (size_t i = 0; i <= LZMA_CHECK_ID_MAX; ++i) { + if (checks & (UINT32_C(1) << i)) { + my_snprintf(&pos, &left, "%s%s", + comma ? sep : "", check_names[i]); + comma = true; + } + } + + return buf; +} + + +/// \brief Read the Check value from the .xz file and print it +/// +/// Since this requires a seek, listing all Check values for all Blocks can +/// be slow. +/// +/// \param pair Input file +/// \param iter Location of the Block whose Check value should +/// be printed. +/// +/// \return False on success, true on I/O error. +static bool +print_check_value(file_pair *pair, const lzma_index_iter *iter) +{ + // Don't read anything from the file if there is no integrity Check. 
+ if (iter->stream.flags->check == LZMA_CHECK_NONE) { + printf("---"); + return false; + } + + // Locate and read the Check field. + const uint32_t size = lzma_check_size(iter->stream.flags->check); + const off_t offset = iter->block.compressed_file_offset + + iter->block.total_size - size; + io_buf buf; + if (io_pread(pair, &buf, size, offset)) + return true; + + // CRC32 and CRC64 are in little endian. Guess that all the future + // 32-bit and 64-bit Check values are little endian too. It shouldn't + // be a too big problem if this guess is wrong. + if (size == 4) { + printf("%08" PRIx32, conv32le(buf.u32[0])); + } else if (size == 8) { + printf("%016" PRIx64, conv64le(buf.u64[0])); + } else { + for (size_t i = 0; i < size; ++i) + printf("%02x", buf.u8[i]); + } + + return false; +} + + +static void +print_info_basic(const lzma_index *idx, file_pair *pair) +{ + static bool headings_displayed = false; + if (!headings_displayed) { + headings_displayed = true; + // TRANSLATORS: These are column titles. From Strms (Streams) + // to Ratio, the columns are right aligned. Check and Filename + // are left aligned. If you need longer words, it's OK to + // use two lines here. Test with xz --list. 
+ puts(_("Strms Blocks Compressed Uncompressed Ratio " + "Check Filename")); + } + + printf("%5s %7s %11s %11s %5s %-7s %s\n", + uint64_to_str(lzma_index_stream_count(idx), 0), + uint64_to_str(lzma_index_block_count(idx), 1), + uint64_to_nicestr(lzma_index_file_size(idx), + NICESTR_B, NICESTR_TIB, false, 2), + uint64_to_nicestr(lzma_index_uncompressed_size(idx), + NICESTR_B, NICESTR_TIB, false, 3), + get_ratio(lzma_index_file_size(idx), + lzma_index_uncompressed_size(idx)), + get_check_names(lzma_index_checks(idx), false), + pair->src_name); + + return; +} + + +static void +print_adv_helper(uint64_t stream_count, uint64_t block_count, + uint64_t compressed_size, uint64_t uncompressed_size, + uint32_t checks) +{ + printf(_(" Stream count: %s\n"), + uint64_to_str(stream_count, 0)); + printf(_(" Block count: %s\n"), + uint64_to_str(block_count, 0)); + printf(_(" Compressed size: %s\n"), + uint64_to_nicestr(compressed_size, + NICESTR_B, NICESTR_TIB, true, 0)); + printf(_(" Uncompressed size: %s\n"), + uint64_to_nicestr(uncompressed_size, + NICESTR_B, NICESTR_TIB, true, 0)); + printf(_(" Ratio: %s\n"), + get_ratio(compressed_size, uncompressed_size)); + printf(_(" Check: %s\n"), + get_check_names(checks, true)); + return; +} + + +static void +print_info_adv(const lzma_index *idx, file_pair *pair) +{ + // Print the overall information. + print_adv_helper(lzma_index_stream_count(idx), + lzma_index_block_count(idx), + lzma_index_file_size(idx), + lzma_index_uncompressed_size(idx), + lzma_index_checks(idx)); + + // TODO: The rest of this function needs some work. Currently + // the offsets are not printed, which could be useful even when + // printed in a less accurate format. On the other hand, maybe + // this should print the information with exact byte values, + // or maybe there should be at least an option to do that. + // + // We could also display some other info. E.g. 
it could be useful + // to quickly see how big is the biggest Block (uncompressed size) + // and if all Blocks have Compressed Size and Uncompressed Size + // fields present, which can be used e.g. for multithreaded + // decompression. + + // Avoid printing Stream and Block lists when they wouldn't be useful. + bool show_blocks = false; + if (lzma_index_stream_count(idx) > 1) { + puts(_(" Streams:")); + puts(_(" Number Blocks Compressed " + "Uncompressed Ratio Check")); + + lzma_index_iter iter; + lzma_index_iter_init(&iter, idx); + while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM)) { + if (iter.stream.block_count > 1) + show_blocks = true; + + printf(" %8s %10s %11s %11s %5s %s\n", + uint64_to_str(iter.stream.number, 0), + uint64_to_str(iter.stream.block_count, 1), + uint64_to_nicestr( + iter.stream.compressed_size, + NICESTR_B, NICESTR_TIB, false, 2), + uint64_to_nicestr( + iter.stream.uncompressed_size, + NICESTR_B, NICESTR_TIB, false, 3), + get_ratio(iter.stream.compressed_size, + iter.stream.uncompressed_size), + check_names[iter.stream.flags->check]); + } + } + + if (show_blocks || lzma_index_block_count(idx) + > lzma_index_stream_count(idx) + || message_verbosity_get() >= V_DEBUG) { + puts(_(" Blocks:")); + // FIXME: Number in Stream/file, which one is better? 
+ puts(_(" Stream Number Compressed " + "Uncompressed Ratio Check")); + + lzma_index_iter iter; + lzma_index_iter_init(&iter, idx); + while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) { + printf(" %8s %10s %11s %11s %5s %-7s", + uint64_to_str(iter.stream.number, 0), + uint64_to_str(iter.block.number_in_stream, 1), + uint64_to_nicestr(iter.block.total_size, + NICESTR_B, NICESTR_TIB, false, 2), + uint64_to_nicestr( + iter.block.uncompressed_size, + NICESTR_B, NICESTR_TIB, false, 3), + get_ratio(iter.block.total_size, + iter.block.uncompressed_size), + check_names[iter.stream.flags->check]); + + if (message_verbosity_get() >= V_DEBUG) + if (print_check_value(pair, &iter)) + return; + + putchar('\n'); + } + } +} + + +static void +print_info_robot(const lzma_index *idx, file_pair *pair) +{ + printf("file\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%s\t%s\t%s\n", + lzma_index_stream_count(idx), + lzma_index_block_count(idx), + lzma_index_file_size(idx), + lzma_index_uncompressed_size(idx), + get_ratio(lzma_index_file_size(idx), + lzma_index_uncompressed_size(idx)), + get_check_names(lzma_index_checks(idx), false), + pair->src_name); + + if (message_verbosity_get() >= V_VERBOSE) { + lzma_index_iter iter; + lzma_index_iter_init(&iter, idx); + + while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM)) + printf("stream\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%" PRIu64 "\t%" PRIu64 + "\t%s\t%" PRIu64 "\t%s\n", + iter.stream.number, + iter.stream.compressed_offset, + iter.stream.uncompressed_offset, + iter.stream.compressed_size, + iter.stream.uncompressed_size, + get_ratio(iter.stream.compressed_size, + iter.stream.uncompressed_size), + iter.stream.padding, + check_names[iter.stream.flags->check]); + + lzma_index_iter_rewind(&iter); + while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) { + printf("block\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%" PRIu64 "\t%" PRIu64 + "\t%" PRIu64 "\t%" PRIu64 "\t%s\t%s", + iter.stream.number, + 
iter.block.number_in_stream, + iter.block.number_in_file, + iter.block.compressed_file_offset, + iter.block.uncompressed_file_offset, + iter.block.total_size, + iter.block.uncompressed_size, + get_ratio(iter.block.total_size, + iter.block.uncompressed_size), + check_names[iter.stream.flags->check]); + + if (message_verbosity_get() >= V_DEBUG) { + putchar('\t'); + if (print_check_value(pair, &iter)) + return; + } + + putchar('\n'); + } + } + + return; +} + + +static void +update_totals(const lzma_index *idx) +{ + // TODO: Integer overflow checks + ++totals.files; + totals.streams += lzma_index_stream_count(idx); + totals.blocks += lzma_index_block_count(idx); + totals.compressed_size += lzma_index_file_size(idx); + totals.uncompressed_size += lzma_index_uncompressed_size(idx); + totals.checks |= lzma_index_checks(idx); + return; +} + + +static void +print_totals_basic(void) +{ + // Print a separator line. + char line[80]; + memset(line, '-', sizeof(line)); + line[sizeof(line) - 1] = '\0'; + puts(line); + + // Print the totals except the file count, which needs + // special handling. + printf("%5s %7s %11s %11s %5s %-7s ", + uint64_to_str(totals.streams, 0), + uint64_to_str(totals.blocks, 1), + uint64_to_nicestr(totals.compressed_size, + NICESTR_B, NICESTR_TIB, false, 2), + uint64_to_nicestr(totals.uncompressed_size, + NICESTR_B, NICESTR_TIB, false, 3), + get_ratio(totals.compressed_size, + totals.uncompressed_size), + get_check_names(totals.checks, false)); + + // Since we print totals only when there are at least two files, + // the English message will always use "%s files". But some other + // languages need different forms for different plurals so we + // have to translate this string still. + // + // TRANSLATORS: This simply indicates the number of files shown + // by --list even though the format string uses %s. + printf(N_("%s file", "%s files\n", + totals.files <= ULONG_MAX ? 
totals.files + : (totals.files % 1000000) + 1000000), + uint64_to_str(totals.files, 0)); + + return; +} + + +static void +print_totals_adv(void) +{ + putchar('\n'); + puts(_("Totals:")); + printf(_(" Number of files: %s\n"), + uint64_to_str(totals.files, 0)); + print_adv_helper(totals.streams, totals.blocks, + totals.compressed_size, totals.uncompressed_size, + totals.checks); + + return; +} + + +static void +print_totals_robot(void) +{ + printf("totals\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%s\t%s\t%" PRIu64 "\n", + totals.streams, + totals.blocks, + totals.compressed_size, + totals.uncompressed_size, + get_ratio(totals.compressed_size, + totals.uncompressed_size), + get_check_names(totals.checks, false), + totals.files); + + return; +} + + +extern void +list_totals(void) +{ + if (opt_robot) { + // Always print totals in --robot mode. It can be convenient + // in some cases and doesn't complicate usage of the + // single-file case much. + print_totals_robot(); + + } else if (totals.files > 1) { + // For non-robot mode, totals are printed only if there + // is more than one file. + if (message_verbosity_get() <= V_WARNING) + print_totals_basic(); + else + print_totals_adv(); + } + + return; +} + + +extern void +list_file(const char *filename) +{ + if (opt_format != FORMAT_XZ && opt_format != FORMAT_AUTO) + message_fatal(_("--list works only on .xz files " + "(--format=xz or --format=auto)")); + + message_filename(filename); + + if (filename == stdin_filename) { + message_error(_("--list does not support reading from " + "standard input")); + return; + } + + // Unset opt_stdout so that io_open_src() won't accept special files. + // Set opt_force so that io_open_src() will follow symlinks. 
+ opt_stdout = false; + opt_force = true; + file_pair *pair = io_open_src(filename); + if (pair == NULL) + return; + + lzma_index *idx; + if (!parse_indexes(&idx, pair)) { + // Update the totals that are displayed after all + // the individual files have been listed. + update_totals(idx); + + // We have three main modes: + // - --robot, which has submodes if --verbose is specified + // once or twice + // - Normal --list without --verbose + // - --list with one or two --verbose + if (opt_robot) + print_info_robot(idx, pair); + else if (message_verbosity_get() <= V_WARNING) + print_info_basic(idx, pair); + else + print_info_adv(idx, pair); + + lzma_index_end(idx, NULL); + } + + io_close(pair, false); + return; +} diff --git a/src/xz/list.h b/src/xz/list.h new file mode 100644 index 000000000000..a4c6ec7dc429 --- /dev/null +++ b/src/xz/list.h @@ -0,0 +1,18 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file list.h +/// \brief List information about .xz files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +/// \brief List information about the given .xz file +extern void list_file(const char *filename); + + +/// \brief Show the totals after all files have been listed +extern void list_totals(void); diff --git a/src/xz/main.c b/src/xz/main.c new file mode 100644 index 000000000000..e0905893c89d --- /dev/null +++ b/src/xz/main.c @@ -0,0 +1,272 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file main.c +/// \brief main() +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" +#include + + +/// Exit status to use. 
This can be changed with set_exit_status(). +static enum exit_status_type exit_status = E_SUCCESS; + +/// True if --no-warn is specified. When this is true, we don't set +/// the exit status to E_WARNING when something worth a warning happens. +static bool no_warn = false; + + +extern void +set_exit_status(enum exit_status_type new_status) +{ + assert(new_status == E_WARNING || new_status == E_ERROR); + + if (exit_status != E_ERROR) + exit_status = new_status; + + return; +} + + +extern void +set_exit_no_warn(void) +{ + no_warn = true; + return; +} + + +static const char * +read_name(const args_info *args) +{ + // FIXME: Maybe we should have some kind of memory usage limit here + // like the tool has for the actual compression and decompression. + // Giving some huge text file with --files0 makes us to read the + // whole file in RAM. + static char *name = NULL; + static size_t size = 256; + + // Allocate the initial buffer. This is never freed, since after it + // is no longer needed, the program exits very soon. It is safe to + // use xmalloc() and xrealloc() in this function, because while + // executing this function, no files are open for writing, and thus + // there's no need to cleanup anything before exiting. + if (name == NULL) + name = xmalloc(size); + + // Write position in name + size_t pos = 0; + + // Read one character at a time into name. + while (!user_abort) { + const int c = fgetc(args->files_file); + + if (ferror(args->files_file)) { + // Take care of EINTR since we have established + // the signal handlers already. 
+ if (errno == EINTR) + continue; + + message_error(_("%s: Error reading filenames: %s"), + args->files_name, strerror(errno)); + return NULL; + } + + if (feof(args->files_file)) { + if (pos != 0) + message_error(_("%s: Unexpected end of input " + "when reading filenames"), + args->files_name); + + return NULL; + } + + if (c == args->files_delim) { + // We allow consecutive newline (--files) or '\0' + // characters (--files0), and ignore such empty + // filenames. + if (pos == 0) + continue; + + // A non-empty name was read. Terminate it with '\0' + // and return it. + name[pos] = '\0'; + return name; + } + + if (c == '\0') { + // A null character was found when using --files, + // which expects plain text input separated with + // newlines. + message_error(_("%s: Null character found when " + "reading filenames; maybe you meant " + "to use `--files0' instead " + "of `--files'?"), args->files_name); + return NULL; + } + + name[pos++] = c; + + // Allocate more memory if needed. There must always be space + // at least for one character to allow terminating the string + // with '\0'. + if (pos == size) { + size *= 2; + name = xrealloc(name, size); + } + } + + return NULL; +} + + +int +main(int argc, char **argv) +{ + // Set up the progname variable. + tuklib_progname_init(argv); + + // Initialize the file I/O. This makes sure that + // stdin, stdout, and stderr are something valid. + io_init(); + + // Set up the locale and message translations. + tuklib_gettext_init(PACKAGE, LOCALEDIR); + + // Initialize handling of error/warning/other messages. + message_init(); + + // Set hardware-dependent default values. These can be overriden + // on the command line, thus this must be done before args_parse(). + hardware_init(); + + // Parse the command line arguments and get an array of filenames. + // This doesn't return if something is wrong with the command line + // arguments. If there are no arguments, one filename ("-") is still + // returned to indicate stdin. 
+ args_info args; + args_parse(&args, argc, argv); + + if (opt_mode != MODE_LIST && opt_robot) + message_fatal(_("Compression and decompression with --robot " + "are not supported yet.")); + + // Tell the message handling code how many input files there are if + // we know it. This way the progress indicator can show it. + if (args.files_name != NULL) + message_set_files(0); + else + message_set_files(args.arg_count); + + // Refuse to write compressed data to standard output if it is + // a terminal. + if (opt_mode == MODE_COMPRESS) { + if (opt_stdout || (args.arg_count == 1 + && strcmp(args.arg_names[0], "-") == 0)) { + if (is_tty_stdout()) { + message_try_help(); + tuklib_exit(E_ERROR, E_ERROR, false); + } + } + } + + // Set up the signal handlers. We don't need these before we + // start the actual action and not in --list mode, so this is + // done after parsing the command line arguments. + // + // It's good to keep signal handlers in normal compression and + // decompression modes even when only writing to stdout, because + // we might need to restore O_APPEND flag on stdout before exiting. + // In --test mode, signal handlers aren't really needed, but let's + // keep them there for consistency with normal decompression. + if (opt_mode != MODE_LIST) + signals_init(); + + // coder_run() handles compression, decompression, and testing. + // list_file() is for --list. + void (*run)(const char *filename) = opt_mode == MODE_LIST + ? &list_file : &coder_run; + + // Process the files given on the command line. Note that if no names + // were given, args_parse() gave us a fake "-" filename. + for (size_t i = 0; i < args.arg_count && !user_abort; ++i) { + if (strcmp("-", args.arg_names[i]) == 0) { + // Processing from stdin to stdout. Check that we + // aren't writing compressed data to a terminal or + // reading it from a terminal. 
+ if (opt_mode == MODE_COMPRESS) { + if (is_tty_stdout()) + continue; + } else if (is_tty_stdin()) { + continue; + } + + // It doesn't make sense to compress data from stdin + // if we are supposed to read filenames from stdin + // too (enabled with --files or --files0). + if (args.files_name == stdin_filename) { + message_error(_("Cannot read data from " + "standard input when " + "reading filenames " + "from standard input")); + continue; + } + + // Replace the "-" with a special pointer, which is + // recognized by coder_run() and other things. + // This way error messages get a proper filename + // string and the code still knows that it is + // handling the special case of stdin. + args.arg_names[i] = (char *)stdin_filename; + } + + // Do the actual compression or decompression. + run(args.arg_names[i]); + } + + // If --files or --files0 was used, process the filenames from the + // given file or stdin. Note that here we don't consider "-" to + // indicate stdin like we do with the command line arguments. + if (args.files_name != NULL) { + // read_name() checks for user_abort so we don't need to + // check it as loop termination condition. + while (true) { + const char *name = read_name(&args); + if (name == NULL) + break; + + // read_name() doesn't return empty names. + assert(name[0] != '\0'); + run(name); + } + + if (args.files_name != stdin_filename) + (void)fclose(args.files_file); + } + + // All files have now been handled. If in --list mode, display + // the totals before exiting. We don't have signal handlers + // enabled in --list mode, so we don't need to check user_abort. + if (opt_mode == MODE_LIST) { + assert(!user_abort); + list_totals(); + } + + // If we have got a signal, raise it to kill the program instead + // of calling tuklib_exit(). + signals_exit(); + + // Suppress the exit status indicating a warning if --no-warn + // was specified. 
+ if (exit_status == E_WARNING && no_warn) + exit_status = E_SUCCESS; + + tuklib_exit(exit_status, E_ERROR, + message_verbosity_get() != V_SILENT); +} diff --git a/src/xz/main.h b/src/xz/main.h new file mode 100644 index 000000000000..323f2f7d09cd --- /dev/null +++ b/src/xz/main.h @@ -0,0 +1,30 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file main.h +/// \brief Miscellaneous declarations +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +/// Possible exit status values. These are the same as used by gzip and bzip2. +enum exit_status_type { + E_SUCCESS = 0, + E_ERROR = 1, + E_WARNING = 2, +}; + + +/// Sets the exit status after a warning or error has occurred. If new_status +/// is E_WARNING and the old exit status was already E_ERROR, the exit +/// status is not changed. +extern void set_exit_status(enum exit_status_type new_status); + + +/// Use E_SUCCESS instead of E_WARNING if something worth a warning occurs +/// but nothing worth an error has occurred. This is called when --no-warn +/// is specified. +extern void set_exit_no_warn(void); diff --git a/src/xz/message.c b/src/xz/message.c new file mode 100644 index 000000000000..f24e98e97d92 --- /dev/null +++ b/src/xz/message.c @@ -0,0 +1,1189 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file message.c +/// \brief Printing messages +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + +#ifdef HAVE_SYS_TIME_H +# include +#endif + +#include + + +/// Number of the current file +static unsigned int files_pos = 0; + +/// Total number of input files; zero if unknown. 
+static unsigned int files_total; + +/// Verbosity level +static enum message_verbosity verbosity = V_WARNING; + +/// Filename which we will print with the verbose messages +static const char *filename; + +/// True once the a filename has been printed to stderr as part of progress +/// message. If automatic progress updating isn't enabled, this becomes true +/// after the first progress message has been printed due to user sending +/// SIGINFO, SIGUSR1, or SIGALRM. Once this variable is true, we will print +/// an empty line before the next filename to make the output more readable. +static bool first_filename_printed = false; + +/// This is set to true when we have printed the current filename to stderr +/// as part of a progress message. This variable is useful only if not +/// updating progress automatically: if user sends many SIGINFO, SIGUSR1, or +/// SIGALRM signals, we won't print the name of the same file multiple times. +static bool current_filename_printed = false; + +/// True if we should print progress indicator and update it automatically +/// if also verbose >= V_VERBOSE. +static bool progress_automatic; + +/// True if message_progress_start() has been called but +/// message_progress_end() hasn't been called yet. +static bool progress_started = false; + +/// This is true when a progress message was printed and the cursor is still +/// on the same line with the progress message. In that case, a newline has +/// to be printed before any error messages. +static bool progress_active = false; + +/// Pointer to lzma_stream used to do the encoding or decoding. +static lzma_stream *progress_strm; + +/// Expected size of the input stream is needed to show completion percentage +/// and estimate remaining time. +static uint64_t expected_in_size; + +/// Time when we started processing the file +static uint64_t start_time; + + +// Use alarm() and SIGALRM when they are supported. 
This has two minor +// advantages over the alternative of polling gettimeofday(): +// - It is possible for the user to send SIGINFO, SIGUSR1, or SIGALRM to +// get intermediate progress information even when --verbose wasn't used +// or stderr is not a terminal. +// - alarm() + SIGALRM seems to have slightly less overhead than polling +// gettimeofday(). +#ifdef SIGALRM + +/// The signal handler for SIGALRM sets this to true. It is set back to false +/// once the progress message has been updated. +static volatile sig_atomic_t progress_needs_updating = false; + +/// Signal handler for SIGALRM +static void +progress_signal_handler(int sig lzma_attribute((unused))) +{ + progress_needs_updating = true; + return; +} + +#else + +/// This is true when progress message printing is wanted. Using the same +/// variable name as above to avoid some ifdefs. +static bool progress_needs_updating = false; + +/// Elapsed time when the next progress message update should be done. +static uint64_t progress_next_update; + +#endif + + +/// Get the current time as microseconds since epoch +static uint64_t +my_time(void) +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return (uint64_t)(tv.tv_sec) * UINT64_C(1000000) + tv.tv_usec; +} + + +extern void +message_init(void) +{ + // If --verbose is used, we use a progress indicator if and only + // if stderr is a terminal. If stderr is not a terminal, we print + // verbose information only after finishing the file. As a special + // exception, even if --verbose was not used, user can send SIGALRM + // to make us print progress information once without automatic + // updating. + progress_automatic = isatty(STDERR_FILENO); + + // Commented out because COLUMNS is rarely exported to environment. + // Most users have at least 80 columns anyway, let's think something + // fancy here if enough people complain. +/* + if (progress_automatic) { + // stderr is a terminal. 
Check the COLUMNS environment + // variable to see if the terminal is wide enough. If COLUMNS + // doesn't exist or it has some unparsable value, we assume + // that the terminal is wide enough. + const char *columns_str = getenv("COLUMNS"); + if (columns_str != NULL) { + char *endptr; + const long columns = strtol(columns_str, &endptr, 10); + if (*endptr != '\0' || columns < 80) + progress_automatic = false; + } + } +*/ + +#ifdef SIGALRM + // At least DJGPP lacks SA_RESTART. It's not essential for us (the + // rest of the code can handle interrupted system calls), so just + // define it zero. +# ifndef SA_RESTART +# define SA_RESTART 0 +# endif + // Establish the signal handlers which set a flag to tell us that + // progress info should be updated. Since these signals don't + // require any quick action, we set SA_RESTART. + static const int sigs[] = { +#ifdef SIGALRM + SIGALRM, +#endif +#ifdef SIGINFO + SIGINFO, +#endif +#ifdef SIGUSR1 + SIGUSR1, +#endif + }; + + struct sigaction sa; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + sa.sa_handler = &progress_signal_handler; + + for (size_t i = 0; i < ARRAY_SIZE(sigs); ++i) + if (sigaction(sigs[i], &sa, NULL)) + message_signal_handler(); +#endif + + return; +} + + +extern void +message_verbosity_increase(void) +{ + if (verbosity < V_DEBUG) + ++verbosity; + + return; +} + + +extern void +message_verbosity_decrease(void) +{ + if (verbosity > V_SILENT) + --verbosity; + + return; +} + + +extern enum message_verbosity +message_verbosity_get(void) +{ + return verbosity; +} + + +extern void +message_set_files(unsigned int files) +{ + files_total = files; + return; +} + + +/// Prints the name of the current file if it hasn't been printed already, +/// except if we are processing exactly one stream from stdin to stdout. +/// I think it looks nicer to not print "(stdin)" when --verbose is used +/// in a pipe and no other files are processed. 
+static void +print_filename(void) +{ + if (files_total != 1 || filename != stdin_filename) { + signals_block(); + + FILE *file = opt_mode == MODE_LIST ? stdout : stderr; + + // If a file was already processed, put an empty line + // before the next filename to improve readability. + if (first_filename_printed) + fputc('\n', file); + + first_filename_printed = true; + current_filename_printed = true; + + // If we don't know how many files there will be due + // to usage of --files or --files0. + if (files_total == 0) + fprintf(file, "%s (%u)\n", filename, + files_pos); + else + fprintf(file, "%s (%u/%u)\n", filename, + files_pos, files_total); + + signals_unblock(); + } + + return; +} + + +extern void +message_filename(const char *src_name) +{ + // Start numbering the files starting from one. + ++files_pos; + filename = src_name; + + if (verbosity >= V_VERBOSE + && (progress_automatic || opt_mode == MODE_LIST)) + print_filename(); + else + current_filename_printed = false; + + return; +} + + +extern void +message_progress_start(lzma_stream *strm, uint64_t in_size) +{ + // Store the pointer to the lzma_stream used to do the coding. + // It is needed to find out the position in the stream. + progress_strm = strm; + + // Store the processing start time of the file and its expected size. + // If we aren't printing any statistics, then these are unused. But + // since it is possible that the user sends us a signal to show + // statistics, we need to have these available anyway. + start_time = my_time(); + expected_in_size = in_size; + + // Indicate that progress info may need to be printed before + // printing error messages. + progress_started = true; + + // If progress indicator is wanted, print the filename and possibly + // the file count now. + if (verbosity >= V_VERBOSE && progress_automatic) { + // Start the timer to display the first progress message + // after one second. 
An alternative would be to show the + // first message almost immediately, but delaying by one + // second looks better to me, since extremely early + // progress info is pretty much useless. +#ifdef SIGALRM + // First disable a possibly existing alarm. + alarm(0); + progress_needs_updating = false; + alarm(1); +#else + progress_needs_updating = true; + progress_next_update = 1000000; +#endif + } + + return; +} + + +/// Make the string indicating completion percentage. +static const char * +progress_percentage(uint64_t in_pos) +{ + // If the size of the input file is unknown or the size told us is + // clearly wrong since we have processed more data than the alleged + // size of the file, show a static string indicating that we have + // no idea of the completion percentage. + if (expected_in_size == 0 || in_pos > expected_in_size) + return "--- %"; + + // Never show 100.0 % before we actually are finished. + double percentage = (double)(in_pos) / (double)(expected_in_size) + * 99.9; + + static char buf[sizeof("99.9 %")]; + snprintf(buf, sizeof(buf), "%.1f %%", percentage); + + return buf; +} + + +/// Make the string containing the amount of input processed, amount of +/// output produced, and the compression ratio. +static const char * +progress_sizes(uint64_t compressed_pos, uint64_t uncompressed_pos, bool final) +{ + // This is enough to hold sizes up to about 99 TiB if thousand + // separator is used, or about 1 PiB without thousand separator. + // After that the progress indicator will look a bit silly, since + // the compression ratio no longer fits with three decimal places. + static char buf[36]; + + char *pos = buf; + size_t left = sizeof(buf); + + // Print the sizes. If this the final message, use more reasonable + // units than MiB if the file was small. + const enum nicestr_unit unit_min = final ? 
NICESTR_B : NICESTR_MIB; + my_snprintf(&pos, &left, "%s / %s", + uint64_to_nicestr(compressed_pos, + unit_min, NICESTR_TIB, false, 0), + uint64_to_nicestr(uncompressed_pos, + unit_min, NICESTR_TIB, false, 1)); + + // Avoid division by zero. If we cannot calculate the ratio, set + // it to some nice number greater than 10.0 so that it gets caught + // in the next if-clause. + const double ratio = uncompressed_pos > 0 + ? (double)(compressed_pos) / (double)(uncompressed_pos) + : 16.0; + + // If the ratio is very bad, just indicate that it is greater than + // 9.999. This way the length of the ratio field stays fixed. + if (ratio > 9.999) + snprintf(pos, left, " > %.3f", 9.999); + else + snprintf(pos, left, " = %.3f", ratio); + + return buf; +} + + +/// Make the string containing the processing speed of uncompressed data. +static const char * +progress_speed(uint64_t uncompressed_pos, uint64_t elapsed) +{ + // Don't print the speed immediately, since the early values look + // somewhat random. + if (elapsed < 3000000) + return ""; + + static const char unit[][8] = { + "KiB/s", + "MiB/s", + "GiB/s", + }; + + size_t unit_index = 0; + + // Calculate the speed as KiB/s. + double speed = (double)(uncompressed_pos) + / ((double)(elapsed) * (1024.0 / 1e6)); + + // Adjust the unit of the speed if needed. + while (speed > 999.0) { + speed /= 1024.0; + if (++unit_index == ARRAY_SIZE(unit)) + return ""; // Way too fast ;-) + } + + // Use decimal point only if the number is small. Examples: + // - 0.1 KiB/s + // - 9.9 KiB/s + // - 99 KiB/s + // - 999 KiB/s + static char buf[sizeof("999 GiB/s")]; + snprintf(buf, sizeof(buf), "%.*f %s", + speed > 9.9 ? 0 : 1, speed, unit[unit_index]); + return buf; +} + + +/// Make a string indicating elapsed or remaining time. The format is either +/// M:SS or H:MM:SS depending on if the time is an hour or more. 
+static const char * +progress_time(uint64_t useconds) +{ + // 9999 hours = 416 days + static char buf[sizeof("9999:59:59")]; + + uint32_t seconds = useconds / 1000000; + + // Don't show anything if the time is zero or ridiculously big. + if (seconds == 0 || seconds > ((9999 * 60) + 59) * 60 + 59) + return ""; + + uint32_t minutes = seconds / 60; + seconds %= 60; + + if (minutes >= 60) { + const uint32_t hours = minutes / 60; + minutes %= 60; + snprintf(buf, sizeof(buf), + "%" PRIu32 ":%02" PRIu32 ":%02" PRIu32, + hours, minutes, seconds); + } else { + snprintf(buf, sizeof(buf), "%" PRIu32 ":%02" PRIu32, + minutes, seconds); + } + + return buf; +} + + +/// Return a string containing estimated remaining time when +/// reasonably possible. +static const char * +progress_remaining(uint64_t in_pos, uint64_t elapsed) +{ + // Don't show the estimated remaining time when it wouldn't + // make sense: + // - Input size is unknown. + // - Input has grown bigger since we started (de)compressing. + // - We haven't processed much data yet, so estimate would be + // too inaccurate. + // - Only a few seconds has passed since we started (de)compressing, + // so estimate would be too inaccurate. + if (expected_in_size == 0 || in_pos > expected_in_size + || in_pos < (UINT64_C(1) << 19) || elapsed < 8000000) + return ""; + + // Calculate the estimate. Don't give an estimate of zero seconds, + // since it is possible that all the input has been already passed + // to the library, but there is still quite a bit of output pending. + uint32_t remaining = (double)(expected_in_size - in_pos) + * ((double)(elapsed) / 1e6) / (double)(in_pos); + if (remaining < 1) + remaining = 1; + + static char buf[sizeof("9 h 55 min")]; + + // Select appropriate precision for the estimated remaining time. + if (remaining <= 10) { + // A maximum of 10 seconds remaining. + // Show the number of seconds as is. 
+ snprintf(buf, sizeof(buf), "%" PRIu32 " s", remaining); + + } else if (remaining <= 50) { + // A maximum of 50 seconds remaining. + // Round up to the next multiple of five seconds. + remaining = (remaining + 4) / 5 * 5; + snprintf(buf, sizeof(buf), "%" PRIu32 " s", remaining); + + } else if (remaining <= 590) { + // A maximum of 9 minutes and 50 seconds remaining. + // Round up to the next multiple of ten seconds. + remaining = (remaining + 9) / 10 * 10; + snprintf(buf, sizeof(buf), "%" PRIu32 " min %" PRIu32 " s", + remaining / 60, remaining % 60); + + } else if (remaining <= 59 * 60) { + // A maximum of 59 minutes remaining. + // Round up to the next multiple of a minute. + remaining = (remaining + 59) / 60; + snprintf(buf, sizeof(buf), "%" PRIu32 " min", remaining); + + } else if (remaining <= 9 * 3600 + 50 * 60) { + // A maximum of 9 hours and 50 minutes left. + // Round up to the next multiple of ten minutes. + remaining = (remaining + 599) / 600 * 10; + snprintf(buf, sizeof(buf), "%" PRIu32 " h %" PRIu32 " min", + remaining / 60, remaining % 60); + + } else if (remaining <= 23 * 3600) { + // A maximum of 23 hours remaining. + // Round up to the next multiple of an hour. + remaining = (remaining + 3599) / 3600; + snprintf(buf, sizeof(buf), "%" PRIu32 " h", remaining); + + } else if (remaining <= 9 * 24 * 3600 + 23 * 3600) { + // A maximum of 9 days and 23 hours remaining. + // Round up to the next multiple of an hour. + remaining = (remaining + 3599) / 3600; + snprintf(buf, sizeof(buf), "%" PRIu32 " d %" PRIu32 " h", + remaining / 24, remaining % 24); + + } else if (remaining <= 999 * 24 * 3600) { + // A maximum of 999 days remaining. ;-) + // Round up to the next multiple of a day. + remaining = (remaining + 24 * 3600 - 1) / (24 * 3600); + snprintf(buf, sizeof(buf), "%" PRIu32 " d", remaining); + + } else { + // The estimated remaining time is too big. Don't show it. + return ""; + } + + return buf; +} + + +/// Calculate the elapsed time as microseconds. 
+static uint64_t +progress_elapsed(void) +{ + return my_time() - start_time; +} + + +/// Get information about position in the stream. This is currently simple, +/// but it will become more complicated once we have multithreading support. +static void +progress_pos(uint64_t *in_pos, + uint64_t *compressed_pos, uint64_t *uncompressed_pos) +{ + *in_pos = progress_strm->total_in; + + if (opt_mode == MODE_COMPRESS) { + *compressed_pos = progress_strm->total_out; + *uncompressed_pos = progress_strm->total_in; + } else { + *compressed_pos = progress_strm->total_in; + *uncompressed_pos = progress_strm->total_out; + } + + return; +} + + +extern void +message_progress_update(void) +{ + if (!progress_needs_updating) + return; + + // Calculate how long we have been processing this file. + const uint64_t elapsed = progress_elapsed(); + +#ifndef SIGALRM + if (progress_next_update > elapsed) + return; + + progress_next_update = elapsed + 1000000; +#endif + + // Get our current position in the stream. + uint64_t in_pos; + uint64_t compressed_pos; + uint64_t uncompressed_pos; + progress_pos(&in_pos, &compressed_pos, &uncompressed_pos); + + // Block signals so that fprintf() doesn't get interrupted. + signals_block(); + + // Print the filename if it hasn't been printed yet. + if (!current_filename_printed) + print_filename(); + + // Print the actual progress message. The idea is that there is at + // least three spaces between the fields in typical situations, but + // even in rare situations there is at least one space. + fprintf(stderr, "\r %6s %35s %9s %10s %10s\r", + progress_percentage(in_pos), + progress_sizes(compressed_pos, uncompressed_pos, false), + progress_speed(uncompressed_pos, elapsed), + progress_time(elapsed), + progress_remaining(in_pos, elapsed)); + +#ifdef SIGALRM + // Updating the progress info was finished. Reset + // progress_needs_updating to wait for the next SIGALRM. 
+ // + // NOTE: This has to be done before alarm(1) or with (very) bad + // luck we could be setting this to false after the alarm has already + // been triggered. + progress_needs_updating = false; + + if (verbosity >= V_VERBOSE && progress_automatic) { + // Mark that the progress indicator is active, so if an error + // occurs, the error message gets printed cleanly. + progress_active = true; + + // Restart the timer so that progress_needs_updating gets + // set to true after about one second. + alarm(1); + } else { + // The progress message was printed because user had sent us + // SIGALRM. In this case, each progress message is printed + // on its own line. + fputc('\n', stderr); + } +#else + // When SIGALRM isn't supported and we get here, it's always due to + // automatic progress update. We set progress_active here too like + // described above. + assert(verbosity >= V_VERBOSE); + assert(progress_automatic); + progress_active = true; +#endif + + signals_unblock(); + + return; +} + + +static void +progress_flush(bool finished) +{ + if (!progress_started || verbosity < V_VERBOSE) + return; + + uint64_t in_pos; + uint64_t compressed_pos; + uint64_t uncompressed_pos; + progress_pos(&in_pos, &compressed_pos, &uncompressed_pos); + + // Avoid printing intermediate progress info if some error occurs + // in the beginning of the stream. (If something goes wrong later in + // the stream, it is sometimes useful to tell the user where the + // error approximately occurred, especially if the error occurs + // after a time-consuming operation.) + if (!finished && !progress_active + && (compressed_pos == 0 || uncompressed_pos == 0)) + return; + + progress_active = false; + + const uint64_t elapsed = progress_elapsed(); + + signals_block(); + + // When using the auto-updating progress indicator, the final + // statistics are printed in the same format as the progress + // indicator itself. 
+ if (progress_automatic) { + fprintf(stderr, "\r %6s %35s %9s %10s %10s\n", + finished ? "100 %" : progress_percentage(in_pos), + progress_sizes(compressed_pos, uncompressed_pos, true), + progress_speed(uncompressed_pos, elapsed), + progress_time(elapsed), + finished ? "" : progress_remaining(in_pos, elapsed)); + } else { + // The filename is always printed. + fprintf(stderr, "%s: ", filename); + + // Percentage is printed only if we didn't finish yet. + if (!finished) { + // Don't print the percentage when it isn't known + // (starts with a dash). + const char *percentage = progress_percentage(in_pos); + if (percentage[0] != '-') + fprintf(stderr, "%s, ", percentage); + } + + // Size information is always printed. + fprintf(stderr, "%s", progress_sizes( + compressed_pos, uncompressed_pos, true)); + + // The speed and elapsed time aren't always shown. + const char *speed = progress_speed(uncompressed_pos, elapsed); + if (speed[0] != '\0') + fprintf(stderr, ", %s", speed); + + const char *elapsed_str = progress_time(elapsed); + if (elapsed_str[0] != '\0') + fprintf(stderr, ", %s", elapsed_str); + + fputc('\n', stderr); + } + + signals_unblock(); + + return; +} + + +extern void +message_progress_end(bool success) +{ + assert(progress_started); + progress_flush(success); + progress_started = false; + return; +} + + +static void +vmessage(enum message_verbosity v, const char *fmt, va_list ap) +{ + if (v <= verbosity) { + signals_block(); + + progress_flush(false); + + fprintf(stderr, "%s: ", progname); + vfprintf(stderr, fmt, ap); + fputc('\n', stderr); + + signals_unblock(); + } + + return; +} + + +extern void +message(enum message_verbosity v, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vmessage(v, fmt, ap); + va_end(ap); + return; +} + + +extern void +message_warning(const char *fmt, ...) 
+{ + va_list ap; + va_start(ap, fmt); + vmessage(V_WARNING, fmt, ap); + va_end(ap); + + set_exit_status(E_WARNING); + return; +} + + +extern void +message_error(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vmessage(V_ERROR, fmt, ap); + va_end(ap); + + set_exit_status(E_ERROR); + return; +} + + +extern void +message_fatal(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vmessage(V_ERROR, fmt, ap); + va_end(ap); + + tuklib_exit(E_ERROR, E_ERROR, false); +} + + +extern void +message_bug(void) +{ + message_fatal(_("Internal error (bug)")); +} + + +extern void +message_signal_handler(void) +{ + message_fatal(_("Cannot establish signal handlers")); +} + + +extern const char * +message_strm(lzma_ret code) +{ + switch (code) { + case LZMA_NO_CHECK: + return _("No integrity check; not verifying file integrity"); + + case LZMA_UNSUPPORTED_CHECK: + return _("Unsupported type of integrity check; " + "not verifying file integrity"); + + case LZMA_MEM_ERROR: + return strerror(ENOMEM); + + case LZMA_MEMLIMIT_ERROR: + return _("Memory usage limit reached"); + + case LZMA_FORMAT_ERROR: + return _("File format not recognized"); + + case LZMA_OPTIONS_ERROR: + return _("Unsupported options"); + + case LZMA_DATA_ERROR: + return _("Compressed data is corrupt"); + + case LZMA_BUF_ERROR: + return _("Unexpected end of input"); + + case LZMA_OK: + case LZMA_STREAM_END: + case LZMA_GET_CHECK: + case LZMA_PROG_ERROR: + return _("Internal error (bug)"); + } + + return NULL; +} + + +extern void +message_mem_needed(enum message_verbosity v, uint64_t memusage) +{ + if (v > verbosity) + return; + + // Convert memusage to MiB, rounding up to the next full MiB. + // This way the user can always use the displayed usage as + // the new memory usage limit. (If we rounded to the nearest, + // the user might need to +1 MiB to get high enough limit.) 
+ memusage = round_up_to_mib(memusage); + + // 2^64 with thousand separators + " MiB" suffix + '\0' = 26 + 4 + 1 + char memlimitstr[32]; + + // Show the memory usage limit as MiB unless it is less than 1 MiB. + // This way it's easy to notice errors where one has typed + // --memory=123 instead of --memory=123MiB. + uint64_t memlimit = hardware_memlimit_get(); + if (memlimit < (UINT32_C(1) << 20)) { + snprintf(memlimitstr, sizeof(memlimitstr), "%s B", + uint64_to_str(memlimit, 1)); + } else { + // Round up just like with memusage. If this function is + // called for informational purposes (to just show the + // current usage and limit), we should never show that + // the usage is higher than the limit, which would give + // a false impression that the memory usage limit isn't + // properly enforced. + snprintf(memlimitstr, sizeof(memlimitstr), "%s MiB", + uint64_to_str(round_up_to_mib(memlimit), 1)); + } + + message(v, _("%s MiB of memory is required. The limit is %s."), + uint64_to_str(memusage, 0), memlimitstr); + + return; +} + + +extern void +message_filters(enum message_verbosity v, const lzma_filter *filters) +{ + if (v > verbosity) + return; + + fprintf(stderr, _("%s: Filter chain:"), progname); + + for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) { + fprintf(stderr, " --"); + + switch (filters[i].id) { + case LZMA_FILTER_LZMA1: + case LZMA_FILTER_LZMA2: { + const lzma_options_lzma *opt = filters[i].options; + const char *mode; + const char *mf; + + switch (opt->mode) { + case LZMA_MODE_FAST: + mode = "fast"; + break; + + case LZMA_MODE_NORMAL: + mode = "normal"; + break; + + default: + mode = "UNKNOWN"; + break; + } + + switch (opt->mf) { + case LZMA_MF_HC3: + mf = "hc3"; + break; + + case LZMA_MF_HC4: + mf = "hc4"; + break; + + case LZMA_MF_BT2: + mf = "bt2"; + break; + + case LZMA_MF_BT3: + mf = "bt3"; + break; + + case LZMA_MF_BT4: + mf = "bt4"; + break; + + default: + mf = "UNKNOWN"; + break; + } + + fprintf(stderr, "lzma%c=dict=%" PRIu32 + 
",lc=%" PRIu32 ",lp=%" PRIu32 + ",pb=%" PRIu32 + ",mode=%s,nice=%" PRIu32 ",mf=%s" + ",depth=%" PRIu32, + filters[i].id == LZMA_FILTER_LZMA2 + ? '2' : '1', + opt->dict_size, + opt->lc, opt->lp, opt->pb, + mode, opt->nice_len, mf, opt->depth); + break; + } + + case LZMA_FILTER_X86: + fprintf(stderr, "x86"); + break; + + case LZMA_FILTER_POWERPC: + fprintf(stderr, "powerpc"); + break; + + case LZMA_FILTER_IA64: + fprintf(stderr, "ia64"); + break; + + case LZMA_FILTER_ARM: + fprintf(stderr, "arm"); + break; + + case LZMA_FILTER_ARMTHUMB: + fprintf(stderr, "armthumb"); + break; + + case LZMA_FILTER_SPARC: + fprintf(stderr, "sparc"); + break; + + case LZMA_FILTER_DELTA: { + const lzma_options_delta *opt = filters[i].options; + fprintf(stderr, "delta=dist=%" PRIu32, opt->dist); + break; + } + + default: + fprintf(stderr, "UNKNOWN"); + break; + } + } + + fputc('\n', stderr); + return; +} + + +extern void +message_try_help(void) +{ + // Print this with V_WARNING instead of V_ERROR to prevent it from + // showing up when --quiet has been specified. + message(V_WARNING, _("Try `%s --help' for more information."), + progname); + return; +} + + +extern void +message_memlimit(void) +{ + if (opt_robot) + printf("%" PRIu64 "\n", hardware_memlimit_get()); + else + printf(_("%s MiB (%s bytes)\n"), + uint64_to_str( + round_up_to_mib(hardware_memlimit_get()), 0), + uint64_to_str(hardware_memlimit_get(), 1)); + + tuklib_exit(E_SUCCESS, E_ERROR, verbosity != V_SILENT); +} + + +extern void +message_version(void) +{ + // It is possible that liblzma version is different than the command + // line tool version, so print both. 
+ if (opt_robot) { + printf("XZ_VERSION=%d\nLIBLZMA_VERSION=%d\n", + LZMA_VERSION, lzma_version_number()); + } else { + printf("xz (" PACKAGE_NAME ") " LZMA_VERSION_STRING "\n"); + printf("liblzma %s\n", lzma_version_string()); + } + + tuklib_exit(E_SUCCESS, E_ERROR, verbosity != V_SILENT); +} + + +extern void +message_help(bool long_help) +{ + printf(_("Usage: %s [OPTION]... [FILE]...\n" + "Compress or decompress FILEs in the .xz format.\n\n"), + progname); + + puts(_("Mandatory arguments to long options are mandatory for " + "short options too.\n")); + + if (long_help) + puts(_(" Operation mode:\n")); + + puts(_( +" -z, --compress force compression\n" +" -d, --decompress force decompression\n" +" -t, --test test compressed file integrity\n" +" -l, --list list information about files")); + + if (long_help) + puts(_("\n Operation modifiers:\n")); + + puts(_( +" -k, --keep keep (don't delete) input files\n" +" -f, --force force overwrite of output file and (de)compress links\n" +" -c, --stdout write to standard output and don't delete input files")); + + if (long_help) + puts(_( +" --no-sparse do not create sparse files when decompressing\n" +" -S, --suffix=.SUF use the suffix `.SUF' on compressed files\n" +" --files=[FILE] read filenames to process from FILE; if FILE is\n" +" omitted, filenames are read from the standard input;\n" +" filenames must be terminated with the newline character\n" +" --files0=[FILE] like --files but use the null character as terminator")); + + if (long_help) { + puts(_("\n Basic file format and compression options:\n")); + puts(_( +" -F, --format=FMT file format to encode or decode; possible values are\n" +" `auto' (default), `xz', `lzma', and `raw'\n" +" -C, --check=CHECK integrity check type: `crc32', `crc64' (default),\n" +" `sha256', or `none' (use with caution)")); + } + + puts(_( +" -0 .. 
-9 compression preset; 0-2 fast compression, 3-5 good\n" +" compression, 6-9 excellent compression; default is 6")); + + puts(_( +" -e, --extreme use more CPU time when encoding to increase compression\n" +" ratio without increasing memory usage of the decoder")); + + if (long_help) + puts(_( // xgettext:no-c-format +" -M, --memory=NUM use roughly NUM bytes of memory at maximum; 0 indicates\n" +" the default setting, which is 40 % of total RAM")); + + if (long_help) { + puts(_( +"\n Custom filter chain for compression (alternative for using presets):")); + +#if defined(HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1) \ + || defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2) + puts(_( +"\n" +" --lzma1[=OPTS] LZMA1 or LZMA2; OPTS is a comma-separated list of zero or\n" +" --lzma2[=OPTS] more of the following options (valid values; default):\n" +" preset=NUM reset options to preset number NUM (0-9)\n" +" dict=NUM dictionary size (4KiB - 1536MiB; 8MiB)\n" +" lc=NUM number of literal context bits (0-4; 3)\n" +" lp=NUM number of literal position bits (0-4; 0)\n" +" pb=NUM number of position bits (0-4; 2)\n" +" mode=MODE compression mode (fast, normal; normal)\n" +" nice=NUM nice length of a match (2-273; 64)\n" +" mf=NAME match finder (hc3, hc4, bt2, bt3, bt4; bt4)\n" +" depth=NUM maximum search depth; 0=automatic (default)")); +#endif + + puts(_( +"\n" +" --x86[=OPTS] x86 BCJ filter\n" +" --powerpc[=OPTS] PowerPC BCJ filter (big endian only)\n" +" --ia64[=OPTS] IA64 (Itanium) BCJ filter\n" +" --arm[=OPTS] ARM BCJ filter (little endian only)\n" +" --armthumb[=OPTS] ARM-Thumb BCJ filter (little endian only)\n" +" --sparc[=OPTS] SPARC BCJ filter\n" +" Valid OPTS for all BCJ filters:\n" +" start=NUM start offset for conversions (default=0)")); + +#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA) + puts(_( +"\n" +" --delta[=OPTS] Delta filter; valid OPTS (valid values; default):\n" +" dist=NUM distance between bytes being subtracted\n" +" from each 
other (1-256; 1)")); +#endif + +#if defined(HAVE_ENCODER_SUBBLOCK) || defined(HAVE_DECODER_SUBBLOCK) + puts(_( +"\n" +" --subblock[=OPTS] Subblock filter; valid OPTS (valid values; default):\n" +" size=NUM number of bytes of data per subblock\n" +" (1 - 256Mi; 4Ki)\n" +" rle=NUM run-length encoder chunk size (0-256; 0)")); +#endif + } + + if (long_help) + puts(_("\n Other options:\n")); + + puts(_( +" -q, --quiet suppress warnings; specify twice to suppress errors too\n" +" -v, --verbose be verbose; specify twice for even more verbose")); + + if (long_help) { + puts(_( +" -Q, --no-warn make warnings not affect the exit status")); + puts(_( +" --robot use machine-parsable messages (useful for scripts)")); + puts(""); + puts(_( +" --info-memory display the memory usage limit and exit")); + puts(_( +" -h, --help display the short help (lists only the basic options)\n" +" -H, --long-help display this long help and exit")); + } else { + puts(_( +" -h, --help display this short help and exit\n" +" -H, --long-help display the long help (lists also the advanced options)")); + } + + puts(_( +" -V, --version display the version number and exit")); + + puts(_("\nWith no FILE, or when FILE is -, read standard input.\n")); + + if (long_help) { + printf(_( +"On this system and configuration, this program will use a maximum of roughly\n" +"%s MiB RAM and "), uint64_to_str(round_up_to_mib(hardware_memlimit_get()), 0)); + printf(N_("one thread.\n\n", "%s threads.\n\n", + hardware_threadlimit_get()), + uint64_to_str(hardware_threadlimit_get(), 0)); + } + + // TRANSLATORS: This message indicates the bug reporting address + // for this package. Please add _another line_ saying + // "Report translation bugs to <...>\n" with the email or WWW + // address for translation bugs. Thanks. 
+ printf(_("Report bugs to <%s> (in English or Finnish).\n"), + PACKAGE_BUGREPORT); + printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL); + + tuklib_exit(E_SUCCESS, E_ERROR, verbosity != V_SILENT); +} diff --git a/src/xz/message.h b/src/xz/message.h new file mode 100644 index 000000000000..7d637dfe6b76 --- /dev/null +++ b/src/xz/message.h @@ -0,0 +1,151 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file message.h +/// \brief Printing messages to stderr +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +/// Verbosity levels +enum message_verbosity { + V_SILENT, ///< No messages + V_ERROR, ///< Only error messages + V_WARNING, ///< Errors and warnings + V_VERBOSE, ///< Errors, warnings, and verbose statistics + V_DEBUG, ///< Debugging, FIXME remove? +}; + + +/// \brief Initializes the message functions +/// +/// If an error occurs, this function doesn't return. +/// +extern void message_init(void); + + +/// Increase verbosity level by one step unless it was at maximum. +extern void message_verbosity_increase(void); + +/// Decrease verbosity level by one step unless it was at minimum. +extern void message_verbosity_decrease(void); + +/// Get the current verbosity level. +extern enum message_verbosity message_verbosity_get(void); + + +/// \brief Print a message if verbosity level is at least "verbosity" +/// +/// This doesn't touch the exit status. +extern void message(enum message_verbosity verbosity, const char *fmt, ...) + lzma_attribute((format(printf, 2, 3))); + + +/// \brief Prints a warning and possibly sets exit status +/// +/// The message is printed only if verbosity level is at least V_WARNING. +/// The exit status is set to WARNING unless it was already at ERROR. +extern void message_warning(const char *fmt, ...) 
+ lzma_attribute((format(printf, 1, 2))); + + +/// \brief Prints an error message and sets exit status +/// +/// The message is printed only if verbosity level is at least V_ERROR. +/// The exit status is set to ERROR. +extern void message_error(const char *fmt, ...) + lzma_attribute((format(printf, 1, 2))); + + +/// \brief Prints an error message and exits with EXIT_ERROR +/// +/// The message is printed only if verbosity level is at least V_ERROR. +extern void message_fatal(const char *fmt, ...) + lzma_attribute((format(printf, 1, 2))) + lzma_attribute((noreturn)); + + +/// Print an error message that an internal error occurred and exit with +/// EXIT_ERROR. +extern void message_bug(void) lzma_attribute((noreturn)); + + +/// Print a message that establishing signal handlers failed, and exit with +/// exit status ERROR. +extern void message_signal_handler(void) lzma_attribute((noreturn)); + + +/// Convert lzma_ret to a string. +extern const char *message_strm(lzma_ret code); + + +/// Display how much memory was needed and how much the limit was. +extern void message_mem_needed(enum message_verbosity v, uint64_t memusage); + + +/// Print the filter chain. +extern void message_filters( + enum message_verbosity v, const lzma_filter *filters); + + +/// Print a message that user should try --help. +extern void message_try_help(void); + + +/// Print the memory usage limit and exit. +extern void message_memlimit(void) lzma_attribute((noreturn)); + + +/// Prints the version number to stdout and exits with exit status SUCCESS. +extern void message_version(void) lzma_attribute((noreturn)); + + +/// Print the help message. +extern void message_help(bool long_help) lzma_attribute((noreturn)); + + +/// \brief Set the total number of files to be processed +/// +/// Standard input is counted as a file here. This is used when printing +/// the filename via message_filename(). 
+extern void message_set_files(unsigned int files); + + +/// \brief Set the name of the current file and possibly print it too +/// +/// The name is printed immediately if --list was used or if --verbose +/// was used and stderr is a terminal. Even when the filename isn't printed, +/// it is stored so that it can be printed later if needed for progress +/// messages. +extern void message_filename(const char *src_name); + + +/// \brief Start progress info handling +/// +/// message_filename() must be called before this function to set +/// the filename. +/// +/// This must be paired with a call to message_progress_end() before the +/// given *strm becomes invalid. +/// +/// \param strm Pointer to lzma_stream used for the coding. +/// \param in_size Size of the input file, or zero if unknown. +/// +extern void message_progress_start(lzma_stream *strm, uint64_t in_size); + + +/// Update the progress info if in verbose mode and enough time has passed +/// since the previous update. This can be called only when +/// message_progress_start() has already been used. +extern void message_progress_update(void); + + +/// \brief Finishes the progress message if we were in verbose mode +/// +/// \param finished True if the whole stream was successfully coded +/// and output written to the output stream. +/// +extern void message_progress_end(bool finished); diff --git a/src/xz/options.c b/src/xz/options.c new file mode 100644 index 000000000000..00b34a83ae1b --- /dev/null +++ b/src/xz/options.c @@ -0,0 +1,435 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file options.c +/// \brief Parser for filter-specific options +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + + +/////////////////// +// Generic stuff // +/////////////////// + +typedef struct { + const char *name; + uint64_t id; +} name_id_map; + + +typedef struct { + const char *name; + const name_id_map *map; + uint64_t min; + uint64_t max; +} option_map; + + +/// Parses option=value pairs that are separated with commas: +/// opt=val,opt=val,opt=val +/// +/// Each option is a string that is converted to an integer using the +/// index where the option string is in the array. +/// +/// Value can be +/// - a string-id map mapping a list of possible string values to integers +/// (opts[i].map != NULL, opts[i].min and opts[i].max are ignored); +/// - a number with minimum and maximum value limit +/// (opts[i].map == NULL && opts[i].min != UINT64_MAX); +/// - a string that will be parsed by the filter-specific code +/// (opts[i].map == NULL && opts[i].min == UINT64_MAX, opts[i].max ignored) +/// +/// When parsing of both the option and the value succeeds, a filter-specific +/// function is called, which should store the given value in the +/// filter-specific options structure. +/// +/// \param str String containing the options from the command line +/// \param opts Filter-specific option map +/// \param set Filter-specific function to update filter_options +/// \param filter_options Pointer to filter-specific options structure +/// +/// \return Returns only if no errors occur. 
+/// +static void +parse_options(const char *str, const option_map *opts, + void (*set)(void *filter_options, + uint32_t key, uint64_t value, const char *valuestr), + void *filter_options) +{ + if (str == NULL || str[0] == '\0') + return; + + char *s = xstrdup(str); + char *name = s; + + while (*name != '\0') { + if (*name == ',') { + ++name; + continue; + } + + char *split = strchr(name, ','); + if (split != NULL) + *split = '\0'; + + char *value = strchr(name, '='); + if (value != NULL) + *value++ = '\0'; + + if (value == NULL || value[0] == '\0') + message_fatal(_("%s: Options must be `name=value' " + "pairs separated with commas"), str); + + // Look for the option name from the option map. + size_t i = 0; + while (true) { + if (opts[i].name == NULL) + message_fatal(_("%s: Invalid option name"), + name); + + if (strcmp(name, opts[i].name) == 0) + break; + + ++i; + } + + // Option was found from the map. See how we should handle it. + if (opts[i].map != NULL) { + // value is a string which we should map + // to an integer. + size_t j; + for (j = 0; opts[i].map[j].name != NULL; ++j) { + if (strcmp(opts[i].map[j].name, value) == 0) + break; + } + + if (opts[i].map[j].name == NULL) + message_fatal(_("%s: Invalid option value"), + value); + + set(filter_options, i, opts[i].map[j].id, value); + + } else if (opts[i].min == UINT64_MAX) { + // value is a special string that will be + // parsed by set(). + set(filter_options, i, 0, value); + + } else { + // value is an integer. + const uint64_t v = str_to_uint64(name, value, + opts[i].min, opts[i].max); + set(filter_options, i, v, value); + } + + // Check if it was the last option. 
+ if (split == NULL) + break; + + name = split + 1; + } + + free(s); + return; +} + + +////////////// +// Subblock // +////////////// + +enum { + OPT_SIZE, + OPT_RLE, + OPT_ALIGN, +}; + + +static void +set_subblock(void *options, uint32_t key, uint64_t value, + const char *valuestr lzma_attribute((unused))) +{ + lzma_options_subblock *opt = options; + + switch (key) { + case OPT_SIZE: + opt->subblock_data_size = value; + break; + + case OPT_RLE: + opt->rle = value; + break; + + case OPT_ALIGN: + opt->alignment = value; + break; + } +} + + +extern lzma_options_subblock * +options_subblock(const char *str) +{ + static const option_map opts[] = { + { "size", NULL, LZMA_SUBBLOCK_DATA_SIZE_MIN, + LZMA_SUBBLOCK_DATA_SIZE_MAX }, + { "rle", NULL, LZMA_SUBBLOCK_RLE_OFF, + LZMA_SUBBLOCK_RLE_MAX }, + { "align",NULL, LZMA_SUBBLOCK_ALIGNMENT_MIN, + LZMA_SUBBLOCK_ALIGNMENT_MAX }, + { NULL, NULL, 0, 0 } + }; + + lzma_options_subblock *options + = xmalloc(sizeof(lzma_options_subblock)); + *options = (lzma_options_subblock){ + .allow_subfilters = false, + .alignment = LZMA_SUBBLOCK_ALIGNMENT_DEFAULT, + .subblock_data_size = LZMA_SUBBLOCK_DATA_SIZE_DEFAULT, + .rle = LZMA_SUBBLOCK_RLE_OFF, + }; + + parse_options(str, opts, &set_subblock, options); + + return options; +} + + +/////////// +// Delta // +/////////// + +enum { + OPT_DIST, +}; + + +static void +set_delta(void *options, uint32_t key, uint64_t value, + const char *valuestr lzma_attribute((unused))) +{ + lzma_options_delta *opt = options; + switch (key) { + case OPT_DIST: + opt->dist = value; + break; + } +} + + +extern lzma_options_delta * +options_delta(const char *str) +{ + static const option_map opts[] = { + { "dist", NULL, LZMA_DELTA_DIST_MIN, + LZMA_DELTA_DIST_MAX }, + { NULL, NULL, 0, 0 } + }; + + lzma_options_delta *options = xmalloc(sizeof(lzma_options_delta)); + *options = (lzma_options_delta){ + // It's hard to give a useful default for this. 
+ .type = LZMA_DELTA_TYPE_BYTE, + .dist = LZMA_DELTA_DIST_MIN, + }; + + parse_options(str, opts, &set_delta, options); + + return options; +} + + +///////// +// BCJ // +///////// + +enum { + OPT_START_OFFSET, +}; + + +static void +set_bcj(void *options, uint32_t key, uint64_t value, + const char *valuestr lzma_attribute((unused))) +{ + lzma_options_bcj *opt = options; + switch (key) { + case OPT_START_OFFSET: + opt->start_offset = value; + break; + } +} + + +extern lzma_options_bcj * +options_bcj(const char *str) +{ + static const option_map opts[] = { + { "start", NULL, 0, UINT32_MAX }, + { NULL, NULL, 0, 0 } + }; + + lzma_options_bcj *options = xmalloc(sizeof(lzma_options_bcj)); + *options = (lzma_options_bcj){ + .start_offset = 0, + }; + + parse_options(str, opts, &set_bcj, options); + + return options; +} + + +////////// +// LZMA // +////////// + +enum { + OPT_PRESET, + OPT_DICT, + OPT_LC, + OPT_LP, + OPT_PB, + OPT_MODE, + OPT_NICE, + OPT_MF, + OPT_DEPTH, +}; + + +static void lzma_attribute((noreturn)) +error_lzma_preset(const char *valuestr) +{ + message_fatal(_("Unsupported LZMA1/LZMA2 preset: %s"), valuestr); +} + + +static void +set_lzma(void *options, uint32_t key, uint64_t value, const char *valuestr) +{ + lzma_options_lzma *opt = options; + + switch (key) { + case OPT_PRESET: { + if (valuestr[0] < '0' || valuestr[0] > '9') + error_lzma_preset(valuestr); + + uint32_t preset = valuestr[0] - '0'; + + // Currently only "e" is supported as a modifier, + // so keep this simple for now. 
+ if (valuestr[1] != '\0') { + if (valuestr[1] == 'e') + preset |= LZMA_PRESET_EXTREME; + else + error_lzma_preset(valuestr); + + if (valuestr[2] != '\0') + error_lzma_preset(valuestr); + } + + if (lzma_lzma_preset(options, preset)) + error_lzma_preset(valuestr); + + break; + } + + case OPT_DICT: + opt->dict_size = value; + break; + + case OPT_LC: + opt->lc = value; + break; + + case OPT_LP: + opt->lp = value; + break; + + case OPT_PB: + opt->pb = value; + break; + + case OPT_MODE: + opt->mode = value; + break; + + case OPT_NICE: + opt->nice_len = value; + break; + + case OPT_MF: + opt->mf = value; + break; + + case OPT_DEPTH: + opt->depth = value; + break; + } +} + + +extern lzma_options_lzma * +options_lzma(const char *str) +{ + static const name_id_map modes[] = { + { "fast", LZMA_MODE_FAST }, + { "normal", LZMA_MODE_NORMAL }, + { NULL, 0 } + }; + + static const name_id_map mfs[] = { + { "hc3", LZMA_MF_HC3 }, + { "hc4", LZMA_MF_HC4 }, + { "bt2", LZMA_MF_BT2 }, + { "bt3", LZMA_MF_BT3 }, + { "bt4", LZMA_MF_BT4 }, + { NULL, 0 } + }; + + static const option_map opts[] = { + { "preset", NULL, UINT64_MAX, 0 }, + { "dict", NULL, LZMA_DICT_SIZE_MIN, + (UINT32_C(1) << 30) + (UINT32_C(1) << 29) }, + { "lc", NULL, LZMA_LCLP_MIN, LZMA_LCLP_MAX }, + { "lp", NULL, LZMA_LCLP_MIN, LZMA_LCLP_MAX }, + { "pb", NULL, LZMA_PB_MIN, LZMA_PB_MAX }, + { "mode", modes, 0, 0 }, + { "nice", NULL, 2, 273 }, + { "mf", mfs, 0, 0 }, + { "depth", NULL, 0, UINT32_MAX }, + { NULL, NULL, 0, 0 } + }; + + lzma_options_lzma *options = xmalloc(sizeof(lzma_options_lzma)); + *options = (lzma_options_lzma){ + .dict_size = LZMA_DICT_SIZE_DEFAULT, + .preset_dict = NULL, + .preset_dict_size = 0, + .lc = LZMA_LC_DEFAULT, + .lp = LZMA_LP_DEFAULT, + .pb = LZMA_PB_DEFAULT, + .mode = LZMA_MODE_NORMAL, + .nice_len = 64, + .mf = LZMA_MF_BT4, + .depth = 0, + }; + + parse_options(str, opts, &set_lzma, options); + + if (options->lc + options->lp > LZMA_LCLP_MAX) + message_fatal(_("The sum of lc and lp must be at " + 
"maximum of 4")); + + const uint32_t nice_len_min = options->mf & 0x0F; + if (options->nice_len < nice_len_min) + message_fatal(_("The selected match finder requires at " + "least nice=%" PRIu32), nice_len_min); + + return options; +} diff --git a/src/xz/options.h b/src/xz/options.h new file mode 100644 index 000000000000..e7389c8e99cb --- /dev/null +++ b/src/xz/options.h @@ -0,0 +1,38 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file options.h +/// \brief Parser for filter-specific options +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +/// \brief Parser for Subblock options +/// +/// \return Pointer to allocated options structure. +/// Doesn't return on error. +extern lzma_options_subblock *options_subblock(const char *str); + + +/// \brief Parser for Delta options +/// +/// \return Pointer to allocated options structure. +/// Doesn't return on error. +extern lzma_options_delta *options_delta(const char *str); + + +/// \brief Parser for BCJ options +/// +/// \return Pointer to allocated options structure. +/// Doesn't return on error. +extern lzma_options_bcj *options_bcj(const char *str); + + +/// \brief Parser for LZMA options +/// +/// \return Pointer to allocated options structure. +/// Doesn't return on error. +extern lzma_options_lzma *options_lzma(const char *str); diff --git a/src/xz/private.h b/src/xz/private.h new file mode 100644 index 000000000000..b543435750d8 --- /dev/null +++ b/src/xz/private.h @@ -0,0 +1,51 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file private.h +/// \brief Common includes, definions, and prototypes +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "sysdefs.h" +#include "mythread.h" +#include "lzma.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "tuklib_gettext.h" +#include "tuklib_progname.h" +#include "tuklib_exit.h" + +#ifndef STDIN_FILENO +# define STDIN_FILENO (fileno(stdin)) +#endif + +#ifndef STDOUT_FILENO +# define STDOUT_FILENO (fileno(stdout)) +#endif + +#ifndef STDERR_FILENO +# define STDERR_FILENO (fileno(stderr)) +#endif + +#include "main.h" +#include "coder.h" +#include "message.h" +#include "args.h" +#include "hardware.h" +#include "file_io.h" +#include "options.h" +#include "signals.h" +#include "suffix.h" +#include "util.h" +#include "list.h" diff --git a/src/xz/signals.c b/src/xz/signals.c new file mode 100644 index 000000000000..b27cd5b52cb1 --- /dev/null +++ b/src/xz/signals.c @@ -0,0 +1,189 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file signals.c +/// \brief Handling signals to abort operation +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + + +volatile sig_atomic_t user_abort = false; + + +#ifndef _WIN32 + +/// If we were interrupted by a signal, we store the signal number so that +/// we can raise that signal to kill the program when all cleanups have +/// been done. +static volatile sig_atomic_t exit_signal = 0; + +/// Mask of signals for which have have established a signal handler to set +/// user_abort to true. +static sigset_t hooked_signals; + +/// True once signals_init() has finished. This is used to skip blocking +/// signals (with uninitialized hooked_signals) if signals_block() and +/// signals_unblock() are called before signals_init() has been called. 
+static bool signals_are_initialized = false; + +/// signals_block() and signals_unblock() can be called recursively. +static size_t signals_block_count = 0; + + +static void +signal_handler(int sig) +{ + exit_signal = sig; + user_abort = true; + return; +} + + +extern void +signals_init(void) +{ + // List of signals for which we establish the signal handler. + static const int sigs[] = { + SIGINT, + SIGTERM, +#ifdef SIGHUP + SIGHUP, +#endif +#ifdef SIGPIPE + SIGPIPE, +#endif +#ifdef SIGXCPU + SIGXCPU, +#endif +#ifdef SIGXFSZ + SIGXFSZ, +#endif + }; + + // Mask of the signals for which we have established a signal handler. + sigemptyset(&hooked_signals); + for (size_t i = 0; i < ARRAY_SIZE(sigs); ++i) + sigaddset(&hooked_signals, sigs[i]); + + struct sigaction sa; + + // All the signals that we handle we also blocked while the signal + // handler runs. + sa.sa_mask = hooked_signals; + + // Don't set SA_RESTART, because we want EINTR so that we can check + // for user_abort and cleanup before exiting. We block the signals + // for which we have established a handler when we don't want EINTR. + sa.sa_flags = 0; + sa.sa_handler = &signal_handler; + + for (size_t i = 0; i < ARRAY_SIZE(sigs); ++i) { + // If the parent process has left some signals ignored, + // we don't unignore them. + struct sigaction old; + if (sigaction(sigs[i], NULL, &old) == 0 + && old.sa_handler == SIG_IGN) + continue; + + // Establish the signal handler. 
+ if (sigaction(sigs[i], &sa, NULL)) + message_signal_handler(); + } + + signals_are_initialized = true; + + return; +} + + +#ifndef __VMS +extern void +signals_block(void) +{ + if (signals_are_initialized) { + if (signals_block_count++ == 0) { + const int saved_errno = errno; + mythread_sigmask(SIG_BLOCK, &hooked_signals, NULL); + errno = saved_errno; + } + } + + return; +} + + +extern void +signals_unblock(void) +{ + if (signals_are_initialized) { + assert(signals_block_count > 0); + + if (--signals_block_count == 0) { + const int saved_errno = errno; + mythread_sigmask(SIG_UNBLOCK, &hooked_signals, NULL); + errno = saved_errno; + } + } + + return; +} +#endif + + +extern void +signals_exit(void) +{ + const int sig = exit_signal; + + if (sig != 0) { + struct sigaction sa; + sa.sa_handler = SIG_DFL; + sigfillset(&sa.sa_mask); + sa.sa_flags = 0; + sigaction(sig, &sa, NULL); + raise(exit_signal); + } + + return; +} + +#else + +// While Windows has some very basic signal handling functions as required +// by C89, they are not really used, or so I understood. Instead, we use +// SetConsoleCtrlHandler() to catch user pressing C-c. + +#include + + +static BOOL WINAPI +signal_handler(DWORD type lzma_attribute((unused))) +{ + // Since we don't get a signal number which we could raise() at + // signals_exit() like on POSIX, just set the exit status to + // indicate an error, so that we cannot return with zero exit status. + // + // FIXME: Since this function runs in its own thread, + // set_exit_status() should have a mutex. 
+ set_exit_status(E_ERROR); + user_abort = true; + return TRUE; +} + + +extern void +signals_init(void) +{ + if (!SetConsoleCtrlHandler(&signal_handler, TRUE)) + message_signal_handler(); + + return; +} + +#endif diff --git a/src/xz/signals.h b/src/xz/signals.h new file mode 100644 index 000000000000..7603d8033f71 --- /dev/null +++ b/src/xz/signals.h @@ -0,0 +1,43 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file signals.h +/// \brief Handling signals to abort operation +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +/// If this is true, we will clean up the possibly incomplete output file, +/// return to main() as soon as practical. That is, the code needs to poll +/// this variable in various places. +extern volatile sig_atomic_t user_abort; + + +/// Initialize the signal handler, which will set user_abort to true when +/// user e.g. presses C-c. +extern void signals_init(void); + + +#if defined(_WIN32) || defined(__VMS) +# define signals_block() do { } while (0) +# define signals_unblock() do { } while (0) +#else +/// Block the signals which don't have SA_RESTART and which would just set +/// user_abort to true. This is handy when we don't want to handle EINTR +/// and don't want SA_RESTART either. +extern void signals_block(void); + +/// Unblock the signals blocked by signals_block(). +extern void signals_unblock(void); +#endif + +#ifdef _WIN32 +# define signals_exit() do { } while (0) +#else +/// If user has sent us a signal earlier to terminate the process, +/// re-raise that signal to actually terminate the process. 
+extern void signals_exit(void); +#endif diff --git a/src/xz/suffix.c b/src/xz/suffix.c new file mode 100644 index 000000000000..f2a2da2749b1 --- /dev/null +++ b/src/xz/suffix.c @@ -0,0 +1,211 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file suffix.c +/// \brief Checks filename suffix and creates the destination filename +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + +// For case-insensitive filename suffix on case-insensitive systems +#if defined(TUKLIB_DOSLIKE) || defined(__VMS) +# define strcmp strcasecmp +#endif + + +static char *custom_suffix = NULL; + + +struct suffix_pair { + const char *compressed; + const char *uncompressed; +}; + + +/// \brief Checks if src_name has given compressed_suffix +/// +/// \param suffix Filename suffix to look for +/// \param src_name Input filename +/// \param src_len strlen(src_name) +/// +/// \return If src_name has the suffix, src_len - strlen(suffix) is +/// returned. It's always a positive integer. Otherwise zero +/// is returned. +static size_t +test_suffix(const char *suffix, const char *src_name, size_t src_len) +{ + const size_t suffix_len = strlen(suffix); + + // The filename must have at least one character in addition to + // the suffix. src_name may contain path to the filename, so we + // need to check for directory separator too. + if (src_len <= suffix_len || src_name[src_len - suffix_len - 1] == '/') + return 0; + + if (strcmp(suffix, src_name + src_len - suffix_len) == 0) + return src_len - suffix_len; + + return 0; +} + + +/// \brief Removes the filename suffix of the compressed file +/// +/// \return Name of the uncompressed file, or NULL if file has unknown +/// suffix. 
+static char * +uncompressed_name(const char *src_name, const size_t src_len) +{ + static const struct suffix_pair suffixes[] = { + { ".xz", "" }, + { ".txz", ".tar" }, // .txz abbreviation for .txt.gz is rare. + { ".lzma", "" }, + { ".tlz", ".tar" }, + // { ".gz", "" }, + // { ".tgz", ".tar" }, + }; + + const char *new_suffix = ""; + size_t new_len = 0; + + if (opt_format == FORMAT_RAW) { + // Don't check for known suffixes when --format=raw was used. + if (custom_suffix == NULL) { + message_error(_("%s: With --format=raw, " + "--suffix=.SUF is required unless " + "writing to stdout"), src_name); + return NULL; + } + } else { + for (size_t i = 0; i < ARRAY_SIZE(suffixes); ++i) { + new_len = test_suffix(suffixes[i].compressed, + src_name, src_len); + if (new_len != 0) { + new_suffix = suffixes[i].uncompressed; + break; + } + } + } + + if (new_len == 0 && custom_suffix != NULL) + new_len = test_suffix(custom_suffix, src_name, src_len); + + if (new_len == 0) { + message_warning(_("%s: Filename has an unknown suffix, " + "skipping"), src_name); + return NULL; + } + + const size_t new_suffix_len = strlen(new_suffix); + char *dest_name = xmalloc(new_len + new_suffix_len + 1); + + memcpy(dest_name, src_name, new_len); + memcpy(dest_name + new_len, new_suffix, new_suffix_len); + dest_name[new_len + new_suffix_len] = '\0'; + + return dest_name; +} + + +/// \brief Appends suffix to src_name +/// +/// In contrast to uncompressed_name(), we check only suffixes that are valid +/// for the specified file format. +static char * +compressed_name(const char *src_name, const size_t src_len) +{ + // The order of these must match the order in args.h. + static const struct suffix_pair all_suffixes[][3] = { + { + { ".xz", "" }, + { ".txz", ".tar" }, + { NULL, NULL } + }, { + { ".lzma", "" }, + { ".tlz", ".tar" }, + { NULL, NULL } +/* + }, { + { ".gz", "" }, + { ".tgz", ".tar" }, + { NULL, NULL } +*/ + }, { + // --format=raw requires specifying the suffix + // manually or using stdout. 
+ { NULL, NULL } + } + }; + + // args.c ensures this. + assert(opt_format != FORMAT_AUTO); + + const size_t format = opt_format - 1; + const struct suffix_pair *const suffixes = all_suffixes[format]; + + for (size_t i = 0; suffixes[i].compressed != NULL; ++i) { + if (test_suffix(suffixes[i].compressed, src_name, src_len) + != 0) { + message_warning(_("%s: File already has `%s' " + "suffix, skipping"), src_name, + suffixes[i].compressed); + return NULL; + } + } + + // TODO: Hmm, maybe it would be better to validate this in args.c, + // since the suffix handling when decoding is weird now. + if (opt_format == FORMAT_RAW && custom_suffix == NULL) { + message_error(_("%s: With --format=raw, " + "--suffix=.SUF is required unless " + "writing to stdout"), src_name); + return NULL; + } + + const char *suffix = custom_suffix != NULL + ? custom_suffix : suffixes[0].compressed; + const size_t suffix_len = strlen(suffix); + + char *dest_name = xmalloc(src_len + suffix_len + 1); + + memcpy(dest_name, src_name, src_len); + memcpy(dest_name + src_len, suffix, suffix_len); + dest_name[src_len + suffix_len] = '\0'; + + return dest_name; +} + + +extern char * +suffix_get_dest_name(const char *src_name) +{ + assert(src_name != NULL); + + // Length of the name is needed in all cases to locate the end of + // the string to compare the suffix, so calculate the length here. + const size_t src_len = strlen(src_name); + + return opt_mode == MODE_COMPRESS + ? compressed_name(src_name, src_len) + : uncompressed_name(src_name, src_len); +} + + +extern void +suffix_set(const char *suffix) +{ + // Empty suffix and suffixes having a slash are rejected. Such + // suffixes would break things later. + if (suffix[0] == '\0' || strchr(suffix, '/') != NULL) + message_fatal(_("%s: Invalid filename suffix"), optarg); + + // Replace the old custom_suffix (if any) with the new suffix. 
+ free(custom_suffix); + custom_suffix = xstrdup(suffix); + return; +} diff --git a/src/xz/suffix.h b/src/xz/suffix.h new file mode 100644 index 000000000000..5537d7324f45 --- /dev/null +++ b/src/xz/suffix.h @@ -0,0 +1,28 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file suffix.h +/// \brief Checks filename suffix and creates the destination filename +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +/// \brief Get the name of the destination file +/// +/// Depending on the global variable opt_mode, this tries to find a matching +/// counterpart for src_name. If the name can be constructed, it is allocated +/// and returned (caller must free it). On error, a message is printed and +/// NULL is returned. +extern char *suffix_get_dest_name(const char *src_name); + + +/// \brief Set a custom filename suffix +/// +/// This function calls xstrdup() for the given suffix, thus the caller +/// doesn't need to keep the memory allocated. There can be only one custom +/// suffix, thus if this is called multiple times, the old suffixes are freed +/// and forgotten. +extern void suffix_set(const char *suffix); diff --git a/src/xz/util.c b/src/xz/util.c new file mode 100644 index 000000000000..deb5dcc2fd39 --- /dev/null +++ b/src/xz/util.c @@ -0,0 +1,314 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file util.c +/// \brief Miscellaneous utility functions +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" +#include + + +extern void * +xrealloc(void *ptr, size_t size) +{ + assert(size > 0); + + ptr = realloc(ptr, size); + if (ptr == NULL) + message_fatal("%s", strerror(errno)); + + return ptr; +} + + +extern char * +xstrdup(const char *src) +{ + assert(src != NULL); + const size_t size = strlen(src) + 1; + char *dest = xmalloc(size); + return memcpy(dest, src, size); +} + + +extern uint64_t +str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max) +{ + uint64_t result = 0; + + // Skip blanks. + while (*value == ' ' || *value == '\t') + ++value; + + // Accept special value "max". Supporting "min" doesn't seem useful. + if (strcmp(value, "max") == 0) + return max; + + if (*value < '0' || *value > '9') + message_fatal(_("%s: Value is not a non-negative " + "decimal integer"), value); + + do { + // Don't overflow. + if (result > (UINT64_MAX - 9) / 10) + goto error; + + result *= 10; + result += *value - '0'; + ++value; + } while (*value >= '0' && *value <= '9'); + + if (*value != '\0') { + // Look for suffix. Originally this supported both base-2 + // and base-10, but since there seems to be little need + // for base-10 in this program, treat everything as base-2 + // and also be more relaxed about the case of the first + // letter of the suffix. + uint64_t multiplier = 0; + if (*value == 'k' || *value == 'K') + multiplier = UINT64_C(1) << 10; + else if (*value == 'm' || *value == 'M') + multiplier = UINT64_C(1) << 20; + else if (*value == 'g' || *value == 'G') + multiplier = UINT64_C(1) << 30; + + ++value; + + // Allow also e.g. Ki, KiB, and KB. 
+ if (*value != '\0' && strcmp(value, "i") != 0 + && strcmp(value, "iB") != 0 + && strcmp(value, "B") != 0) + multiplier = 0; + + if (multiplier == 0) { + message(V_ERROR, _("%s: Invalid multiplier suffix"), + value - 1); + message_fatal(_("Valid suffixes are `KiB' (2^10), " + "`MiB' (2^20), and `GiB' (2^30).")); + } + + // Don't overflow here either. + if (result > UINT64_MAX / multiplier) + goto error; + + result *= multiplier; + } + + if (result < min || result > max) + goto error; + + return result; + +error: + message_fatal(_("Value of the option `%s' must be in the range " + "[%" PRIu64 ", %" PRIu64 "]"), + name, min, max); +} + + +extern uint64_t +round_up_to_mib(uint64_t n) +{ + return (n >> 20) + ((n & ((UINT32_C(1) << 20) - 1)) != 0); +} + + +extern const char * +uint64_to_str(uint64_t value, uint32_t slot) +{ + // 2^64 with thousand separators is 26 bytes plus trailing '\0'. + static char bufs[4][32]; + + assert(slot < ARRAY_SIZE(bufs)); + + static enum { UNKNOWN, WORKS, BROKEN } thousand = UNKNOWN; + if (thousand == UNKNOWN) { + bufs[slot][0] = '\0'; + snprintf(bufs[slot], sizeof(bufs[slot]), "%'" PRIu64, + UINT64_C(1)); + thousand = bufs[slot][0] == '1' ? WORKS : BROKEN; + } + + if (thousand == WORKS) + snprintf(bufs[slot], sizeof(bufs[slot]), "%'" PRIu64, value); + else + snprintf(bufs[slot], sizeof(bufs[slot]), "%" PRIu64, value); + + return bufs[slot]; +} + + +extern const char * +uint64_to_nicestr(uint64_t value, enum nicestr_unit unit_min, + enum nicestr_unit unit_max, bool always_also_bytes, + uint32_t slot) +{ + assert(unit_min <= unit_max); + assert(unit_max <= NICESTR_TIB); + + enum nicestr_unit unit = NICESTR_B; + const char *str; + + if ((unit_min == NICESTR_B && value < 10000) + || unit_max == NICESTR_B) { + // The value is shown as bytes. + str = uint64_to_str(value, slot); + } else { + // Scale the value to a nicer unit. Unless unit_min and + // unit_max limit us, we will show at most five significant + // digits with one decimal place. 
+ double d = (double)(value); + do { + d /= 1024.0; + ++unit; + } while (unit < unit_min || (d > 9999.9 && unit < unit_max)); + + str = double_to_str(d); + } + + static const char suffix[5][4] = { "B", "KiB", "MiB", "GiB", "TiB" }; + + // Minimum buffer size: + // 26 2^64 with thousand separators + // 4 " KiB" + // 2 " (" + // 26 2^64 with thousand separators + // 3 " B)" + // 1 '\0' + // 62 Total + static char buf[4][64]; + char *pos = buf[slot]; + size_t left = sizeof(buf[slot]); + my_snprintf(&pos, &left, "%s %s", str, suffix[unit]); + + if (always_also_bytes && value >= 10000) + snprintf(pos, left, " (%s B)", uint64_to_str(value, slot)); + + return buf[slot]; +} + + +extern const char * +double_to_str(double value) +{ + static char buf[64]; + + static enum { UNKNOWN, WORKS, BROKEN } thousand = UNKNOWN; + if (thousand == UNKNOWN) { + buf[0] = '\0'; + snprintf(buf, sizeof(buf), "%'.1f", 2.0); + thousand = buf[0] == '2' ? WORKS : BROKEN; + } + + if (thousand == WORKS) + snprintf(buf, sizeof(buf), "%'.1f", value); + else + snprintf(buf, sizeof(buf), "%.1f", value); + + return buf; +} + + +extern void +my_snprintf(char **pos, size_t *left, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + const int len = vsnprintf(*pos, *left, fmt, ap); + va_end(ap); + + // If an error occurred, we want the caller to think that the whole + // buffer was used. This way no more data will be written to the + // buffer. We don't need better error handling here. + if (len < 0 || (size_t)(len) >= *left) { + *left = 0; + } else { + *pos += len; + *left -= len; + } + + return; +} + + +/* +/// \brief Simple quoting to get rid of ASCII control characters +/// +/// This is not so cool and locale-dependent, but should be good enough +/// At least we don't print any control characters on the terminal. 
+/// +extern char * +str_quote(const char *str) +{ + size_t dest_len = 0; + bool has_ctrl = false; + + while (str[dest_len] != '\0') + if (*(unsigned char *)(str + dest_len++) < 0x20) + has_ctrl = true; + + char *dest = malloc(dest_len + 1); + if (dest != NULL) { + if (has_ctrl) { + for (size_t i = 0; i < dest_len; ++i) + if (*(unsigned char *)(str + i) < 0x20) + dest[i] = '?'; + else + dest[i] = str[i]; + + dest[dest_len] = '\0'; + + } else { + // Usually there are no control characters, + // so we can optimize. + memcpy(dest, str, dest_len + 1); + } + } + + return dest; +} +*/ + + +extern bool +is_empty_filename(const char *filename) +{ + if (filename[0] == '\0') { + message_error(_("Empty filename, skipping")); + return true; + } + + return false; +} + + +extern bool +is_tty_stdin(void) +{ + const bool ret = isatty(STDIN_FILENO); + + if (ret) + message_error(_("Compressed data cannot be read from " + "a terminal")); + + return ret; +} + + +extern bool +is_tty_stdout(void) +{ + const bool ret = isatty(STDOUT_FILENO); + + if (ret) + message_error(_("Compressed data cannot be written to " + "a terminal")); + + return ret; +} diff --git a/src/xz/util.h b/src/xz/util.h new file mode 100644 index 000000000000..2e08b4a868ec --- /dev/null +++ b/src/xz/util.h @@ -0,0 +1,129 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file util.h +/// \brief Miscellaneous utility functions +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +/// \brief Safe malloc() that never returns NULL +/// +/// \note xmalloc(), xrealloc(), and xstrdup() must not be used when +/// there are files open for writing, that should be cleaned up +/// before exiting. 
+#define xmalloc(size) xrealloc(NULL, size) + + +/// \brief Safe realloc() that never returns NULL +extern void *xrealloc(void *ptr, size_t size); + + +/// \brief Safe strdup() that never returns NULL +extern char *xstrdup(const char *src); + + +/// \brief Fancy version of strtoull() +/// +/// \param name Name of the option to show in case of an error +/// \param value String containing the number to be parsed; may +/// contain suffixes "k", "M", "G", "Ki", "Mi", or "Gi" +/// \param min Minimum valid value +/// \param max Maximum valid value +/// +/// \return Parsed value that is in the range [min, max]. Does not return +/// if an error occurs. +/// +extern uint64_t str_to_uint64(const char *name, const char *value, + uint64_t min, uint64_t max); + + +/// \brief Round an integer up to the next full MiB and convert to MiB +/// +/// This is used when printing memory usage and limit. +extern uint64_t round_up_to_mib(uint64_t n); + + +/// \brief Convert uint64_t to a string +/// +/// Convert the given value to a string with locale-specific thousand +/// separators, if supported by the snprintf() implementation. The string +/// is stored into an internal static buffer indicated by the slot argument. +/// A pointer to the selected buffer is returned. +/// +/// This function exists, because non-POSIX systems don't support thousand +/// separator in format strings. Solving the problem in a simple way doesn't +/// work, because it breaks gettext (specifically, the xgettext tool). +extern const char *uint64_to_str(uint64_t value, uint32_t slot); + + +enum nicestr_unit { + NICESTR_B, + NICESTR_KIB, + NICESTR_MIB, + NICESTR_GIB, + NICESTR_TIB, +}; + + +/// \brief Convert uint64_t to a nice human readable string +/// +/// This is like uint64_to_str() but uses B, KiB, MiB, GiB, or TiB suffix +/// and optionally includes the exact size in parenthesis. +/// +/// \param value Value to be printed +/// \param unit_min Smallest unit to use. This and unit_max are used +/// e.g. 
when showing the progress indicator to force +/// the unit to MiB. +/// \param unit_max Biggest unit to use. assert(unit_min <= unit_max). +/// \param always_also_bytes +/// Show also the exact byte value in parenthesis +/// if the nicely formatted string uses bigger unit +/// than bytes. +/// \param slot Which static buffer to use to hold the string. +/// This is shared with uint64_to_str(). +/// +/// \return Pointer to statically allocated buffer containing the string. +/// +/// \note This uses double_to_str() internally so the static buffer +/// in double_to_str() will be overwritten. +/// +extern const char *uint64_to_nicestr(uint64_t value, + enum nicestr_unit unit_min, enum nicestr_unit unit_max, + bool always_also_bytes, uint32_t slot); + + +/// \brief Convert double to a string with one decimal place +/// +/// This is like uint64_to_str() except that this converts a double and +/// uses exactly one decimal place. +extern const char *double_to_str(double value); + + +/// \brief Wrapper for snprintf() to help constructing a string in pieces +/// +/// A maximum of *left bytes is written starting from *pos. *pos and *left +/// are updated accordingly. +extern void my_snprintf(char **pos, size_t *left, const char *fmt, ...) + lzma_attribute((format(printf, 3, 4))); + + +/// \brief Check if filename is empty and print an error message +extern bool is_empty_filename(const char *filename); + + +/// \brief Test if stdin is a terminal +/// +/// If stdin is a terminal, an error message is printed and exit status set +/// to EXIT_ERROR. +extern bool is_tty_stdin(void); + + +/// \brief Test if stdout is a terminal +/// +/// If stdout is a terminal, an error message is printed and exit status set +/// to EXIT_ERROR. 
+extern bool is_tty_stdout(void); diff --git a/src/xz/xz.1 b/src/xz/xz.1 new file mode 100644 index 000000000000..b60353d0e927 --- /dev/null +++ b/src/xz/xz.1 @@ -0,0 +1,1351 @@ +'\" t +.\" +.\" Author: Lasse Collin +.\" +.\" This file has been put into the public domain. +.\" You can do whatever you want with this file. +.\" +.TH XZ 1 "2010-03-07" "Tukaani" "XZ Utils" +.SH NAME +xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files +.SH SYNOPSIS +.B xz +.RI [ option ]... +.RI [ file ]... +.PP +.B unxz +is equivalent to +.BR "xz \-\-decompress" . +.br +.B xzcat +is equivalent to +.BR "xz \-\-decompress \-\-stdout" . +.br +.B lzma +is equivalent to +.BR "xz \-\-format=lzma" . +.br +.B unlzma +is equivalent to +.BR "xz \-\-format=lzma \-\-decompress" . +.br +.B lzcat +is equivalent to +.BR "xz \-\-format=lzma \-\-decompress \-\-stdout" . +.PP +When writing scripts that need to decompress files, it is recommended to +always use the name +.B xz +with appropriate arguments +.RB ( "xz \-d" +or +.BR "xz \-dc" ) +instead of the names +.B unxz +and +.BR xzcat . +.SH DESCRIPTION +.B xz +is a general-purpose data compression tool with command line syntax similar to +.BR gzip (1) +and +.BR bzip2 (1). +The native file format is the +.B .xz +format, but also the legacy +.B .lzma +format and raw compressed streams with no container format headers +are supported. +.PP +.B xz +compresses or decompresses each +.I file +according to the selected operation mode. +If no +.I files +are given or +.I file +is +.BR \- , +.B xz +reads from standard input and writes the processed data to standard output. +.B xz +will refuse (display an error and skip the +.IR file ) +to write compressed data to standard output if it is a terminal. Similarly, +.B xz +will refuse to read compressed data from standard input if it is a terminal.
+.PP +Unless +.B \-\-stdout +is specified, +.I files +other than +.B \- +are written to a new file whose name is derived from the source +.I file +name: +.IP \(bu 3 +When compressing, the suffix of the target file format +.RB ( .xz +or +.BR .lzma ) +is appended to the source filename to get the target filename. +.IP \(bu 3 +When decompressing, the +.B .xz +or +.B .lzma +suffix is removed from the filename to get the target filename. +.B xz +also recognizes the suffixes +.B .txz +and +.BR .tlz , +and replaces them with the +.B .tar +suffix. +.PP +If the target file already exists, an error is displayed and the +.I file +is skipped. +.PP +Unless writing to standard output, +.B xz +will display a warning and skip the +.I file +if any of the following applies: +.IP \(bu 3 +.I File +is not a regular file. Symbolic links are not followed, thus they +are not considered to be regular files. +.IP \(bu 3 +.I File +has more than one hard link. +.IP \(bu 3 +.I File +has setuid, setgid, or sticky bit set. +.IP \(bu 3 +The operation mode is set to compress, and the +.I file +already has a suffix of the target file format +.RB ( .xz +or +.B .txz +when compressing to the +.B .xz +format, and +.B .lzma +or +.B .tlz +when compressing to the +.B .lzma +format). +.IP \(bu 3 +The operation mode is set to decompress, and the +.I file +doesn't have a suffix of any of the supported file formats +.RB ( .xz , +.BR .txz , +.BR .lzma , +or +.BR .tlz ). +.PP +After successfully compressing or decompressing the +.IR file , +.B xz +copies the owner, group, permissions, access time, and modification time +from the source +.I file +to the target file. If copying the group fails, the permissions are modified +so that the target file doesn't become accessible to users who didn't have +permission to access the source +.IR file . +.B xz +doesn't support copying other metadata like access control lists +or extended attributes yet. 
+.PP +Once the target file has been successfully closed, the source +.I file +is removed unless +.B \-\-keep +was specified. The source +.I file +is never removed if the output is written to standard output. +.PP +Sending +.B SIGINFO +or +.B SIGUSR1 +to the +.B xz +process makes it print progress information to standard error. +This has only limited use since when standard error is a terminal, using +.B \-\-verbose +will display an automatically updating progress indicator. +.SS "Memory usage" +The memory usage of +.B xz +varies from a few hundred kilobytes to several gigabytes depending on +the compression settings. The settings used when compressing a file +affect also the memory usage of the decompressor. Typically the decompressor +needs only 5\ % to 20\ % of the amount of RAM that the compressor needed when +creating the file. Still, the worst-case memory usage of the decompressor +is several gigabytes. +.PP +To prevent uncomfortable surprises caused by huge memory usage, +.B xz +has a built-in memory usage limiter. While some operating systems provide +ways to limit the memory usage of processes, relying on it wasn't deemed +to be flexible enough. The default limit depends on the total amount of +physical RAM: +.IP \(bu 3 +If 40\ % of RAM is at least 80 MiB, 40\ % of RAM is used as the limit. +.IP \(bu 3 +If 80\ % of RAM is over 80 MiB, 80 MiB is used as the limit. +.IP \(bu 3 +Otherwise 80\ % of RAM is used as the limit. +.PP +When compressing, if the selected compression settings exceed the memory +usage limit, the settings are automatically adjusted downwards and a notice +about this is displayed. As an exception, if the memory usage limit is +exceeded when compressing with +.BR \-\-format=raw , +an error is displayed and +.B xz +will exit with exit status +.BR 1 . +.PP +If source +.I file +cannot be decompressed without exceeding the memory usage limit, an error +message is displayed and the file is skipped. 
Note that compressed files +may contain many blocks, which may have been compressed with different +settings. Typically all blocks will have roughly the same memory requirements, +but it is possible that a block later in the file will exceed the memory usage +limit, and an error about too low memory usage limit gets displayed after some +data has already been decompressed. +.PP +The absolute value of the active memory usage limit can be seen with +.B \-\-info-memory +or near the bottom of the output of +.BR \-\-long\-help . +The default limit can be overridden with +\fB\-\-memory=\fIlimit\fR. +.SH OPTIONS +.SS "Integer suffixes and special values" +In most places where an integer argument is expected, an optional suffix +is supported to easily indicate large integers. There must be no space +between the integer and the suffix. +.TP +.B KiB +The integer is multiplied by 1,024 (2^10). Also +.BR Ki , +.BR k , +.BR kB , +.BR K , +and +.B KB +are accepted as synonyms for +.BR KiB . +.TP +.B MiB +The integer is multiplied by 1,048,576 (2^20). Also +.BR Mi , +.BR m , +.BR M , +and +.B MB +are accepted as synonyms for +.BR MiB . +.TP +.B GiB +The integer is multiplied by 1,073,741,824 (2^30). Also +.BR Gi , +.BR g , +.BR G , +and +.B GB +are accepted as synonyms for +.BR GiB . +.PP +A special value +.B max +can be used to indicate the maximum integer value supported by the option. +.SS "Operation mode" +If multiple operation mode options are given, the last one takes effect. +.TP +.BR \-z ", " \-\-compress +Compress. This is the default operation mode when no operation mode option +is specified, and no other operation mode is implied from the command name +(for example, +.B unxz +implies +.BR \-\-decompress ). +.TP +.BR \-d ", " \-\-decompress ", " \-\-uncompress +Decompress. +.TP +.BR \-t ", " \-\-test +Test the integrity of compressed +.IR files . +No files are created or removed. 
This option is equivalent to +.B "\-\-decompress \-\-stdout" +except that the decompressed data is discarded instead of being +written to standard output. +.TP +.BR \-l ", " \-\-list +View information about the compressed files. No uncompressed output is +produced, and no files are created or removed. In list mode, the program +cannot read the compressed data from standard input or from other +unseekable sources. +.IP +.B "This feature has not been implemented yet." +.SS "Operation modifiers" +.TP +.BR \-k ", " \-\-keep +Keep (don't delete) the input files. +.TP +.BR \-f ", " \-\-force +This option has several effects: +.RS +.IP \(bu 3 +If the target file already exists, delete it before compressing or +decompressing. +.IP \(bu 3 +Compress or decompress even if the input is a symbolic link to a regular file, +has more than one hard link, or has setuid, setgid, or sticky bit set. +The setuid, setgid, and sticky bits are not copied to the target file. +.IP \(bu 3 +If combined with +.B \-\-decompress +.BR \-\-stdout +and +.B xz +doesn't recognize the type of the source file, +.B xz +will copy the source file as is to standard output. This allows using +.B xzcat +.B \-\-force +like +.BR cat (1) +for files that have not been compressed with +.BR xz . +Note that in future, +.B xz +might support new compressed file formats, which may make +.B xz +decompress more types of files instead of copying them as is to +standard output. +.BI \-\-format= format +can be used to restrict +.B xz +to decompress only a single file format. +.RE +.TP +.BR \-c ", " \-\-stdout ", " \-\-to-stdout +Write the compressed or decompressed data to standard output instead of +a file. This implies +.BR \-\-keep . +.TP +.B \-\-no\-sparse +Disable creation of sparse files. By default, if decompressing into +a regular file, +.B xz +tries to make the file sparse if the decompressed data contains long +sequences of binary zeros.
It works also when writing to standard output +as long as standard output is connected to a regular file, and certain +additional conditions are met to make it safe. Creating sparse files may +save disk space and speed up the decompression by reducing the amount of +disk I/O. +.TP +\fB\-S\fR \fI.suf\fR, \fB\-\-suffix=\fI.suf +When compressing, use +.I .suf +as the suffix for the target file instead of +.B .xz +or +.BR .lzma . +If not writing to standard output and the source file already has the suffix +.IR .suf , +a warning is displayed and the file is skipped. +.IP +When decompressing, recognize also files with the suffix +.I .suf +in addition to files with the +.BR .xz , +.BR .txz , +.BR .lzma , +or +.B .tlz +suffix. If the source file has the suffix +.IR .suf , +the suffix is removed to get the target filename. +.IP +When compressing or decompressing raw streams +.RB ( \-\-format=raw ), +the suffix must always be specified unless writing to standard output, +because there is no default suffix for raw streams. +.TP +\fB\-\-files\fR[\fB=\fIfile\fR] +Read the filenames to process from +.IR file ; +if +.I file +is omitted, filenames are read from standard input. Filenames must be +terminated with the newline character. A dash +.RB ( \- ) +is taken as a regular filename; it doesn't mean standard input. +If filenames are given also as command line arguments, they are +processed before the filenames read from +.IR file . +.TP +\fB\-\-files0\fR[\fB=\fIfile\fR] +This is identical to \fB\-\-files\fR[\fB=\fIfile\fR] except that the +filenames must be terminated with the null character. +.SS "Basic file format and compression options" +.TP +\fB\-F\fR \fIformat\fR, \fB\-\-format=\fIformat +Specify the file format to compress or decompress: +.RS +.IP \(bu 3 +.BR auto : +This is the default. When compressing, +.B auto +is equivalent to +.BR xz . +When decompressing, the format of the input file is automatically detected. 
+Note that raw streams (created with +.BR \-\-format=raw ) +cannot be auto-detected. +.IP \(bu 3 +.BR xz : +Compress to the +.B .xz +file format, or accept only +.B .xz +files when decompressing. +.IP \(bu 3 +.B lzma +or +.BR alone : +Compress to the legacy +.B .lzma +file format, or accept only +.B .lzma +files when decompressing. The alternative name +.B alone +is provided for backwards compatibility with LZMA Utils. +.IP \(bu 3 +.BR raw : +Compress or uncompress a raw stream (no headers). This is meant for advanced +users only. To decode raw streams, you need to set not only +.B \-\-format=raw +but also specify the filter chain, which would normally be stored in the +container format headers. +.RE +.TP +\fB\-C\fR \fIcheck\fR, \fB\-\-check=\fIcheck +Specify the type of the integrity check, which is calculated from the +uncompressed data. This option has an effect only when compressing into the +.B .xz +format; the +.B .lzma +format doesn't support integrity checks. +The integrity check (if any) is verified when the +.B .xz +file is decompressed. +.IP +Supported +.I check +types: +.RS +.IP \(bu 3 +.BR none : +Don't calculate an integrity check at all. This is usually a bad idea. This +can be useful when integrity of the data is verified by other means anyway. +.IP \(bu 3 +.BR crc32 : +Calculate CRC32 using the polynomial from IEEE-802.3 (Ethernet). +.IP \(bu 3 +.BR crc64 : +Calculate CRC64 using the polynomial from ECMA-182. This is the default, since +it is slightly better than CRC32 at detecting damaged files and the speed +difference is negligible. +.IP \(bu 3 +.BR sha256 : +Calculate SHA-256. This is somewhat slower than CRC32 and CRC64. +.RE +.IP +Integrity of the +.B .xz +headers is always verified with CRC32. It is not possible to change or +disable it. +.TP +.BR \-0 " ... " \-9 +Select compression preset. If a preset level is specified multiple times, +the last one takes effect. 
+.IP +The compression preset levels can be categorised roughly into three +categories: +.RS +.IP "\fB\-0\fR ... \fB\-2" +Fast presets with relatively low memory usage. +.B \-1 +and +.B \-2 +should give compression speed and ratios comparable to +.B "bzip2 \-1" +and +.BR "bzip2 \-9" , +respectively. +Currently +.B \-0 +is not very good (not much faster than +.B \-1 +but much worse compression). In future, +.B \-0 +may indicate some fast algorithm instead of LZMA2. +.IP "\fB\-3\fR ... \fB\-5" +Good compression ratio with low to medium memory usage. +These are significantly slower than levels 0\-2. +.IP "\fB\-6\fR ... \fB\-9" +Excellent compression with medium to high memory usage. These are also +slower than the lower preset levels. The default is +.BR \-6 . +Unless you want to maximize the compression ratio, you probably don't want +a higher preset level than +.B \-7 +due to speed and memory usage. +.RE +.IP +The exact compression settings (filter chain) used by each preset may +vary between +.B xz +versions. The settings may also vary between files being compressed, if +.B xz +determines that modified settings will probably give better compression +ratio without significantly affecting compression time or memory usage. +.IP +Because the settings may vary, the memory usage may vary too. The following +table lists the maximum memory usage of each preset level, which won't be +exceeded even in future versions of +.BR xz . +.IP +.B "FIXME: The table below is just a rough idea." +.RS +.RS +.TS +tab(;); +c c c +n n n.
+Preset;Compression;Decompression +\-0;6 MiB;1 MiB +\-1;6 MiB;1 MiB +\-2;10 MiB;1 MiB +\-3;20 MiB;2 MiB +\-4;30 MiB;3 MiB +\-5;60 MiB;6 MiB +\-6;100 MiB;10 MiB +\-7;200 MiB;20 MiB +\-8;400 MiB;40 MiB +\-9;800 MiB;80 MiB +.TE +.RE +.RE +.IP +When compressing, +.B xz +automatically adjusts the compression settings downwards if +the memory usage limit would be exceeded, so it is safe to specify +a high preset level even on systems that don't have lots of RAM. +.TP +.BR \-\-fast " and " \-\-best +These are somewhat misleading aliases for +.B \-0 +and +.BR \-9 , +respectively. +These are provided only for backwards compatibility with LZMA Utils. +Avoid using these options. +.IP +Especially the name of +.B \-\-best +is misleading, because the definition of best depends on the input data, +and that usually people don't want the very best compression ratio anyway, +because it would be very slow. +.TP +.BR \-e ", " \-\-extreme +Modify the compression preset (\fB\-0\fR ... \fB\-9\fR) so that a little bit +better compression ratio can be achieved without increasing memory usage +of the compressor or decompressor (exception: compressor memory usage may +increase a little with presets \fB\-0\fR ... \fB\-2\fR). The downside is that +the compression time will increase dramatically (it can easily double). +.TP +\fB\-M\fR \fIlimit\fR, \fB\-\-memory=\fIlimit +Set the memory usage limit. If this option is specified multiple times, +the last one takes effect. The +.I limit +can be specified in multiple ways: +.RS +.IP \(bu 3 +The +.I limit +can be an absolute value in bytes. Using an integer suffix like +.B MiB +can be useful. Example: +.B "\-\-memory=80MiB" +.IP \(bu 3 +The +.I limit +can be specified as a percentage of physical RAM. Example: +.B "\-\-memory=70%" +.IP \(bu 3 +The +.I limit +can be reset back to its default value by setting it to +.BR 0 . +See the section +.B "Memory usage" +for how the default limit is defined. 
+.IP \(bu 3 +The memory usage limiting can be effectively disabled by setting +.I limit +to +.BR max . +This isn't recommended. It's usually better to use, for example, +.BR \-\-memory=90% . +.RE +.IP +The current +.I limit +can be seen near the bottom of the output of the +.B \-\-long-help +option. +.TP +\fB\-T\fR \fIthreads\fR, \fB\-\-threads=\fIthreads +Specify the maximum number of worker threads to use. The default is +the number of available CPU cores. You can see the current value of +.I threads +near the end of the output of the +.B \-\-long\-help +option. +.IP +The actual number of worker threads can be less than +.I threads +if using more threads would exceed the memory usage limit. +In addition to CPU-intensive worker threads, +.B xz +may use a few auxiliary threads, which don't use a lot of CPU time. +.IP +.B "Multithreaded compression and decompression are not implemented yet," +.B "so this option has no effect for now." +.SS Custom compressor filter chains +A custom filter chain allows specifying the compression settings in detail +instead of relying on the settings associated to the preset levels. +When a custom filter chain is specified, the compression preset level options +(\fB\-0\fR ... \fB\-9\fR and \fB\-\-extreme\fR) are silently ignored. +.PP +A filter chain is comparable to piping on the UN*X command line. +When compressing, the uncompressed input goes to the first filter, whose +output goes to the next filter (if any). The output of the last filter +gets written to the compressed file. The maximum number of filters in +the chain is four, but typically a filter chain has only one or two filters. +.PP +Many filters have limitations where they can be in the filter chain: +some filters can work only as the last filter in the chain, some only +as a non-last filter, and some work in any position in the chain. Depending +on the filter, this limitation is either inherent to the filter design or +exists to prevent security issues. 
+.PP +A custom filter chain is specified by using one or more filter options in +the order they are wanted in the filter chain. That is, the order of filter +options is significant! When decoding raw streams +.RB ( \-\-format=raw ), +the filter chain is specified in the same order as it was specified when +compressing. +.PP +Filters take filter-specific +.I options +as a comma-separated list. Extra commas in +.I options +are ignored. Every option has a default value, so you need to +specify only those you want to change. +.TP +\fB\-\-lzma1\fR[\fB=\fIoptions\fR], \fB\-\-lzma2\fR[\fB=\fIoptions\fR] +Add LZMA1 or LZMA2 filter to the filter chain. These filters can be used +only as the last filter in the chain. +.IP +LZMA1 is a legacy filter, which is supported almost solely due to the legacy +.B .lzma +file format, which supports only LZMA1. LZMA2 is an updated +version of LZMA1 to fix some practical issues of LZMA1. The +.B .xz +format uses LZMA2, and doesn't support LZMA1 at all. Compression speed and +ratios of LZMA1 and LZMA2 are practically the same. +.IP +LZMA1 and LZMA2 share the same set of +.IR options : +.RS +.TP +.BI preset= preset +Reset all LZMA1 or LZMA2 +.I options +to +.IR preset . +.I Preset +consists of an integer, which may be followed by single-letter preset +modifiers. The integer can be from +.B 0 +to +.BR 9 , +matching the command line options \fB\-0\fR ... \fB\-9\fR. +The only supported modifier is currently +.BR e , +which matches +.BR \-\-extreme . +.IP +The default +.I preset +is +.BR 6 , +from which the default values for the rest of the LZMA1 or LZMA2 +.I options +are taken. +.TP +.BI dict= size +Dictionary (history buffer) size indicates how many bytes of the recently +processed uncompressed data is kept in memory. One method to reduce size of +the uncompressed data is to store distance-length pairs, which +indicate what data to repeat from the dictionary buffer.
The bigger +the dictionary, the better the compression ratio usually is, +but dictionaries bigger than the uncompressed data are a waste of RAM. +.IP +Typical dictionary size is from 64 KiB to 64 MiB. The minimum is 4 KiB. +The maximum for compression is currently 1.5 GiB. The decompressor already +supports dictionaries up to one byte less than 4 GiB, which is the +maximum for LZMA1 and LZMA2 stream formats. +.IP +Dictionary size has the biggest effect on compression ratio. +Dictionary size and match finder together determine the memory usage of +the LZMA1 or LZMA2 encoder. The same dictionary size is required +for decompressing that was used when compressing, thus the memory usage of +the decoder is determined by the dictionary size used when compressing. +.TP +.BI lc= lc +Specify the number of literal context bits. The minimum is +.B 0 +and the maximum is +.BR 4 ; +the default is +.BR 3 . +In addition, the sum of +.I lc +and +.I lp +must not exceed +.BR 4 . +.TP +.BI lp= lp +Specify the number of literal position bits. The minimum is +.B 0 +and the maximum is +.BR 4 ; +the default is +.BR 0 . +.TP +.BI pb= pb +Specify the number of position bits. The minimum is +.B 0 +and the maximum is +.BR 4 ; +the default is +.BR 2 . +.TP +.BI mode= mode +Compression +.I mode +specifies the function used to analyze the data produced by the match finder. +Supported +.I modes +are +.B fast +and +.BR normal . +The default is +.B fast +for +.I presets +.BR 0 \- 2 +and +.B normal +for +.I presets +.BR 3 \- 9 . +.TP +.BI mf= mf +Match finder has a major effect on encoder speed, memory usage, and +compression ratio. Usually Hash Chain match finders are faster than +Binary Tree match finders. Hash Chains are usually used together with +.B mode=fast +and Binary Trees with +.BR mode=normal . +The memory usage formulas are only rough estimates, +which are closest to reality when +.I dict +is a power of two.
+.RS +.TP +.B hc3 +Hash Chain with 2- and 3-byte hashing +.br +Minimum value for +.IR nice : +3 +.br +Memory usage: +.I dict +* 7.5 (if +.I dict +<= 16 MiB); +.br +.I dict +* 5.5 + 64 MiB (if +.I dict +> 16 MiB) +.TP +.B hc4 +Hash Chain with 2-, 3-, and 4-byte hashing +.br +Minimum value for +.IR nice : +4 +.br +Memory usage: +.I dict +* 7.5 +.TP +.B bt2 +Binary Tree with 2-byte hashing +.br +Minimum value for +.IR nice : +2 +.br +Memory usage: +.I dict +* 9.5 +.TP +.B bt3 +Binary Tree with 2- and 3-byte hashing +.br +Minimum value for +.IR nice : +3 +.br +Memory usage: +.I dict +* 11.5 (if +.I dict +<= 16 MiB); +.br +.I dict +* 9.5 + 64 MiB (if +.I dict +> 16 MiB) +.TP +.B bt4 +Binary Tree with 2-, 3-, and 4-byte hashing +.br +Minimum value for +.IR nice : +4 +.br +Memory usage: +.I dict +* 11.5 +.RE +.TP +.BI nice= nice +Specify what is considered to be a nice length for a match. Once a match +of at least +.I nice +bytes is found, the algorithm stops looking for possibly better matches. +.IP +.I nice +can be 2\-273 bytes. Higher values tend to give better compression ratio +at expense of speed. The default depends on the +.I preset +level. +.TP +.BI depth= depth +Specify the maximum search depth in the match finder. The default is the +special value +.BR 0 , +which makes the compressor determine a reasonable +.I depth +from +.I mf +and +.IR nice . +.IP +Using very high values for +.I depth +can make the encoder extremely slow with carefully crafted files. +Avoid setting the +.I depth +over 1000 unless you are prepared to interrupt the compression in case it +is taking too long. +.RE +.IP +When decoding raw streams +.RB ( \-\-format=raw ), +LZMA2 needs only the value of +.BR dict . +LZMA1 needs also +.BR lc , +.BR lp , +and +.BR pb .
+.TP +\fB\-\-x86\fR[\fB=\fIoptions\fR] +.TP +\fB\-\-powerpc\fR[\fB=\fIoptions\fR] +.TP +\fB\-\-ia64\fR[\fB=\fIoptions\fR] +.TP +\fB\-\-arm\fR[\fB=\fIoptions\fR] +.TP +\fB\-\-armthumb\fR[\fB=\fIoptions\fR] +.TP +\fB\-\-sparc\fR[\fB=\fIoptions\fR] +Add a branch/call/jump (BCJ) filter to the filter chain. These filters +can be used only as non-last filter in the filter chain. +.IP +A BCJ filter converts relative addresses in the machine code to their +absolute counterparts. This doesn't change the size of the data, but +it increases redundancy, which allows e.g. LZMA2 to get better +compression ratio. +.IP +The BCJ filters are always reversible, so using a BCJ filter for wrong +type of data doesn't cause any data loss. However, applying a BCJ filter +for wrong type of data is a bad idea, because it tends to make the +compression ratio worse. +.IP +Different instruction sets have different alignment: +.RS +.RS +.TS +tab(;); +l n l +l n l. +Filter;Alignment;Notes +x86;1;32-bit and 64-bit x86 +PowerPC;4;Big endian only +ARM;4;Little endian only +ARM-Thumb;2;Little endian only +IA-64;16;Big or little endian +SPARC;4;Big or little endian +.TE +.RE +.RE +.IP +Since the BCJ-filtered data is usually compressed with LZMA2, the compression +ratio may be improved slightly if the LZMA2 options are set to match the +alignment of the selected BCJ filter. For example, with the IA-64 filter, +it's good to set +.B pb=4 +with LZMA2 (2^4=16). The x86 filter is an exception; it's usually good to +stick to LZMA2's default four-byte alignment when compressing x86 executables. +.IP +All BCJ filters support the same +.IR options : +.RS +.TP +.BI start= offset +Specify the start +.I offset +that is used when converting between relative and absolute addresses. +The +.I offset +must be a multiple of the alignment of the filter (see the table above). +The default is zero. In practice, the default is good; specifying +a custom +.I offset +is almost never useful.
+.IP +Specifying a non-zero start +.I offset +is probably useful only if the executable has multiple sections, and there +are many cross-section jumps or calls. Applying a BCJ filter separately for +each section with proper start offset and then compressing the result as +a single chunk may give some improvement in compression ratio compared +to applying the BCJ filter with the default +.I offset +for the whole executable. +.RE +.TP +\fB\-\-delta\fR[\fB=\fIoptions\fR] +Add Delta filter to the filter chain. The Delta filter +can be used only as non-last filter in the filter chain. +.IP +Currently only simple byte-wise delta calculation is supported. It can +be useful when compressing e.g. uncompressed bitmap images or uncompressed +PCM audio. However, special purpose algorithms may give significantly better +results than Delta + LZMA2. This is true especially with audio, which +compresses faster and better e.g. with FLAC. +.IP +Supported +.IR options : +.RS +.TP +.BI dist= distance +Specify the +.I distance +of the delta calculation as bytes. +.I distance +must be 1\-256. The default is 1. +.IP +For example, with +.B dist=2 +and eight-byte input A1 B1 A2 B3 A3 B5 A4 B7, the output will be +A1 B1 01 02 01 02 01 02. +.RE +.SS "Other options" +.TP +.BR \-q ", " \-\-quiet +Suppress warnings and notices. Specify this twice to suppress errors too. +This option has no effect on the exit status. That is, even if a warning +was suppressed, the exit status to indicate a warning is still used. +.TP +.BR \-v ", " \-\-verbose +Be verbose. If standard error is connected to a terminal, +.B xz +will display a progress indicator. +Specifying +.B \-\-verbose +twice will give even more verbose output (useful mostly for debugging). +.IP +The progress indicator shows the following information: +.RS +.IP \(bu 3 +Completion percentage is shown if the size of the input file is known. +That is, percentage cannot be shown in pipes. 
+.IP \(bu 3 +Amount of compressed data produced (compressing) or consumed (decompressing). +.IP \(bu 3 +Amount of uncompressed data consumed (compressing) or produced +(decompressing). +.IP \(bu 3 +Compression ratio, which is calculated by dividing the amount of +compressed data processed so far by the amount of uncompressed data +processed so far. +.IP \(bu 3 +Compression or decompression speed. This is measured as the amount of +uncompressed data consumed (compression) or produced (decompression) +per second. It is shown once a few seconds have passed since +.B xz +started processing the file. +.IP \(bu 3 +Elapsed time or estimated time remaining. +Elapsed time is displayed in the format M:SS or H:MM:SS. +The estimated remaining time is displayed in a less precise format +which never has colons, for example, 2 min 30 s. The estimate can +be shown only when the size of the input file is known and a couple of +seconds have already passed since +.B xz +started processing the file. +.RE +.IP +When standard error is not a terminal, +.B \-\-verbose +will make +.B xz +print the filename, compressed size, uncompressed size, compression ratio, +and possibly also the speed and elapsed time on a single line to standard error after +compressing or decompressing the file. The speed and elapsed time are included +only when the operation took at least a few seconds. If the operation +didn't finish, for example due to user interruption, also the completion +percentage is printed if the size of the input file is known. +.TP +.BR \-Q ", " \-\-no\-warn +Don't set the exit status to +.B 2 +even if a condition worth a warning was detected. This option doesn't affect +the verbosity level, thus both +.B \-\-quiet +and +.B \-\-no\-warn +have to be used to not display warnings and to not alter the exit status. +.TP +.B \-\-robot +Print messages in a machine-parsable format. This is intended to ease +writing frontends that want to use +.B xz +instead of liblzma, which may be the case with various scripts.
The output +with this option enabled is meant to be stable across +.B xz +releases. Currently +.B \-\-robot +is implemented only for +.B \-\-info\-memory +and +.BR \-\-version , +but the idea is to make it usable for actual compression +and decompression too. +.TP +.BR \-\-info-memory +Display the current memory usage limit in human-readable format on +a single line, and exit successfully. To see how much RAM +.B xz +thinks your system has, use +.BR "\-\-memory=100% \-\-info\-memory" . +To get machine-parsable output +(memory usage limit as bytes without thousand separators), specify +.B \-\-robot +before +.BR \-\-info-memory . +.TP +.BR \-h ", " \-\-help +Display a help message describing the most commonly used options, +and exit successfully. +.TP +.BR \-H ", " \-\-long\-help +Display a help message describing all features of +.BR xz , +and exit successfully. +.TP +.BR \-V ", " \-\-version +Display the version number of +.B xz +and liblzma in human readable format. To get machine-parsable output, specify +.B \-\-robot +before +.BR \-\-version . +.SH "EXIT STATUS" +.TP +.B 0 +All is good. +.TP +.B 1 +An error occurred. +.TP +.B 2 +Something worth a warning occurred, but no actual errors occurred. +.PP +Notices (not warnings or errors) printed on standard error don't affect +the exit status. +.SH ENVIRONMENT +.TP +.B XZ_OPT +A space-separated list of options is parsed from +.B XZ_OPT +before parsing the options given on the command line. Note that only +options are parsed from +.BR XZ_OPT ; +all non-options are silently ignored. Parsing is done with +.BR getopt_long (3) +which is used also for the command line arguments. +.SH "LZMA UTILS COMPATIBILITY" +The command line syntax of +.B xz +is practically a superset of +.BR lzma , +.BR unlzma , +and +.BR lzcat +as found from LZMA Utils 4.32.x. In most cases, it is possible to replace +LZMA Utils with XZ Utils without breaking existing scripts.
There are some +incompatibilities though, which may sometimes cause problems. +.SS "Compression preset levels" +The numbering of the compression level presets is not identical in +.B xz +and LZMA Utils. +The most important difference is how dictionary sizes are mapped to different +presets. Dictionary size is roughly equal to the decompressor memory usage. +.RS +.TS +tab(;); +c c c +c n n. +Level;xz;LZMA Utils +\-1;64 KiB;64 KiB +\-2;512 KiB;1 MiB +\-3;1 MiB;512 KiB +\-4;2 MiB;1 MiB +\-5;4 MiB;2 MiB +\-6;8 MiB;4 MiB +\-7;16 MiB;8 MiB +\-8;32 MiB;16 MiB +\-9;64 MiB;32 MiB +.TE +.RE +.PP +The dictionary size differences affect the compressor memory usage too, +but there are some other differences between LZMA Utils and XZ Utils, which +make the difference even bigger: +.RS +.TS +tab(;); +c c c +c n n. +Level;xz;LZMA Utils 4.32.x +\-1;2 MiB;2 MiB +\-2;5 MiB;12 MiB +\-3;13 MiB;12 MiB +\-4;25 MiB;16 MiB +\-5;48 MiB;26 MiB +\-6;94 MiB;45 MiB +\-7;186 MiB;83 MiB +\-8;370 MiB;159 MiB +\-9;674 MiB;311 MiB +.TE +.RE +.PP +The default preset level in LZMA Utils is +.B \-7 +while in XZ Utils it is +.BR \-6 , +so both use 8 MiB dictionary by default. +.SS "Streamed vs. non-streamed .lzma files" +Uncompressed size of the file can be stored in the +.B .lzma +header. LZMA Utils does that when compressing regular files. +The alternative is to mark that uncompressed size is unknown and +use end of payload marker to indicate where the decompressor should stop. +LZMA Utils uses this method when uncompressed size isn't known, which is +the case for example in pipes. +.PP +.B xz +supports decompressing +.B .lzma +files with or without end of payload marker, but all +.B .lzma +files created by +.B xz +will use end of payload marker and have uncompressed size marked as unknown +in the +.B .lzma +header. This may be a problem in some (uncommon) situations. For example, a +.B .lzma +decompressor in an embedded device might work only with files that have known +uncompressed size. 
If you hit this problem, you need to use LZMA Utils or +LZMA SDK to create +.B .lzma +files with known uncompressed size. +.SS "Unsupported .lzma files" +The +.B .lzma +format allows +.I lc +values up to 8, and +.I lp +values up to 4. LZMA Utils can decompress files with any +.I lc +and +.IR lp , +but always creates files with +.B lc=3 +and +.BR lp=0 . +Creating files with other +.I lc +and +.I lp +is possible with +.B xz +and with LZMA SDK. +.PP +The implementation of the LZMA1 filter in liblzma requires +that the sum of +.I lc +and +.I lp +must not exceed 4. Thus, +.B .lzma +files which exceed this limitation, cannot be decompressed with +.BR xz . +.PP +LZMA Utils creates only +.B .lzma +files which have dictionary size of +.RI "2^" n +(a power of 2), but accepts files with any dictionary size. +liblzma accepts only +.B .lzma +files which have dictionary size of +.RI "2^" n +or +.RI "2^" n " + 2^(" n "\-1)." +This is to decrease false positives when detecting +.B .lzma +files. +.PP +These limitations shouldn't be a problem in practice, since practically all +.B .lzma +files have been compressed with settings that liblzma will accept. +.SS "Trailing garbage" +When decompressing, LZMA Utils silently ignore everything after the first +.B .lzma +stream. In most situations, this is a bug. This also means that LZMA Utils +don't support decompressing concatenated +.B .lzma +files. +.PP +If there is data left after the first +.B .lzma +stream, +.B xz +considers the file to be corrupt. This may break obscure scripts which have +assumed that trailing garbage is ignored. +.SH NOTES +.SS Compressed output may vary +The exact compressed output produced from the same uncompressed input file +may vary between XZ Utils versions even if compression options are identical. +This is because the encoder can be improved (faster or better compression) +without affecting the file format. 
The output can vary even between different +builds of the same XZ Utils version, if different build options are used. +.PP +The above means that implementing +.B \-\-rsyncable +to create rsyncable +.B .xz +files is not going to happen without freezing a part of the encoder +implementation, which can then be used with +.BR \-\-rsyncable . +.SS Embedded .xz decompressors +Embedded +.B .xz +decompressor implementations like XZ Embedded don't necessarily support files +created with +.I check +types other than +.B none +and +.BR crc32 . +Since the default is \fB\-\-check=\fIcrc64\fR, you must use +.B \-\-check=none +or +.B \-\-check=crc32 +when creating files for embedded systems. +.PP +Outside embedded systems, all +.B .xz +format decompressors support all the +.I check +types, or at least are able to decompress the file without verifying the +integrity check if the particular +.I check +is not supported. +.PP +XZ Embedded supports BCJ filters, but only with the default start offset. +.SH "SEE ALSO" +.BR xzdec (1), +.BR gzip (1), +.BR bzip2 (1) +.PP +XZ Utils: +.br +XZ Embedded: +.br +LZMA SDK: diff --git a/src/xzdec/xzdec.1 b/src/xzdec/xzdec.1 new file mode 100644 index 000000000000..3057c586d25f --- /dev/null +++ b/src/xzdec/xzdec.1 @@ -0,0 +1,168 @@ +.\" +.\" Author: Lasse Collin +.\" +.\" This file has been put into the public domain. +.\" You can do whatever you want with this file. +.\" +.TH XZDEC 1 "2010-03-07" "Tukaani" "XZ Utils" +.SH NAME +xzdec, lzmadec \- Small .xz and .lzma decompressors +.SH SYNOPSIS +.B xzdec +.RI [ option ]... +.RI [ file ]... +.br +.B lzmadec +.RI [ option ]... +.RI [ file ]... +.SH DESCRIPTION +.B xzdec +is a liblzma-based decompression-only tool for +.B .xz +(and only +.BR .xz ) +files. 
+.B xzdec +is intended to work as a drop-in replacement for +.BR xz (1) +in the most common situations where a script has been written to use +.B "xz \-\-decompress \-\-stdout" +(and possibly a few other commonly used options) to decompress +.B .xz +files. +.B lzmadec +is identical to +.B xzdec +except that +.B lzmadec +supports +.B .lzma +files instead of +.B .xz +files. +.PP +To reduce the size of the executable, +.B xzdec +doesn't support multithreading or localization, and doesn't read options from +.B XZ_OPT +environment variable. +.B xzdec +doesn't support displaying intermediate progress information: sending +.B SIGINFO +to +.B xzdec +does nothing, but sending +.B SIGUSR1 +terminates the process instead of displaying progress information. +.SH OPTIONS +.TP +.BR \-d ", " \-\-decompress ", " \-\-uncompress +Ignored for +.BR xz (1) +compatibility. +.B xzdec +supports only decompression. +.TP +.BR \-k ", " \-\-keep +Ignored for +.BR xz (1) +compatibility. +.B xzdec +never creates or removes any files. +.TP +.BR \-c ", " \-\-stdout ", " \-\-to-stdout +Ignored for +.BR xz (1) +compatibility. +.B xzdec +always writes the decompressed data to standard output. +.TP +\fB\-M\fR \fIlimit\fR, \fB\-\-memory=\fIlimit +Set the memory usage +.IR limit . +If this option is specified multiple times, the last one takes effect. The +.I limit +can be specified in multiple ways: +.RS +.IP \(bu 3 +The +.I limit +can be an absolute value in bytes. Using an integer suffix like +.B MiB +can be useful. Example: +.B "\-\-memory=80MiB" +.IP \(bu 3 +The +.I limit +can be specified as a percentage of physical RAM. Example: +.B "\-\-memory=70%" +.IP \(bu 3 +The +.I limit +can be reset back to its default value by setting it to +.BR 0 . +.IP \(bu 3 +The memory usage limiting can be effectively disabled by setting +.I limit +to +.BR max . +This isn't recommended. It's usually better to use, for example, +.BR \-\-memory=90% . 
+.RE +.IP +The current +.I limit +can be seen near the bottom of the output of the +.B \-\-help +option. +.TP +.BR \-q ", " \-\-quiet +Specifying this once does nothing since +.B xzdec +never displays any warnings or notices. +Specify this twice to suppress errors. +.TP +.BR \-Q ", " \-\-no-warn +Ignored for +.BR xz (1) +compatibility. +.B xzdec +never uses the exit status +.BR "2" . +.TP +.BR \-h ", " \-\-help +Display a help message and exit successfully. +.TP +.BR \-V ", " \-\-version +Display the version number of +.B xzdec +and liblzma. +.SH "EXIT STATUS" +.TP +.B 0 +All was good. +.TP +.B 1 +An error occurred. +.PP +.B xzdec +doesn't have any warning messages like +.BR xz (1) +has, thus the exit status +.B 2 +is not used by +.BR xzdec . +.SH NOTES +.B xzdec +and +.B lzmadec +are not really that small. The size can be reduced further by dropping +features from liblzma at compile time, but that shouldn't usually be done +for executables distributed in typical non-embedded operating system +distributions. If you need a truly small +.B .xz +decompressor, consider using XZ Embedded. +.SH "SEE ALSO" +.BR xz (1) +.PP +XZ Embedded: diff --git a/src/xzdec/xzdec.c b/src/xzdec/xzdec.c new file mode 100644 index 000000000000..8518d362a2dd --- /dev/null +++ b/src/xzdec/xzdec.c @@ -0,0 +1,482 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file xzdec.c +/// \brief Simple single-threaded tool to uncompress .xz or .lzma files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "sysdefs.h" +#include "lzma.h" + +#include +#include +#include +#include + +#include "getopt.h" +#include "tuklib_progname.h" +#include "tuklib_exit.h" + +#ifdef TUKLIB_DOSLIKE +# include +# include +#endif + + +#ifdef LZMADEC +# define TOOL_FORMAT "lzma" +#else +# define TOOL_FORMAT "xz" +#endif + + +/// Maximum number of bytes of memory the decoder is allowed to use +static uint64_t memlimit; + +/// Total amount of physical RAM +static uint64_t total_ram; + +/// Error messages are suppressed if this is zero, which is the case when +/// --quiet has been given at least twice. +static unsigned int display_errors = 2; + +/// Print "progname: message" and a newline to stderr unless display_errors is zero. +static void lzma_attribute((format(printf, 1, 2))) +my_errorf(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + + if (display_errors) { + fprintf(stderr, "%s: ", progname); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + } + + va_end(ap); + return; +} + +/// Print the usage text and the memory limit rounded up to MiB, then call tuklib_exit(). +static void lzma_attribute((noreturn)) +help(void) +{ + // Round up to the next MiB and do it correctly also with UINT64_MAX. + const uint64_t mem_mib = (memlimit >> 20) + + ((memlimit & ((UINT32_C(1) << 20) - 1)) != 0); + + printf( +"Usage: %s [OPTION]... [FILE]...\n" +"Uncompress files in the ."
TOOL_FORMAT " format to the standard output.\n" +"\n" +" -c, --stdout (ignored)\n" +" -d, --decompress (ignored)\n" +" -k, --keep (ignored)\n" +" -M, --memory=NUM use NUM bytes of memory at maximum (0 means default)\n" +" -q, --quiet specify *twice* to suppress errors\n" +" -Q, --no-warn (ignored)\n" +" -h, --help display this help and exit\n" +" -V, --version display the version number and exit\n" +"\n" +"With no FILE, or when FILE is -, read standard input.\n" +"\n" +"On this system and configuration, this program will use a maximum of roughly\n" +"%" PRIu64 " MiB RAM.\n" +"\n" +"Report bugs to <" PACKAGE_BUGREPORT "> (in English or Finnish).\n" +PACKAGE_NAME " home page: <" PACKAGE_URL ">\n", progname, mem_mib); + tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors); +} + +/// Print the tool version and the liblzma version string, then call tuklib_exit(). +static void lzma_attribute((noreturn)) +version(void) +{ + printf(TOOL_FORMAT "dec (" PACKAGE_NAME ") " LZMA_VERSION_STRING "\n" + "liblzma %s\n", lzma_version_string()); + + tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors); +} + + +/// Set the memory usage limit to the given percentage of total_ram, the +/// amount of physical memory (RAM) stored earlier by memlimit_init(). +static void +memlimit_set_percentage(uint32_t percentage) +{ + memlimit = percentage * total_ram / 100; + return; +} + + +/// Set the memory usage limit to the given number of bytes. Zero selects the +/// default: 40 % of RAM or, if that is below 80 MiB, 80 % of RAM capped at 80 MiB. +static void +memlimit_set(uint64_t new_memlimit) +{ + if (new_memlimit != 0) { + memlimit = new_memlimit; + } else { + memlimit = 40 * total_ram / 100; + if (memlimit < UINT64_C(80) * 1024 * 1024) { + memlimit = 80 * total_ram / 100; + if (memlimit > UINT64_C(80) * 1024 * 1024) + memlimit = UINT64_C(80) * 1024 * 1024; + } + } + + return; +} + + +/// Get the total amount of physical RAM and set the memory usage limit +/// to the default value. +static void +memlimit_init(void) +{ + // If we cannot determine the amount of RAM, use the assumption + // defined by the configure script.
+ total_ram = lzma_physmem(); + if (total_ram == 0) + total_ram = (uint64_t)(ASSUME_RAM) * 1024 * 1024; + + memlimit_set(0); + return; +} + + +/// \brief Convert a string to uint64_t +/// +/// This is rudely copied from src/xz/util.c and modified a little. :-( +/// +/// \param max Return value when the string "max" was specified. +/// \return Parsed value, saturated to UINT64_MAX on overflow; invalid input prints an error and exits. +static uint64_t +str_to_uint64(const char *value, uint64_t max) +{ + uint64_t result = 0; + + // Accept special value "max". + if (strcmp(value, "max") == 0) + return max; + + if (*value < '0' || *value > '9') { + my_errorf("%s: Value is not a non-negative decimal integer", + value); + exit(EXIT_FAILURE); + } + + do { + // Don't overflow. + if (result > (UINT64_MAX - 9) / 10) + return UINT64_MAX; + + result *= 10; + result += *value - '0'; + ++value; + } while (*value >= '0' && *value <= '9'); + + if (*value != '\0') { + // Look for suffix. + uint64_t multiplier = 0; + if (*value == 'k' || *value == 'K') + multiplier = UINT64_C(1) << 10; + else if (*value == 'm' || *value == 'M') + multiplier = UINT64_C(1) << 20; + else if (*value == 'g' || *value == 'G') + multiplier = UINT64_C(1) << 30; + + ++value; + + // Allow also e.g. Ki, KiB, and KB. + if (*value != '\0' && strcmp(value, "i") != 0 + && strcmp(value, "iB") != 0 + && strcmp(value, "B") != 0) + multiplier = 0; + + if (multiplier == 0) { + my_errorf("%s: Invalid suffix", value - 1); + exit(EXIT_FAILURE); + } + + // Don't overflow here either. + if (result > UINT64_MAX / multiplier) + result = UINT64_MAX; + else + result *= multiplier; + } + + return result; +} + + +/// Parses command line options.
+static void +parse_options(int argc, char **argv) +{ + static const char short_opts[] = "cdkM:hqQV"; + static const struct option long_opts[] = { + { "stdout", no_argument, NULL, 'c' }, + { "to-stdout", no_argument, NULL, 'c' }, + { "decompress", no_argument, NULL, 'd' }, + { "uncompress", no_argument, NULL, 'd' }, + { "keep", no_argument, NULL, 'k' }, + { "memory", required_argument, NULL, 'M' }, + { "quiet", no_argument, NULL, 'q' }, + { "no-warn", no_argument, NULL, 'Q' }, + { "help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, 'V' }, + { NULL, 0, NULL, 0 } + }; + + int c; + + while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) + != -1) { + switch (c) { + case 'c': + case 'd': + case 'k': + case 'Q': + break; + + case 'M': { + // Support specifying the limit as a percentage of + // installed physical RAM. + const size_t len = strlen(optarg); + if (len > 0 && optarg[len - 1] == '%') { + // Memory limit is a percentage of total + // installed RAM. + optarg[len - 1] = '\0'; + const uint64_t percentage + = str_to_uint64(optarg, 100); + if (percentage < 1 || percentage > 100) { + my_errorf("Percentage must be in " + "the range [1, 100]"); + exit(EXIT_FAILURE); + } + + memlimit_set_percentage(percentage); + } else { + memlimit_set(str_to_uint64( + optarg, UINT64_MAX)); + } + + break; + } + + case 'q': + if (display_errors > 0) + --display_errors; + + break; + + case 'h': + help(); + + case 'V': + version(); + + default: + exit(EXIT_FAILURE); + } + } + + return; +} + +/// Decode file to stdout using strm; any decoder, read or write error prints a message and exits. +static void +uncompress(lzma_stream *strm, FILE *file, const char *filename) +{ + lzma_ret ret; + + // Initialize the decoder +#ifdef LZMADEC + ret = lzma_alone_decoder(strm, memlimit); +#else + ret = lzma_stream_decoder(strm, memlimit, LZMA_CONCATENATED); +#endif + + // The only reasonable error here is LZMA_MEM_ERROR. + // FIXME: Maybe also LZMA_MEMLIMIT_ERROR in future? + if (ret != LZMA_OK) { + my_errorf("%s", ret == LZMA_MEM_ERROR ?
strerror(ENOMEM) + : "Internal error (bug)"); + exit(EXIT_FAILURE); + } + + // Input and output buffers + uint8_t in_buf[BUFSIZ]; + uint8_t out_buf[BUFSIZ]; + + strm->avail_in = 0; + strm->next_out = out_buf; + strm->avail_out = BUFSIZ; + + lzma_action action = LZMA_RUN; + + while (true) { + if (strm->avail_in == 0) { + strm->next_in = in_buf; + strm->avail_in = fread(in_buf, 1, BUFSIZ, file); + + if (ferror(file)) { + // POSIX says that fread() sets errno if + // an error occurred. ferror() doesn't + // touch errno. + my_errorf("%s: Error reading input file: %s", + filename, strerror(errno)); + exit(EXIT_FAILURE); + } + +#ifndef LZMADEC + // When using LZMA_CONCATENATED, we need to tell + // liblzma when it has got all the input. + if (feof(file)) + action = LZMA_FINISH; +#endif + } + + ret = lzma_code(strm, action); + + // Write and check write error before checking decoder error. + // This way as much data as possible gets written to output + // even if decoder detected an error. + if (strm->avail_out == 0 || ret != LZMA_OK) { + const size_t write_size = BUFSIZ - strm->avail_out; + + if (fwrite(out_buf, 1, write_size, stdout) + != write_size) { + // Wouldn't be a surprise if writing to stderr + // would fail too but at least try to show an + // error message. + my_errorf("Cannot write to standard output: " + "%s", strerror(errno)); + exit(EXIT_FAILURE); + } + + strm->next_out = out_buf; + strm->avail_out = BUFSIZ; + } + + if (ret != LZMA_OK) { + if (ret == LZMA_STREAM_END) { +#ifdef LZMADEC + // Check that there's no trailing garbage. + if (strm->avail_in != 0 + || fread(in_buf, 1, 1, file) + != 0 + || !feof(file)) + ret = LZMA_DATA_ERROR; + else + return; +#else + // lzma_stream_decoder() already guarantees + // that there's no trailing garbage.
+ assert(strm->avail_in == 0); + assert(action == LZMA_FINISH); + assert(feof(file)); + return; +#endif + } + + const char *msg; + switch (ret) { + case LZMA_MEM_ERROR: + msg = strerror(ENOMEM); + break; + + case LZMA_MEMLIMIT_ERROR: + msg = "Memory usage limit reached"; + break; + + case LZMA_FORMAT_ERROR: + msg = "File format not recognized"; + break; + + case LZMA_OPTIONS_ERROR: + // FIXME: Better message? + msg = "Unsupported compression options"; + break; + + case LZMA_DATA_ERROR: + msg = "File is corrupt"; + break; + + case LZMA_BUF_ERROR: + msg = "Unexpected end of input"; + break; + + default: + msg = "Internal error (bug)"; + break; + } + + my_errorf("%s: %s", filename, msg); + exit(EXIT_FAILURE); + } + } +} + +/// Decode every file named on the command line, or stdin if none is given or a name is "-". +int +main(int argc, char **argv) +{ + // Initialize progname, which will be used in error messages. + tuklib_progname_init(argv); + + // Set the default memory usage limit. This is needed before parsing + // the command line arguments. + memlimit_init(); + + // Parse the command line options. + parse_options(argc, argv); + + // The same lzma_stream is used for all files that we decode. This way + // we don't need to reallocate memory for every file if they use the same + // compression settings. + lzma_stream strm = LZMA_STREAM_INIT; + + // Some systems require setting stdin and stdout to binary mode. +#ifdef TUKLIB_DOSLIKE + setmode(fileno(stdin), O_BINARY); + setmode(fileno(stdout), O_BINARY); +#endif + + if (optind == argc) { + // No filenames given, decode from stdin. + uncompress(&strm, stdin, "(stdin)"); + } else { + // Loop through the filenames given on the command line. + do { + // "-" indicates stdin.
+ if (strcmp(argv[optind], "-") == 0) { + uncompress(&strm, stdin, "(stdin)"); + } else { + FILE *file = fopen(argv[optind], "rb"); + if (file == NULL) { + my_errorf("%s: %s", argv[optind], + strerror(errno)); + exit(EXIT_FAILURE); + } + + uncompress(&strm, file, argv[optind]); + fclose(file); + } + } while (++optind < argc); + } + +#ifndef NDEBUG + // Free the memory only when debugging. Freeing wastes some time, + // but allows detecting possible memory leaks with Valgrind. + lzma_end(&strm); +#endif + + tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors); +}