Vendor import of xz-5.2.8 (trimmed).

2022-11-15 21:53:08 -08:00 · 2022-11-15 21:53:08 -08:00 · bbf0e6b900
commit bbf0e6b900
parent b89a971493
44 changed files with 2939 additions and 220 deletions
--- a/2
+++ b/2
@ -3,7 +3,7 @@ Authors of XZ Utils
 ===================

    XZ Utils is developed and maintained by Lasse Collin
-    <lasse.collin@tukaani.org>.
+    <lasse.collin@tukaani.org> and Jia Tan <jiat0218@gmail.com>.

    Major parts of liblzma are based on code written by Igor Pavlov,
    specifically the LZMA SDK <http://7-zip.org/sdk.html>. Without
--- a/2120
+++ b/2120
--- a/2
+++ b/2
@ -204,7 +204,7 @@ XZ Utils

    Several strings will change in a future version of xz so if you
    wish to start a new translation, look at the code in the xz git
-    repostiory instead of a 5.2.x release.
+    repository instead of a 5.2.x release.


 5. Other implementations of the .xz format
--- a/19
+++ b/19
@ -8,27 +8,33 @@ has been important. :-) In alphabetical order:
  - H. Peter Anvin
  - Jeff Bastian
  - Nelson H. F. Beebe
+  - Karl Beldan
  - Karl Berry
  - Anders F. Björklund
  - Emmanuel Blot
  - Melanie Blower
+  - Alexander Bluhm
  - Martin Blumenstingl
  - Ben Boeckel
  - Jakub Bogusz
+  - Adam Borowski
  - Maarten Bosmans
  - Trent W. Buck
+  - Kevin R. Bulgrien
  - James Buren
  - David Burklund
  - Daniel Mealha Cabrita
  - Milo Casagrande
  - Marek Černocký
  - Tomer Chachamu
+  - Vitaly Chikunov
  - Antoine Cœur
  - Gabi Davar
  - Chris Donawa
  - Andrew Dudman
  - Markus Duft
  - İsmail Dönmez
+  - Paul Eggert
  - Robert Elz
  - Gilles Espinasse
  - Denis Excoffier
@ -36,6 +42,7 @@ has been important. :-) In alphabetical order:
  - Michael Fox
  - Mike Frysinger
  - Daniel Richard G.
+  - Bjarni Ingi Gislason
  - Bill Glessner
  - Jason Gorski
  - Juan Manuel Guerrero
@ -63,10 +70,13 @@ has been important. :-) In alphabetical order:
  - Xin Li
  - Eric Lindblad
  - Lorenzo De Liso
+  - H.J. Lu
  - Bela Lubkin
  - Gregory Margo
  - Julien Marrec
+  - Ed Maste
  - Martin Matuška
+  - Ivan A. Melnikov
  - Jim Meyering
  - Arkadiusz Miskiewicz
  - Conley Moorhous
@ -77,6 +87,7 @@ has been important. :-) In alphabetical order:
  - Jonathan Nieder
  - Andre Noll
  - Peter O'Gorman
+  - Daniel Packard
  - Filip Palian
  - Peter Pallinger
  - Rui Paulo
@ -88,12 +99,15 @@ has been important. :-) In alphabetical order:
  - Trần Ngọc Quân
  - Pavel Raiskup
  - Ole André Vadla Ravnås
+  - Eric S. Raymond
  - Robert Readman
  - Bernhard Reutner-Fischer
-  - Eric S. Raymond
+  - Markus Rickert
  - Cristian Rodríguez
  - Christian von Roques
+  - Boud Roukema
  - Torsten Rupp
+  - Stephen Sachs
  - Jukka Salmi
  - Alexandre Sauvé
  - Benno Schulenberg
@ -102,11 +116,13 @@ has been important. :-) In alphabetical order:
  - Dan Shechter
  - Stuart Shelton
  - Sebastian Andrzej Siewior
+  - Ville Skyttä
  - Brad Smith
  - Bruce Stark
  - Pippijn van Steenhoven
  - Jonathan Stott
  - Dan Stromberg
+  - Jia Tan
  - Vincent Torri
  - Paul Townsend
  - Mohammed Adnène Trojette
@ -117,6 +133,7 @@ has been important. :-) In alphabetical order:
  - Adam Walling
  - Jeffrey Walton
  - Christian Weisgerber
+  - Dan Weiss
  - Bert Wesarg
  - Fredrik Wikstrom
  - Jim Wilcoxson
--- a/src/common/mythread.h
+++ b/src/common/mythread.h
@ -370,10 +370,11 @@ typedef struct {
 		BOOL pending_; \
 		if (!InitOnceBeginInitialize(&once_, 0, &pending_, NULL)) \
 			abort(); \
-		if (pending_) \
+		if (pending_) { \
 			func(); \
-		if (!InitOnceComplete(&once, 0, NULL)) \
-			abort(); \
+			if (!InitOnceComplete(&once, 0, NULL)) \
+				abort(); \
+		} \
 	} while (0)
 #endif

--- a/src/common/tuklib_cpucores.c
+++ b/src/common/tuklib_cpucores.c
@ -72,7 +72,16 @@ tuklib_cpucores(void)
 	}

 #elif defined(TUKLIB_CPUCORES_SYSCTL)
+	// On OpenBSD HW_NCPUONLINE tells the number of processor cores that
+	// are online so it is preferred over HW_NCPU which also counts cores
+	// that aren't currently available. The number of cores online is
+	// often less than HW_NCPU because OpenBSD disables simultaneous
+	// multi-threading (SMT) by default.
+#	ifdef HW_NCPUONLINE
+	int name[2] = { CTL_HW, HW_NCPUONLINE };
+#	else
 	int name[2] = { CTL_HW, HW_NCPU };
+#	endif
 	int cpus;
 	size_t cpus_size = sizeof(cpus);
 	if (sysctl(name, 2, &cpus, &cpus_size, NULL, 0) != -1
--- a/src/common/tuklib_physmem.c
+++ b/src/common/tuklib_physmem.c
@ -84,7 +84,7 @@ tuklib_physmem(void)
 		// supports reporting values greater than 4 GiB. To keep the
 		// code working also on older Windows versions, use
 		// GlobalMemoryStatusEx() conditionally.
-		HMODULE kernel32 = GetModuleHandle("kernel32.dll");
+		HMODULE kernel32 = GetModuleHandle(TEXT("kernel32.dll"));
 		if (kernel32 != NULL) {
 			typedef BOOL (WINAPI *gmse_type)(LPMEMORYSTATUSEX);
 			gmse_type gmse = (gmse_type)GetProcAddress(
--- a/src/liblzma/api/lzma.h
+++ b/src/liblzma/api/lzma.h
@ -219,7 +219,8 @@
 */
 #ifndef lzma_nothrow
 #	if defined(__cplusplus)
-#		if __cplusplus >= 201103L
+#		if __cplusplus >= 201103L || (defined(_MSVC_LANG) \
+				&& _MSVC_LANG >= 201103L)
 #			define lzma_nothrow noexcept
 #		else
 #			define lzma_nothrow throw()
--- a/src/liblzma/api/lzma/base.h
+++ b/src/liblzma/api/lzma/base.h
@ -145,6 +145,12 @@ typedef enum {
 		 * specified memory usage limit. To continue decoding,
 		 * the memory usage limit has to be increased with
 		 * lzma_memlimit_set().
+		 *
+		 * liblzma 5.2.6 and earlier had a bug in the single-threaded
+		 * .xz decoder (lzma_stream_decoder()) which made it impossible
+		 * to continue decoding after LZMA_MEMLIMIT_ERROR even if
+		 * the limit was increased using lzma_memlimit_set().
+		 * Other decoders worked correctly.
 		 */

 	LZMA_FORMAT_ERROR       = 7,
@ -447,7 +453,7 @@ typedef struct lzma_internal_s lzma_internal;
 *
 * The lzma_stream structure is used for
 *  - passing pointers to input and output buffers to liblzma;
- *  - defining custom memory hander functions; and
+ *  - defining custom memory handler functions; and
 *  - holding a pointer to coder-specific internal data structures.
 *
 * Typical usage:
@ -649,6 +655,11 @@ extern LZMA_API(uint64_t) lzma_memlimit_get(const lzma_stream *strm)
 * return LZMA_OK. Later versions treat 0 as if 1 had been specified (so
 * lzma_memlimit_get() will return 1 even if you specify 0 here).
 *
+ * liblzma 5.2.6 and earlier had a bug in the single-threaded .xz decoder
+ * (lzma_stream_decoder()) which made it impossible to continue decoding
+ * after LZMA_MEMLIMIT_ERROR even if the limit was increased using
+ * lzma_memlimit_set(). Other decoders worked correctly.
+ *
 * \return      - LZMA_OK: New memory usage limit successfully set.
 *              - LZMA_MEMLIMIT_ERROR: The new limit is too small.
 *                The limit was not changed.
--- a/src/liblzma/api/lzma/block.h
+++ b/src/liblzma/api/lzma/block.h
@ -464,9 +464,6 @@ extern LZMA_API(lzma_ret) lzma_block_encoder(
 * LZMA_FINISH is not required. It is supported only for convenience.
 *
 * \return      - LZMA_OK: All good, continue with lzma_code().
- *              - LZMA_UNSUPPORTED_CHECK: Initialization was successful, but
- *                the given Check ID is not supported, thus Check will be
- *                ignored.
 *              - LZMA_PROG_ERROR
 *              - LZMA_MEM_ERROR
 */
--- a/src/liblzma/api/lzma/container.h
+++ b/src/liblzma/api/lzma/container.h
@ -526,7 +526,8 @@ extern LZMA_API(lzma_ret) lzma_stream_buffer_encode(
 *                          had been specified.
 * \param       flags       Bitwise-or of zero or more of the decoder flags:
 *                          LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK,
- *                          LZMA_TELL_ANY_CHECK, LZMA_CONCATENATED
+ *                          LZMA_TELL_ANY_CHECK, LZMA_IGNORE_CHECK,
+ *                          LZMA_CONCATENATED
 *
 * \return      - LZMA_OK: Initialization was successful.
 *              - LZMA_MEM_ERROR: Cannot allocate memory.
@ -545,13 +546,23 @@ extern LZMA_API(lzma_ret) lzma_stream_decoder(
 * calls lzma_stream_decoder() or lzma_alone_decoder() once the type
 * of the input file has been detected.
 *
+ * If the flag LZMA_CONCATENATED is used and the input is a .lzma file:
+ * For historical reasons concatenated .lzma files aren't supported.
+ * If there is trailing data after one .lzma stream, lzma_code() will
+ * return LZMA_DATA_ERROR. (lzma_alone_decoder() doesn't have such a check
+ * as it doesn't support any decoder flags. It will return LZMA_STREAM_END
+ * after one .lzma stream.)
+ *
 * \param       strm        Pointer to properly prepared lzma_stream
 * \param       memlimit    Memory usage limit as bytes. Use UINT64_MAX
 *                          to effectively disable the limiter. liblzma
 *                          5.2.3 and earlier don't allow 0 here and return
 *                          LZMA_PROG_ERROR; later versions treat 0 as if 1
 *                          had been specified.
- * \param       flags       Bitwise-or of flags, or zero for no flags.
+ * \param       flags       Bitwise-or of zero or more of the decoder flags:
+ *                          LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK,
+ *                          LZMA_TELL_ANY_CHECK, LZMA_IGNORE_CHECK,
+ *                          LZMA_CONCATENATED
 *
 * \return      - LZMA_OK: Initialization was successful.
 *              - LZMA_MEM_ERROR: Cannot allocate memory.
@ -595,8 +606,9 @@ extern LZMA_API(lzma_ret) lzma_alone_decoder(
 *                          returned.
 * \param       flags       Bitwise-or of zero or more of the decoder flags:
 *                          LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK,
- *                          LZMA_CONCATENATED. Note that LZMA_TELL_ANY_CHECK
- *                          is not allowed and will return LZMA_PROG_ERROR.
+ *                          LZMA_IGNORE_CHECK, LZMA_CONCATENATED. Note that
+ *                          LZMA_TELL_ANY_CHECK is not allowed and will
+ *                          return LZMA_PROG_ERROR.
 * \param       allocator   lzma_allocator for custom allocator functions.
 *                          Set to NULL to use malloc() and free().
 * \param       in          Beginning of the input buffer
--- a/src/liblzma/api/lzma/filter.h
+++ b/src/liblzma/api/lzma/filter.h
@ -108,7 +108,9 @@ extern LZMA_API(lzma_bool) lzma_filter_decoder_is_supported(lzma_vli id)
 * need to be initialized by the caller in any way.
 *
 * If an error occurs, memory possibly already allocated by this function
- * is always freed.
+ * is always freed. liblzma versions older than 5.2.7 may modify the dest
+ * array and leave its contents in an undefined state if an error occurs.
+ * liblzma 5.2.7 and newer only modify the dest array when returning LZMA_OK.
 *
 * \return      - LZMA_OK
 *              - LZMA_MEM_ERROR
--- a/src/liblzma/api/lzma/version.h
+++ b/src/liblzma/api/lzma/version.h
@ -22,7 +22,7 @@
 */
 #define LZMA_VERSION_MAJOR 5
 #define LZMA_VERSION_MINOR 2
-#define LZMA_VERSION_PATCH 5
+#define LZMA_VERSION_PATCH 8
 #define LZMA_VERSION_STABILITY LZMA_VERSION_STABILITY_STABLE

 #ifndef LZMA_VERSION_COMMIT
--- a/src/liblzma/api/lzma/vli.h
+++ b/src/liblzma/api/lzma/vli.h
@ -159,6 +159,8 @@ extern LZMA_API(lzma_ret) lzma_vli_decode(lzma_vli *vli, size_t *vli_pos,
 /**
 * \brief       Get the number of bytes required to encode a VLI
 *
+ * \param       vli       Integer whose encoded size is to be determined
+ *
 * \return      Number of bytes on success (1-9). If vli isn't valid,
 *              zero is returned.
 */
--- a/src/liblzma/check/crc32_x86.S
+++ b/src/liblzma/check/crc32_x86.S
@ -51,6 +51,14 @@ init_table(void)
 * extern uint32_t lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc);
 */

+/* When Intel CET is enabled, include <cet.h> in assembly code to mark
+   Intel CET support.  */
+#ifdef __CET__
+# include <cet.h>
+#else
+# define _CET_ENDBR
+#endif
+
 /*
 * On some systems, the functions need to be prefixed. The prefix is
 * usually an underscore.
@ -83,6 +91,7 @@ init_table(void)

 	ALIGN(4, 16)
 LZMA_CRC32:
+	_CET_ENDBR
 	/*
 	 * Register usage:
 	 * %eax crc
@ -195,7 +204,7 @@ LZMA_CRC32:

 	/*
 	 * Read the next four bytes, for which the CRC is calculated
-	 * on the next interation of the loop.
+	 * on the next iteration of the loop.
 	 */
 	movl	12(%esi), %ecx

@ -296,9 +305,9 @@ LZMA_CRC32:

 /*
 * This is needed to support non-executable stack. It's ugly to
- * use __linux__ here, but I don't know a way to detect when
+ * use __FreeBSD__ and __linux__ here, but I don't know a way to detect when
 * we are using GNU assembler.
 */
-#if defined(__ELF__) && defined(__linux__)
+#if defined(__ELF__) && (defined(__FreeBSD__) || defined(__linux__))
 	.section	.note.GNU-stack,"",@progbits
 #endif
--- a/src/liblzma/check/crc64_x86.S
+++ b/src/liblzma/check/crc64_x86.S
@ -41,6 +41,14 @@ init_table(void)
 * extern uint64_t lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc);
 */

+/* When Intel CET is enabled, include <cet.h> in assembly code to mark
+   Intel CET support.  */
+#ifdef __CET__
+# include <cet.h>
+#else
+# define _CET_ENDBR
+#endif
+
 /*
 * On some systems, the functions need to be prefixed. The prefix is
 * usually an underscore.
@ -73,6 +81,7 @@ init_table(void)

 	ALIGN(4, 16)
 LZMA_CRC64:
+	_CET_ENDBR
 	/*
 	 * Register usage:
 	 * %eax crc LSB
@ -279,9 +288,9 @@ LZMA_CRC64:

 /*
 * This is needed to support non-executable stack. It's ugly to
- * use __linux__ here, but I don't know a way to detect when
+ * use __FreeBSD__ and __linux__ here, but I don't know a way to detect when
 * we are using GNU assembler.
 */
-#if defined(__ELF__) && defined(__linux__)
+#if defined(__ELF__) && (defined(__FreeBSD__) || defined(__linux__))
 	.section	.note.GNU-stack,"",@progbits
 #endif
--- a/src/liblzma/common/alone_decoder.c
+++ b/src/liblzma/common/alone_decoder.c
@ -146,7 +146,7 @@ alone_decode(void *coder_ptr, const lzma_allocator *allocator,

 		// Use a hack to set the uncompressed size.
 		lzma_lz_decoder_uncompressed(coder->next.coder,
-				coder->uncompressed_size);
+				coder->uncompressed_size, true);

 		coder->sequence = SEQ_CODE;
 		break;
--- a/src/liblzma/common/auto_decoder.c
+++ b/src/liblzma/common/auto_decoder.c
@ -86,8 +86,8 @@ auto_decode(void *coder_ptr, const lzma_allocator *allocator,
 	// Fall through

 	case SEQ_FINISH:
-		// When LZMA_DECODE_CONCATENATED was used and we were decoding
-		// LZMA_Alone file, we need to check check that there is no
+		// When LZMA_CONCATENATED was used and we were decoding
+		// a LZMA_Alone file, we need to check that there is no
 		// trailing garbage and wait for LZMA_FINISH.
 		if (*in_pos < in_size)
 			return LZMA_DATA_ERROR;
--- a/src/liblzma/common/block_buffer_encoder.c
+++ b/src/liblzma/common/block_buffer_encoder.c
@ -325,6 +325,24 @@ lzma_block_buffer_encode(lzma_block *block, const lzma_allocator *allocator,
 }


+#ifdef HAVE_SYMBOL_VERSIONS_LINUX
+// This is for compatibility with binaries linked against liblzma that
+// has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7.
+LZMA_SYMVER_API("lzma_block_uncomp_encode@XZ_5.2.2",
+	lzma_ret, lzma_block_uncomp_encode_522)(lzma_block *block,
+		const uint8_t *in, size_t in_size,
+		uint8_t *out, size_t *out_pos, size_t out_size)
+		lzma_nothrow lzma_attr_warn_unused_result
+		__attribute__((__alias__("lzma_block_uncomp_encode_52")));
+
+LZMA_SYMVER_API("lzma_block_uncomp_encode@@XZ_5.2",
+	lzma_ret, lzma_block_uncomp_encode_52)(lzma_block *block,
+		const uint8_t *in, size_t in_size,
+		uint8_t *out, size_t *out_pos, size_t out_size)
+		lzma_nothrow lzma_attr_warn_unused_result;
+
+#define lzma_block_uncomp_encode lzma_block_uncomp_encode_52
+#endif
 extern LZMA_API(lzma_ret)
 lzma_block_uncomp_encode(lzma_block *block,
 		const uint8_t *in, size_t in_size,
--- a/src/liblzma/common/block_decoder.c
+++ b/src/liblzma/common/block_decoder.c
@ -40,6 +40,9 @@ typedef struct {
 	/// is unknown.
 	lzma_vli compressed_limit;

+	/// Maximum allowed Uncompressed Size.
+	lzma_vli uncompressed_limit;
+
 	/// Position when reading the Check field
 	size_t check_pos;

@ -51,21 +54,6 @@ typedef struct {
 } lzma_block_coder;


-static inline bool
-update_size(lzma_vli *size, lzma_vli add, lzma_vli limit)
-{
-	if (limit > LZMA_VLI_MAX)
-		limit = LZMA_VLI_MAX;
-
-	if (limit < *size || limit - *size < add)
-		return true;
-
-	*size += add;
-
-	return false;
-}
-
-
 static inline bool
 is_size_valid(lzma_vli size, lzma_vli reference)
 {
@ -86,21 +74,54 @@ block_decode(void *coder_ptr, const lzma_allocator *allocator,
 		const size_t in_start = *in_pos;
 		const size_t out_start = *out_pos;

+		// Limit the amount of input and output space that we give
+		// to the raw decoder based on the information we have
+		// (or don't have) from Block Header.
+		const size_t in_stop = *in_pos + (size_t)my_min(
+			in_size - *in_pos,
+			coder->compressed_limit - coder->compressed_size);
+		const size_t out_stop = *out_pos + (size_t)my_min(
+			out_size - *out_pos,
+			coder->uncompressed_limit - coder->uncompressed_size);
+
 		const lzma_ret ret = coder->next.code(coder->next.coder,
-				allocator, in, in_pos, in_size,
-				out, out_pos, out_size, action);
+				allocator, in, in_pos, in_stop,
+				out, out_pos, out_stop, action);

 		const size_t in_used = *in_pos - in_start;
 		const size_t out_used = *out_pos - out_start;

-		// NOTE: We compare to compressed_limit here, which prevents
-		// the total size of the Block growing past LZMA_VLI_MAX.
-		if (update_size(&coder->compressed_size, in_used,
-					coder->compressed_limit)
-				|| update_size(&coder->uncompressed_size,
-					out_used,
-					coder->block->uncompressed_size))
-			return LZMA_DATA_ERROR;
+		// Because we have limited the input and output sizes,
+		// we know that these cannot grow too big or overflow.
+		coder->compressed_size += in_used;
+		coder->uncompressed_size += out_used;
+
+		if (ret == LZMA_OK) {
+			const bool comp_done = coder->compressed_size
+					== coder->block->compressed_size;
+			const bool uncomp_done = coder->uncompressed_size
+					== coder->block->uncompressed_size;
+
+			// If both input and output amounts match the sizes
+			// in Block Header but we still got LZMA_OK instead
+			// of LZMA_STREAM_END, the file is broken.
+			if (comp_done && uncomp_done)
+				return LZMA_DATA_ERROR;
+
+			// If the decoder has consumed all the input that it
+			// needs but it still couldn't fill the output buffer
+			// or return LZMA_STREAM_END, the file is broken.
+			if (comp_done && *out_pos < out_size)
+				return LZMA_DATA_ERROR;
+
+			// If the decoder has produced all the output but
+			// it still didn't return LZMA_STREAM_END or consume
+			// more input (for example, detecting an end of
+			// payload marker may need more input but produce
+			// no output) the file is broken.
+			if (uncomp_done && *in_pos < in_size)
+				return LZMA_DATA_ERROR;
+		}

 		if (!coder->ignore_check)
 			lzma_check_update(&coder->check, coder->block->check,
@ -230,6 +251,14 @@ lzma_block_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 					- lzma_check_size(block->check)
 				: block->compressed_size;

+	// With Uncompressed Size this is simpler. If Block Header lacks
+	// the size info, then LZMA_VLI_MAX is the maximum possible
+	// Uncompressed Size.
+	coder->uncompressed_limit
+			= block->uncompressed_size == LZMA_VLI_UNKNOWN
+				? LZMA_VLI_MAX
+				: block->uncompressed_size;
+
 	// Initialize the check. It's caller's problem if the Check ID is not
 	// supported, and the Block decoder cannot verify the Check field.
 	// Caller can test lzma_check_is_supported(block->check).
--- a/src/liblzma/common/common.c
+++ b/src/liblzma/common/common.c
@ -366,6 +366,20 @@ lzma_end(lzma_stream *strm)
 }


+#ifdef HAVE_SYMBOL_VERSIONS_LINUX
+// This is for compatibility with binaries linked against liblzma that
+// has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7.
+LZMA_SYMVER_API("lzma_get_progress@XZ_5.2.2",
+	void, lzma_get_progress_522)(lzma_stream *strm,
+		uint64_t *progress_in, uint64_t *progress_out) lzma_nothrow
+		__attribute__((__alias__("lzma_get_progress_52")));
+
+LZMA_SYMVER_API("lzma_get_progress@@XZ_5.2",
+	void, lzma_get_progress_52)(lzma_stream *strm,
+		uint64_t *progress_in, uint64_t *progress_out) lzma_nothrow;
+
+#define lzma_get_progress lzma_get_progress_52
+#endif
 extern LZMA_API(void)
 lzma_get_progress(lzma_stream *strm,
 		uint64_t *progress_in, uint64_t *progress_out)
--- a/src/liblzma/common/common.h
+++ b/src/liblzma/common/common.h
@ -34,6 +34,34 @@

 #include "lzma.h"

+#ifdef HAVE_SYMBOL_VERSIONS_LINUX
+// To keep link-time optimization (LTO, -flto) working with GCC,
+// the __symver__ attribute must be used instead of __asm__(".symver ...").
+// Otherwise the symbol versions may be lost, resulting in broken liblzma
+// that has wrong default versions in the exported symbol list!
+// The attribute was added in GCC 10; LTO with older GCC is not supported.
+//
+// To keep -Wmissing-prototypes happy, use LZMA_SYMVER_API only with function
+// declarations (including those with __alias__ attribute) and LZMA_API with
+// the function definitions. This means a little bit of silly copy-and-paste
+// between declarations and definitions though.
+//
+// As of GCC 12.2, the __symver__ attribute supports only @ and @@ but the
+// very convenient @@@ isn't supported (it's supported by GNU assembler
+// since 2000). When using @@ instead of @@@, the internal name must not be
+// the same as the external name to avoid problems in some situations. This
+// is why "#define foo foo_52" is needed for the default symbol versions.
+#	if TUKLIB_GNUC_REQ(10, 0) && !defined(__INTEL_COMPILER)
+#		define LZMA_SYMVER_API(extnamever, type, intname) \
+			extern __attribute__((__symver__(extnamever))) \
+					LZMA_API(type) intname
+#	else
+#		define LZMA_SYMVER_API(extnamever, type, intname) \
+			__asm__(".symver " #intname "," extnamever); \
+			extern LZMA_API(type) intname
+#	endif
+#endif
+
 // These allow helping the compiler in some often-executed branches, whose
 // result is almost always the same.
 #ifdef __GNUC__
--- a/src/liblzma/common/filter_common.c
+++ b/src/liblzma/common/filter_common.c
@ -122,12 +122,16 @@ static const struct {


 extern LZMA_API(lzma_ret)
-lzma_filters_copy(const lzma_filter *src, lzma_filter *dest,
+lzma_filters_copy(const lzma_filter *src, lzma_filter *real_dest,
 		const lzma_allocator *allocator)
 {
-	if (src == NULL || dest == NULL)
+	if (src == NULL || real_dest == NULL)
 		return LZMA_PROG_ERROR;

+	// Use a temporary destination so that the real destination
+	// will never be modified if an error occurs.
+	lzma_filter dest[LZMA_FILTERS_MAX + 1];
+
 	lzma_ret ret;
 	size_t i;
 	for (i = 0; src[i].id != LZMA_VLI_UNKNOWN; ++i) {
@ -173,18 +177,20 @@ lzma_filters_copy(const lzma_filter *src, lzma_filter *dest,
 	}

 	// Terminate the filter array.
-	assert(i <= LZMA_FILTERS_MAX + 1);
+	assert(i < LZMA_FILTERS_MAX + 1);
 	dest[i].id = LZMA_VLI_UNKNOWN;
 	dest[i].options = NULL;

+	// Copy it to the caller-supplied array now that we know that
+	// no errors occurred.
+	memcpy(real_dest, dest, (i + 1) * sizeof(lzma_filter));
+
 	return LZMA_OK;

 error:
 	// Free the options which we have already allocated.
-	while (i-- > 0) {
+	while (i-- > 0)
 		lzma_free(dest[i].options, allocator);
-		dest[i].options = NULL;
-	}

 	return ret;
 }
--- a/src/liblzma/common/hardware_cputhreads.c
+++ b/src/liblzma/common/hardware_cputhreads.c
@ -15,6 +15,18 @@
 #include "tuklib_cpucores.h"


+#ifdef HAVE_SYMBOL_VERSIONS_LINUX
+// This is for compatibility with binaries linked against liblzma that
+// has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7.
+LZMA_SYMVER_API("lzma_cputhreads@XZ_5.2.2",
+	uint32_t, lzma_cputhreads_522)(void) lzma_nothrow
+		__attribute__((__alias__("lzma_cputhreads_52")));
+
+LZMA_SYMVER_API("lzma_cputhreads@@XZ_5.2",
+	uint32_t, lzma_cputhreads_52)(void) lzma_nothrow;
+
+#define lzma_cputhreads lzma_cputhreads_52
+#endif
 extern LZMA_API(uint32_t)
 lzma_cputhreads(void)
 {
--- a/src/liblzma/common/index.c
+++ b/src/liblzma/common/index.c
@ -656,6 +656,10 @@ lzma_index_append(lzma_index *i, const lzma_allocator *allocator,
 	const uint32_t index_list_size_add = lzma_vli_size(unpadded_size)
 			+ lzma_vli_size(uncompressed_size);

+	// Check that uncompressed size will not overflow.
+	if (uncompressed_base + uncompressed_size > LZMA_VLI_MAX)
+		return LZMA_DATA_ERROR;
+
 	// Check that the file size will stay within limits.
 	if (index_file_size(s->node.compressed_base,
 			compressed_base + unpadded_size, s->record_count + 1,
@ -767,6 +771,9 @@ extern LZMA_API(lzma_ret)
 lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
 		const lzma_allocator *allocator)
 {
+	if (dest == NULL || src == NULL)
+		return LZMA_PROG_ERROR;
+
 	const lzma_vli dest_file_size = lzma_index_file_size(dest);

 	// Check that we don't exceed the file size limits.
@ -835,6 +842,11 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
 		}
 	}

+	// dest->checks includes the check types of all except the last Stream
+	// in dest. Set the bit for the check type of the last Stream now so
+	// that it won't get lost when Stream(s) from src are appended to dest.
+	dest->checks = lzma_index_checks(dest);
+
 	// Add all the Streams from src to dest. Update the base offsets
 	// of each Stream from src.
 	const index_cat_info info = {
@ -851,7 +863,7 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
 	dest->total_size += src->total_size;
 	dest->record_count += src->record_count;
 	dest->index_list_size += src->index_list_size;
-	dest->checks = lzma_index_checks(dest) | src->checks;
+	dest->checks |= src->checks;

 	// There's nothing else left in src than the base structure.
 	lzma_free(src, allocator);
@ -1226,7 +1238,7 @@ lzma_index_iter_locate(lzma_index_iter *iter, lzma_vli target)

 	// Use binary search to locate the exact Record. It is the first
 	// Record whose uncompressed_sum is greater than target.
-	// This is because we want the rightmost Record that fullfills the
+	// This is because we want the rightmost Record that fulfills the
 	// search criterion. It is possible that there are empty Blocks;
 	// we don't want to return them.
 	size_t left = 0;
--- a/src/liblzma/common/index_hash.c
+++ b/src/liblzma/common/index_hash.c
@ -122,7 +122,7 @@ lzma_index_hash_size(const lzma_index_hash *index_hash)


 /// Updates the sizes and the hash without any validation.
-static lzma_ret
+static void
 hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size,
 		lzma_vli uncompressed_size)
 {
@ -136,7 +136,7 @@ hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size,
 	lzma_check_update(&info->check, LZMA_CHECK_BEST,
 			(const uint8_t *)(sizes), sizeof(sizes));

-	return LZMA_OK;
+	return;
 }


@ -152,8 +152,7 @@ lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size,
 		return LZMA_PROG_ERROR;

 	// Update the hash.
-	return_if_error(hash_append(&index_hash->blocks,
-			unpadded_size, uncompressed_size));
+	hash_append(&index_hash->blocks, unpadded_size, uncompressed_size);

 	// Validate the properties of *info are still in allowed limits.
 	if (index_hash->blocks.blocks_size > LZMA_VLI_MAX
@ -239,9 +238,9 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
 			index_hash->sequence = SEQ_UNCOMPRESSED;
 		} else {
 			// Update the hash.
-			return_if_error(hash_append(&index_hash->records,
+			hash_append(&index_hash->records,
 					index_hash->unpadded_size,
-					index_hash->uncompressed_size));
+					index_hash->uncompressed_size);

 			// Verify that we don't go over the known sizes. Note
 			// that this validation is simpler than the one used
--- a/src/liblzma/common/memcmplen.h
+++ b/src/liblzma/common/memcmplen.h
@ -80,8 +80,7 @@ lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2,

 #elif defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
 		&& defined(HAVE__MM_MOVEMASK_EPI8) \
-		&& ((defined(__GNUC__) && defined(__SSE2_MATH__)) \
-			|| (defined(__INTEL_COMPILER) && defined(__SSE2__)) \
+		&& (defined(__SSE2__) \
 			|| (defined(_MSC_VER) && defined(_M_IX86_FP) \
 				&& _M_IX86_FP >= 2))
 	// NOTE: Like above, this will use 128-bit unaligned access which
--- a/src/liblzma/common/stream_decoder.c
+++ b/src/liblzma/common/stream_decoder.c
@ -18,15 +18,14 @@ typedef struct {
 	enum {
 		SEQ_STREAM_HEADER,
 		SEQ_BLOCK_HEADER,
-		SEQ_BLOCK,
+		SEQ_BLOCK_INIT,
+		SEQ_BLOCK_RUN,
 		SEQ_INDEX,
 		SEQ_STREAM_FOOTER,
 		SEQ_STREAM_PADDING,
 	} sequence;

-	/// Block or Metadata decoder. This takes little memory and the same
-	/// data structure can be used to decode every Block Header, so it's
-	/// a good idea to have a separate lzma_next_coder structure for it.
+	/// Block decoder
 	lzma_next_coder block_decoder;

 	/// Block options decoded by the Block Header decoder and used by
@ -63,9 +62,9 @@ typedef struct {

 	/// If true, we will decode concatenated Streams that possibly have
 	/// Stream Padding between or after them. LZMA_STREAM_END is returned
-	/// once the application isn't giving us any new input, and we aren't
-	/// in the middle of a Stream, and possible Stream Padding is a
-	/// multiple of four bytes.
+	/// once the application isn't giving us any new input (LZMA_FINISH),
+	/// and we aren't in the middle of a Stream, and possible
+	/// Stream Padding is a multiple of four bytes.
 	bool concatenated;

 	/// When decoding concatenated Streams, this is true as long as we
@ -187,6 +186,15 @@ stream_decode(void *coder_ptr, const lzma_allocator *allocator,
 			return LZMA_OK;

 		coder->pos = 0;
+		coder->sequence = SEQ_BLOCK_INIT;
+	}
+
+	// Fall through
+
+	case SEQ_BLOCK_INIT: {
+		// Checking memusage and doing the initialization needs
+		// its own sequence point because we need to be able to
+		// retry if we return LZMA_MEMLIMIT_ERROR.

 		// Version 1 is needed to support the .ignore_check option.
 		coder->block_options.version = 1;
@ -240,17 +248,17 @@ stream_decode(void *coder_ptr, const lzma_allocator *allocator,

 		coder->block_options.filters = NULL;

-		// Check if memory usage calculation and Block enocoder
+		// Check if memory usage calculation and Block decoder
 		// initialization succeeded.
 		if (ret != LZMA_OK)
 			return ret;

-		coder->sequence = SEQ_BLOCK;
+		coder->sequence = SEQ_BLOCK_RUN;
 	}

 	// Fall through

-	case SEQ_BLOCK: {
+	case SEQ_BLOCK_RUN: {
 		const lzma_ret ret = coder->block_decoder.code(
 				coder->block_decoder.coder, allocator,
 				in, in_pos, in_size, out, out_pos, out_size,
--- a/src/liblzma/common/stream_encoder_mt.c
+++ b/src/liblzma/common/stream_encoder_mt.c
@ -715,6 +715,10 @@ stream_encode_mt(void *coder_ptr, const lzma_allocator *allocator,
 				ret = lzma_index_append(coder->index,
 						allocator, unpadded_size,
 						uncompressed_size);
+				if (ret != LZMA_OK) {
+					threads_stop(coder, false);
+					return ret;
+				}

 				// If we didn't fill the output buffer yet,
 				// try to read more data. Maybe the next
@ -724,8 +728,7 @@ stream_encode_mt(void *coder_ptr, const lzma_allocator *allocator,
 			}

 			if (ret != LZMA_OK) {
-				// coder->thread_error was set or
-				// lzma_index_append() failed.
+				// coder->thread_error was set.
 				threads_stop(coder, false);
 				return ret;
 			}
@ -1075,6 +1078,31 @@ stream_encoder_mt_init(lzma_next_coder *next, const lzma_allocator *allocator,
 }


+#ifdef HAVE_SYMBOL_VERSIONS_LINUX
+// These are for compatibility with binaries linked against liblzma that
+// has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7.
+// Actually that patch didn't create lzma_stream_encoder_mt@XZ_5.2.2
+// but it has been added here anyway since someone might misread the
+// RHEL patch and think both @XZ_5.1.2alpha and @XZ_5.2.2 exist.
+LZMA_SYMVER_API("lzma_stream_encoder_mt@XZ_5.1.2alpha",
+	lzma_ret, lzma_stream_encoder_mt_512a)(
+		lzma_stream *strm, const lzma_mt *options)
+		lzma_nothrow lzma_attr_warn_unused_result
+		__attribute__((__alias__("lzma_stream_encoder_mt_52")));
+
+LZMA_SYMVER_API("lzma_stream_encoder_mt@XZ_5.2.2",
+	lzma_ret, lzma_stream_encoder_mt_522)(
+		lzma_stream *strm, const lzma_mt *options)
+		lzma_nothrow lzma_attr_warn_unused_result
+		__attribute__((__alias__("lzma_stream_encoder_mt_52")));
+
+LZMA_SYMVER_API("lzma_stream_encoder_mt@@XZ_5.2",
+	lzma_ret, lzma_stream_encoder_mt_52)(
+		lzma_stream *strm, const lzma_mt *options)
+		lzma_nothrow lzma_attr_warn_unused_result;
+
+#define lzma_stream_encoder_mt lzma_stream_encoder_mt_52
+#endif
 extern LZMA_API(lzma_ret)
 lzma_stream_encoder_mt(lzma_stream *strm, const lzma_mt *options)
 {
@ -1090,6 +1118,23 @@ lzma_stream_encoder_mt(lzma_stream *strm, const lzma_mt *options)
 }


+#ifdef HAVE_SYMBOL_VERSIONS_LINUX
+LZMA_SYMVER_API("lzma_stream_encoder_mt_memusage@XZ_5.1.2alpha",
+	uint64_t, lzma_stream_encoder_mt_memusage_512a)(
+	const lzma_mt *options) lzma_nothrow lzma_attr_pure
+	__attribute__((__alias__("lzma_stream_encoder_mt_memusage_52")));
+
+LZMA_SYMVER_API("lzma_stream_encoder_mt_memusage@XZ_5.2.2",
+	uint64_t, lzma_stream_encoder_mt_memusage_522)(
+	const lzma_mt *options) lzma_nothrow lzma_attr_pure
+	__attribute__((__alias__("lzma_stream_encoder_mt_memusage_52")));
+
+LZMA_SYMVER_API("lzma_stream_encoder_mt_memusage@@XZ_5.2",
+	uint64_t, lzma_stream_encoder_mt_memusage_52)(
+	const lzma_mt *options) lzma_nothrow lzma_attr_pure;
+
+#define lzma_stream_encoder_mt_memusage lzma_stream_encoder_mt_memusage_52
+#endif
 // This function name is a monster but it's consistent with the older
 // monster names. :-( 31 chars is the max that C99 requires so in that
 // sense it's not too long. ;-)
--- a/src/liblzma/liblzma_generic.map
+++ b/src/liblzma/liblzma_generic.map
@ -93,6 +93,9 @@ global:
 	lzma_vli_decode;
 	lzma_vli_encode;
 	lzma_vli_size;
+
+local:
+	*;
 };

 XZ_5.2 {
@ -102,7 +105,4 @@ global:
 	lzma_get_progress;
 	lzma_stream_encoder_mt;
 	lzma_stream_encoder_mt_memusage;
-
-local:
-	*;
 } XZ_5.0;
--- a/src/liblzma/liblzma_linux.map
+++ b/src/liblzma/liblzma_linux.map
@ -0,0 +1,123 @@
+XZ_5.0 {
+global:
+	lzma_alone_decoder;
+	lzma_alone_encoder;
+	lzma_auto_decoder;
+	lzma_block_buffer_bound;
+	lzma_block_buffer_decode;
+	lzma_block_buffer_encode;
+	lzma_block_compressed_size;
+	lzma_block_decoder;
+	lzma_block_encoder;
+	lzma_block_header_decode;
+	lzma_block_header_encode;
+	lzma_block_header_size;
+	lzma_block_total_size;
+	lzma_block_unpadded_size;
+	lzma_check_is_supported;
+	lzma_check_size;
+	lzma_code;
+	lzma_crc32;
+	lzma_crc64;
+	lzma_easy_buffer_encode;
+	lzma_easy_decoder_memusage;
+	lzma_easy_encoder;
+	lzma_easy_encoder_memusage;
+	lzma_end;
+	lzma_filter_decoder_is_supported;
+	lzma_filter_encoder_is_supported;
+	lzma_filter_flags_decode;
+	lzma_filter_flags_encode;
+	lzma_filter_flags_size;
+	lzma_filters_copy;
+	lzma_filters_update;
+	lzma_get_check;
+	lzma_index_append;
+	lzma_index_block_count;
+	lzma_index_buffer_decode;
+	lzma_index_buffer_encode;
+	lzma_index_cat;
+	lzma_index_checks;
+	lzma_index_decoder;
+	lzma_index_dup;
+	lzma_index_encoder;
+	lzma_index_end;
+	lzma_index_file_size;
+	lzma_index_hash_append;
+	lzma_index_hash_decode;
+	lzma_index_hash_end;
+	lzma_index_hash_init;
+	lzma_index_hash_size;
+	lzma_index_init;
+	lzma_index_iter_init;
+	lzma_index_iter_locate;
+	lzma_index_iter_next;
+	lzma_index_iter_rewind;
+	lzma_index_memusage;
+	lzma_index_memused;
+	lzma_index_size;
+	lzma_index_stream_count;
+	lzma_index_stream_flags;
+	lzma_index_stream_padding;
+	lzma_index_stream_size;
+	lzma_index_total_size;
+	lzma_index_uncompressed_size;
+	lzma_lzma_preset;
+	lzma_memlimit_get;
+	lzma_memlimit_set;
+	lzma_memusage;
+	lzma_mf_is_supported;
+	lzma_mode_is_supported;
+	lzma_physmem;
+	lzma_properties_decode;
+	lzma_properties_encode;
+	lzma_properties_size;
+	lzma_raw_buffer_decode;
+	lzma_raw_buffer_encode;
+	lzma_raw_decoder;
+	lzma_raw_decoder_memusage;
+	lzma_raw_encoder;
+	lzma_raw_encoder_memusage;
+	lzma_stream_buffer_bound;
+	lzma_stream_buffer_decode;
+	lzma_stream_buffer_encode;
+	lzma_stream_decoder;
+	lzma_stream_encoder;
+	lzma_stream_flags_compare;
+	lzma_stream_footer_decode;
+	lzma_stream_footer_encode;
+	lzma_stream_header_decode;
+	lzma_stream_header_encode;
+	lzma_version_number;
+	lzma_version_string;
+	lzma_vli_decode;
+	lzma_vli_encode;
+	lzma_vli_size;
+
+local:
+	*;
+};
+
+XZ_5.2 {
+global:
+	lzma_block_uncomp_encode;
+	lzma_cputhreads;
+	lzma_get_progress;
+	lzma_stream_encoder_mt;
+	lzma_stream_encoder_mt_memusage;
+} XZ_5.0;
+
+XZ_5.1.2alpha {
+global:
+	lzma_stream_encoder_mt;
+	lzma_stream_encoder_mt_memusage;
+} XZ_5.0;
+
+XZ_5.2.2 {
+global:
+	lzma_block_uncomp_encode;
+	lzma_cputhreads;
+	lzma_get_progress;
+	lzma_stream_encoder_mt;
+	lzma_stream_encoder_mt_memusage;
+} XZ_5.1.2alpha;
--- a/src/liblzma/lz/lz_decoder.c
+++ b/src/liblzma/lz/lz_decoder.c
@ -304,8 +304,14 @@ lzma_lz_decoder_memusage(size_t dictionary_size)


 extern void
-lzma_lz_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size)
+lzma_lz_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size,
+		bool allow_eopm)
 {
 	lzma_coder *coder = coder_ptr;
-	coder->lz.set_uncompressed(coder->lz.coder, uncompressed_size);
+
+	if (uncompressed_size == LZMA_VLI_UNKNOWN)
+		allow_eopm = true;
+
+	coder->lz.set_uncompressed(coder->lz.coder, uncompressed_size,
+			allow_eopm);
 }
--- a/src/liblzma/lz/lz_decoder.h
+++ b/src/liblzma/lz/lz_decoder.h
@ -62,8 +62,10 @@ typedef struct {

 	void (*reset)(void *coder, const void *options);

-	/// Set the uncompressed size
-	void (*set_uncompressed)(void *coder, lzma_vli uncompressed_size);
+	/// Set the uncompressed size. If uncompressed_size == LZMA_VLI_UNKNOWN
+	/// then allow_eopm will always be true.
+	void (*set_uncompressed)(void *coder, lzma_vli uncompressed_size,
+			bool allow_eopm);

 	/// Free allocated resources
 	void (*end)(void *coder, const lzma_allocator *allocator);
@ -91,7 +93,7 @@ extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
 extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);

 extern void lzma_lz_decoder_uncompressed(
-		void *coder, lzma_vli uncompressed_size);
+		void *coder, lzma_vli uncompressed_size, bool allow_eopm);


 //////////////////////
--- a/src/liblzma/lz/lz_encoder.c
+++ b/src/liblzma/lz/lz_encoder.c
@ -585,32 +585,28 @@ lzma_lz_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 extern LZMA_API(lzma_bool)
 lzma_mf_is_supported(lzma_match_finder mf)
 {
-	bool ret = false;
-
+	switch (mf) {
 #ifdef HAVE_MF_HC3
-	if (mf == LZMA_MF_HC3)
-		ret = true;
+	case LZMA_MF_HC3:
+		return true;
 #endif
-
 #ifdef HAVE_MF_HC4
-	if (mf == LZMA_MF_HC4)
-		ret = true;
+	case LZMA_MF_HC4:
+		return true;
 #endif
-
 #ifdef HAVE_MF_BT2
-	if (mf == LZMA_MF_BT2)
-		ret = true;
+	case LZMA_MF_BT2:
+		return true;
 #endif
-
 #ifdef HAVE_MF_BT3
-	if (mf == LZMA_MF_BT3)
-		ret = true;
+	case LZMA_MF_BT3:
+		return true;
 #endif
-
 #ifdef HAVE_MF_BT4
-	if (mf == LZMA_MF_BT4)
-		ret = true;
+	case LZMA_MF_BT4:
+		return true;
 #endif
-
-	return ret;
+	default:
+		return false;
+	}
 }
--- a/src/liblzma/lzma/lzma2_decoder.c
+++ b/src/liblzma/lzma/lzma2_decoder.c
@ -139,7 +139,7 @@ lzma2_decode(void *coder_ptr, lzma_dict *restrict dict,
 		coder->uncompressed_size += in[(*in_pos)++] + 1U;
 		coder->sequence = SEQ_COMPRESSED_0;
 		coder->lzma.set_uncompressed(coder->lzma.coder,
-				coder->uncompressed_size);
+				coder->uncompressed_size, false);
 		break;

 	case SEQ_COMPRESSED_0:
--- a/src/liblzma/lzma/lzma2_encoder.c
+++ b/src/liblzma/lzma/lzma2_encoder.c
@ -378,6 +378,9 @@ lzma_lzma2_encoder_memusage(const void *options)
 extern lzma_ret
 lzma_lzma2_props_encode(const void *options, uint8_t *out)
 {
+	if (options == NULL)
+		return LZMA_PROG_ERROR;
+
 	const lzma_options_lzma *const opt = options;
 	uint32_t d = my_max(opt->dict_size, LZMA_DICT_SIZE_MIN);

--- a/src/liblzma/lzma/lzma_decoder.c
+++ b/src/liblzma/lzma/lzma_decoder.c
@ -238,6 +238,11 @@ typedef struct {
 	/// payload marker is expected.
 	lzma_vli uncompressed_size;

+	/// True if end of payload marker (EOPM) is allowed even when
+	/// uncompressed_size is known; false if EOPM must not be present.
+	/// This is ignored if uncompressed_size == LZMA_VLI_UNKNOWN.
+	bool allow_eopm;
+
 	////////////////////////////////
 	// State of incomplete symbol //
 	////////////////////////////////
@ -343,12 +348,24 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,

 	lzma_ret ret = LZMA_OK;

-	// If uncompressed size is known, there must be no end of payload
-	// marker.
-	const bool no_eopm = coder->uncompressed_size
-			!= LZMA_VLI_UNKNOWN;
-	if (no_eopm && coder->uncompressed_size < dict.limit - dict.pos)
+	// This is true when the next LZMA symbol is allowed to be EOPM.
+	// That is, if this is false, then EOPM is considered
+	// an invalid symbol and we will return LZMA_DATA_ERROR.
+	//
+	// EOPM is always required (not just allowed) when
+	// the uncompressed size isn't known. When uncompressed size
+	// is known, eopm_is_valid may be set to true later.
+	bool eopm_is_valid = coder->uncompressed_size == LZMA_VLI_UNKNOWN;
+
+	// If uncompressed size is known and there is enough output space
+	// to decode all the data, limit the available buffer space so that
+	// the main loop won't try to decode past the end of the stream.
+	bool might_finish_without_eopm = false;
+	if (coder->uncompressed_size != LZMA_VLI_UNKNOWN
+			&& coder->uncompressed_size <= dict.limit - dict.pos) {
 		dict.limit = dict.pos + (size_t)(coder->uncompressed_size);
+		might_finish_without_eopm = true;
+	}

 	// The main decoder loop. The "switch" is used to restart the decoder at
 	// correct location. Once restarted, the "switch" is no longer used.
@ -361,8 +378,32 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,

 	case SEQ_NORMALIZE:
 	case SEQ_IS_MATCH:
-		if (unlikely(no_eopm && dict.pos == dict.limit))
-			break;
+		if (unlikely(might_finish_without_eopm
+				&& dict.pos == dict.limit)) {
+			// In rare cases there is a useless byte that needs
+			// to be read anyway.
+			rc_normalize(SEQ_NORMALIZE);
+
+			// If the range decoder state is such that we can
+			// be at the end of the LZMA stream, then the
+			// decoding is finished.
+			if (rc_is_finished(rc)) {
+				ret = LZMA_STREAM_END;
+				goto out;
+			}
+
+			// If the caller hasn't allowed EOPM to be present
+			// together with known uncompressed size, then the
+			// LZMA stream is corrupt.
+			if (!coder->allow_eopm) {
+				ret = LZMA_DATA_ERROR;
+				goto out;
+			}
+
+			// Otherwise continue decoding with the expectation
+			// that the next LZMA symbol is EOPM.
+			eopm_is_valid = true;
+		}

 		rc_if_0(coder->is_match[state][pos_state], SEQ_IS_MATCH) {
 			rc_update_0(coder->is_match[state][pos_state]);
@ -658,11 +699,18 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,

 					if (rep0 == UINT32_MAX) {
 						// End of payload marker was
-						// found. It must not be
-						// present if uncompressed
-						// size is known.
-						if (coder->uncompressed_size
-						!= LZMA_VLI_UNKNOWN) {
+						// found. It may only be
+						// present if
+						//   - uncompressed size is
+						//     unknown or
+						//   - after known uncompressed
+						//     size amount of bytes has
+						//     been decompressed and
+						//     caller has indicated
+						//     that EOPM might be used
+						//     (it's not allowed in
+						//     LZMA2).
+						if (!eopm_is_valid) {
 							ret = LZMA_DATA_ERROR;
 							goto out;
 						}
@ -671,7 +719,9 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
 						// LZMA1 stream with
 						// end-of-payload marker.
 						rc_normalize(SEQ_EOPM);
-						ret = LZMA_STREAM_END;
+						ret = rc_is_finished(rc)
+							? LZMA_STREAM_END
+							: LZMA_DATA_ERROR;
 						goto out;
 					}
 				}
@ -793,9 +843,6 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
 		}
 	}

-	rc_normalize(SEQ_NORMALIZE);
-	coder->sequence = SEQ_IS_MATCH;
-
 out:
 	// Save state

@ -822,24 +869,21 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
 	if (coder->uncompressed_size != LZMA_VLI_UNKNOWN) {
 		coder->uncompressed_size -= dict.pos - dict_start;

-		// Since there cannot be end of payload marker if the
-		// uncompressed size was known, we check here if we
-		// finished decoding.
+		// If we have gotten all the output but the decoder wants
+		// to write more output, the file is corrupt. There are
+		// three SEQ values where output is produced.
 		if (coder->uncompressed_size == 0 && ret == LZMA_OK
-				&& coder->sequence != SEQ_NORMALIZE)
-			ret = coder->sequence == SEQ_IS_MATCH
-					? LZMA_STREAM_END : LZMA_DATA_ERROR;
+				&& (coder->sequence == SEQ_LITERAL_WRITE
+					|| coder->sequence == SEQ_SHORTREP
+					|| coder->sequence == SEQ_COPY))
+			ret = LZMA_DATA_ERROR;
 	}

-	// We can do an additional check in the range decoder to catch some
-	// corrupted files.
 	if (ret == LZMA_STREAM_END) {
-		if (!rc_is_finished(coder->rc))
-			ret = LZMA_DATA_ERROR;
-
 		// Reset the range decoder so that it is ready to reinitialize
 		// for a new LZMA2 chunk.
 		rc_reset(coder->rc);
+		coder->sequence = SEQ_IS_MATCH;
 	}

 	return ret;
@ -848,10 +892,12 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,


 static void
-lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size)
+lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size,
+		bool allow_eopm)
 {
 	lzma_lzma1_decoder *coder = coder_ptr;
 	coder->uncompressed_size = uncompressed_size;
+	coder->allow_eopm = allow_eopm;
 }


@ -977,7 +1023,7 @@ lzma_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator,
 			lz, allocator, options, lz_options));

 	lzma_decoder_reset(lz->coder, options);
-	lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN);
+	lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN, true);

 	return LZMA_OK;
 }
--- a/src/liblzma/lzma/lzma_encoder.c
+++ b/src/liblzma/lzma/lzma_encoder.c
@ -658,6 +658,9 @@ lzma_lzma_lclppb_encode(const lzma_options_lzma *options, uint8_t *byte)
 extern lzma_ret
 lzma_lzma_props_encode(const void *options, uint8_t *out)
 {
+	if (options == NULL)
+		return LZMA_PROG_ERROR;
+
 	const lzma_options_lzma *const opt = options;

 	if (lzma_lzma_lclppb_encode(opt, out))
--- a/src/liblzma/validate_map.sh
+++ b/src/liblzma/validate_map.sh
@ -2,7 +2,79 @@

 ###############################################################################
 #
-# Check liblzma.map for certain types of errors
+# Check liblzma_*.map for certain types of errors.
+#
+# liblzma_generic.map is for FreeBSD and Solaris and possibly others
+# except GNU/Linux.
+#
+# liblzma_linux.map is for GNU/Linux only. This and the matching extra code
+# in the .c files make liblzma >= 5.2.7 compatible with binaries that were
+# linked against ill-patched liblzma in RHEL/CentOS 7. Providing the
+# compatibility in an official XZ Utils release will hopefully prevent people
+# from further copying the broken patch to other places when they want
+# compatibility with binaries linked on RHEL/CentOS 7. The long version
+# of the story:
+#
+#     RHEL/CentOS 7 shipped with 5.1.2alpha, including the threaded
+#     encoder that is behind #ifdef LZMA_UNSTABLE in the API headers.
+#     In 5.1.2alpha these symbols are under XZ_5.1.2alpha in liblzma.map.
+#     API/ABI compatibility tracking isn't done between development
+#     releases so newer releases didn't have XZ_5.1.2alpha anymore.
+#
+#     Later RHEL/CentOS 7 updated xz to 5.2.2 but they wanted to keep
+#     the exported symbols compatible with 5.1.2alpha. After checking
+#     the ABI changes it turned out that >= 5.2.0 ABI is backward
+#     compatible with the threaded encoder functions from 5.1.2alpha
+#     (but not vice versa as fixes and extensions to these functions
+#     were made between 5.1.2alpha and 5.2.0).
+#
+#     In RHEL/CentOS 7, XZ Utils 5.2.2 was patched with
+#     xz-5.2.2-compat-libs.patch to modify liblzma.map:
+#
+#       - XZ_5.1.2alpha was added with lzma_stream_encoder_mt and
+#         lzma_stream_encoder_mt_memusage. This matched XZ Utils 5.1.2alpha.
+#
+#       - XZ_5.2 was replaced with XZ_5.2.2. It is clear that this was
+#         an error; the intention was to keep using XZ_5.2 (XZ_5.2.2
+#         has never been used in XZ Utils). So XZ_5.2.2 lists all
+#         symbols that were listed under XZ_5.2 before the patch.
+#         lzma_stream_encoder_mt and _mt_memusage are included too so
+#         they are listed both here and under XZ_5.1.2alpha.
+#
+#     The patch didn't add any __asm__(".symver ...") lines to the .c
+#     files. Thus the resulting liblzma.so exports the threaded encoder
+#     functions under XZ_5.1.2alpha only. Listing the two functions
+#     also under XZ_5.2.2 in liblzma.map has no effect without
+#     matching .symver lines.
+#
+#     The lack of XZ_5.2 in RHEL/CentOS 7 means that binaries linked
+#     against unpatched XZ Utils 5.2.x won't run on RHEL/CentOS 7.
+#     This is unfortunate but this alone isn't too bad as the problem
+#     is contained within RHEL/CentOS 7 and doesn't affect users
+#     of other distributions. It could also be fixed internally in
+#     RHEL/CentOS 7.
+#
+#     The second problem is more serious: In XZ Utils 5.2.2 the API
+#     headers don't have #ifdef LZMA_UNSTABLE for obvious reasons.
+#     This is true in RHEL/CentOS 7 version too. Thus now programs
+#     using new APIs can be compiled without an extra #define. However,
+#     the programs end up depending on symbol version XZ_5.1.2alpha
+#     (and possibly also XZ_5.2.2) instead of XZ_5.2 as they would
+#     with an unpatched XZ Utils 5.2.2. This means that such binaries
+#     won't run on other distributions shipping XZ Utils >= 5.2.0 as
+#     they don't provide XZ_5.1.2alpha or XZ_5.2.2; they only provide
+#     XZ_5.2 (and XZ_5.0). (This includes RHEL/CentOS 8 as the patch
+#     luckily isn't included there anymore with XZ Utils 5.2.4.)
+#
+#     Binaries built by RHEL/CentOS 7 users get distributed and then
+#     people wonder why they don't run on some other distribution.
+#     Seems that people have found out about the patch and been copying
+#     it to some build scripts, seemingly curing the symptoms but
+#     actually spreading the illness further and outside RHEL/CentOS 7.
+#     Adding compatibility in an official XZ Utils release should work
+#     as a vaccine against this ill patch and stop it from spreading.
+#     The vaccine is kept GNU/Linux-only as other OSes should be immune
+#     (hopefully it hasn't spread via some build script to other OSes).
 #
 # Author: Lasse Collin
 #
@ -18,11 +90,11 @@ STATUS=0

 cd "$(dirname "$0")"

-# Get the list of symbols that aren't defined in liblzma.map.
+# Get the list of symbols that aren't defined in liblzma_generic.map.
 SYMS=$(sed -n 's/^extern LZMA_API([^)]*) \([a-z0-9_]*\)(.*$/\1;/p' \
 		api/lzma/*.h \
 	| sort \
-	| grep -Fve "$(sed '/[{}:*]/d;/^$/d;s/^	//' liblzma.map)")
+	| grep -Fve "$(sed '/[{}:*]/d;/^$/d;s/^	//' liblzma_generic.map)")

 # Check that there are no old alpha or beta versions listed.
 VER=$(cd ../.. && sh build-aux/version.sh)
@ -30,21 +102,41 @@ NAMES=
 case $VER in
 	*alpha | *beta)
 		NAMES=$(sed -n 's/^.*XZ_\([^ ]*\)\(alpha\|beta\) .*$/\1\2/p' \
-			liblzma.map | grep -Fv "$VER")
+			liblzma_generic.map | grep -Fv "$VER")
 		;;
 esac

 # Check for duplicate lines. It can catch missing dependencies.
-DUPS=$(sort liblzma.map | sed '/^$/d;/^global:$/d' | uniq -d)
+DUPS=$(sort liblzma_generic.map | sed '/^$/d;/^global:$/d' | uniq -d)
+
+# Check that liblzma_linux.map is in sync with liblzma_generic.map.
+# The RHEL/CentOS 7 compatibility symbols are in a fixed location
+# so it makes it easy to remove them for comparison with liblzma_generic.map.
+#
+# NOTE: Putting XZ_5.2 before the compatibility symbols XZ_5.1.2alpha
+# and XZ_5.2.2 in liblzma_linux.map is important: If liblzma_linux.map is
+# incorrectly used without #define HAVE_SYMBOL_VERSIONS_LINUX, only the first
+# occurrence of each function name will be used from liblzma_linux.map;
+# the rest are ignored by the linker. Thus having XZ_5.2 before the
+# compatibility symbols means that @@XZ_5.2 will be used for the symbols
+# listed under XZ_5.2 {...} and the same function names later in
+# the file under XZ_5.1.2alpha {...} and XZ_5.2.2 {...} will be
+# ignored (@XZ_5.1.2alpha or @XZ_5.2.2 won't be added at all when
+# the #define HAVE_SYMBOL_VERSIONS_LINUX isn't used).
+IN_SYNC=
+if ! sed '109,123d' liblzma_linux.map \
+		| cmp -s - liblzma_generic.map; then
+	IN_SYNC=no
+fi

 # Print error messages if needed.
-if test -n "$SYMS$NAMES$DUPS"; then
+if test -n "$SYMS$NAMES$DUPS$IN_SYNC"; then
 	echo
-	echo 'validate_map.sh found problems from liblzma.map:'
+	echo 'validate_map.sh found problems from liblzma_*.map:'
 	echo

 	if test -n "$SYMS"; then
-		echo 'liblzma.map lacks the following symbols:'
+		echo 'liblzma_generic.map lacks the following symbols:'
 		echo "$SYMS"
 		echo
 	fi
@ -61,6 +153,11 @@ if test -n "$SYMS$NAMES$DUPS"; then
 		echo
 	fi

+	if test -n "$IN_SYNC"; then
+		echo "liblzma_generic.map and liblzma_linux.map aren't in sync"
+		echo
+	fi
+
 	STATUS=1
 fi

--- a/src/xz/coder.c
+++ b/src/xz/coder.c
@ -513,8 +513,12 @@ coder_init(file_pair *pair)
 			// is needed, because we don't want to do use
 			// passthru mode with --test.
 			if (opt_mode == MODE_DECOMPRESS
-					&& opt_stdout && opt_force)
+					&& opt_stdout && opt_force) {
+				// These are needed for progress info.
+				strm.total_in = 0;
+				strm.total_out = 0;
 				return CODER_INIT_PASSTHRU;
+			}

 			ret = LZMA_FORMAT_ERROR;
 			break;
@ -542,10 +546,30 @@ coder_init(file_pair *pair)
 		// memory usage limit in case it happens in the first
 		// Block of the first Stream, which is where it very
 		// probably will happen if it is going to happen.
+		//
+		// This will also catch unsupported check type which
+		// we treat as a warning only. If there are empty
+		// concatenated Streams with unsupported check type then
+		// the message can be shown more than once here. The loop
+		// is used in case there is first a warning about
+		// unsupported check type and then the first Block
+		// would exceed the memlimit.
 		if (ret == LZMA_OK && init_format != FORMAT_RAW) {
 			strm.next_out = NULL;
 			strm.avail_out = 0;
-			ret = lzma_code(&strm, LZMA_RUN);
+			while ((ret = lzma_code(&strm, LZMA_RUN))
+					== LZMA_UNSUPPORTED_CHECK)
+				message_warning("%s: %s", pair->src_name,
+						message_strm(ret));
+
+			// With --single-stream lzma_code won't wait for
+			// LZMA_FINISH and thus it can return LZMA_STREAM_END
+			// if the file has no uncompressed data inside.
+			// So treat LZMA_STREAM_END as LZMA_OK here.
+			// When lzma_code() is called again in coder_normal()
+			// it will return LZMA_STREAM_END again.
+			if (ret == LZMA_STREAM_END)
+				ret = LZMA_OK;
 		}
 #endif
 	}
@ -756,9 +780,9 @@ coder_normal(file_pair *pair)

 		} else if (ret != LZMA_OK) {
 			// Determine if the return value indicates that we
-			// won't continue coding.
-			const bool stop = ret != LZMA_NO_CHECK
-					&& ret != LZMA_UNSUPPORTED_CHECK;
+			// won't continue coding. LZMA_NO_CHECK would be
+			// here too if LZMA_TELL_ANY_CHECK was used.
+			const bool stop = ret != LZMA_UNSUPPORTED_CHECK;

 			if (stop) {
 				// Write the remaining bytes even if something
@ -907,6 +931,15 @@ coder_run(const char *filename)
 				mytime_set_start_time();

 				// Initialize the progress indicator.
+				//
+				// NOTE: When reading from stdin, fstat()
+				// isn't called on it and thus src_st.st_size
+				// is zero. If stdin pointed to a regular
+				// file, it would still be possible to know
+				// the file size but then we would also need
+				// to take into account the current reading
+				// position since with stdin it isn't
+				// necessarily at the beginning of the file.
 				const bool is_passthru = init_ret
 						== CODER_INIT_PASSTHRU;
 				const uint64_t in_size
--- a/src/xz/file_io.c
+++ b/src/xz/file_io.c
@ -330,14 +330,14 @@ io_unlink(const char *name, const struct stat *known_st)
 		// it is possible that the user has put a new file in place
 		// of the original file, and in that case it obviously
 		// shouldn't be removed.
-		message_error(_("%s: File seems to have been moved, "
+		message_warning(_("%s: File seems to have been moved, "
 				"not removing"), name);
 	else
 #endif
 		// There's a race condition between lstat() and unlink()
 		// but at least we have tried to avoid removing wrong file.
 		if (unlink(name))
-			message_error(_("%s: Cannot remove: %s"),
+			message_warning(_("%s: Cannot remove: %s"),
 					name, strerror(errno));

 	return;
@ -368,7 +368,14 @@ io_copy_attrs(const file_pair *pair)

 	mode_t mode;

-	if (fchown(pair->dest_fd, (uid_t)(-1), pair->src_st.st_gid)) {
+	// With BSD semantics the new dest file may have a group that
+	// does not belong to the user. If the src file has the same gid
+	// nothing has to be done. Nevertheless OpenBSD fchown(2) fails
+	// in this case which seems to be POSIX compliant. As there is
+	// nothing to do, skip the system call.
+	if (pair->dest_st.st_gid != pair->src_st.st_gid
+			&& fchown(pair->dest_fd, (uid_t)(-1),
+				pair->src_st.st_gid)) {
 		message_warning(_("%s: Cannot set the file group: %s"),
 				pair->dest_name, strerror(errno));
 		// We can still safely copy some additional permissions:
@ -536,8 +543,9 @@ io_open_src_real(file_pair *pair)
 	}

 	// Symlinks are not followed unless writing to stdout or --force
-	// was used.
-	const bool follow_symlinks = opt_stdout || opt_force;
+	// or --keep was used.
+	const bool follow_symlinks
+			= opt_stdout || opt_force || opt_keep_original;

 	// We accept only regular files if we are writing the output
 	// to disk too. bzip2 allows overriding this with --force but
@ -674,7 +682,7 @@ io_open_src_real(file_pair *pair)
 	}

 #ifndef TUKLIB_DOSLIKE
-	if (reg_files_only && !opt_force) {
+	if (reg_files_only && !opt_force && !opt_keep_original) {
 		if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) {
 			// gzip rejects setuid and setgid files even
 			// when --force was used. bzip2 doesn't check
@ -683,7 +691,7 @@ io_open_src_real(file_pair *pair)
 			// and setgid bits there.
 			//
 			// We accept setuid and setgid files if
-			// --force was used. We drop these bits
+			// --force or --keep was used. We drop these bits
 			// explicitly in io_copy_attr().
 			message_warning(_("%s: File has setuid or "
 					"setgid bit set, skipping"),
@ -747,6 +755,10 @@ io_open_src(const char *src_name)
 	// a statically allocated structure.
 	static file_pair pair;

+	// This implicitly also initializes src_st.st_size to zero
+	// which is expected to be <= 0 by default. fstat() isn't
+	// called when reading from standard input but src_st.st_size
+	// is still read.
 	pair = (file_pair){
 		.src_name = src_name,
 		.dest_name = NULL,
--- a/src/xz/hardware.c
+++ b/src/xz/hardware.c
@ -91,7 +91,13 @@ hardware_memlimit_set(uint64_t new_memlimit,
 		// Use a value less than SIZE_MAX so that there's some room
 		// for the xz program and so on. Don't use 4000 MiB because
 		// it could look like someone mixed up base-2 and base-10.
+#ifdef __mips__
+		// For MIPS32, due to architectural peculiarities,
+		// the limit is even lower.
+		const uint64_t limit_max = UINT64_C(2000) << 20;
+#else
 		const uint64_t limit_max = UINT64_C(4020) << 20;
+#endif

 		// UINT64_MAX is a special case for the string "max" so
 		// that has to be handled specially.
--- a/src/xz/message.c
+++ b/src/xz/message.c
@ -355,11 +355,8 @@ progress_speed(uint64_t uncompressed_pos, uint64_t elapsed)
 	if (elapsed < 3000)
 		return "";

-	static const char unit[][8] = {
-		"KiB/s",
-		"MiB/s",
-		"GiB/s",
-	};
+	// The first character of KiB/s, MiB/s, or GiB/s:
+	static const char unit[] = { 'K', 'M', 'G' };

 	size_t unit_index = 0;

@ -381,7 +378,7 @@ progress_speed(uint64_t uncompressed_pos, uint64_t elapsed)
 	//  - 999 KiB/s
 	// Use big enough buffer to hold e.g. a multibyte decimal point.
 	static char buf[16];
-	snprintf(buf, sizeof(buf), "%.*f %s",
+	snprintf(buf, sizeof(buf), "%.*f %ciB/s",
 			speed > 9.9 ? 0 : 1, speed, unit[unit_index]);
 	return buf;
 }
@ -1116,6 +1113,9 @@ message_help(bool long_help)
 "  -k, --keep          keep (don't delete) input files\n"
 "  -f, --force         force overwrite of output file and (de)compress links\n"
 "  -c, --stdout        write to standard output and don't delete input files"));
+	// NOTE: --to-stdout isn't included above because it's not
+	// the recommended spelling. It was copied from gzip but other
+	// compressors with gzip-like syntax don't support it.

 	if (long_help) {
 		puts(_(
--- a/src/xz/xz.1
+++ b/src/xz/xz.1
@ -5,7 +5,7 @@
 .\" This file has been put into the public domain.
 .\" You can do whatever you want with this file.
 .\"
-.TH XZ 1 "2020-02-01" "Tukaani" "XZ Utils"
+.TH XZ 1 "2022-10-25" "Tukaani" "XZ Utils"
 .
 .SH NAME
 xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files
@ -183,7 +183,8 @@ is removed unless
 was specified.
 The source
 .I file
-is never removed if the output is written to standard output.
+is never removed if the output is written to standard output
+or if an error occurs.
 .PP
 Sending
 .B SIGINFO
@ -221,7 +222,7 @@ To prevent uncomfortable surprises,
 has a built-in memory usage limiter, which is disabled by default.
 While some operating systems provide ways to limit
 the memory usage of processes, relying on it
-wasn't deemed to be flexible enough (e.g. using
+wasn't deemed to be flexible enough (for example, using
 .BR ulimit (1)
 to limit virtual memory tends to cripple
 .BR mmap (2)).
@ -231,19 +232,21 @@ the command line option \fB\-\-memlimit=\fIlimit\fR.
 Often it is more convenient to enable the limiter
 by default by setting the environment variable
 .BR XZ_DEFAULTS ,
-e.g.\&
+for example,
 .BR XZ_DEFAULTS=\-\-memlimit=150MiB .
 It is possible to set the limits separately
-for compression and decompression
-by using \fB\-\-memlimit\-compress=\fIlimit\fR and
-\fB\-\-memlimit\-decompress=\fIlimit\fR.
+for compression and decompression by using
+.BI \-\-memlimit\-compress= limit
+and \fB\-\-memlimit\-decompress=\fIlimit\fR.
 Using these two options outside
 .B XZ_DEFAULTS
 is rarely useful because a single run of
 .B xz
 cannot do both compression and decompression and
 .BI \-\-memlimit= limit
-(or \fB\-M\fR \fIlimit\fR)
+(or
+.B \-M
+.IR limit )
 is shorter to type on the command line.
 .PP
 If the specified memory usage limit is exceeded when decompressing,
@ -252,11 +255,13 @@ will display an error and decompressing the file will fail.
 If the limit is exceeded when compressing,
 .B xz
 will try to scale the settings down so that the limit
-is no longer exceeded (except when using \fB\-\-format=raw\fR
-or \fB\-\-no\-adjust\fR).
+is no longer exceeded (except when using
+.B \-\-format=raw
+or
+.BR \-\-no\-adjust ).
 This way the operation won't fail unless the limit is very small.
 The scaling of the settings is done in steps that don't
-match the compression level presets, e.g. if the limit is
+match the compression level presets, for example, if the limit is
 only slightly less than the amount required for
 .BR "xz \-9" ,
 the settings will be scaled down only a little,
@ -276,7 +281,7 @@ It is possible to insert padding between the concatenated parts
 or after the last part.
 The padding must consist of null bytes and the size
 of the padding must be a multiple of four bytes.
-This can be useful e.g. if the
+This can be useful, for example, if the
 .B .xz
 file is stored on a medium that measures file sizes
 in 512-byte blocks.
@ -373,7 +378,7 @@ For even more information, use
 twice, but note that this may be slow, because getting all the extra
 information requires many seeks.
 The width of verbose output exceeds
-80 characters, so piping the output to e.g.\&
+80 characters, so piping the output to, for example,
 .B "less\ \-S"
 may be convenient if the terminal isn't wide enough.
 .IP ""
@ -388,6 +393,20 @@ should be used.
 .TP
 .BR \-k ", " \-\-keep
 Don't delete the input files.
+.IP ""
+Since
+.B xz
+5.2.6,
+this option also makes
+.B xz
+compress or decompress even if the input is
+a symbolic link to a regular file,
+has more than one hard link,
+or has the setuid, setgid, or sticky bit set.
+The setuid, setgid, and sticky bits are not copied
+to the target file.
+In earlier versions this was only done with
+.BR \-\-force .
 .TP
 .BR \-f ", " \-\-force
 This option has several effects:
@ -405,7 +424,7 @@ to the target file.
 .IP \(bu 3
 When used with
 .B \-\-decompress
-.BR \-\-stdout
+.B \-\-stdout
 and
 .B xz
 cannot recognize the type of the source file,
@ -670,7 +689,7 @@ Good to very good compression while keeping
 decompressor memory usage reasonable even for old systems.
 .B \-6
 is the default, which is usually a good choice
-e.g. for distributing files that need to be decompressible
+for distributing files that need to be decompressible
 even on systems with only 16\ MiB RAM.
 .RB ( \-5e
 or
@ -840,7 +859,7 @@ The default
 .I size
 is three times the LZMA2 dictionary size or 1 MiB,
 whichever is more.
-Typically a good value is 2\-4 times
+Typically a good value is 2\(en4 times
 the size of the LZMA2 dictionary or at least 1 MiB.
 Using
 .I size
@ -1006,7 +1025,7 @@ until the details have been decided.
 .RE
 .IP ""
 For 32-bit
-.BR xz
+.B xz
 there is a special case: if the
 .I limit
 would be over
@ -1015,6 +1034,9 @@ the
 .I limit
 is set to
 .BR "4020\ MiB" .
+On MIPS32
+.B "2000\ MiB"
+is used instead.
 (The values
 .B 0
 and
@ -1022,7 +1044,8 @@ and
 aren't affected by this.
 A similar feature doesn't exist for decompression.)
 This can be helpful when a 32-bit executable has access
-to 4\ GiB address space while hopefully doing no harm in other situations.
+to 4\ GiB address space (2\ GiB on MIPS32)
+while hopefully doing no harm in other situations.
 .IP ""
 See also the section
 .BR "Memory usage" .
@ -1042,7 +1065,8 @@ for possible ways to specify the
 .IR limit .
 .TP
 \fB\-M\fR \fIlimit\fR, \fB\-\-memlimit=\fIlimit\fR, \fB\-\-memory=\fIlimit
-This is equivalent to specifying \fB\-\-memlimit\-compress=\fIlimit
+This is equivalent to specifying
+.BI \-\-memlimit\-compress= limit
 \fB\-\-memlimit\-decompress=\fIlimit\fR.
 .TP
 .B \-\-no\-adjust
@ -1088,7 +1112,12 @@ A custom filter chain allows specifying
 the compression settings in detail instead of relying on
 the settings associated to the presets.
 When a custom filter chain is specified,
-preset options (\fB\-0\fR ... \fB\-9\fR and \fB\-\-extreme\fR)
+preset options
+.RB ( \-0
+\&...\&
+.B \-9
+and
+.BR \-\-extreme )
 earlier on the command line are forgotten.
 If a preset option is specified
 after one or more custom filter chain options,
@ -1172,7 +1201,10 @@ The integer can be from
 .B 0
 to
 .BR 9 ,
-matching the command line options \fB\-0\fR ... \fB\-9\fR.
+matching the command line options
+.B \-0
+\&...\&
+.BR \-9 .
 The only supported modifier is currently
 .BR e ,
 which matches
@ -1253,7 +1285,7 @@ The literal coding makes an assumption that the highest
 .I lc
 bits of the previous uncompressed byte correlate
 with the next byte.
-E.g. in typical English text, an upper-case letter is
+For example, in typical English text, an upper-case letter is
 often followed by a lower-case letter, and a lower-case
 letter is usually followed by another lower-case letter.
 In the US-ASCII character set, the highest three bits are 010
@ -1268,7 +1300,7 @@ If you want maximum compression, test
 .BR lc=4 .
 Sometimes it helps a little, and
 sometimes it makes compression worse.
-If it makes it worse, test e.g.\&
+If it makes it worse, test
 .B lc=2
 too.
 .TP
@ -1294,10 +1326,10 @@ The default means four-byte alignment
 .RI (2^ pb =2^2=4),
 which is often a good choice when there's no better guess.
 .IP ""
-When the aligment is known, setting
+When the alignment is known, setting
 .I pb
 accordingly may reduce the file size a little.
-E.g. with text files having one-byte
+For example, with text files having one-byte
 alignment (US-ASCII, ISO-8859-*, UTF-8), setting
 .B pb=0
 can improve compression slightly.
@ -1325,7 +1357,7 @@ The default depends on the
 .IR preset :
 0 uses
 .BR hc3 ,
-1\-3
+1\(en3
 use
 .BR hc4 ,
 and the rest use
@ -1441,11 +1473,11 @@ The default is
 .B fast
 for
 .I presets
-0\-3 and
+0\(en3 and
 .B normal
 for
 .I presets
-4\-9.
+4\(en9.
 .IP ""
 Usually
 .B fast
@ -1464,7 +1496,7 @@ bytes is found, the algorithm stops
 looking for possibly better matches.
 .IP ""
 .I Nice
-can be 2\-273 bytes.
+can be 2\(en273 bytes.
 Higher values tend to give better compression ratio
 at the expense of speed.
 The default depends on the
@ -1482,7 +1514,7 @@ and
 .IP ""
 Reasonable
 .I depth
-for Hash Chains is 4\-100 and 16\-1000 for Binary Trees.
+for Hash Chains is 4\(en100 and 16\(en1000 for Binary Trees.
 Using very high values for
 .I depth
 can make the encoder extremely slow with some files.
@ -1523,7 +1555,7 @@ A BCJ filter converts relative addresses in
 the machine code to their absolute counterparts.
 This doesn't change the size of the data,
 but it increases redundancy,
-which can help LZMA2 to produce 0\-15\ % smaller
+which can help LZMA2 to produce a 0\(en15\ % smaller
 .B .xz
 file.
 The BCJ filters are always reversible,
@ -1551,7 +1583,7 @@ the compression ratio:
 .RS
 .IP \(bu 3
 Some types of files containing executable code
-(e.g. object files, static libraries, and Linux kernel modules)
+(for example, object files, static libraries, and Linux kernel modules)
 have the addresses in the instructions filled with filler values.
 These BCJ filters will still do the address conversion,
 which will make the compression worse with these files.
@ -1625,12 +1657,12 @@ The Delta filter can be only used as a non-last filter
 in the filter chain.
 .IP ""
 Currently only simple byte-wise delta calculation is supported.
-It can be useful when compressing e.g. uncompressed bitmap images
+It can be useful when compressing, for example, uncompressed bitmap images
 or uncompressed PCM audio.
 However, special purpose algorithms may give significantly better
 results than Delta + LZMA2.
 This is true especially with audio,
-which compresses faster and better e.g. with
+which compresses faster and better, for example, with
 .BR flac (1).
 .IP ""
 Supported
@ -1642,7 +1674,7 @@ Specify the
 .I distance
 of the delta calculation in bytes.
 .I distance
-must be 1\-256.
+must be 1\(en256.
 The default is 1.
 .IP ""
 For example, with
@ -1701,7 +1733,7 @@ known and a couple of seconds have already passed since
 .B xz
 started processing the file.
 The time is shown in a less precise format which
-never has any colons, e.g. 2 min 30 s.
+never has any colons, for example, 2 min 30 s.
 .RE
 .IP ""
 When standard error is not a terminal,
@ -1714,7 +1746,7 @@ on a single line to standard error after compressing or
 decompressing the file.
 The speed and elapsed time are included only when
 the operation took at least a few seconds.
-If the operation didn't finish, e.g. due to user interruption,
+If the operation didn't finish, for example, due to user interruption,
 also the completion percentage is printed
 if the size of the input file is known.
 .TP
@ -1740,7 +1772,7 @@ See the section
 .B "ROBOT MODE"
 for details.
 .TP
-.BR \-\-info\-memory
+.B \-\-info\-memory
 Display, in human-readable format, how much physical memory (RAM)
 .B xz
 thinks the system has and the memory usage limits for compression
@ -1916,8 +1948,8 @@ Compressed size of the file
 .IP 5. 4
 Uncompressed size of the file
 .IP 6. 4
-Compression ratio, for example
-.BR 0.123.
+Compression ratio, for example,
+.BR 0.123 .
 If ratio is over 9.999, three dashes
 .RB ( \-\-\- )
 are displayed instead of the ratio.
@ -2168,9 +2200,9 @@ This is for passing options to
 when it is not possible to set the options directly on the
 .B xz
 command line.
-This is the case e.g. when
+This is the case when
 .B xz
-is run by a script or tool, e.g. GNU
+is run by a script or tool, for example, GNU
 .BR tar (1):
 .RS
 .RS
@ -2184,11 +2216,12 @@ XZ_OPT=\-2v tar caf foo.tar.xz foo
 .RE
 .IP ""
 Scripts may use
-.B XZ_OPT
-e.g. to set script-specific default compression options.
+.BR XZ_OPT ,
+for example, to set script-specific default compression options.
 It is still recommended to allow users to override
 .B XZ_OPT
-if that is reasonable, e.g. in
+if that is reasonable.
+For example, in
 .BR sh (1)
 scripts one may use something like this:
 .RS
@ -2210,7 +2243,7 @@ is practically a superset of
 .BR lzma ,
 .BR unlzma ,
 and
-.BR lzcat
+.B lzcat
 as found from LZMA Utils 4.32.x.
 In most cases, it is possible to replace
 LZMA Utils with XZ Utils without breaking existing scripts.
@ -2284,7 +2317,7 @@ The alternative is to mark that uncompressed size is unknown
 and use end-of-payload marker to indicate
 where the decompressor should stop.
 LZMA Utils uses this method when uncompressed size isn't known,
-which is the case for example in pipes.
+which is the case, for example, in pipes.
 .PP
 .B xz
 supports decompressing
@ -2480,7 +2513,7 @@ Create
 with the preset
 .B \-4e
 .RB ( "\-4 \-\-extreme" ),
-which is slower than e.g. the default
+which is slower than the default
 .BR \-6 ,
 but needs less memory for compression and decompression (48\ MiB
 and 5\ MiB, respectively):
@ -2590,7 +2623,7 @@ but if a limit has already been set, don't increase it:
 .PP
 .nf
 .ft CW
-NEWLIM=$((123 << 20))  # 123 MiB
+NEWLIM=$((123 << 20))\ \ # 123 MiB
 OLDLIM=$(xz \-\-robot \-\-info\-memory | cut \-f3)
 if [ $OLDLIM \-eq 0 \-o $OLDLIM \-gt $NEWLIM ]; then
    XZ_OPT="$XZ_OPT \-\-memlimit\-decompress=$NEWLIM"
@ -2634,10 +2667,10 @@ Preset;CompCPU
 .RE
 .PP
 If you know that a file requires
-somewhat big dictionary (e.g. 32 MiB) to compress well,
+a somewhat big dictionary (for example, 32\ MiB) to compress well,
 but you want to compress it quicker than
 .B "xz \-8"
-would do, a preset with a low CompCPU value (e.g. 1)
+would do, a preset with a low CompCPU value (for example, 1)
 can be modified to use a bigger dictionary:
 .RS
 .PP
@ -2687,9 +2720,8 @@ the size of the uncompressed file is waste of memory,
 so the above command isn't useful for small files.
 .PP
 Sometimes the compression time doesn't matter,
-but the decompressor memory usage has to be kept low
-e.g. to make it possible to decompress the file on
-an embedded system.
+but the decompressor memory usage has to be kept low, for example,
+to make it possible to decompress the file on an embedded system.
 The following command uses
 .B \-6e
 .RB ( "\-6 \-\-extreme" )
@ -2720,7 +2752,7 @@ might help too, but usually
 and
 .I pb
 are more important.
-E.g. a source code archive contains mostly US-ASCII text,
+For example, a source code archive contains mostly US-ASCII text,
 so something like the following might give
 slightly (like 0.1\ %) smaller file than
 .B "xz \-6e"
@ -2737,7 +2769,7 @@ xz \-\-lzma2=preset=6e,pb=0,lc=4 source_code.tar
 .PP
 Using another filter together with LZMA2 can improve
 compression with certain file types.
-E.g. to compress a x86-32 or x86-64 shared library
+For example, to compress an x86-32 or x86-64 shared library
 using the x86 BCJ filter:
 .RS
 .PP
@ -2766,10 +2798,10 @@ which has a few more advanced filters than simple
 delta but uses Deflate for the actual compression.
 .PP
 The image has to be saved in uncompressed format,
-e.g. as uncompressed TIFF.
+for example, as uncompressed TIFF.
 The distance parameter of the Delta filter is set
 to match the number of bytes per pixel in the image.
-E.g. 24-bit RGB bitmap needs
+For example, a 24-bit RGB bitmap needs
 .BR dist=3 ,
 and it is also good to pass
 .B pb=0
@ -2783,7 +2815,7 @@ xz \-\-delta=dist=3 \-\-lzma2=pb=0 foo.tiff
 .fi
 .RE
 .PP
-If multiple images have been put into a single archive (e.g.\&
+If multiple images have been put into a single archive (for example,
 .BR .tar ),
 the Delta filter will work on that too as long as all images
 have the same number of bytes per pixel.