Port NetBSD improvements:
- Add -l support for xz files.
- Add lzip support to gzip, based on the example lzip decoder.

Obtained from: NetBSD
MFC after: 2 weeks
Relnotes: yes
This commit is contained in:
parent
eef000151a
commit
b4e032be86
@ -1,4 +1,4 @@
|
||||
.\" $NetBSD: gzip.1,v 1.30 2017/10/22 17:36:49 abhinav Exp $
|
||||
.\" $NetBSD: gzip.1,v 1.31 2018/10/26 22:10:15 christos Exp $
|
||||
.\"
|
||||
.\" Copyright (c) 1997, 2003, 2004, 2008, 2009, 2015, 2017 Matthew R. Green
|
||||
.\" All rights reserved.
|
||||
@ -25,7 +25,7 @@
|
||||
.\" SUCH DAMAGE.
|
||||
.\"
|
||||
.\" $FreeBSD$
|
||||
.Dd November 21, 2017
|
||||
.Dd January 7, 2019
|
||||
.Dt GZIP 1
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -109,6 +109,7 @@ This version of
|
||||
is also capable of decompressing files compressed using
|
||||
.Xr compress 1 ,
|
||||
.Xr bzip2 1 ,
|
||||
.Ar lzip ,
|
||||
or
|
||||
.Xr xz 1 .
|
||||
.Sh OPTIONS
|
||||
@ -224,7 +225,7 @@ This implementation of
|
||||
was ported based on the
|
||||
.Nx
|
||||
.Nm
|
||||
version 20170803,
|
||||
version 20181111,
|
||||
and first appeared in
|
||||
.Fx 7.0 .
|
||||
.Sh AUTHORS
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* $NetBSD: gzip.c,v 1.113 2018/06/12 00:42:17 kamil Exp $ */
|
||||
/* $NetBSD: gzip.c,v 1.116 2018/10/27 11:39:12 skrll Exp $ */
|
||||
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause-NetBSD
|
||||
@ -83,6 +83,9 @@ enum filetype {
|
||||
#endif
|
||||
#ifndef NO_XZ_SUPPORT
|
||||
FT_XZ,
|
||||
#endif
|
||||
#ifndef NO_LZ_SUPPORT
|
||||
FT_LZ,
|
||||
#endif
|
||||
FT_LAST,
|
||||
FT_UNKNOWN
|
||||
@ -110,6 +113,11 @@ enum filetype {
|
||||
#define XZ_MAGIC "\3757zXZ"
|
||||
#endif
|
||||
|
||||
#ifndef NO_LZ_SUPPORT
|
||||
#define LZ_SUFFIX ".lz"
|
||||
#define LZ_MAGIC "LZIP"
|
||||
#endif
|
||||
|
||||
#define GZ_SUFFIX ".gz"
|
||||
|
||||
#define BUFLEN (64 * 1024)
|
||||
@ -154,6 +162,9 @@ static suffixes_t suffixes[] = {
|
||||
#endif
|
||||
#ifndef NO_XZ_SUPPORT
|
||||
SUFFIX(XZ_SUFFIX, ""),
|
||||
#endif
|
||||
#ifndef NO_LZ_SUPPORT
|
||||
SUFFIX(LZ_SUFFIX, ""),
|
||||
#endif
|
||||
SUFFIX(GZ_SUFFIX, ""), /* Overwritten by -S "" */
|
||||
#endif /* SMALL */
|
||||
@ -162,7 +173,7 @@ static suffixes_t suffixes[] = {
|
||||
#define NUM_SUFFIXES (nitems(suffixes))
|
||||
#define SUFFIX_MAXLEN 30
|
||||
|
||||
static const char gzip_version[] = "FreeBSD gzip 20171121";
|
||||
static const char gzip_version[] = "FreeBSD gzip 20190107";
|
||||
|
||||
#ifndef SMALL
|
||||
static const char gzip_copyright[] = \
|
||||
@ -246,6 +257,7 @@ static void display_license(void);
|
||||
static const suffixes_t *check_suffix(char *, int);
|
||||
static ssize_t read_retry(int, void *, size_t);
|
||||
static ssize_t write_retry(int, const void *, size_t);
|
||||
static void print_list_out(off_t, off_t, const char*);
|
||||
|
||||
#ifdef SMALL
|
||||
#define infile_set(f,t) infile_set(f)
|
||||
@ -289,6 +301,11 @@ static off_t unpack(int, int, char *, size_t, off_t *);
|
||||
|
||||
#ifndef NO_XZ_SUPPORT
|
||||
static off_t unxz(int, int, char *, size_t, off_t *);
|
||||
static off_t unxz_len(int);
|
||||
#endif
|
||||
|
||||
#ifndef NO_LZ_SUPPORT
|
||||
static off_t unlz(int, int, char *, size_t, off_t *);
|
||||
#endif
|
||||
|
||||
#ifdef SMALL
|
||||
@ -1158,6 +1175,11 @@ file_gettype(u_char *buf)
|
||||
if (memcmp(buf, XZ_MAGIC, 4) == 0) /* XXX: We only have 4 bytes */
|
||||
return FT_XZ;
|
||||
else
|
||||
#endif
|
||||
#ifndef NO_LZ_SUPPORT
|
||||
if (memcmp(buf, LZ_MAGIC, 4) == 0)
|
||||
return FT_LZ;
|
||||
else
|
||||
#endif
|
||||
return FT_UNKNOWN;
|
||||
}
|
||||
@ -1632,14 +1654,23 @@ file_uncompress(char *file, char *outfile, size_t outsize)
|
||||
#ifndef NO_XZ_SUPPORT
|
||||
case FT_XZ:
|
||||
if (lflag) {
|
||||
maybe_warnx("no -l with xz files");
|
||||
goto lose;
|
||||
size = unxz_len(fd);
|
||||
print_list_out(in_size, size, file);
|
||||
return -1;
|
||||
}
|
||||
|
||||
size = unxz(fd, zfd, NULL, 0, NULL);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#ifndef NO_LZ_SUPPORT
|
||||
case FT_LZ:
|
||||
if (lflag) {
|
||||
maybe_warnx("no -l with lzip files");
|
||||
goto lose;
|
||||
}
|
||||
size = unlz(fd, zfd, NULL, 0, NULL);
|
||||
break;
|
||||
#endif
|
||||
#ifndef SMALL
|
||||
case FT_UNKNOWN:
|
||||
if (lflag) {
|
||||
@ -1871,6 +1902,12 @@ handle_stdin(void)
|
||||
usize = unxz(STDIN_FILENO, STDOUT_FILENO,
|
||||
(char *)header1, sizeof header1, &gsize);
|
||||
break;
|
||||
#endif
|
||||
#ifndef NO_LZ_SUPPORT
|
||||
case FT_LZ:
|
||||
usize = unlz(STDIN_FILENO, STDOUT_FILENO,
|
||||
(char *)header1, sizeof header1, &gsize);
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -2197,6 +2234,12 @@ print_list(int fd, off_t out, const char *outfile, time_t ts)
|
||||
#else
|
||||
(void)&ts; /* XXX */
|
||||
#endif
|
||||
print_list_out(out, in, outfile);
|
||||
}
|
||||
|
||||
static void
|
||||
print_list_out(off_t out, off_t in, const char *outfile)
|
||||
{
|
||||
printf("%12llu %12llu ", (unsigned long long)out, (unsigned long long)in);
|
||||
print_ratio(in, out, stdout);
|
||||
printf(" %s\n", outfile);
|
||||
@ -2271,6 +2314,9 @@ display_version(void)
|
||||
#ifndef NO_XZ_SUPPORT
|
||||
#include "unxz.c"
|
||||
#endif
|
||||
#ifndef NO_LZ_SUPPORT
|
||||
#include "unlz.c"
|
||||
#endif
|
||||
|
||||
static ssize_t
|
||||
read_retry(int fd, void *buf, size_t sz)
|
||||
|
646
usr.bin/gzip/unlz.c
Normal file
646
usr.bin/gzip/unlz.c
Normal file
@ -0,0 +1,646 @@
|
||||
/* $NetBSD: unlz.c,v 1.6 2018/11/11 01:42:36 christos Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2018 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to The NetBSD Foundation
|
||||
* by Christos Zoulas.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
/* Lzd - Educational decompressor for the lzip format
|
||||
Copyright (C) 2013-2018 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software. Redistribution and use in source and
|
||||
binary forms, with or without modification, are permitted provided
|
||||
that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*/
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#define LZ_STATES 12
|
||||
|
||||
#define LITERAL_CONTEXT_BITS 3
|
||||
#define POS_STATE_BITS 2
|
||||
#define POS_STATES (1 << POS_STATE_BITS)
|
||||
#define POS_STATE_MASK (POS_STATES - 1)
|
||||
|
||||
#define STATES 4
|
||||
#define DIS_SLOT_BITS 6
|
||||
|
||||
#define DIS_MODEL_START 4
|
||||
#define DIS_MODEL_END 14
|
||||
|
||||
#define MODELED_DISTANCES (1 << (DIS_MODEL_END / 2))
|
||||
#define DIS_ALIGN_BITS 4
|
||||
#define DIS_ALIGN_SIZE (1 << DIS_ALIGN_BITS)
|
||||
|
||||
#define LOW_BITS 3
|
||||
#define MID_BITS 3
|
||||
#define HIGH_BITS 8
|
||||
|
||||
#define LOW_SYMBOLS (1 << LOW_BITS)
|
||||
#define MID_SYMBOLS (1 << MID_BITS)
|
||||
#define HIGH_SYMBOLS (1 << HIGH_BITS)
|
||||
|
||||
#define MAX_SYMBOLS (LOW_SYMBOLS + MID_SYMBOLS + HIGH_SYMBOLS)
|
||||
|
||||
#define MIN_MATCH_LEN 2
|
||||
|
||||
#define BIT_MODEL_MOVE_BITS 5
|
||||
#define BIT_MODEL_TOTAL_BITS 11
|
||||
#define BIT_MODEL_TOTAL (1 << BIT_MODEL_TOTAL_BITS)
|
||||
#define BIT_MODEL_INIT (BIT_MODEL_TOTAL / 2)
|
||||
|
||||
/*
 * LZMA state machine helpers.  States below 7 are the ones reached after
 * a literal; 7 and above are reached after some kind of match.
 */

/* State entered after decoding a literal, indexed by the current state. */
static const int lz_st_next[] = {
	0, 0, 0, 0,
	1, 2, 3,
	4, 5, 6,
	4, 5,
};

/* True when the previous symbol was a literal (state is below 7). */
static bool
lz_st_is_char(int st)
{
	return !(st >= 7);
}

/* State after a literal. */
static int
lz_st_get_char(int st)
{
	const int next = lz_st_next[st];

	return next;
}

/* State after a match with a newly decoded distance. */
static int
lz_st_get_match(int st)
{
	if (lz_st_is_char(st))
		return 7;
	return 10;
}

/* State after a match that reuses one of the remembered distances. */
static int
lz_st_get_rep(int st)
{
	if (lz_st_is_char(st))
		return 8;
	return 11;
}

/* State after a one-byte copy from the most recent distance. */
static int
lz_st_get_short_rep(int st)
{
	if (lz_st_is_char(st))
		return 9;
	return 11;
}
|
||||
|
||||
struct lz_len_model {
|
||||
int choice1;
|
||||
int choice2;
|
||||
int bm_low[POS_STATES][LOW_SYMBOLS];
|
||||
int bm_mid[POS_STATES][MID_SYMBOLS];
|
||||
int bm_high[HIGH_SYMBOLS];
|
||||
};
|
||||
|
||||
/* Byte-wise CRC-32 lookup table (polynomial 0xEDB88320), see lz_crc_init(). */
static uint32_t lz_crc[256];

/* Fill lz_crc[] with the CRC of every possible byte value. */
static void
lz_crc_init(void)
{
	const size_t nent = sizeof(lz_crc) / sizeof(lz_crc[0]);

	for (unsigned n = 0; n < nent; n++) {
		unsigned v = n;

		for (unsigned bit = 8; bit > 0; bit--)
			v = (v & 1) ? (0xEDB88320U ^ (v >> 1)) : (v >> 1);
		lz_crc[n] = v;
	}
}

/*
 * Fold len bytes starting at buf into the running CRC stored in *crc.
 * No initial or final inversion is applied here.
 */
static void
lz_crc_update(uint32_t *crc, const uint8_t *buf, size_t len)
{
	uint32_t acc = *crc;

	for (size_t k = 0; k != len; k++)
		acc = lz_crc[(acc ^ buf[k]) & 0xFF] ^ (acc >> 8);
	*crc = acc;
}
|
||||
|
||||
/* Range decoder state: input stream plus the current code and range. */
struct lz_range_decoder {
	FILE *fp;
	uint32_t code;
	uint32_t range;
};

/*
 * Attach the range decoder to fp and prime it with the first five input
 * bytes.  Returns -1 if the stream is in an error state afterwards,
 * 0 otherwise.
 */
static int
lz_rd_create(struct lz_range_decoder *rd, FILE *fp)
{
	uint32_t code = 0;
	int left = 5;

	rd->fp = fp;
	rd->range = 0xFFFFFFFFU;
	while (left-- > 0)
		code = (code << 8) | (uint8_t)getc(fp);
	rd->code = code;

	if (ferror(fp))
		return -1;
	return 0;
}
|
||||
|
||||
static unsigned
|
||||
lz_rd_decode(struct lz_range_decoder *rd, int num_bits)
|
||||
{
|
||||
unsigned symbol = 0;
|
||||
|
||||
for (int i = num_bits; i > 0; i--) {
|
||||
rd->range >>= 1;
|
||||
symbol <<= 1;
|
||||
if (rd->code >= rd->range) {
|
||||
rd->code -= rd->range;
|
||||
symbol |= 1;
|
||||
}
|
||||
if (rd->range <= 0x00FFFFFFU) {
|
||||
rd->range <<= 8;
|
||||
rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
|
||||
}
|
||||
}
|
||||
|
||||
return symbol;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
lz_rd_decode_bit(struct lz_range_decoder *rd, int *bm)
|
||||
{
|
||||
unsigned symbol;
|
||||
const uint32_t bound = (rd->range >> BIT_MODEL_TOTAL_BITS) * *bm;
|
||||
|
||||
if(rd->code < bound) {
|
||||
rd->range = bound;
|
||||
*bm += (BIT_MODEL_TOTAL - *bm) >> BIT_MODEL_MOVE_BITS;
|
||||
symbol = 0;
|
||||
}
|
||||
else {
|
||||
rd->range -= bound;
|
||||
rd->code -= bound;
|
||||
*bm -= *bm >> BIT_MODEL_MOVE_BITS;
|
||||
symbol = 1;
|
||||
}
|
||||
|
||||
if (rd->range <= 0x00FFFFFFU) {
|
||||
rd->range <<= 8;
|
||||
rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
|
||||
}
|
||||
return symbol;
|
||||
}
|
||||
|
||||
/*
 * Decode a num_bits-wide symbol by walking the binary tree of bit models
 * rooted at bm[1]; each decoded bit selects the next node.
 */
static unsigned
lz_rd_decode_tree(struct lz_range_decoder *rd, int *bm, int num_bits)
{
	unsigned node = 1;
	int left = num_bits;

	while (left > 0) {
		const unsigned bit = lz_rd_decode_bit(rd, &bm[node]);

		node = (node << 1) | bit;
		left--;
	}

	/* Strip the leading marker bit that was used as the root index. */
	return node - (1 << num_bits);
}
|
||||
|
||||
/*
 * Decode a num_bits-wide symbol with lz_rd_decode_tree() and return it
 * with the order of its num_bits low bits reversed.
 */
static unsigned
lz_rd_decode_tree_reversed(struct lz_range_decoder *rd, int *bm, int num_bits)
{
	unsigned fwd = lz_rd_decode_tree(rd, bm, num_bits);
	unsigned rev = 0;
	int left = num_bits;

	while (left > 0) {
		rev = (rev << 1) | (fwd & 1);
		fwd >>= 1;
		left--;
	}

	return rev;
}
|
||||
|
||||
/*
 * Decode a literal byte using match_byte as context.  While the decoded
 * bits agree with the corresponding bits of match_byte the models in the
 * upper part of bm are used; after the first disagreement the remaining
 * bits come from the plain literal tree at the start of bm.
 */
static unsigned
lz_rd_decode_matched(struct lz_range_decoder *rd, int *bm, int match_byte)
{
	unsigned symbol = 1;
	int shift = 8;

	while (shift-- > 0) {
		const unsigned match_bit = (match_byte >> shift) & 1;
		const unsigned bit = lz_rd_decode_bit(rd,
		    &bm[symbol + (match_bit << 8) + 0x100]);

		symbol = (symbol << 1) | bit;
		if (match_bit == bit)
			continue;

		/* Context lost: finish the byte with the plain tree. */
		while (symbol < 0x100) {
			const unsigned b = lz_rd_decode_bit(rd, &bm[symbol]);

			symbol = (symbol << 1) | b;
		}
		break;
	}
	return symbol & 0xFF;
}
|
||||
|
||||
static unsigned
|
||||
lz_rd_decode_len(struct lz_range_decoder *rd, struct lz_len_model *lm,
|
||||
int pos_state)
|
||||
{
|
||||
if (lz_rd_decode_bit(rd, &lm->choice1) == 0)
|
||||
return lz_rd_decode_tree(rd, lm->bm_low[pos_state], LOW_BITS);
|
||||
|
||||
if (lz_rd_decode_bit(rd, &lm->choice2) == 0) {
|
||||
return LOW_SYMBOLS +
|
||||
lz_rd_decode_tree(rd, lm->bm_mid[pos_state], MID_BITS);
|
||||
}
|
||||
|
||||
return LOW_SYMBOLS + MID_SYMBOLS +
|
||||
lz_rd_decode_tree(rd, lm->bm_high, HIGH_BITS);
|
||||
}
|
||||
|
||||
struct lz_decoder {
|
||||
FILE *fin, *fout;
|
||||
off_t pos, ppos, spos, dict_size;
|
||||
bool wrapped;
|
||||
uint32_t crc;
|
||||
uint8_t *obuf;
|
||||
struct lz_range_decoder rdec;
|
||||
};
|
||||
|
||||
static int
|
||||
lz_flush(struct lz_decoder *lz)
|
||||
{
|
||||
off_t offs = lz->pos - lz->spos;
|
||||
if (offs <= 0)
|
||||
return -1;
|
||||
|
||||
size_t size = (size_t)offs;
|
||||
lz_crc_update(&lz->crc, lz->obuf + lz->spos, size);
|
||||
if (fwrite(lz->obuf + lz->spos, 1, size, lz->fout) != size)
|
||||
return -1;
|
||||
|
||||
lz->wrapped = lz->pos >= lz->dict_size;
|
||||
if (lz->wrapped) {
|
||||
lz->ppos += lz->pos;
|
||||
lz->pos = 0;
|
||||
}
|
||||
lz->spos = lz->pos;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
lz_destroy(struct lz_decoder *lz)
|
||||
{
|
||||
if (lz->fin)
|
||||
fclose(lz->fin);
|
||||
if (lz->fout)
|
||||
fclose(lz->fout);
|
||||
free(lz->obuf);
|
||||
}
|
||||
|
||||
static int
|
||||
lz_create(struct lz_decoder *lz, int fin, int fdout, int dict_size)
|
||||
{
|
||||
memset(lz, 0, sizeof(*lz));
|
||||
|
||||
lz->fin = fdopen(dup(fin), "r");
|
||||
if (lz->fin == NULL)
|
||||
goto out;
|
||||
|
||||
lz->fout = fdopen(dup(fdout), "w");
|
||||
if (lz->fout == NULL)
|
||||
goto out;
|
||||
|
||||
lz->pos = lz->ppos = lz->spos = 0;
|
||||
lz->crc = ~0;
|
||||
lz->dict_size = dict_size;
|
||||
lz->wrapped = false;
|
||||
|
||||
lz->obuf = malloc(dict_size);
|
||||
if (lz->obuf == NULL)
|
||||
goto out;
|
||||
|
||||
if (lz_rd_create(&lz->rdec, lz->fin) == -1)
|
||||
goto out;
|
||||
return 0;
|
||||
out:
|
||||
lz_destroy(lz);
|
||||
return -1;
|
||||
}
|
||||
|
||||
static uint8_t
|
||||
lz_peek(const struct lz_decoder *lz, unsigned ahead)
|
||||
{
|
||||
off_t diff = lz->pos - ahead - 1;
|
||||
|
||||
if (diff >= 0)
|
||||
return lz->obuf[diff];
|
||||
|
||||
if (lz->wrapped)
|
||||
return lz->obuf[lz->dict_size + diff];
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
lz_put(struct lz_decoder *lz, uint8_t b)
|
||||
{
|
||||
lz->obuf[lz->pos++] = b;
|
||||
if (lz->dict_size == lz->pos)
|
||||
lz_flush(lz);
|
||||
}
|
||||
|
||||
static off_t
|
||||
lz_get_data_position(const struct lz_decoder *lz)
|
||||
{
|
||||
return lz->ppos + lz->pos;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
lz_get_crc(const struct lz_decoder *lz)
|
||||
{
|
||||
return lz->crc ^ 0xffffffffU;
|
||||
}
|
||||
|
||||
static void
|
||||
lz_bm_init(int *a, size_t l)
|
||||
{
|
||||
for (size_t i = 0; i < l; i++)
|
||||
a[i] = BIT_MODEL_INIT;
|
||||
}
|
||||
|
||||
/*
 * Initializers for bit-model arrays.  The argument must be a real array
 * (not a pointer) because nitems() is applied to it.  Arguments are
 * parenthesized so that any lvalue expression can be passed safely.
 */

/* Initialize a one-dimensional array of bit models. */
#define LZ_BM_INIT(a)	lz_bm_init((a), nitems(a))

/* Initialize a two-dimensional array of bit models, row by row. */
#define LZ_BM_INIT2(a)	do {						\
	size_t l = nitems((a)[0]);					\
	for (size_t i = 0; i < nitems(a); i++)				\
		lz_bm_init((a)[i], l);					\
} while (/*CONSTCOND*/0)

/* Initialize every member of a struct lz_len_model. */
#define LZ_MODEL_INIT(a)	do {					\
	(a).choice1 = BIT_MODEL_INIT;					\
	(a).choice2 = BIT_MODEL_INIT;					\
	LZ_BM_INIT2((a).bm_low);					\
	LZ_BM_INIT2((a).bm_mid);					\
	LZ_BM_INIT((a).bm_high);					\
} while (/*CONSTCOND*/0)
|
||||
|
||||
static bool
|
||||
lz_decode_member(struct lz_decoder *lz)
|
||||
{
|
||||
int bm_literal[1 << LITERAL_CONTEXT_BITS][0x300];
|
||||
int bm_match[LZ_STATES][POS_STATES];
|
||||
int bm_rep[4][LZ_STATES];
|
||||
int bm_len[LZ_STATES][POS_STATES];
|
||||
int bm_dis_slot[LZ_STATES][1 << DIS_SLOT_BITS];
|
||||
int bm_dis[MODELED_DISTANCES - DIS_MODEL_END + 1];
|
||||
int bm_align[DIS_ALIGN_SIZE];
|
||||
|
||||
LZ_BM_INIT2(bm_literal);
|
||||
LZ_BM_INIT2(bm_match);
|
||||
LZ_BM_INIT2(bm_rep);
|
||||
LZ_BM_INIT2(bm_len);
|
||||
LZ_BM_INIT2(bm_dis_slot);
|
||||
LZ_BM_INIT(bm_dis);
|
||||
LZ_BM_INIT(bm_align);
|
||||
|
||||
struct lz_len_model match_len_model;
|
||||
struct lz_len_model rep_len_model;
|
||||
|
||||
LZ_MODEL_INIT(match_len_model);
|
||||
LZ_MODEL_INIT(rep_len_model);
|
||||
|
||||
struct lz_range_decoder *rd = &lz->rdec;
|
||||
unsigned rep[4] = { 0 };
|
||||
|
||||
|
||||
int state = 0;
|
||||
|
||||
while (!feof(lz->fin) && !ferror(lz->fin)) {
|
||||
const int pos_state = lz_get_data_position(lz) & POS_STATE_MASK;
|
||||
// bit 1
|
||||
if (lz_rd_decode_bit(rd, &bm_match[state][pos_state]) == 0) {
|
||||
const uint8_t prev_byte = lz_peek(lz, 0);
|
||||
const int literal_state =
|
||||
prev_byte >> (8 - LITERAL_CONTEXT_BITS);
|
||||
int *bm = bm_literal[literal_state];
|
||||
if (lz_st_is_char(state))
|
||||
lz_put(lz, lz_rd_decode_tree(rd, bm, 8));
|
||||
else {
|
||||
int peek = lz_peek(lz, rep[0]);
|
||||
lz_put(lz, lz_rd_decode_matched(rd, bm, peek));
|
||||
}
|
||||
state = lz_st_get_char(state);
|
||||
continue;
|
||||
}
|
||||
int len;
|
||||
// bit 2
|
||||
if (lz_rd_decode_bit(rd, &bm_rep[0][state]) != 0) {
|
||||
// bit 3
|
||||
if (lz_rd_decode_bit(rd, &bm_rep[1][state]) == 0) {
|
||||
// bit 4
|
||||
if (lz_rd_decode_bit(rd,
|
||||
&bm_len[state][pos_state]) == 0)
|
||||
{
|
||||
state = lz_st_get_short_rep(state);
|
||||
lz_put(lz, lz_peek(lz, rep[0]));
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
unsigned distance;
|
||||
// bit 4
|
||||
if (lz_rd_decode_bit(rd, &bm_rep[2][state])
|
||||
== 0)
|
||||
distance = rep[1];
|
||||
else {
|
||||
// bit 5
|
||||
if (lz_rd_decode_bit(rd,
|
||||
&bm_rep[3][state]) == 0)
|
||||
distance = rep[2];
|
||||
else {
|
||||
distance = rep[3];
|
||||
rep[3] = rep[2];
|
||||
}
|
||||
rep[2] = rep[1];
|
||||
}
|
||||
rep[1] = rep[0];
|
||||
rep[0] = distance;
|
||||
}
|
||||
state = lz_st_get_rep(state);
|
||||
len = MIN_MATCH_LEN +
|
||||
lz_rd_decode_len(rd, &rep_len_model, pos_state);
|
||||
} else {
|
||||
rep[3] = rep[2]; rep[2] = rep[1]; rep[1] = rep[0];
|
||||
len = MIN_MATCH_LEN +
|
||||
lz_rd_decode_len(rd, &match_len_model, pos_state);
|
||||
const int len_state =
|
||||
MIN(len - MIN_MATCH_LEN, STATES - 1);
|
||||
rep[0] = lz_rd_decode_tree(rd, bm_dis_slot[len_state],
|
||||
DIS_SLOT_BITS);
|
||||
if (rep[0] >= DIS_MODEL_START) {
|
||||
const unsigned dis_slot = rep[0];
|
||||
const int direct_bits = (dis_slot >> 1) - 1;
|
||||
rep[0] = (2 | (dis_slot & 1)) << direct_bits;
|
||||
if (dis_slot < DIS_MODEL_END)
|
||||
rep[0] += lz_rd_decode_tree_reversed(rd,
|
||||
&bm_dis[rep[0] - dis_slot],
|
||||
direct_bits);
|
||||
else {
|
||||
rep[0] += lz_rd_decode(rd, direct_bits
|
||||
- DIS_ALIGN_BITS) << DIS_ALIGN_BITS;
|
||||
rep[0] += lz_rd_decode_tree_reversed(rd,
|
||||
bm_align, DIS_ALIGN_BITS);
|
||||
if (rep[0] == 0xFFFFFFFFU) {
|
||||
lz_flush(lz);
|
||||
return len == MIN_MATCH_LEN;
|
||||
}
|
||||
}
|
||||
}
|
||||
state = lz_st_get_match(state);
|
||||
if (rep[0] >= lz->dict_size ||
|
||||
(rep[0] >= lz->pos && !lz->wrapped)) {
|
||||
lz_flush(lz);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < len; i++)
|
||||
lz_put(lz, lz_peek(lz, rep[0]));
|
||||
}
|
||||
lz_flush(lz);
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* 0-3 CRC32 of the uncompressed data
|
||||
* 4-11 size of the uncompressed data
|
||||
* 12-19 member size including header and trailer
|
||||
*/
|
||||
#define TRAILER_SIZE 20
|
||||
|
||||
|
||||
static off_t
|
||||
lz_decode(int fin, int fdout, unsigned dict_size, off_t *insize)
|
||||
{
|
||||
struct lz_decoder lz;
|
||||
off_t rv = -1;
|
||||
|
||||
if (lz_create(&lz, fin, fdout, dict_size) == -1)
|
||||
return -1;
|
||||
|
||||
if (!lz_decode_member(&lz))
|
||||
goto out;
|
||||
|
||||
uint8_t trailer[TRAILER_SIZE];
|
||||
|
||||
for(size_t i = 0; i < nitems(trailer); i++)
|
||||
trailer[i] = (uint8_t)getc(lz.fin);
|
||||
|
||||
unsigned crc = 0;
|
||||
for (int i = 3; i >= 0; --i) {
|
||||
crc <<= 8;
|
||||
crc += trailer[i];
|
||||
}
|
||||
|
||||
int64_t data_size = 0;
|
||||
for (int i = 11; i >= 4; --i) {
|
||||
data_size <<= 8;
|
||||
data_size += trailer[i];
|
||||
}
|
||||
|
||||
if (crc != lz_get_crc(&lz) || data_size != lz_get_data_position(&lz))
|
||||
goto out;
|
||||
|
||||
rv = 0;
|
||||
for (int i = 19; i >= 12; --i) {
|
||||
rv <<= 8;
|
||||
rv += trailer[i];
|
||||
}
|
||||
if (insize)
|
||||
*insize = rv;
|
||||
#if 0
|
||||
/* Does not work with pipes */
|
||||
rv = ftello(lz.fout);
|
||||
#else
|
||||
rv = data_size;
|
||||
#endif
|
||||
out:
|
||||
lz_destroy(&lz);
|
||||
return rv;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* 0-3 magic
|
||||
* 4 version
|
||||
* 5 coded dict_size
|
||||
*/
|
||||
#define HDR_SIZE 6
|
||||
#define MIN_DICTIONARY_SIZE	(1 << 12)
#define MAX_DICTIONARY_SIZE	(1 << 29)

static const char hdrmagic[] = { 'L', 'Z', 'I', 'P', 1 };

/*
 * Decode the coded dictionary size byte of the lzip header.
 *
 * Bits 4-0 hold the base-2 logarithm of a base size; bits 7-5 hold the
 * number of sixteenths of that base size to subtract from it.
 * Example: 0xD3 = 2^19 - 6 * 2^15 = 320 KiB.
 *
 * Returns the dictionary size, or 0 if it is outside
 * [MIN_DICTIONARY_SIZE, MAX_DICTIONARY_SIZE].
 */
static unsigned
lz_get_dict_size(unsigned char c)
{
	/* Unsigned shift: the exponent can be as large as 31. */
	const unsigned base = 1U << (c & 0x1f);
	const unsigned dict_size = base - (base >> 4) * ((c >> 5) & 0x7);

	if (dict_size < MIN_DICTIONARY_SIZE || dict_size > MAX_DICTIONARY_SIZE)
		return 0;
	return dict_size;
}
|
||||
|
||||
static off_t
|
||||
unlz(int fin, int fout, char *pre, size_t prelen, off_t *bytes_in)
|
||||
{
|
||||
if (lz_crc[0] == 0)
|
||||
lz_crc_init();
|
||||
|
||||
char header[HDR_SIZE];
|
||||
|
||||
if (prelen > sizeof(header))
|
||||
return -1;
|
||||
if (pre && prelen)
|
||||
memcpy(header, pre, prelen);
|
||||
|
||||
ssize_t nr = read(fin, header + prelen, sizeof(header) - prelen);
|
||||
switch (nr) {
|
||||
case -1:
|
||||
return -1;
|
||||
case 0:
|
||||
return prelen ? -1 : 0;
|
||||
default:
|
||||
if ((size_t)nr != sizeof(header) - prelen)
|
||||
return -1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (memcmp(header, hdrmagic, sizeof(hdrmagic)) != 0)
|
||||
return -1;
|
||||
|
||||
unsigned dict_size = lz_get_dict_size(header[5]);
|
||||
if (dict_size == 0)
|
||||
return -1;
|
||||
|
||||
return lz_decode(fin, fout, dict_size, bytes_in);
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
/* $NetBSD: unxz.c,v 1.7 2017/08/04 07:27:08 mrg Exp $ */
|
||||
/* $NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $ */
|
||||
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause-NetBSD
|
||||
@ -156,3 +156,322 @@ unxz(int i, int o, char *pre, size_t prelen, off_t *bytes_in)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
/*
|
||||
* Copied various bits and pieces from xz support code or brute force
|
||||
* replacements.
|
||||
*/
|
||||
|
||||
#define my_min(A,B) ((A)<(B)?(A):(B))
|
||||
|
||||
// Some systems have suboptimal BUFSIZ. Use a bit bigger value on them.
|
||||
// We also need that IO_BUFFER_SIZE is a multiple of 8 (sizeof(uint64_t))
|
||||
#if BUFSIZ <= 1024
|
||||
# define IO_BUFFER_SIZE 8192
|
||||
#else
|
||||
# define IO_BUFFER_SIZE (BUFSIZ & ~7U)
|
||||
#endif
|
||||
|
||||
/// is_sparse() accesses the buffer as uint64_t for maximum speed.
|
||||
/// Use an union to make sure that the buffer is properly aligned.
|
||||
typedef union {
|
||||
uint8_t u8[IO_BUFFER_SIZE];
|
||||
uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)];
|
||||
uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)];
|
||||
} io_buf;
|
||||
|
||||
|
||||
static bool
|
||||
io_pread(int fd, io_buf *buf, size_t size, off_t pos)
|
||||
{
|
||||
// Using lseek() and read() is more portable than pread() and
|
||||
// for us it is as good as real pread().
|
||||
if (lseek(fd, pos, SEEK_SET) != pos) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const size_t amount = read(fd, buf, size);
|
||||
if (amount == SIZE_MAX)
|
||||
return true;
|
||||
|
||||
if (amount != size) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Most of the following is copied (mostly verbatim) from the xz
|
||||
* distribution, from file src/xz/list.c
|
||||
*/
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
/// \file list.c
|
||||
/// \brief Listing information about .xz files
|
||||
//
|
||||
// Author: Lasse Collin
|
||||
//
|
||||
// This file has been put into the public domain.
|
||||
// You can do whatever you want with this file.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
/// Information about a .xz file
|
||||
typedef struct {
|
||||
/// Combined Index of all Streams in the file
|
||||
lzma_index *idx;
|
||||
|
||||
/// Total amount of Stream Padding
|
||||
uint64_t stream_padding;
|
||||
|
||||
/// Highest memory usage so far
|
||||
uint64_t memusage_max;
|
||||
|
||||
/// True if all Blocks so far have Compressed Size and
|
||||
/// Uncompressed Size fields
|
||||
bool all_have_sizes;
|
||||
|
||||
/// Oldest XZ Utils version that will decompress the file
|
||||
uint32_t min_version;
|
||||
|
||||
} xz_file_info;
|
||||
|
||||
#define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 }
|
||||
|
||||
|
||||
/// \brief Parse the Index(es) from the given .xz file
|
||||
///
|
||||
/// \param xfi Pointer to structure where the decoded information
|
||||
/// is stored.
|
||||
/// \param pair Input file
|
||||
///
|
||||
/// \return On success, false is returned. On error, true is returned.
|
||||
///
|
||||
// TODO: This function is pretty big. liblzma should have a function that
|
||||
// takes a callback function to parse the Index(es) from a .xz file to make
|
||||
// it easy for applications.
|
||||
static bool
|
||||
parse_indexes(xz_file_info *xfi, int src_fd)
|
||||
{
|
||||
struct stat st;
|
||||
|
||||
fstat(src_fd, &st);
|
||||
if (st.st_size <= 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) {
|
||||
return true;
|
||||
}
|
||||
|
||||
io_buf buf;
|
||||
lzma_stream_flags header_flags;
|
||||
lzma_stream_flags footer_flags;
|
||||
lzma_ret ret;
|
||||
|
||||
// lzma_stream for the Index decoder
|
||||
lzma_stream strm = LZMA_STREAM_INIT;
|
||||
|
||||
// All Indexes decoded so far
|
||||
lzma_index *combined_index = NULL;
|
||||
|
||||
// The Index currently being decoded
|
||||
lzma_index *this_index = NULL;
|
||||
|
||||
// Current position in the file. We parse the file backwards so
|
||||
// initialize it to point to the end of the file.
|
||||
off_t pos = st.st_size;
|
||||
|
||||
// Each loop iteration decodes one Index.
|
||||
do {
|
||||
// Check that there is enough data left to contain at least
|
||||
// the Stream Header and Stream Footer. This check cannot
|
||||
// fail in the first pass of this loop.
|
||||
if (pos < 2 * LZMA_STREAM_HEADER_SIZE) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
pos -= LZMA_STREAM_HEADER_SIZE;
|
||||
lzma_vli stream_padding = 0;
|
||||
|
||||
// Locate the Stream Footer. There may be Stream Padding which
|
||||
// we must skip when reading backwards.
|
||||
while (true) {
|
||||
if (pos < LZMA_STREAM_HEADER_SIZE) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (io_pread(src_fd, &buf,
|
||||
LZMA_STREAM_HEADER_SIZE, pos))
|
||||
goto error;
|
||||
|
||||
// Stream Padding is always a multiple of four bytes.
|
||||
int i = 2;
|
||||
if (buf.u32[i] != 0)
|
||||
break;
|
||||
|
||||
// To avoid calling io_pread() for every four bytes
|
||||
// of Stream Padding, take advantage that we read
|
||||
// 12 bytes (LZMA_STREAM_HEADER_SIZE) already and
|
||||
// check them too before calling io_pread() again.
|
||||
do {
|
||||
stream_padding += 4;
|
||||
pos -= 4;
|
||||
--i;
|
||||
} while (i >= 0 && buf.u32[i] == 0);
|
||||
}
|
||||
|
||||
// Decode the Stream Footer.
|
||||
ret = lzma_stream_footer_decode(&footer_flags, buf.u8);
|
||||
if (ret != LZMA_OK) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
// Check that the Stream Footer doesn't specify something
|
||||
// that we don't support. This can only happen if the xz
|
||||
// version is older than liblzma and liblzma supports
|
||||
// something new.
|
||||
//
|
||||
// It is enough to check Stream Footer. Stream Header must
|
||||
// match when it is compared against Stream Footer with
|
||||
// lzma_stream_flags_compare().
|
||||
if (footer_flags.version != 0) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
// Check that the size of the Index field looks sane.
|
||||
lzma_vli index_size = footer_flags.backward_size;
|
||||
if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
// Set pos to the beginning of the Index.
|
||||
pos -= index_size;
|
||||
|
||||
// Decode the Index.
|
||||
ret = lzma_index_decoder(&strm, &this_index, UINT64_MAX);
|
||||
if (ret != LZMA_OK) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
do {
|
||||
// Don't give the decoder more input than the
|
||||
// Index size.
|
||||
strm.avail_in = my_min(IO_BUFFER_SIZE, index_size);
|
||||
if (io_pread(src_fd, &buf, strm.avail_in, pos))
|
||||
goto error;
|
||||
|
||||
pos += strm.avail_in;
|
||||
index_size -= strm.avail_in;
|
||||
|
||||
strm.next_in = buf.u8;
|
||||
ret = lzma_code(&strm, LZMA_RUN);
|
||||
|
||||
} while (ret == LZMA_OK);
|
||||
|
||||
// If the decoding seems to be successful, check also that
|
||||
// the Index decoder consumed as much input as indicated
|
||||
// by the Backward Size field.
|
||||
if (ret == LZMA_STREAM_END)
|
||||
if (index_size != 0 || strm.avail_in != 0)
|
||||
ret = LZMA_DATA_ERROR;
|
||||
|
||||
if (ret != LZMA_STREAM_END) {
|
||||
// LZMA_BUFFER_ERROR means that the Index decoder
|
||||
// would have liked more input than what the Index
|
||||
// size should be according to Stream Footer.
|
||||
// The message for LZMA_DATA_ERROR makes more
|
||||
// sense in that case.
|
||||
if (ret == LZMA_BUF_ERROR)
|
||||
ret = LZMA_DATA_ERROR;
|
||||
|
||||
goto error;
|
||||
}
|
||||
|
||||
// Decode the Stream Header and check that its Stream Flags
|
||||
// match the Stream Footer.
|
||||
pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE;
|
||||
if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
pos -= lzma_index_total_size(this_index);
|
||||
if (io_pread(src_fd, &buf, LZMA_STREAM_HEADER_SIZE, pos))
|
||||
goto error;
|
||||
|
||||
ret = lzma_stream_header_decode(&header_flags, buf.u8);
|
||||
if (ret != LZMA_OK) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
ret = lzma_stream_flags_compare(&header_flags, &footer_flags);
|
||||
if (ret != LZMA_OK) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
// Store the decoded Stream Flags into this_index. This is
|
||||
// needed so that we can print which Check is used in each
|
||||
// Stream.
|
||||
ret = lzma_index_stream_flags(this_index, &footer_flags);
|
||||
if (ret != LZMA_OK)
|
||||
goto error;
|
||||
|
||||
// Store also the size of the Stream Padding field. It is
|
||||
// needed to show the offsets of the Streams correctly.
|
||||
ret = lzma_index_stream_padding(this_index, stream_padding);
|
||||
if (ret != LZMA_OK)
|
||||
goto error;
|
||||
|
||||
if (combined_index != NULL) {
|
||||
// Append the earlier decoded Indexes
|
||||
// after this_index.
|
||||
ret = lzma_index_cat(
|
||||
this_index, combined_index, NULL);
|
||||
if (ret != LZMA_OK) {
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
combined_index = this_index;
|
||||
this_index = NULL;
|
||||
|
||||
xfi->stream_padding += stream_padding;
|
||||
|
||||
} while (pos > 0);
|
||||
|
||||
lzma_end(&strm);
|
||||
|
||||
// All OK. Make combined_index available to the caller.
|
||||
xfi->idx = combined_index;
|
||||
return false;
|
||||
|
||||
error:
|
||||
// Something went wrong, free the allocated memory.
|
||||
lzma_end(&strm);
|
||||
lzma_index_end(combined_index, NULL);
|
||||
lzma_index_end(this_index, NULL);
|
||||
return true;
|
||||
}
|
||||
|
||||
/***************** end of copy from list.c *************************/
|
||||
|
||||
/*
|
||||
* Small wrapper to extract total length of a file
|
||||
*/
|
||||
off_t
|
||||
unxz_len(int fd)
|
||||
{
|
||||
xz_file_info xfi = XZ_FILE_INFO_INIT;
|
||||
if (!parse_indexes(&xfi, fd)) {
|
||||
off_t res = lzma_index_uncompressed_size(xfi.idx);
|
||||
lzma_index_end(xfi.idx, NULL);
|
||||
return res;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user