Port NetBSD improvements:

- Add -l support for xz files
 - Add lzip support to gzip based on the example lzip decoder.

Obtained from:	NetBSD
MFC after:	2 weeks
Relnotes:	yes
This commit is contained in:
delphij 2019-01-07 08:27:11 +00:00
parent eef000151a
commit b4e032be86
4 changed files with 1021 additions and 9 deletions

View File

@ -1,4 +1,4 @@
.\" $NetBSD: gzip.1,v 1.30 2017/10/22 17:36:49 abhinav Exp $
.\" $NetBSD: gzip.1,v 1.31 2018/10/26 22:10:15 christos Exp $
.\"
.\" Copyright (c) 1997, 2003, 2004, 2008, 2009, 2015, 2017 Matthew R. Green
.\" All rights reserved.
@ -25,7 +25,7 @@
.\" SUCH DAMAGE.
.\"
.\" $FreeBSD$
.Dd November 21, 2017
.Dd January 7, 2018
.Dt GZIP 1
.Os
.Sh NAME
@ -109,6 +109,7 @@ This version of
is also capable of decompressing files compressed using
.Xr compress 1 ,
.Xr bzip2 1 ,
.Ar lzip ,
or
.Xr xz 1 .
.Sh OPTIONS
@ -224,7 +225,7 @@ This implementation of
was ported based on the
.Nx
.Nm
version 20170803,
version 20181111,
and first appeared in
.Fx 7.0 .
.Sh AUTHORS

View File

@ -1,4 +1,4 @@
/* $NetBSD: gzip.c,v 1.113 2018/06/12 00:42:17 kamil Exp $ */
/* $NetBSD: gzip.c,v 1.116 2018/10/27 11:39:12 skrll Exp $ */
/*-
* SPDX-License-Identifier: BSD-2-Clause-NetBSD
@ -83,6 +83,9 @@ enum filetype {
#endif
#ifndef NO_XZ_SUPPORT
FT_XZ,
#endif
#ifndef NO_LZ_SUPPORT
FT_LZ,
#endif
FT_LAST,
FT_UNKNOWN
@ -110,6 +113,11 @@ enum filetype {
#define XZ_MAGIC "\3757zXZ"
#endif
#ifndef NO_LZ_SUPPORT
#define LZ_SUFFIX ".lz"
#define LZ_MAGIC "LZIP"
#endif
#define GZ_SUFFIX ".gz"
#define BUFLEN (64 * 1024)
@ -154,6 +162,9 @@ static suffixes_t suffixes[] = {
#endif
#ifndef NO_XZ_SUPPORT
SUFFIX(XZ_SUFFIX, ""),
#endif
#ifndef NO_LZ_SUPPORT
SUFFIX(LZ_SUFFIX, ""),
#endif
SUFFIX(GZ_SUFFIX, ""), /* Overwritten by -S "" */
#endif /* SMALL */
@ -162,7 +173,7 @@ static suffixes_t suffixes[] = {
#define NUM_SUFFIXES (nitems(suffixes))
#define SUFFIX_MAXLEN 30
static const char gzip_version[] = "FreeBSD gzip 20171121";
static const char gzip_version[] = "FreeBSD gzip 20190107";
#ifndef SMALL
static const char gzip_copyright[] = \
@ -246,6 +257,7 @@ static void display_license(void);
static const suffixes_t *check_suffix(char *, int);
static ssize_t read_retry(int, void *, size_t);
static ssize_t write_retry(int, const void *, size_t);
static void print_list_out(off_t, off_t, const char*);
#ifdef SMALL
#define infile_set(f,t) infile_set(f)
@ -289,6 +301,11 @@ static off_t unpack(int, int, char *, size_t, off_t *);
#ifndef NO_XZ_SUPPORT
static off_t unxz(int, int, char *, size_t, off_t *);
static off_t unxz_len(int);
#endif
#ifndef NO_LZ_SUPPORT
static off_t unlz(int, int, char *, size_t, off_t *);
#endif
#ifdef SMALL
@ -1158,6 +1175,11 @@ file_gettype(u_char *buf)
if (memcmp(buf, XZ_MAGIC, 4) == 0) /* XXX: We only have 4 bytes */
return FT_XZ;
else
#endif
#ifndef NO_LZ_SUPPORT
if (memcmp(buf, LZ_MAGIC, 4) == 0)
return FT_LZ;
else
#endif
return FT_UNKNOWN;
}
@ -1632,14 +1654,23 @@ file_uncompress(char *file, char *outfile, size_t outsize)
#ifndef NO_XZ_SUPPORT
case FT_XZ:
if (lflag) {
maybe_warnx("no -l with xz files");
goto lose;
size = unxz_len(fd);
print_list_out(in_size, size, file);
return -1;
}
size = unxz(fd, zfd, NULL, 0, NULL);
break;
#endif
#ifndef NO_LZ_SUPPORT
case FT_LZ:
if (lflag) {
maybe_warnx("no -l with lzip files");
goto lose;
}
size = unlz(fd, zfd, NULL, 0, NULL);
break;
#endif
#ifndef SMALL
case FT_UNKNOWN:
if (lflag) {
@ -1871,6 +1902,12 @@ handle_stdin(void)
usize = unxz(STDIN_FILENO, STDOUT_FILENO,
(char *)header1, sizeof header1, &gsize);
break;
#endif
#ifndef NO_LZ_SUPPORT
case FT_LZ:
usize = unlz(STDIN_FILENO, STDOUT_FILENO,
(char *)header1, sizeof header1, &gsize);
break;
#endif
}
@ -2197,6 +2234,12 @@ print_list(int fd, off_t out, const char *outfile, time_t ts)
#else
(void)&ts; /* XXX */
#endif
print_list_out(out, in, outfile);
}
static void
print_list_out(off_t out, off_t in, const char *outfile)
{
printf("%12llu %12llu ", (unsigned long long)out, (unsigned long long)in);
print_ratio(in, out, stdout);
printf(" %s\n", outfile);
@ -2271,6 +2314,9 @@ display_version(void)
#ifndef NO_XZ_SUPPORT
#include "unxz.c"
#endif
#ifndef NO_LZ_SUPPORT
#include "unlz.c"
#endif
static ssize_t
read_retry(int fd, void *buf, size_t sz)

646
usr.bin/gzip/unlz.c Normal file
View File

@ -0,0 +1,646 @@
/* $NetBSD: unlz.c,v 1.6 2018/11/11 01:42:36 christos Exp $ */
/*-
* Copyright (c) 2018 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Christos Zoulas.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
/* Lzd - Educational decompressor for the lzip format
Copyright (C) 2013-2018 Antonio Diaz Diaz.
This program is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
that the following conditions are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
#include <sys/param.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <errno.h>
#include <unistd.h>
/*
 * Sizing constants for the decoder state machine and for the adaptive
 * probability ("bit model") tables used by the range decoder below.
 */
/* Number of decoder states; bit-model tables are indexed by state. */
#define LZ_STATES 12
/* Number of high bits of the previous byte that select a literal model. */
#define LITERAL_CONTEXT_BITS 3
/* The position state is the low POS_STATE_BITS bits of the output position. */
#define POS_STATE_BITS 2
#define POS_STATES (1 << POS_STATE_BITS)
#define POS_STATE_MASK (POS_STATES - 1)
/* Upper bound (exclusive) on the length state used to pick a distance-slot tree. */
#define STATES 4
/* Distance decoding: slot width and the slot range that uses bm_dis models. */
#define DIS_SLOT_BITS 6
#define DIS_MODEL_START 4
#define DIS_MODEL_END 14
#define MODELED_DISTANCES (1 << (DIS_MODEL_END / 2))
#define DIS_ALIGN_BITS 4
#define DIS_ALIGN_SIZE (1 << DIS_ALIGN_BITS)
/* Widths of the three length sub-trees decoded by lz_rd_decode_len(). */
#define LOW_BITS 3
#define MID_BITS 3
#define HIGH_BITS 8
#define LOW_SYMBOLS (1 << LOW_BITS)
#define MID_SYMBOLS (1 << MID_BITS)
#define HIGH_SYMBOLS (1 << HIGH_BITS)
#define MAX_SYMBOLS (LOW_SYMBOLS + MID_SYMBOLS + HIGH_SYMBOLS)
/* Added to every decoded length value to obtain the match length. */
#define MIN_MATCH_LEN 2
/*
 * Bit models are BIT_MODEL_TOTAL_BITS-bit probabilities that start at the
 * midpoint and are adjusted by a shift of BIT_MODEL_MOVE_BITS per decoded bit.
 */
#define BIT_MODEL_MOVE_BITS 5
#define BIT_MODEL_TOTAL_BITS 11
#define BIT_MODEL_TOTAL (1 << BIT_MODEL_TOTAL_BITS)
#define BIT_MODEL_INIT (BIT_MODEL_TOTAL / 2)
/*
 * Successor state after a literal has been decoded, indexed by the current
 * state (one entry per state); consulted through lz_st_get_char().
 */
static const int lz_st_next[] = {
0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5,
};
/*
 * Tell whether `st` is one of the low states (0..6), i.e. the range of
 * values that lz_st_get_char() produces.
 */
static bool
lz_st_is_char(int st)
{
	return !(st >= 7);
}
/* Return the state that follows `st` once a literal has been decoded. */
static int
lz_st_get_char(int st)
{
	const int after_literal = lz_st_next[st];

	return after_literal;
}
/* Return the state that follows `st` once a regular match has been decoded. */
static int
lz_st_get_match(int st)
{
	if (st >= 7)
		return 10;
	return 7;
}
/* Return the state that follows `st` once a repeated match has been decoded. */
static int
lz_st_get_rep(int st)
{
	if (st >= 7)
		return 11;
	return 8;
}
/* Return the state that follows `st` once a one-byte repeat has been decoded. */
static int
lz_st_get_short_rep(int st)
{
	if (st >= 7)
		return 11;
	return 9;
}
/*
 * Bit models used by lz_rd_decode_len() to decode a length value:
 * choice1/choice2 select which of the three trees (low, mid, high) is read.
 */
struct lz_len_model {
/* Decoded first: 0 selects the low tree. */
int choice1;
/* Decoded when choice1 was 1: 0 selects the mid tree, 1 the high tree. */
int choice2;
/* Low and mid trees are kept per position state; the high tree is shared. */
int bm_low[POS_STATES][LOW_SYMBOLS];
int bm_mid[POS_STATES][MID_SYMBOLS];
int bm_high[HIGH_SYMBOLS];
};
/* Byte-indexed CRC lookup table, filled in by lz_crc_init(). */
static uint32_t lz_crc[256];

/*
 * Build the CRC lookup table: each entry is its index pushed through eight
 * shift/xor rounds with the reflected polynomial 0xEDB88320.
 */
static void
lz_crc_init(void)
{
	for (unsigned idx = 0; idx < nitems(lz_crc); idx++) {
		unsigned rem = idx;
		unsigned rounds = 8;

		while (rounds-- != 0)
			rem = (rem & 1) ? (0xEDB88320U ^ (rem >> 1)) : (rem >> 1);
		lz_crc[idx] = rem;
	}
}
/*
 * Fold `len` bytes from `buf` into the running CRC stored at `*crc`,
 * one table lookup per byte.
 */
static void
lz_crc_update(uint32_t *crc, const uint8_t *buf, size_t len)
{
	size_t done = 0;

	while (done < len) {
		const uint32_t slot = (*crc ^ buf[done]) & 0xFF;

		*crc = lz_crc[slot] ^ (*crc >> 8);
		done++;
	}
}
/* State of the range decoder that reads the compressed bit stream. */
struct lz_range_decoder {
/* Input stream the compressed bytes are pulled from with getc(). */
FILE *fp;
/* Current code value, refilled one byte at a time as the range shrinks. */
uint32_t code;
/* Current range width; renormalized whenever it drops to 0x00FFFFFF or less. */
uint32_t range;
};
/*
 * Initialise the range decoder on stream `fp`: full range, and the code
 * register primed with the next five input bytes.
 * Returns -1 if the stream is in an error state afterwards, 0 otherwise.
 */
static int
lz_rd_create(struct lz_range_decoder *rd, FILE *fp)
{
	uint32_t primed = 0;

	rd->fp = fp;
	rd->range = ~0;
	for (int left = 5; left > 0; left--)
		primed = (primed << 8) | (uint8_t)getc(fp);
	rd->code = primed;

	if (ferror(rd->fp))
		return -1;
	return 0;
}
/*
 * Decode `num_bits` bits without any probability model (each bit halves the
 * range) and return them most-significant bit first.
 */
static unsigned
lz_rd_decode(struct lz_range_decoder *rd, int num_bits)
{
	unsigned value = 0;
	int left = num_bits;

	while (left > 0) {
		unsigned bit = 0;

		rd->range >>= 1;
		if (rd->code >= rd->range) {
			rd->code -= rd->range;
			bit = 1;
		}
		value = (value << 1) | bit;

		/* Renormalize: pull in another input byte once the range is small. */
		if (rd->range <= 0x00FFFFFFU) {
			rd->range <<= 8;
			rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
		}
		left--;
	}
	return value;
}
/*
 * Decode one bit using the adaptive probability `*bm` and update that
 * probability toward the value just decoded. Returns the bit (0 or 1).
 */
static unsigned
lz_rd_decode_bit(struct lz_range_decoder *rd, int *bm)
{
	const int prob = *bm;
	const uint32_t bound = (rd->range >> BIT_MODEL_TOTAL_BITS) * prob;
	unsigned symbol;

	if (rd->code >= bound) {
		/* Upper part of the range: bit is 1, lower the probability. */
		rd->range -= bound;
		rd->code -= bound;
		*bm = prob - (prob >> BIT_MODEL_MOVE_BITS);
		symbol = 1;
	} else {
		/* Lower part of the range: bit is 0, raise the probability. */
		rd->range = bound;
		*bm = prob + ((BIT_MODEL_TOTAL - prob) >> BIT_MODEL_MOVE_BITS);
		symbol = 0;
	}

	/* Renormalize: pull in another input byte once the range is small. */
	if (rd->range <= 0x00FFFFFFU) {
		rd->range <<= 8;
		rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
	}
	return symbol;
}
/*
 * Decode a `num_bits`-bit symbol by walking the binary tree of bit models
 * in `bm` from the root (index 1); the leading marker bit is removed from
 * the result.
 */
static unsigned
lz_rd_decode_tree(struct lz_range_decoder *rd, int *bm, int num_bits)
{
	unsigned node = 1;
	int depth = 0;

	while (depth < num_bits) {
		const unsigned bit = lz_rd_decode_bit(rd, &bm[node]);

		node = (node << 1) | bit;
		depth++;
	}
	return node - (1 << num_bits);
}
/*
 * Decode a `num_bits`-bit symbol with lz_rd_decode_tree() and return it
 * with the order of those bits reversed.
 */
static unsigned
lz_rd_decode_tree_reversed(struct lz_range_decoder *rd, int *bm, int num_bits)
{
	unsigned forward = lz_rd_decode_tree(rd, bm, num_bits);
	unsigned mirrored = 0;

	for (int left = num_bits; left > 0; left--) {
		mirrored = (mirrored << 1) | (forward & 1);
		forward >>= 1;
	}
	return mirrored;
}
/*
 * Decode a literal byte using `match_byte` as context: while the decoded
 * bits agree with the corresponding bits of `match_byte`, the models in the
 * upper part of `bm` (offset by 0x100, plus another 0x100 when the expected
 * bit is 1) are used; after the first disagreement the remaining bits come
 * from the plain tree at the start of `bm`.
 */
static unsigned
lz_rd_decode_matched(struct lz_range_decoder *rd, int *bm, int match_byte)
{
	unsigned symbol = 1;
	int shift = 7;

	while (shift >= 0) {
		const unsigned expected = (match_byte >> shift) & 1;
		int *model = &bm[symbol + (expected << 8) + 0x100];
		const unsigned got = lz_rd_decode_bit(rd, model);

		symbol = (symbol << 1) | got;
		if (expected != got)
			break;
		shift--;
	}

	/*
	 * After eight agreeing bits symbol is already >= 0x100, so this loop
	 * only runs when the loop above stopped early on a mismatch.
	 */
	while (symbol < 0x100) {
		const unsigned bit = lz_rd_decode_bit(rd, &bm[symbol]);

		symbol = (symbol << 1) | bit;
	}
	return symbol & 0xFF;
}
/*
 * Decode a length value with model `lm`. Two choice bits pick the tree:
 * low (values 0..LOW_SYMBOLS-1), mid (next MID_SYMBOLS values) or high
 * (the values after that). The caller adds MIN_MATCH_LEN.
 */
static unsigned
lz_rd_decode_len(struct lz_range_decoder *rd, struct lz_len_model *lm,
    int pos_state)
{
	if (lz_rd_decode_bit(rd, &lm->choice1) != 0) {
		if (lz_rd_decode_bit(rd, &lm->choice2) != 0) {
			return LOW_SYMBOLS + MID_SYMBOLS +
			    lz_rd_decode_tree(rd, lm->bm_high, HIGH_BITS);
		}
		return LOW_SYMBOLS +
		    lz_rd_decode_tree(rd, lm->bm_mid[pos_state], MID_BITS);
	}
	return lz_rd_decode_tree(rd, lm->bm_low[pos_state], LOW_BITS);
}
/* Complete decoder state for one lzip member. */
struct lz_decoder {
/* Input and output streams (opened on dup()ed descriptors by lz_create()). */
FILE *fin, *fout;
/*
 * pos: next write index in obuf; ppos: bytes emitted before the last
 * wrap of obuf; spos: index of the first byte in obuf not yet flushed;
 * dict_size: size of obuf in bytes.
 */
off_t pos, ppos, spos, dict_size;
/* Set by lz_flush() when the buffer was full and pos was reset to 0. */
bool wrapped;
/* Running CRC of the flushed output (inverted by lz_get_crc()). */
uint32_t crc;
/* Output/dictionary buffer of dict_size bytes. */
uint8_t *obuf;
struct lz_range_decoder rdec;
};
/*
 * Write the not-yet-flushed part of the dictionary buffer to the output
 * stream and add it to the running CRC. When the buffer is full the write
 * position wraps back to 0.
 * Returns 0 on success, -1 if there was nothing to flush or the write failed.
 */
static int
lz_flush(struct lz_decoder *lz)
{
	const off_t pending = lz->pos - lz->spos;

	if (pending <= 0)
		return -1;

	const size_t nbytes = (size_t)pending;
	const uint8_t *chunk = lz->obuf + lz->spos;

	lz_crc_update(&lz->crc, chunk, nbytes);
	if (fwrite(chunk, 1, nbytes, lz->fout) != nbytes)
		return -1;

	if (lz->pos >= lz->dict_size) {
		/* Buffer full: account for it and start over at the beginning. */
		lz->wrapped = true;
		lz->ppos += lz->pos;
		lz->pos = 0;
	} else {
		lz->wrapped = false;
	}
	lz->spos = lz->pos;
	return 0;
}
/* Release everything owned by `lz`: both streams and the dictionary buffer. */
static void
lz_destroy(struct lz_decoder *lz)
{
	if (lz->fin != NULL)
		fclose(lz->fin);
	if (lz->fout != NULL)
		fclose(lz->fout);
	free(lz->obuf);
}
/*
 * Initialise decoder `lz`.
 *
 * fin, fdout: descriptors for the compressed input and decompressed output.
 *             They are dup()ed, so the caller keeps ownership of the
 *             originals; the duplicates are closed by lz_destroy().
 * dict_size:  size in bytes of the dictionary/output buffer to allocate.
 *
 * Returns 0 on success. On failure returns -1 with every partially
 * acquired resource released.
 */
static int
lz_create(struct lz_decoder *lz, int fin, int fdout, int dict_size)
{
	int fd;

	memset(lz, 0, sizeof(*lz));

	if (dict_size <= 0)
		goto out;

	fd = dup(fin);
	if (fd == -1)
		goto out;
	lz->fin = fdopen(fd, "r");
	if (lz->fin == NULL) {
		/* fdopen() did not take ownership; do not leak the duplicate. */
		close(fd);
		goto out;
	}

	fd = dup(fdout);
	if (fd == -1)
		goto out;
	lz->fout = fdopen(fd, "w");
	if (lz->fout == NULL) {
		close(fd);
		goto out;
	}

	lz->pos = lz->ppos = lz->spos = 0;
	lz->crc = ~0;
	lz->dict_size = dict_size;
	lz->wrapped = false;
	lz->obuf = malloc(dict_size);
	if (lz->obuf == NULL)
		goto out;

	if (lz_rd_create(&lz->rdec, lz->fin) == -1)
		goto out;
	return 0;
out:
	/* Safe on a partially built decoder: unset members are still NULL. */
	lz_destroy(lz);
	return -1;
}
/*
 * Return the byte that was output `ahead` + 1 positions ago. Positions
 * before the start of the buffer are read from its end once the buffer has
 * wrapped; otherwise 0 is returned for them.
 */
static uint8_t
lz_peek(const struct lz_decoder *lz, unsigned ahead)
{
	const off_t idx = lz->pos - ahead - 1;

	if (idx < 0) {
		if (!lz->wrapped)
			return 0;
		return lz->obuf[lz->dict_size + idx];
	}
	return lz->obuf[idx];
}
/*
 * Append byte `b` to the dictionary buffer, flushing (and wrapping) when
 * the buffer becomes full.
 */
static void
lz_put(struct lz_decoder *lz, uint8_t b)
{
	const off_t slot = lz->pos;

	lz->obuf[slot] = b;
	lz->pos = slot + 1;
	if (lz->pos == lz->dict_size)
		lz_flush(lz);
}
/*
 * Total number of bytes produced so far: bytes from completed buffer laps
 * plus the current write index.
 */
static off_t
lz_get_data_position(const struct lz_decoder *lz)
{
	const off_t produced = lz->ppos + lz->pos;

	return produced;
}
/* Return the running CRC with all 32 bits inverted (the final CRC value). */
static unsigned
lz_get_crc(const struct lz_decoder *lz)
{
	const uint32_t running = lz->crc;

	return running ^ 0xffffffffU;
}
/* Set each of the `l` bit models in `a` to the initial probability. */
static void
lz_bm_init(int *a, size_t l)
{
	int *cur = a;
	int *const end = a + l;

	while (cur != end)
		*cur++ = BIT_MODEL_INIT;
}
/*
 * Initialise a one-dimensional array of bit models. `a` must be a real
 * array (not a pointer) because its length is taken with nitems().
 */
#define LZ_BM_INIT(a) lz_bm_init(a, nitems(a))
/* Same, for a two-dimensional array of bit models (row by row). */
#define LZ_BM_INIT2(a) do { \
size_t l = nitems(a[0]); \
for (size_t i = 0; i < nitems(a); i++) \
lz_bm_init(a[i], l); \
} while (/*CONSTCOND*/0)
/* Initialise every member of a struct lz_len_model object `a`. */
#define LZ_MODEL_INIT(a) do { \
a.choice1 = BIT_MODEL_INIT; \
a.choice2 = BIT_MODEL_INIT; \
LZ_BM_INIT2(a.bm_low); \
LZ_BM_INIT2(a.bm_mid); \
LZ_BM_INIT(a.bm_high); \
} while (/*CONSTCOND*/0)
/*
 * Decode the compressed data of one member from lz->fin, writing the
 * output through lz_put()/lz_flush().
 *
 * Returns true only when the end-of-stream marker is found (a match whose
 * distance decodes to 0xFFFFFFFF) and its length equals MIN_MATCH_LEN.
 * Returns false when the marker has a different length, when a match
 * distance points outside the available data, or when the input stream
 * hits EOF or an error first. Pending output is flushed on every exit
 * path; the result of that flush is not checked here.
 */
static bool
lz_decode_member(struct lz_decoder *lz)
{
/* Adaptive bit models, all starting at BIT_MODEL_INIT. */
int bm_literal[1 << LITERAL_CONTEXT_BITS][0x300];
int bm_match[LZ_STATES][POS_STATES];
int bm_rep[4][LZ_STATES];
int bm_len[LZ_STATES][POS_STATES];
int bm_dis_slot[LZ_STATES][1 << DIS_SLOT_BITS];
int bm_dis[MODELED_DISTANCES - DIS_MODEL_END + 1];
int bm_align[DIS_ALIGN_SIZE];
LZ_BM_INIT2(bm_literal);
LZ_BM_INIT2(bm_match);
LZ_BM_INIT2(bm_rep);
LZ_BM_INIT2(bm_len);
LZ_BM_INIT2(bm_dis_slot);
LZ_BM_INIT(bm_dis);
LZ_BM_INIT(bm_align);
struct lz_len_model match_len_model;
struct lz_len_model rep_len_model;
LZ_MODEL_INIT(match_len_model);
LZ_MODEL_INIT(rep_len_model);
struct lz_range_decoder *rd = &lz->rdec;
/* The four most recently used match distances; rep[0] is the newest. */
unsigned rep[4] = { 0 };
int state = 0;
while (!feof(lz->fin) && !ferror(lz->fin)) {
const int pos_state = lz_get_data_position(lz) & POS_STATE_MASK;
// bit 1
if (lz_rd_decode_bit(rd, &bm_match[state][pos_state]) == 0) {
/* Literal: the model set is chosen by the top bits of the previous byte. */
const uint8_t prev_byte = lz_peek(lz, 0);
const int literal_state =
prev_byte >> (8 - LITERAL_CONTEXT_BITS);
int *bm = bm_literal[literal_state];
if (lz_st_is_char(state))
lz_put(lz, lz_rd_decode_tree(rd, bm, 8));
else {
/* Use the byte at distance rep[0] as context for the literal. */
int peek = lz_peek(lz, rep[0]);
lz_put(lz, lz_rd_decode_matched(rd, bm, peek));
}
state = lz_st_get_char(state);
continue;
}
int len;
// bit 2
if (lz_rd_decode_bit(rd, &bm_rep[0][state]) != 0) {
/* Repeated match: reuse one of the remembered distances. */
// bit 3
if (lz_rd_decode_bit(rd, &bm_rep[1][state]) == 0) {
// bit 4
if (lz_rd_decode_bit(rd,
&bm_len[state][pos_state]) == 0)
{
/* Short repeat: copy a single byte from distance rep[0]. */
state = lz_st_get_short_rep(state);
lz_put(lz, lz_peek(lz, rep[0]));
continue;
}
} else {
/* Pick rep[1], rep[2] or rep[3] and move it to the front. */
unsigned distance;
// bit 4
if (lz_rd_decode_bit(rd, &bm_rep[2][state])
== 0)
distance = rep[1];
else {
// bit 5
if (lz_rd_decode_bit(rd,
&bm_rep[3][state]) == 0)
distance = rep[2];
else {
distance = rep[3];
rep[3] = rep[2];
}
rep[2] = rep[1];
}
rep[1] = rep[0];
rep[0] = distance;
}
state = lz_st_get_rep(state);
len = MIN_MATCH_LEN +
lz_rd_decode_len(rd, &rep_len_model, pos_state);
} else {
/* New match: shift the distance history and decode a new rep[0]. */
rep[3] = rep[2]; rep[2] = rep[1]; rep[1] = rep[0];
len = MIN_MATCH_LEN +
lz_rd_decode_len(rd, &match_len_model, pos_state);
const int len_state =
MIN(len - MIN_MATCH_LEN, STATES - 1);
rep[0] = lz_rd_decode_tree(rd, bm_dis_slot[len_state],
DIS_SLOT_BITS);
/* Slots below DIS_MODEL_START are the distance itself. */
if (rep[0] >= DIS_MODEL_START) {
const unsigned dis_slot = rep[0];
const int direct_bits = (dis_slot >> 1) - 1;
rep[0] = (2 | (dis_slot & 1)) << direct_bits;
if (dis_slot < DIS_MODEL_END)
rep[0] += lz_rd_decode_tree_reversed(rd,
&bm_dis[rep[0] - dis_slot],
direct_bits);
else {
/* High bits are read unmodelled, the low DIS_ALIGN_BITS via bm_align. */
rep[0] += lz_rd_decode(rd, direct_bits
- DIS_ALIGN_BITS) << DIS_ALIGN_BITS;
rep[0] += lz_rd_decode_tree_reversed(rd,
bm_align, DIS_ALIGN_BITS);
/* Distance 0xFFFFFFFF is the marker that ends the member. */
if (rep[0] == 0xFFFFFFFFU) {
lz_flush(lz);
return len == MIN_MATCH_LEN;
}
}
}
state = lz_st_get_match(state);
/* Reject distances that reach beyond the data produced so far. */
if (rep[0] >= lz->dict_size ||
(rep[0] >= lz->pos && !lz->wrapped)) {
lz_flush(lz);
return false;
}
}
/* Copy len bytes from distance rep[0]; byte-wise so overlapping copies work. */
for (int i = 0; i < len; i++)
lz_put(lz, lz_peek(lz, rep[0]));
}
/* Input ended (EOF or error) before the end marker was seen. */
lz_flush(lz);
return false;
}
/*
 * Member trailer layout (all fields little-endian):
 * 0-3 CRC32 of the uncompressed data
 * 4-11 size of the uncompressed data
 * 12-19 member size including header and trailer
 */
#define TRAILER_SIZE 20

/* Assemble a little-endian unsigned integer from the `len` bytes at `p`. */
static uint64_t
lz_trailer_le(const uint8_t *p, size_t len)
{
	uint64_t value = 0;

	while (len-- > 0)
		value = (value << 8) | p[len];
	return value;
}

/*
 * Decode one lzip member whose header has already been consumed.
 *
 * fin, fdout: input and output descriptors (duplicated internally).
 * dict_size:  dictionary size taken from the member header.
 * insize:     if not NULL, receives the member size recorded in the
 *             trailer (only written on success).
 *
 * Returns the uncompressed size recorded in the trailer, or -1 if the
 * decoder cannot be set up, the data is corrupt or truncated, the trailer
 * CRC/size do not match what was decoded, or the output could not be
 * written.
 */
static off_t
lz_decode(int fin, int fdout, unsigned dict_size, off_t *insize)
{
	struct lz_decoder lz;
	uint8_t trailer[TRAILER_SIZE];
	uint32_t crc;
	uint64_t data_size, member_size;
	off_t rv = -1;

	if (lz_create(&lz, fin, fdout, dict_size) == -1)
		return -1;

	if (!lz_decode_member(&lz))
		goto out;

	/* A trailer cut short by EOF means the member is truncated. */
	for (size_t i = 0; i < sizeof(trailer); i++) {
		const int c = getc(lz.fin);

		if (c == EOF)
			goto out;
		trailer[i] = (uint8_t)c;
	}

	/*
	 * Fields are assembled in unsigned arithmetic so that arbitrary
	 * trailer bytes cannot trigger signed-overflow behaviour.
	 */
	crc = (uint32_t)lz_trailer_le(trailer, 4);
	data_size = lz_trailer_le(trailer + 4, 8);
	member_size = lz_trailer_le(trailer + 12, 8);

	if (crc != lz_get_crc(&lz) ||
	    data_size != (uint64_t)lz_get_data_position(&lz))
		goto out;

	/* Do not report success if the decoded data never reached the output. */
	if (fflush(lz.fout) != 0 || ferror(lz.fout))
		goto out;

	if (insize)
		*insize = (off_t)member_size;
#if 0
	/* Does not work with pipes */
	rv = ftello(lz.fout);
#else
	rv = (off_t)data_size;
#endif
out:
	lz_destroy(&lz);
	return rv;
}
/*
 * Member header layout:
 * 0-3 magic
 * 4 version
 * 5 coded dict_size
 */
#define HDR_SIZE 6
#define MIN_DICTIONARY_SIZE (1 << 12)
#define MAX_DICTIONARY_SIZE (1 << 29)
/* Magic string followed by the only supported format version (1). */
static const char hdrmagic[] = { 'L', 'Z', 'I', 'P', 1 };

/*
 * Decode the coded dictionary size byte `c` from the member header.
 *
 * Bits 4-0 hold the base-2 logarithm of a base size; bits 7-5 hold a
 * numerator (0..7) of sixteenths of that base size to subtract from it.
 * For example 0xD3 means 2^19 - 6 * 2^15 = 320 KiB.
 *
 * Returns the dictionary size in bytes, or 0 if it falls outside
 * [MIN_DICTIONARY_SIZE, MAX_DICTIONARY_SIZE].
 */
static unsigned
lz_get_dict_size(unsigned char c)
{
	const unsigned log2_base = c & 0x1f;
	const unsigned numerator = (c >> 5) & 0x7;
	/* Unsigned shift: log2_base may be 31, which would overflow an int. */
	unsigned dict_size = 1U << log2_base;

	/* The fraction unit is 1/16 of the base size, not 1/4. */
	dict_size -= (dict_size >> 4) * numerator;
	if (dict_size < MIN_DICTIONARY_SIZE || dict_size > MAX_DICTIONARY_SIZE)
		return 0;
	return dict_size;
}
/*
 * Decompress one lzip member from descriptor `fin` to descriptor `fout`.
 *
 * pre, prelen: header bytes the caller has already read from `fin`
 *              (may be NULL / 0); at most HDR_SIZE bytes are accepted.
 * bytes_in:    if not NULL, receives the member size from the trailer.
 *
 * Returns the uncompressed size, 0 for completely empty input, or -1 on a
 * read error, a truncated or unrecognised header, or a decoding failure.
 */
static off_t
unlz(int fin, int fout, char *pre, size_t prelen, off_t *bytes_in)
{
	/*
	 * Build the CRC table once. Entry 0 of the table is 0 even after
	 * initialisation, so entry 1 (non-zero once built) is the indicator.
	 */
	if (lz_crc[1] == 0)
		lz_crc_init();

	char header[HDR_SIZE];

	if (prelen > sizeof(header))
		return -1;
	if (pre && prelen)
		memcpy(header, pre, prelen);

	/*
	 * Read the rest of the header. A pipe may deliver it in several
	 * pieces, so keep reading until it is complete or EOF is reached.
	 */
	size_t have = prelen;
	while (have < sizeof(header)) {
		const ssize_t nr = read(fin, header + have,
		    sizeof(header) - have);

		if (nr == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		}
		if (nr == 0)
			break;
		have += (size_t)nr;
	}
	if (have == 0)
		return 0;	/* empty input, nothing to decode */
	if (have != sizeof(header))
		return -1;	/* truncated header */

	if (memcmp(header, hdrmagic, sizeof(hdrmagic)) != 0)
		return -1;

	unsigned dict_size = lz_get_dict_size(header[5]);
	if (dict_size == 0)
		return -1;

	return lz_decode(fin, fout, dict_size, bytes_in);
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: unxz.c,v 1.7 2017/08/04 07:27:08 mrg Exp $ */
/* $NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $ */
/*-
* SPDX-License-Identifier: BSD-2-Clause-NetBSD
@ -156,3 +156,322 @@ unxz(int i, int o, char *pre, size_t prelen, off_t *bytes_in)
}
}
}
#include <stdbool.h>
/*
* Copied various bits and pieces from xz support code or brute force
* replacements.
*/
// Minimum of two values. Each argument may be evaluated twice, so do not
// pass expressions with side effects.
#define my_min(A,B) ((A)<(B)?(A):(B))
// Some systems have suboptimal BUFSIZ. Use a bit bigger value on them.
// We also need that IO_BUFFER_SIZE is a multiple of 8 (sizeof(uint64_t))
#if BUFSIZ <= 1024
# define IO_BUFFER_SIZE 8192
#else
# define IO_BUFFER_SIZE (BUFSIZ & ~7U)
#endif
/// I/O buffer filled by io_pread(). The union with the wider integer
/// members makes sure that the buffer is properly aligned for access
/// through u32/u64; parse_indexes() reads it through u32.
/// NOTE(review): the upstream xz comment here referred to is_sparse(),
/// which does not appear in the visible part of this file.
typedef union {
uint8_t u8[IO_BUFFER_SIZE];
uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)];
uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)];
} io_buf;
/// Read exactly `size` bytes from `fd` at absolute offset `pos` into `buf`.
///
/// \return     false on success; true if seeking fails, the read fails,
///             or fewer than `size` bytes are returned.
static bool
io_pread(int fd, io_buf *buf, size_t size, off_t pos)
{
	// Using lseek() and read() is more portable than pread() and
	// for us it is as good as real pread().
	const bool seek_failed = lseek(fd, pos, SEEK_SET) != pos;
	if (seek_failed)
		return true;

	// A failed read() returns -1, which becomes SIZE_MAX here.
	const size_t got = read(fd, buf, size);
	const bool read_failed = (got == SIZE_MAX);
	const bool short_read = (got != size);

	return read_failed || short_read;
}
/*
* Most of the following is copied (mostly verbatim) from the xz
* distribution, from file src/xz/list.c
*/
///////////////////////////////////////////////////////////////////////////////
//
/// \file list.c
/// \brief Listing information about .xz files
//
// Author: Lasse Collin
//
// This file has been put into the public domain.
// You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
/// Information about a .xz file
typedef struct {
/// Combined Index of all Streams in the file
lzma_index *idx;
/// Total amount of Stream Padding
uint64_t stream_padding;
/// Highest memory usage so far
uint64_t memusage_max;
/// True if all Blocks so far have Compressed Size and
/// Uncompressed Size fields
bool all_have_sizes;
/// Oldest XZ Utils version that will decompress the file
uint32_t min_version;
} xz_file_info;
/// Initializer for xz_file_info, in field order: no Index yet, no padding,
/// no memory usage recorded, all_have_sizes true, min_version 50000002.
#define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 }
/// \brief      Parse the Index(es) from the given .xz file
///
/// The file is parsed backwards from its end, one Stream per iteration,
/// and the decoded Indexes are concatenated in file order.
///
/// \param      xfi     Pointer to structure where the decoded information
///                     is stored. On success xfi->idx owns the combined
///                     Index and must be released with lzma_index_end().
/// \param      src_fd  Descriptor of the input file; it must be seekable.
///
/// \return     On success, false is returned. On error, true is returned.
///
// TODO: This function is pretty big. liblzma should have a function that
// takes a callback function to parse the Index(es) from a .xz file to make
// it easy for applications.
static bool
parse_indexes(xz_file_info *xfi, int src_fd)
{
	struct stat st;

	// Without a successful fstat() the size below would be read from
	// an uninitialized structure.
	if (fstat(src_fd, &st) != 0) {
		return true;
	}

	if (st.st_size <= 0) {
		return true;
	}

	if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) {
		return true;
	}

	io_buf buf;
	lzma_stream_flags header_flags;
	lzma_stream_flags footer_flags;
	lzma_ret ret;

	// lzma_stream for the Index decoder
	lzma_stream strm = LZMA_STREAM_INIT;

	// All Indexes decoded so far
	lzma_index *combined_index = NULL;

	// The Index currently being decoded
	lzma_index *this_index = NULL;

	// Current position in the file. We parse the file backwards so
	// initialize it to point to the end of the file.
	off_t pos = st.st_size;

	// Each loop iteration decodes one Index.
	do {
		// Check that there is enough data left to contain at least
		// the Stream Header and Stream Footer. This check cannot
		// fail in the first pass of this loop.
		if (pos < 2 * LZMA_STREAM_HEADER_SIZE) {
			goto error;
		}

		pos -= LZMA_STREAM_HEADER_SIZE;
		lzma_vli stream_padding = 0;

		// Locate the Stream Footer. There may be Stream Padding which
		// we must skip when reading backwards.
		while (true) {
			if (pos < LZMA_STREAM_HEADER_SIZE) {
				goto error;
			}

			if (io_pread(src_fd, &buf,
			    LZMA_STREAM_HEADER_SIZE, pos))
				goto error;

			// Stream Padding is always a multiple of four bytes.
			int i = 2;
			if (buf.u32[i] != 0)
				break;

			// To avoid calling io_pread() for every four bytes
			// of Stream Padding, take advantage that we read
			// 12 bytes (LZMA_STREAM_HEADER_SIZE) already and
			// check them too before calling io_pread() again.
			do {
				stream_padding += 4;
				pos -= 4;
				--i;
			} while (i >= 0 && buf.u32[i] == 0);
		}

		// Decode the Stream Footer.
		ret = lzma_stream_footer_decode(&footer_flags, buf.u8);
		if (ret != LZMA_OK) {
			goto error;
		}

		// Check that the Stream Footer doesn't specify something
		// that we don't support. This can only happen if the xz
		// version is older than liblzma and liblzma supports
		// something new.
		//
		// It is enough to check Stream Footer. Stream Header must
		// match when it is compared against Stream Footer with
		// lzma_stream_flags_compare().
		if (footer_flags.version != 0) {
			goto error;
		}

		// Check that the size of the Index field looks sane.
		lzma_vli index_size = footer_flags.backward_size;
		if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) {
			goto error;
		}

		// Set pos to the beginning of the Index.
		pos -= index_size;

		// Decode the Index.
		ret = lzma_index_decoder(&strm, &this_index, UINT64_MAX);
		if (ret != LZMA_OK) {
			goto error;
		}

		do {
			// Don't give the decoder more input than the
			// Index size.
			strm.avail_in = my_min(IO_BUFFER_SIZE, index_size);
			if (io_pread(src_fd, &buf, strm.avail_in, pos))
				goto error;

			pos += strm.avail_in;
			index_size -= strm.avail_in;

			strm.next_in = buf.u8;
			ret = lzma_code(&strm, LZMA_RUN);

		} while (ret == LZMA_OK);

		// If the decoding seems to be successful, check also that
		// the Index decoder consumed as much input as indicated
		// by the Backward Size field.
		if (ret == LZMA_STREAM_END)
			if (index_size != 0 || strm.avail_in != 0)
				ret = LZMA_DATA_ERROR;

		if (ret != LZMA_STREAM_END) {
			// LZMA_BUFFER_ERROR means that the Index decoder
			// would have liked more input than what the Index
			// size should be according to Stream Footer.
			// The message for LZMA_DATA_ERROR makes more
			// sense in that case.
			if (ret == LZMA_BUF_ERROR)
				ret = LZMA_DATA_ERROR;

			goto error;
		}

		// Decode the Stream Header and check that its Stream Flags
		// match the Stream Footer.
		pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE;
		if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) {
			goto error;
		}

		pos -= lzma_index_total_size(this_index);
		if (io_pread(src_fd, &buf, LZMA_STREAM_HEADER_SIZE, pos))
			goto error;

		ret = lzma_stream_header_decode(&header_flags, buf.u8);
		if (ret != LZMA_OK) {
			goto error;
		}

		ret = lzma_stream_flags_compare(&header_flags, &footer_flags);
		if (ret != LZMA_OK) {
			goto error;
		}

		// Store the decoded Stream Flags into this_index. This is
		// needed so that we can print which Check is used in each
		// Stream.
		ret = lzma_index_stream_flags(this_index, &footer_flags);
		if (ret != LZMA_OK)
			goto error;

		// Store also the size of the Stream Padding field. It is
		// needed to show the offsets of the Streams correctly.
		ret = lzma_index_stream_padding(this_index, stream_padding);
		if (ret != LZMA_OK)
			goto error;

		if (combined_index != NULL) {
			// Append the earlier decoded Indexes
			// after this_index.
			ret = lzma_index_cat(
			    this_index, combined_index, NULL);
			if (ret != LZMA_OK) {
				goto error;
			}
		}

		combined_index = this_index;
		this_index = NULL;

		xfi->stream_padding += stream_padding;

	} while (pos > 0);

	lzma_end(&strm);

	// All OK. Make combined_index available to the caller.
	xfi->idx = combined_index;
	return false;

error:
	// Something went wrong, free the allocated memory.
	lzma_end(&strm);
	lzma_index_end(combined_index, NULL);
	lzma_index_end(this_index, NULL);
	return true;
}
/***************** end of copy from list.c *************************/
/*
 * Small wrapper: return the total uncompressed size recorded in the
 * Index(es) of the .xz file open on `fd`, or 0 if they cannot be parsed.
 */
off_t
unxz_len(int fd)
{
	xz_file_info info = XZ_FILE_INFO_INIT;
	off_t total;

	/* parse_indexes() returns true on failure. */
	if (parse_indexes(&info, fd))
		return 0;

	total = lzma_index_uncompressed_size(info.idx);
	lzma_index_end(info.idx, NULL);
	return total;
}