Maxim Sobolev 056c1a0528 Fix logical bug in the bzip2 reading code, which results in bogus EIO
returned on a perfectly valid bzip2 stream whose decompressed size
is a multiple of the read-ahead buffer size. Reproducing the problem is easy:
create some power-of-two sized file (truncate -s 1m file will do),
bzip2 it and try to load it as md_image from loader. See how it fails.

The bug doesn't affect gzip code (which most of bzip2-reading code was
copied from) probably due to the fact that libgzip doesn't report
Z_STREAM_END with the last block, but requires extra call to inflate()
to retrieve it and has some extra data in the input stream at that time.
However, apply a similar fix to gzipfs.c just in case the API
changes in the future to do what the bzip2 code does.

Add some ifdef'ed code to enable testing bzipfs.c from within a normal
FreeBSD environment as opposed to the restricted loader one, so that
one can use gdb and whatnot.

Sponsored by:	Sippy Software, Inc., http://www.sippysoft.com/
MFC in:		7 days
2007-12-18 01:50:49 +00:00

348 lines
8.7 KiB
C

/*
* Copyright (c) 1998 Michael Smith.
* Copyright (c) 2000 Maxim Sobolev
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#ifndef REGRESSION
#include "stand.h"
#else
/*
 * Stand-alone (hosted) test build.  Pull in the prototypes for the libc
 * routines this file calls -- malloc()/free()/abort()/exit() from
 * <stdlib.h> and read()/close() from <unistd.h> -- so that they are not
 * implicitly declared as returning int, which would truncate the pointer
 * returned by malloc() on LP64 platforms.
 */
#include <stdlib.h>
#include <unistd.h>
#include <sys/errno.h>
#include <sys/fcntl.h>
#include <sys/types.h>
#include <sys/unistd.h>

/* Minimal stand-in for the open-file descriptor used by this file. */
struct open_file {
    int f_flags; /* see F_* below */
    void *f_fsdata; /* file system specific data */
};
#define F_READ 0x0001 /* file opened for reading */
#define EOFFSET (ELAST+8) /* relative seek not supported */

/* Smaller of two unsigned values. */
static inline u_int min(u_int a, u_int b) { return (a < b ? a : b); }

/* No panic() in the hosted build: just abort, ignoring the message. */
#define panic(x, y) abort()
#endif
#include <sys/stat.h>
#include <string.h>
#include <bzlib.h>
/* Size in bytes of the compressed-input read-ahead buffer. */
#define BZ_BUFSIZE 2048 /* XXX larger? */
/*
 * Per-open-file state for a bzip2-compressed file; stored in the
 * open_file's f_fsdata by bzf_open() and released by bzf_close().
 */
struct bz_file
{
int bzf_rawfd; /* descriptor of the underlying compressed file */
bz_stream bzf_bzstream; /* libbz2 decompression stream state */
char bzf_buf[BZ_BUFSIZE]; /* compressed input read from bzf_rawfd */
int bzf_endseen; /* nonzero once BZ2_bzDecompress() returned BZ_STREAM_END */
};
/*
 * Forward declarations: the raw-input refill helper and the file-system
 * entry points implemented below.
 */
static int bzf_fill(struct bz_file *z);
static int bzf_open(const char *path, struct open_file *f);
static int bzf_close(struct open_file *f);
static int bzf_read(struct open_file *f, void *buf, size_t size, size_t *resid);
static off_t bzf_seek(struct open_file *f, off_t offset, int where);
static int bzf_stat(struct open_file *f, struct stat *sb);
#ifndef REGRESSION
/*
 * Operations table for the "bzip" file system.  The initialisers are
 * positional; their order is dictated by struct fs_ops, which is not
 * defined in this file (presumably it comes from stand.h -- confirm
 * before reordering).  Writing and directory reading are delegated to
 * the null_* handlers.  The table is omitted from the REGRESSION build.
 */
struct fs_ops bzipfs_fsops = {
"bzip",
bzf_open,
bzf_close,
bzf_read,
null_write,
bzf_seek,
bzf_stat,
null_readdir
};
#endif
#if 0
/*
 * Disabled stand-in for calloc().
 * NOTE(review): if this is ever re-enabled, be aware that it neither
 * zeroes the returned memory nor guards items * size against overflow,
 * so it does not provide calloc() semantics.
 */
void *
calloc(int items, size_t size)
{
return(malloc(items * size));
}
#endif
static int
bzf_fill(struct bz_file *bzf)
{
int result;
int req;
req = BZ_BUFSIZE - bzf->bzf_bzstream.avail_in;
result = 0;
/* If we need more */
if (req > 0) {
/* move old data to bottom of buffer */
if (req < BZ_BUFSIZE)
bcopy(bzf->bzf_buf + req, bzf->bzf_buf, BZ_BUFSIZE - req);
/* read to fill buffer and update availibility data */
result = read(bzf->bzf_rawfd, bzf->bzf_buf + bzf->bzf_bzstream.avail_in, req);
bzf->bzf_bzstream.next_in = bzf->bzf_buf;
if (result >= 0)
bzf->bzf_bzstream.avail_in += result;
}
return(result);
}
/*
 * Adapted from get_byte in libz.
 *
 * Consume and return the next byte of raw (still compressed) input,
 * refilling the input buffer from the file when it is empty.
 *
 * Returns the byte as a value in the range 0..255, or -1 if the refill
 * failed or the file has no more data.
 */
static int
get_byte(struct bz_file *bzf)
{
    if (bzf->bzf_bzstream.avail_in == 0) {
        if (bzf_fill(bzf) == -1)
            return (-1);
        /*
         * bzf_fill() returns 0 at end of file and leaves avail_in at 0;
         * decrementing it then would wrap the unsigned counter and hand
         * back a stale byte from the buffer.
         */
        if (bzf->bzf_bzstream.avail_in == 0)
            return (-1);
    }
    bzf->bzf_bzstream.avail_in--;
    /* Go through unsigned char so a data byte can never look like -1. */
    return ((unsigned char)*(bzf->bzf_bzstream.next_in)++);
}
static int bz_magic[3] = {'B', 'Z', 'h'}; /* bzip2 magic header */

/*
 * Adapted from check_header in libz.
 *
 * Validate the four-byte bzip2 stream header: the magic "BZh" followed
 * by a block-size digit '1'..'9'.  On success the four bytes are handed
 * back to the input stream so the decompressor sees them again.
 *
 * Returns 0 if the header is OK, 1 if not (in which case the consumed
 * bytes are not put back).
 */
static int
check_header(struct bz_file *bzf)
{
    unsigned int idx = 0;
    int byte;

    /* The first three bytes must match the magic exactly */
    while (idx < 3) {
        byte = get_byte(bzf);
        if (byte != bz_magic[idx])
            return (1);
        idx++;
    }

    /* The fourth byte is the block size and must be a digit 1-9 */
    byte = get_byte(bzf);
    if (!(byte >= '1' && byte <= '9'))
        return (1);

    /* Un-read the four header bytes we have just taken */
    bzf->bzf_bzstream.avail_in += 4;
    bzf->bzf_bzstream.next_in -= 4;
    return (0);
}
/*
 * Open "<fname>.bz2" for transparent decompression.
 *
 * f must have been opened purely for reading (f_flags == F_READ).  On
 * success a freshly allocated struct bz_file is stored in f->f_fsdata.
 *
 * Returns 0 on success, otherwise an errno value:
 *   EPERM   f is not in plain read mode
 *   ENOENT  fname already has a compressed/split suffix, or the .bz2
 *           file cannot be opened or stat'ed
 *   ENOMEM  out of memory
 *   EISDIR  the .bz2 path is not a regular file
 *   EFTYPE  the file does not start with a valid bzip2 header
 *   EIO     the decompressor could not be initialised
 */
static int
bzf_open(const char *fname, struct open_file *f)
{
    char *bzfname;
    int rawfd;
    struct bz_file *bzf;
    char *cp;
    int error;
    struct stat sb;

    /* Have to be in "just read it" mode */
    if (f->f_flags != F_READ)
        return (EPERM);

    /* If the name already ends in .gz, .bz2 or .split, ignore it */
    if ((cp = strrchr(fname, '.')) && (!strcmp(cp, ".gz")
        || !strcmp(cp, ".bz2") || !strcmp(cp, ".split")))
        return (ENOENT);

    /* Construct new name: room for ".bz2" plus the terminating NUL */
    bzfname = malloc(strlen(fname) + 5);
    if (bzfname == NULL)
        return (ENOMEM);
    sprintf(bzfname, "%s.bz2", fname);

    /* Try to open the compressed datafile */
    rawfd = open(bzfname, O_RDONLY);
    free(bzfname);
    if (rawfd == -1)
        return (ENOENT);

    if (fstat(rawfd, &sb) < 0) {
        printf("bzf_open: stat failed\n");
        close(rawfd);
        return (ENOENT);
    }
    if (!S_ISREG(sb.st_mode)) {
        printf("bzf_open: not a file\n");
        close(rawfd);
        return (EISDIR); /* best guess */
    }

    /* Allocate a bz_file structure, populate it */
    bzf = malloc(sizeof(*bzf));
    if (bzf == NULL) {
        close(rawfd);
        return (ENOMEM);
    }
    bzero(bzf, sizeof(*bzf));
    bzf->bzf_rawfd = rawfd;

    /*
     * Verify that the file is bzipped.  The decompressor has not been
     * initialised yet, so on failure there is no stream state to tear
     * down: only the descriptor and the structure are released.
     */
    if (check_header(bzf)) {
        close(bzf->bzf_rawfd);
        free(bzf);
        return (EFTYPE);
    }

    /* Initialise the inflation engine */
    if ((error = BZ2_bzDecompressInit(&(bzf->bzf_bzstream), 0, 1)) != BZ_OK) {
        printf("bzf_open: BZ2_bzDecompressInit returned %d\n", error);
        close(bzf->bzf_rawfd);
        free(bzf);
        return (EIO);
    }

    /* Looks OK, we'll take it */
    f->f_fsdata = bzf;
    return (0);
}
static int
bzf_close(struct open_file *f)
{
struct bz_file *bzf = (struct bz_file *)f->f_fsdata;
BZ2_bzDecompressEnd(&(bzf->bzf_bzstream));
close(bzf->bzf_rawfd);
free(bzf);
return(0);
}
static int
bzf_read(struct open_file *f, void *buf, size_t size, size_t *resid)
{
struct bz_file *bzf = (struct bz_file *)f->f_fsdata;
int error;
bzf->bzf_bzstream.next_out = buf; /* where and how much */
bzf->bzf_bzstream.avail_out = size;
while (bzf->bzf_bzstream.avail_out && bzf->bzf_endseen == 0) {
if ((bzf->bzf_bzstream.avail_in == 0) && (bzf_fill(bzf) == -1)) {
printf("bzf_read: fill error\n");
return(EIO);
}
if (bzf->bzf_bzstream.avail_in == 0) { /* oops, unexpected EOF */
printf("bzf_read: unexpected EOF\n");
if (bzf->bzf_bzstream.avail_out == size)
return (EIO);
break;
}
error = BZ2_bzDecompress(&bzf->bzf_bzstream); /* decompression pass */
if (error == BZ_STREAM_END) { /* EOF, all done */
bzf->bzf_endseen = 1;
break;
}
if (error != BZ_OK) { /* argh, decompression error */
printf("bzf_read: BZ2_bzDecompress returned %d\n", error);
return(EIO);
}
}
if (resid != NULL)
*resid = bzf->bzf_bzstream.avail_out;
return(0);
}
/*
 * Reposition the decompressed stream.
 *
 * Only forward movement is possible: the stream is advanced by
 * decompressing and discarding data.  SEEK_SET and SEEK_CUR are
 * supported; SEEK_END is not, since the decompressed size is unknown.
 *
 * Returns the resulting offset, which is smaller than the requested one
 * when the stream ends first.  On failure returns -1 with errno set to
 * EINVAL (unsupported 'where'), EOFFSET (target lies behind the current
 * position) or the error reported by bzf_read().
 */
static off_t
bzf_seek(struct open_file *f, off_t offset, int where)
{
    struct bz_file *bzf = (struct bz_file *)f->f_fsdata;
    off_t target;
    off_t remaining;
    size_t chunk;
    char discard[16];

    switch (where) {
    case SEEK_SET:
        target = offset;
        break;
    case SEEK_CUR:
        target = offset + bzf->bzf_bzstream.total_out_lo32;
        break;
    case SEEK_END: /* decompressed size is not known */
    default:
        errno = EINVAL;
        return (-1);
    }

    /* Can we get there from here? */
    if (target < bzf->bzf_bzstream.total_out_lo32) {
        errno = EOFFSET;
        return (-1);
    }

    /* skip forwards if required */
    while (target > bzf->bzf_bzstream.total_out_lo32) {
        /*
         * Clamp the distance in off_t space; squeezing it through an
         * unsigned int first could truncate a large distance to 0.
         */
        remaining = target - bzf->bzf_bzstream.total_out_lo32;
        chunk = sizeof(discard);
        if (remaining < (off_t)chunk)
            chunk = (size_t)remaining;

        errno = bzf_read(f, discard, chunk, NULL);
        if (errno)
            return (-1);

        /*
         * Once the end of the stream has been seen bzf_read() succeeds
         * without producing anything, so stop here instead of spinning
         * forever on a target beyond the end of the data.
         */
        if (bzf->bzf_endseen)
            break;
    }

    /* This is where we are (be honest if we could not get all the way) */
    return (bzf->bzf_bzstream.total_out_lo32);
}
/*
 * Report the attributes of the underlying compressed file, except that
 * st_size is set to -1 because the decompressed size is not known.
 * Returns whatever fstat() returned; sb->st_size is only overridden
 * when fstat() succeeded.
 */
static int
bzf_stat(struct open_file *f, struct stat *sb)
{
    struct bz_file *bzf = f->f_fsdata;
    int rc;

    rc = fstat(bzf->bzf_rawfd, sb);
    if (rc != 0)
        return (rc);

    /* stat as normal, but indicate that size is unknown */
    sb->st_size = -1;
    return (0);
}
/*
 * Fatal-error hook with external linkage.  Presumably this is the
 * callback libbz2 expects the application to supply when the library is
 * built without stdio (see the bzip2 manual) -- confirm against the
 * build configuration.  There is no way to recover, so give up.
 */
void
bz_internal_error(int errorcode)
{

    panic("bzipfs: critical error %d in bzip2 library occured\n", errorcode);
}
#ifdef REGRESSION
/*
 * Small test case: open "test" (i.e. test.bz2) and decompress it to the
 * end, discarding the data.
 *
 * Exit status: 0 on success, 1 if the file could not be opened, 2 if a
 * read failed.
 */
int main()
{
    struct open_file f;
    char buf[1024];
    size_t resid;
    int err;

    memset(&f, '\0', sizeof(f));
    f.f_flags = F_READ;

    err = bzf_open("test", &f);
    if (err != 0)
        exit(1);

    /* Keep reading until a call delivers nothing at all */
    for (;;) {
        err = bzf_read(&f, buf, sizeof(buf), &resid);
        if (err != 0)
            exit(2);
        if (resid == sizeof(buf))
            break;
    }
    exit(0);
}
#endif