8cd3d45ad9
- HOLE - it simply turns all-zero blocks into few bytes header; it is extremely fast, so it is turned on by default; it is mostly intended to speed up initial synchronization where we expect many zeros; - LZF - very fast algorithm by Marc Alexander Lehmann, which shows very decent compression ratio and has BSD license. MFC after: 2 weeks
284 lines
6.6 KiB
C
284 lines
6.6 KiB
C
/*-
|
|
* Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
#include <sys/endian.h>

#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

#include <hast.h>
#include <lzf.h>
#include <nv.h>
#include <pjdlog.h>

#include "hast_compression.h"
|
|
|
|
/*
 * Report whether a buffer consists solely of zero bytes.
 *
 * data - buffer to inspect.  It is read through a uint64_t pointer, so it
 *        is assumed to be suitably aligned for 64-bit loads (TODO: confirm
 *        against callers).
 * size - buffer length in bytes; must be a multiple of 8 (asserted).
 *
 * Returns true only for a non-empty buffer in which every byte is zero.
 * An empty buffer yields false: there is nothing to probe, and the caller
 * should not try to "compress" zero bytes of data.
 */
static bool
allzeros(const void *data, size_t size)
{
	const uint64_t *p = data;
	size_t i, nwords;
	uint64_t v;

	PJDLOG_ASSERT((size % sizeof(*p)) == 0);

	nwords = size / sizeof(*p);
	/* The probes below index p[nwords - 1]; never do that for 0 words. */
	if (nwords == 0)
		return (false);

	/*
	 * The loop below ORs all words together without testing at every
	 * step, which is fast but yields an answer only after the entire
	 * buffer has been walked.
	 * To return early when the buffer clearly does not contain all
	 * zeros, probe 8 bytes at the beginning, in the middle and at the
	 * end of the buffer first.
	 */
	if ((p[0] | p[nwords >> 1] | p[nwords - 1]) != 0)
		return (false);

	v = 0;
	for (i = 0; i < nwords; i++)
		v |= p[i];
	return (v == 0);
}
|
|
|
|
/*
 * "Hole" compression: replace an all-zero block with a 4-byte header that
 * holds the original length as a little-endian 32-bit value.
 *
 * data  - block to compress.
 * sizep - in: length of data in bytes; out (on success only): length of
 *         the returned header.
 *
 * Returns a malloc()ed buffer the caller must free(), or NULL when the
 * block is not compressed (it is empty, too large for the header, not all
 * zeros, or memory could not be allocated).  *sizep is left untouched when
 * NULL is returned.
 */
static void *
hast_hole_compress(const unsigned char *data, size_t *sizep)
{
	uint32_t size;
	void *newbuf;

	/*
	 * The header stores the length in 32 bits.  Refuse lengths that do
	 * not survive the round trip instead of silently truncating them,
	 * and refuse empty input, for which there is nothing to compress.
	 */
	size = (uint32_t)*sizep;
	if (*sizep == 0 || (size_t)size != *sizep)
		return (NULL);

	if (!allzeros(data, *sizep))
		return (NULL);

	newbuf = malloc(sizeof(size));
	if (newbuf == NULL) {
		pjdlog_warning("Unable to compress (no memory: %zu).",
		    (size_t)*sizep);
		return (NULL);
	}
	size = htole32(size);
	memcpy(newbuf, &size, sizeof(size));
	*sizep = sizeof(size);

	return (newbuf);
}
|
|
|
|
/*
 * Undo "hole" compression: expand a 4-byte little-endian length header
 * into a zero-filled buffer of that length.
 *
 * data  - received header.
 * sizep - in: length of data, which must be exactly 4 bytes; out (on
 *         success only): length of the returned buffer.
 *
 * Returns a zero-filled buffer the caller must free(), or NULL after
 * logging an error when the header has the wrong length or memory could
 * not be allocated.
 */
static void *
hast_hole_decompress(const unsigned char *data, size_t *sizep)
{
	uint32_t size;
	void *newbuf;

	if (*sizep != sizeof(size)) {
		pjdlog_error("Unable to decompress (invalid size: %zu).",
		    *sizep);
		return (NULL);
	}

	memcpy(&size, data, sizeof(size));
	size = le32toh(size);

	/* calloc() hands back memory that is already zeroed. */
	newbuf = calloc(1, size);
	if (newbuf == NULL) {
		pjdlog_error("Unable to decompress (no memory: %zu).",
		    (size_t)size);
		return (NULL);
	}
	*sizep = size;

	return (newbuf);
}
|
|
|
|
/* Minimum block size to try to compress. */
#define	HAST_LZF_COMPRESS_MIN	1024

/*
 * Compress a block with LZF.
 *
 * The result is a 4-byte little-endian header holding the original length,
 * followed by the LZF stream.  The output buffer is deliberately sized
 * HAST_LZF_COMPRESS_MIN bytes (minus the header) smaller than the input,
 * so blocks that do not shrink enough to fit are reported as
 * incompressible.
 *
 * data  - block to compress.
 * sizep - in: length of data in bytes; out (on success only): length of
 *         the returned buffer, header included.
 *
 * Returns a malloc()ed buffer the caller must free(), or NULL when the
 * block is not compressed (too small, too large for the 32-bit header, out
 * of memory, or lzf_compress() returned 0).  *sizep is left untouched when
 * NULL is returned.
 */
static void *
hast_lzf_compress(const unsigned char *data, size_t *sizep)
{
	unsigned char *newbuf;
	uint32_t origsize;
	size_t newsize;

	/*
	 * The header stores the original length in 32 bits; refuse input
	 * whose length would be truncated rather than emit a header that
	 * disagrees with the payload.
	 */
	origsize = (uint32_t)*sizep;
	if ((size_t)origsize != *sizep)
		return (NULL);

	if (origsize <= HAST_LZF_COMPRESS_MIN)
		return (NULL);

	newsize = sizeof(origsize) + origsize - HAST_LZF_COMPRESS_MIN;
	newbuf = malloc(newsize);
	if (newbuf == NULL) {
		pjdlog_warning("Unable to compress (no memory: %zu).",
		    newsize);
		return (NULL);
	}
	/* The payload goes right after the space reserved for the header. */
	newsize = lzf_compress(data, origsize, newbuf + sizeof(origsize),
	    newsize - sizeof(origsize));
	if (newsize == 0) {
		free(newbuf);
		return (NULL);
	}
	origsize = htole32(origsize);
	memcpy(newbuf, &origsize, sizeof(origsize));

	*sizep = sizeof(origsize) + newsize;
	return (newbuf);
}
|
|
|
|
static void *
|
|
hast_lzf_decompress(const unsigned char *data, size_t *sizep)
|
|
{
|
|
unsigned char *newbuf;
|
|
uint32_t origsize;
|
|
size_t newsize;
|
|
|
|
PJDLOG_ASSERT(*sizep > sizeof(origsize));
|
|
|
|
bcopy(data, &origsize, sizeof(origsize));
|
|
origsize = le32toh(origsize);
|
|
PJDLOG_ASSERT(origsize > HAST_LZF_COMPRESS_MIN);
|
|
|
|
newbuf = malloc(origsize);
|
|
if (newbuf == NULL) {
|
|
pjdlog_error("Unable to decompress (no memory: %zu).",
|
|
(size_t)origsize);
|
|
return (NULL);
|
|
}
|
|
newsize = lzf_decompress(data + sizeof(origsize),
|
|
*sizep - sizeof(origsize), newbuf, origsize);
|
|
if (newsize == 0) {
|
|
free(newbuf);
|
|
pjdlog_error("Unable to decompress.");
|
|
return (NULL);
|
|
}
|
|
PJDLOG_ASSERT(newsize == origsize);
|
|
|
|
*sizep = newsize;
|
|
return (newbuf);
|
|
}
|
|
|
|
const char *
|
|
compression_name(int num)
|
|
{
|
|
|
|
switch (num) {
|
|
case HAST_COMPRESSION_NONE:
|
|
return ("none");
|
|
case HAST_COMPRESSION_HOLE:
|
|
return ("hole");
|
|
case HAST_COMPRESSION_LZF:
|
|
return ("lzf");
|
|
}
|
|
return ("unknown");
|
|
}
|
|
|
|
int
|
|
compression_send(const struct hast_resource *res, struct nv *nv, void **datap,
|
|
size_t *sizep, bool *freedatap)
|
|
{
|
|
unsigned char *newbuf;
|
|
int compression;
|
|
size_t size;
|
|
|
|
size = *sizep;
|
|
compression = res->hr_compression;
|
|
|
|
switch (compression) {
|
|
case HAST_COMPRESSION_NONE:
|
|
return (0);
|
|
case HAST_COMPRESSION_HOLE:
|
|
newbuf = hast_hole_compress(*datap, &size);
|
|
break;
|
|
case HAST_COMPRESSION_LZF:
|
|
/* Try 'hole' compression first. */
|
|
newbuf = hast_hole_compress(*datap, &size);
|
|
if (newbuf != NULL)
|
|
compression = HAST_COMPRESSION_HOLE;
|
|
else
|
|
newbuf = hast_lzf_compress(*datap, &size);
|
|
break;
|
|
default:
|
|
PJDLOG_ABORT("Invalid compression: %d.", res->hr_compression);
|
|
}
|
|
|
|
if (newbuf == NULL) {
|
|
/* Unable to compress the data. */
|
|
return (0);
|
|
}
|
|
nv_add_string(nv, compression_name(compression), "compression");
|
|
if (nv_error(nv) != 0) {
|
|
free(newbuf);
|
|
errno = nv_error(nv);
|
|
return (-1);
|
|
}
|
|
if (*freedatap)
|
|
free(*datap);
|
|
*freedatap = true;
|
|
*datap = newbuf;
|
|
*sizep = size;
|
|
|
|
return (0);
|
|
}
|
|
|
|
int
|
|
compression_recv(const struct hast_resource *res __unused, struct nv *nv,
|
|
void **datap, size_t *sizep, bool *freedatap)
|
|
{
|
|
unsigned char *newbuf;
|
|
const char *algo;
|
|
size_t size;
|
|
|
|
algo = nv_get_string(nv, "compression");
|
|
if (algo == NULL)
|
|
return (0); /* No compression. */
|
|
|
|
newbuf = NULL;
|
|
size = *sizep;
|
|
|
|
if (strcmp(algo, "hole") == 0)
|
|
newbuf = hast_hole_decompress(*datap, &size);
|
|
else if (strcmp(algo, "lzf") == 0)
|
|
newbuf = hast_lzf_decompress(*datap, &size);
|
|
else {
|
|
pjdlog_error("Unknown compression algorithm '%s'.", algo);
|
|
return (-1); /* Unknown compression algorithm. */
|
|
}
|
|
|
|
if (newbuf == NULL)
|
|
return (-1);
|
|
if (*freedatap)
|
|
free(*datap);
|
|
*freedatap = true;
|
|
*datap = newbuf;
|
|
*sizep = size;
|
|
|
|
return (0);
|
|
}
|