Allow on-the-wire data to be compressed using one of two algorithms:

- HOLE - it simply turns all-zero blocks into a header of a few bytes;
	it is extremely fast, so it is turned on by default;
	it is mostly intended to speed up initial synchronization
	where we expect many zeros;
- LZF - a very fast algorithm by Marc Alexander Lehmann, which shows
	a very decent compression ratio and has a BSD license.

MFC after:	2 weeks
This commit is contained in:
Pawel Jakub Dawidek 2011-03-06 23:09:33 +00:00
parent 563effb9ff
commit 8cd3d45ad9
14 changed files with 1037 additions and 4 deletions

View File

@ -8,7 +8,8 @@ PROG= hastctl
SRCS= activemap.c
SRCS+= crc32.c
SRCS+= ebuf.c
SRCS+= hast_checksum.c hast_proto.c hastctl.c
SRCS+= hast_checksum.c hast_compression.c hast_proto.c hastctl.c
SRCS+= lzf.c
SRCS+= metadata.c
SRCS+= nv.c
SRCS+= parse.y pjdlog.c

View File

@ -6,7 +6,8 @@ PROG= hastd
SRCS= activemap.c
SRCS+= control.c crc32.c
SRCS+= ebuf.c event.c
SRCS+= hast_checksum.c hast_proto.c hastd.c hooks.c
SRCS+= hast_checksum.c hast_compression.c hast_proto.c hastd.c hooks.c
SRCS+= lzf.c
SRCS+= metadata.c
SRCS+= nv.c
SRCS+= secondary.c

View File

@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include "hast.h"
#include "hastd.h"
#include "hast_checksum.h"
#include "hast_compression.h"
#include "hast_proto.h"
#include "hooks.h"
#include "nv.h"
@ -249,6 +250,8 @@ control_status(struct hastd_config *cfg, struct nv *nvout,
}
nv_add_string(nvout, checksum_name(res->hr_checksum),
"checksum%u", no);
nv_add_string(nvout, compression_name(res->hr_compression),
"compression%u", no);
nv_add_string(nvout, role2str(res->hr_role), "role%u", no);
switch (res->hr_role) {

View File

@ -60,6 +60,7 @@ control <addr>
listen <addr>
replication <mode>
checksum <algorithm>
compression <algorithm>
timeout <seconds>
exec <path>
@ -79,6 +80,7 @@ resource <name> {
# Resource section
replication <mode>
checksum <algorithm>
compression <algorithm>
name <name>
local <path>
timeout <seconds>
@ -215,6 +217,24 @@ CRC32 checksum will be calculated.
.It Ic sha256
SHA256 checksum will be calculated.
.El
.It Ic compression Aq algorithm
.Pp
Compression algorithm should be one of the following:
.Bl -tag -width ".Ic none"
.It Ic none
Data sent over the network will not be compressed.
.It Ic hole
Only blocks that contain all zeros will be compressed.
This is very useful for initial synchronization where potentially many blocks
are still all zeros.
There should be no measurable performance overhead when this algorithm is being
used.
This is the default setting.
.It Ic lzf
The LZF algorithm by Marc Alexander Lehmann will be used to compress the data
sent over the network.
LZF is a very fast, general-purpose compression algorithm.
.El
.It Ic timeout Aq seconds
.Pp
Connection timeout in seconds.

View File

@ -117,6 +117,10 @@ struct hastd_config {
#define HAST_REPLICATION_MEMSYNC 1
#define HAST_REPLICATION_ASYNC 2
#define HAST_COMPRESSION_NONE 0
#define HAST_COMPRESSION_HOLE 1
#define HAST_COMPRESSION_LZF 2
#define HAST_CHECKSUM_NONE 0
#define HAST_CHECKSUM_CRC32 1
#define HAST_CHECKSUM_SHA256 2
@ -137,6 +141,8 @@ struct hast_resource {
int hr_keepdirty;
/* Path to a program to execute on various events. */
char hr_exec[PATH_MAX];
/* Compression algorithm. */
int hr_compression;
/* Checksum algorithm. */
int hr_checksum;

View File

@ -0,0 +1,283 @@
/*-
* Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/endian.h>
#include <errno.h>
#include <string.h>
#include <strings.h>
#include <hast.h>
#include <lzf.h>
#include <nv.h>
#include <pjdlog.h>
#include "hast_compression.h"
/*
 * Check whether the given buffer consists solely of zero bytes.
 *
 * data - buffer to inspect; it is read through a uint64_t pointer, so it is
 *	  assumed to be suitably aligned for that type (NOTE(review): the
 *	  callers' allocation is not visible here - confirm).
 * size - length of the buffer in bytes; must be a multiple of
 *	  sizeof(uint64_t), which is asserted.
 *
 * Returns true when every byte is zero (an empty buffer trivially
 * qualifies) and false otherwise.
 */
static bool
allzeros(const void *data, size_t size)
{
	const uint64_t *p = data;
	size_t i, nwords;
	uint64_t v;

	PJDLOG_ASSERT((size % sizeof(*p)) == 0);

	nwords = size / sizeof(*p);
	/*
	 * Bail out on an empty buffer before the probes below, which would
	 * otherwise read p[0] and p[-1], both outside of the buffer.
	 */
	if (nwords == 0)
		return (true);

	/*
	 * This is the fastest method I found for checking if the given
	 * buffer contains all zeros.
	 * Because inside the loop we don't check at every step, we would
	 * get an answer only after walking through the entire buffer.
	 * To return early if the buffer doesn't contain all zeros, we probe
	 * 8 bytes at the beginning, in the middle and at the end of the
	 * buffer first.
	 */
	if ((p[0] | p[nwords >> 1] | p[nwords - 1]) != 0)
		return (false);

	/* Index with size_t so that the counter cannot wrap before nwords. */
	v = 0;
	for (i = 0; i < nwords; i++)
		v |= p[i];
	return (v == 0);
}
/*
 * "Hole" compression: collapse an all-zero buffer into a tiny header.
 *
 * data  - buffer to compress.
 * sizep - in: length of data in bytes; out: length of the returned buffer.
 *	   It is left untouched when NULL is returned.
 *
 * On success returns a newly malloc(3)ed buffer that holds nothing but the
 * original length as a 32-bit little-endian integer; the caller owns it.
 * Returns NULL when the data cannot be represented as a hole (it is empty,
 * not all zeros, or longer than the 32-bit header can describe) or when
 * memory for the header cannot be allocated.
 */
static void *
hast_hole_compress(const unsigned char *data, size_t *sizep)
{
	uint32_t size;
	void *newbuf;

	/* There is nothing to gain from "compressing" an empty buffer. */
	if (*sizep == 0)
		return (NULL);
	/*
	 * The header carries the length in 32 bits.  Refuse anything that
	 * does not fit instead of silently truncating it, which would make
	 * the receiver reconstruct a buffer of the wrong size.
	 */
	if ((size_t)(uint32_t)*sizep != *sizep)
		return (NULL);
	if (!allzeros(data, *sizep))
		return (NULL);

	newbuf = malloc(sizeof(size));
	if (newbuf == NULL) {
		/* Report the size of the allocation that actually failed. */
		pjdlog_warning("Unable to compress (no memory: %zu).",
		    sizeof(size));
		return (NULL);
	}
	size = htole32((uint32_t)*sizep);
	bcopy(&size, newbuf, sizeof(size));
	*sizep = sizeof(size);

	return (newbuf);
}
/*
 * Undo "hole" compression: expand the header back into a buffer of zeros.
 *
 * data  - compressed payload; must be exactly one 32-bit little-endian
 *	   integer holding the original length.
 * sizep - in: length of data in bytes; out: length of the returned buffer.
 *
 * Returns a newly malloc(3)ed, zero-filled buffer owned by the caller, or
 * NULL (after logging an error) when the payload has an unexpected size or
 * memory cannot be allocated.
 */
static void *
hast_hole_decompress(const unsigned char *data, size_t *sizep)
{
	uint32_t holesize;
	void *zeros;

	if (*sizep != sizeof(holesize)) {
		pjdlog_error("Unable to decompress (invalid size: %zu).",
		    *sizep);
		return (NULL);
	}

	/* Fetch the original length from the wire format. */
	memcpy(&holesize, data, sizeof(holesize));
	holesize = le32toh(holesize);

	zeros = malloc(holesize);
	if (zeros == NULL) {
		pjdlog_error("Unable to decompress (no memory: %zu).",
		    (size_t)holesize);
		return (NULL);
	}
	memset(zeros, 0, holesize);

	*sizep = holesize;
	return (zeros);
}
/*
 * Blocks of this size or smaller are not handed to LZF at all.  The same
 * value is subtracted from the output buffer size below, so a block is sent
 * compressed only if LZF manages to fit it into that smaller buffer.
 */
#define	HAST_LZF_COMPRESS_MIN	1024

/*
 * Compress the given buffer with LZF.
 *
 * data  - buffer to compress.
 * sizep - in: length of data in bytes; out: length of the returned buffer.
 *	   It is left untouched when NULL is returned.
 *
 * On success returns a newly malloc(3)ed buffer owned by the caller: the
 * original length as a 32-bit little-endian integer followed by the LZF
 * stream.  Returns NULL when the block is too small to bother, when
 * lzf_compress() reports failure (it returns 0, e.g. because the result does
 * not fit the output buffer) or when memory cannot be allocated.
 */
static void *
hast_lzf_compress(const unsigned char *data, size_t *sizep)
{
	unsigned char *out;
	uint32_t rawlen, header;
	size_t bufsize, packedlen;

	rawlen = *sizep;
	if (rawlen <= HAST_LZF_COMPRESS_MIN)
		return (NULL);

	bufsize = sizeof(rawlen) + rawlen - HAST_LZF_COMPRESS_MIN;
	out = malloc(bufsize);
	if (out == NULL) {
		pjdlog_warning("Unable to compress (no memory: %zu).",
		    bufsize);
		return (NULL);
	}

	/* The LZF stream goes right after the length header. */
	packedlen = lzf_compress(data, *sizep, out + sizeof(rawlen),
	    bufsize - sizeof(rawlen));
	if (packedlen == 0) {
		free(out);
		return (NULL);
	}

	header = htole32(rawlen);
	memcpy(out, &header, sizeof(header));
	*sizep = sizeof(header) + packedlen;

	return (out);
}
/*
 * Decompress a buffer produced by hast_lzf_compress().
 *
 * data  - compressed payload: the original length as a 32-bit little-endian
 *	   integer followed by the LZF stream.
 * sizep - in: length of data in bytes; out: length of the returned buffer.
 *	   It is left untouched when NULL is returned.
 *
 * Returns a newly malloc(3)ed buffer with the original data, owned by the
 * caller, or NULL (after logging an error) when the payload is malformed,
 * cannot be decompressed or memory cannot be allocated.
 *
 * The payload is received from the other node, so every value taken from it
 * is validated and rejected with an error instead of being asserted on; a
 * corrupted stream must not be able to abort the daemon.
 */
static void *
hast_lzf_decompress(const unsigned char *data, size_t *sizep)
{
	unsigned char *newbuf;
	uint32_t origsize;
	size_t newsize;

	/* We need the length header plus at least one byte of LZF data. */
	if (*sizep <= sizeof(origsize)) {
		pjdlog_error("Unable to decompress (invalid size: %zu).",
		    *sizep);
		return (NULL);
	}

	bcopy(data, &origsize, sizeof(origsize));
	origsize = le32toh(origsize);
	/* The sender never compresses blocks this small. */
	if (origsize <= HAST_LZF_COMPRESS_MIN) {
		pjdlog_error("Unable to decompress (invalid original size: %zu).",
		    (size_t)origsize);
		return (NULL);
	}

	newbuf = malloc(origsize);
	if (newbuf == NULL) {
		pjdlog_error("Unable to decompress (no memory: %zu).",
		    (size_t)origsize);
		return (NULL);
	}
	newsize = lzf_decompress(data + sizeof(origsize),
	    *sizep - sizeof(origsize), newbuf, origsize);
	if (newsize == 0) {
		free(newbuf);
		pjdlog_error("Unable to decompress.");
		return (NULL);
	}
	/* A stream that expands to less than advertised is corrupted. */
	if (newsize != origsize) {
		free(newbuf);
		pjdlog_error("Unable to decompress (size mismatch: %zu != %zu).",
		    newsize, (size_t)origsize);
		return (NULL);
	}
	*sizep = newsize;

	return (newbuf);
}
/*
 * Translate a HAST_COMPRESSION_* value into its textual name.
 * Any value that is not one of the known algorithms yields "unknown".
 */
const char *
compression_name(int num)
{

	if (num == HAST_COMPRESSION_NONE)
		return ("none");
	if (num == HAST_COMPRESSION_HOLE)
		return ("hole");
	if (num == HAST_COMPRESSION_LZF)
		return ("lzf");
	return ("unknown");
}
/*
 * Sending side of the compression stage.
 *
 * Tries to compress *datap using the algorithm configured for the resource.
 * When that succeeds, the name of the algorithm actually used is stored in
 * nv under "compression", the previous buffer is freed if *freedatap says we
 * own it, and *datap, *sizep and *freedatap are updated to describe the new,
 * malloc(3)ed buffer.
 *
 * Returns 0 on success, which includes the cases where compression is
 * disabled or the data could not be compressed and is passed through
 * unchanged.  Returns -1 with errno set when the nv header cannot be updated.
 */
int
compression_send(const struct hast_resource *res, struct nv *nv, void **datap,
    size_t *sizep, bool *freedatap)
{
	unsigned char *packed;
	size_t packedsize;
	int used;

	used = res->hr_compression;
	switch (used) {
	case HAST_COMPRESSION_NONE:
		return (0);
	case HAST_COMPRESSION_HOLE:
	case HAST_COMPRESSION_LZF:
		break;
	default:
		PJDLOG_ABORT("Invalid compression: %d.", res->hr_compression);
	}

	/*
	 * Both algorithms start with the 'hole' check.  If it succeeds the
	 * data is tagged as 'hole' even when 'lzf' is configured; only 'lzf'
	 * has a second attempt to make.
	 */
	packedsize = *sizep;
	packed = hast_hole_compress(*datap, &packedsize);
	if (packed != NULL)
		used = HAST_COMPRESSION_HOLE;
	else if (used == HAST_COMPRESSION_LZF)
		packed = hast_lzf_compress(*datap, &packedsize);

	/* Unable to compress the data, send it as it is. */
	if (packed == NULL)
		return (0);

	nv_add_string(nv, compression_name(used), "compression");
	if (nv_error(nv) != 0) {
		free(packed);
		errno = nv_error(nv);
		return (-1);
	}

	/* Hand the compressed buffer over to the caller. */
	if (*freedatap)
		free(*datap);
	*datap = packed;
	*sizep = packedsize;
	*freedatap = true;

	return (0);
}
/*
 * Receiving side of the compression stage.
 *
 * Looks up the "compression" entry in nv.  When it is absent the data was
 * sent uncompressed and is left alone.  Otherwise the data is decompressed
 * with the named algorithm, the previous buffer is freed if *freedatap says
 * we own it, and *datap, *sizep and *freedatap are updated to describe the
 * new, malloc(3)ed buffer.
 *
 * Returns 0 on success and -1 when the algorithm is unknown or the data
 * cannot be decompressed.
 */
int
compression_recv(const struct hast_resource *res __unused, struct nv *nv,
    void **datap, size_t *sizep, bool *freedatap)
{
	void *(*decompress)(const unsigned char *, size_t *);
	unsigned char *plain;
	const char *algo;
	size_t plainsize;

	algo = nv_get_string(nv, "compression");
	if (algo == NULL)
		return (0);	/* No compression. */

	if (strcmp(algo, "hole") == 0) {
		decompress = hast_hole_decompress;
	} else if (strcmp(algo, "lzf") == 0) {
		decompress = hast_lzf_decompress;
	} else {
		pjdlog_error("Unknown compression algorithm '%s'.", algo);
		return (-1);	/* Unknown compression algorithm. */
	}

	plainsize = *sizep;
	plain = decompress(*datap, &plainsize);
	if (plain == NULL)
		return (-1);

	/* Hand the decompressed buffer over to the caller. */
	if (*freedatap)
		free(*datap);
	*datap = plain;
	*sizep = plainsize;
	*freedatap = true;

	return (0);
}

View File

@ -0,0 +1,44 @@
/*-
* Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _HAST_COMPRESSION_H_
#define _HAST_COMPRESSION_H_
#include <stdlib.h> /* size_t */
#include <hast.h>
#include <nv.h>
/*
 * Return the textual name ("none", "hole" or "lzf") of the given
 * HAST_COMPRESSION_* value, or "unknown" for any other number.
 */
const char *compression_name(int num);
/*
 * Sending side: try to compress *datap with the algorithm configured in
 * res->hr_compression.  If the data gets compressed, the algorithm name is
 * added to nv as "compression", the old buffer is freed when *freedatap is
 * set, and *datap, *sizep and *freedatap are updated to describe the newly
 * allocated buffer.  Returns 0 on success (also when the data is left
 * uncompressed) and -1 with errno set when nv cannot be updated.
 */
int compression_send(const struct hast_resource *res, struct nv *nv,
void **datap, size_t *sizep, bool *freedatap);
/*
 * Receiving side: if nv carries a "compression" entry, decompress *datap
 * with the named algorithm and update *datap, *sizep and *freedatap to
 * describe the newly allocated buffer (the old one is freed when *freedatap
 * is set).  Returns 0 on success (also when there is no "compression"
 * entry) and -1 when the algorithm is unknown or decompression fails.
 * The res argument is not used by the implementation.
 */
int compression_recv(const struct hast_resource *res, struct nv *nv,
void **datap, size_t *sizep, bool *freedatap);
#endif /* !_HAST_COMPRESSION_H_ */

View File

@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
#ifdef HAVE_CRYPTO
#include "hast_checksum.h"
#endif
#include "hast_compression.h"
#include "hast_proto.h"
struct hast_main_header {
@ -67,6 +68,7 @@ struct hast_pipe_stage {
};
static struct hast_pipe_stage pipeline[] = {
{ "compression", compression_send, compression_recv },
{ "checksum", checksum_send, checksum_recv }
};

View File

@ -363,6 +363,8 @@ resource_needs_restart(const struct hast_resource *res0,
return (true);
if (res0->hr_checksum != res1->hr_checksum)
return (true);
if (res0->hr_compression != res1->hr_compression)
return (true);
if (res0->hr_timeout != res1->hr_timeout)
return (true);
if (strcmp(res0->hr_exec, res1->hr_exec) != 0)
@ -389,6 +391,8 @@ resource_needs_reload(const struct hast_resource *res0,
return (true);
if (res0->hr_checksum != res1->hr_checksum)
return (true);
if (res0->hr_compression != res1->hr_compression)
return (true);
if (res0->hr_timeout != res1->hr_timeout)
return (true);
if (strcmp(res0->hr_exec, res1->hr_exec) != 0)
@ -409,6 +413,7 @@ resource_reload(const struct hast_resource *res)
nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr");
nv_add_int32(nvout, (int32_t)res->hr_replication, "replication");
nv_add_int32(nvout, (int32_t)res->hr_checksum, "checksum");
nv_add_int32(nvout, (int32_t)res->hr_compression, "compression");
nv_add_int32(nvout, (int32_t)res->hr_timeout, "timeout");
nv_add_string(nvout, res->hr_exec, "exec");
if (nv_error(nvout) != 0) {
@ -568,6 +573,7 @@ hastd_reload(void)
sizeof(cres->hr_remoteaddr));
cres->hr_replication = nres->hr_replication;
cres->hr_checksum = nres->hr_checksum;
cres->hr_compression = nres->hr_compression;
cres->hr_timeout = nres->hr_timeout;
strlcpy(cres->hr_exec, nres->hr_exec,
sizeof(cres->hr_exec));

406
sbin/hastd/lzf.c Normal file
View File

@ -0,0 +1,406 @@
/*
* Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp@schmorp.de>
*
* Redistribution and use in source and binary forms, with or without modifica-
* tion, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
* CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
* CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
* ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Alternatively, the contents of this file may be used under the terms of
* the GNU General Public License ("GPL") version 2 or any later version,
* in which case the provisions of the GPL are applicable instead of
* the above. If you wish to allow the use of your version of this file
* only under the terms of the GPL and not to allow others to use your
* version of this file under the BSD license, indicate your decision
* by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL. If you do not delete the
* provisions above, a recipient may use your version of this file under
* either the BSD or the GPL.
*/
#include "lzf.h"
#define HSIZE (1 << (HLOG))
/*
* don't play with this unless you benchmark!
* decompression is not dependent on the hash function
* the hashing function might seem strange, just believe me
* it works ;)
*/
#ifndef FRST
# define FRST(p) (((p[0]) << 8) | p[1])
# define NEXT(v,p) (((v) << 8) | p[2])
# if ULTRA_FAST
# define IDX(h) ((( h >> (3*8 - HLOG)) - h ) & (HSIZE - 1))
# elif VERY_FAST
# define IDX(h) ((( h >> (3*8 - HLOG)) - h*5) & (HSIZE - 1))
# else
# define IDX(h) ((((h ^ (h << 5)) >> (3*8 - HLOG)) - h*5) & (HSIZE - 1))
# endif
#endif
/*
* IDX works because it is very similar to a multiplicative hash, e.g.
* ((h * 57321 >> (3*8 - HLOG)) & (HSIZE - 1))
* the latter is also quite fast on newer CPUs, and compresses similarly.
*
* the next one is also quite good, albeit slow ;)
* (int)(cos(h & 0xffffff) * 1e6)
*/
#if 0
/* original lzv-like hash function, much worse and thus slower */
# define FRST(p) (p[0] << 5) ^ p[1]
# define NEXT(v,p) ((v) << 5) ^ p[2]
# define IDX(h) ((h) & (HSIZE - 1))
#endif
#define MAX_LIT (1 << 5)
#define MAX_OFF (1 << 13)
#define MAX_REF ((1 << 8) + (1 << 3))
#if __GNUC__ >= 3
# define expect(expr,value) __builtin_expect ((expr),(value))
# define inline inline
#else
# define expect(expr,value) (expr)
# define inline static
#endif
#define expect_false(expr) expect ((expr) != 0, 0)
#define expect_true(expr) expect ((expr) != 0, 1)
/*
* compressed format
*
* 000LLLLL <L+1> ; literal
* LLLooooo oooooooo ; backref L
* 111ooooo LLLLLLLL oooooooo ; backref L+7
*
*/
/*
 * Compress in_len bytes at in_data into out_data, which has room for
 * out_len bytes.
 *
 * Returns the number of bytes written to out_data, or 0 when either length
 * is zero or the compressed stream does not fit into out_len bytes.
 *
 * The hash table (htab) maps a hash of three consecutive input bytes to the
 * most recent input position where that hash was seen.  Unless
 * LZF_STATE_ARG is set it is a local variable of type LZF_STATE, i.e. an
 * array of (1 << HLOG) pointers placed on the stack.
 *
 * Literal runs are emitted by reserving one output byte for the run header
 * ("start run": op++) and back-patching it with the run length minus one
 * once the run ends ("stop run": op[-lit - 1] = lit - 1).
 */
unsigned int
lzf_compress (const void *const in_data, unsigned int in_len,
void *out_data, unsigned int out_len
#if LZF_STATE_ARG
, LZF_STATE htab
#endif
)
{
#if !LZF_STATE_ARG
LZF_STATE htab;
#endif
const u8 **hslot;
const u8 *ip = (const u8 *)in_data;
u8 *op = (u8 *)out_data;
const u8 *in_end = ip + in_len;
u8 *out_end = op + out_len;
const u8 *ref;
/* off requires a type wide enough to hold a general pointer difference.
* ISO C doesn't have that (size_t might not be enough and ptrdiff_t only
* works for differences within a single object). We also assume that
* no bit pattern traps. Since the only platform that is both non-POSIX
* and fails to support both assumptions is windows 64 bit, we make a
* special workaround for it.
*/
#if defined (WIN32) && defined (_M_X64)
unsigned _int64 off; /* workaround for missing POSIX compliance */
#else
unsigned long off;
#endif
unsigned int hval;
int lit;
if (!in_len || !out_len)
return 0;
#if INIT_HTAB
/* start from an all-NULL table so that stale entries cannot be matched */
memset (htab, 0, sizeof (htab));
# if 0
for (hslot = htab; hslot < htab + HSIZE; hslot++)
*hslot++ = ip;
# endif
#endif
lit = 0; op++; /* start run */
/*
* NOTE(review): FRST reads ip[0] and ip[1], but only in_len != 0 has been
* checked so far, so a one-byte input is read one byte past its end here.
*/
hval = FRST (ip);
/* main loop: needs at least three bytes of lookahead for the hash */
while (ip < in_end - 2)
{
hval = NEXT (hval, ip);
hslot = htab + IDX (hval);
/* fetch the previous position with this hash and record the current one */
ref = *hslot; *hslot = ip;
if (1
#if INIT_HTAB
&& ref < ip /* the next test will actually take care of this, but this is faster */
#endif
&& (off = ip - ref - 1) < MAX_OFF
&& ip + 4 < in_end
&& ref > (const u8 *)in_data
#if STRICT_ALIGN
&& ref[0] == ip[0]
&& ref[1] == ip[1]
&& ref[2] == ip[2]
#else
&& *(const u16 *)ref == *(const u16 *)ip
&& ref[2] == ip[2]
#endif
)
{
/* match found at *ref++ */
unsigned int len = 2;
unsigned int maxlen = in_end - ip - len;
maxlen = maxlen > MAX_REF ? MAX_REF : maxlen;
if (expect_false (op + 3 + 1 >= out_end)) /* first a faster conservative test */
if (op - !lit + 3 + 1 >= out_end) /* second the exact but rare test */
return 0;
op [- lit - 1] = lit - 1; /* stop run */
op -= !lit; /* undo run if length is zero */
/* extend the match; the unrolled part needs no bound check since maxlen > 16 */
for (;;)
{
if (expect_true (maxlen > 16))
{
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
}
do
len++;
while (len < maxlen && ref[len] == ip[len]);
break;
}
len -= 2; /* len is now #octets - 1 */
ip++;
/* emit the back reference: short form for len < 7, long form otherwise */
if (len < 7)
{
*op++ = (off >> 8) + (len << 5);
}
else
{
*op++ = (off >> 8) + ( 7 << 5);
*op++ = len - 7;
}
*op++ = off;
lit = 0; op++; /* start run */
ip += len + 1;
if (expect_false (ip >= in_end - 2))
break;
/* re-seed the hash (and, depending on the speed setting, the table) after the match */
#if ULTRA_FAST || VERY_FAST
--ip;
# if VERY_FAST && !ULTRA_FAST
--ip;
# endif
hval = FRST (ip);
hval = NEXT (hval, ip);
htab[IDX (hval)] = ip;
ip++;
# if VERY_FAST && !ULTRA_FAST
hval = NEXT (hval, ip);
htab[IDX (hval)] = ip;
ip++;
# endif
#else
ip -= len + 1;
do
{
hval = NEXT (hval, ip);
htab[IDX (hval)] = ip;
ip++;
}
while (len--);
#endif
}
else
{
/* one more literal byte we must copy */
if (expect_false (op >= out_end))
return 0;
lit++; *op++ = *ip++;
if (expect_false (lit == MAX_LIT))
{
op [- lit - 1] = lit - 1; /* stop run */
lit = 0; op++; /* start run */
}
}
}
if (op + 3 > out_end) /* at most 3 bytes can be missing here */
return 0;
/* flush the input bytes the main loop could not hash as literals */
while (ip < in_end)
{
lit++; *op++ = *ip++;
if (expect_false (lit == MAX_LIT))
{
op [- lit - 1] = lit - 1; /* stop run */
lit = 0; op++; /* start run */
}
}
op [- lit - 1] = lit - 1; /* end run */
op -= !lit; /* undo run if length is zero */
return op - (u8 *)out_data;
}
#if AVOID_ERRNO
# define SET_ERRNO(n)
#else
# include <errno.h>
# define SET_ERRNO(n) errno = (n)
#endif
#if (__i386 || __amd64) && __GNUC__ >= 3
# define lzf_movsb(dst, src, len) \
asm ("rep movsb" \
: "=D" (dst), "=S" (src), "=c" (len) \
: "0" (dst), "1" (src), "2" (len));
#endif
/*
 * Decompress in_len bytes of LZF data at in_data into out_data, which has
 * room for out_len bytes.
 *
 * Returns the number of bytes written to out_data.  Returns 0 and sets
 * errno (through SET_ERRNO, which is a no-op when AVOID_ERRNO is set) to:
 *   E2BIG  - the output does not fit into out_len bytes;
 *   EINVAL - the input ends in the middle of an item (checked only when
 *            CHECK_INPUT is set) or a back reference points before the
 *            start of the output buffer.
 *
 * NOTE(review): the loop is a do/while, so the first control byte is read
 * before in_len is looked at; callers must not pass in_len == 0.
 */
unsigned int
lzf_decompress (const void *const in_data, unsigned int in_len,
void *out_data, unsigned int out_len)
{
u8 const *ip = (const u8 *)in_data;
u8 *op = (u8 *)out_data;
u8 const *const in_end = ip + in_len;
u8 *const out_end = op + out_len;
do
{
/* control byte: values below 32 start a literal run, others a back reference */
unsigned int ctrl = *ip++;
if (ctrl < (1 << 5)) /* literal run */
{
/* the run length is stored minus one */
ctrl++;
if (op + ctrl > out_end)
{
SET_ERRNO (E2BIG);
return 0;
}
#if CHECK_INPUT
if (ip + ctrl > in_end)
{
SET_ERRNO (EINVAL);
return 0;
}
#endif
#ifdef lzf_movsb
lzf_movsb (op, ip, ctrl);
#else
do
*op++ = *ip++;
while (--ctrl);
#endif
}
else /* back reference */
{
/* upper 3 bits: length, lower 5 bits: high part of the offset */
unsigned int len = ctrl >> 5;
u8 *ref = op - ((ctrl & 0x1f) << 8) - 1;
#if CHECK_INPUT
if (ip >= in_end)
{
SET_ERRNO (EINVAL);
return 0;
}
#endif
/* length 7 marks the long form: an extra length byte follows */
if (len == 7)
{
len += *ip++;
#if CHECK_INPUT
if (ip >= in_end)
{
SET_ERRNO (EINVAL);
return 0;
}
#endif
}
/* low byte of the offset */
ref -= *ip++;
/* a back reference copies len + 2 bytes */
if (op + len + 2 > out_end)
{
SET_ERRNO (E2BIG);
return 0;
}
if (ref < (u8 *)out_data)
{
SET_ERRNO (EINVAL);
return 0;
}
#ifdef lzf_movsb
len += 2;
lzf_movsb (op, ref, len);
#else
/* byte-by-byte copy: source and destination may overlap */
*op++ = *ref++;
*op++ = *ref++;
do
*op++ = *ref++;
while (--len);
#endif
}
}
while (ip < in_end);
return op - (u8 *)out_data;
}

211
sbin/hastd/lzf.h Normal file
View File

@ -0,0 +1,211 @@
/*
* Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp@schmorp.de>
*
* Redistribution and use in source and binary forms, with or without modifica-
* tion, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
* CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
* CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
* ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Alternatively, the contents of this file may be used under the terms of
* the GNU General Public License ("GPL") version 2 or any later version,
* in which case the provisions of the GPL are applicable instead of
* the above. If you wish to allow the use of your version of this file
* only under the terms of the GPL and not to allow others to use your
* version of this file under the BSD license, indicate your decision
* by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL. If you do not delete the
* provisions above, a recipient may use your version of this file under
* either the BSD or the GPL.
*/
#ifndef LZF_H
#define LZF_H
/***********************************************************************
**
** lzf -- an extremely fast/free compression/decompression-method
** http://liblzf.plan9.de/
**
** This algorithm is believed to be patent-free.
**
***********************************************************************/
#define LZF_VERSION 0x0105 /* 1.5, API version */
/*
* Compress in_len bytes stored at the memory block starting at
* in_data and write the result to out_data, up to a maximum length
* of out_len bytes.
*
* If the output buffer is not large enough or any error occurs return 0,
* otherwise return the number of bytes used, which might be considerably
* more than in_len (but less than 104% of the original size), so it
* makes sense to always use out_len == in_len - 1, to ensure _some_
* compression, and store the data uncompressed otherwise (with a flag, of
* course).
*
* lzf_compress might use different algorithms on different systems and
* even different runs, thus might result in different compressed strings
* depending on the phase of the moon or similar factors. However, all
* these strings are architecture-independent and will result in the
* original data when decompressed using lzf_decompress.
*
* The buffers must not be overlapping.
*
* If the option LZF_STATE_ARG is enabled, an extra argument must be
* supplied which is not reflected in this header file. Refer to lzfP.h
* and lzf_c.c.
*
*/
unsigned int
lzf_compress (const void *const in_data, unsigned int in_len,
void *out_data, unsigned int out_len);
/*
* Decompress data compressed with some version of the lzf_compress
* function and stored at location in_data and length in_len. The result
* will be stored at out_data up to a maximum of out_len characters.
*
* If the output buffer is not large enough to hold the decompressed
* data, a 0 is returned and errno is set to E2BIG. Otherwise the number
* of decompressed bytes (i.e. the original length of the data) is
* returned.
*
* If an error in the compressed data is detected, a zero is returned and
* errno is set to EINVAL.
*
* This function is very fast, about as fast as a copying loop.
*/
unsigned int
lzf_decompress (const void *const in_data, unsigned int in_len,
void *out_data, unsigned int out_len);
/*
* Size of hashtable is (1 << HLOG) * sizeof (char *)
* decompression is independent of the hash table size
* the difference between 15 and 14 is very small
* for small blocks (and 14 is usually a bit faster).
* For a low-memory/faster configuration, use HLOG == 13;
* For best compression, use 15 or 16 (or more, up to 23).
*/
#ifndef HLOG
# define HLOG 16
#endif
/*
* Sacrifice very little compression quality in favour of compression speed.
* This gives almost the same compression as the default code, and is
* (very roughly) 15% faster. This is the preferred mode of operation.
*/
#ifndef VERY_FAST
# define VERY_FAST 1
#endif
/*
* Sacrifice some more compression quality in favour of compression speed.
* (roughly 1-2% worse compression for large blocks and
* 9-10% for small, redundant, blocks and >>20% better speed in both cases)
* In short: when in need for speed, enable this for binary data,
* possibly disable this for text data.
*/
#ifndef ULTRA_FAST
# define ULTRA_FAST 0
#endif
/*
* Unconditionally aligning does not cost very much, so do it if unsure
*/
#ifndef STRICT_ALIGN
# define STRICT_ALIGN !(defined(__i386) || defined (__amd64))
#endif
/*
* You may choose to pre-set the hash table (might be faster on some
* modern cpus and large (>>64k) blocks, and also makes compression
* deterministic/repeatable when the configuration otherwise is the same).
*/
#ifndef INIT_HTAB
# define INIT_HTAB 1
#endif
/*
* Avoid assigning values to errno variable? for some embedding purposes
* (linux kernel for example), this is necessary. NOTE: this breaks
* the documentation in lzf.h.
*/
#ifndef AVOID_ERRNO
# define AVOID_ERRNO 0
#endif
/*
* Whether to pass the LZF_STATE variable as argument, or allocate it
* on the stack. For small-stack environments, define this to 1.
* NOTE: this breaks the prototype in lzf.h.
*/
#ifndef LZF_STATE_ARG
# define LZF_STATE_ARG 0
#endif
/*
* Whether to add extra checks for input validity in lzf_decompress
* and return EINVAL if the input stream has been corrupted. This
* only shields against overflowing the input buffer and will not
* detect most corrupted streams.
* This check is not normally noticeable on modern hardware
* (<1% slowdown), but might slow down older cpus considerably.
*/
#ifndef CHECK_INPUT
# define CHECK_INPUT 1
#endif
/*****************************************************************************/
/* nothing should be changed below */
typedef unsigned char u8;
typedef const u8 *LZF_STATE[1 << (HLOG)];
#if !STRICT_ALIGN
/* for unaligned accesses we need a 16 bit datatype. */
# include <limits.h>
# if USHRT_MAX == 65535
typedef unsigned short u16;
# elif UINT_MAX == 65535
typedef unsigned int u16;
# else
# undef STRICT_ALIGN
# define STRICT_ALIGN 1
# endif
#endif
#if ULTRA_FAST
# if defined(VERY_FAST)
# undef VERY_FAST
# endif
#endif
#if INIT_HTAB
# ifdef __cplusplus
# include <cstring>
# else
# include <string.h>
# endif
#endif
#endif

View File

@ -62,6 +62,7 @@ static char depth0_control[HAST_ADDRSIZE];
static char depth0_listen[HAST_ADDRSIZE];
static int depth0_replication;
static int depth0_checksum;
static int depth0_compression;
static int depth0_timeout;
static char depth0_exec[PATH_MAX];
@ -170,6 +171,7 @@ yy_config_parse(const char *config, bool exitonerror)
depth0_timeout = HAST_TIMEOUT;
depth0_replication = HAST_REPLICATION_MEMSYNC;
depth0_checksum = HAST_CHECKSUM_NONE;
depth0_compression = HAST_COMPRESSION_HOLE;
strlcpy(depth0_control, HAST_CONTROL, sizeof(depth0_control));
strlcpy(depth0_listen, HASTD_LISTEN, sizeof(depth0_listen));
depth0_exec[0] = '\0';
@ -233,6 +235,13 @@ yy_config_parse(const char *config, bool exitonerror)
*/
curres->hr_checksum = depth0_checksum;
}
if (curres->hr_compression == -1) {
/*
* Compression is not set at resource-level.
* Use global or default setting.
*/
curres->hr_compression = depth0_compression;
}
if (curres->hr_timeout == -1) {
/*
* Timeout is not set at resource-level.
@ -266,13 +275,14 @@ yy_config_free(struct hastd_config *config)
}
%}
%token CONTROL LISTEN PORT REPLICATION CHECKSUM
%token CONTROL LISTEN PORT REPLICATION CHECKSUM COMPRESSION
%token TIMEOUT EXEC EXTENTSIZE RESOURCE NAME LOCAL REMOTE ON
%token FULLSYNC MEMSYNC ASYNC NONE CRC32 SHA256
%token FULLSYNC MEMSYNC ASYNC NONE CRC32 SHA256 HOLE LZF
%token NUM STR OB CB
%type <num> replication_type
%type <num> checksum_type
%type <num> compression_type
%union
{
@ -299,6 +309,8 @@ statement:
|
checksum_statement
|
compression_statement
|
timeout_statement
|
exec_statement
@ -416,6 +428,30 @@ checksum_type:
SHA256 { $$ = HAST_CHECKSUM_SHA256; }
;
/*
 * "compression <algorithm>" statement.
 * At depth 0 it sets depth0_compression, the value resources fall back to
 * when they do not set compression themselves; at depth 1 it sets the
 * algorithm of the resource currently being parsed.
 */
compression_statement: COMPRESSION compression_type
{
switch (depth) {
case 0:
depth0_compression = $2;
break;
case 1:
/* curres may be NULL here, in which case the value is dropped. */
if (curres != NULL)
curres->hr_compression = $2;
break;
default:
assert(!"compression at wrong depth level");
}
}
;
/* Map the algorithm keyword to the matching HAST_COMPRESSION_* value. */
compression_type:
NONE { $$ = HAST_COMPRESSION_NONE; }
|
HOLE { $$ = HAST_COMPRESSION_HOLE; }
|
LZF { $$ = HAST_COMPRESSION_LZF; }
;
timeout_statement: TIMEOUT NUM
{
switch (depth) {
@ -609,6 +645,7 @@ resource_start: STR
curres->hr_previous_role = HAST_ROLE_INIT;
curres->hr_replication = -1;
curres->hr_checksum = -1;
curres->hr_compression = -1;
curres->hr_timeout = -1;
curres->hr_exec[0] = '\0';
curres->hr_provname[0] = '\0';
@ -629,6 +666,8 @@ resource_entry:
|
checksum_statement
|
compression_statement
|
timeout_statement
|
exec_statement

View File

@ -1910,6 +1910,7 @@ primary_config_reload(struct hast_resource *res, struct nv *nv)
nv_assert(nv, "remoteaddr");
nv_assert(nv, "replication");
nv_assert(nv, "checksum");
nv_assert(nv, "compression");
nv_assert(nv, "timeout");
nv_assert(nv, "exec");
@ -1918,6 +1919,7 @@ primary_config_reload(struct hast_resource *res, struct nv *nv)
#define MODIFIED_REMOTEADDR 0x01
#define MODIFIED_REPLICATION 0x02
#define MODIFIED_CHECKSUM 0x04
#define MODIFIED_COMPRESSION 0x08
#define MODIFIED_TIMEOUT 0x10
#define MODIFIED_EXEC 0x20
modified = 0;
@ -1941,6 +1943,11 @@ primary_config_reload(struct hast_resource *res, struct nv *nv)
gres->hr_checksum = vint;
modified |= MODIFIED_CHECKSUM;
}
vint = nv_get_int32(nv, "compression");
if (gres->hr_compression != vint) {
gres->hr_compression = vint;
modified |= MODIFIED_COMPRESSION;
}
vint = nv_get_int32(nv, "timeout");
if (gres->hr_timeout != vint) {
gres->hr_timeout = vint;
@ -1994,6 +2001,7 @@ primary_config_reload(struct hast_resource *res, struct nv *nv)
#undef MODIFIED_REMOTEADDR
#undef MODIFIED_REPLICATION
#undef MODIFIED_CHECKSUM
#undef MODIFIED_COMPRESSION
#undef MODIFIED_TIMEOUT
#undef MODIFIED_EXEC

View File

@ -50,6 +50,7 @@ listen { DP; return LISTEN; }
port { DP; return PORT; }
replication { DP; return REPLICATION; }
checksum { DP; return CHECKSUM; }
compression { DP; return COMPRESSION; }
timeout { DP; return TIMEOUT; }
exec { DP; return EXEC; }
resource { DP; return RESOURCE; }
@ -63,6 +64,8 @@ async { DP; return ASYNC; }
none { DP; return NONE; }
crc32 { DP; return CRC32; }
sha256 { DP; return SHA256; }
hole { DP; return HOLE; }
lzf { DP; return LZF; }
[0-9]+ { DP; yylval.num = atoi(yytext); return NUM; }
[a-zA-Z0-9\.\-_/\:]+ { DP; yylval.str = strdup(yytext); return STR; }
\{ { DP; depth++; return OB; }