Import libucl snapshot 20160604

It replaces xxhash with mumhash. It fixes issues with msgpack on non-x86 platforms.

commit 169b6f53fe
contrib/libucl/src/Makefile.am
@@ -12,8 +12,7 @@ libucl_la_SOURCES= ucl_emitter.c \
 	ucl_schema.c \
 	ucl_util.c \
 	ucl_msgpack.c \
-	ucl_sexp.c \
-	xxhash.c
+	ucl_sexp.c
 libucl_la_CFLAGS= $(libucl_common_cflags) \
 	@CURL_CFLAGS@
 libucl_la_LDFLAGS = -version-info @SO_VERSION@
@@ -25,7 +24,7 @@ libucl_la_LIBADD= @LIBFETCH_LIBS@ \
 include_HEADERS= $(top_srcdir)/include/ucl.h \
 	$(top_srcdir)/include/ucl++.h
 noinst_HEADERS= ucl_internal.h \
-	xxhash.h \
+	mum.h \
 	ucl_hash.h \
 	ucl_chartable.h \
 	tree.h
contrib/libucl/src/mum.h (new file, 417 lines)
@@ -0,0 +1,417 @@
/* Copyright (c) 2016 Vladimir Makarov <vmakarov@gcc.gnu.org>

   Permission is hereby granted, free of charge, to any person
   obtaining a copy of this software and associated documentation
   files (the "Software"), to deal in the Software without
   restriction, including without limitation the rights to use, copy,
   modify, merge, publish, distribute, sublicense, and/or sell copies
   of the Software, and to permit persons to whom the Software is
   furnished to do so, subject to the following conditions:

   The above copyright notice and this permission notice shall be
   included in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   SOFTWARE.
*/

/* This file implements MUM (MUltiply and Mix) hashing.  We randomize
   input data by 64x64-bit multiplication and mixing hi- and low-parts
   of the multiplication result by using an addition and then mix it
   into the current state.  We use prime numbers randomly generated
   with the equal probability of their bit values for the
   multiplication.  When all primes are used once, the state is
   randomized and the same prime numbers are used again for data
   randomization.

   MUM hashing passes all SMHasher tests.  A pseudo-random number
   generator based on MUM also passes the NIST Statistical Test Suite
   for Random and Pseudorandom Number Generators for Cryptographic
   Applications (version 2.2.1) with 1000 bitstreams each containing
   1M bits.  MUM hashing is also faster than Spooky64 and City64 on
   small strings (at least up to 512 bits) on Haswell and Power7.  The
   MUM bulk speed (speed on very long data) is higher than Spooky and
   City on Power7.  On Haswell the bulk speed is higher than Spooky's
   and close to City's.  */

#ifndef __MUM_HASH__
#define __MUM_HASH__

#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>

#ifdef _MSC_VER
typedef unsigned __int16 uint16_t;
typedef unsigned __int32 uint32_t;
typedef unsigned __int64 uint64_t;
#else
#include <stdint.h>
#endif

/* Macro saying to use 128-bit integers implemented by GCC for some
   targets.  */
#ifndef _MUM_USE_INT128
/* In GCC uint128_t is defined if HOST_BITS_PER_WIDE_INT >= 64.
   HOST_WIDE_INT is long if HOST_BITS_PER_LONG > HOST_BITS_PER_INT,
   otherwise int.  */
#if defined(__GNUC__) && UINT_MAX != ULONG_MAX
#define _MUM_USE_INT128 1
#else
#define _MUM_USE_INT128 0
#endif
#endif

#if defined(__GNUC__) && ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 9) || (__GNUC__ > 4))
#define _MUM_FRESH_GCC
#endif

#if defined(__GNUC__) && !defined(__llvm__)
#define _MUM_ATTRIBUTE_UNUSED __attribute__((unused))
#define _MUM_OPTIMIZE(opts) __attribute__((__optimize__ (opts)))
#define _MUM_TARGET(opts) __attribute__((__target__ (opts)))
#else
#define _MUM_ATTRIBUTE_UNUSED
#define _MUM_OPTIMIZE(opts)
#define _MUM_TARGET(opts)
#endif


/* Here are different primes randomly generated with the equal
   probability of their bit values.  They are used to randomize input
   values.  */
static uint64_t _mum_hash_step_prime = 0x2e0bb864e9ea7df5ULL;
static uint64_t _mum_key_step_prime = 0xcdb32970830fcaa1ULL;
static uint64_t _mum_block_start_prime = 0xc42b5e2e6480b23bULL;
static uint64_t _mum_unroll_prime = 0x7b51ec3d22f7096fULL;
static uint64_t _mum_tail_prime = 0xaf47d47c99b1461bULL;
static uint64_t _mum_finish_prime1 = 0xa9a7ae7ceff79f3fULL;
static uint64_t _mum_finish_prime2 = 0xaf47d47c99b1461bULL;

static uint64_t _mum_primes [] = {
  0x9ebdcae10d981691, 0x32b9b9b97a27ac7d, 0x29b5584d83d35bbd, 0x4b04e0e61401255f,
  0x25e8f7b1f1c9d027, 0x80d4c8c000f3e881, 0xbd1255431904b9dd, 0x8a3bd4485eee6d81,
  0x3bc721b2aad05197, 0x71b1a19b907d6e33, 0x525e6c1084a8534b, 0x9e4c2cd340c1299f,
  0xde3add92e94caa37, 0x7e14eadb1f65311d, 0x3f5aa40f89812853, 0x33b15a3b587d15c9,
};

/* Multiply 64-bit V and P and return sum of high and low parts of the
   result.  */
static inline uint64_t
_mum (uint64_t v, uint64_t p) {
  uint64_t hi, lo;
#if _MUM_USE_INT128
#if defined(__aarch64__)
  /* AARCH64 needs 2 insns to calculate 128-bit result of the
     multiplication.  If we use a generic code we actually call a
     function doing 128x128->128 bit multiplication.  The function is
     very slow.  */
  lo = v * p;
  asm ("umulh %0, %1, %2" : "=r" (hi) : "r" (v), "r" (p));
#else
  __uint128_t r = (__uint128_t) v * (__uint128_t) p;
  hi = (uint64_t) (r >> 64);
  lo = (uint64_t) r;
#endif
#else
  /* Implementation of 64x64->128-bit multiplication by four 32x32->64
     bit multiplications.  */
  uint64_t hv = v >> 32, hp = p >> 32;
  uint64_t lv = (uint32_t) v, lp = (uint32_t) p;
  uint64_t rh = hv * hp;
  uint64_t rm_0 = hv * lp;
  uint64_t rm_1 = hp * lv;
  uint64_t rl = lv * lp;
  uint64_t t, carry = 0;

  /* We could ignore a carry bit here if we did not care about the
     same hash for 32-bit and 64-bit targets.  */
  t = rl + (rm_0 << 32);
#ifdef MUM_TARGET_INDEPENDENT_HASH
  carry = t < rl;
#endif
  lo = t + (rm_1 << 32);
#ifdef MUM_TARGET_INDEPENDENT_HASH
  carry += lo < t;
#endif
  hi = rh + (rm_0 >> 32) + (rm_1 >> 32) + carry;
#endif
  /* We could use XOR here too but, for some reasons, on Haswell and
     Power7 using an addition improves hashing performance by 10% for
     small strings.  */
  return hi + lo;
}

#if defined(_MSC_VER)
#define _mum_bswap32(x) _byteswap_ulong (x)
#define _mum_bswap64(x) _byteswap_uint64 (x)
#elif defined(__APPLE__)
#include <libkern/OSByteOrder.h>
#define _mum_bswap32(x) OSSwapInt32 (x)
#define _mum_bswap64(x) OSSwapInt64 (x)
#elif defined(__GNUC__)
#define _mum_bswap32(x) __builtin_bswap32 (x)
#define _mum_bswap64(x) __builtin_bswap64 (x)
#else
#include <byteswap.h>
#define _mum_bswap32(x) bswap_32 (x)
#define _mum_bswap64(x) bswap_64 (x)
#endif

static inline uint64_t
_mum_le (uint64_t v) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
  return v;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return _mum_bswap64 (v);
#else
#error "Unknown endianness"
#endif
}

static inline uint32_t
_mum_le32 (uint32_t v) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
  return v;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return _mum_bswap32 (v);
#else
#error "Unknown endianness"
#endif
}

/* Macro defining how many times the most nested loop in
   _mum_hash_aligned will be unrolled by the compiler (although it can
   make its own decision :).  Use only a constant here to help the
   compiler unroll a major loop.

   The macro value affects the result hash for strings > 128 bit.  The
   unroll factor greatly affects the hashing speed.  We prefer the
   speed.  */
#ifndef _MUM_UNROLL_FACTOR_POWER
#if defined(__PPC64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
#define _MUM_UNROLL_FACTOR_POWER 3
#elif defined(__aarch64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
#define _MUM_UNROLL_FACTOR_POWER 4
#else
#define _MUM_UNROLL_FACTOR_POWER 2
#endif
#endif

#if _MUM_UNROLL_FACTOR_POWER < 1
#error "too small unroll factor"
#elif _MUM_UNROLL_FACTOR_POWER > 4
#error "We have not enough primes for such unroll factor"
#endif

#define _MUM_UNROLL_FACTOR (1 << _MUM_UNROLL_FACTOR_POWER)

static inline uint64_t _MUM_OPTIMIZE("unroll-loops")
_mum_hash_aligned (uint64_t start, const void *key, size_t len) {
  uint64_t result = start;
  const unsigned char *str = (const unsigned char *) key;
  uint64_t u64;
  int i;
  size_t n;

  result = _mum (result, _mum_block_start_prime);
  while (len > _MUM_UNROLL_FACTOR * sizeof (uint64_t)) {
    /* This loop could be vectorized when we have vector insns for
       64x64->128-bit multiplication.  AVX2 currently only has a
       vector insn for 4 32x32->64-bit multiplications.  */
    for (i = 0; i < _MUM_UNROLL_FACTOR; i++)
      result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
    len -= _MUM_UNROLL_FACTOR * sizeof (uint64_t);
    str += _MUM_UNROLL_FACTOR * sizeof (uint64_t);
    /* We will use the same prime numbers on the next iterations --
       randomize the state.  */
    result = _mum (result, _mum_unroll_prime);
  }
  n = len / sizeof (uint64_t);
  for (i = 0; i < (int)n; i++)
    result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
  len -= n * sizeof (uint64_t); str += n * sizeof (uint64_t);
  switch (len) {
  case 7:
    u64 = _mum_le32 (*(uint32_t *) str);
    u64 |= (uint64_t) str[4] << 32;
    u64 |= (uint64_t) str[5] << 40;
    u64 |= (uint64_t) str[6] << 48;
    return result ^ _mum (u64, _mum_tail_prime);
  case 6:
    u64 = _mum_le32 (*(uint32_t *) str);
    u64 |= (uint64_t) str[4] << 32;
    u64 |= (uint64_t) str[5] << 40;
    return result ^ _mum (u64, _mum_tail_prime);
  case 5:
    u64 = _mum_le32 (*(uint32_t *) str);
    u64 |= (uint64_t) str[4] << 32;
    return result ^ _mum (u64, _mum_tail_prime);
  case 4:
    u64 = _mum_le32 (*(uint32_t *) str);
    return result ^ _mum (u64, _mum_tail_prime);
  case 3:
    u64 = str[0];
    u64 |= (uint64_t) str[1] << 8;
    u64 |= (uint64_t) str[2] << 16;
    return result ^ _mum (u64, _mum_tail_prime);
  case 2:
    u64 = str[0];
    u64 |= (uint64_t) str[1] << 8;
    return result ^ _mum (u64, _mum_tail_prime);
  case 1:
    u64 = str[0];
    return result ^ _mum (u64, _mum_tail_prime);
  }
  return result;
}

/* Final randomization of H.  */
static inline uint64_t
_mum_final (uint64_t h) {
  h ^= _mum (h, _mum_finish_prime1);
  h ^= _mum (h, _mum_finish_prime2);
  return h;
}

#if defined(__x86_64__) && defined(_MUM_FRESH_GCC)

/* We want to use AVX2 insn MULX instead of generic x86-64 MULQ where
   it is possible.  Although on modern Intel processors MULQ takes
   3 cycles vs. 4 for MULX, MULX permits more freedom in insn
   scheduling as it uses fewer fixed registers.  */
static inline uint64_t _MUM_TARGET("arch=haswell")
_mum_hash_avx2 (const void * key, size_t len, uint64_t seed) {
  return _mum_final (_mum_hash_aligned (seed + len, key, len));
}
#endif

#ifndef _MUM_UNALIGNED_ACCESS
#if defined(__x86_64__) || defined(__i386__) || defined(__PPC64__) \
    || defined(__s390__) || defined(__m32c__) || defined(cris) \
    || defined(__CR16__) || defined(__vax__) || defined(__m68k__) \
    || defined(__aarch64__)
#define _MUM_UNALIGNED_ACCESS 1
#else
#define _MUM_UNALIGNED_ACCESS 0
#endif
#endif

/* When we need an aligned access to data being hashed we move part of
   the unaligned data to an aligned block of given size and then
   process it, repeating processing the data by the block.  */
#ifndef _MUM_BLOCK_LEN
#define _MUM_BLOCK_LEN 1024
#endif

#if _MUM_BLOCK_LEN < 8
#error "too small block length"
#endif

static inline uint64_t
#if defined(__x86_64__)
_MUM_TARGET("inline-all-stringops")
#endif
_mum_hash_default (const void *key, size_t len, uint64_t seed) {
  uint64_t result;
  const unsigned char *str = (const unsigned char *) key;
  size_t block_len;
  uint64_t buf[_MUM_BLOCK_LEN / sizeof (uint64_t)];

  result = seed + len;
  if (_MUM_UNALIGNED_ACCESS || ((size_t) str & 0x7) == 0)
    result = _mum_hash_aligned (result, key, len);
  else {
    while (len != 0) {
      block_len = len < _MUM_BLOCK_LEN ? len : _MUM_BLOCK_LEN;
      memmove (buf, str, block_len);
      result = _mum_hash_aligned (result, buf, block_len);
      len -= block_len;
      str += block_len;
    }
  }
  return _mum_final (result);
}

static inline uint64_t
_mum_next_factor (void) {
  uint64_t start = 0;
  int i;

  for (i = 0; i < 8; i++)
    start = (start << 8) | rand() % 256;
  return start;
}

/* ++++++++++++++++++++++++++ Interface functions: +++++++++++++++++++ */

/* Set random multiplicators depending on SEED.  */
static inline void
mum_hash_randomize (uint64_t seed) {
  int i;

  srand (seed);
  _mum_hash_step_prime = _mum_next_factor ();
  _mum_key_step_prime = _mum_next_factor ();
  _mum_finish_prime1 = _mum_next_factor ();
  _mum_finish_prime2 = _mum_next_factor ();
  _mum_block_start_prime = _mum_next_factor ();
  _mum_unroll_prime = _mum_next_factor ();
  _mum_tail_prime = _mum_next_factor ();
  for (i = 0; i < (int)(sizeof (_mum_primes) / sizeof (uint64_t)); i++)
    _mum_primes[i] = _mum_next_factor ();
}

/* Start hashing data with SEED.  Return the state.  */
static inline uint64_t
mum_hash_init (uint64_t seed) {
  return seed;
}

/* Process data KEY with the state H and return the updated state.  */
static inline uint64_t
mum_hash_step (uint64_t h, uint64_t key)
{
  return _mum (h, _mum_hash_step_prime) ^ _mum (key, _mum_key_step_prime);
}

/* Return the result of hashing using the current state H.  */
static inline uint64_t
mum_hash_finish (uint64_t h) {
  return _mum_final (h);
}

/* Fast hashing of KEY with SEED.  The hash is always the same for the
   same key on any target.  */
static inline size_t
mum_hash64 (uint64_t key, uint64_t seed) {
  return mum_hash_finish (mum_hash_step (mum_hash_init (seed), key));
}

/* Hash data KEY of length LEN and SEED.  The hash depends on the
   target endianness and the unroll factor.  */
static inline uint64_t
mum_hash (const void *key, size_t len, uint64_t seed) {
#if defined(__x86_64__) && defined(_MUM_FRESH_GCC)
  static int avx2_support = 0;

  if (avx2_support > 0)
    return _mum_hash_avx2 (key, len, seed);
  else if (! avx2_support) {
    __builtin_cpu_init ();
    avx2_support = __builtin_cpu_supports ("avx2") ? 1 : -1;
    if (avx2_support > 0)
      return _mum_hash_avx2 (key, len, seed);
  }
#endif
  return _mum_hash_default (key, len, seed);
}

#endif
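For orientation, mum.h exposes two usage patterns: one-shot hashing of a byte buffer via mum_hash, and incremental hashing of 64-bit words via mum_hash_init / mum_hash_step / mum_hash_finish (the pattern ucl_hash.c adopts below). A minimal sketch; the key bytes and seed value are made up for illustration:

#include <stdio.h>
#include "mum.h"

int main (void) {
	const char key[] = "example key";		/* hypothetical input */
	uint64_t seed = 0x42;				/* hypothetical seed */

	/* One-shot hashing of a byte buffer. */
	uint64_t h1 = mum_hash (key, sizeof (key) - 1, seed);

	/* Incremental hashing of 64-bit words: init -> step* -> finish. */
	uint64_t st = mum_hash_init (seed);
	st = mum_hash_step (st, 0x0123456789abcdefULL);
	st = mum_hash_step (st, 0xfedcba9876543210ULL);
	uint64_t h2 = mum_hash_finish (st);

	printf ("%llx %llx\n", (unsigned long long) h1, (unsigned long long) h2);
	return 0;
}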
contrib/libucl/src/ucl_hash.c
@@ -25,6 +25,7 @@
 #include "ucl_hash.h"
 #include "khash.h"
 #include "kvec.h"
+#include "mum.h"

 #include <time.h>
 #include <limits.h>
@@ -99,20 +100,11 @@ static const unsigned char lc_map[256] = {
 #define UCL64_BIT_HASH 1
 #endif

-#ifdef UCL64_BIT_HASH
 static inline uint32_t
 ucl_hash_func (const ucl_object_t *o)
 {
-	return XXH64 (o->key, o->keylen, ucl_hash_seed ());
+	return mum_hash (o->key, o->keylen, ucl_hash_seed ());
 }
-#else
-static inline uint32_t
-ucl_hash_func (const ucl_object_t *o)
-{
-	return XXH32 (o->key, o->keylen, ucl_hash_seed ());
-}
-#endif

 static inline int
 ucl_hash_equal (const ucl_object_t *k1, const ucl_object_t *k2)
 {
@@ -126,91 +118,60 @@ ucl_hash_equal (const ucl_object_t *k1, const ucl_object_t *k2)
 KHASH_INIT (ucl_hash_node, const ucl_object_t *, struct ucl_hash_elt, 1,
 		ucl_hash_func, ucl_hash_equal)

-#ifdef UCL64_BIT_HASH
 static inline uint32_t
 ucl_hash_caseless_func (const ucl_object_t *o)
 {
 	unsigned len = o->keylen;
-	unsigned leftover = o->keylen % 4;
+	unsigned leftover = o->keylen % 8;
 	unsigned fp, i;
 	const uint8_t* s = (const uint8_t*)o->key;
 	union {
 		struct {
-			unsigned char c1, c2, c3, c4;
+			unsigned char c1, c2, c3, c4, c5, c6, c7, c8;
 		} c;
-		uint32_t pp;
+		uint64_t pp;
 	} u;
-	XXH64_state_t st;
+	uint64_t r;

 	fp = len - leftover;
-	XXH64_reset (&st, ucl_hash_seed ());
+	r = ucl_hash_seed ();

-	for (i = 0; i != fp; i += 4) {
+	for (i = 0; i != fp; i += 8) {
 		u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3];
+		u.c.c5 = s[i + 4], u.c.c6 = s[i + 5], u.c.c7 = s[i + 6], u.c.c8 = s[i + 7];
 		u.c.c1 = lc_map[u.c.c1];
 		u.c.c2 = lc_map[u.c.c2];
 		u.c.c3 = lc_map[u.c.c3];
 		u.c.c4 = lc_map[u.c.c4];
-		XXH64_update (&st, &u.pp, sizeof (u));
+		u.c.c5 = lc_map[u.c.c5];
+		u.c.c6 = lc_map[u.c.c6];
+		u.c.c7 = lc_map[u.c.c7];
+		u.c.c8 = lc_map[u.c.c8];
+		r = mum_hash_step (r, u.pp);
 	}

 	u.pp = 0;
 	switch (leftover) {
+	case 7:
+		u.c.c7 = lc_map[(unsigned char)s[i++]];
+	case 6:
+		u.c.c6 = lc_map[(unsigned char)s[i++]];
+	case 5:
+		u.c.c5 = lc_map[(unsigned char)s[i++]];
+	case 4:
+		u.c.c4 = lc_map[(unsigned char)s[i++]];
 	case 3:
 		u.c.c3 = lc_map[(unsigned char)s[i++]];
 	case 2:
 		u.c.c2 = lc_map[(unsigned char)s[i++]];
 	case 1:
 		u.c.c1 = lc_map[(unsigned char)s[i]];
-		XXH64_update (&st, &u.pp, leftover);
+		r = mum_hash_step (r, u.pp);
 		break;
 	}

-	return XXH64_digest (&st);
+	return mum_hash_finish (r);
 }
-#else
-static inline uint32_t
-ucl_hash_caseless_func (const ucl_object_t *o)
-{
-	unsigned len = o->keylen;
-	unsigned leftover = o->keylen % 4;
-	unsigned fp, i;
-	const uint8_t* s = (const uint8_t*)o->key;
-	union {
-		struct {
-			unsigned char c1, c2, c3, c4;
-		} c;
-		uint32_t pp;
-	} u;
-	XXH32_state_t st;
-
-	fp = len - leftover;
-	XXH32_reset (&st, ucl_hash_seed ());
-
-	for (i = 0; i != fp; i += 4) {
-		u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3];
-		u.c.c1 = lc_map[u.c.c1];
-		u.c.c2 = lc_map[u.c.c2];
-		u.c.c3 = lc_map[u.c.c3];
-		u.c.c4 = lc_map[u.c.c4];
-		XXH32_update (&st, &u.pp, sizeof (u));
-	}
-
-	u.pp = 0;
-	switch (leftover) {
-	case 3:
-		u.c.c3 = lc_map[(unsigned char)s[i++]];
-	case 2:
-		u.c.c2 = lc_map[(unsigned char)s[i++]];
-	case 1:
-		u.c.c1 = lc_map[(unsigned char)s[i]];
-		XXH32_update (&st, &u.pp, leftover);
-		break;
-	}
-
-	return XXH32_digest (&st);
-}
-#endif

 static inline int
 ucl_hash_caseless_equal (const ucl_object_t *k1, const ucl_object_t *k2)
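The rewritten caseless hash above lowercases the key through lc_map eight bytes at a time and feeds each packed 64-bit chunk to mum_hash_step. A condensed, self-contained sketch of the same pattern, with tolower standing in for libucl's lc_map table:

#include <ctype.h>
#include <string.h>
#include "mum.h"

/* Hash a string case-insensitively: lowercase 8-byte chunks, then
   feed each chunk to the incremental MUM API. */
static uint64_t
caseless_hash (const char *s, size_t len, uint64_t seed) {
	uint64_t st = mum_hash_init (seed);
	unsigned char chunk[8];
	uint64_t word;
	size_t i, j;

	for (i = 0; i + 8 <= len; i += 8) {
		for (j = 0; j < 8; j++)
			chunk[j] = (unsigned char) tolower ((unsigned char) s[i + j]);
		memcpy (&word, chunk, sizeof (word));
		st = mum_hash_step (st, word);
	}
	if (i < len) {				/* leftover tail, zero-padded */
		memset (chunk, 0, sizeof (chunk));
		for (j = 0; i + j < len; j++)
			chunk[j] = (unsigned char) tolower ((unsigned char) s[i + j]);
		memcpy (&word, chunk, sizeof (word));
		st = mum_hash_step (st, word);
	}
	return mum_hash_finish (st);
}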
contrib/libucl/src/ucl_internal.h
@@ -93,7 +93,6 @@
 #include "uthash.h"
 #include "ucl.h"
 #include "ucl_hash.h"
-#include "xxhash.h"

 #ifdef HAVE_OPENSSL
 #include <openssl/evp.h>
contrib/libucl/src/ucl_msgpack.c
@@ -1423,6 +1423,10 @@ ucl_msgpack_parse_int (struct ucl_parser *parser,
 	int16_t iv16;
 	int32_t iv32;
 	int64_t iv64;
+	uint16_t uiv16;
+	uint32_t uiv32;
+	uint64_t uiv64;
+

 	if (len > remain) {
 		return -1;
@@ -1455,7 +1459,9 @@ ucl_msgpack_parse_int (struct ucl_parser *parser,
 		len = 2;
 		break;
 	case msgpack_uint16:
-		obj->value.iv = FROM_BE16 (*(uint16_t *)pos);
+		memcpy (&uiv16, pos, sizeof (uiv16));
+		uiv16 = FROM_BE16 (uiv16);
+		obj->value.iv = uiv16;
 		len = 2;
 		break;
 	case msgpack_int32:
@@ -1465,7 +1471,9 @@ ucl_msgpack_parse_int (struct ucl_parser *parser,
 		len = 4;
 		break;
 	case msgpack_uint32:
-		obj->value.iv = FROM_BE32 (*(uint32_t *)pos);
+		memcpy (&uiv32, pos, sizeof (uiv32));
+		uiv32 = FROM_BE32 (uiv32);
+		obj->value.iv = uiv32;
 		len = 4;
 		break;
 	case msgpack_int64:
@@ -1475,7 +1483,9 @@ ucl_msgpack_parse_int (struct ucl_parser *parser,
 		len = 8;
 		break;
 	case msgpack_uint64:
-		obj->value.iv = FROM_BE64 (*(uint64_t *)pos);
+		memcpy (&uiv64, pos, sizeof (uiv64));
+		uiv64 = FROM_BE64 (uiv64);
+		obj->value.iv = uiv64;
 		len = 8;
 		break;
 	default:
@@ -1498,6 +1508,7 @@ ucl_msgpack_parse_float (struct ucl_parser *parser,
 		uint32_t i;
 		float f;
 	} d;
+	uint64_t uiv64;

 	if (len > remain) {
 		return -1;
@@ -1507,13 +1518,16 @@ ucl_msgpack_parse_float (struct ucl_parser *parser,

 	switch (fmt) {
 	case msgpack_float32:
-		d.i = FROM_BE32 (*(uint32_t *)pos);
+		memcpy (&d.i, pos, sizeof (d.i));
+		d.i = FROM_BE32 (d.i);
 		/* XXX: can be slow */
 		obj->value.dv = d.f;
 		len = 4;
 		break;
 	case msgpack_float64:
-		obj->value.iv = FROM_BE64 (*(uint64_t *)pos);
+		memcpy (&uiv64, pos, sizeof (uiv64));
+		uiv64 = FROM_BE64 (uiv64);
+		obj->value.iv = uiv64;
 		len = 8;
 		break;
 	default:
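These memcpy-based reads are the msgpack fix for non-x86 targets mentioned in the commit message: dereferencing *(uint32_t *)pos at an arbitrary offset is undefined behavior and can fault on strict-alignment CPUs (SPARC, older ARM, MIPS), while a fixed-size memcpy into a local is safe everywhere and typically compiles to the same single load on x86. A minimal sketch of the pattern in isolation; read_be32 is a hypothetical helper, not part of libucl, and the byte-swap guard assumes GCC/Clang predefined macros:

#include <stdint.h>
#include <string.h>

/* Read a big-endian 32-bit value from a possibly unaligned buffer. */
static uint32_t
read_be32 (const unsigned char *pos) {
	uint32_t v;

	memcpy (&v, pos, sizeof (v));	/* alignment-safe on every target */
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	v = __builtin_bswap32 (v);	/* convert from big-endian wire order */
#endif
	return v;
}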
contrib/libucl/src/ucl_parser.c
@@ -2597,12 +2597,7 @@ ucl_parser_add_chunk_full (struct ucl_parser *parser, const unsigned char *data,
 		return false;
 	}

-	if (len == 0) {
-		parser->top_obj = ucl_object_new_full (UCL_OBJECT, priority);
-		return true;
-	}
-
-	if (data == NULL) {
+	if (data == NULL && len != 0) {
 		ucl_create_err (&parser->err, "invalid chunk added");
 		return false;
 	}
@@ -2613,6 +2608,7 @@ ucl_parser_add_chunk_full (struct ucl_parser *parser, const unsigned char *data,
 		ucl_create_err (&parser->err, "cannot allocate chunk structure");
 		return false;
 	}
+
 	chunk->begin = data;
 	chunk->remain = len;
 	chunk->pos = chunk->begin;
@@ -2631,12 +2627,27 @@ ucl_parser_add_chunk_full (struct ucl_parser *parser, const unsigned char *data,
 		return false;
 	}

-	switch (parse_type) {
-	default:
-	case UCL_PARSE_UCL:
-		return ucl_state_machine (parser);
-	case UCL_PARSE_MSGPACK:
-		return ucl_parse_msgpack (parser);
-	}
+	if (len > 0) {
+		/* Need to parse something */
+		switch (parse_type) {
+		default:
+		case UCL_PARSE_UCL:
+			return ucl_state_machine (parser);
+		case UCL_PARSE_MSGPACK:
+			return ucl_parse_msgpack (parser);
+		}
+	}
+	else {
+		/* Just add empty chunk and go forward */
+		if (parser->top_obj == NULL) {
+			/*
+			 * In case of empty object, create one to indicate that we've
+			 * read something
+			 */
+			parser->top_obj = ucl_object_new_full (UCL_OBJECT, priority);
+		}
+
+		return true;
+	}
 }
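The restructured ucl_parser_add_chunk_full above makes a zero-length chunk succeed and lazily creates an empty top object only when none exists yet, instead of unconditionally overwriting parser->top_obj. A hypothetical caller-side sketch of the resulting behavior, using the public libucl API:

#include <stdbool.h>
#include "ucl.h"

/* Feeding an empty chunk should yield an empty-but-valid top object
   rather than an error (the caller and buffer here are illustrative). */
static bool
parse_maybe_empty (const unsigned char *data, size_t len) {
	struct ucl_parser *parser = ucl_parser_new (0);
	bool ok = ucl_parser_add_chunk (parser, data, len);	/* len == 0 is fine */

	if (ok) {
		ucl_object_t *top = ucl_parser_get_object (parser);
		ucl_object_unref (top);
	}
	ucl_parser_free (parser);
	return ok;
}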
contrib/libucl/src/ucl_util.c
@@ -975,6 +975,7 @@ ucl_include_file_single (const unsigned char *data, size_t len,
 		if (params->soft_fail) {
 			return false;
 		}

+		return (!params->must_exist || false);
 	}

@@ -1172,11 +1173,14 @@ ucl_include_file_single (const unsigned char *data, size_t len,

 	res = ucl_parser_add_chunk_full (parser, buf, buflen, params->priority,
 			params->strat, params->parse_type);
-	if (!res && !params->must_exist) {
-		/* Free error */
-		utstring_free (parser->err);
-		parser->err = NULL;
-		parser->state = UCL_STATE_AFTER_VALUE;
+
+	if (!res) {
+		if (!params->must_exist) {
+			/* Free error */
+			utstring_free (parser->err);
+			parser->err = NULL;
+			res = true;
+		}
 	}

 	/* Stop nesting the include, take 1 level off the stack */
@@ -1849,6 +1853,9 @@ ucl_parser_add_fd_priority (struct ucl_parser *parser, int fd,
 				fd, strerror (errno));
 		return false;
 	}
+	if (st.st_size == 0) {
+		return true;
+	}
 	if ((buf = ucl_mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) {
 		ucl_create_err (&parser->err, "cannot mmap fd %d: %s",
 				fd, strerror (errno));
contrib/libucl/src/xxhash.c (deleted, 941 lines)
@@ -1,941 +0,0 @@
/*
xxHash - Fast Hash algorithm
Copyright (C) 2012-2014, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

You can contact the author at :
- xxHash source repository : http://code.google.com/p/xxhash/
- public discussion board : https://groups.google.com/forum/#!forum/lz4c
*/


//**************************************
// Tuning parameters
//**************************************
// Unaligned memory access is automatically enabled for "common" CPU, such as x86.
// For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected.
// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance.
// You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for U32).
#if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
#  define XXH_USE_UNALIGNED_ACCESS 1
#endif

// XXH_ACCEPT_NULL_INPUT_POINTER :
// If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
// When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
// This option has a very small performance cost (only measurable on small inputs).
// By default, this option is disabled. To enable it, uncomment below define :
// #define XXH_ACCEPT_NULL_INPUT_POINTER 1

// XXH_FORCE_NATIVE_FORMAT :
// By default, xxHash library provides endian-independant Hash values, based on little-endian convention.
// Results are therefore identical for little-endian and big-endian CPU.
// This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
// Should endian-independance be of no importance for your application, you may set the #define below to 1.
// It will improve speed for Big-endian CPU.
// This option has no impact on Little_Endian CPU.
#define XXH_FORCE_NATIVE_FORMAT 0

//**************************************
// Compiler Specific Options
//**************************************
// Disable some Visual warning messages
#ifdef _MSC_VER  // Visual Studio
#  pragma warning(disable : 4127)      // disable: C4127: conditional expression is constant
#endif

#ifdef _MSC_VER    // Visual Studio
#  define FORCE_INLINE static __forceinline
#else
#  ifdef __GNUC__
#    define FORCE_INLINE static inline __attribute__((always_inline))
#  else
#    define FORCE_INLINE static inline
#  endif
#endif

//**************************************
// Includes & Memory related functions
//**************************************
#include "xxhash.h"
// Modify the local functions below should you wish to use some other memory routines
// for malloc(), free()
#include <stdlib.h>
static void* XXH_malloc(size_t s) { return malloc(s); }
static void  XXH_free (void* p)  { free(p); }
// for memcpy()
#include <string.h>
static void* XXH_memcpy(void* dest, const void* src, size_t size)
{
    return memcpy(dest,src,size);
}


//**************************************
// Basic Types
//**************************************
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
#  include <stdint.h>
   typedef uint8_t  BYTE;
   typedef uint16_t U16;
   typedef uint32_t U32;
   typedef  int32_t S32;
   typedef uint64_t U64;
#else
   typedef unsigned char  BYTE;
   typedef unsigned short U16;
   typedef unsigned int   U32;
   typedef   signed int   S32;
   typedef uint64_t       U64;
#endif

#if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS)
#  define _PACKED __attribute__ ((packed))
#else
#  define _PACKED
#endif

#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
#  ifdef __IBMC__
#    pragma pack(1)
#  else
#    pragma pack(push, 1)
#  endif
#endif

typedef struct _U32_S
{
    U32 v;
} _PACKED U32_S;
typedef struct _U64_S
{
    U64 v;
} _PACKED U64_S;

#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
#  pragma pack(pop)
#endif

#define A32(x) (((U32_S *)(x))->v)
#define A64(x) (((U64_S *)(x))->v)


//***************************************
// Compiler-specific Functions and Macros
//***************************************
#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

// Note : although _rotl exists for minGW (GCC under windows), performance seems poor
#if defined(_MSC_VER)
#  define XXH_rotl32(x,r) _rotl(x,r)
#  define XXH_rotl64(x,r) _rotl64(x,r)
#else
#  define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
#  define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
#endif

#if defined(_MSC_VER)     // Visual Studio
#  define XXH_swap32 _byteswap_ulong
#  define XXH_swap64 _byteswap_uint64
#elif GCC_VERSION >= 403 || defined(__clang__)
#  define XXH_swap32 __builtin_bswap32
#  define XXH_swap64 __builtin_bswap64
#else
static inline U32 XXH_swap32 (U32 x)
{
    return ((x << 24) & 0xff000000 ) |
           ((x <<  8) & 0x00ff0000 ) |
           ((x >>  8) & 0x0000ff00 ) |
           ((x >> 24) & 0x000000ff );
}
static inline U64 XXH_swap64 (U64 x)
{
    return ((x << 56) & 0xff00000000000000ULL) |
           ((x << 40) & 0x00ff000000000000ULL) |
           ((x << 24) & 0x0000ff0000000000ULL) |
           ((x <<  8) & 0x000000ff00000000ULL) |
           ((x >>  8) & 0x00000000ff000000ULL) |
           ((x >> 24) & 0x0000000000ff0000ULL) |
           ((x >> 40) & 0x000000000000ff00ULL) |
           ((x >> 56) & 0x00000000000000ffULL);
}
#endif


//**************************************
// Constants
//**************************************
#define PRIME32_1   2654435761U
#define PRIME32_2   2246822519U
#define PRIME32_3   3266489917U
#define PRIME32_4    668265263U
#define PRIME32_5    374761393U

#define PRIME64_1 11400714785074694791ULL
#define PRIME64_2 14029467366897019727ULL
#define PRIME64_3  1609587929392839161ULL
#define PRIME64_4  9650029242287828579ULL
#define PRIME64_5  2870177450012600261ULL

//**************************************
// Architecture Macros
//**************************************
typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
#ifndef XXH_CPU_LITTLE_ENDIAN   // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch
static const int one = 1;
#  define XXH_CPU_LITTLE_ENDIAN   (*(char*)(&one))
#endif


//**************************************
// Macros
//**************************************
#define XXH_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(!!(c)) }; }    // use only *after* variable declarations


//****************************
// Memory reads
//****************************
typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;

FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
{
    if (align==XXH_unaligned)
        return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr));
    else
        return endian==XXH_littleEndian ? *(U32*)ptr : XXH_swap32(*(U32*)ptr);
}

FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
{
    return XXH_readLE32_align(ptr, endian, XXH_unaligned);
}

FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
{
    if (align==XXH_unaligned)
        return endian==XXH_littleEndian ? A64(ptr) : XXH_swap64(A64(ptr));
    else
        return endian==XXH_littleEndian ? *(U64*)ptr : XXH_swap64(*(U64*)ptr);
}

FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
{
    return XXH_readLE64_align(ptr, endian, XXH_unaligned);
}


//****************************
// Simple Hash Functions
//****************************
FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
{
    const BYTE* p = (const BYTE*)input;
    const BYTE* bEnd = p + len;
    U32 h32;
#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)

#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
    if (p==NULL)
    {
        len=0;
        bEnd=p=(const BYTE*)(size_t)16;
    }
#endif

    if (len>=16)
    {
        const BYTE* const limit = bEnd - 16;
        U32 v1 = seed + PRIME32_1 + PRIME32_2;
        U32 v2 = seed + PRIME32_2;
        U32 v3 = seed + 0;
        U32 v4 = seed - PRIME32_1;

        do
        {
            v1 += XXH_get32bits(p) * PRIME32_2;
            v1 = XXH_rotl32(v1, 13);
            v1 *= PRIME32_1;
            p+=4;
            v2 += XXH_get32bits(p) * PRIME32_2;
            v2 = XXH_rotl32(v2, 13);
            v2 *= PRIME32_1;
            p+=4;
            v3 += XXH_get32bits(p) * PRIME32_2;
            v3 = XXH_rotl32(v3, 13);
            v3 *= PRIME32_1;
            p+=4;
            v4 += XXH_get32bits(p) * PRIME32_2;
            v4 = XXH_rotl32(v4, 13);
            v4 *= PRIME32_1;
            p+=4;
        }
        while (p<=limit);

        h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
    }
    else
    {
        h32 = seed + PRIME32_5;
    }

    h32 += (U32) len;

    while (p+4<=bEnd)
    {
        h32 += XXH_get32bits(p) * PRIME32_3;
        h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
        p+=4;
    }

    while (p<bEnd)
    {
        h32 += (*p) * PRIME32_5;
        h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
        p++;
    }

    h32 ^= h32 >> 15;
    h32 *= PRIME32_2;
    h32 ^= h32 >> 13;
    h32 *= PRIME32_3;
    h32 ^= h32 >> 16;

    return h32;
}


unsigned int XXH32 (const void* input, size_t len, unsigned seed)
{
#if 0
    // Simple version, good for code maintenance, but unfortunately slow for small inputs
    XXH32_state_t state;
    XXH32_reset(&state, seed);
    XXH32_update(&state, input, len);
    return XXH32_digest(&state);
#else
    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;

#  if !defined(XXH_USE_UNALIGNED_ACCESS)
    if ((((size_t)input) & 3) == 0)   // Input is aligned, let's leverage the speed advantage
    {
        if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
            return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
        else
            return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
    }
#  endif

    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
    else
        return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
#endif
}

FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
{
    const BYTE* p = (const BYTE*)input;
    const BYTE* bEnd = p + len;
    U64 h64;
#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)

#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
    if (p==NULL)
    {
        len=0;
        bEnd=p=(const BYTE*)(size_t)32;
    }
#endif

    if (len>=32)
    {
        const BYTE* const limit = bEnd - 32;
        U64 v1 = seed + PRIME64_1 + PRIME64_2;
        U64 v2 = seed + PRIME64_2;
        U64 v3 = seed + 0;
        U64 v4 = seed - PRIME64_1;

        do
        {
            v1 += XXH_get64bits(p) * PRIME64_2;
            p+=8;
            v1 = XXH_rotl64(v1, 31);
            v1 *= PRIME64_1;
            v2 += XXH_get64bits(p) * PRIME64_2;
            p+=8;
            v2 = XXH_rotl64(v2, 31);
            v2 *= PRIME64_1;
            v3 += XXH_get64bits(p) * PRIME64_2;
            p+=8;
            v3 = XXH_rotl64(v3, 31);
            v3 *= PRIME64_1;
            v4 += XXH_get64bits(p) * PRIME64_2;
            p+=8;
            v4 = XXH_rotl64(v4, 31);
            v4 *= PRIME64_1;
        }
        while (p<=limit);

        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);

        v1 *= PRIME64_2;
        v1 = XXH_rotl64(v1, 31);
        v1 *= PRIME64_1;
        h64 ^= v1;
        h64 = h64 * PRIME64_1 + PRIME64_4;

        v2 *= PRIME64_2;
        v2 = XXH_rotl64(v2, 31);
        v2 *= PRIME64_1;
        h64 ^= v2;
        h64 = h64 * PRIME64_1 + PRIME64_4;

        v3 *= PRIME64_2;
        v3 = XXH_rotl64(v3, 31);
        v3 *= PRIME64_1;
        h64 ^= v3;
        h64 = h64 * PRIME64_1 + PRIME64_4;

        v4 *= PRIME64_2;
        v4 = XXH_rotl64(v4, 31);
        v4 *= PRIME64_1;
        h64 ^= v4;
        h64 = h64 * PRIME64_1 + PRIME64_4;
    }
    else
    {
        h64 = seed + PRIME64_5;
    }

    h64 += (U64) len;

    while (p+8<=bEnd)
    {
        U64 k1 = XXH_get64bits(p);
        k1 *= PRIME64_2;
        k1 = XXH_rotl64(k1,31);
        k1 *= PRIME64_1;
        h64 ^= k1;
        h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
        p+=8;
    }

    if (p+4<=bEnd)
    {
        h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
        p+=4;
    }

    while (p<bEnd)
    {
        h64 ^= (*p) * PRIME64_5;
        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
        p++;
    }

    h64 ^= h64 >> 33;
    h64 *= PRIME64_2;
    h64 ^= h64 >> 29;
    h64 *= PRIME64_3;
    h64 ^= h64 >> 32;

    return h64;
}


uint64_t XXH64 (const void* input, size_t len, uint64_t seed)
{
#if 0
    // Simple version, good for code maintenance, but unfortunately slow for small inputs
    XXH64_state_t state;
    XXH64_reset(&state, seed);
    XXH64_update(&state, input, len);
    return XXH64_digest(&state);
#else
    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;

#  if !defined(XXH_USE_UNALIGNED_ACCESS)
    if ((((size_t)input) & 7)==0)   // Input is aligned, let's leverage the speed advantage
    {
        if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
            return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
        else
            return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
    }
#  endif

    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
    else
        return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
#endif
}

/****************************************************
 *  Advanced Hash Functions
 ****************************************************/

/*** Allocation ***/
typedef struct
{
    U64 total_len;
    U32 seed;
    U32 v1;
    U32 v2;
    U32 v3;
    U32 v4;
    U32 mem32[4];   /* defined as U32 for alignment */
    U32 memsize;
} XXH_istate32_t;

typedef struct
{
    U64 total_len;
    U64 seed;
    U64 v1;
    U64 v2;
    U64 v3;
    U64 v4;
    U64 mem64[4];   /* defined as U64 for alignment */
    U32 memsize;
} XXH_istate64_t;


XXH32_state_t* XXH32_createState(void)
{
    XXH_STATIC_ASSERT(sizeof(XXH32_state_t) >= sizeof(XXH_istate32_t));   // A compilation error here means XXH32_state_t is not large enough
    return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
}

void* XXH32_init (unsigned seed)
{
    XXH32_state_t *st = XXH32_createState();
    XXH32_reset(st, seed);

    return st;
}

XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
{
    XXH_free(statePtr);
    return XXH_OK;
};

XXH64_state_t* XXH64_createState(void)
{
    XXH_STATIC_ASSERT(sizeof(XXH64_state_t) >= sizeof(XXH_istate64_t));   // A compilation error here means XXH64_state_t is not large enough
    return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
}
XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
{
    XXH_free(statePtr);
    return XXH_OK;
};


/*** Hash feed ***/

XXH_errorcode XXH32_reset(XXH32_state_t* state_in, U32 seed)
{
    XXH_istate32_t* state = (XXH_istate32_t*) state_in;
    state->seed = seed;
    state->v1 = seed + PRIME32_1 + PRIME32_2;
    state->v2 = seed + PRIME32_2;
    state->v3 = seed + 0;
    state->v4 = seed - PRIME32_1;
    state->total_len = 0;
    state->memsize = 0;
    return XXH_OK;
}

XXH_errorcode XXH64_reset(XXH64_state_t* state_in, uint64_t seed)
{
    XXH_istate64_t* state = (XXH_istate64_t*) state_in;
    state->seed = seed;
    state->v1 = seed + PRIME64_1 + PRIME64_2;
    state->v2 = seed + PRIME64_2;
    state->v3 = seed + 0;
    state->v4 = seed - PRIME64_1;
    state->total_len = 0;
    state->memsize = 0;
    return XXH_OK;
}


FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
{
    XXH_istate32_t* state = (XXH_istate32_t *) state_in;
    const BYTE* p = (const BYTE*)input;
    const BYTE* const bEnd = p + len;

#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
    if (input==NULL) return XXH_ERROR;
#endif

    state->total_len += len;

    if (state->memsize + len < 16)   // fill in tmp buffer
    {
        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
        state->memsize += (U32)len;
        return XXH_OK;
    }

    if (state->memsize)   // some data left from previous update
    {
        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
        {
            const U32* p32 = state->mem32;
            state->v1 += XXH_readLE32(p32, endian) * PRIME32_2;
            state->v1 = XXH_rotl32(state->v1, 13);
            state->v1 *= PRIME32_1;
            p32++;
            state->v2 += XXH_readLE32(p32, endian) * PRIME32_2;
            state->v2 = XXH_rotl32(state->v2, 13);
            state->v2 *= PRIME32_1;
            p32++;
            state->v3 += XXH_readLE32(p32, endian) * PRIME32_2;
            state->v3 = XXH_rotl32(state->v3, 13);
            state->v3 *= PRIME32_1;
            p32++;
            state->v4 += XXH_readLE32(p32, endian) * PRIME32_2;
            state->v4 = XXH_rotl32(state->v4, 13);
            state->v4 *= PRIME32_1;
            p32++;
        }
        p += 16-state->memsize;
        state->memsize = 0;
    }

    if (p <= bEnd-16)
    {
        const BYTE* const limit = bEnd - 16;
        U32 v1 = state->v1;
        U32 v2 = state->v2;
        U32 v3 = state->v3;
        U32 v4 = state->v4;

        do
        {
            v1 += XXH_readLE32(p, endian) * PRIME32_2;
            v1 = XXH_rotl32(v1, 13);
            v1 *= PRIME32_1;
            p+=4;
            v2 += XXH_readLE32(p, endian) * PRIME32_2;
            v2 = XXH_rotl32(v2, 13);
            v2 *= PRIME32_1;
            p+=4;
            v3 += XXH_readLE32(p, endian) * PRIME32_2;
            v3 = XXH_rotl32(v3, 13);
            v3 *= PRIME32_1;
            p+=4;
            v4 += XXH_readLE32(p, endian) * PRIME32_2;
            v4 = XXH_rotl32(v4, 13);
            v4 *= PRIME32_1;
            p+=4;
        }
        while (p<=limit);

        state->v1 = v1;
        state->v2 = v2;
        state->v3 = v3;
        state->v4 = v4;
    }

    if (p < bEnd)
    {
        XXH_memcpy(state->mem32, p, bEnd-p);
        state->memsize = (int)(bEnd-p);
    }

    return XXH_OK;
}

XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
{
    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;

    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
    else
        return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
}



FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state_in, XXH_endianess endian)
{
    XXH_istate32_t* state = (XXH_istate32_t*) state_in;
    const BYTE * p = (const BYTE*)state->mem32;
    BYTE* bEnd = (BYTE*)(state->mem32) + state->memsize;
    U32 h32;

    if (state->total_len >= 16)
    {
        h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
    }
    else
    {
        h32 = state->seed + PRIME32_5;
    }

    h32 += (U32) state->total_len;

    while (p+4<=bEnd)
    {
        h32 += XXH_readLE32(p, endian) * PRIME32_3;
        h32 = XXH_rotl32(h32, 17) * PRIME32_4;
        p+=4;
    }

    while (p<bEnd)
    {
        h32 += (*p) * PRIME32_5;
        h32 = XXH_rotl32(h32, 11) * PRIME32_1;
        p++;
    }

    h32 ^= h32 >> 15;
    h32 *= PRIME32_2;
    h32 ^= h32 >> 13;
    h32 *= PRIME32_3;
    h32 ^= h32 >> 16;
#if 0
    XXH32_freeState((XXH32_state_t *)state_in);
#endif
    return h32;
}


U32 XXH32_digest (const XXH32_state_t* state_in)
{
    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;

    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        return XXH32_digest_endian(state_in, XXH_littleEndian);
    else
        return XXH32_digest_endian(state_in, XXH_bigEndian);
}


FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
{
    XXH_istate64_t * state = (XXH_istate64_t *) state_in;
    const BYTE* p = (const BYTE*)input;
    const BYTE* const bEnd = p + len;

#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
    if (input==NULL) return XXH_ERROR;
#endif

    state->total_len += len;

    if (state->memsize + len < 32)   // fill in tmp buffer
    {
        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
        state->memsize += (U32)len;
        return XXH_OK;
    }

    if (state->memsize)   // some data left from previous update
    {
        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
        {
            const U64* p64 = state->mem64;
            state->v1 += XXH_readLE64(p64, endian) * PRIME64_2;
            state->v1 = XXH_rotl64(state->v1, 31);
            state->v1 *= PRIME64_1;
            p64++;
            state->v2 += XXH_readLE64(p64, endian) * PRIME64_2;
            state->v2 = XXH_rotl64(state->v2, 31);
            state->v2 *= PRIME64_1;
            p64++;
            state->v3 += XXH_readLE64(p64, endian) * PRIME64_2;
            state->v3 = XXH_rotl64(state->v3, 31);
            state->v3 *= PRIME64_1;
            p64++;
            state->v4 += XXH_readLE64(p64, endian) * PRIME64_2;
            state->v4 = XXH_rotl64(state->v4, 31);
            state->v4 *= PRIME64_1;
            p64++;
        }
        p += 32-state->memsize;
        state->memsize = 0;
    }

    if (p+32 <= bEnd)
    {
        const BYTE* const limit = bEnd - 32;
        U64 v1 = state->v1;
        U64 v2 = state->v2;
        U64 v3 = state->v3;
        U64 v4 = state->v4;

        do
        {
            v1 += XXH_readLE64(p, endian) * PRIME64_2;
            v1 = XXH_rotl64(v1, 31);
            v1 *= PRIME64_1;
            p+=8;
            v2 += XXH_readLE64(p, endian) * PRIME64_2;
            v2 = XXH_rotl64(v2, 31);
            v2 *= PRIME64_1;
            p+=8;
            v3 += XXH_readLE64(p, endian) * PRIME64_2;
            v3 = XXH_rotl64(v3, 31);
            v3 *= PRIME64_1;
            p+=8;
            v4 += XXH_readLE64(p, endian) * PRIME64_2;
            v4 = XXH_rotl64(v4, 31);
            v4 *= PRIME64_1;
            p+=8;
        }
        while (p<=limit);

        state->v1 = v1;
        state->v2 = v2;
        state->v3 = v3;
        state->v4 = v4;
    }

    if (p < bEnd)
    {
        XXH_memcpy(state->mem64, p, bEnd-p);
        state->memsize = (int)(bEnd-p);
    }

    return XXH_OK;
}

XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
{
    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;

    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
    else
        return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
}



FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endianess endian)
{
    XXH_istate64_t * state = (XXH_istate64_t *) state_in;
    const BYTE * p = (const BYTE*)state->mem64;
    BYTE* bEnd = (BYTE*)state->mem64 + state->memsize;
    U64 h64;

    if (state->total_len >= 32)
    {
        U64 v1 = state->v1;
        U64 v2 = state->v2;
        U64 v3 = state->v3;
        U64 v4 = state->v4;

        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);

        v1 *= PRIME64_2;
        v1 = XXH_rotl64(v1, 31);
        v1 *= PRIME64_1;
        h64 ^= v1;
        h64 = h64*PRIME64_1 + PRIME64_4;

        v2 *= PRIME64_2;
        v2 = XXH_rotl64(v2, 31);
        v2 *= PRIME64_1;
        h64 ^= v2;
        h64 = h64*PRIME64_1 + PRIME64_4;

        v3 *= PRIME64_2;
        v3 = XXH_rotl64(v3, 31);
        v3 *= PRIME64_1;
        h64 ^= v3;
        h64 = h64*PRIME64_1 + PRIME64_4;

        v4 *= PRIME64_2;
        v4 = XXH_rotl64(v4, 31);
        v4 *= PRIME64_1;
        h64 ^= v4;
        h64 = h64*PRIME64_1 + PRIME64_4;
    }
    else
    {
        h64 = state->seed + PRIME64_5;
    }

    h64 += (U64) state->total_len;

    while (p+8<=bEnd)
    {
        U64 k1 = XXH_readLE64(p, endian);
        k1 *= PRIME64_2;
        k1 = XXH_rotl64(k1,31);
        k1 *= PRIME64_1;
        h64 ^= k1;
        h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
        p+=8;
    }

    if (p+4<=bEnd)
    {
        h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
        p+=4;
    }

    while (p<bEnd)
    {
        h64 ^= (*p) * PRIME64_5;
        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
        p++;
    }

    h64 ^= h64 >> 33;
    h64 *= PRIME64_2;
    h64 ^= h64 >> 29;
    h64 *= PRIME64_3;
    h64 ^= h64 >> 32;
#if 0
    XXH64_freeState((XXH64_state_t *)state_in);
#endif
    return h64;
}


uint64_t XXH64_digest (const XXH64_state_t* state_in)
{
    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;

    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        return XXH64_digest_endian(state_in, XXH_littleEndian);
    else
        return XXH64_digest_endian(state_in, XXH_bigEndian);
}
@ -1,165 +0,0 @@
/*
   xxHash - Extremely Fast Hash algorithm
   Header File
   Copyright (C) 2012-2014, Yann Collet.
   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:

   * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above
   copyright notice, this list of conditions and the following disclaimer
   in the documentation and/or other materials provided with the
   distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

   You can contact the author at :
   - xxHash source repository : http://code.google.com/p/xxhash/
*/

/* Notice extracted from xxHash homepage :

xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
It also successfully passes all tests from the SMHasher suite.

Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)

Name            Speed       Q.Score   Author
xxHash          5.4 GB/s    10
CrapWow         3.2 GB/s    2         Andrew
MurmurHash 3a   2.7 GB/s    10        Austin Appleby
SpookyHash      2.0 GB/s    10        Bob Jenkins
SBox            1.4 GB/s    9         Bret Mulvey
Lookup3         1.2 GB/s    9         Bob Jenkins
SuperFastHash   1.2 GB/s    1         Paul Hsieh
CityHash64      1.05 GB/s   10        Pike & Alakuijala
FNV             0.55 GB/s   5         Fowler, Noll, Vo
CRC32           0.43 GB/s   9
MD5-32          0.33 GB/s   10        Ronald L. Rivest
SHA1-32         0.28 GB/s   10

Q.Score is a measure of quality of the hash function.
It depends on successfully passing the SMHasher test set.
10 is a perfect score.
*/
#ifndef LIBUCL_XXHASH_H
#define LIBUCL_XXHASH_H

#if defined (__cplusplus)
extern "C" {
#endif


/*****************************
   Includes
*****************************/
#include <stddef.h> /* size_t */
#include <stdint.h>


/*****************************
   Type
*****************************/
typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;


/*****************************
   Simple Hash Functions
*****************************/

unsigned int XXH32 (const void* input, size_t length, unsigned seed);
uint64_t XXH64 (const void* input, size_t length, uint64_t seed);

/*
XXH32() :
    Calculate the 32-bit hash of the sequence of "length" bytes stored at memory address "input".
    The memory between input & input+length must be valid (allocated and read-accessible).
    "seed" can be used to alter the result predictably.
    This function successfully passes all SMHasher tests.
    Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
XXH64() :
    Calculate the 64-bit hash of the sequence of "length" bytes stored at memory address "input".
*/
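For reference (this header is the one removed by the commit), the one-shot API above was used as in the following minimal sketch; the message and seed value are illustrative, not part of the header:

#include <stdio.h>
#include <string.h>
#include "xxhash.h"

int main (void)
{
    const char *msg = "hello world";

    /* one-shot hashing: the whole input is supplied in a single call */
    unsigned int h32 = XXH32 (msg, strlen (msg), 0);   /* seed = 0 */
    uint64_t     h64 = XXH64 (msg, strlen (msg), 0);

    printf ("XXH32=%08x XXH64=%016llx\n", h32, (unsigned long long) h64);
    return 0;
}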

/*****************************
   Advanced Hash Functions
*****************************/
typedef struct { int64_t ll[ 6]; } XXH32_state_t;
typedef struct { int64_t ll[11]; } XXH64_state_t;

/*
These structures allow static allocation of XXH states.
States must then be initialized using XXHnn_reset() before first use.

If you prefer dynamic allocation, please refer to the functions below.
*/

/*
 * !!!
 * Rspamd specific: we use the legacy method to free state when the digest is obtained
 * !!!
 */
void * XXH32_init (unsigned seed);
XXH32_state_t* XXH32_createState(void);
XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);

XXH64_state_t* XXH64_createState(void);
XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);

/*
These functions create and release memory for XXH state.
States must then be initialized using XXHnn_reset() before first use.
*/

XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, unsigned seed);
XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
unsigned int  XXH32_digest (const XXH32_state_t* statePtr);

XXH_errorcode XXH64_reset  (XXH64_state_t* statePtr, uint64_t seed);
XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
uint64_t      XXH64_digest (const XXH64_state_t* statePtr);

/*
These functions calculate the xxHash of an input provided in multiple smaller packets,
as opposed to an input provided as a single block.

XXH state space must first be allocated, using either the static or the dynamic method provided above.

Start a new hash by initializing the state with a seed, using XXHnn_reset().

Then, feed the hash state by calling XXHnn_update() as many times as necessary.
Obviously, the input must be valid, meaning allocated and read-accessible.
The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.

Finally, you can produce a hash anytime, by using XXHnn_digest().
This function returns the final nn-bit hash.
You can nonetheless continue feeding the hash state with more input,
and therefore get new hashes, by calling XXHnn_digest() again.

When you are done, don't forget to free the XXH state space, typically using XXHnn_freeState().
*/
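The streaming protocol described above boils down to reset/update/digest; a minimal sketch using a statically allocated state (the function name, packet split, and seed are illustrative):

#include <stddef.h>
#include "xxhash.h"

/* hash an input that arrives in two packets */
static uint64_t hash_two_packets (const void *p1, size_t n1,
                                  const void *p2, size_t n2)
{
    XXH64_state_t st;   /* static allocation, as described above */

    if (XXH64_reset (&st, 0) != XXH_OK)       return 0;  /* seed = 0 */
    if (XXH64_update (&st, p1, n1) != XXH_OK) return 0;
    if (XXH64_update (&st, p2, n2) != XXH_OK) return 0;

    return XXH64_digest (&st);  /* final 64-bit hash; updates may continue after */
}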

#if defined (__cplusplus)
}
#endif

#endif
@ -22,12 +22,12 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef UTHASH_H
#define UTHASH_H

#include <string.h> /* memcmp,strlen */
#include <stddef.h> /* ptrdiff_t */
#include <stdlib.h> /* exit() */
#include "xxhash.h"
#include "mum.h"

/* These macros use decltype or the earlier __typeof GNU extension.
   As decltype is only available in newer compilers (VS2010 or gcc 4.3+
@ -50,7 +50,7 @@ do {
char **_da_dst = (char**)(&(dst)); \
*_da_dst = (char*)(src); \
} while(0)
#else
#define DECLTYPE_ASSIGN(dst,src) \
do { \
(dst) = DECLTYPE(dst)(src); \
@ -115,12 +115,12 @@ do {
if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \
memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \
(tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \
} while (0)

#define HASH_BLOOM_FREE(tbl) \
do { \
uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \
} while (0)

#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8)))
#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8)))
@ -132,9 +132,9 @@ do {
HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))

#else
#define HASH_BLOOM_MAKE(tbl)
#define HASH_BLOOM_FREE(tbl)
#define HASH_BLOOM_ADD(tbl,hashv)
#define HASH_BLOOM_TEST(tbl,hashv) (1)
#define HASH_BLOOM_BYTELEN 0
#endif
@ -170,7 +170,7 @@ do {
}; \
HASH_ADD(hh,head,fieldname,keylen_in,add); \
} while(0)

#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \
do { \
unsigned _ha_bkt; \
@ -328,10 +328,10 @@ do {
} \
} while (0)
#else
#define HASH_FSCK(hh,head)
#endif

/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
 * the descriptor to which this macro is defined for tuning the hash function.
 * The app can #include <unistd.h> to get the prototype for write(2). */
#ifdef HASH_EMIT_KEYS
@ -341,12 +341,12 @@ do {
write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \
write(HASH_EMIT_KEYS, keyptr, fieldlen); \
} while (0)
#else
#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
#endif

/* default to HASH_XX unless overridden, e.g. -DHASH_FUNCTION=HASH_SAX */
#ifdef HASH_FUNCTION
#define HASH_FCN HASH_FUNCTION
#else
#define HASH_FCN HASH_XX
@ -356,14 +356,14 @@ do {

#define HASH_XX(key,keylen,num_bkts,hashv,bkt) \
do { \
hashv = XXH32 (key, keylen, XX_HASH_PRIME); \
hashv = mum_hash (key, keylen, XX_HASH_PRIME); \
bkt = (hashv) & (num_bkts-1); \
} while (0)
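The replacement here is mum_hash() from the imported mum.h, which takes (pointer, length, seed) and returns a 64-bit value; HASH_XX then masks the result down to a bucket index. A standalone sketch of that usage (the key and bucket count are illustrative, not part of uthash):

#include <stdio.h>
#include "mum.h"

int main (void)
{
    const char key[] = "example-key";
    uint64_t hv  = mum_hash (key, sizeof (key) - 1, 0);  /* seed = 0 */
    unsigned bkt = (unsigned) (hv & (1024 - 1));         /* 1024 buckets, a power of two */

    printf ("hash=%016llx bucket=%u\n", (unsigned long long) hv, bkt);
    return 0;
}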

/* key comparison function; return 0 if keys equal */
#define HASH_KEYCMP(a,b,len) memcmp(a,b,len)

/* iterate over items in a known bucket to find desired item */
#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \
@ -404,36 +404,36 @@ do {
} \
if (hh_del->hh_next) { \
hh_del->hh_next->hh_prev = hh_del->hh_prev; \
}

/* Bucket expansion has the effect of doubling the number of buckets
 * and redistributing the items into the new buckets. Ideally the
 * items will distribute more or less evenly into the new buckets
 * (the extent to which this is true is a measure of the quality of
 * the hash function as it applies to the key domain).
 *
 * With the items distributed into more buckets, the chain length
 * (item count) in each bucket is reduced. Thus by expanding buckets
 * the hash keeps a bound on the chain length. This bounded chain
 * length is the essence of how a hash provides constant time lookup.
 *
 * The calculation of tbl->ideal_chain_maxlen below deserves some
 * explanation. First, keep in mind that we're calculating the ideal
 * maximum chain length based on the *new* (doubled) bucket count.
 * In fractions this is just n/b (n=number of items, b=new num buckets).
 * Since the ideal chain length is an integer, we want to calculate
 * ceil(n/b). We don't depend on floating point arithmetic in this
 * hash, so to calculate ceil(n/b) with integers we could write
 *
 *      ceil(n/b) = (n/b) + ((n%b)?1:0)
 *
 * and in fact a previous version of this hash did just that.
 * But now we have improved things a bit by recognizing that b is
 * always a power of two. We keep its base-2 log handy (call it lb),
 * so now we can write this with a bit shift and logical AND:
 *
 *      ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
 */
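The identity quoted in that comment is easy to sanity-check outside the macro; the standalone sketch below (helper names are mine, not part of uthash) compares the shift-and-mask form against plain integer ceiling division for power-of-two b:

#include <assert.h>

static unsigned ceil_div (unsigned n, unsigned b)        /* plain ceil(n/b), b > 0 */
{
    return (n / b) + ((n % b) ? 1 : 0);
}

static unsigned ceil_div_pow2 (unsigned n, unsigned lb)  /* b = 1 << lb */
{
    unsigned b = 1U << lb;
    return (n >> lb) + ((n & (b - 1)) ? 1 : 0);
}

int main (void)
{
    unsigned n, lb;
    for (lb = 0; lb < 10; lb++)
        for (n = 0; n < 1000; n++)
            assert (ceil_div (n, 1U << lb) == ceil_div_pow2 (n, lb));
    return 0;
}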
#define HASH_EXPAND_BUCKETS(tbl) \
do { \
@ -485,7 +485,7 @@ do {

/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
/* Note that HASH_SORT assumes the hash handle name to be hh.
 * HASH_SRT was added to allow the hash handle name to be passed in. */
#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
#define HASH_SRT(hh,head,cmpfcn) \
@ -575,10 +575,10 @@ do {
} \
} while (0)

/* This function selects items from one hash into another hash.
 * The end result is that the selected items have dual presence
 * in both hashes. There is no copy of the items made; rather
 * they are added into the new hash through a secondary
 * hash handle that must be present in the structure. */
#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \
do { \
@ -638,7 +638,7 @@ do {
#ifdef NO_DECLTYPE
#define HASH_ITER(hh,head,el,tmp) \
for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \
el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL))
#else
#define HASH_ITER(hh,head,el,tmp) \
for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \
@ -646,7 +646,7 @@ for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL);
#endif

/* obtain a count of items in the hash */
#define HASH_COUNT(head) HASH_CNT(hh,head)
#define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0)

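HASH_ITER and HASH_COUNT are the usual consumer-facing side of these definitions; a minimal sketch (struct item and the dump() helper are hypothetical):

#include <stdio.h>
#include "uthash.h"

struct item {
    int id;                 /* key */
    UT_hash_handle hh;      /* makes this structure hashable */
};

/* tmp lets the loop survive deletion of el mid-iteration */
static void dump (struct item *head)
{
    struct item *el, *tmp;

    printf ("%u items\n", (unsigned) HASH_COUNT (head));
    HASH_ITER (hh, head, el, tmp)
        printf ("id=%d\n", el->id);
}
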
typedef struct UT_hash_bucket {
@ -655,7 +655,7 @@ typedef struct UT_hash_bucket {

/* expand_mult is normally set to 0. In this situation, the max chain length
 * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
 * the bucket's chain exceeds this length, bucket expansion is triggered).
 * However, setting expand_mult to a non-zero value delays bucket expansion
 * (that would be triggered by additions to this particular bucket)
 * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
@ -663,7 +663,7 @@ typedef struct UT_hash_bucket {
 * multiplier is to reduce bucket expansions, since they are expensive, in
 * situations where we know that a particular bucket tends to be overused.
 * It is better to let its chain length grow to a longer yet-still-bounded
 * value, than to do an O(n) bucket expansion too often.
 */
unsigned expand_mult;

@ -689,7 +689,7 @@ typedef struct UT_hash_table {
 * hash distribution; reaching them in a chain traversal takes >ideal steps */
unsigned nonideal_items;

/* ineffective expands occur when a bucket doubling was performed, but
 * afterward, more than half the items in the hash had nonideal chain
 * positions. If this happens on two consecutive expansions we inhibit any
 * further expansion, as it's not helping; this happens when the hash
@ -14,8 +14,7 @@ SRCS= ucl_emitter_streamline.c \
ucl_parser.c \
ucl_schema.c \
ucl_sexp.c \
ucl_util.c \
xxhash.c
ucl_util.c

.PATH: ${LIBUCL}/src \
	${LIBUCL}/include