From 99e9de871aee402a1166cdecb957fe6c8c99da13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dag-Erling=20Sm=C3=B8rgrav?= Date: Sat, 18 Oct 2014 22:15:11 +0000 Subject: [PATCH] Add a complete implementation of MurmurHash3. Tweak both implementations so they match the established idiom. Document them in hash(9). MFC after: 1 month MFC with: r272906 --- share/man/man9/hash.9 | 32 ++++++++++++++-- sys/libkern/murmur3_32.c | 82 ++++++++++++++++++++++++++++++++++------ sys/netpfil/pf/pf.c | 14 +++---- sys/sys/hash.h | 3 +- 4 files changed, 107 insertions(+), 24 deletions(-) diff --git a/share/man/man9/hash.9 b/share/man/man9/hash.9 index 7e48da900d8a..961be0b54e2c 100644 --- a/share/man/man9/hash.9 +++ b/share/man/man9/hash.9 @@ -26,7 +26,7 @@ .\" $OpenBSD: hash.9,v 1.5 2003/04/17 05:08:39 jmc Exp $ .\" $FreeBSD$ .\" -.Dd September 4, 2012 +.Dd October 18, 2014 .Dt HASH 9 .Os .Sh NAME @@ -37,8 +37,10 @@ .Nm hash32_strn , .Nm hash32_stre , .Nm hash32_strne , +.Nm jenkins_hash , .Nm jenkins_hash32 , -.Nm jenkins_hash +.Nm murmur3_32_hash , +.Nm murmur3_32_hash32 .Nd general kernel hashing functions .Sh SYNOPSIS .In sys/hash.h @@ -56,6 +58,10 @@ .Fn jenkins_hash "const void *buf" "size_t len" "uint32_t hash" .Ft uint32_t .Fn jenkins_hash32 "const uint32_t *buf" "size_t count" "uint32_t hash" +.Ft uint32_t +.Fn murmur3_32_hash "const void *buf" "size_t len" "uint32_t hash" +.Ft uint32_t +.Fn murmur3_32_hash32 "const uint32_t *buf" "size_t count" "uint32_t hash" .Sh DESCRIPTION The .Fn hash32 @@ -130,6 +136,16 @@ sized arrays, thus is simplier and faster. It accepts an array of .Ft uint32_t values in its first argument and size of this array in the second argument. +.Pp +The +.Fn murmur3_32_hash +and +.Fn murmur3_32_hash32 +functions are similar to +.Fn jenkins_hash +and +.Fn jenkins_hash32 , +but implement the 32-bit version of MurmurHash3. .Sh RETURN VALUES The .Fn hash32 @@ -185,6 +201,10 @@ The .Nm jenkins_hash functions were added in .Fx 10.0 . +The +.Nm murmur3_32_hash +functions were added in +.Fx 10.1 . .Sh AUTHORS The .Nm hash32 @@ -192,5 +212,9 @@ functions were written by .An Tobias Weingartner . The .Nm jenkins_hash -functions was written by -Bob Jenkins . +functions were written by +.An Bob Jenkins . +The +.Nm murmur3_32_hash +functions were written by +.An Dag-Erling Sm\(/orgrav Aq Mt des@FreeBSD.org . diff --git a/sys/libkern/murmur3_32.c b/sys/libkern/murmur3_32.c index 05ce2c59d01b..ef2a19221383 100644 --- a/sys/libkern/murmur3_32.c +++ b/sys/libkern/murmur3_32.c @@ -22,6 +22,8 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $FreeBSD$ */ #include @@ -32,27 +34,31 @@ #define rol32(i32, n) ((i32) << (n) | (i32) >> (32 - (n))) /* - * $FreeBSD$ - * Simple implementation of the Murmur3-32 hash function optimized for - * aligned sequences of 32-bit words. If len is not a multiple of 4, it - * will be rounded down, droping trailer bytes. + * Simple implementation of the Murmur3-32 hash function. + * + * This implementation is slow but safe. It can be made significantly + * faster if the caller guarantees that the input is correctly aligned for + * 32-bit reads, and slightly faster yet if the caller guarantees that the + * length of the input is always a multiple of 4 bytes. */ uint32_t -murmur3_aligned_32(const void *data, size_t len, uint32_t seed) +murmur3_32_hash(const void *data, size_t len, uint32_t seed) { - const uint32_t *data32; + const uint8_t *bytes; uint32_t hash, k; size_t res; - /* initialize */ - len -= len % sizeof(*data32); + /* initialization */ + bytes = data; res = len; - data32 = data; hash = seed; - /* iterate */ - for (res = 0; res < len; res += sizeof(*data32), data32++) { - k = le32toh(*data32); + /* main loop */ + while (res >= 4) { + /* replace with le32toh() if input is aligned */ + k = le32dec(bytes); + bytes += 4; + res -= 4; k *= 0xcc9e2d51; k = rol32(k, 15); k *= 0x1b873593; @@ -62,6 +68,25 @@ murmur3_aligned_32(const void *data, size_t len, uint32_t seed) hash += 0xe6546b64; } + /* remainder */ + /* remove if input length is a multiple of 4 */ + if (res > 0) { + k = 0; + switch (res) { + case 3: + k |= bytes[2] << 16; + case 2: + k |= bytes[1] << 8; + case 1: + k |= bytes[0]; + k *= 0xcc9e2d51; + k = rol32(k, 15); + k *= 0x1b873593; + hash ^= k; + break; + } + } + /* finalize */ hash ^= (uint32_t)len; hash ^= hash >> 16; @@ -72,3 +97,36 @@ murmur3_aligned_32(const void *data, size_t len, uint32_t seed) return (hash); } +/* + * Simplified version of the above optimized for aligned sequences of + * 32-bit words. The count argument is the number of words, not the + * length in bytes. + */ +uint32_t +murmur3_32_hash32(const uint32_t *data, size_t count, uint32_t seed) +{ + uint32_t hash, k; + size_t res; + + /* iterate */ + for (res = count, hash = seed; res > 0; res--, data++) { + k = le32toh(*data); + k *= 0xcc9e2d51; + k = rol32(k, 15); + k *= 0x1b873593; + hash ^= k; + hash = rol32(hash, 13); + hash *= 5; + hash += 0xe6546b64; + } + + /* finalize */ + hash ^= (uint32_t)count; + hash ^= hash >> 16; + hash *= 0x85ebca6b; + hash ^= hash >> 13; + hash *= 0xc2b2ae35; + hash ^= hash >> 16; + return (hash); +} + diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index ac4a154ac347..16cd7fbfe96f 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -374,9 +374,9 @@ pf_hashkey(struct pf_state_key *sk) { uint32_t h; - h = murmur3_aligned_32((uint32_t *)sk, - sizeof(struct pf_state_key_cmp), - V_pf_hashseed); + h = murmur3_32_hash32((uint32_t *)sk, + sizeof(struct pf_state_key_cmp)/sizeof(uint32_t), + V_pf_hashseed); return (h & pf_hashmask); } @@ -388,12 +388,12 @@ pf_hashsrc(struct pf_addr *addr, sa_family_t af) switch (af) { case AF_INET: - h = murmur3_aligned_32((uint32_t *)&addr->v4, - sizeof(addr->v4), V_pf_hashseed); + h = murmur3_32_hash32((uint32_t *)&addr->v4, + sizeof(addr->v4)/sizeof(uint32_t), V_pf_hashseed); break; case AF_INET6: - h = murmur3_aligned_32((uint32_t *)&addr->v6, - sizeof(addr->v6), V_pf_hashseed); + h = murmur3_32_hash32((uint32_t *)&addr->v6, + sizeof(addr->v6)/sizeof(uint32_t), V_pf_hashseed); break; default: panic("%s: unknown address family %u", __func__, af); diff --git a/sys/sys/hash.h b/sys/sys/hash.h index e2e008bfa853..8abf17bb3683 100644 --- a/sys/sys/hash.h +++ b/sys/sys/hash.h @@ -126,7 +126,8 @@ hash32_strne(const void *buf, size_t len, int end, const char **ep, uint32_t jenkins_hash(const void *, size_t, uint32_t); uint32_t jenkins_hash32(const uint32_t *, size_t, uint32_t); -uint32_t murmur3_aligned_32(const void *data, size_t len, uint32_t seed); +uint32_t murmur3_32_hash(const void *, size_t, uint32_t); +uint32_t murmur3_32_hash32(const uint32_t *, size_t, uint32_t); #endif /* _KERNEL */