Add a complete implementation of MurmurHash3.  Tweak both implementations
so they match the established idiom.  Document them in hash(9).

MFC after:	1 month
MFC with:	r272906
This commit is contained in:
Dag-Erling Smørgrav 2014-10-18 22:15:11 +00:00
parent 78701de4b7
commit 99e9de871a
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=273268
4 changed files with 107 additions and 24 deletions

View File

@ -26,7 +26,7 @@
.\" $OpenBSD: hash.9,v 1.5 2003/04/17 05:08:39 jmc Exp $ .\" $OpenBSD: hash.9,v 1.5 2003/04/17 05:08:39 jmc Exp $
.\" $FreeBSD$ .\" $FreeBSD$
.\" .\"
.Dd September 4, 2012 .Dd October 18, 2014
.Dt HASH 9 .Dt HASH 9
.Os .Os
.Sh NAME .Sh NAME
@ -37,8 +37,10 @@
.Nm hash32_strn , .Nm hash32_strn ,
.Nm hash32_stre , .Nm hash32_stre ,
.Nm hash32_strne , .Nm hash32_strne ,
.Nm jenkins_hash ,
.Nm jenkins_hash32 , .Nm jenkins_hash32 ,
.Nm jenkins_hash .Nm murmur3_32_hash ,
.Nm murmur3_32_hash32
.Nd general kernel hashing functions .Nd general kernel hashing functions
.Sh SYNOPSIS .Sh SYNOPSIS
.In sys/hash.h .In sys/hash.h
@ -56,6 +58,10 @@
.Fn jenkins_hash "const void *buf" "size_t len" "uint32_t hash" .Fn jenkins_hash "const void *buf" "size_t len" "uint32_t hash"
.Ft uint32_t .Ft uint32_t
.Fn jenkins_hash32 "const uint32_t *buf" "size_t count" "uint32_t hash" .Fn jenkins_hash32 "const uint32_t *buf" "size_t count" "uint32_t hash"
.Ft uint32_t
.Fn murmur3_32_hash "const void *buf" "size_t len" "uint32_t hash"
.Ft uint32_t
.Fn murmur3_32_hash32 "const uint32_t *buf" "size_t count" "uint32_t hash"
.Sh DESCRIPTION .Sh DESCRIPTION
The The
.Fn hash32 .Fn hash32
@ -130,6 +136,16 @@ sized arrays, thus is simpler and faster.
It accepts an array of It accepts an array of
.Ft uint32_t .Ft uint32_t
values in its first argument and size of this array in the second argument. values in its first argument and size of this array in the second argument.
.Pp
The
.Fn murmur3_32_hash
and
.Fn murmur3_32_hash32
functions are similar to
.Fn jenkins_hash
and
.Fn jenkins_hash32 ,
but implement the 32-bit version of MurmurHash3.
.Sh RETURN VALUES .Sh RETURN VALUES
The The
.Fn hash32 .Fn hash32
@ -185,6 +201,10 @@ The
.Nm jenkins_hash .Nm jenkins_hash
functions were added in functions were added in
.Fx 10.0 . .Fx 10.0 .
The
.Nm murmur3_32_hash
functions were added in
.Fx 10.1 .
.Sh AUTHORS .Sh AUTHORS
The The
.Nm hash32 .Nm hash32
@ -192,5 +212,9 @@ functions were written by
.An Tobias Weingartner . .An Tobias Weingartner .
The The
.Nm jenkins_hash .Nm jenkins_hash
functions was written by functions were written by
Bob Jenkins . .An Bob Jenkins .
The
.Nm murmur3_32_hash
functions were written by
.An Dag-Erling Sm\(/orgrav Aq Mt des@FreeBSD.org .

View File

@ -22,6 +22,8 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE. * SUCH DAMAGE.
*
* $FreeBSD$
*/ */
#include <sys/hash.h> #include <sys/hash.h>
@ -32,27 +34,31 @@
#define rol32(i32, n) ((i32) << (n) | (i32) >> (32 - (n))) #define rol32(i32, n) ((i32) << (n) | (i32) >> (32 - (n)))
/* /*
* $FreeBSD$ * Simple implementation of the Murmur3-32 hash function.
* Simple implementation of the Murmur3-32 hash function optimized for *
* aligned sequences of 32-bit words. If len is not a multiple of 4, it * This implementation is slow but safe. It can be made significantly
* will be rounded down, droping trailer bytes. * faster if the caller guarantees that the input is correctly aligned for
* 32-bit reads, and slightly faster yet if the caller guarantees that the
* length of the input is always a multiple of 4 bytes.
*/ */
uint32_t uint32_t
murmur3_aligned_32(const void *data, size_t len, uint32_t seed) murmur3_32_hash(const void *data, size_t len, uint32_t seed)
{ {
const uint32_t *data32; const uint8_t *bytes;
uint32_t hash, k; uint32_t hash, k;
size_t res; size_t res;
/* initialize */ /* initialization */
len -= len % sizeof(*data32); bytes = data;
res = len; res = len;
data32 = data;
hash = seed; hash = seed;
/* iterate */ /* main loop */
for (res = 0; res < len; res += sizeof(*data32), data32++) { while (res >= 4) {
k = le32toh(*data32); /* replace with le32toh() if input is aligned */
k = le32dec(bytes);
bytes += 4;
res -= 4;
k *= 0xcc9e2d51; k *= 0xcc9e2d51;
k = rol32(k, 15); k = rol32(k, 15);
k *= 0x1b873593; k *= 0x1b873593;
@ -62,6 +68,25 @@ murmur3_aligned_32(const void *data, size_t len, uint32_t seed)
hash += 0xe6546b64; hash += 0xe6546b64;
} }
/* remainder */
/* remove if input length is a multiple of 4 */
if (res > 0) {
k = 0;
switch (res) {
case 3:
k |= bytes[2] << 16;
case 2:
k |= bytes[1] << 8;
case 1:
k |= bytes[0];
k *= 0xcc9e2d51;
k = rol32(k, 15);
k *= 0x1b873593;
hash ^= k;
break;
}
}
/* finalize */ /* finalize */
hash ^= (uint32_t)len; hash ^= (uint32_t)len;
hash ^= hash >> 16; hash ^= hash >> 16;
@ -72,3 +97,36 @@ murmur3_aligned_32(const void *data, size_t len, uint32_t seed)
return (hash); return (hash);
} }
/*
 * Murmur3-32 specialised for input that is a sequence of 32-bit words.
 *
 * data:  first word of the input.  Each word is loaded directly and
 *        converted with le32toh(), so the pointer must be suitably
 *        aligned for 32-bit reads.
 * count: number of 32-bit words to hash (NOT the length in bytes).
 * seed:  initial value of the hash state.
 *
 * Returns the 32-bit hash value.
 *
 * MurmurHash3 folds the input length in bytes into the final state, so
 * the length mixed in below is count * 4.  This is intended to make the
 * result identical to murmur3_32_hash() called on the same count * 4
 * bytes; mixing in the bare word count would yield a value that is not
 * MurmurHash3 and would disagree with the byte-oriented function.
 */
uint32_t
murmur3_32_hash32(const uint32_t *data, size_t count, uint32_t seed)
{
	uint32_t hash, k;
	size_t res;

	/* main loop: mix in one little-endian 32-bit word per iteration */
	for (res = count, hash = seed; res > 0; res--, data++) {
		k = le32toh(*data);
		k *= 0xcc9e2d51;
		k = rol32(k, 15);
		k *= 0x1b873593;
		hash ^= k;
		hash = rol32(hash, 13);
		hash *= 5;
		hash += 0xe6546b64;
	}

	/*
	 * finalize: fold in the length in bytes (truncated to 32 bits, as
	 * the byte-oriented variant does with its len argument), then run
	 * the avalanche steps.
	 */
	hash ^= (uint32_t)(count * sizeof(*data));
	hash ^= hash >> 16;
	hash *= 0x85ebca6b;
	hash ^= hash >> 13;
	hash *= 0xc2b2ae35;
	hash ^= hash >> 16;
	return (hash);
}

View File

@ -374,9 +374,9 @@ pf_hashkey(struct pf_state_key *sk)
{ {
uint32_t h; uint32_t h;
h = murmur3_aligned_32((uint32_t *)sk, h = murmur3_32_hash32((uint32_t *)sk,
sizeof(struct pf_state_key_cmp), sizeof(struct pf_state_key_cmp)/sizeof(uint32_t),
V_pf_hashseed); V_pf_hashseed);
return (h & pf_hashmask); return (h & pf_hashmask);
} }
@ -388,12 +388,12 @@ pf_hashsrc(struct pf_addr *addr, sa_family_t af)
switch (af) { switch (af) {
case AF_INET: case AF_INET:
h = murmur3_aligned_32((uint32_t *)&addr->v4, h = murmur3_32_hash32((uint32_t *)&addr->v4,
sizeof(addr->v4), V_pf_hashseed); sizeof(addr->v4)/sizeof(uint32_t), V_pf_hashseed);
break; break;
case AF_INET6: case AF_INET6:
h = murmur3_aligned_32((uint32_t *)&addr->v6, h = murmur3_32_hash32((uint32_t *)&addr->v6,
sizeof(addr->v6), V_pf_hashseed); sizeof(addr->v6)/sizeof(uint32_t), V_pf_hashseed);
break; break;
default: default:
panic("%s: unknown address family %u", __func__, af); panic("%s: unknown address family %u", __func__, af);

View File

@ -126,7 +126,8 @@ hash32_strne(const void *buf, size_t len, int end, const char **ep,
uint32_t jenkins_hash(const void *, size_t, uint32_t); uint32_t jenkins_hash(const void *, size_t, uint32_t);
uint32_t jenkins_hash32(const uint32_t *, size_t, uint32_t); uint32_t jenkins_hash32(const uint32_t *, size_t, uint32_t);
uint32_t murmur3_aligned_32(const void *data, size_t len, uint32_t seed); uint32_t murmur3_32_hash(const void *, size_t, uint32_t);
uint32_t murmur3_32_hash32(const uint32_t *, size_t, uint32_t);
#endif /* _KERNEL */ #endif /* _KERNEL */