Add a complete implementation of MurmurHash3. Tweak both implementations

so they match the established idiom.  Document them in hash(9).

MFC after:	1 month
MFC with:	r272906
This commit is contained in:
Dag-Erling Smørgrav 2014-10-18 22:15:11 +00:00
parent 78701de4b7
commit 99e9de871a
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=273268
4 changed files with 107 additions and 24 deletions

View File

@ -26,7 +26,7 @@
.\" $OpenBSD: hash.9,v 1.5 2003/04/17 05:08:39 jmc Exp $
.\" $FreeBSD$
.\"
.Dd September 4, 2012
.Dd October 18, 2014
.Dt HASH 9
.Os
.Sh NAME
@ -37,8 +37,10 @@
.Nm hash32_strn ,
.Nm hash32_stre ,
.Nm hash32_strne ,
.Nm jenkins_hash ,
.Nm jenkins_hash32 ,
.Nm jenkins_hash
.Nm murmur3_32_hash ,
.Nm murmur3_32_hash32
.Nd general kernel hashing functions
.Sh SYNOPSIS
.In sys/hash.h
@ -56,6 +58,10 @@
.Fn jenkins_hash "const void *buf" "size_t len" "uint32_t hash"
.Ft uint32_t
.Fn jenkins_hash32 "const uint32_t *buf" "size_t count" "uint32_t hash"
.Ft uint32_t
.Fn murmur3_32_hash "const void *buf" "size_t len" "uint32_t hash"
.Ft uint32_t
.Fn murmur3_32_hash32 "const uint32_t *buf" "size_t count" "uint32_t hash"
.Sh DESCRIPTION
The
.Fn hash32
@ -130,6 +136,16 @@ sized arrays, thus is simplier and faster.
It accepts an array of
.Ft uint32_t
values in its first argument and size of this array in the second argument.
.Pp
The
.Fn murmur3_32_hash
and
.Fn murmur3_32_hash32
functions are similar to
.Fn jenkins_hash
and
.Fn jenkins_hash32 ,
but implement the 32-bit version of MurmurHash3.
.Sh RETURN VALUES
The
.Fn hash32
@ -185,6 +201,10 @@ The
.Nm jenkins_hash
functions were added in
.Fx 10.0 .
The
.Nm murmur3_32_hash
functions were added in
.Fx 10.1 .
.Sh AUTHORS
The
.Nm hash32
@ -192,5 +212,9 @@ functions were written by
.An Tobias Weingartner .
The
.Nm jenkins_hash
functions was written by
Bob Jenkins .
functions were written by
.An Bob Jenkins .
The
.Nm murmur3_32_hash
functions were written by
.An Dag-Erling Sm\(/orgrav Aq Mt des@FreeBSD.org .

View File

@ -22,6 +22,8 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/hash.h>
@ -32,27 +34,31 @@
#define rol32(i32, n) ((i32) << (n) | (i32) >> (32 - (n)))
/*
* $FreeBSD$
* Simple implementation of the Murmur3-32 hash function optimized for
* aligned sequences of 32-bit words. If len is not a multiple of 4, it
* will be rounded down, droping trailer bytes.
* Simple implementation of the Murmur3-32 hash function.
*
* This implementation is slow but safe. It can be made significantly
* faster if the caller guarantees that the input is correctly aligned for
* 32-bit reads, and slightly faster yet if the caller guarantees that the
* length of the input is always a multiple of 4 bytes.
*/
uint32_t
murmur3_aligned_32(const void *data, size_t len, uint32_t seed)
murmur3_32_hash(const void *data, size_t len, uint32_t seed)
{
const uint32_t *data32;
const uint8_t *bytes;
uint32_t hash, k;
size_t res;
/* initialize */
len -= len % sizeof(*data32);
/* initialization */
bytes = data;
res = len;
data32 = data;
hash = seed;
/* iterate */
for (res = 0; res < len; res += sizeof(*data32), data32++) {
k = le32toh(*data32);
/* main loop */
while (res >= 4) {
/* replace with le32toh() if input is aligned */
k = le32dec(bytes);
bytes += 4;
res -= 4;
k *= 0xcc9e2d51;
k = rol32(k, 15);
k *= 0x1b873593;
@ -62,6 +68,25 @@ murmur3_aligned_32(const void *data, size_t len, uint32_t seed)
hash += 0xe6546b64;
}
/* remainder */
/* remove if input length is a multiple of 4 */
if (res > 0) {
k = 0;
switch (res) {
case 3:
k |= bytes[2] << 16;
case 2:
k |= bytes[1] << 8;
case 1:
k |= bytes[0];
k *= 0xcc9e2d51;
k = rol32(k, 15);
k *= 0x1b873593;
hash ^= k;
break;
}
}
/* finalize */
hash ^= (uint32_t)len;
hash ^= hash >> 16;
@ -72,3 +97,36 @@ murmur3_aligned_32(const void *data, size_t len, uint32_t seed)
return (hash);
}
/*
* Simplified version of the above optimized for aligned sequences of
* 32-bit words. The count argument is the number of words, not the
* length in bytes.
*/
uint32_t
murmur3_32_hash32(const uint32_t *data, size_t count, uint32_t seed)
{
uint32_t hash, k;
size_t res;
/* iterate */
for (res = count, hash = seed; res > 0; res--, data++) {
k = le32toh(*data);
k *= 0xcc9e2d51;
k = rol32(k, 15);
k *= 0x1b873593;
hash ^= k;
hash = rol32(hash, 13);
hash *= 5;
hash += 0xe6546b64;
}
/* finalize */
hash ^= (uint32_t)count;
hash ^= hash >> 16;
hash *= 0x85ebca6b;
hash ^= hash >> 13;
hash *= 0xc2b2ae35;
hash ^= hash >> 16;
return (hash);
}

View File

@ -374,9 +374,9 @@ pf_hashkey(struct pf_state_key *sk)
{
uint32_t h;
h = murmur3_aligned_32((uint32_t *)sk,
sizeof(struct pf_state_key_cmp),
V_pf_hashseed);
h = murmur3_32_hash32((uint32_t *)sk,
sizeof(struct pf_state_key_cmp)/sizeof(uint32_t),
V_pf_hashseed);
return (h & pf_hashmask);
}
@ -388,12 +388,12 @@ pf_hashsrc(struct pf_addr *addr, sa_family_t af)
switch (af) {
case AF_INET:
h = murmur3_aligned_32((uint32_t *)&addr->v4,
sizeof(addr->v4), V_pf_hashseed);
h = murmur3_32_hash32((uint32_t *)&addr->v4,
sizeof(addr->v4)/sizeof(uint32_t), V_pf_hashseed);
break;
case AF_INET6:
h = murmur3_aligned_32((uint32_t *)&addr->v6,
sizeof(addr->v6), V_pf_hashseed);
h = murmur3_32_hash32((uint32_t *)&addr->v6,
sizeof(addr->v6)/sizeof(uint32_t), V_pf_hashseed);
break;
default:
panic("%s: unknown address family %u", __func__, af);

View File

@ -126,7 +126,8 @@ hash32_strne(const void *buf, size_t len, int end, const char **ep,
uint32_t jenkins_hash(const void *, size_t, uint32_t);
uint32_t jenkins_hash32(const uint32_t *, size_t, uint32_t);
uint32_t murmur3_aligned_32(const void *data, size_t len, uint32_t seed);
uint32_t murmur3_32_hash(const void *, size_t, uint32_t);
uint32_t murmur3_32_hash32(const uint32_t *, size_t, uint32_t);
#endif /* _KERNEL */