Add a complete implementation of MurmurHash3. Tweak both implementations
so they match the established idiom. Document them in hash(9). MFC after: 1 month MFC with: r272906
This commit is contained in:
parent
78701de4b7
commit
99e9de871a
@ -26,7 +26,7 @@
|
||||
.\" $OpenBSD: hash.9,v 1.5 2003/04/17 05:08:39 jmc Exp $
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd September 4, 2012
|
||||
.Dd October 18, 2014
|
||||
.Dt HASH 9
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -37,8 +37,10 @@
|
||||
.Nm hash32_strn ,
|
||||
.Nm hash32_stre ,
|
||||
.Nm hash32_strne ,
|
||||
.Nm jenkins_hash ,
|
||||
.Nm jenkins_hash32 ,
|
||||
.Nm jenkins_hash
|
||||
.Nm murmur3_32_hash ,
|
||||
.Nm murmur3_32_hash32
|
||||
.Nd general kernel hashing functions
|
||||
.Sh SYNOPSIS
|
||||
.In sys/hash.h
|
||||
@ -56,6 +58,10 @@
|
||||
.Fn jenkins_hash "const void *buf" "size_t len" "uint32_t hash"
|
||||
.Ft uint32_t
|
||||
.Fn jenkins_hash32 "const uint32_t *buf" "size_t count" "uint32_t hash"
|
||||
.Ft uint32_t
|
||||
.Fn murmur3_32_hash "const void *buf" "size_t len" "uint32_t hash"
|
||||
.Ft uint32_t
|
||||
.Fn murmur3_32_hash32 "const uint32_t *buf" "size_t count" "uint32_t hash"
|
||||
.Sh DESCRIPTION
|
||||
The
|
||||
.Fn hash32
|
||||
@ -130,6 +136,16 @@ sized arrays, thus is simplier and faster.
|
||||
It accepts an array of
|
||||
.Ft uint32_t
|
||||
values in its first argument and size of this array in the second argument.
|
||||
.Pp
|
||||
The
|
||||
.Fn murmur3_32_hash
|
||||
and
|
||||
.Fn murmur3_32_hash32
|
||||
functions are similar to
|
||||
.Fn jenkins_hash
|
||||
and
|
||||
.Fn jenkins_hash32 ,
|
||||
but implement the 32-bit version of MurmurHash3.
|
||||
.Sh RETURN VALUES
|
||||
The
|
||||
.Fn hash32
|
||||
@ -185,6 +201,10 @@ The
|
||||
.Nm jenkins_hash
|
||||
functions were added in
|
||||
.Fx 10.0 .
|
||||
The
|
||||
.Nm murmur3_32_hash
|
||||
functions were added in
|
||||
.Fx 10.1 .
|
||||
.Sh AUTHORS
|
||||
The
|
||||
.Nm hash32
|
||||
@ -192,5 +212,9 @@ functions were written by
|
||||
.An Tobias Weingartner .
|
||||
The
|
||||
.Nm jenkins_hash
|
||||
functions was written by
|
||||
Bob Jenkins .
|
||||
functions were written by
|
||||
.An Bob Jenkins .
|
||||
The
|
||||
.Nm murmur3_32_hash
|
||||
functions were written by
|
||||
.An Dag-Erling Sm\(/orgrav Aq Mt des@FreeBSD.org .
|
||||
|
@ -22,6 +22,8 @@
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/hash.h>
|
||||
@ -32,27 +34,31 @@
|
||||
#define rol32(i32, n) ((i32) << (n) | (i32) >> (32 - (n)))
|
||||
|
||||
/*
|
||||
* $FreeBSD$
|
||||
* Simple implementation of the Murmur3-32 hash function optimized for
|
||||
* aligned sequences of 32-bit words. If len is not a multiple of 4, it
|
||||
* will be rounded down, droping trailer bytes.
|
||||
* Simple implementation of the Murmur3-32 hash function.
|
||||
*
|
||||
* This implementation is slow but safe. It can be made significantly
|
||||
* faster if the caller guarantees that the input is correctly aligned for
|
||||
* 32-bit reads, and slightly faster yet if the caller guarantees that the
|
||||
* length of the input is always a multiple of 4 bytes.
|
||||
*/
|
||||
uint32_t
|
||||
murmur3_aligned_32(const void *data, size_t len, uint32_t seed)
|
||||
murmur3_32_hash(const void *data, size_t len, uint32_t seed)
|
||||
{
|
||||
const uint32_t *data32;
|
||||
const uint8_t *bytes;
|
||||
uint32_t hash, k;
|
||||
size_t res;
|
||||
|
||||
/* initialize */
|
||||
len -= len % sizeof(*data32);
|
||||
/* initialization */
|
||||
bytes = data;
|
||||
res = len;
|
||||
data32 = data;
|
||||
hash = seed;
|
||||
|
||||
/* iterate */
|
||||
for (res = 0; res < len; res += sizeof(*data32), data32++) {
|
||||
k = le32toh(*data32);
|
||||
/* main loop */
|
||||
while (res >= 4) {
|
||||
/* replace with le32toh() if input is aligned */
|
||||
k = le32dec(bytes);
|
||||
bytes += 4;
|
||||
res -= 4;
|
||||
k *= 0xcc9e2d51;
|
||||
k = rol32(k, 15);
|
||||
k *= 0x1b873593;
|
||||
@ -62,6 +68,25 @@ murmur3_aligned_32(const void *data, size_t len, uint32_t seed)
|
||||
hash += 0xe6546b64;
|
||||
}
|
||||
|
||||
/* remainder */
|
||||
/* remove if input length is a multiple of 4 */
|
||||
if (res > 0) {
|
||||
k = 0;
|
||||
switch (res) {
|
||||
case 3:
|
||||
k |= bytes[2] << 16;
|
||||
case 2:
|
||||
k |= bytes[1] << 8;
|
||||
case 1:
|
||||
k |= bytes[0];
|
||||
k *= 0xcc9e2d51;
|
||||
k = rol32(k, 15);
|
||||
k *= 0x1b873593;
|
||||
hash ^= k;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* finalize */
|
||||
hash ^= (uint32_t)len;
|
||||
hash ^= hash >> 16;
|
||||
@ -72,3 +97,36 @@ murmur3_aligned_32(const void *data, size_t len, uint32_t seed)
|
||||
return (hash);
|
||||
}
|
||||
|
||||
/*
|
||||
* Simplified version of the above optimized for aligned sequences of
|
||||
* 32-bit words. The count argument is the number of words, not the
|
||||
* length in bytes.
|
||||
*/
|
||||
uint32_t
|
||||
murmur3_32_hash32(const uint32_t *data, size_t count, uint32_t seed)
|
||||
{
|
||||
uint32_t hash, k;
|
||||
size_t res;
|
||||
|
||||
/* iterate */
|
||||
for (res = count, hash = seed; res > 0; res--, data++) {
|
||||
k = le32toh(*data);
|
||||
k *= 0xcc9e2d51;
|
||||
k = rol32(k, 15);
|
||||
k *= 0x1b873593;
|
||||
hash ^= k;
|
||||
hash = rol32(hash, 13);
|
||||
hash *= 5;
|
||||
hash += 0xe6546b64;
|
||||
}
|
||||
|
||||
/* finalize */
|
||||
hash ^= (uint32_t)count;
|
||||
hash ^= hash >> 16;
|
||||
hash *= 0x85ebca6b;
|
||||
hash ^= hash >> 13;
|
||||
hash *= 0xc2b2ae35;
|
||||
hash ^= hash >> 16;
|
||||
return (hash);
|
||||
}
|
||||
|
||||
|
@ -374,9 +374,9 @@ pf_hashkey(struct pf_state_key *sk)
|
||||
{
|
||||
uint32_t h;
|
||||
|
||||
h = murmur3_aligned_32((uint32_t *)sk,
|
||||
sizeof(struct pf_state_key_cmp),
|
||||
V_pf_hashseed);
|
||||
h = murmur3_32_hash32((uint32_t *)sk,
|
||||
sizeof(struct pf_state_key_cmp)/sizeof(uint32_t),
|
||||
V_pf_hashseed);
|
||||
|
||||
return (h & pf_hashmask);
|
||||
}
|
||||
@ -388,12 +388,12 @@ pf_hashsrc(struct pf_addr *addr, sa_family_t af)
|
||||
|
||||
switch (af) {
|
||||
case AF_INET:
|
||||
h = murmur3_aligned_32((uint32_t *)&addr->v4,
|
||||
sizeof(addr->v4), V_pf_hashseed);
|
||||
h = murmur3_32_hash32((uint32_t *)&addr->v4,
|
||||
sizeof(addr->v4)/sizeof(uint32_t), V_pf_hashseed);
|
||||
break;
|
||||
case AF_INET6:
|
||||
h = murmur3_aligned_32((uint32_t *)&addr->v6,
|
||||
sizeof(addr->v6), V_pf_hashseed);
|
||||
h = murmur3_32_hash32((uint32_t *)&addr->v6,
|
||||
sizeof(addr->v6)/sizeof(uint32_t), V_pf_hashseed);
|
||||
break;
|
||||
default:
|
||||
panic("%s: unknown address family %u", __func__, af);
|
||||
|
@ -126,7 +126,8 @@ hash32_strne(const void *buf, size_t len, int end, const char **ep,
|
||||
uint32_t jenkins_hash(const void *, size_t, uint32_t);
|
||||
uint32_t jenkins_hash32(const uint32_t *, size_t, uint32_t);
|
||||
|
||||
uint32_t murmur3_aligned_32(const void *data, size_t len, uint32_t seed);
|
||||
uint32_t murmur3_32_hash(const void *, size_t, uint32_t);
|
||||
uint32_t murmur3_32_hash32(const uint32_t *, size_t, uint32_t);
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user