hash: add scalable multi-writer insertion with Intel TSX

This patch introduced scalable multi-writer Cuckoo Hash insertion
based on a split Cuckoo Search and Move operation using Intel
TSX. It can do scalable hash insertion with 22 cores with little
performance loss and negligible TSX abortion rate.

* Added an extra rte_hash flag definition to switch default single writer
  Cuckoo Hash behavior to multiwriter.
    - If HTM is available, it would use hardware feature for concurrency.
    - If HTM is not available, it would fall back to spinlock.

* Created a rte_cuckoo_hash_x86.h file to hold all x86-arch related
  cuckoo_hash functions. And rte_cuckoo_hash.c uses compile time flag to
  select x86 file or other platform-specific implementations. While HTM check
  is still done at runtime (same idea with
  RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT)

* Moved rte_hash private struct definitions to rte_cuckoo_hash.h, to allow
  rte_cuckoo_hash_x86.h or future platform dependent functions to include.

* Following new functions are created for consistent names when new platform
  TM support are added.
    - rte_hash_cuckoo_move_insert_mw_tm: do insertion with bucket movement.
    - rte_hash_cuckoo_insert_mw_tm: do insertion without bucket movement.

* One extra multi-writer test case is added.

Signed-off-by: Wei Shen <wei1.shen@intel.com>
Signed-off-by: Sameh Gobriel <sameh.gobriel@intel.com>
Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
This commit is contained in:
Wei Shen 2016-06-16 15:14:14 -07:00 committed by Thomas Monjalon
parent 5590a60241
commit be856325cb
7 changed files with 796 additions and 177 deletions

View File

@ -123,6 +123,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_thash.c
SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_perf.c
SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_functions.c
SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_scaling.c
SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_multiwriter.c
SRCS-$(CONFIG_RTE_LIBRTE_LPM) += test_lpm.c
SRCS-$(CONFIG_RTE_LIBRTE_LPM) += test_lpm_perf.c

View File

@ -0,0 +1,287 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2016 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <inttypes.h>
#include <locale.h>
#include <rte_cycles.h>
#include <rte_hash.h>
#include <rte_hash_crc.h>
#include <rte_launch.h>
#include <rte_malloc.h>
#include <rte_random.h>
#include <rte_spinlock.h>
#include "test.h"
/*
* Check condition and return an error if true. Assumes that "handle" is the
* name of the hash structure pointer to be freed.
*/
#define RETURN_IF_ERROR(cond, str, ...) do { \
if (cond) { \
printf("ERROR line %d: " str "\n", __LINE__, \
##__VA_ARGS__); \
if (handle) \
rte_hash_free(handle); \
return -1; \
} \
} while (0)
#define RTE_APP_TEST_HASH_MULTIWRITER_FAILED 0
struct {
uint32_t *keys;
uint32_t *found;
uint32_t nb_tsx_insertion;
struct rte_hash *h;
} tbl_multiwriter_test_params;
const uint32_t nb_entries = 16*1024*1024;
const uint32_t nb_total_tsx_insertion = 15*1024*1024;
uint32_t rounded_nb_total_tsx_insertion;
static rte_atomic64_t gcycles;
static rte_atomic64_t ginsertions;
static int use_htm;
static int
test_hash_multiwriter_worker(__attribute__((unused)) void *arg)
{
uint64_t i, offset;
uint32_t lcore_id = rte_lcore_id();
uint64_t begin, cycles;
offset = (lcore_id - rte_get_master_lcore())
* tbl_multiwriter_test_params.nb_tsx_insertion;
printf("Core #%d inserting %d: %'"PRId64" - %'"PRId64"\n",
lcore_id, tbl_multiwriter_test_params.nb_tsx_insertion,
offset, offset + tbl_multiwriter_test_params.nb_tsx_insertion);
begin = rte_rdtsc_precise();
for (i = offset;
i < offset + tbl_multiwriter_test_params.nb_tsx_insertion;
i++) {
if (rte_hash_add_key(tbl_multiwriter_test_params.h,
tbl_multiwriter_test_params.keys + i) < 0)
break;
}
cycles = rte_rdtsc_precise() - begin;
rte_atomic64_add(&gcycles, cycles);
rte_atomic64_add(&ginsertions, i - offset);
for (; i < offset + tbl_multiwriter_test_params.nb_tsx_insertion; i++)
tbl_multiwriter_test_params.keys[i]
= RTE_APP_TEST_HASH_MULTIWRITER_FAILED;
return 0;
}
static int
test_hash_multiwriter(void)
{
unsigned int i, rounded_nb_total_tsx_insertion;
static unsigned calledCount = 1;
uint32_t *keys;
uint32_t *found;
struct rte_hash_parameters hash_params = {
.entries = nb_entries,
.key_len = sizeof(uint32_t),
.hash_func = rte_hash_crc,
.hash_func_init_val = 0,
.socket_id = rte_socket_id(),
};
if (use_htm)
hash_params.extra_flag =
RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT
| RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD;
else
hash_params.extra_flag =
RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD;
struct rte_hash *handle;
char name[RTE_HASH_NAMESIZE];
const void *next_key;
void *next_data;
uint32_t iter = 0;
uint32_t duplicated_keys = 0;
uint32_t lost_keys = 0;
snprintf(name, 32, "test%u", calledCount++);
hash_params.name = name;
handle = rte_hash_create(&hash_params);
RETURN_IF_ERROR(handle == NULL, "hash creation failed");
tbl_multiwriter_test_params.h = handle;
tbl_multiwriter_test_params.nb_tsx_insertion =
nb_total_tsx_insertion / rte_lcore_count();
rounded_nb_total_tsx_insertion = (nb_total_tsx_insertion /
tbl_multiwriter_test_params.nb_tsx_insertion)
* tbl_multiwriter_test_params.nb_tsx_insertion;
rte_srand(rte_rdtsc());
keys = rte_malloc(NULL, sizeof(uint32_t) * nb_entries, 0);
if (keys == NULL) {
printf("RTE_MALLOC failed\n");
goto err1;
}
found = rte_zmalloc(NULL, sizeof(uint32_t) * nb_entries, 0);
if (found == NULL) {
printf("RTE_ZMALLOC failed\n");
goto err2;
}
for (i = 0; i < nb_entries; i++)
keys[i] = i;
tbl_multiwriter_test_params.keys = keys;
tbl_multiwriter_test_params.found = found;
rte_atomic64_init(&gcycles);
rte_atomic64_clear(&gcycles);
rte_atomic64_init(&ginsertions);
rte_atomic64_clear(&ginsertions);
/* Fire all threads. */
rte_eal_mp_remote_launch(test_hash_multiwriter_worker,
NULL, CALL_MASTER);
rte_eal_mp_wait_lcore();
while (rte_hash_iterate(handle, &next_key, &next_data, &iter) >= 0) {
/* Search for the key in the list of keys added .*/
i = *(const uint32_t *)next_key;
tbl_multiwriter_test_params.found[i]++;
}
for (i = 0; i < rounded_nb_total_tsx_insertion; i++) {
if (tbl_multiwriter_test_params.keys[i]
!= RTE_APP_TEST_HASH_MULTIWRITER_FAILED) {
if (tbl_multiwriter_test_params.found[i] > 1) {
duplicated_keys++;
break;
}
if (tbl_multiwriter_test_params.found[i] == 0) {
lost_keys++;
printf("key %d is lost\n", i);
break;
}
}
}
if (duplicated_keys > 0) {
printf("%d key duplicated\n", duplicated_keys);
goto err3;
}
if (lost_keys > 0) {
printf("%d key lost\n", lost_keys);
goto err3;
}
printf("No key corrupted during multiwriter insertion.\n");
unsigned long long int cycles_per_insertion =
rte_atomic64_read(&gcycles)/
rte_atomic64_read(&ginsertions);
printf(" cycles per insertion: %llu\n", cycles_per_insertion);
rte_free(tbl_multiwriter_test_params.found);
rte_free(tbl_multiwriter_test_params.keys);
rte_hash_free(handle);
return 0;
err3:
rte_free(tbl_multiwriter_test_params.found);
err2:
rte_free(tbl_multiwriter_test_params.keys);
err1:
rte_hash_free(handle);
return -1;
}
static int
test_hash_multiwriter_main(void)
{
int r = -1;
if (rte_lcore_count() == 1) {
printf("More than one lcore is required to do multiwriter test\n");
return 0;
}
setlocale(LC_NUMERIC, "");
if (!rte_tm_supported()) {
printf("Hardware transactional memory (lock elision) "
"is NOT supported\n");
} else {
printf("Hardware transactional memory (lock elision) "
"is supported\n");
printf("Test multi-writer with Hardware transactional memory\n");
use_htm = 1;
r = test_hash_multiwriter();
}
printf("Test multi-writer without Hardware transactional memory\n");
use_htm = 0;
r = test_hash_multiwriter();
return r;
}
static struct test_command hash_scaling_cmd = {
.command = "hash_multiwriter_autotest",
.callback = test_hash_multiwriter_main,
};
REGISTER_TEST_COMMAND(hash_scaling_cmd);

View File

@ -100,6 +100,18 @@ New Features
A new Crypto PMD has been added, which provides KASUMI F8 (UEA1) ciphering
and KASUMI F9 (UIA1) hashing.
* **Added multi-writer support for RTE Hash with Intel TSX.**
The following features/modifications have been added to rte_hash library:
* Enabled application developers to use an extra flag for rte_hash creation
to specify default behavior (multi-thread safe/unsafe) with rte_hash_add_key
function.
* Changed Cuckoo search algorithm to breadth first search for multi-writer
routine and split Cuckoo Search and Move operations in order to reduce
transactional code region and improve TSX performance.
* Added a hash multi-writer test case for test app.
* **Improved IP Pipeline Application.**
The following features have been added to ip_pipeline application:

View File

@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
* Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -59,12 +59,10 @@
#include <rte_compat.h>
#include "rte_hash.h"
#if defined(RTE_ARCH_X86)
#include "rte_cmp_x86.h"
#endif
#include "rte_cuckoo_hash.h"
#if defined(RTE_ARCH_ARM64)
#include "rte_cmp_arm64.h"
#if defined(RTE_ARCH_X86)
#include "rte_cuckoo_hash_x86.h"
#endif
TAILQ_HEAD(rte_hash_list, rte_tailq_entry);
@ -74,153 +72,6 @@ static struct rte_tailq_elem rte_hash_tailq = {
};
EAL_REGISTER_TAILQ(rte_hash_tailq)
/* Macro to enable/disable run-time checking of function parameters */
#if defined(RTE_LIBRTE_HASH_DEBUG)
#define RETURN_IF_TRUE(cond, retval) do { \
if (cond) \
return retval; \
} while (0)
#else
#define RETURN_IF_TRUE(cond, retval)
#endif
/* Hash function used if none is specified */
#if defined(RTE_MACHINE_CPUFLAG_SSE4_2) || defined(RTE_MACHINE_CPUFLAG_CRC32)
#include <rte_hash_crc.h>
#define DEFAULT_HASH_FUNC rte_hash_crc
#else
#include <rte_jhash.h>
#define DEFAULT_HASH_FUNC rte_jhash
#endif
/** Number of items per bucket. */
#define RTE_HASH_BUCKET_ENTRIES 4
#define NULL_SIGNATURE 0
#define KEY_ALIGNMENT 16
#define LCORE_CACHE_SIZE 8
#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
/*
* All different options to select a key compare function,
* based on the key size and custom function.
*/
enum cmp_jump_table_case {
KEY_CUSTOM = 0,
KEY_16_BYTES,
KEY_32_BYTES,
KEY_48_BYTES,
KEY_64_BYTES,
KEY_80_BYTES,
KEY_96_BYTES,
KEY_112_BYTES,
KEY_128_BYTES,
KEY_OTHER_BYTES,
NUM_KEY_CMP_CASES,
};
/*
* Table storing all different key compare functions
* (multi-process supported)
*/
const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
NULL,
rte_hash_k16_cmp_eq,
rte_hash_k32_cmp_eq,
rte_hash_k48_cmp_eq,
rte_hash_k64_cmp_eq,
rte_hash_k80_cmp_eq,
rte_hash_k96_cmp_eq,
rte_hash_k112_cmp_eq,
rte_hash_k128_cmp_eq,
memcmp
};
#else
/*
* All different options to select a key compare function,
* based on the key size and custom function.
*/
enum cmp_jump_table_case {
KEY_CUSTOM = 0,
KEY_OTHER_BYTES,
NUM_KEY_CMP_CASES,
};
/*
* Table storing all different key compare functions
* (multi-process supported)
*/
const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
NULL,
memcmp
};
#endif
struct lcore_cache {
unsigned len; /**< Cache len */
void *objs[LCORE_CACHE_SIZE]; /**< Cache objects */
} __rte_cache_aligned;
/** A hash table structure. */
struct rte_hash {
char name[RTE_HASH_NAMESIZE]; /**< Name of the hash. */
uint32_t entries; /**< Total table entries. */
uint32_t num_buckets; /**< Number of buckets in table. */
uint32_t key_len; /**< Length of hash key. */
rte_hash_function hash_func; /**< Function used to calculate hash. */
uint32_t hash_func_init_val; /**< Init value used by hash_func. */
rte_hash_cmp_eq_t rte_hash_custom_cmp_eq;
/**< Custom function used to compare keys. */
enum cmp_jump_table_case cmp_jump_table_idx;
/**< Indicates which compare function to use. */
uint32_t bucket_bitmask; /**< Bitmask for getting bucket index
from hash signature. */
uint32_t key_entry_size; /**< Size of each key entry. */
struct rte_ring *free_slots; /**< Ring that stores all indexes
of the free slots in the key table */
void *key_store; /**< Table storing all keys and data */
struct rte_hash_bucket *buckets; /**< Table with buckets storing all the
hash values and key indexes
to the key table*/
uint8_t hw_trans_mem_support; /**< Hardware transactional
memory support */
struct lcore_cache *local_free_slots;
/**< Local cache per lcore, storing some indexes of the free slots */
} __rte_cache_aligned;
/* Structure storing both primary and secondary hashes */
struct rte_hash_signatures {
union {
struct {
hash_sig_t current;
hash_sig_t alt;
};
uint64_t sig;
};
};
/* Structure that stores key-value pair */
struct rte_hash_key {
union {
uintptr_t idata;
void *pdata;
};
/* Variable key size */
char key[0];
} __attribute__((aligned(KEY_ALIGNMENT)));
/** Bucket structure */
struct rte_hash_bucket {
struct rte_hash_signatures signatures[RTE_HASH_BUCKET_ENTRIES];
/* Includes dummy key index that always contains index 0 */
uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES + 1];
uint8_t flag[RTE_HASH_BUCKET_ENTRIES];
} __rte_cache_aligned;
struct rte_hash *
rte_hash_find_existing(const char *name)
{
@ -372,7 +223,7 @@ rte_hash_create(const struct rte_hash_parameters *params)
/*
* If x86 architecture is used, select appropriate compare function,
* which may use x86 instrinsics, otherwise use memcmp
* which may use x86 intrinsics, otherwise use memcmp
*/
#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
/* Select function to compare keys */
@ -431,7 +282,23 @@ rte_hash_create(const struct rte_hash_parameters *params)
h->free_slots = r;
h->hw_trans_mem_support = hw_trans_mem_support;
/* populate the free slots ring. Entry zero is reserved for key misses */
/* Turn on multi-writer only with explicit flat from user and TM
* support.
*/
if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD) {
if (h->hw_trans_mem_support) {
h->add_key = ADD_KEY_MULTIWRITER_TM;
} else {
h->add_key = ADD_KEY_MULTIWRITER;
h->multiwriter_lock = rte_malloc(NULL,
sizeof(rte_spinlock_t),
LCORE_CACHE_SIZE);
rte_spinlock_init(h->multiwriter_lock);
}
} else
h->add_key = ADD_KEY_SINGLEWRITER;
/* Populate free slots ring. Entry zero is reserved for key misses. */
for (i = 1; i < params->entries + 1; i++)
rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));
@ -482,6 +349,8 @@ rte_hash_free(struct rte_hash *h)
if (h->hw_trans_mem_support)
rte_free(h->local_free_slots);
if (h->add_key == ADD_KEY_MULTIWRITER)
rte_free(h->multiwriter_lock);
rte_ring_free(h->free_slots);
rte_free(h->key_store);
rte_free(h->buckets);
@ -632,6 +501,9 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
unsigned lcore_id;
struct lcore_cache *cached_free_slots = NULL;
if (h->add_key == ADD_KEY_MULTIWRITER)
rte_spinlock_lock(h->multiwriter_lock);
prim_bucket_idx = sig & h->bucket_bitmask;
prim_bkt = &h->buckets[prim_bucket_idx];
rte_prefetch0(prim_bkt);
@ -712,35 +584,67 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
rte_memcpy(new_k->key, key, h->key_len);
new_k->pdata = data;
/* Insert new entry is there is room in the primary bucket */
for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
/* Check if slot is available */
if (likely(prim_bkt->signatures[i].sig == NULL_SIGNATURE)) {
prim_bkt->signatures[i].current = sig;
prim_bkt->signatures[i].alt = alt_hash;
prim_bkt->key_idx[i] = new_idx;
#if defined(RTE_ARCH_X86) /* currently only x86 support HTM */
if (h->add_key == ADD_KEY_MULTIWRITER_TM) {
ret = rte_hash_cuckoo_insert_mw_tm(prim_bkt,
sig, alt_hash, new_idx);
if (ret >= 0)
return new_idx - 1;
/* Primary bucket full, need to make space for new entry */
ret = rte_hash_cuckoo_make_space_mw_tm(h, prim_bkt, sig,
alt_hash, new_idx);
if (ret >= 0)
return new_idx - 1;
/* Also search secondary bucket to get better occupancy */
ret = rte_hash_cuckoo_make_space_mw_tm(h, sec_bkt, sig,
alt_hash, new_idx);
if (ret >= 0)
return new_idx - 1;
} else {
#endif
for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
/* Check if slot is available */
if (likely(prim_bkt->signatures[i].sig == NULL_SIGNATURE)) {
prim_bkt->signatures[i].current = sig;
prim_bkt->signatures[i].alt = alt_hash;
prim_bkt->key_idx[i] = new_idx;
break;
}
}
if (i != RTE_HASH_BUCKET_ENTRIES) {
if (h->add_key == ADD_KEY_MULTIWRITER)
rte_spinlock_unlock(h->multiwriter_lock);
return new_idx - 1;
}
}
/* Primary bucket is full, so we need to make space for new entry */
ret = make_space_bucket(h, prim_bkt);
/*
* After recursive function.
* Insert the new entry in the position of the pushed entry
* if successful or return error and
* store the new slot back in the ring
*/
if (ret >= 0) {
prim_bkt->signatures[ret].current = sig;
prim_bkt->signatures[ret].alt = alt_hash;
prim_bkt->key_idx[ret] = new_idx;
return new_idx - 1;
/* Primary bucket full, need to make space for new entry
* After recursive function.
* Insert the new entry in the position of the pushed entry
* if successful or return error and
* store the new slot back in the ring
*/
ret = make_space_bucket(h, prim_bkt);
if (ret >= 0) {
prim_bkt->signatures[ret].current = sig;
prim_bkt->signatures[ret].alt = alt_hash;
prim_bkt->key_idx[ret] = new_idx;
if (h->add_key == ADD_KEY_MULTIWRITER)
rte_spinlock_unlock(h->multiwriter_lock);
return new_idx - 1;
}
#if defined(RTE_ARCH_X86)
}
#endif
/* Error in addition, store new slot back in the ring and return error */
enqueue_slot_back(h, cached_free_slots, (void *)((uintptr_t) new_idx));
if (h->add_key == ADD_KEY_MULTIWRITER)
rte_spinlock_unlock(h->multiwriter_lock);
return ret;
}

View File

@ -0,0 +1,219 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2016 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* rte_cuckoo_hash.h
* This file hold Cuckoo Hash private data structures to allows include from
* platform specific files like rte_cuckoo_hash_x86.h
*/
#ifndef _RTE_CUCKOO_HASH_H_
#define _RTE_CUCKOO_HASH_H_
#if defined(RTE_ARCH_X86)
#include "rte_cmp_x86.h"
#endif
#if defined(RTE_ARCH_ARM64)
#include "rte_cmp_arm64.h"
#endif
/* Macro to enable/disable run-time checking of function parameters */
#if defined(RTE_LIBRTE_HASH_DEBUG)
#define RETURN_IF_TRUE(cond, retval) do { \
if (cond) \
return retval; \
} while (0)
#else
#define RETURN_IF_TRUE(cond, retval)
#endif
/* Hash function used if none is specified */
#if defined(RTE_MACHINE_CPUFLAG_SSE4_2) || defined(RTE_MACHINE_CPUFLAG_CRC32)
#include <rte_hash_crc.h>
#define DEFAULT_HASH_FUNC rte_hash_crc
#else
#include <rte_jhash.h>
#define DEFAULT_HASH_FUNC rte_jhash
#endif
#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
/*
* All different options to select a key compare function,
* based on the key size and custom function.
*/
enum cmp_jump_table_case {
KEY_CUSTOM = 0,
KEY_16_BYTES,
KEY_32_BYTES,
KEY_48_BYTES,
KEY_64_BYTES,
KEY_80_BYTES,
KEY_96_BYTES,
KEY_112_BYTES,
KEY_128_BYTES,
KEY_OTHER_BYTES,
NUM_KEY_CMP_CASES,
};
/*
* Table storing all different key compare functions
* (multi-process supported)
*/
const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
NULL,
rte_hash_k16_cmp_eq,
rte_hash_k32_cmp_eq,
rte_hash_k48_cmp_eq,
rte_hash_k64_cmp_eq,
rte_hash_k80_cmp_eq,
rte_hash_k96_cmp_eq,
rte_hash_k112_cmp_eq,
rte_hash_k128_cmp_eq,
memcmp
};
#else
/*
* All different options to select a key compare function,
* based on the key size and custom function.
*/
enum cmp_jump_table_case {
KEY_CUSTOM = 0,
KEY_OTHER_BYTES,
NUM_KEY_CMP_CASES,
};
/*
* Table storing all different key compare functions
* (multi-process supported)
*/
const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
NULL,
memcmp
};
#endif
enum add_key_case {
ADD_KEY_SINGLEWRITER = 0,
ADD_KEY_MULTIWRITER,
ADD_KEY_MULTIWRITER_TM,
};
/** Number of items per bucket. */
#define RTE_HASH_BUCKET_ENTRIES 4
#define NULL_SIGNATURE 0
#define KEY_ALIGNMENT 16
#define LCORE_CACHE_SIZE 64
#define RTE_HASH_BFS_QUEUE_MAX_LEN 1000
#define RTE_XABORT_CUCKOO_PATH_INVALIDED 0x4
#define RTE_HASH_TSX_MAX_RETRY 10
struct lcore_cache {
unsigned len; /**< Cache len */
void *objs[LCORE_CACHE_SIZE]; /**< Cache objects */
} __rte_cache_aligned;
/* Structure storing both primary and secondary hashes */
struct rte_hash_signatures {
union {
struct {
hash_sig_t current;
hash_sig_t alt;
};
uint64_t sig;
};
};
/* Structure that stores key-value pair */
struct rte_hash_key {
union {
uintptr_t idata;
void *pdata;
};
/* Variable key size */
char key[0];
} __attribute__((aligned(KEY_ALIGNMENT)));
/** Bucket structure */
struct rte_hash_bucket {
struct rte_hash_signatures signatures[RTE_HASH_BUCKET_ENTRIES];
/* Includes dummy key index that always contains index 0 */
uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES + 1];
uint8_t flag[RTE_HASH_BUCKET_ENTRIES];
} __rte_cache_aligned;
/** A hash table structure. */
struct rte_hash {
char name[RTE_HASH_NAMESIZE]; /**< Name of the hash. */
uint32_t entries; /**< Total table entries. */
uint32_t num_buckets; /**< Number of buckets in table. */
uint32_t key_len; /**< Length of hash key. */
rte_hash_function hash_func; /**< Function used to calculate hash. */
uint32_t hash_func_init_val; /**< Init value used by hash_func. */
rte_hash_cmp_eq_t rte_hash_custom_cmp_eq;
/**< Custom function used to compare keys. */
enum cmp_jump_table_case cmp_jump_table_idx;
/**< Indicates which compare function to use. */
uint32_t bucket_bitmask; /**< Bitmask for getting bucket index
from hash signature. */
uint32_t key_entry_size; /**< Size of each key entry. */
struct rte_ring *free_slots; /**< Ring that stores all indexes
of the free slots in the key table */
void *key_store; /**< Table storing all keys and data */
struct rte_hash_bucket *buckets; /**< Table with buckets storing all the
hash values and key indexes
to the key table*/
uint8_t hw_trans_mem_support; /**< Hardware transactional
memory support */
struct lcore_cache *local_free_slots;
/**< Local cache per lcore, storing some indexes of the free slots */
enum add_key_case add_key; /**< Multi-writer hash add behavior */
rte_spinlock_t *multiwriter_lock; /**< Multi-writer spinlock for w/o TM */
} __rte_cache_aligned;
struct queue_node {
struct rte_hash_bucket *bkt; /* Current bucket on the bfs search */
struct queue_node *prev; /* Parent(bucket) in search path */
int prev_slot; /* Parent(slot) in search path */
};
#endif

View File

@ -0,0 +1,193 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2016 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* rte_cuckoo_hash_x86.h
* This file holds all x86 specific Cuckoo Hash functions
*/
/* Only tries to insert at one bucket (@prim_bkt) without trying to push
* buckets around
*/
static inline unsigned
rte_hash_cuckoo_insert_mw_tm(struct rte_hash_bucket *prim_bkt,
hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx)
{
unsigned i, status;
unsigned try = 0;
while (try < RTE_HASH_TSX_MAX_RETRY) {
status = rte_xbegin();
if (likely(status == RTE_XBEGIN_STARTED)) {
/* Insert new entry if there is room in the primary
* bucket.
*/
for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
/* Check if slot is available */
if (likely(prim_bkt->signatures[i].sig ==
NULL_SIGNATURE)) {
prim_bkt->signatures[i].current = sig;
prim_bkt->signatures[i].alt = alt_hash;
prim_bkt->key_idx[i] = new_idx;
break;
}
}
rte_xend();
if (i != RTE_HASH_BUCKET_ENTRIES)
return 0;
break; /* break off try loop if transaction commits */
} else {
/* If we abort we give up this cuckoo path. */
try++;
rte_pause();
}
}
return -1;
}
/* Shift buckets along provided cuckoo_path (@leaf and @leaf_slot) and fill
* the path head with new entry (sig, alt_hash, new_idx)
*/
static inline int
rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h,
struct queue_node *leaf, uint32_t leaf_slot,
hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx)
{
unsigned try = 0;
unsigned status;
uint32_t prev_alt_bkt_idx;
struct queue_node *prev_node, *curr_node = leaf;
struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt;
uint32_t prev_slot, curr_slot = leaf_slot;
while (try < RTE_HASH_TSX_MAX_RETRY) {
status = rte_xbegin();
if (likely(status == RTE_XBEGIN_STARTED)) {
while (likely(curr_node->prev != NULL)) {
prev_node = curr_node->prev;
prev_bkt = prev_node->bkt;
prev_slot = curr_node->prev_slot;
prev_alt_bkt_idx
= prev_bkt->signatures[prev_slot].alt
& h->bucket_bitmask;
if (unlikely(&h->buckets[prev_alt_bkt_idx]
!= curr_bkt)) {
rte_xabort(RTE_XABORT_CUCKOO_PATH_INVALIDED);
}
/* Need to swap current/alt sig to allow later
* Cuckoo insert to move elements back to its
* primary bucket if available
*/
curr_bkt->signatures[curr_slot].alt =
prev_bkt->signatures[prev_slot].current;
curr_bkt->signatures[curr_slot].current =
prev_bkt->signatures[prev_slot].alt;
curr_bkt->key_idx[curr_slot]
= prev_bkt->key_idx[prev_slot];
curr_slot = prev_slot;
curr_node = prev_node;
curr_bkt = curr_node->bkt;
}
curr_bkt->signatures[curr_slot].current = sig;
curr_bkt->signatures[curr_slot].alt = alt_hash;
curr_bkt->key_idx[curr_slot] = new_idx;
rte_xend();
return 0;
}
/* If we abort we give up this cuckoo path, since most likely it's
* no longer valid as TSX detected data conflict
*/
try++;
rte_pause();
}
return -1;
}
/*
* Make space for new key, using bfs Cuckoo Search and Multi-Writer safe
* Cuckoo
*/
static inline int
rte_hash_cuckoo_make_space_mw_tm(const struct rte_hash *h,
struct rte_hash_bucket *bkt,
hash_sig_t sig, hash_sig_t alt_hash,
uint32_t new_idx)
{
unsigned i;
struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN];
struct queue_node *tail, *head;
struct rte_hash_bucket *curr_bkt, *alt_bkt;
tail = queue;
head = queue + 1;
tail->bkt = bkt;
tail->prev = NULL;
tail->prev_slot = -1;
/* Cuckoo bfs Search */
while (likely(tail != head && head <
queue + RTE_HASH_BFS_QUEUE_MAX_LEN - 4)) {
curr_bkt = tail->bkt;
for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
if (curr_bkt->signatures[i].sig == NULL_SIGNATURE) {
if (likely(rte_hash_cuckoo_move_insert_mw_tm(h,
tail, i, sig,
alt_hash, new_idx) == 0))
return 0;
}
/* Enqueue new node and keep prev node info */
alt_bkt = &(h->buckets[curr_bkt->signatures[i].alt
& h->bucket_bitmask]);
head->bkt = alt_bkt;
head->prev = tail;
head->prev_slot = i;
head++;
}
tail++;
}
return -ENOSPC;
}

View File

@ -60,6 +60,9 @@ extern "C" {
/** Enable Hardware transactional memory support. */
#define RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT 0x01
/** Default behavior of insertion, single writer/multi writer */
#define RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD 0x02
/** Signature of key that is stored internally. */
typedef uint32_t hash_sig_t;