hash: support lock-free extendable bucket
This patch enables lock-free read-write concurrency support for the
extendable bucket feature.

Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Acked-by: Yipeng Wang <yipeng1.wang@intel.com>
parent 237060c4ad
commit f401363d98
@@ -108,9 +108,9 @@ Extendable Bucket Functionality support
 An extra flag is used to enable this functionality (flag is not set by default). When the (RTE_HASH_EXTRA_FLAGS_EXT_TABLE) is set and
 in the very unlikely case due to excessive hash collisions that a key has failed to be inserted, the hash table bucket is extended with a linked
 list to insert these failed keys. This feature is important for the workloads (e.g. telco workloads) that need to insert up to 100% of the
-hash table size and can't tolerate any key insertion failure (even if very few). Currently the extendable bucket is not supported
-with the lock-free concurrency implementation (RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF).
+hash table size and can't tolerate any key insertion failure (even if very few).
+Please note that with the 'lock free read/write concurrency' flag enabled, users need to call 'rte_hash_free_key_with_position' API in order to free the empty buckets and
+deleted keys, to maintain the 100% capacity guarantee.
 
 Implementation Details (non Extendable Bucket Case)
 ---------------------------------------------------
 
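As a quick illustration of the documented flow (not part of this patch), the sketch below creates a table with both the extendable-bucket and lock-free read/write concurrency flags and then frees a deleted key explicitly so the 100% capacity guarantee holds; the table name, sizing and hash function are arbitrary choices for the example.

/* Minimal usage sketch: combine EXT_TABLE with RW_CONCURRENCY_LF and
 * recycle a deleted key's slot. Error handling is trimmed for brevity.
 */
#include <stdint.h>

#include <rte_eal.h>
#include <rte_hash.h>
#include <rte_jhash.h>
#include <rte_lcore.h>

int
main(int argc, char **argv)
{
	if (rte_eal_init(argc, argv) < 0)
		return -1;

	struct rte_hash_parameters params = {
		.name = "ext_lf_hash",          /* arbitrary example name */
		.entries = 1024,
		.key_len = sizeof(uint32_t),
		.hash_func = rte_jhash,
		.hash_func_init_val = 0,
		.socket_id = rte_socket_id(),
		/* Both flags may now be set together. */
		.extra_flag = RTE_HASH_EXTRA_FLAGS_EXT_TABLE |
			      RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF,
	};

	struct rte_hash *h = rte_hash_create(&params);
	if (h == NULL)
		return -1;

	uint32_t key = 42;
	if (rte_hash_add_key(h, &key) < 0)
		return -1;

	/* With RW_CONCURRENCY_LF, the delete does not return the key slot
	 * (or an emptied extendable bucket) to the free lists by itself.
	 */
	int32_t pos = rte_hash_del_key(h, &key);
	if (pos >= 0)
		/* Once no reader can still hold a reference to the key,
		 * free it explicitly to keep the 100% capacity guarantee.
		 */
		rte_hash_free_key_with_position(h, pos);

	rte_hash_free(h);
	return 0;
}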
@@ -141,6 +141,7 @@ rte_hash_create(const struct rte_hash_parameters *params)
 	unsigned int readwrite_concur_support = 0;
 	unsigned int writer_takes_lock = 0;
 	unsigned int no_free_on_del = 0;
+	uint32_t *ext_bkt_to_free = NULL;
 	uint32_t *tbl_chng_cnt = NULL;
 	unsigned int readwrite_concur_lf_support = 0;
 
@@ -171,15 +172,6 @@ rte_hash_create(const struct rte_hash_parameters *params)
 		return NULL;
 	}
 
-	if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) &&
-	    (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)) {
-		rte_errno = EINVAL;
-		RTE_LOG(ERR, HASH, "rte_hash_create: extendable bucket "
-			"feature not supported with rw concurrency "
-			"lock free\n");
-		return NULL;
-	}
-
 	/* Check extra flags field to check extra options. */
 	if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT)
 		hw_trans_mem_support = 1;
@@ -303,6 +295,16 @@ rte_hash_create(const struct rte_hash_parameters *params)
 		 */
 		for (i = 1; i <= num_buckets; i++)
 			rte_ring_sp_enqueue(r_ext, (void *)((uintptr_t) i));
+
+		if (readwrite_concur_lf_support) {
+			ext_bkt_to_free = rte_zmalloc(NULL, sizeof(uint32_t) *
+								num_key_slots, 0);
+			if (ext_bkt_to_free == NULL) {
+				RTE_LOG(ERR, HASH, "ext bkt to free memory allocation "
+								"failed\n");
+				goto err_unlock;
+			}
+		}
 	}
 
 	const uint32_t key_entry_size =
@@ -394,6 +396,7 @@ rte_hash_create(const struct rte_hash_parameters *params)
 			default_hash_func : params->hash_func;
 	h->key_store = k;
 	h->free_slots = r;
+	h->ext_bkt_to_free = ext_bkt_to_free;
 	h->tbl_chng_cnt = tbl_chng_cnt;
 	*h->tbl_chng_cnt = 0;
 	h->hw_trans_mem_support = hw_trans_mem_support;
@@ -448,6 +451,7 @@ rte_hash_create(const struct rte_hash_parameters *params)
 	rte_free(buckets_ext);
 	rte_free(k);
 	rte_free(tbl_chng_cnt);
+	rte_free(ext_bkt_to_free);
 	return NULL;
 }
 
@@ -489,6 +493,7 @@ rte_hash_free(struct rte_hash *h)
 	rte_free(h->buckets);
 	rte_free(h->buckets_ext);
 	rte_free(h->tbl_chng_cnt);
+	rte_free(h->ext_bkt_to_free);
 	rte_free(h);
 	rte_free(te);
 }
@@ -804,7 +809,7 @@ rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
 			__atomic_store_n(h->tbl_chng_cnt,
 					 *h->tbl_chng_cnt + 1,
 					 __ATOMIC_RELEASE);
-			/* The stores to sig_alt and sig_current should not
+			/* The store to sig_current should not
 			 * move above the store to tbl_chng_cnt.
 			 */
 			__atomic_thread_fence(__ATOMIC_RELEASE);
@@ -836,7 +841,7 @@ rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
 			__atomic_store_n(h->tbl_chng_cnt,
 					 *h->tbl_chng_cnt + 1,
 					 __ATOMIC_RELEASE);
-			/* The stores to sig_alt and sig_current should not
+			/* The store to sig_current should not
 			 * move above the store to tbl_chng_cnt.
 			 */
 			__atomic_thread_fence(__ATOMIC_RELEASE);
@@ -1059,7 +1064,12 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 		/* Check if slot is available */
 		if (likely(cur_bkt->key_idx[i] == EMPTY_SLOT)) {
 			cur_bkt->sig_current[i] = short_sig;
-			cur_bkt->key_idx[i] = new_idx;
+			/* Store to signature should not leak after
+			 * the store to key_idx
+			 */
+			__atomic_store_n(&cur_bkt->key_idx[i],
+					 new_idx,
+					 __ATOMIC_RELEASE);
 			__hash_rw_writer_unlock(h);
 			return new_idx - 1;
 		}
@@ -1077,7 +1087,12 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 	bkt_id = (uint32_t)((uintptr_t)ext_bkt_id) - 1;
 	/* Use the first location of the new bucket */
 	(h->buckets_ext[bkt_id]).sig_current[0] = short_sig;
-	(h->buckets_ext[bkt_id]).key_idx[0] = new_idx;
+	/* Store to signature should not leak after
+	 * the store to key_idx
+	 */
+	__atomic_store_n(&(h->buckets_ext[bkt_id]).key_idx[0],
+			 new_idx,
+			 __ATOMIC_RELEASE);
 	/* Link the new bucket to sec bucket linked list */
 	last = rte_hash_get_last_bkt(sec_bkt);
 	last->next = &h->buckets_ext[bkt_id];
@@ -1371,7 +1386,8 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
  * empty slot.
  */
 static inline void
-__rte_hash_compact_ll(struct rte_hash_bucket *cur_bkt, int pos) {
+__rte_hash_compact_ll(const struct rte_hash *h,
+			struct rte_hash_bucket *cur_bkt, int pos) {
 	int i;
 	struct rte_hash_bucket *last_bkt;
 
@@ -1382,10 +1398,27 @@ __rte_hash_compact_ll(struct rte_hash_bucket *cur_bkt, int pos) {
 
 	for (i = RTE_HASH_BUCKET_ENTRIES - 1; i >= 0; i--) {
 		if (last_bkt->key_idx[i] != EMPTY_SLOT) {
-			cur_bkt->key_idx[pos] = last_bkt->key_idx[i];
 			cur_bkt->sig_current[pos] = last_bkt->sig_current[i];
+			__atomic_store_n(&cur_bkt->key_idx[pos],
+					 last_bkt->key_idx[i],
+					 __ATOMIC_RELEASE);
+			if (h->readwrite_concur_lf_support) {
+				/* Inform the readers that the table has changed
+				 * Since there is one writer, load acquire on
+				 * tbl_chng_cnt is not required.
+				 */
+				__atomic_store_n(h->tbl_chng_cnt,
+						 *h->tbl_chng_cnt + 1,
+						 __ATOMIC_RELEASE);
+				/* The store to sig_current should
+				 * not move above the store to tbl_chng_cnt.
+				 */
+				__atomic_thread_fence(__ATOMIC_RELEASE);
+			}
 			last_bkt->sig_current[i] = NULL_SIGNATURE;
-			last_bkt->key_idx[i] = EMPTY_SLOT;
+			__atomic_store_n(&last_bkt->key_idx[i],
+					 EMPTY_SLOT,
+					 __ATOMIC_RELEASE);
 			return;
 		}
 	}
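The compaction path above is where a lock-free writer must tell in-flight readers that an entry moved. As a standalone sketch (plain C11 atomics rather than the DPDK __atomic wrappers; the names are illustrative only, not library code), the counter protocol looks roughly like this: the single writer bumps the change counter with release semantics before touching the signatures, and readers re-check the counter after probing and retry if it moved.

/* Illustrative sketch of the change-counter retry protocol. */
#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint32_t tbl_chng_cnt;

/* Writer side: called before an entry is relocated/overwritten. */
void
writer_signal_change(void)
{
	/* Single writer: a relaxed read is enough, publish with release. */
	atomic_store_explicit(&tbl_chng_cnt,
			      atomic_load_explicit(&tbl_chng_cnt,
						   memory_order_relaxed) + 1,
			      memory_order_release);
	/* Keep the subsequent signature/key stores after the counter store. */
	atomic_thread_fence(memory_order_release);
}

/* Reader side: probe under a counter snapshot, retry if it changed. */
int
reader_lookup(int (*probe)(void *ctx), void *ctx)
{
	uint32_t cnt_before, cnt_after;
	int ret;

	do {
		cnt_before = atomic_load_explicit(&tbl_chng_cnt,
						  memory_order_acquire);
		ret = probe(ctx);	/* may observe a partially moved entry */
		/* The probe's loads must not sink below the re-read of
		 * the counter.
		 */
		atomic_thread_fence(memory_order_acquire);
		cnt_after = atomic_load_explicit(&tbl_chng_cnt,
						 memory_order_acquire);
	} while (cnt_before != cnt_after);

	return ret;
}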
@@ -1454,7 +1487,7 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
 	/* look for key in primary bucket */
 	ret = search_and_remove(h, key, prim_bkt, short_sig, &pos);
 	if (ret != -1) {
-		__rte_hash_compact_ll(prim_bkt, pos);
+		__rte_hash_compact_ll(h, prim_bkt, pos);
 		last_bkt = prim_bkt->next;
 		prev_bkt = prim_bkt;
 		goto return_bkt;
@@ -1466,7 +1499,7 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
 	FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
 		ret = search_and_remove(h, key, cur_bkt, short_sig, &pos);
 		if (ret != -1) {
-			__rte_hash_compact_ll(cur_bkt, pos);
+			__rte_hash_compact_ll(h, cur_bkt, pos);
 			last_bkt = sec_bkt->next;
 			prev_bkt = sec_bkt;
 			goto return_bkt;
@@ -1493,11 +1526,24 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
 	}
 	/* found empty bucket and recycle */
 	if (i == RTE_HASH_BUCKET_ENTRIES) {
-		prev_bkt->next = last_bkt->next = NULL;
+		prev_bkt->next = NULL;
 		uint32_t index = last_bkt - h->buckets_ext + 1;
-		rte_ring_sp_enqueue(h->free_ext_bkts, (void *)(uintptr_t)index);
+		/* Recycle the empty bkt if
+		 * no_free_on_del is disabled.
+		 */
+		if (h->no_free_on_del)
+			/* Store index of an empty ext bkt to be recycled
+			 * on calling rte_hash_del_xxx APIs.
+			 * When lock free read-write concurrency is enabled,
+			 * an empty ext bkt cannot be put into free list
+			 * immediately (as readers might be using it still).
+			 * Hence freeing of the ext bkt is piggy-backed to
+			 * freeing of the key index.
+			 */
+			h->ext_bkt_to_free[ret] = index;
+		else
+			rte_ring_sp_enqueue(h->free_ext_bkts, (void *)(uintptr_t)index);
 	}
 
 	__hash_rw_writer_unlock(h);
 	return ret;
 }
@@ -1550,6 +1596,14 @@ rte_hash_free_key_with_position(const struct rte_hash *h,
 	/* Out of bounds */
 	if (position >= total_entries)
 		return -EINVAL;
+	if (h->ext_table_support && h->readwrite_concur_lf_support) {
+		uint32_t index = h->ext_bkt_to_free[position];
+		if (index) {
+			/* Recycle empty ext bkt to free list. */
+			rte_ring_sp_enqueue(h->free_ext_bkts, (void *)(uintptr_t)index);
+			h->ext_bkt_to_free[position] = 0;
+		}
+	}
 
 	if (h->use_local_cache) {
 		lcore_id = rte_lcore_id();
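rte_hash_free_key_with_position() is where a parked extendable bucket finally gets recycled. The standalone sketch below is not the library code: the array size, the free_pool_put() helper and the function names are made up for illustration. It shows the piggy-backing idea in isolation: the delete path parks the emptied bucket's index against the deleted key's position, and the later key free returns both resources to their pools.

#include <stdint.h>

#define NUM_KEY_SLOTS 1024	/* illustrative size */

/* One parked extendable-bucket index per key position; 0 means "none". */
static uint32_t ext_bkt_to_free[NUM_KEY_SLOTS];

/* Stand-in for returning a bucket index to the free ring. */
static void
free_pool_put(uint32_t bkt_index)
{
	(void)bkt_index;	/* e.g. enqueue on a free ring */
}

/* Delete path: the last linked bucket became empty, but lock-free readers
 * may still be walking it, so defer the recycle when running lock-free.
 */
static void
on_delete_emptied_ext_bkt(int32_t key_pos, uint32_t bkt_index, int lock_free)
{
	if (lock_free)
		ext_bkt_to_free[key_pos] = bkt_index;	/* park it */
	else
		free_pool_put(bkt_index);		/* safe to recycle now */
}

/* Key-free path: no reader can reference this position any more, so recycle
 * the parked bucket (if any) together with the key slot.
 */
static void
on_free_key_position(int32_t key_pos)
{
	uint32_t bkt_index = ext_bkt_to_free[key_pos];

	if (bkt_index != 0) {
		free_pool_put(bkt_index);
		ext_bkt_to_free[key_pos] = 0;
	}
	/* ...the key slot itself is then returned to the key free list. */
}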
@@ -1882,6 +1936,9 @@ __rte_hash_lookup_bulk_lf(const struct rte_hash *h, const void **keys,
 		rte_prefetch0(secondary_bkt[i]);
 	}
 
+	for (i = 0; i < num_keys; i++)
+		positions[i] = -ENOENT;
+
 	do {
 		/* Load the table change counter before the lookup
 		 * starts. Acquire semantics will make sure that
@@ -1926,7 +1983,6 @@ __rte_hash_lookup_bulk_lf(const struct rte_hash *h, const void **keys,
 
 		/* Compare keys, first hits in primary first */
 		for (i = 0; i < num_keys; i++) {
-			positions[i] = -ENOENT;
 			while (prim_hitmask[i]) {
 				uint32_t hit_index =
 					__builtin_ctzl(prim_hitmask[i])
@@ -1999,6 +2055,35 @@ __rte_hash_lookup_bulk_lf(const struct rte_hash *h, const void **keys,
 				continue;
 			}
 
+		/* all found, do not need to go through ext bkt */
+		if (hits == ((1ULL << num_keys) - 1)) {
+			if (hit_mask != NULL)
+				*hit_mask = hits;
+			return;
+		}
+		/* need to check ext buckets for match */
+		if (h->ext_table_support) {
+			for (i = 0; i < num_keys; i++) {
+				if ((hits & (1ULL << i)) != 0)
+					continue;
+				next_bkt = secondary_bkt[i]->next;
+				FOR_EACH_BUCKET(cur_bkt, next_bkt) {
+					if (data != NULL)
+						ret = search_one_bucket_lf(h,
+							keys[i], sig[i],
+							&data[i], cur_bkt);
+					else
+						ret = search_one_bucket_lf(h,
+							keys[i], sig[i],
+							NULL, cur_bkt);
+					if (ret != -1) {
+						positions[i] = ret;
+						hits |= 1ULL << i;
+						break;
+					}
+				}
+			}
+		}
+
 		/* The loads of sig_current in compare_signatures
 		 * should not move below the load from tbl_chng_cnt.
 		 */
@@ -2015,34 +2100,6 @@ __rte_hash_lookup_bulk_lf(const struct rte_hash *h, const void **keys,
 					__ATOMIC_ACQUIRE);
 	} while (cnt_b != cnt_a);
 
-	/* all found, do not need to go through ext bkt */
-	if ((hits == ((1ULL << num_keys) - 1)) || !h->ext_table_support) {
-		if (hit_mask != NULL)
-			*hit_mask = hits;
-		__hash_rw_reader_unlock(h);
-		return;
-	}
-
-	/* need to check ext buckets for match */
-	for (i = 0; i < num_keys; i++) {
-		if ((hits & (1ULL << i)) != 0)
-			continue;
-		next_bkt = secondary_bkt[i]->next;
-		FOR_EACH_BUCKET(cur_bkt, next_bkt) {
-			if (data != NULL)
-				ret = search_one_bucket_lf(h, keys[i],
-						sig[i], &data[i], cur_bkt);
-			else
-				ret = search_one_bucket_lf(h, keys[i],
-						sig[i], NULL, cur_bkt);
-			if (ret != -1) {
-				positions[i] = ret;
-				hits |= 1ULL << i;
-				break;
-			}
-		}
-	}
-
 	if (hit_mask != NULL)
 		*hit_mask = hits;
 }
@@ -211,6 +211,13 @@ struct rte_hash {
 	rte_rwlock_t *readwrite_lock; /**< Read-write lock thread-safety. */
 	struct rte_hash_bucket *buckets_ext; /**< Extra buckets array */
 	struct rte_ring *free_ext_bkts; /**< Ring of indexes of free buckets */
+	/* Stores index of an empty ext bkt to be recycled on calling
+	 * rte_hash_del_xxx APIs. When lock free read-write concurrency is
+	 * enabled, an empty ext bkt cannot be put into free list immediately
+	 * (as readers might be using it still). Hence freeing of the ext bkt
+	 * is piggy-backed to freeing of the key index.
+	 */
+	uint32_t *ext_bkt_to_free;
 	uint32_t *tbl_chng_cnt;
 	/**< Indicates if the hash table changed from last read. */
 } __rte_cache_aligned;