hash: add predictable RSS

This patch adds a predictable RSS API.
It is based on the idea of searching for partial Toeplitz hash collisions.

Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
Acked-by: Yipeng Wang <yipeng1.wang@intel.com>
Acked-by: John McNamara <john.mcnamara@intel.com>
Authored by Vladimir Medvedkin on 2021-04-19 16:59:51 +01:00; committed by Thomas Monjalon
parent 534fe5f339
commit 28ebff11c2
10 changed files with 4520 additions and 7 deletions


@@ -1430,6 +1430,7 @@ Hashes
M: Yipeng Wang <yipeng1.wang@intel.com>
M: Sameh Gobriel <sameh.gobriel@intel.com>
M: Bruce Richardson <bruce.richardson@intel.com>
M: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
F: lib/librte_hash/
F: doc/guides/prog_guide/hash_lib.rst
F: doc/guides/prog_guide/toeplitz_hash_lib.rst


@@ -5,11 +5,15 @@
#include <rte_common.h>
#include <rte_eal.h>
#include <rte_ip.h>
#include <rte_random.h>
#include "test.h"
#include <rte_thash.h>
#define HASH_MSK(reta_sz) ((1 << reta_sz) - 1)
#define TUPLE_SZ (RTE_THASH_V4_L4_LEN * 4)
struct test_thash_v4 {
uint32_t dst_ip;
uint32_t src_ip;
@@ -75,7 +79,7 @@ uint8_t default_rss_key[] = {
};
static int
test_thash(void)
test_toeplitz_hash_calc(void)
{
uint32_t i, j;
union rte_thash_tuple tuple;
@@ -100,7 +104,7 @@ test_thash(void)
RTE_THASH_V4_L4_LEN, default_rss_key);
if ((rss_l3 != v4_tbl[i].hash_l3) ||
(rss_l3l4 != v4_tbl[i].hash_l3l4))
return -1;
return -TEST_FAILED;
/*Calculate hash with converted key*/
rss_l3 = rte_softrss_be((uint32_t *)&tuple,
RTE_THASH_V4_L3_LEN, rss_key_be);
@@ -108,7 +112,7 @@ test_thash(void)
RTE_THASH_V4_L4_LEN, rss_key_be);
if ((rss_l3 != v4_tbl[i].hash_l3) ||
(rss_l3l4 != v4_tbl[i].hash_l3l4))
return -1;
return -TEST_FAILED;
}
for (i = 0; i < RTE_DIM(v6_tbl); i++) {
/*Fill ipv6 hdr*/
@@ -127,7 +131,7 @@ test_thash(void)
RTE_THASH_V6_L4_LEN, default_rss_key);
if ((rss_l3 != v6_tbl[i].hash_l3) ||
(rss_l3l4 != v6_tbl[i].hash_l3l4))
return -1;
return -TEST_FAILED;
/*Calculate hash with converted key*/
rss_l3 = rte_softrss_be((uint32_t *)&tuple,
RTE_THASH_V6_L3_LEN, rss_key_be);
@@ -135,9 +139,462 @@ test_thash(void)
RTE_THASH_V6_L4_LEN, rss_key_be);
if ((rss_l3 != v6_tbl[i].hash_l3) ||
(rss_l3l4 != v6_tbl[i].hash_l3l4))
return -1;
return -TEST_FAILED;
}
return 0;
return TEST_SUCCESS;
}
static int
test_create_invalid(void)
{
struct rte_thash_ctx *ctx;
int key_len = 40;
int reta_sz = 7;
ctx = rte_thash_init_ctx(NULL, key_len, reta_sz, NULL, 0);
RTE_TEST_ASSERT(ctx == NULL,
"Call succeeded with invalid parameters\n");
ctx = rte_thash_init_ctx("test", 0, reta_sz, NULL, 0);
RTE_TEST_ASSERT(ctx == NULL,
"Call succeeded with invalid parameters\n");
ctx = rte_thash_init_ctx(NULL, key_len, 1, NULL, 0);
RTE_TEST_ASSERT(ctx == NULL,
"Call succeeded with invalid parameters\n");
ctx = rte_thash_init_ctx(NULL, key_len, 17, NULL, 0);
RTE_TEST_ASSERT(ctx == NULL,
"Call succeeded with invalid parameters\n");
return TEST_SUCCESS;
}
static int
test_multiple_create(void)
{
struct rte_thash_ctx *ctx;
int key_len = 40;
int reta_sz = 7;
int i;
for (i = 0; i < 100; i++) {
ctx = rte_thash_init_ctx("test", key_len, reta_sz, NULL, 0);
RTE_TEST_ASSERT(ctx != NULL, "Can not create CTX\n");
rte_thash_free_ctx(ctx);
}
return TEST_SUCCESS;
}
static int
test_free_null(void)
{
struct rte_thash_ctx *ctx;
ctx = rte_thash_init_ctx("test", 40, 7, NULL, 0);
RTE_TEST_ASSERT(ctx != NULL, "Can not create CTX\n");
rte_thash_free_ctx(ctx);
rte_thash_free_ctx(NULL);
return TEST_SUCCESS;
}
static int
test_add_invalid_helper(void)
{
struct rte_thash_ctx *ctx;
const int key_len = 40;
int reta_sz = 7;
int ret;
ctx = rte_thash_init_ctx("test", key_len, reta_sz, NULL, 0);
RTE_TEST_ASSERT(ctx != NULL, "can not create thash ctx\n");
ret = rte_thash_add_helper(NULL, "test", reta_sz, 0);
RTE_TEST_ASSERT(ret == -EINVAL,
"Call succeeded with invalid parameters\n");
ret = rte_thash_add_helper(ctx, NULL, reta_sz, 0);
RTE_TEST_ASSERT(ret == -EINVAL,
"Call succeeded with invalid parameters\n");
ret = rte_thash_add_helper(ctx, "test", reta_sz - 1, 0);
RTE_TEST_ASSERT(ret == -EINVAL,
"Call succeeded with invalid parameters\n");
ret = rte_thash_add_helper(ctx, "test", reta_sz, key_len * 8);
RTE_TEST_ASSERT(ret == -EINVAL,
"Call succeeded with invalid parameters\n");
ret = rte_thash_add_helper(ctx, "first_range", reta_sz, 0);
RTE_TEST_ASSERT(ret == 0, "Can not create helper\n");
ret = rte_thash_add_helper(ctx, "first_range", reta_sz, 0);
RTE_TEST_ASSERT(ret == -EEXIST,
"Call succeeded with duplicated name\n");
/*
* Create second helper with offset 3 * reta_sz.
* Note first_range helper created range in key:
* [0, 32 + length{= reta_sz} - 1), i.e [0, 37).
* second range is [44, 81)
*/
ret = rte_thash_add_helper(ctx, "second_range", reta_sz,
32 + 2 * reta_sz);
RTE_TEST_ASSERT(ret == 0, "Can not create helper\n");
/*
* Try to create overlapping with first_ and second_ ranges,
* i.e. [6, 49)
*/
ret = rte_thash_add_helper(ctx, "third_range", 2 * reta_sz, reta_sz);
RTE_TEST_ASSERT(ret == -EEXIST,
"Call succeeded with overlapping ranges\n");
rte_thash_free_ctx(ctx);
return TEST_SUCCESS;
}
static int
test_find_existing(void)
{
struct rte_thash_ctx *ctx, *ret_ctx;
ctx = rte_thash_init_ctx("test", 40, 7, NULL, 0);
RTE_TEST_ASSERT(ctx != NULL, "can not create thash ctx\n");
ret_ctx = rte_thash_find_existing("test");
RTE_TEST_ASSERT(ret_ctx != NULL, "can not find existing ctx\n");
rte_thash_free_ctx(ctx);
return TEST_SUCCESS;
}
static int
test_get_helper(void)
{
struct rte_thash_ctx *ctx;
struct rte_thash_subtuple_helper *h;
int ret;
ctx = rte_thash_init_ctx("test", 40, 7, NULL, 0);
RTE_TEST_ASSERT(ctx != NULL, "Can not create thash ctx\n");
h = rte_thash_get_helper(NULL, "first_range");
RTE_TEST_ASSERT(h == NULL, "Call succeeded with invalid parameters\n");
h = rte_thash_get_helper(ctx, NULL);
RTE_TEST_ASSERT(h == NULL, "Call succeeded with invalid parameters\n");
ret = rte_thash_add_helper(ctx, "first_range", 8, 0);
RTE_TEST_ASSERT(ret == 0, "Can not create helper\n");
h = rte_thash_get_helper(ctx, "first_range");
RTE_TEST_ASSERT(h != NULL, "Can not find helper\n");
rte_thash_free_ctx(ctx);
return TEST_SUCCESS;
}
static int
test_period_overflow(void)
{
struct rte_thash_ctx *ctx;
int reta_sz = 7; /* reflects polynomial degree */
int ret;
/* first create without RTE_THASH_IGNORE_PERIOD_OVERFLOW flag */
ctx = rte_thash_init_ctx("test", 40, reta_sz, NULL, 0);
RTE_TEST_ASSERT(ctx != NULL, "Can not create thash ctx\n");
/* requested range > (2^reta_sz) - 1 */
ret = rte_thash_add_helper(ctx, "test", (1 << reta_sz), 0);
RTE_TEST_ASSERT(ret == -ENOSPC,
"Call succeeded with invalid parameters\n");
/* requested range == len + 32 - 1, smaller than (2^reta_sz) - 1 */
ret = rte_thash_add_helper(ctx, "test", (1 << reta_sz) - 32, 0);
RTE_TEST_ASSERT(ret == 0, "Can not create helper\n");
rte_thash_free_ctx(ctx);
/* create with RTE_THASH_IGNORE_PERIOD_OVERFLOW flag */
ctx = rte_thash_init_ctx("test", 40, reta_sz, NULL,
RTE_THASH_IGNORE_PERIOD_OVERFLOW);
RTE_TEST_ASSERT(ctx != NULL, "Can not create thash ctx\n");
/* requested range > (2^reta_sz - 1) */
ret = rte_thash_add_helper(ctx, "test", (1 << reta_sz) + 10, 0);
RTE_TEST_ASSERT(ret == 0, "Can not create helper\n");
rte_thash_free_ctx(ctx);
return TEST_SUCCESS;
}
static int
test_predictable_rss_min_seq(void)
{
struct rte_thash_ctx *ctx;
struct rte_thash_subtuple_helper *h;
const int key_len = 40;
int reta_sz = 6;
uint8_t initial_key[key_len];
const uint8_t *new_key;
int ret;
union rte_thash_tuple tuple;
uint32_t orig_hash, adj_hash, adj;
unsigned int desired_value = 27 & HASH_MSK(reta_sz);
uint16_t port_value = 22;
memset(initial_key, 0, key_len);
ctx = rte_thash_init_ctx("test", key_len, reta_sz, initial_key,
RTE_THASH_MINIMAL_SEQ);
RTE_TEST_ASSERT(ctx != NULL, "can not create thash ctx\n");
ret = rte_thash_add_helper(ctx, "snat", sizeof(uint16_t) * 8,
offsetof(union rte_thash_tuple, v4.sport) * 8);
RTE_TEST_ASSERT(ret == 0, "can not add helper, ret %d\n", ret);
h = rte_thash_get_helper(ctx, "snat");
RTE_TEST_ASSERT(h != NULL, "can not find helper\n");
new_key = rte_thash_get_key(ctx);
tuple.v4.src_addr = RTE_IPV4(0, 0, 0, 0);
tuple.v4.dst_addr = RTE_IPV4(0, 0, 0, 0);
tuple.v4.sport = 0;
tuple.v4.sport = rte_cpu_to_be_16(port_value);
tuple.v4.dport = 0;
tuple.v4.sctp_tag = rte_be_to_cpu_32(tuple.v4.sctp_tag);
orig_hash = rte_softrss((uint32_t *)&tuple,
RTE_THASH_V4_L4_LEN, new_key);
adj = rte_thash_get_complement(h, orig_hash, desired_value);
tuple.v4.sctp_tag = rte_cpu_to_be_32(tuple.v4.sctp_tag);
tuple.v4.sport ^= rte_cpu_to_be_16(adj);
tuple.v4.sctp_tag = rte_be_to_cpu_32(tuple.v4.sctp_tag);
adj_hash = rte_softrss((uint32_t *)&tuple,
RTE_THASH_V4_L4_LEN, new_key);
RTE_TEST_ASSERT((adj_hash & HASH_MSK(reta_sz)) ==
desired_value, "bad desired value\n");
rte_thash_free_ctx(ctx);
return TEST_SUCCESS;
}
/*
* This test creates 7 subranges in the following order:
* range_one = [56, 95), len = 8, offset = 56
* range_two = [64, 103), len = 8, offset = 64
* range_three = [120, 159), len = 8, offset = 120
* range_four = [48, 87), len = 8, offset = 48
* range_five = [57, 95), len = 7, offset = 57
* range_six = [40, 111), len = 40, offset = 40
* range_seven = [0, 39), len = 8, offset = 0
*/
struct range {
const char *name;
int len;
int offset;
int byte_idx;
};
struct range rng_arr[] = {
{"one", 8, 56, 7},
{"two", 8, 64, 8},
{"three", 8, 120, 15},
{"four", 8, 48, 6},
{"six", 40, 40, 9},
{"five", 7, 57, 7},
{"seven", 8, 0, 0}
};
static int
test_predictable_rss_multirange(void)
{
struct rte_thash_ctx *ctx;
struct rte_thash_subtuple_helper *h[RTE_DIM(rng_arr)];
const uint8_t *new_key;
const int key_len = 40;
int reta_sz = 7;
unsigned int i, j, k;
int ret;
uint32_t desired_value = rte_rand() & HASH_MSK(reta_sz);
uint8_t tuples[RTE_DIM(rng_arr)][16] = { {0} };
uint32_t *ptr;
uint32_t hashes[RTE_DIM(rng_arr)];
uint32_t adj_hashes[RTE_DIM(rng_arr)];
uint32_t adj;
ctx = rte_thash_init_ctx("test", key_len, reta_sz, NULL, 0);
RTE_TEST_ASSERT(ctx != NULL, "can not create thash ctx\n");
for (i = 0; i < RTE_DIM(rng_arr); i++) {
ret = rte_thash_add_helper(ctx, rng_arr[i].name,
rng_arr[i].len, rng_arr[i].offset);
RTE_TEST_ASSERT(ret == 0, "can not add helper\n");
h[i] = rte_thash_get_helper(ctx, rng_arr[i].name);
RTE_TEST_ASSERT(h[i] != NULL, "can not find helper\n");
}
new_key = rte_thash_get_key(ctx);
/*
* calculate hashes, complements, then adjust keys with
* complements and recalculate hashes
*/
for (i = 0; i < RTE_DIM(rng_arr); i++) {
for (k = 0; k < 100; k++) {
/* init with random keys */
ptr = (uint32_t *)&tuples[i][0];
for (j = 0; j < 4; j++)
ptr[j] = rte_rand();
/* convert keys from BE to CPU byte order */
for (j = 0; j < 4; j++)
ptr[j] = rte_be_to_cpu_32(ptr[j]);
hashes[i] = rte_softrss(ptr, 4, new_key);
adj = rte_thash_get_complement(h[i], hashes[i],
desired_value);
/* convert back to BE to adjust the value */
for (j = 0; j < 4; j++)
ptr[j] = rte_cpu_to_be_32(ptr[j]);
tuples[i][rng_arr[i].byte_idx] ^= adj;
for (j = 0; j < 4; j++)
ptr[j] = rte_be_to_cpu_32(ptr[j]);
adj_hashes[i] = rte_softrss(ptr, 4, new_key);
RTE_TEST_ASSERT((adj_hashes[i] & HASH_MSK(reta_sz)) ==
desired_value,
"bad desired value for %d tuple\n", i);
}
}
rte_thash_free_ctx(ctx);
return TEST_SUCCESS;
}
static int
cmp_tuple_eq(void *userdata, uint8_t *tuple)
{
return memcmp(userdata, tuple, TUPLE_SZ);
}
static int
test_adjust_tuple(void)
{
struct rte_thash_ctx *ctx;
struct rte_thash_subtuple_helper *h;
const int key_len = 40;
const uint8_t *new_key;
uint8_t tuple[TUPLE_SZ];
uint32_t tmp_tuple[TUPLE_SZ / sizeof(uint32_t)];
uint32_t tuple_copy[TUPLE_SZ / sizeof(uint32_t)];
uint32_t hash;
int reta_sz = CHAR_BIT;
int ret;
unsigned int i, desired_value = rte_rand() & HASH_MSK(reta_sz);
memset(tuple, 0xab, TUPLE_SZ);
ctx = rte_thash_init_ctx("test", key_len, reta_sz, NULL, 0);
RTE_TEST_ASSERT(ctx != NULL, "can not create thash ctx\n");
/*
* set offset to be in the middle of a byte
* set size of the subtuple to be 2 * reta_sz
* to have the room for random bits
*/
ret = rte_thash_add_helper(ctx, "test", reta_sz * 2,
(5 * CHAR_BIT) + 4);
RTE_TEST_ASSERT(ret == 0, "can not add helper, ret %d\n", ret);
new_key = rte_thash_get_key(ctx);
h = rte_thash_get_helper(ctx, "test");
RTE_TEST_ASSERT(h != NULL, "can not find helper\n");
ret = rte_thash_adjust_tuple(ctx, h, tuple, TUPLE_SZ, desired_value,
1, NULL, NULL);
RTE_TEST_ASSERT(ret == 0, "can not adjust tuple, ret %d\n", ret);
for (i = 0; i < (TUPLE_SZ / 4); i++)
tmp_tuple[i] =
rte_be_to_cpu_32(*(uint32_t *)&tuple[i * 4]);
hash = rte_softrss(tmp_tuple, TUPLE_SZ / 4, new_key);
RTE_TEST_ASSERT((hash & HASH_MSK(reta_sz)) ==
desired_value, "bad desired value\n");
/* Pass previously calculated tuple to callback function */
memcpy(tuple_copy, tuple, TUPLE_SZ);
memset(tuple, 0xab, TUPLE_SZ);
ret = rte_thash_adjust_tuple(ctx, h, tuple, TUPLE_SZ, desired_value,
1, cmp_tuple_eq, tuple_copy);
RTE_TEST_ASSERT(ret == -EEXIST,
"adjust tuple didn't indicate collision\n");
/*
* Make the function generate random bits in the subtuple
* after the first adjustment attempt.
*/
memset(tuple, 0xab, TUPLE_SZ);
ret = rte_thash_adjust_tuple(ctx, h, tuple, TUPLE_SZ, desired_value,
2, cmp_tuple_eq, tuple_copy);
RTE_TEST_ASSERT(ret == 0, "can not adjust tuple, ret %d\n", ret);
for (i = 0; i < (TUPLE_SZ / 4); i++)
tmp_tuple[i] =
rte_be_to_cpu_32(*(uint32_t *)&tuple[i * 4]);
hash = rte_softrss(tmp_tuple, TUPLE_SZ / 4, new_key);
RTE_TEST_ASSERT((hash & HASH_MSK(reta_sz)) ==
desired_value, "bad desired value\n");
rte_thash_free_ctx(ctx);
return TEST_SUCCESS;
}
static struct unit_test_suite thash_tests = {
.suite_name = "thash autotest",
.setup = NULL,
.teardown = NULL,
.unit_test_cases = {
TEST_CASE(test_toeplitz_hash_calc),
TEST_CASE(test_create_invalid),
TEST_CASE(test_multiple_create),
TEST_CASE(test_free_null),
TEST_CASE(test_add_invalid_helper),
TEST_CASE(test_find_existing),
TEST_CASE(test_get_helper),
TEST_CASE(test_period_overflow),
TEST_CASE(test_predictable_rss_min_seq),
TEST_CASE(test_predictable_rss_multirange),
TEST_CASE(test_adjust_tuple),
TEST_CASES_END()
}
};
static int
test_thash(void)
{
return unit_test_suite_runner(&thash_tests);
}
REGISTER_TEST_COMMAND(thash_autotest, test_thash);

(Two new documentation images, about 35 KiB each, were added; their diffs are suppressed.)


@@ -36,3 +36,250 @@ The ``rte_softrss()`` function expects the ``rss_key``
to be exactly the same as the one installed on the NIC.
The ``rte_softrss_be`` function is a faster implementation,
but it expects ``rss_key`` to be converted to the host byte order.
Predictable RSS
---------------
In some use cases it is useful to have a way to find partial collisions of the
Toeplitz hash function. In figure :numref:`figure_rss_queue_assign` only a few
of the least significant bits (LSBs) of the hash value are used to indicate an
entry in the RSS Redirection Table (ReTa) and thus the index of the queue. So,
in this case, it would be useful to find another tuple whose hash has the same
LSBs as the hash of the original tuple.
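For instance, the queue selection performed by the NIC can be modelled roughly
as follows (a simplified sketch assuming a 128-entry ReTa, i.e. ``reta_sz`` = 7;
``rss_queue()`` is just an illustration, not a DPDK API):

.. code-block:: c

    /* Simplified model of how a NIC picks the destination queue:
     * only the reta_sz least significant bits of the hash are used. */
    static uint16_t
    rss_queue(const uint16_t reta[128], uint32_t toeplitz_hash)
    {
        return reta[toeplitz_hash & ((1 << 7) - 1)];
    }
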
This capability is useful, for example, in the following cases:
- In the case of SNAT (Source Network Address Translation) it is possible to
choose a special source port number for the translation so that the hash of
the return packets of the given connection will have the desired LSBs.
- In the case of MPLS (Multiprotocol Label Switching), if the MPLS tag is used
in the hash calculation, the Label Switching router can allocate a special
MPLS tag to bind an LSP (Label Switching Path) to a given queue. This method
can be used with the allocation of IPsec SPI, VXLAN VNI, etc., to bind the
tunnel to the desired queue.
- In the case of a TCP stack, a special source port could be chosen for
outgoing connections, such that the response packets will be assigned to the
desired queue.
This functionality is provided by the API shown below.
The API consists of 3 parts:
* Create the thash context.
* Create the thash helper, associated with a context.
* Use the helper at run time to calculate the adjustable bits of the tuple to
ensure a collision.
Thash context
~~~~~~~~~~~~~
The function ``rte_thash_init_ctx()`` initializes the context struct
associated with a particular NIC or a set of NICs. It expects:
* The log2 value of the size of the RSS redirection table for the
corresponding NIC. It reflects the number of least significant bits of the
hash value to produce a collision for.
* A predefined RSS hash key. This is optional; if ``NULL``, a random key
is generated.
* The length of the RSS hash key. This value is usually hardware/driver
specific and can be found in the NIC datasheet.
* Optional flags, as shown below.
Supported flags:
* ``RTE_THASH_IGNORE_PERIOD_OVERFLOW`` - By default, and for security reasons,
the library prohibits generating a repeatable sequence in the hash key. This
flag disables such checking. The flag is mainly used for testing in the lab
to generate an RSS hash key with a uniform hash distribution, if the input
traffic also has a uniform distribution.
* ``RTE_THASH_MINIMAL_SEQ`` - By default, the library generates a special bit
sequence in the hash key for all the bits of the subtuple. However, the
collision generation task requires only the ``log2(RETA_SZ)`` bits in the
subtuple. This flag forces the minimum bit sequence in the hash key to be
generated for the required ``log2(RETA_SZ)`` least significant bits of the
subtuple. The flag can be used in the case of a relatively large number of
helpers that may overlap with their corresponding bit sequences of RSS hash
keys.
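For illustration, a context for a NIC with a 40-byte RSS key and a 128-entry
ReTa could be created as follows (a sketch; the context name is arbitrary):

.. code-block:: c

    struct rte_thash_ctx *ctx;

    /* 40-byte key, 2^7 ReTa entries, random initial key, no flags */
    ctx = rte_thash_init_ctx("predictable_rss", 40, 7, NULL, 0);
    if (ctx == NULL)
        return -rte_errno; /* EINVAL, EEXIST or ENOMEM */
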
Thash helper
~~~~~~~~~~~~
The function ``rte_thash_add_helper()`` initializes the helper struct
associated with a given context and a part of a target tuple of interest which
could be altered to produce a hash collision. On success it writes a specially
calculated bit sequence into the RSS hash key which is stored in the context
and calculates a table with values to be XORed with a subtuple.
It expects:
* A pointer to the Thash context to be associated with.
* A length of the subtuple to be modified. The length is counted in bits.
* An offset of the subtuple to be modified from the beginning of the tuple. It
is also counted in bits.
.. note::
Adding a helper changes the key stored in the corresponding context. So the
updated RSS hash key must be uploaded into the NIC after creating all the
required helpers.
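For example, a helper covering the 16-bit IPv4 source port could be added as
follows (a sketch continuing the context created above; the helper name is
arbitrary):

.. code-block:: c

    int ret;

    /* 16-bit subtuple located at the bit offset of the IPv4 source port */
    ret = rte_thash_add_helper(ctx, "sport", sizeof(uint16_t) * CHAR_BIT,
            offsetof(union rte_thash_tuple, v4.sport) * CHAR_BIT);
    if (ret != 0)
        return ret;

    /* handle of the helper just added */
    struct rte_thash_subtuple_helper *h = rte_thash_get_helper(ctx, "sport");

    /* the key stored in the context has changed, re-read it for the NIC */
    const uint8_t *new_key = rte_thash_get_key(ctx);
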
Calculation of the complementary bits to adjust the subtuple
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The ``rte_thash_get_complement()`` function returns a special bit sequence
with length ``N = log2(rss_reta_sz)`` (for the ``rss_reta_sz`` provided at
context initialization) to be XORed with the N least significant bits of the
subtuple.
It expects:
* A corresponding helper created for a given subtuple of the tuple.
* A hash value of the tuple we want to alter.
* The desired LSBs of the hash value the user expects to have.
After the returned bit sequence has been XORed with the subtuple, the resulting
LSBs of the new hash value, calculated from the altered tuple, will be the
same as in ``desired_hash``.
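The following condensed sketch, modelled on the unit test added in this patch,
illustrates the property. It assumes ``h`` and ``new_key`` come from the helper
example above, and that ``sport`` and ``desired_lsb`` (already masked to
``reta_sz`` bits) are given by the application:

.. code-block:: c

    union rte_thash_tuple tuple;
    uint32_t orig_hash, adj, new_hash;

    tuple.v4.src_addr = RTE_IPV4(0, 0, 0, 0);
    tuple.v4.dst_addr = RTE_IPV4(0, 0, 0, 0);
    tuple.v4.dport = 0;
    tuple.v4.sport = rte_cpu_to_be_16(sport);
    /* byte-swap the 32-bit word holding the ports before hashing,
     * mirroring the unit test */
    tuple.v4.sctp_tag = rte_be_to_cpu_32(tuple.v4.sctp_tag);

    orig_hash = rte_softrss((uint32_t *)&tuple, RTE_THASH_V4_L4_LEN, new_key);
    adj = rte_thash_get_complement(h, orig_hash, desired_lsb);

    /* XOR the complement into the source port */
    tuple.v4.sctp_tag = rte_cpu_to_be_32(tuple.v4.sctp_tag);
    tuple.v4.sport ^= rte_cpu_to_be_16(adj);
    tuple.v4.sctp_tag = rte_be_to_cpu_32(tuple.v4.sctp_tag);

    new_hash = rte_softrss((uint32_t *)&tuple, RTE_THASH_V4_L4_LEN, new_key);
    /* (new_hash & ((1 << reta_sz) - 1)) == desired_lsb now holds */
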
Adjust tuple API
~~~~~~~~~~~~~~~~~
The ``rte_thash_adjust_tuple()`` function is a user-friendly wrapper around
a number of the functions described above. It alters the passed tuple so that
the resulting hash has the desired LSBs; a usage sketch follows the parameter
list below.
It expects:
* A Thash context and helper.
* A pointer to the tuple to be changed.
* The length of the tuple.
* A callback function and its userdata to check the tuple after it has been
changed.
* The number of attempts to change the tuple. More than one attempt only makes
sense together with a callback: if the callback rejects a candidate tuple,
random bits are written into the subtuple and the adjustment is retried, up to
the given limit.
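As an illustrative sketch, the wrapper could be used with a lookup callback as
shown below (``struct conn_table``, ``conn_table_lookup()`` and
``pick_free_tuple()`` are hypothetical application-side names):

.. code-block:: c

    /* hypothetical application connection table */
    struct conn_table;
    void *conn_table_lookup(struct conn_table *t, const uint8_t *tuple);

    /* Callback: return 1 if the candidate tuple is not in use yet, 0 otherwise */
    static int
    tuple_is_free(void *userdata, uint8_t *tuple)
    {
        struct conn_table *conns = userdata;

        return conn_table_lookup(conns, tuple) == NULL;
    }

    static int
    pick_free_tuple(struct rte_thash_ctx *ctx,
            struct rte_thash_subtuple_helper *h,
            union rte_thash_tuple *tuple, uint32_t desired_lsb,
            struct conn_table *conns)
    {
        /* up to 3 attempts; random bits are written into the subtuple
         * before each retry if the callback rejects a candidate */
        return rte_thash_adjust_tuple(ctx, h, (uint8_t *)tuple,
                RTE_THASH_V4_L4_LEN * 4, desired_lsb, 3,
                tuple_is_free, conns);
    }

``rte_thash_adjust_tuple()`` returns 0 on success, ``-EEXIST`` if the callback
rejected the tuple produced by every attempt, and ``-EINVAL`` on invalid
arguments.
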
Use case example
----------------
There could be a number of different use cases, such as NAT, a TCP stack, MPLS
tag allocation, etc. In the following we consider a SNAT application.
Packets belonging to the two directions of a single bidirectional flow can end
up being assigned to different queues and thus processed by different lcores,
as shown in :numref:`figure_predictable_snat_1`:
.. _figure_predictable_snat_1:
.. figure:: img/predictable_snat_1.*
Bidirectional flow packets distribution in general
That leads to a situation where the same packet flow can be shared between two
cores. Such a situation is not ideal from a performance perspective and
requires extra synchronization efforts that might lead to various performance
penalties, for example:
* The connections table is global so locking/RCU on the flow insertion/removal
is required.
* Connection metadata must be protected to avoid race conditions.
* More cache pressure if the metadata of a single connection is kept in the
L1/L2 caches of different CPU cores.
* Less cache locality when packets of the same flow are handed over between
cores.
We can avoid all these penalties if it can be guaranteed that packets
belonging to one bidirectional flow will be assigned to the same queue, as
shown in :numref:`figure_predictable_snat_2`:
.. _figure_predictable_snat_2:
.. figure:: img/predictable_snat_2.*
Bidirectional flow packets distribution with predictable RSS
To achieve this in a SNAT scenario it is possible to choose a source port not
randomly, but using the predictable RSS library to produce a partial hash
collision. This is shown in the code below.
.. code-block:: c
int key_len = 40; /* The default Niantic RSS key length. */
/** The default Niantic RSS ReTa size = 2^7 entries; the LSBs of the hash
* value are used as an index into the RSS ReTa. */
int reta_sz = 7;
int ret;
struct rte_thash_ctx *ctx;
uint8_t initial_key[key_len]; /* Default empty key. */
memset(initial_key, 0, key_len);
/* Create and initialize a new thash context. */
ctx = rte_thash_init_ctx("SNAT", key_len, reta_sz, initial_key, 0);
/** Add a helper and specify the variable tuple part and its length. In the
* SNAT case we want to choose a new source port on SNAT translation in a
* way that the reverse tuple will have the same LSBs as the original
* direction tuple so that the selected source port will be the
* destination port on reply.
*/
ret = rte_thash_add_helper(ctx, "snat", sizeof(uint16_t) * 8,
offsetof(union rte_thash_tuple, v4.dport) * 8);
if (ret != 0)
return ret;
/* Get handler of the required helper. */
struct rte_thash_subtuple_helper *h = rte_thash_get_helper(ctx, "snat");
/** After calling rte_thash_add_helper() the initial_key passed at ctx
* creation has been changed, so we get the new one.
*/
const uint8_t *new_key = rte_thash_get_key(ctx);
union rte_thash_tuple tuple, rev_tuple;
/* A complete tuple from the packet. */
complete_tuple(mbuf, &tuple);
/* Calculate the RSS hash or get it from mbuf->hash.rss. */
uint32_t orig_hash = rte_softrss((uint32_t *)&tuple, RTE_THASH_V4_L4_LEN, new_key);
/** Complete the reverse tuple by translating the SRC address and swapping
* src and dst addresses and ports.
*/
get_rev_tuple(&rev_tuple, &tuple, new_ip);
/* Calculate the expected rss hash for the reverse tuple. */
uint32_t rev_hash = rte_softrss((uint32_t *)&rev_tuple, RTE_THASH_V4_L4_LEN, new_key);
/* Get the adjustment bits for the src port to get a new port. */
uint32_t adj = rte_thash_get_complement(h, rev_hash, orig_hash);
/* Adjust the source port bits. */
uint16_t new_sport = tuple.v4.sport ^ adj;
/* Make an actual packet translation. */
do_snat(mbuf, new_ip, new_sport);


@@ -203,6 +203,12 @@ New Features
the events across multiple stages.
* This also reduced the scheduling overhead on an event device.
* **Added Predictable RSS functionality to the Toeplitz hash library.**
Added a feature for finding collisions of the Toeplitz hash function,
the hash function used in NICs to spread traffic among the queues.
It can be used to get a predictable mapping of the flows.
* **Updated testpmd.**
* Added a command line option to configure forced speed for Ethernet port.


@@ -8,6 +8,7 @@ headers = files('rte_fbk_hash.h',
'rte_thash.h')
indirect_headers += files('rte_crc_arm64.h')
sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c')
sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c', 'rte_thash.c')
deps += ['net']
deps += ['ring']
deps += ['rcu']

lib/librte_hash/rte_thash.c (new file, 682 lines)

@@ -0,0 +1,682 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2021 Intel Corporation
*/
#include <rte_thash.h>
#include <rte_tailq.h>
#include <rte_random.h>
#include <rte_memcpy.h>
#include <rte_errno.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_log.h>
#include <rte_malloc.h>
#define THASH_NAME_LEN 64
#define TOEPLITZ_HASH_LEN 32
#define RETA_SZ_IN_RANGE(reta_sz) ((reta_sz >= RTE_THASH_RETA_SZ_MIN) &&\
(reta_sz <= RTE_THASH_RETA_SZ_MAX))
TAILQ_HEAD(rte_thash_list, rte_tailq_entry);
static struct rte_tailq_elem rte_thash_tailq = {
.name = "RTE_THASH",
};
EAL_REGISTER_TAILQ(rte_thash_tailq)
/**
* Table of some irreducible polynomials over GF(2).
* For lfsr they are represented in BE bit order, and
* x^0 is masked out.
* For example, poly x^5 + x^2 + 1 will be represented
* as (101001b & 11111b) = 01001b = 0x9
*/
static const uint32_t irreducible_poly_table[][4] = {
{0, 0, 0, 0}, /** < degree 0 */
{1, 1, 1, 1}, /** < degree 1 */
{0x3, 0x3, 0x3, 0x3}, /** < degree 2 and so on... */
{0x5, 0x3, 0x5, 0x3},
{0x9, 0x3, 0x9, 0x3},
{0x9, 0x1b, 0xf, 0x5},
{0x21, 0x33, 0x1b, 0x2d},
{0x41, 0x11, 0x71, 0x9},
{0x71, 0xa9, 0xf5, 0x8d},
{0x21, 0xd1, 0x69, 0x1d9},
{0x81, 0x2c1, 0x3b1, 0x185},
{0x201, 0x541, 0x341, 0x461},
{0x941, 0x609, 0xe19, 0x45d},
{0x1601, 0x1f51, 0x1171, 0x359},
{0x2141, 0x2111, 0x2db1, 0x2109},
{0x4001, 0x801, 0x101, 0x7301},
{0x7781, 0xa011, 0x4211, 0x86d9},
};
struct thash_lfsr {
uint32_t ref_cnt;
uint32_t poly;
/**< polynomial associated with the lfsr */
uint32_t rev_poly;
/**< polynomial to generate the sequence in reverse direction */
uint32_t state;
/**< current state of the lfsr */
uint32_t rev_state;
/**< current state of the lfsr for reverse direction */
uint32_t deg; /**< polynomial degree*/
uint32_t bits_cnt; /**< number of bits generated by lfsr*/
};
struct rte_thash_subtuple_helper {
char name[THASH_NAME_LEN]; /** < Name of subtuple configuration */
LIST_ENTRY(rte_thash_subtuple_helper) next;
struct thash_lfsr *lfsr;
uint32_t offset; /** < Offset of the m-sequence */
uint32_t len; /** < Length of the m-sequence */
uint32_t tuple_offset; /** < Offset in bits of the subtuple */
uint32_t tuple_len; /** < Length in bits of the subtuple */
uint32_t lsb_msk; /** < (1 << reta_sz_log) - 1 */
__extension__ uint32_t compl_table[0] __rte_cache_aligned;
/** < Complementary table */
};
struct rte_thash_ctx {
char name[THASH_NAME_LEN];
LIST_HEAD(, rte_thash_subtuple_helper) head;
uint32_t key_len; /** < Length of the NIC RSS hash key */
uint32_t reta_sz_log; /** < size of the RSS ReTa in bits */
uint32_t subtuples_nb; /** < number of subtuples */
uint32_t flags;
uint8_t hash_key[0];
};
static inline uint32_t
get_bit_lfsr(struct thash_lfsr *lfsr)
{
uint32_t bit, ret;
/*
* masking the TAP bits defined by the polynomial and
* calculating parity
*/
bit = __builtin_popcount(lfsr->state & lfsr->poly) & 0x1;
ret = lfsr->state & 0x1;
lfsr->state = ((lfsr->state >> 1) | (bit << (lfsr->deg - 1))) &
((1 << lfsr->deg) - 1);
lfsr->bits_cnt++;
return ret;
}
static inline uint32_t
get_rev_bit_lfsr(struct thash_lfsr *lfsr)
{
uint32_t bit, ret;
bit = __builtin_popcount(lfsr->rev_state & lfsr->rev_poly) & 0x1;
ret = lfsr->rev_state & (1 << (lfsr->deg - 1));
lfsr->rev_state = ((lfsr->rev_state << 1) | bit) &
((1 << lfsr->deg) - 1);
lfsr->bits_cnt++;
return ret;
}
static inline uint32_t
thash_get_rand_poly(uint32_t poly_degree)
{
return irreducible_poly_table[poly_degree][rte_rand() %
RTE_DIM(irreducible_poly_table[poly_degree])];
}
static struct thash_lfsr *
alloc_lfsr(struct rte_thash_ctx *ctx)
{
struct thash_lfsr *lfsr;
uint32_t i;
if (ctx == NULL)
return NULL;
lfsr = rte_zmalloc(NULL, sizeof(struct thash_lfsr), 0);
if (lfsr == NULL)
return NULL;
lfsr->deg = ctx->reta_sz_log;
lfsr->poly = thash_get_rand_poly(lfsr->deg);
do {
lfsr->state = rte_rand() & ((1 << lfsr->deg) - 1);
} while (lfsr->state == 0);
/* init reverse order polynomial */
lfsr->rev_poly = (lfsr->poly >> 1) | (1 << (lfsr->deg - 1));
/* init proper rev_state*/
lfsr->rev_state = lfsr->state;
for (i = 0; i <= lfsr->deg; i++)
get_rev_bit_lfsr(lfsr);
/* clear bits_cnt after rev_state was inited */
lfsr->bits_cnt = 0;
lfsr->ref_cnt = 1;
return lfsr;
}
static void
attach_lfsr(struct rte_thash_subtuple_helper *h, struct thash_lfsr *lfsr)
{
lfsr->ref_cnt++;
h->lfsr = lfsr;
}
static void
free_lfsr(struct thash_lfsr *lfsr)
{
lfsr->ref_cnt--;
if (lfsr->ref_cnt == 0)
rte_free(lfsr);
}
struct rte_thash_ctx *
rte_thash_init_ctx(const char *name, uint32_t key_len, uint32_t reta_sz,
uint8_t *key, uint32_t flags)
{
struct rte_thash_ctx *ctx;
struct rte_tailq_entry *te;
struct rte_thash_list *thash_list;
uint32_t i;
if ((name == NULL) || (key_len == 0) || !RETA_SZ_IN_RANGE(reta_sz)) {
rte_errno = EINVAL;
return NULL;
}
thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
rte_mcfg_tailq_write_lock();
/* guarantee there's no existing */
TAILQ_FOREACH(te, thash_list, next) {
ctx = (struct rte_thash_ctx *)te->data;
if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0)
break;
}
ctx = NULL;
if (te != NULL) {
rte_errno = EEXIST;
goto exit;
}
/* allocate tailq entry */
te = rte_zmalloc("THASH_TAILQ_ENTRY", sizeof(*te), 0);
if (te == NULL) {
RTE_LOG(ERR, HASH,
"Can not allocate tailq entry for thash context %s\n",
name);
rte_errno = ENOMEM;
goto exit;
}
ctx = rte_zmalloc(NULL, sizeof(struct rte_thash_ctx) + key_len, 0);
if (ctx == NULL) {
RTE_LOG(ERR, HASH, "thash ctx %s memory allocation failed\n",
name);
rte_errno = ENOMEM;
goto free_te;
}
rte_strlcpy(ctx->name, name, sizeof(ctx->name));
ctx->key_len = key_len;
ctx->reta_sz_log = reta_sz;
LIST_INIT(&ctx->head);
ctx->flags = flags;
if (key)
rte_memcpy(ctx->hash_key, key, key_len);
else {
for (i = 0; i < key_len; i++)
ctx->hash_key[i] = rte_rand();
}
te->data = (void *)ctx;
TAILQ_INSERT_TAIL(thash_list, te, next);
rte_mcfg_tailq_write_unlock();
return ctx;
free_te:
rte_free(te);
exit:
rte_mcfg_tailq_write_unlock();
return NULL;
}
struct rte_thash_ctx *
rte_thash_find_existing(const char *name)
{
struct rte_thash_ctx *ctx;
struct rte_tailq_entry *te;
struct rte_thash_list *thash_list;
thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
rte_mcfg_tailq_read_lock();
TAILQ_FOREACH(te, thash_list, next) {
ctx = (struct rte_thash_ctx *)te->data;
if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0)
break;
}
rte_mcfg_tailq_read_unlock();
if (te == NULL) {
rte_errno = ENOENT;
return NULL;
}
return ctx;
}
void
rte_thash_free_ctx(struct rte_thash_ctx *ctx)
{
struct rte_tailq_entry *te;
struct rte_thash_list *thash_list;
struct rte_thash_subtuple_helper *ent, *tmp;
if (ctx == NULL)
return;
thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
rte_mcfg_tailq_write_lock();
TAILQ_FOREACH(te, thash_list, next) {
if (te->data == (void *)ctx)
break;
}
if (te != NULL)
TAILQ_REMOVE(thash_list, te, next);
rte_mcfg_tailq_write_unlock();
ent = LIST_FIRST(&(ctx->head));
while (ent) {
free_lfsr(ent->lfsr);
tmp = ent;
ent = LIST_NEXT(ent, next);
LIST_REMOVE(tmp, next);
rte_free(tmp);
}
rte_free(ctx);
rte_free(te);
}
static inline void
set_bit(uint8_t *ptr, uint32_t bit, uint32_t pos)
{
uint32_t byte_idx = pos / CHAR_BIT;
/* index of the bit in the byte, indexing starts from MSB */
uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
uint8_t tmp;
tmp = ptr[byte_idx];
tmp &= ~(1 << bit_idx);
tmp |= bit << bit_idx;
ptr[byte_idx] = tmp;
}
/**
* writes m-sequence to the hash_key for range [start, end]
* (i.e. including start and end positions)
*/
static int
generate_subkey(struct rte_thash_ctx *ctx, struct thash_lfsr *lfsr,
uint32_t start, uint32_t end)
{
uint32_t i;
uint32_t req_bits = (start < end) ? (end - start) : (start - end);
req_bits++; /* due to including end */
/* check if the lfsr would overflow the period of the m-sequence */
if (((lfsr->bits_cnt + req_bits) > (1ULL << lfsr->deg) - 1) &&
((ctx->flags & RTE_THASH_IGNORE_PERIOD_OVERFLOW) !=
RTE_THASH_IGNORE_PERIOD_OVERFLOW)) {
RTE_LOG(ERR, HASH,
"Can't generate m-sequence due to period overflow\n");
return -ENOSPC;
}
if (start < end) {
/* original direction (from left to right)*/
for (i = start; i <= end; i++)
set_bit(ctx->hash_key, get_bit_lfsr(lfsr), i);
} else {
/* reverse direction (from right to left) */
for (i = start; i >= end; i--)
set_bit(ctx->hash_key, get_rev_bit_lfsr(lfsr), i);
}
return 0;
}
static inline uint32_t
get_subvalue(struct rte_thash_ctx *ctx, uint32_t offset)
{
uint32_t *tmp, val;
tmp = (uint32_t *)(&ctx->hash_key[offset >> 3]);
val = rte_be_to_cpu_32(*tmp);
val >>= (TOEPLITZ_HASH_LEN - ((offset & (CHAR_BIT - 1)) +
ctx->reta_sz_log));
return val & ((1 << ctx->reta_sz_log) - 1);
}
static inline void
generate_complement_table(struct rte_thash_ctx *ctx,
struct rte_thash_subtuple_helper *h)
{
int i, j, k;
uint32_t val;
uint32_t start;
start = h->offset + h->len - (2 * ctx->reta_sz_log - 1);
for (i = 1; i < (1 << ctx->reta_sz_log); i++) {
val = 0;
for (j = i; j; j &= (j - 1)) {
k = rte_bsf32(j);
val ^= get_subvalue(ctx, start - k +
ctx->reta_sz_log - 1);
}
h->compl_table[val] = i;
}
}
static inline int
insert_before(struct rte_thash_ctx *ctx,
struct rte_thash_subtuple_helper *ent,
struct rte_thash_subtuple_helper *cur_ent,
struct rte_thash_subtuple_helper *next_ent,
uint32_t start, uint32_t end, uint32_t range_end)
{
int ret;
if (end < cur_ent->offset) {
ent->lfsr = alloc_lfsr(ctx);
if (ent->lfsr == NULL) {
rte_free(ent);
return -ENOMEM;
}
/* generate nonoverlapping range [start, end) */
ret = generate_subkey(ctx, ent->lfsr, start, end - 1);
if (ret != 0) {
free_lfsr(ent->lfsr);
rte_free(ent);
return ret;
}
} else if ((next_ent != NULL) && (end > next_ent->offset)) {
rte_free(ent);
RTE_LOG(ERR, HASH,
"Can't add helper %s due to conflict with existing"
" helper %s\n", ent->name, next_ent->name);
return -ENOSPC;
}
attach_lfsr(ent, cur_ent->lfsr);
/**
* generate partially overlapping range
* [start, cur_ent->offset) in reverse order
*/
ret = generate_subkey(ctx, ent->lfsr, cur_ent->offset - 1, start);
if (ret != 0) {
free_lfsr(ent->lfsr);
rte_free(ent);
return ret;
}
if (end > range_end) {
/**
* generate partially overlapping range
* (range_end, end)
*/
ret = generate_subkey(ctx, ent->lfsr, range_end, end - 1);
if (ret != 0) {
free_lfsr(ent->lfsr);
rte_free(ent);
return ret;
}
}
LIST_INSERT_BEFORE(cur_ent, ent, next);
generate_complement_table(ctx, ent);
ctx->subtuples_nb++;
return 0;
}
static inline int
insert_after(struct rte_thash_ctx *ctx,
struct rte_thash_subtuple_helper *ent,
struct rte_thash_subtuple_helper *cur_ent,
struct rte_thash_subtuple_helper *next_ent,
struct rte_thash_subtuple_helper *prev_ent,
uint32_t end, uint32_t range_end)
{
int ret;
if ((next_ent != NULL) && (end > next_ent->offset)) {
rte_free(ent);
RTE_LOG(ERR, HASH,
"Can't add helper %s due to conflict with existing"
" helper %s\n", ent->name, next_ent->name);
return -EEXIST;
}
attach_lfsr(ent, cur_ent->lfsr);
if (end > range_end) {
/**
* generate partially overlapping range
* (range_end, end)
*/
ret = generate_subkey(ctx, ent->lfsr, range_end, end - 1);
if (ret != 0) {
free_lfsr(ent->lfsr);
rte_free(ent);
return ret;
}
}
LIST_INSERT_AFTER(prev_ent, ent, next);
generate_complement_table(ctx, ent);
ctx->subtuples_nb++;
return 0;
}
int
rte_thash_add_helper(struct rte_thash_ctx *ctx, const char *name, uint32_t len,
uint32_t offset)
{
struct rte_thash_subtuple_helper *ent, *cur_ent, *prev_ent, *next_ent;
uint32_t start, end;
int ret;
if ((ctx == NULL) || (name == NULL) || (len < ctx->reta_sz_log) ||
((offset + len + TOEPLITZ_HASH_LEN - 1) >
ctx->key_len * CHAR_BIT))
return -EINVAL;
/* Check for existing name*/
LIST_FOREACH(cur_ent, &ctx->head, next) {
if (strncmp(name, cur_ent->name, sizeof(cur_ent->name)) == 0)
return -EEXIST;
}
end = offset + len + TOEPLITZ_HASH_LEN - 1;
start = ((ctx->flags & RTE_THASH_MINIMAL_SEQ) ==
RTE_THASH_MINIMAL_SEQ) ? (end - (2 * ctx->reta_sz_log - 1)) :
offset;
ent = rte_zmalloc(NULL, sizeof(struct rte_thash_subtuple_helper) +
sizeof(uint32_t) * (1 << ctx->reta_sz_log),
RTE_CACHE_LINE_SIZE);
if (ent == NULL)
return -ENOMEM;
rte_strlcpy(ent->name, name, sizeof(ent->name));
ent->offset = start;
ent->len = end - start;
ent->tuple_offset = offset;
ent->tuple_len = len;
ent->lsb_msk = (1 << ctx->reta_sz_log) - 1;
cur_ent = LIST_FIRST(&ctx->head);
while (cur_ent) {
uint32_t range_end = cur_ent->offset + cur_ent->len;
next_ent = LIST_NEXT(cur_ent, next);
prev_ent = cur_ent;
/* Iterate through overlapping ranges */
while ((next_ent != NULL) && (next_ent->offset < range_end)) {
range_end = RTE_MAX(next_ent->offset + next_ent->len,
range_end);
if (start > next_ent->offset)
prev_ent = next_ent;
next_ent = LIST_NEXT(next_ent, next);
}
if (start < cur_ent->offset)
return insert_before(ctx, ent, cur_ent, next_ent,
start, end, range_end);
else if (start < range_end)
return insert_after(ctx, ent, cur_ent, next_ent,
prev_ent, end, range_end);
cur_ent = next_ent;
continue;
}
ent->lfsr = alloc_lfsr(ctx);
if (ent->lfsr == NULL) {
rte_free(ent);
return -ENOMEM;
}
/* generate nonoverlapping range [start, end) */
ret = generate_subkey(ctx, ent->lfsr, start, end - 1);
if (ret != 0) {
free_lfsr(ent->lfsr);
rte_free(ent);
return ret;
}
if (LIST_EMPTY(&ctx->head)) {
LIST_INSERT_HEAD(&ctx->head, ent, next);
} else {
LIST_FOREACH(next_ent, &ctx->head, next)
prev_ent = next_ent;
LIST_INSERT_AFTER(prev_ent, ent, next);
}
generate_complement_table(ctx, ent);
ctx->subtuples_nb++;
return 0;
}
struct rte_thash_subtuple_helper *
rte_thash_get_helper(struct rte_thash_ctx *ctx, const char *name)
{
struct rte_thash_subtuple_helper *ent;
if ((ctx == NULL) || (name == NULL))
return NULL;
LIST_FOREACH(ent, &ctx->head, next) {
if (strncmp(name, ent->name, sizeof(ent->name)) == 0)
return ent;
}
return NULL;
}
uint32_t
rte_thash_get_complement(struct rte_thash_subtuple_helper *h,
uint32_t hash, uint32_t desired_hash)
{
return h->compl_table[(hash ^ desired_hash) & h->lsb_msk];
}
const uint8_t *
rte_thash_get_key(struct rte_thash_ctx *ctx)
{
return ctx->hash_key;
}
static inline void
xor_bit(uint8_t *ptr, uint32_t bit, uint32_t pos)
{
uint32_t byte_idx = pos >> 3;
uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
uint8_t tmp;
tmp = ptr[byte_idx];
tmp ^= bit << bit_idx;
ptr[byte_idx] = tmp;
}
int
rte_thash_adjust_tuple(struct rte_thash_ctx *ctx,
struct rte_thash_subtuple_helper *h,
uint8_t *tuple, unsigned int tuple_len,
uint32_t desired_value, unsigned int attempts,
rte_thash_check_tuple_t fn, void *userdata)
{
uint32_t tmp_tuple[tuple_len / sizeof(uint32_t)];
unsigned int i, j, ret = 0;
uint32_t hash, adj_bits;
uint8_t bit;
const uint8_t *hash_key;
if ((ctx == NULL) || (h == NULL) || (tuple == NULL) ||
(tuple_len % sizeof(uint32_t) != 0) || (attempts <= 0))
return -EINVAL;
hash_key = rte_thash_get_key(ctx);
for (i = 0; i < attempts; i++) {
for (j = 0; j < (tuple_len / 4); j++)
tmp_tuple[j] =
rte_be_to_cpu_32(*(uint32_t *)&tuple[j * 4]);
hash = rte_softrss(tmp_tuple, tuple_len / 4, hash_key);
adj_bits = rte_thash_get_complement(h, hash, desired_value);
/*
* Hint: LSB of adj_bits corresponds to
* offset + len bit of tuple
*/
for (j = 0; j < sizeof(uint32_t) * CHAR_BIT; j++) {
bit = (adj_bits >> j) & 0x1;
if (bit)
xor_bit(tuple, bit, h->tuple_offset +
h->tuple_len - 1 - j);
}
if (fn != NULL) {
ret = (fn(userdata, tuple)) ? 0 : -EEXIST;
if (ret == 0)
return 0;
else if (i < (attempts - 1)) {
/* Update tuple with random bits */
for (j = 0; j < h->tuple_len; j++) {
bit = rte_rand() & 0x1;
if (bit)
xor_bit(tuple, bit,
h->tuple_offset +
h->tuple_len - 1 - j);
}
}
} else
return 0;
}
return ret;
}


@@ -1,5 +1,6 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2015-2019 Vladimir Medvedkin <medvedkinv@gmail.com>
* Copyright(c) 2021 Intel Corporation
*/
#ifndef _RTE_THASH_H
@@ -222,6 +223,228 @@ rte_softrss_be(uint32_t *input_tuple, uint32_t input_len,
return ret;
}
/** @internal Logarithm of minimum size of the RSS ReTa */
#define RTE_THASH_RETA_SZ_MIN 2U
/** @internal Logarithm of maximum size of the RSS ReTa */
#define RTE_THASH_RETA_SZ_MAX 16U
/**
* Ignore the period overflow check, i.e. allow the generated
* m-sequence to have more than 2^n - 1 bits,
* where n is the logarithm of the RSS ReTa size.
*/
#define RTE_THASH_IGNORE_PERIOD_OVERFLOW 0x1
/**
* Generate only the minimal required bit sequence (covering the ReTa LSBs) into
* the hash_key
*/
#define RTE_THASH_MINIMAL_SEQ 0x2
/** @internal thash context structure. */
struct rte_thash_ctx;
/** @internal thash helper structure. */
struct rte_thash_subtuple_helper;
/**
* Create a new thash context.
*
* @warning
* @b EXPERIMENTAL: this API may change without prior notice.
*
* @param name
* Context name
* @param key_len
* Length of the toeplitz hash key
* @param reta_sz
* Logarithm of the NIC's Redirection Table (ReTa) size,
* i.e. the number of LSBs of the hash used to determine
* the reta entry.
* @param key
* Pointer to the key used to init an internal key state.
* Can be NULL; in this case the internal key is initialized with random values.
* @param flags
* Supported flags are:
* RTE_THASH_IGNORE_PERIOD_OVERFLOW
* RTE_THASH_MINIMAL_SEQ
* @return
* A pointer to the created context on success
* NULL otherwise
*/
__rte_experimental
struct rte_thash_ctx *
rte_thash_init_ctx(const char *name, uint32_t key_len, uint32_t reta_sz,
uint8_t *key, uint32_t flags);
/**
* Find an existing thash context and return a pointer to it.
*
* @warning
* @b EXPERIMENTAL: this API may change without prior notice.
*
* @param name
* Name of the thash context
* @return
* Pointer to the thash context or NULL if it was not found with rte_errno
* set appropriately. Possible rte_errno values include:
* - ENOENT - required entry not available to return.
*/
__rte_experimental
struct rte_thash_ctx *
rte_thash_find_existing(const char *name);
/**
* Free a thash context object
*
* @warning
* @b EXPERIMENTAL: this API may change without prior notice.
*
* @param ctx
* Thash context
* @return
* None
*/
__rte_experimental
void
rte_thash_free_ctx(struct rte_thash_ctx *ctx);
/**
* Add special properties to the Toeplitz hash key inside a thash context.
* Creates an internal helper struct which has a complementary table
* to calculate toeplitz hash collisions.
* This function is not multi-thread safe.
*
* @warning
* @b EXPERIMENTAL: this API may change without prior notice.
*
* @param ctx
* Thash context
* @param name
* Name of the helper
* @param len
* Length in bits of the target subtuple
* Must be no shorter than the reta_sz passed to rte_thash_init_ctx().
* @param offset
* Offset in bits of the subtuple
* @return
* 0 on success
* negative on error
*/
__rte_experimental
int
rte_thash_add_helper(struct rte_thash_ctx *ctx, const char *name, uint32_t len,
uint32_t offset);
/**
* Find a helper in the context by the given name
*
* @warning
* @b EXPERIMENTAL: this API may change without prior notice.
*
* @param ctx
* Thash context
* @param name
* Name of the helper
* @return
* Pointer to the thash helper or NULL if it was not found.
*/
__rte_experimental
struct rte_thash_subtuple_helper *
rte_thash_get_helper(struct rte_thash_ctx *ctx, const char *name);
/**
* Get a complementary value for the subtuple to produce a
* partial toeplitz hash collision. It must be XOR'ed with the
* subtuple to produce the hash value with the desired hash LSBs.
* This function is multi-thread safe.
*
* @param h
* Pointer to the helper struct
* @param hash
* Toeplitz hash value calculated for the given tuple
* @param desired_hash
* Desired hash value to find a collision for
* @return
* A complementary value which must be XORed with the corresponding subtuple
*/
__rte_experimental
uint32_t
rte_thash_get_complement(struct rte_thash_subtuple_helper *h,
uint32_t hash, uint32_t desired_hash);
/**
* Get a pointer to the Toeplitz hash key contained in the context.
* It changes after each addition of a helper. It should be installed to
* the NIC.
*
* @warning
* @b EXPERIMENTAL: this API may change without prior notice.
*
* @param ctx
* Thash context
* @return
* A pointer to the toeplitz hash key
*/
__rte_experimental
const uint8_t *
rte_thash_get_key(struct rte_thash_ctx *ctx);
/**
* Function prototype for the callback used by rte_thash_adjust_tuple()
* to check if an adjusted tuple can be used.
* Generally it is some kind of lookup function checking
* whether the adjusted tuple is already in use.
*
* @warning
* @b EXPERIMENTAL: this API may change without prior notice.
*
* @param userdata
* Pointer to the userdata. It could be a pointer to the
* table with used tuples to search.
* @param tuple
* Pointer to the tuple to check
*
* @return
* 1 on success
* 0 otherwise
*/
typedef int (*rte_thash_check_tuple_t)(void *userdata, uint8_t *tuple);
/**
* Adjust the tuple so that its Toeplitz hash has the
* desired least significant bits.
* This function is multi-thread safe.
*
* @warning
* @b EXPERIMENTAL: this API may change without prior notice.
*
* @param ctx
* Thash context
* @param h
* Pointer to the helper struct
* @param tuple
* Pointer to the tuple to be adjusted
* @param tuple_len
* Length of the tuple. Must be multiple of 4.
* @param desired_value
* Desired value of least significant bits of the hash
* @param attempts
* Number of attempts to adjust tuple with fn() calling
* @param fn
* Callback function to check adjusted tuple. Could be NULL
* @param userdata
* Pointer to the userdata to be passed to fn(). Could be NULL
*
* @return
* 0 on success
* negative otherwise
*/
__rte_experimental
int
rte_thash_adjust_tuple(struct rte_thash_ctx *ctx,
struct rte_thash_subtuple_helper *h,
uint8_t *tuple, unsigned int tuple_len,
uint32_t desired_value, unsigned int attempts,
rte_thash_check_tuple_t fn, void *userdata);
#ifdef __cplusplus
}
#endif


@@ -37,4 +37,12 @@ EXPERIMENTAL {
rte_hash_lookup_with_hash_bulk_data;
rte_hash_max_key_id;
rte_hash_rcu_qsbr_add;
rte_thash_add_helper;
rte_thash_adjust_tuple;
rte_thash_find_existing;
rte_thash_free_ctx;
rte_thash_get_complement;
rte_thash_get_helper;
rte_thash_get_key;
rte_thash_init_ctx;
};