From 31d7c069478dd94f40c7e4360b2a0953313ffe0e Mon Sep 17 00:00:00 2001 From: Vladimir Medvedkin Date: Tue, 2 Nov 2021 18:38:23 +0000 Subject: [PATCH] hash: add bulk Toeplitz hash implementation This patch adds a bulk version for the Toeplitz hash implemented with Galios Fields New Instructions (GFNI). Signed-off-by: Vladimir Medvedkin Acked-by: Konstantin Ananyev --- app/test/test_thash.c | 67 ++++++++++++++++++++- doc/guides/prog_guide/toeplitz_hash_lib.rst | 20 ++++-- lib/hash/rte_thash_gfni.h | 33 ++++++++++ lib/hash/rte_thash_x86_gfni.h | 40 ++++++++++++ 4 files changed, 154 insertions(+), 6 deletions(-) diff --git a/app/test/test_thash.c b/app/test/test_thash.c index 22d784e1b8..a62530673f 100644 --- a/app/test/test_thash.c +++ b/app/test/test_thash.c @@ -230,6 +230,8 @@ enum { SCALAR_DATA_BUF_2_HASH_IDX, GFNI_DATA_BUF_1_HASH_IDX, GFNI_DATA_BUF_2_HASH_IDX, + GFNI_BULK_DATA_BUF_1_HASH_IDX, + GFNI_BULK_DATA_BUF_2_HASH_IDX, HASH_IDXES }; @@ -241,6 +243,7 @@ test_toeplitz_hash_rand_data(void) uint32_t hash[HASH_IDXES] = { 0 }; uint64_t rss_key_matrixes[RTE_DIM(default_rss_key)]; int i, j; + uint8_t *bulk_data[2]; if (!rte_thash_gfni_supported()) return TEST_SKIPPED; @@ -248,6 +251,9 @@ test_toeplitz_hash_rand_data(void) rte_thash_complete_matrix(rss_key_matrixes, default_rss_key, RTE_DIM(default_rss_key)); + for (i = 0; i < 2; i++) + bulk_data[i] = (uint8_t *)data[i]; + for (i = 0; i < ITER; i++) { for (j = 0; j < DATA_SZ; j++) { data[0][j] = rte_rand(); @@ -266,11 +272,18 @@ test_toeplitz_hash_rand_data(void) hash[GFNI_DATA_BUF_2_HASH_IDX] = rte_thash_gfni( rss_key_matrixes, (uint8_t *)data[1], DATA_SZ * sizeof(uint32_t)); + rte_thash_gfni_bulk(rss_key_matrixes, + DATA_SZ * sizeof(uint32_t), bulk_data, + &hash[GFNI_BULK_DATA_BUF_1_HASH_IDX], 2); if ((hash[SCALAR_DATA_BUF_1_HASH_IDX] != hash[GFNI_DATA_BUF_1_HASH_IDX]) || + (hash[SCALAR_DATA_BUF_1_HASH_IDX] != + hash[GFNI_BULK_DATA_BUF_1_HASH_IDX]) || (hash[SCALAR_DATA_BUF_2_HASH_IDX] != - hash[GFNI_DATA_BUF_2_HASH_IDX])) + hash[GFNI_DATA_BUF_2_HASH_IDX]) || + (hash[SCALAR_DATA_BUF_2_HASH_IDX] != + hash[GFNI_BULK_DATA_BUF_2_HASH_IDX])) return -TEST_FAILED; } @@ -283,6 +296,57 @@ enum { RSS_V6_IDX }; +static int +test_toeplitz_hash_gfni_bulk(void) +{ + uint32_t i, j; + union rte_thash_tuple tuple[2]; + uint8_t *tuples[2]; + uint32_t rss[2] = { 0 }; + uint64_t rss_key_matrixes[RTE_DIM(default_rss_key)]; + + if (!rte_thash_gfni_supported()) + return TEST_SKIPPED; + + /* Convert RSS key into matrixes */ + rte_thash_complete_matrix(rss_key_matrixes, default_rss_key, + RTE_DIM(default_rss_key)); + + for (i = 0; i < RTE_DIM(tuples); i++) { + /* allocate memory enough for a biggest tuple */ + tuples[i] = rte_zmalloc(NULL, RTE_THASH_V6_L4_LEN * 4, 0); + if (tuples[i] == NULL) + return -TEST_FAILED; + } + + for (i = 0; i < RTE_MIN(RTE_DIM(v4_tbl), RTE_DIM(v6_tbl)); i++) { + /*Load IPv4 headers and copy it into the corresponding tuple*/ + tuple[0].v4.src_addr = rte_cpu_to_be_32(v4_tbl[i].src_ip); + tuple[0].v4.dst_addr = rte_cpu_to_be_32(v4_tbl[i].dst_ip); + tuple[0].v4.sport = rte_cpu_to_be_16(v4_tbl[i].dst_port); + tuple[0].v4.dport = rte_cpu_to_be_16(v4_tbl[i].src_port); + rte_memcpy(tuples[0], &tuple[0], RTE_THASH_V4_L4_LEN * 4); + + /*Load IPv6 headers and copy it into the corresponding tuple*/ + for (j = 0; j < RTE_DIM(tuple[1].v6.src_addr); j++) + tuple[1].v6.src_addr[j] = v6_tbl[i].src_ip[j]; + for (j = 0; j < RTE_DIM(tuple[1].v6.dst_addr); j++) + tuple[1].v6.dst_addr[j] = v6_tbl[i].dst_ip[j]; + tuple[1].v6.sport = rte_cpu_to_be_16(v6_tbl[i].dst_port); + tuple[1].v6.dport = rte_cpu_to_be_16(v6_tbl[i].src_port); + rte_memcpy(tuples[1], &tuple[1], RTE_THASH_V6_L4_LEN * 4); + + rte_thash_gfni_bulk(rss_key_matrixes, RTE_THASH_V6_L4_LEN * 4, + tuples, rss, 2); + + if ((rss[RSS_V4_IDX] != v4_tbl[i].hash_l3l4) || + (rss[RSS_V6_IDX] != v6_tbl[i].hash_l3l4)) + return -TEST_FAILED; + } + + return TEST_SUCCESS; +} + static int test_big_tuple_gfni(void) { @@ -748,6 +812,7 @@ static struct unit_test_suite thash_tests = { TEST_CASE(test_toeplitz_hash_calc), TEST_CASE(test_toeplitz_hash_gfni), TEST_CASE(test_toeplitz_hash_rand_data), + TEST_CASE(test_toeplitz_hash_gfni_bulk), TEST_CASE(test_big_tuple_gfni), TEST_CASE(test_create_invalid), TEST_CASE(test_multiple_create), diff --git a/doc/guides/prog_guide/toeplitz_hash_lib.rst b/doc/guides/prog_guide/toeplitz_hash_lib.rst index acdd8c39bb..61eaafd169 100644 --- a/doc/guides/prog_guide/toeplitz_hash_lib.rst +++ b/doc/guides/prog_guide/toeplitz_hash_lib.rst @@ -19,11 +19,12 @@ to calculate the RSS hash sum to spread the traffic among the queues. Toeplitz hash function API -------------------------- -There are three functions that provide calculation of the Toeplitz hash sum: +There are four functions that provide calculation of the Toeplitz hash sum: * ``rte_softrss()`` * ``rte_softrss_be()`` * ``rte_thash_gfni()`` +* ``rte_thash_gfni_bulk()`` First two functions are scalar implementation and take the parameters: @@ -38,11 +39,12 @@ to be exactly the same as the one installed on the NIC. The ``rte_softrss_be`` function is a faster implementation, but it expects ``rss_key`` to be converted to the host byte order. -The last function is vectorized implementation using -Galois Fields New Instructions. Could be used if ``rte_thash_gfni_supported`` returns true. -It expects the tuple to be in network byte order. +The last two functions are vectorized implementations using +Galois Fields New Instructions. Could be used if ``rte_thash_gfni_supported`` is true. +They expect the tuple to be in network byte order. -``rte_thash_gfni()`` calculates the hash value for a single tuple +``rte_thash_gfni()`` calculates the hash value for a single tuple, and +``rte_thash_gfni_bulk()`` bulk implementation of the rte_thash_gfni(). ``rte_thash_gfni()`` takes the parameters: @@ -50,6 +52,14 @@ It expects the tuple to be in network byte order. * A pointer to the tuple. * A length of the tuple in bytes. +``rte_thash_gfni_bulk()`` takes the parameters: + +* A pointer to the matrices derived from the RSS hash key using ``rte_thash_complete_matrix()``. +* A length of the longest tuple in bytes. +* Array of the pointers on data to be hashed. +* Array of ``uint32_t`` where to put calculated Toeplitz hash values +* Number of tuples in a bulk. + ``rte_thash_complete_matrix()`` is a function that calculates matrices required by GFNI implementations from the RSS hash key. It takes the parameters: diff --git a/lib/hash/rte_thash_gfni.h b/lib/hash/rte_thash_gfni.h index bbacd414c0..e97d912d40 100644 --- a/lib/hash/rte_thash_gfni.h +++ b/lib/hash/rte_thash_gfni.h @@ -45,6 +45,39 @@ rte_thash_gfni(const uint64_t *mtrx __rte_unused, return 0; } +/** + * Bulk implementation for Toeplitz hash. + * Dummy implementation. + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * @param m + * Pointer to the matrices generated from the corresponding + * RSS hash key using rte_thash_complete_matrix(). + * @param len + * Length of the largest data buffer to be hashed. + * @param tuple + * Array of the pointers on data to be hashed. + * Data must be in network byte order. + * @param val + * Array of uint32_t where to put calculated Toeplitz hash values + * @param num + * Number of tuples to hash. + */ +__rte_experimental +static inline void +rte_thash_gfni_bulk(const uint64_t *mtrx __rte_unused, + int len __rte_unused, uint8_t *tuple[] __rte_unused, + uint32_t val[], uint32_t num) +{ + unsigned int i; + + RTE_LOG(ERR, HASH, "%s is undefined under given arch\n", __func__); + for (i = 0; i < num; i++) + val[i] = 0; +} + #endif /* RTE_THASH_GFNI_DEFINED */ #ifdef __cplusplus diff --git a/lib/hash/rte_thash_x86_gfni.h b/lib/hash/rte_thash_x86_gfni.h index 53486b6734..c2889c3734 100644 --- a/lib/hash/rte_thash_x86_gfni.h +++ b/lib/hash/rte_thash_x86_gfni.h @@ -174,6 +174,46 @@ rte_thash_gfni(const uint64_t *m, const uint8_t *tuple, int len) return val; } +/** + * Bulk implementation for Toeplitz hash. + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * @param m + * Pointer to the matrices generated from the corresponding + * RSS hash key using rte_thash_complete_matrix(). + * Note that @p len should not exceed the length of the rss_key minus 4. + * @param len + * Length of the largest data buffer to be hashed. + * @param tuple + * Array of the pointers on data to be hashed. + * Data must be in network byte order. + * @param val + * Array of uint32_t where to put calculated Toeplitz hash values + * @param num + * Number of tuples to hash. + */ +__rte_experimental +static inline void +rte_thash_gfni_bulk(const uint64_t *mtrx, int len, uint8_t *tuple[], + uint32_t val[], uint32_t num) +{ + uint32_t i; + uint32_t val_zero; + __m512i xor_acc; + + for (i = 0; i != (num & ~1); i += 2) { + xor_acc = __rte_thash_gfni(mtrx, tuple[i], tuple[i + 1], len); + __rte_thash_xor_reduce(xor_acc, val + i, val + i + 1); + } + + if (num & 1) { + xor_acc = __rte_thash_gfni(mtrx, tuple[i], NULL, len); + __rte_thash_xor_reduce(xor_acc, val + i, &val_zero); + } +} + #endif /* _GFNI_ */ #ifdef __cplusplus