3fb1ea032b
Use rte_bsf32 and a fast bit-unset operation to optimize the softrss computation. The following measurements show the improvement over the default softrss computation function (tuple len: old cycles -> new cycles): 3: 1225 -> 337; 9: 3743 -> 992. Signed-off-by: Yangchao Zhou <zhouyates@gmail.com> Reviewed-by: Vladimir Medvedkin <medvedkinv@gmail.com>
257 lines
6.9 KiB
C
257 lines
6.9 KiB
C
/*-
|
|
* BSD LICENSE
|
|
*
|
|
* Copyright(c) 2015 Vladimir Medvedkin <medvedkinv@gmail.com>
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in
|
|
* the documentation and/or other materials provided with the
|
|
* distribution.
|
|
* * Neither the name of Intel Corporation nor the names of its
|
|
* contributors may be used to endorse or promote products derived
|
|
* from this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#ifndef _RTE_THASH_H
|
|
#define _RTE_THASH_H
|
|
|
|
/**
|
|
* @file
|
|
*
|
|
* toeplitz hash functions.
|
|
*/
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
/**
|
|
* Software implementation of the Toeplitz hash function used by RSS.
|
|
* Can be used either for packet distribution on a single-queue NIC
|
|
* or for simulating the RSS computation of a specific NIC (for example
|
|
* after GRE header decapsulation)
|
|
*/
|
|
|
|
#include <stdint.h>
|
|
#include <rte_byteorder.h>
|
|
#include <rte_ip.h>
|
|
#include <rte_common.h>
|
|
|
|
#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_NEON)
|
|
#include <rte_vect.h>
|
|
#endif
|
|
|
|
#ifdef RTE_ARCH_X86
|
|
/* Byte swap mask used for converting IPv6 address
|
|
* 4-byte chunks to CPU byte order
|
|
*/
|
|
static const __m128i rte_thash_ipv6_bswap_mask = {
|
|
0x0405060700010203ULL, 0x0C0D0E0F08090A0BULL};
|
|
#endif
|
|
|
|
/**
|
|
* length in dwords of input tuple to
|
|
* calculate hash of ipv4 header only
|
|
*/
|
|
#define RTE_THASH_V4_L3_LEN \
	((sizeof(struct rte_ipv4_tuple) - \
	sizeof(((struct rte_ipv4_tuple *)0)->sctp_tag)) / sizeof(uint32_t))
|
|
|
|
/**
|
|
* length in dwords of input tuple to
|
|
* calculate hash of ipv4 header +
|
|
* transport header
|
|
*/
|
|
#define RTE_THASH_V4_L4_LEN (sizeof(struct rte_ipv4_tuple) / sizeof(uint32_t))
|
|
|
|
/**
|
|
* length in dwords of input tuple to
|
|
* calculate hash of ipv6 header only
|
|
*/
|
|
#define RTE_THASH_V6_L3_LEN \
	((sizeof(struct rte_ipv6_tuple) - \
	sizeof(((struct rte_ipv6_tuple *)0)->sctp_tag)) / sizeof(uint32_t))
|
|
|
|
/**
|
|
* length in dwords of input tuple to
|
|
* calculate hash of ipv6 header +
|
|
* transport header
|
|
*/
|
|
#define RTE_THASH_V6_L4_LEN (sizeof(struct rte_ipv6_tuple) / sizeof(uint32_t))
|
|
|
|
/**
|
|
* IPv4 tuple
|
|
* addresses and ports/sctp_tag have to be CPU byte order
|
|
*/
|
|
struct rte_ipv4_tuple {
|
|
uint32_t src_addr;
|
|
uint32_t dst_addr;
|
|
RTE_STD_C11
|
|
union {
|
|
struct {
|
|
uint16_t dport;
|
|
uint16_t sport;
|
|
};
|
|
uint32_t sctp_tag;
|
|
};
|
|
};
|
|
|
|
/**
|
|
* IPv6 tuple
|
|
* Addresses have to be filled by rte_thash_load_v6_addr()
|
|
* ports/sctp_tag have to be CPU byte order
|
|
*/
|
|
struct rte_ipv6_tuple {
|
|
uint8_t src_addr[16];
|
|
uint8_t dst_addr[16];
|
|
RTE_STD_C11
|
|
union {
|
|
struct {
|
|
uint16_t dport;
|
|
uint16_t sport;
|
|
};
|
|
uint32_t sctp_tag;
|
|
};
|
|
};
|
|
|
|
union rte_thash_tuple {
|
|
struct rte_ipv4_tuple v4;
|
|
struct rte_ipv6_tuple v6;
|
|
#ifdef RTE_ARCH_X86
|
|
} __attribute__((aligned(XMM_SIZE)));
|
|
#else
|
|
};
|
|
#endif
|
|
|
|
/**
 * Prepare special converted key to use with rte_softrss_be().
 *
 * Every complete 32-bit word of the original key is converted from
 * big-endian to CPU byte order. Trailing bytes that do not form a
 * complete word (len % 4) are not converted.
 *
 * @param orig
 *   pointer to original RSS key
 * @param targ
 *   pointer to target RSS key
 * @param len
 *   RSS key length in bytes
 */
static inline void
rte_convert_rss_key(const uint32_t *orig, uint32_t *targ, int len)
{
	const int nb_words = len >> 2;
	int w = 0;

	while (w < nb_words) {
		targ[w] = rte_be_to_cpu_32(orig[w]);
		w++;
	}
}
|
|
|
|
/**
|
|
* Prepare and load IPv6 addresses (src and dst)
|
|
* into target tuple
|
|
* @param orig
|
|
* Pointer to ipv6 header of the original packet
|
|
* @param targ
|
|
* Pointer to rte_ipv6_tuple structure
|
|
*/
|
|
static inline void
|
|
rte_thash_load_v6_addrs(const struct ipv6_hdr *orig, union rte_thash_tuple *targ)
|
|
{
|
|
#ifdef RTE_ARCH_X86
|
|
__m128i ipv6 = _mm_loadu_si128((const __m128i *)orig->src_addr);
|
|
*(__m128i *)targ->v6.src_addr =
|
|
_mm_shuffle_epi8(ipv6, rte_thash_ipv6_bswap_mask);
|
|
ipv6 = _mm_loadu_si128((const __m128i *)orig->dst_addr);
|
|
*(__m128i *)targ->v6.dst_addr =
|
|
_mm_shuffle_epi8(ipv6, rte_thash_ipv6_bswap_mask);
|
|
#elif defined(RTE_MACHINE_CPUFLAG_NEON)
|
|
uint8x16_t ipv6 = vld1q_u8((uint8_t const *)orig->src_addr);
|
|
vst1q_u8((uint8_t *)targ->v6.src_addr, vrev32q_u8(ipv6));
|
|
ipv6 = vld1q_u8((uint8_t const *)orig->dst_addr);
|
|
vst1q_u8((uint8_t *)targ->v6.dst_addr, vrev32q_u8(ipv6));
|
|
#else
|
|
int i;
|
|
for (i = 0; i < 4; i++) {
|
|
*((uint32_t *)targ->v6.src_addr + i) =
|
|
rte_be_to_cpu_32(*((const uint32_t *)orig->src_addr + i));
|
|
*((uint32_t *)targ->v6.dst_addr + i) =
|
|
rte_be_to_cpu_32(*((const uint32_t *)orig->dst_addr + i));
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/**
 * Generic implementation. Can be used with original rss_key.
 *
 * For every set bit of the input tuple the matching 32-bit window of the
 * key is XORed into the result. The most significant bit of tuple word j
 * selects the window that starts at key byte 4 * j; each less significant
 * bit moves the window one bit further into the key.
 *
 * @param input_tuple
 *   Pointer to input tuple
 * @param input_len
 *   Length of input_tuple in 4-bytes chunks
 * @param rss_key
 *   Pointer to RSS hash key. The key is read as a byte string and needs no
 *   particular alignment. It must hold at least (input_len + 1) * 4 bytes,
 *   because the window for tuple word j spans key bytes 4 * j .. 4 * j + 7.
 * @return
 *   Calculated hash value.
 */
static inline uint32_t
rte_softrss(uint32_t *input_tuple, uint32_t input_len,
		const uint8_t *rss_key)
{
	const uint8_t *kb;
	uint32_t key_hi, key_lo;
	uint32_t i, j, map, ret = 0;

	for (j = 0; j < input_len; j++) {
		map = input_tuple[j];
		/* A zero word contributes nothing; the key is not touched. */
		if (map == 0)
			continue;

		/*
		 * Build the two big-endian key words covering this tuple
		 * word once, byte by byte, instead of dereferencing the
		 * byte array through a uint32_t pointer for every set bit.
		 */
		kb = rss_key + (uint64_t)j * 4;
		key_hi = (uint32_t)kb[0] << 24 | (uint32_t)kb[1] << 16 |
			 (uint32_t)kb[2] << 8 | (uint32_t)kb[3];
		key_lo = (uint32_t)kb[4] << 24 | (uint32_t)kb[5] << 16 |
			 (uint32_t)kb[6] << 8 | (uint32_t)kb[7];

		/* map &= (map - 1) clears the lowest set bit each round. */
		for (; map; map &= (map - 1)) {
			i = rte_bsf32(map);
			/*
			 * key_lo is widened to 64 bits so that the shift
			 * by 32 (when i == 31) is well defined and yields 0.
			 */
			ret ^= (key_hi << (31 - i)) |
				(uint32_t)((uint64_t)key_lo >> (i + 1));
		}
	}
	return ret;
}
|
|
|
|
/**
 * Optimized implementation.
 * If you want the calculated hash value matches NIC RSS value
 * you have to use special converted key with rte_convert_rss_key() fn.
 *
 * @param input_tuple
 *   Pointer to input tuple
 * @param input_len
 *   Length of input_tuple in 4-bytes chunks
 * @param rss_key
 *   Pointer to RSS hash key. It is read as an array of 32-bit words and
 *   words 0 .. input_len may be accessed, so it must hold at least
 *   (input_len + 1) * 4 bytes.
 * @return
 *   Calculated hash value.
 */
static inline uint32_t
rte_softrss_be(uint32_t *input_tuple, uint32_t input_len,
		const uint8_t *rss_key)
{
	const uint32_t *key = (const uint32_t *)rss_key;
	uint32_t hash = 0;
	uint32_t word;

	for (word = 0; word < input_len; word++) {
		uint32_t bits = input_tuple[word];

		while (bits != 0) {
			/* index of the lowest bit that is still set */
			const uint32_t pos = rte_bsf32(bits);

			/* 32-bit key window selected by that bit */
			hash ^= (key[word] << (31 - pos)) |
				(uint32_t)((uint64_t)key[word + 1] >> (pos + 1));

			/* drop the bit that was just processed */
			bits &= bits - 1;
		}
	}
	return hash;
}
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif /* _RTE_THASH_H */
|