lpm6: store rules in hash table

Rework the lpm6 rule subsystem, replacing the current O(n) rules
algorithm with a hash table, which allows dealing with large
(50k) rule sets.

Signed-off-by: Alex Kiselev <alex@therouter.net>
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
This commit is contained in:
Alex Kiselev 2018-09-18 13:57:09 +03:00 committed by Thomas Monjalon
parent 3139d3bed7
commit 86b3b21952
4 changed files with 210 additions and 163 deletions

View File

@ -50,7 +50,7 @@ DEPDIRS-librte_hash := librte_eal librte_ring
DIRS-$(CONFIG_RTE_LIBRTE_EFD) += librte_efd
DEPDIRS-librte_efd := librte_eal librte_ring librte_hash
DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm
# NOTE(review): the next two assignments are the pre-/post-patch versions of
# the same line from the diff view; only the librte_hash variant survives,
# because lpm6 now stores its rules in an rte_hash table.
DEPDIRS-librte_lpm := librte_eal
DEPDIRS-librte_lpm := librte_eal librte_hash
DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl
DEPDIRS-librte_acl := librte_eal
DIRS-$(CONFIG_RTE_LIBRTE_MEMBER) += librte_member

View File

@ -8,7 +8,7 @@ LIB = librte_lpm.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
# NOTE(review): diff residue — the first LDLIBS line is the pre-patch
# version; the patched build additionally links librte_hash for the
# lpm6 rules hash table.
LDLIBS += -lrte_eal
LDLIBS += -lrte_eal -lrte_hash
EXPORT_MAP := rte_lpm_version.map

View File

@ -7,3 +7,4 @@ headers = files('rte_lpm.h', 'rte_lpm6.h')
# since header files have different names, we can install all vector headers
# without worrying about which architecture we actually need
headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h')
# the hash library is now required: lpm6 stores its rules in an rte_hash table
deps += ['hash']

View File

@ -21,6 +21,9 @@
#include <rte_errno.h>
#include <rte_rwlock.h>
#include <rte_spinlock.h>
#include <rte_hash.h>
#include <assert.h>
#include <rte_jhash.h>
#include "rte_lpm6.h"
@ -37,6 +40,8 @@
#define BYTE_SIZE 8
#define BYTES2_SIZE 16
/* Slack added on top of max_rules when sizing the rules hash table
 * (see rule_hash_tbl_params.entries in rte_lpm6_create()). */
#define RULE_HASH_TABLE_EXTRA_SPACE 64
/* Alias: a tbl8 group index is stored in (and read back from) the
 * next_hop field of a table entry; this makes that reuse explicit. */
#define lpm6_tbl8_gindex next_hop
/** Flags for setting an entry as valid/invalid. */
@ -70,6 +75,12 @@ struct rte_lpm6_rule {
uint8_t depth; /**< Rule depth. */
};
/** Rules tbl entry key.
 *
 * Hashed as raw bytes by rule_hash() over sizeof(struct rte_lpm6_rule_key);
 * keys are built via rule_key_init(), which writes every member. All
 * members are uint8_t, so there are no interior padding bytes.
 */
struct rte_lpm6_rule_key {
	uint8_t ip[RTE_LPM6_IPV6_ADDR_SIZE]; /**< Rule IP address (already masked to depth). */
	uint8_t depth; /**< Rule depth. */
};
/** LPM6 structure. */
struct rte_lpm6 {
/* LPM metadata. */
@ -80,7 +91,7 @@ struct rte_lpm6 {
uint32_t next_tbl8; /**< Next tbl8 to be used. */
/* LPM Tables. */
struct rte_lpm6_rule *rules_tbl; /**< LPM rules. */
struct rte_hash *rules_tbl; /**< LPM rules. */
struct rte_lpm6_tbl_entry tbl24[RTE_LPM6_TBL24_NUM_ENTRIES]
__rte_cache_aligned; /**< LPM tbl24 table. */
struct rte_lpm6_tbl_entry tbl8[0]
@ -93,22 +104,69 @@ struct rte_lpm6 {
* and set the rest to 0.
*/
static inline void
/* NOTE(review): diff residue — the next two lines are the pre-patch
 * (mask_ip) and post-patch (ip6_mask_addr) names of the same function;
 * the duplicated declarations and loops below are likewise old/new
 * pairs. Only the ip6_mask_addr version exists in the final source. */
mask_ip(uint8_t *ip, uint8_t depth)
ip6_mask_addr(uint8_t *ip, uint8_t depth)
{
	int16_t part_depth, mask;
	int i;
	int16_t part_depth, mask;
	int i;
	part_depth = depth;
	part_depth = depth;
	/* Keep the top `depth` bits of the 16-byte address, zero the rest:
	 * whole bytes past the prefix are cleared, the boundary byte is
	 * masked with the top part_depth bits of 0xff. */
	for (i = 0; i < RTE_LPM6_IPV6_ADDR_SIZE; i++) {
		if (part_depth < BYTE_SIZE && part_depth >= 0) {
			mask = (uint16_t)(~(UINT8_MAX >> part_depth));
			ip[i] = (uint8_t)(ip[i] & mask);
		} else if (part_depth < 0) {
			ip[i] = 0;
		}
		part_depth -= BYTE_SIZE;
	}
	for (i = 0; i < RTE_LPM6_IPV6_ADDR_SIZE; i++) {
		if (part_depth < BYTE_SIZE && part_depth >= 0) {
			mask = (uint16_t)(~(UINT8_MAX >> part_depth));
			ip[i] = (uint8_t)(ip[i] & mask);
		} else if (part_depth < 0)
			ip[i] = 0;
		part_depth -= BYTE_SIZE;
	}
}
/*
 * Copy an IPv6 address from src to dst.
 * Both buffers must hold at least RTE_LPM6_IPV6_ADDR_SIZE bytes.
 */
static inline void
ip6_copy_addr(uint8_t *dst, const uint8_t *src)
{
	unsigned int i;

	for (i = 0; i < RTE_LPM6_IPV6_ADDR_SIZE; i++)
		dst[i] = src[i];
}
/*
 * Hash callback used by the rules rte_hash table.
 *
 * The table only ever stores keys of type struct rte_lpm6_rule_key, so
 * the data_len argument supplied by rte_hash is ignored and the full
 * key-struct size is hashed instead.
 */
static inline uint32_t
rule_hash(const void *data, __rte_unused uint32_t data_len,
		uint32_t init_val)
{
	const uint32_t key_len = sizeof(struct rte_lpm6_rule_key);

	return rte_jhash(data, key_len, init_val);
}
/*
 * Fill in a rules-table hash key.
 * The caller must pass an address that has already been masked to `depth`
 * (see ip6_mask_addr), so equal rules produce byte-identical keys.
 */
static inline void
rule_key_init(struct rte_lpm6_rule_key *key, uint8_t *ip, uint8_t depth)
{
	key->depth = depth;
	ip6_copy_addr(key->ip, ip);
}
/*
 * Rebuild the entire LPM tree by reinserting all rules
 *
 * Iterates over every (key, data) pair in the rules hash table and
 * re-adds it via rte_lpm6_add(). The delete paths wipe tbl24/tbl8
 * first and call this to reconstruct them from the surviving rules.
 */
static void
rebuild_lpm(struct rte_lpm6 *lpm)
{
	uint64_t next_hop;
	struct rte_lpm6_rule_key *rule_key;
	uint32_t iter = 0;
	/* rte_hash_iterate() yields a pointer to the stored key and the
	 * stored data word; the next hop was saved as a pointer-sized
	 * integer (see rule_add()), hence the void** cast and the
	 * narrowing back to uint32_t.
	 * NOTE(review): rte_lpm6_add() failures are silently ignored
	 * during the rebuild. */
	while (rte_hash_iterate(lpm->rules_tbl, (void *) &rule_key,
			(void **) &next_hop, &iter) >= 0)
		rte_lpm6_add(lpm, rule_key->ip, rule_key->depth,
				(uint32_t) next_hop);
}
/*
@ -121,8 +179,9 @@ rte_lpm6_create(const char *name, int socket_id,
char mem_name[RTE_LPM6_NAMESIZE];
struct rte_lpm6 *lpm = NULL;
struct rte_tailq_entry *te;
uint64_t mem_size, rules_size;
uint64_t mem_size;
struct rte_lpm6_list *lpm_list;
struct rte_hash *rules_tbl = NULL;
lpm_list = RTE_TAILQ_CAST(rte_lpm6_tailq.head, rte_lpm6_list);
@ -136,12 +195,32 @@ rte_lpm6_create(const char *name, int socket_id,
return NULL;
}
/* create rules hash table */
snprintf(mem_name, sizeof(mem_name), "LRH_%s", name);
struct rte_hash_parameters rule_hash_tbl_params = {
.entries = config->max_rules * 1.2 +
RULE_HASH_TABLE_EXTRA_SPACE,
.key_len = sizeof(struct rte_lpm6_rule_key),
.hash_func = rule_hash,
.hash_func_init_val = 0,
.name = mem_name,
.reserved = 0,
.socket_id = socket_id,
.extra_flag = 0
};
rules_tbl = rte_hash_create(&rule_hash_tbl_params);
if (rules_tbl == NULL) {
RTE_LOG(ERR, LPM, "LPM rules hash table allocation failed: %s (%d)",
rte_strerror(rte_errno), rte_errno);
goto fail_wo_unlock;
}
snprintf(mem_name, sizeof(mem_name), "LPM_%s", name);
/* Determine the amount of memory to allocate. */
mem_size = sizeof(*lpm) + (sizeof(lpm->tbl8[0]) *
RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * config->number_tbl8s);
rules_size = sizeof(struct rte_lpm6_rule) * config->max_rules;
rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
@ -154,7 +233,7 @@ rte_lpm6_create(const char *name, int socket_id,
lpm = NULL;
if (te != NULL) {
rte_errno = EEXIST;
goto exit;
goto fail;
}
/* allocate tailq entry */
@ -162,7 +241,7 @@ rte_lpm6_create(const char *name, int socket_id,
if (te == NULL) {
RTE_LOG(ERR, LPM, "Failed to allocate tailq entry!\n");
rte_errno = ENOMEM;
goto exit;
goto fail;
}
/* Allocate memory to store the LPM data structures. */
@ -173,34 +252,28 @@ rte_lpm6_create(const char *name, int socket_id,
RTE_LOG(ERR, LPM, "LPM memory allocation failed\n");
rte_free(te);
rte_errno = ENOMEM;
goto exit;
}
lpm->rules_tbl = rte_zmalloc_socket(NULL,
(size_t)rules_size, RTE_CACHE_LINE_SIZE, socket_id);
if (lpm->rules_tbl == NULL) {
RTE_LOG(ERR, LPM, "LPM rules_tbl allocation failed\n");
rte_free(lpm);
lpm = NULL;
rte_free(te);
rte_errno = ENOMEM;
goto exit;
goto fail;
}
/* Save user arguments. */
lpm->max_rules = config->max_rules;
lpm->number_tbl8s = config->number_tbl8s;
snprintf(lpm->name, sizeof(lpm->name), "%s", name);
lpm->rules_tbl = rules_tbl;
te->data = (void *) lpm;
TAILQ_INSERT_TAIL(lpm_list, te, next);
rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
return lpm;
exit:
fail:
rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
return lpm;
fail_wo_unlock:
rte_hash_free(rules_tbl);
return NULL;
}
/*
@ -259,50 +332,86 @@ rte_lpm6_free(struct rte_lpm6 *lpm)
rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
rte_free(lpm->rules_tbl);
rte_hash_free(lpm->rules_tbl);
rte_free(lpm);
rte_free(te);
}
/*
 * Look a rule up by its pre-built hash key.
 *
 * On a hit, store the rule's next hop into *next_hop and return 1;
 * return 0 when the key is not present in the rules table.
 */
static inline int
rule_find_with_key(struct rte_lpm6 *lpm,
		const struct rte_lpm6_rule_key *rule_key,
		uint32_t *next_hop)
{
	uint64_t stored;

	/* the next hop was stored as pointer-sized data in the hash */
	if (rte_hash_lookup_data(lpm->rules_tbl, (const void *) rule_key,
			(void **) &stored) < 0)
		return 0;

	*next_hop = (uint32_t) stored;
	return 1;
}
/* Find a rule */
static int
rule_find(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
uint32_t *next_hop)
{
struct rte_lpm6_rule_key rule_key;
/* init a rule key */
rule_key_init(&rule_key, ip, depth);
return rule_find_with_key(lpm, &rule_key, next_hop);
}
/*
 * Checks if a rule already exists in the rules table and updates
 * the nexthop if so. Otherwise it adds a new rule if enough space is available.
 *
 * Returns:
 * 0 - next hop of existed rule is updated
 * 1 - new rule successfully added
 * <0 - error
 *
 * NOTE(review): the body below interleaves the pre-patch (linear array
 * scan) and post-patch (hash table) implementations as rendered by the
 * diff view; only the hash-based lines exist in the final source.
 */
static inline int32_t
rule_add(struct rte_lpm6 *lpm, uint8_t *ip, uint32_t next_hop, uint8_t depth)
static inline int
rule_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, uint32_t next_hop)
{
	uint32_t rule_index;
	int ret, rule_exist;
	struct rte_lpm6_rule_key rule_key;
	uint32_t unused;
	/* init a rule key */
	rule_key_init(&rule_key, ip, depth);
	/* Scan through rule list to see if rule already exists. */
	for (rule_index = 0; rule_index < lpm->used_rules; rule_index++) {
		/* If rule already exists update its next_hop and return. */
		if ((memcmp (lpm->rules_tbl[rule_index].ip, ip,
				RTE_LPM6_IPV6_ADDR_SIZE) == 0) &&
				lpm->rules_tbl[rule_index].depth == depth) {
			lpm->rules_tbl[rule_index].next_hop = next_hop;
			return rule_index;
		}
	}
	/* O(1) existence check replaces the scan above */
	rule_exist = rule_find_with_key(lpm, &rule_key, &unused);
	/*
	 * If rule does not exist check if there is space to add a new rule to
	 * this rule group. If there is no space return error.
	 */
	if (lpm->used_rules == lpm->max_rules) {
	if (!rule_exist && lpm->used_rules == lpm->max_rules)
		return -ENOSPC;
	}
	/* If there is space for the new rule add it. */
	rte_memcpy(lpm->rules_tbl[rule_index].ip, ip, RTE_LPM6_IPV6_ADDR_SIZE);
	lpm->rules_tbl[rule_index].next_hop = next_hop;
	lpm->rules_tbl[rule_index].depth = depth;
	/* add the rule or update rules next hop
	 * (next_hop is stored as pointer-sized data in the hash) */
	ret = rte_hash_add_key_data(lpm->rules_tbl, &rule_key,
		(void *)(uintptr_t) next_hop);
	if (ret < 0)
		return ret;
	/* Increment the used rules counter for this rule group. */
	lpm->used_rules++;
	if (!rule_exist) {
		lpm->used_rules++;
		return 1;
	}
	return rule_index;
	return 0;
}
/*
@ -492,7 +601,7 @@ rte_lpm6_add_v1705(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
{
struct rte_lpm6_tbl_entry *tbl;
struct rte_lpm6_tbl_entry *tbl_next = NULL;
int32_t rule_index;
int32_t ret;
int status;
uint8_t masked_ip[RTE_LPM6_IPV6_ADDR_SIZE];
int i;
@ -502,16 +611,14 @@ rte_lpm6_add_v1705(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
return -EINVAL;
/* Copy the IP and mask it to avoid modifying user's input data. */
memcpy(masked_ip, ip, RTE_LPM6_IPV6_ADDR_SIZE);
mask_ip(masked_ip, depth);
ip6_copy_addr(masked_ip, ip);
ip6_mask_addr(masked_ip, depth);
/* Add the rule to the rule table. */
rule_index = rule_add(lpm, masked_ip, next_hop, depth);
ret = rule_add(lpm, masked_ip, depth, next_hop);
/* If there is no space available for new rule return error. */
if (rule_index < 0) {
return rule_index;
}
if (ret < 0)
return ret;
/* Inspect the first three bytes through tbl24 on the first step. */
tbl = lpm->tbl24;
@ -724,30 +831,6 @@ MAP_STATIC_SYMBOL(int rte_lpm6_lookup_bulk_func(const struct rte_lpm6 *lpm,
int32_t *next_hops, unsigned int n),
rte_lpm6_lookup_bulk_func_v1705);
/*
 * Finds a rule in rule table.
 * NOTE: Valid range for depth parameter is 1 .. 128 inclusive.
 *
 * NOTE(review): this is the pre-patch linear-scan rule_find() that the
 * commit removes; it appears here only because the diff view shows
 * deleted lines inline. Its replacement is the hash-based rule_find()
 * defined earlier in this file.
 */
static inline int32_t
rule_find(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth)
{
	uint32_t rule_index;
	/* Scan used rules at given depth to find rule. */
	for (rule_index = 0; rule_index < lpm->used_rules; rule_index++) {
		/* If rule is found return the rule index. */
		if ((memcmp (lpm->rules_tbl[rule_index].ip, ip,
				RTE_LPM6_IPV6_ADDR_SIZE) == 0) &&
				lpm->rules_tbl[rule_index].depth == depth) {
			return rule_index;
		}
	}
	/* If rule is not found return -ENOENT. */
	return -ENOENT;
}
/*
* Look for a rule in the high-level rules table
*/
@ -775,8 +858,7 @@ int
rte_lpm6_is_rule_present_v1705(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
uint32_t *next_hop)
{
uint8_t ip_masked[RTE_LPM6_IPV6_ADDR_SIZE];
int32_t rule_index;
uint8_t masked_ip[RTE_LPM6_IPV6_ADDR_SIZE];
/* Check user arguments. */
if ((lpm == NULL) || next_hop == NULL || ip == NULL ||
@ -784,19 +866,10 @@ rte_lpm6_is_rule_present_v1705(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
return -EINVAL;
/* Copy the IP and mask it to avoid modifying user's input data. */
memcpy(ip_masked, ip, RTE_LPM6_IPV6_ADDR_SIZE);
mask_ip(ip_masked, depth);
ip6_copy_addr(masked_ip, ip);
ip6_mask_addr(masked_ip, depth);
/* Look for the rule using rule_find. */
rule_index = rule_find(lpm, ip_masked, depth);
if (rule_index >= 0) {
*next_hop = lpm->rules_tbl[rule_index].next_hop;
return 1;
}
/* If rule is not found return 0. */
return 0;
return rule_find(lpm, masked_ip, depth, next_hop);
}
BIND_DEFAULT_SYMBOL(rte_lpm6_is_rule_present, _v1705, 17.05);
MAP_STATIC_SYMBOL(int rte_lpm6_is_rule_present(struct rte_lpm6 *lpm,
@ -806,16 +879,25 @@ MAP_STATIC_SYMBOL(int rte_lpm6_is_rule_present(struct rte_lpm6 *lpm,
/*
 * Delete a rule from the rule table.
 * NOTE: Valid range for depth parameter is 1 .. 128 inclusive.
 * return
 * 0 on success
 * <0 on failure
 *
 * NOTE(review): the pre-patch (index-based) and post-patch (key-based)
 * signatures and bodies are interleaved below from the diff view; only
 * the hash-based version exists in the final source.
 */
static inline void
rule_delete(struct rte_lpm6 *lpm, int32_t rule_index)
static inline int
rule_delete(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth)
{
	/*
	 * Overwrite redundant rule with last rule in group and decrement rule
	 * counter.
	 */
	lpm->rules_tbl[rule_index] = lpm->rules_tbl[lpm->used_rules-1];
	lpm->used_rules--;
	int ret;
	struct rte_lpm6_rule_key rule_key;
	/* init rule key */
	rule_key_init(&rule_key, ip, depth);
	/* delete the rule; used_rules only shrinks when the key was present */
	ret = rte_hash_del_key(lpm->rules_tbl, (void *) &rule_key);
	if (ret >= 0)
		lpm->used_rules--;
	return ret;
}
/*
@ -824,9 +906,8 @@ rule_delete(struct rte_lpm6 *lpm, int32_t rule_index)
int
rte_lpm6_delete(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth)
{
int32_t rule_to_delete_index;
uint8_t ip_masked[RTE_LPM6_IPV6_ADDR_SIZE];
unsigned i;
uint8_t masked_ip[RTE_LPM6_IPV6_ADDR_SIZE];
int ret;
/*
* Check input arguments.
@ -836,24 +917,13 @@ rte_lpm6_delete(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth)
}
/* Copy the IP and mask it to avoid modifying user's input data. */
memcpy(ip_masked, ip, RTE_LPM6_IPV6_ADDR_SIZE);
mask_ip(ip_masked, depth);
/*
* Find the index of the input rule, that needs to be deleted, in the
* rule table.
*/
rule_to_delete_index = rule_find(lpm, ip_masked, depth);
/*
* Check if rule_to_delete_index was found. If no rule was found the
* function rule_find returns -ENOENT.
*/
if (rule_to_delete_index < 0)
return rule_to_delete_index;
ip6_copy_addr(masked_ip, ip);
ip6_mask_addr(masked_ip, depth);
/* Delete the rule from the rule table. */
rule_delete(lpm, rule_to_delete_index);
ret = rule_delete(lpm, masked_ip, depth);
if (ret < 0)
return -ENOENT;
/*
* Set all the table entries to 0 (ie delete every rule
@ -868,10 +938,7 @@ rte_lpm6_delete(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth)
* Add every rule again (except for the one that was removed from
* the rules table).
*/
for (i = 0; i < lpm->used_rules; i++) {
rte_lpm6_add(lpm, lpm->rules_tbl[i].ip, lpm->rules_tbl[i].depth,
lpm->rules_tbl[i].next_hop);
}
rebuild_lpm(lpm);
return 0;
}
@ -883,37 +950,19 @@ int
rte_lpm6_delete_bulk_func(struct rte_lpm6 *lpm,
uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE], uint8_t *depths, unsigned n)
{
int32_t rule_to_delete_index;
uint8_t ip_masked[RTE_LPM6_IPV6_ADDR_SIZE];
uint8_t masked_ip[RTE_LPM6_IPV6_ADDR_SIZE];
unsigned i;
/*
* Check input arguments.
*/
if ((lpm == NULL) || (ips == NULL) || (depths == NULL)) {
if ((lpm == NULL) || (ips == NULL) || (depths == NULL))
return -EINVAL;
}
for (i = 0; i < n; i++) {
/* Copy the IP and mask it to avoid modifying user's input data. */
memcpy(ip_masked, ips[i], RTE_LPM6_IPV6_ADDR_SIZE);
mask_ip(ip_masked, depths[i]);
/*
* Find the index of the input rule, that needs to be deleted, in the
* rule table.
*/
rule_to_delete_index = rule_find(lpm, ip_masked, depths[i]);
/*
* Check if rule_to_delete_index was found. If no rule was found the
* function rule_find returns -ENOENT.
*/
if (rule_to_delete_index < 0)
continue;
/* Delete the rule from the rule table. */
rule_delete(lpm, rule_to_delete_index);
ip6_copy_addr(masked_ip, ips[i]);
ip6_mask_addr(masked_ip, depths[i]);
rule_delete(lpm, masked_ip, depths[i]);
}
/*
@ -929,10 +978,7 @@ rte_lpm6_delete_bulk_func(struct rte_lpm6 *lpm,
* Add every rule again (except for the ones that were removed from
* the rules table).
*/
for (i = 0; i < lpm->used_rules; i++) {
rte_lpm6_add(lpm, lpm->rules_tbl[i].ip, lpm->rules_tbl[i].depth,
lpm->rules_tbl[i].next_hop);
}
rebuild_lpm(lpm);
return 0;
}
@ -957,5 +1003,5 @@ rte_lpm6_delete_all(struct rte_lpm6 *lpm)
RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * lpm->number_tbl8s);
/* Delete all rules form the rules table. */
memset(lpm->rules_tbl, 0, sizeof(struct rte_lpm6_rule) * lpm->max_rules);
rte_hash_reset(lpm->rules_tbl);
}