/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <rte_acl.h>
#include "acl.h"

#define QRANGE_MIN	((uint8_t)INT8_MIN)

#define RTE_ACL_VERIFY(exp) do { \
	if (!(exp)) \
		rte_panic("line %d\tassert \"" #exp "\" failed\n", __LINE__); \
} while (0)
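
/*
 * Per node-type counters gathered while walking the build-time tries;
 * used to size each region of the runtime node array.
 */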
struct acl_node_counters {
	int32_t match;
	int32_t match_used;
	int32_t single;
	int32_t quad;
	int32_t quad_vectors;
	int32_t dfa;
	int32_t dfa_gr64;
};
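
/*
 * Running offsets into the runtime node array for each node-type region
 * (DFA, quad, single, match) while nodes are being laid out.
 */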
struct rte_acl_indices {
	int32_t dfa_index;
	int32_t quad_index;
	int32_t single_index;
	int32_t match_index;
	int32_t match_start;
};
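
/*
 * Log a summary of the generated runtime image: how many nodes of each
 * type were produced and how many bytes each region occupies.
 */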
static void
acl_gen_log_stats(const struct rte_acl_ctx *ctx,
	const struct acl_node_counters *counts,
	const struct rte_acl_indices *indices,
	size_t max_size)
{
	RTE_LOG(DEBUG, ACL, "Gen phase for ACL \"%s\":\n"
		"runtime memory footprint on socket %d:\n"
		"single nodes/bytes used: %d/%zu\n"
		"quad nodes/vectors/bytes used: %d/%d/%zu\n"
		"DFA nodes/group64/bytes used: %d/%d/%zu\n"
		"match nodes/bytes used: %d/%zu\n"
		"total: %zu bytes\n"
		"max limit: %zu bytes\n",
		ctx->name, ctx->socket_id,
		counts->single, counts->single * sizeof(uint64_t),
		counts->quad, counts->quad_vectors,
		(indices->quad_index - indices->dfa_index) * sizeof(uint64_t),
		counts->dfa, counts->dfa_gr64,
		indices->dfa_index * sizeof(uint64_t),
		counts->match,
		counts->match * sizeof(struct rte_acl_match_results),
		ctx->mem_sz,
		max_size);
}
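
/*
 * Build the 64-bit transition value for a DFA node: for each group of 64
 * input values, the distance between its natural position and its
 * compacted slot is packed into the upper 32 bits, while the node's
 * position in the node array and its type flags occupy the lower 32 bits.
 */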
static uint64_t
acl_dfa_gen_idx(const struct rte_acl_node *node, uint32_t index)
{
	uint64_t idx;
	uint32_t i;

	idx = 0;
	for (i = 0; i != RTE_DIM(node->dfa_gr64); i++) {
		RTE_ACL_VERIFY(node->dfa_gr64[i] < RTE_ACL_DFA_GR64_NUM);
		RTE_ACL_VERIFY(node->dfa_gr64[i] < node->fanout);
		idx |= (i - node->dfa_gr64[i]) <<
			(6 + RTE_ACL_DFA_GR64_BIT * i);
	}

	return idx << (CHAR_BIT * sizeof(index)) | index | node->node_type;
}
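
/*
 * Copy each 64-entry group of a fully expanded DFA node into the
 * compacted slot assigned to it by node->dfa_gr64[].
 */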
static void
acl_dfa_fill_gr64(const struct rte_acl_node *node,
	const uint64_t src[RTE_ACL_DFA_SIZE], uint64_t dst[RTE_ACL_DFA_SIZE])
{
	uint32_t i;

	for (i = 0; i != RTE_DIM(node->dfa_gr64); i++) {
		memcpy(dst + node->dfa_gr64[i] * RTE_ACL_DFA_GR64_SIZE,
			src + i * RTE_ACL_DFA_GR64_SIZE,
			RTE_ACL_DFA_GR64_SIZE * sizeof(dst[0]));
	}
}
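
/*
 * Detect duplicate 64-entry groups within an expanded DFA node.
 * Each group is mapped (via gr64[]) to the first identical group seen;
 * the return value is the number of unique groups.
 */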
static uint32_t
acl_dfa_count_gr64(const uint64_t array_ptr[RTE_ACL_DFA_SIZE],
	uint8_t gr64[RTE_ACL_DFA_GR64_NUM])
{
	uint32_t i, j, k;

	k = 0;
	for (i = 0; i != RTE_ACL_DFA_GR64_NUM; i++) {
		gr64[i] = i;
		for (j = 0; j != i; j++) {
			if (memcmp(array_ptr + i * RTE_ACL_DFA_GR64_SIZE,
					array_ptr + j * RTE_ACL_DFA_GR64_SIZE,
					RTE_ACL_DFA_GR64_SIZE *
					sizeof(array_ptr[0])) == 0)
				break;
		}
		gr64[i] = (j != i) ? gr64[j] : k++;
	}

	return k;
}
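
/*
 * Expand a node into a full 256-entry transition array: entries covered
 * by a child pointer's bitset take that child (its node_index once
 * resolved, otherwise the pointer's ordinal), all others take no_match.
 * Returns the number of contiguous runs of entries that lead to a child.
 */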
static uint32_t
acl_node_fill_dfa(const struct rte_acl_node *node,
	uint64_t dfa[RTE_ACL_DFA_SIZE], uint64_t no_match, int32_t resolved)
{
	uint32_t n, x;
	uint32_t ranges, last_bit;
	struct rte_acl_node *child;
	struct rte_acl_bitset *bits;

	ranges = 0;
	last_bit = 0;

	for (n = 0; n < RTE_ACL_DFA_SIZE; n++)
		dfa[n] = no_match;

	for (x = 0; x < node->num_ptrs; x++) {

		child = node->ptrs[x].ptr;
		if (child == NULL)
			continue;

		bits = &node->ptrs[x].values;
		for (n = 0; n < RTE_ACL_DFA_SIZE; n++) {

			if (bits->bits[n / (sizeof(bits_t) * CHAR_BIT)] &
				(1U << (n % (sizeof(bits_t) * CHAR_BIT)))) {

				dfa[n] = resolved ? child->node_index : x;
				ranges += (last_bit == 0);
				last_bit = 1;
			} else {
				last_bit = 0;
			}
		}
	}

	return ranges;
}

/*
 * Counts the number of groups of sequential bits that are
 * either 0 or 1, as specified by the zero_one parameter. This is used to
 * calculate the number of ranges in a node to see if it fits in a quad range
 * node.
 */
static int
acl_count_sequential_groups(struct rte_acl_bitset *bits, int zero_one)
{
	int n, ranges, last_bit;

	ranges = 0;
	last_bit = zero_one ^ 1;

	for (n = QRANGE_MIN; n < UINT8_MAX + 1; n++) {
		if (bits->bits[n / (sizeof(bits_t) * 8)] &
				(1U << (n % (sizeof(bits_t) * 8)))) {
			if (zero_one == 1 && last_bit != 1)
				ranges++;
			last_bit = 1;
		} else {
			if (zero_one == 0 && last_bit != 0)
				ranges++;
			last_bit = 0;
		}
	}
	for (n = 0; n < QRANGE_MIN; n++) {
		if (bits->bits[n / (sizeof(bits_t) * 8)] &
				(1U << (n % (sizeof(bits_t) * CHAR_BIT)))) {
			if (zero_one == 1 && last_bit != 1)
				ranges++;
			last_bit = 1;
		} else {
			if (zero_one == 0 && last_bit != 0)
				ranges++;
			last_bit = 0;
		}
	}

	return ranges;
}

/*
 * Count number of ranges spanned by the node's pointers
 */
static int
acl_count_fanout(struct rte_acl_node *node)
{
	uint32_t n;
	int ranges;

	if (node->fanout != 0)
		return node->fanout;

	ranges = acl_count_sequential_groups(&node->values, 0);

	for (n = 0; n < node->num_ptrs; n++) {
		if (node->ptrs[n].ptr != NULL)
			ranges += acl_count_sequential_groups(
				&node->ptrs[n].values, 1);
	}

	node->fanout = ranges;
	return node->fanout;
}

/*
 * Determine the type of nodes and count each type
 */
static void
acl_count_trie_types(struct acl_node_counters *counts,
	struct rte_acl_node *node, uint64_t no_match, int force_dfa)
{
	uint32_t n;
	int num_ptrs;
	uint64_t dfa[RTE_ACL_DFA_SIZE];

	/* skip if this node has been counted */
	if (node->node_type != (uint32_t)RTE_ACL_NODE_UNDEFINED)
		return;

	if (node->match_flag != 0 || node->num_ptrs == 0) {
		counts->match++;
		node->node_type = RTE_ACL_NODE_MATCH;
		return;
	}

	num_ptrs = acl_count_fanout(node);

	/* Force type to dfa */
	if (force_dfa)
		num_ptrs = RTE_ACL_DFA_SIZE;

	/* determine node type based on number of ranges */
	if (num_ptrs == 1) {
		counts->single++;
		node->node_type = RTE_ACL_NODE_SINGLE;
	} else if (num_ptrs <= RTE_ACL_QUAD_MAX) {
		counts->quad++;
		counts->quad_vectors += node->fanout;
		node->node_type = RTE_ACL_NODE_QRANGE;
	} else {
		counts->dfa++;
		node->node_type = RTE_ACL_NODE_DFA;
		if (force_dfa != 0) {
			/* always expand to a max number of nodes. */
			for (n = 0; n != RTE_DIM(node->dfa_gr64); n++)
				node->dfa_gr64[n] = n;
			node->fanout = n;
		} else {
			acl_node_fill_dfa(node, dfa, no_match, 0);
			node->fanout = acl_dfa_count_gr64(dfa, node->dfa_gr64);
		}
		counts->dfa_gr64 += node->fanout;
	}

	/*
	 * recursively count the types of all children
	 */
	for (n = 0; n < node->num_ptrs; n++) {
		if (node->ptrs[n].ptr != NULL)
			acl_count_trie_types(counts, node->ptrs[n].ptr,
				no_match, 0);
	}
}
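
/*
 * Write the transition entries for a QRANGE or DFA node into the runtime
 * node array. For QRANGE nodes the range boundaries are also recorded in
 * node->transitions; for DFA nodes the group64 blocks are copied into
 * place once child node indices have been resolved.
 */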
static void
acl_add_ptrs(struct rte_acl_node *node, uint64_t *node_array, uint64_t no_match,
	int resolved)
{
	uint32_t x;
	int32_t m;
	uint64_t *node_a, index, dfa[RTE_ACL_DFA_SIZE];

	acl_node_fill_dfa(node, dfa, no_match, resolved);

	/*
	 * Rather than going from 0 to 256, the range count and
	 * the layout are from 80-ff then 0-7f due to signed compare
	 * for SSE (cmpgt).
	 */
	if (node->node_type == RTE_ACL_NODE_QRANGE) {

		m = 0;
		node_a = node_array;
		index = dfa[QRANGE_MIN];
		*node_a++ = index;

		for (x = QRANGE_MIN + 1; x < UINT8_MAX + 1; x++) {
			if (dfa[x] != index) {
				index = dfa[x];
				*node_a++ = index;
				node->transitions[m++] = (uint8_t)(x - 1);
			}
		}

		for (x = 0; x < INT8_MAX + 1; x++) {
			if (dfa[x] != index) {
				index = dfa[x];
				*node_a++ = index;
				node->transitions[m++] = (uint8_t)(x - 1);
			}
		}

		/* fill unused locations with max value - nothing is greater */
		for (; m < RTE_ACL_QUAD_SIZE; m++)
			node->transitions[m] = INT8_MAX;

		RTE_ACL_VERIFY(m <= RTE_ACL_QUAD_SIZE);

	} else if (node->node_type == RTE_ACL_NODE_DFA && resolved) {
		acl_dfa_fill_gr64(node, dfa, node_array);
	}
}

/*
 * Routine that allocates space for this node and recursively calls
 * to allocate space for each child. Once all the children are allocated,
 * then resolve all transitions for this node.
 */
static void
acl_gen_node(struct rte_acl_node *node, uint64_t *node_array,
	uint64_t no_match, struct rte_acl_indices *index, int num_categories)
{
	uint32_t n, sz, *qtrp;
	uint64_t *array_ptr;
	struct rte_acl_match_results *match;

	if (node->node_index != RTE_ACL_NODE_UNDEFINED)
		return;

	array_ptr = NULL;

	switch (node->node_type) {
	case RTE_ACL_NODE_DFA:
		array_ptr = &node_array[index->dfa_index];
		node->node_index = acl_dfa_gen_idx(node, index->dfa_index);
		sz = node->fanout * RTE_ACL_DFA_GR64_SIZE;
		index->dfa_index += sz;
		for (n = 0; n < sz; n++)
			array_ptr[n] = no_match;
		break;
	case RTE_ACL_NODE_SINGLE:
		node->node_index = RTE_ACL_QUAD_SINGLE | index->single_index |
			node->node_type;
		array_ptr = &node_array[index->single_index];
		index->single_index += 1;
		array_ptr[0] = no_match;
		break;
	case RTE_ACL_NODE_QRANGE:
		array_ptr = &node_array[index->quad_index];
		acl_add_ptrs(node, array_ptr, no_match, 0);
		qtrp = (uint32_t *)node->transitions;
		node->node_index = qtrp[0];
		node->node_index <<= sizeof(index->quad_index) * CHAR_BIT;
		node->node_index |= index->quad_index | node->node_type;
		index->quad_index += node->fanout;
		break;
	case RTE_ACL_NODE_MATCH:
		match = ((struct rte_acl_match_results *)
			(node_array + index->match_start));
		for (n = 0; n != RTE_DIM(match->results); n++)
			RTE_ACL_VERIFY(match->results[0] == 0);
		memcpy(match + index->match_index, node->mrt,
			sizeof(*node->mrt));
		node->node_index = index->match_index | node->node_type;
		index->match_index += 1;
		break;
	case RTE_ACL_NODE_UNDEFINED:
		RTE_ACL_VERIFY(node->node_type !=
			(uint32_t)RTE_ACL_NODE_UNDEFINED);
		break;
	}

	/* recursively allocate space for all children */
	for (n = 0; n < node->num_ptrs; n++) {
		if (node->ptrs[n].ptr != NULL)
			acl_gen_node(node->ptrs[n].ptr,
				node_array,
				no_match,
				index,
				num_categories);
	}

	/* All children are resolved, resolve this node's pointers */
	switch (node->node_type) {
	case RTE_ACL_NODE_DFA:
		acl_add_ptrs(node, array_ptr, no_match, 1);
		break;
	case RTE_ACL_NODE_SINGLE:
		for (n = 0; n < node->num_ptrs; n++) {
			if (node->ptrs[n].ptr != NULL)
				array_ptr[0] = node->ptrs[n].ptr->node_index;
		}
		break;
	case RTE_ACL_NODE_QRANGE:
		acl_add_ptrs(node, array_ptr, no_match, 1);
		break;
	case RTE_ACL_NODE_MATCH:
		break;
	case RTE_ACL_NODE_UNDEFINED:
		RTE_ACL_VERIFY(node->node_type !=
			(uint32_t)RTE_ACL_NODE_UNDEFINED);
		break;
	}
}
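
/*
 * Walk all build-time tries, classifying and counting node types, then
 * derive the start offset of each node-type region (DFA, quad, single,
 * match) within the runtime node array.
 */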
static void
acl_calc_counts_indices(struct acl_node_counters *counts,
	struct rte_acl_indices *indices,
	struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
	uint64_t no_match)
{
	uint32_t n;

	memset(indices, 0, sizeof(*indices));
	memset(counts, 0, sizeof(*counts));

	/* Get stats on nodes */
	for (n = 0; n < num_tries; n++) {
		acl_count_trie_types(counts, node_bld_trie[n].trie,
			no_match, 1);
	}

	indices->dfa_index = RTE_ACL_DFA_SIZE + 1;
	indices->quad_index = indices->dfa_index +
		counts->dfa_gr64 * RTE_ACL_DFA_GR64_SIZE;
	indices->single_index = indices->quad_index + counts->quad_vectors;
	indices->match_start = indices->single_index + counts->single + 1;
	indices->match_start = RTE_ALIGN(indices->match_start,
		(XMM_SIZE / sizeof(uint64_t)));
	indices->match_index = 1;
}

/*
 * Generate the runtime structure using build structure
 */
int
rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie,
	struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
	uint32_t num_categories, uint32_t data_index_sz, size_t max_size)
{
	void *mem;
	size_t total_size;
	uint64_t *node_array, no_match;
	uint32_t n, match_index;
	struct rte_acl_match_results *match;
	struct acl_node_counters counts;
	struct rte_acl_indices indices;

	no_match = RTE_ACL_NODE_MATCH;

	/* Fill counts and indices arrays from the nodes. */
	acl_calc_counts_indices(&counts, &indices,
		node_bld_trie, num_tries, no_match);

	/* Allocate runtime memory (align to cache boundary) */
	total_size = RTE_ALIGN(data_index_sz, RTE_CACHE_LINE_SIZE) +
		indices.match_start * sizeof(uint64_t) +
		(counts.match + 1) * sizeof(struct rte_acl_match_results) +
		XMM_SIZE;

	if (total_size > max_size) {
		RTE_LOG(DEBUG, ACL,
			"Gen phase for ACL ctx \"%s\" exceeds max_size limit, "
			"bytes required: %zu, allowed: %zu\n",
			ctx->name, total_size, max_size);
		return -ERANGE;
	}

	mem = rte_zmalloc_socket(ctx->name, total_size, RTE_CACHE_LINE_SIZE,
			ctx->socket_id);
	if (mem == NULL) {
		RTE_LOG(ERR, ACL,
			"allocation of %zu bytes on socket %d for %s failed\n",
			total_size, ctx->socket_id, ctx->name);
		return -ENOMEM;
	}

	/* Fill the runtime structure */
	match_index = indices.match_start;
	node_array = (uint64_t *)((uintptr_t)mem +
		RTE_ALIGN(data_index_sz, RTE_CACHE_LINE_SIZE));

	/*
	 * Setup the NOMATCH node (a SINGLE at the
	 * highest index, that points to itself)
	 */

	node_array[RTE_ACL_DFA_SIZE] = RTE_ACL_IDLE_NODE;

	for (n = 0; n < RTE_ACL_DFA_SIZE; n++)
		node_array[n] = no_match;

	/* NOMATCH result at index 0 */
	match = ((struct rte_acl_match_results *)(node_array + match_index));
	memset(match, 0, sizeof(*match));

	for (n = 0; n < num_tries; n++) {

		acl_gen_node(node_bld_trie[n].trie, node_array, no_match,
			&indices, num_categories);

		if (node_bld_trie[n].trie->node_index == no_match)
			trie[n].root_index = 0;
		else
			trie[n].root_index = node_bld_trie[n].trie->node_index;
	}

	ctx->mem = mem;
	ctx->mem_sz = total_size;
	ctx->data_indexes = mem;
	ctx->num_tries = num_tries;
	ctx->num_categories = num_categories;
	ctx->match_index = match_index;
	ctx->no_match = no_match;
	ctx->idle = node_array[RTE_ACL_DFA_SIZE];
	ctx->trans_table = node_array;
	memcpy(ctx->trie, trie, sizeof(ctx->trie));

	acl_gen_log_stats(ctx, &counts, &indices, max_size);
	return 0;
}