/* SPDX-License-Identifier: BSD-3-Clause * Copyright(c) 2010-2014 Intel Corporation */ #include #include "acl.h" #define QRANGE_MIN ((uint8_t)INT8_MIN) #define RTE_ACL_VERIFY(exp) do { \ if (!(exp)) \ rte_panic("line %d\tassert \"" #exp "\" failed\n", __LINE__); \ } while (0) struct acl_node_counters { int32_t match; int32_t match_used; int32_t single; int32_t quad; int32_t quad_vectors; int32_t dfa; int32_t dfa_gr64; }; struct rte_acl_indices { int32_t dfa_index; int32_t quad_index; int32_t single_index; int32_t match_index; int32_t match_start; }; static void acl_gen_log_stats(const struct rte_acl_ctx *ctx, const struct acl_node_counters *counts, const struct rte_acl_indices *indices, size_t max_size) { RTE_LOG(DEBUG, ACL, "Gen phase for ACL \"%s\":\n" "runtime memory footprint on socket %d:\n" "single nodes/bytes used: %d/%zu\n" "quad nodes/vectors/bytes used: %d/%d/%zu\n" "DFA nodes/group64/bytes used: %d/%d/%zu\n" "match nodes/bytes used: %d/%zu\n" "total: %zu bytes\n" "max limit: %zu bytes\n", ctx->name, ctx->socket_id, counts->single, counts->single * sizeof(uint64_t), counts->quad, counts->quad_vectors, (indices->quad_index - indices->dfa_index) * sizeof(uint64_t), counts->dfa, counts->dfa_gr64, indices->dfa_index * sizeof(uint64_t), counts->match, counts->match * sizeof(struct rte_acl_match_results), ctx->mem_sz, max_size); } static uint64_t acl_dfa_gen_idx(const struct rte_acl_node *node, uint32_t index) { uint64_t idx; uint32_t i; idx = 0; for (i = 0; i != RTE_DIM(node->dfa_gr64); i++) { RTE_ACL_VERIFY(node->dfa_gr64[i] < RTE_ACL_DFA_GR64_NUM); RTE_ACL_VERIFY(node->dfa_gr64[i] < node->fanout); idx |= (i - node->dfa_gr64[i]) << (6 + RTE_ACL_DFA_GR64_BIT * i); } return idx << (CHAR_BIT * sizeof(index)) | index | node->node_type; } static void acl_dfa_fill_gr64(const struct rte_acl_node *node, const uint64_t src[RTE_ACL_DFA_SIZE], uint64_t dst[RTE_ACL_DFA_SIZE]) { uint32_t i; for (i = 0; i != RTE_DIM(node->dfa_gr64); i++) { memcpy(dst + node->dfa_gr64[i] * RTE_ACL_DFA_GR64_SIZE, src + i * RTE_ACL_DFA_GR64_SIZE, RTE_ACL_DFA_GR64_SIZE * sizeof(dst[0])); } } static uint32_t acl_dfa_count_gr64(const uint64_t array_ptr[RTE_ACL_DFA_SIZE], uint8_t gr64[RTE_ACL_DFA_GR64_NUM]) { uint32_t i, j, k; k = 0; for (i = 0; i != RTE_ACL_DFA_GR64_NUM; i++) { gr64[i] = i; for (j = 0; j != i; j++) { if (memcmp(array_ptr + i * RTE_ACL_DFA_GR64_SIZE, array_ptr + j * RTE_ACL_DFA_GR64_SIZE, RTE_ACL_DFA_GR64_SIZE * sizeof(array_ptr[0])) == 0) break; } gr64[i] = (j != i) ? gr64[j] : k++; } return k; } static uint32_t acl_node_fill_dfa(const struct rte_acl_node *node, uint64_t dfa[RTE_ACL_DFA_SIZE], uint64_t no_match, int32_t resolved) { uint32_t n, x; uint32_t ranges, last_bit; struct rte_acl_node *child; struct rte_acl_bitset *bits; ranges = 0; last_bit = 0; for (n = 0; n < RTE_ACL_DFA_SIZE; n++) dfa[n] = no_match; for (x = 0; x < node->num_ptrs; x++) { child = node->ptrs[x].ptr; if (child == NULL) continue; bits = &node->ptrs[x].values; for (n = 0; n < RTE_ACL_DFA_SIZE; n++) { if (bits->bits[n / (sizeof(bits_t) * CHAR_BIT)] & (1U << (n % (sizeof(bits_t) * CHAR_BIT)))) { dfa[n] = resolved ? child->node_index : x; ranges += (last_bit == 0); last_bit = 1; } else { last_bit = 0; } } } return ranges; } /* * Counts the number of groups of sequential bits that are * either 0 or 1, as specified by the zero_one parameter. This is used to * calculate the number of ranges in a node to see if it fits in a quad range * node. */ static int acl_count_sequential_groups(struct rte_acl_bitset *bits, int zero_one) { int n, ranges, last_bit; ranges = 0; last_bit = zero_one ^ 1; for (n = QRANGE_MIN; n < UINT8_MAX + 1; n++) { if (bits->bits[n / (sizeof(bits_t) * 8)] & (1U << (n % (sizeof(bits_t) * 8)))) { if (zero_one == 1 && last_bit != 1) ranges++; last_bit = 1; } else { if (zero_one == 0 && last_bit != 0) ranges++; last_bit = 0; } } for (n = 0; n < QRANGE_MIN; n++) { if (bits->bits[n / (sizeof(bits_t) * 8)] & (1U << (n % (sizeof(bits_t) * CHAR_BIT)))) { if (zero_one == 1 && last_bit != 1) ranges++; last_bit = 1; } else { if (zero_one == 0 && last_bit != 0) ranges++; last_bit = 0; } } return ranges; } /* * Count number of ranges spanned by the node's pointers */ static int acl_count_fanout(struct rte_acl_node *node) { uint32_t n; int ranges; if (node->fanout != 0) return node->fanout; ranges = acl_count_sequential_groups(&node->values, 0); for (n = 0; n < node->num_ptrs; n++) { if (node->ptrs[n].ptr != NULL) ranges += acl_count_sequential_groups( &node->ptrs[n].values, 1); } node->fanout = ranges; return node->fanout; } /* * Determine the type of nodes and count each type */ static void acl_count_trie_types(struct acl_node_counters *counts, struct rte_acl_node *node, uint64_t no_match, int force_dfa) { uint32_t n; int num_ptrs; uint64_t dfa[RTE_ACL_DFA_SIZE]; /* skip if this node has been counted */ if (node->node_type != (uint32_t)RTE_ACL_NODE_UNDEFINED) return; if (node->match_flag != 0 || node->num_ptrs == 0) { counts->match++; node->node_type = RTE_ACL_NODE_MATCH; return; } num_ptrs = acl_count_fanout(node); /* Force type to dfa */ if (force_dfa) num_ptrs = RTE_ACL_DFA_SIZE; /* determine node type based on number of ranges */ if (num_ptrs == 1) { counts->single++; node->node_type = RTE_ACL_NODE_SINGLE; } else if (num_ptrs <= RTE_ACL_QUAD_MAX) { counts->quad++; counts->quad_vectors += node->fanout; node->node_type = RTE_ACL_NODE_QRANGE; } else { counts->dfa++; node->node_type = RTE_ACL_NODE_DFA; if (force_dfa != 0) { /* always expand to a max number of nodes. */ for (n = 0; n != RTE_DIM(node->dfa_gr64); n++) node->dfa_gr64[n] = n; node->fanout = n; } else { acl_node_fill_dfa(node, dfa, no_match, 0); node->fanout = acl_dfa_count_gr64(dfa, node->dfa_gr64); } counts->dfa_gr64 += node->fanout; } /* * recursively count the types of all children */ for (n = 0; n < node->num_ptrs; n++) { if (node->ptrs[n].ptr != NULL) acl_count_trie_types(counts, node->ptrs[n].ptr, no_match, 0); } } static void acl_add_ptrs(struct rte_acl_node *node, uint64_t *node_array, uint64_t no_match, int resolved) { uint32_t x; int32_t m; uint64_t *node_a, index, dfa[RTE_ACL_DFA_SIZE]; acl_node_fill_dfa(node, dfa, no_match, resolved); /* * Rather than going from 0 to 256, the range count and * the layout are from 80-ff then 0-7f due to signed compare * for SSE (cmpgt). */ if (node->node_type == RTE_ACL_NODE_QRANGE) { m = 0; node_a = node_array; index = dfa[QRANGE_MIN]; *node_a++ = index; for (x = QRANGE_MIN + 1; x < UINT8_MAX + 1; x++) { if (dfa[x] != index) { index = dfa[x]; *node_a++ = index; node->transitions[m++] = (uint8_t)(x - 1); } } for (x = 0; x < INT8_MAX + 1; x++) { if (dfa[x] != index) { index = dfa[x]; *node_a++ = index; node->transitions[m++] = (uint8_t)(x - 1); } } /* fill unused locations with max value - nothing is greater */ for (; m < RTE_ACL_QUAD_SIZE; m++) node->transitions[m] = INT8_MAX; RTE_ACL_VERIFY(m <= RTE_ACL_QUAD_SIZE); } else if (node->node_type == RTE_ACL_NODE_DFA && resolved) { acl_dfa_fill_gr64(node, dfa, node_array); } } /* * Routine that allocates space for this node and recursively calls * to allocate space for each child. Once all the children are allocated, * then resolve all transitions for this node. */ static void acl_gen_node(struct rte_acl_node *node, uint64_t *node_array, uint64_t no_match, struct rte_acl_indices *index, int num_categories) { uint32_t n, sz, *qtrp; uint64_t *array_ptr; struct rte_acl_match_results *match; if (node->node_index != RTE_ACL_NODE_UNDEFINED) return; array_ptr = NULL; switch (node->node_type) { case RTE_ACL_NODE_DFA: array_ptr = &node_array[index->dfa_index]; node->node_index = acl_dfa_gen_idx(node, index->dfa_index); sz = node->fanout * RTE_ACL_DFA_GR64_SIZE; index->dfa_index += sz; for (n = 0; n < sz; n++) array_ptr[n] = no_match; break; case RTE_ACL_NODE_SINGLE: node->node_index = RTE_ACL_QUAD_SINGLE | index->single_index | node->node_type; array_ptr = &node_array[index->single_index]; index->single_index += 1; array_ptr[0] = no_match; break; case RTE_ACL_NODE_QRANGE: array_ptr = &node_array[index->quad_index]; acl_add_ptrs(node, array_ptr, no_match, 0); qtrp = (uint32_t *)node->transitions; node->node_index = qtrp[0]; node->node_index <<= sizeof(index->quad_index) * CHAR_BIT; node->node_index |= index->quad_index | node->node_type; index->quad_index += node->fanout; break; case RTE_ACL_NODE_MATCH: match = ((struct rte_acl_match_results *) (node_array + index->match_start)); for (n = 0; n != RTE_DIM(match->results); n++) RTE_ACL_VERIFY(match->results[0] == 0); memcpy(match + index->match_index, node->mrt, sizeof(*node->mrt)); node->node_index = index->match_index | node->node_type; index->match_index += 1; break; case RTE_ACL_NODE_UNDEFINED: RTE_ACL_VERIFY(node->node_type != (uint32_t)RTE_ACL_NODE_UNDEFINED); break; } /* recursively allocate space for all children */ for (n = 0; n < node->num_ptrs; n++) { if (node->ptrs[n].ptr != NULL) acl_gen_node(node->ptrs[n].ptr, node_array, no_match, index, num_categories); } /* All children are resolved, resolve this node's pointers */ switch (node->node_type) { case RTE_ACL_NODE_DFA: acl_add_ptrs(node, array_ptr, no_match, 1); break; case RTE_ACL_NODE_SINGLE: for (n = 0; n < node->num_ptrs; n++) { if (node->ptrs[n].ptr != NULL) array_ptr[0] = node->ptrs[n].ptr->node_index; } break; case RTE_ACL_NODE_QRANGE: acl_add_ptrs(node, array_ptr, no_match, 1); break; case RTE_ACL_NODE_MATCH: break; case RTE_ACL_NODE_UNDEFINED: RTE_ACL_VERIFY(node->node_type != (uint32_t)RTE_ACL_NODE_UNDEFINED); break; } } static void acl_calc_counts_indices(struct acl_node_counters *counts, struct rte_acl_indices *indices, struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries, uint64_t no_match) { uint32_t n; memset(indices, 0, sizeof(*indices)); memset(counts, 0, sizeof(*counts)); /* Get stats on nodes */ for (n = 0; n < num_tries; n++) { acl_count_trie_types(counts, node_bld_trie[n].trie, no_match, 1); } indices->dfa_index = RTE_ACL_DFA_SIZE + 1; indices->quad_index = indices->dfa_index + counts->dfa_gr64 * RTE_ACL_DFA_GR64_SIZE; indices->single_index = indices->quad_index + counts->quad_vectors; indices->match_start = indices->single_index + counts->single + 1; indices->match_start = RTE_ALIGN(indices->match_start, (XMM_SIZE / sizeof(uint64_t))); indices->match_index = 1; } /* * Generate the runtime structure using build structure */ int rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie, struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries, uint32_t num_categories, uint32_t data_index_sz, size_t max_size) { void *mem; size_t total_size; uint64_t *node_array, no_match; uint32_t n, match_index; struct rte_acl_match_results *match; struct acl_node_counters counts; struct rte_acl_indices indices; no_match = RTE_ACL_NODE_MATCH; /* Fill counts and indices arrays from the nodes. */ acl_calc_counts_indices(&counts, &indices, node_bld_trie, num_tries, no_match); /* Allocate runtime memory (align to cache boundary) */ total_size = RTE_ALIGN(data_index_sz, RTE_CACHE_LINE_SIZE) + indices.match_start * sizeof(uint64_t) + (counts.match + 1) * sizeof(struct rte_acl_match_results) + XMM_SIZE; if (total_size > max_size) { RTE_LOG(DEBUG, ACL, "Gen phase for ACL ctx \"%s\" exceeds max_size limit, " "bytes required: %zu, allowed: %zu\n", ctx->name, total_size, max_size); return -ERANGE; } mem = rte_zmalloc_socket(ctx->name, total_size, RTE_CACHE_LINE_SIZE, ctx->socket_id); if (mem == NULL) { RTE_LOG(ERR, ACL, "allocation of %zu bytes on socket %d for %s failed\n", total_size, ctx->socket_id, ctx->name); return -ENOMEM; } /* Fill the runtime structure */ match_index = indices.match_start; node_array = (uint64_t *)((uintptr_t)mem + RTE_ALIGN(data_index_sz, RTE_CACHE_LINE_SIZE)); /* * Setup the NOMATCH node (a SINGLE at the * highest index, that points to itself) */ node_array[RTE_ACL_DFA_SIZE] = RTE_ACL_IDLE_NODE; for (n = 0; n < RTE_ACL_DFA_SIZE; n++) node_array[n] = no_match; /* NOMATCH result at index 0 */ match = ((struct rte_acl_match_results *)(node_array + match_index)); memset(match, 0, sizeof(*match)); for (n = 0; n < num_tries; n++) { acl_gen_node(node_bld_trie[n].trie, node_array, no_match, &indices, num_categories); if (node_bld_trie[n].trie->node_index == no_match) trie[n].root_index = 0; else trie[n].root_index = node_bld_trie[n].trie->node_index; } ctx->mem = mem; ctx->mem_sz = total_size; ctx->data_indexes = mem; ctx->num_tries = num_tries; ctx->num_categories = num_categories; ctx->match_index = match_index; ctx->no_match = no_match; ctx->idle = node_array[RTE_ACL_DFA_SIZE]; ctx->trans_table = node_array; memcpy(ctx->trie, trie, sizeof(ctx->trie)); acl_gen_log_stats(ctx, &counts, &indices, max_size); return 0; }