numam-dpdk/drivers/net/mlx5/mlx5_hws_cnt.c
Michael Baum d37435dc3f net/mlx5: assert for enough space in counter rings
There is a by-design assumption in the code that the global counter
rings can contain all the port counters, so enqueuing to these global
rings should always succeed.

Add assertions to help debug this assumption.

In addition, change the mlx5_hws_cnt_pool_put() function to return void,
following from the same assumption.

Signed-off-by: Michael Baum <michaelba@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
Acked-by: Xiaoyu Min <jackmin@nvidia.com>
2022-11-10 18:15:45 +01:00
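
Below is a minimal sketch of the asserted-enqueue pattern described above
(the names cpool->free_list, cnt_id and MLX5_ASSERT() are taken from the pool
code below; the snippet itself is only an illustration, not part of the patch):

    ret = rte_ring_enqueue_elem(cpool->free_list, &cnt_id, sizeof(cnt_id));
    /* The ring is sized to hold all port counters, so this cannot fail. */
    MLX5_ASSERT(ret == 0);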


/* SPDX-License-Identifier: BSD-3-Clause
* Copyright 2020 Mellanox Technologies, Ltd
*/
#include <stdint.h>
#include <rte_malloc.h>
#include <mlx5_malloc.h>
#include <rte_ring.h>
#include <mlx5_devx_cmds.h>
#include <rte_cycles.h>
#include <rte_eal_paging.h>
#if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
#include "mlx5_utils.h"
#include "mlx5_hws_cnt.h"
#define HWS_CNT_CACHE_SZ_DEFAULT 511
#define HWS_CNT_CACHE_PRELOAD_DEFAULT 254
#define HWS_CNT_CACHE_FETCH_DEFAULT 254
#define HWS_CNT_CACHE_THRESHOLD_DEFAULT 254
#define HWS_CNT_ALLOC_FACTOR_DEFAULT 20
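/**
 * Pre-load counter IDs into the per-queue caches and the global free list.
 *
 * The first "preload_sz" IDs of each queue are enqueued into that queue's
 * cache (when it exists), and all remaining IDs go to the global free list,
 * keeping the internal index order from 0 upwards.
 *
 * @param cpool
 *   Pointer to the counter pool.
 */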
static void
__hws_cnt_id_load(struct mlx5_hws_cnt_pool *cpool)
{
uint32_t preload;
uint32_t q_num = cpool->cache->q_num;
uint32_t cnt_num = mlx5_hws_cnt_pool_get_size(cpool);
cnt_id_t cnt_id;
uint32_t qidx, iidx = 0;
struct rte_ring *qcache = NULL;
/*
* The counter ID order is important for tracking the maximum number of
* counters in use for querying, which means the counter internal index
* order must go from zero up to the number configured by the user,
* e.g. 0 - 8000000.
* The counter IDs are loaded in this order into the queue caches first,
* and then into the global free list.
* As a result, the user fetches counters from the minimal index to the
* maximal one.
*/
preload = RTE_MIN(cpool->cache->preload_sz, cnt_num / q_num);
for (qidx = 0; qidx < q_num; qidx++) {
for (; iidx < preload * (qidx + 1); iidx++) {
cnt_id = mlx5_hws_cnt_id_gen(cpool, iidx);
qcache = cpool->cache->qcache[qidx];
if (qcache)
rte_ring_enqueue_elem(qcache, &cnt_id,
sizeof(cnt_id));
}
}
for (; iidx < cnt_num; iidx++) {
cnt_id = mlx5_hws_cnt_id_gen(cpool, iidx);
rte_ring_enqueue_elem(cpool->free_list, &cnt_id,
sizeof(cnt_id));
}
}
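/**
 * One service cycle for a counter pool.
 *
 * Bumps the query generation, triggers an ASO counter query, then moves all
 * counter IDs waiting for reset from the wait-reset ring to the reuse ring
 * using zero-copy ring operations, repeating until the wait-reset ring is
 * empty.
 *
 * @param sh
 *   Pointer to the shared device context.
 * @param cpool
 *   Pointer to the counter pool.
 */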
static void
__mlx5_hws_cnt_svc(struct mlx5_dev_ctx_shared *sh,
struct mlx5_hws_cnt_pool *cpool)
{
struct rte_ring *reset_list = cpool->wait_reset_list;
struct rte_ring *reuse_list = cpool->reuse_list;
uint32_t reset_cnt_num;
struct rte_ring_zc_data zcdr = {0};
struct rte_ring_zc_data zcdu = {0};
uint32_t ret __rte_unused;
reset_cnt_num = rte_ring_count(reset_list);
do {
cpool->query_gen++;
mlx5_aso_cnt_query(sh, cpool);
zcdr.n1 = 0;
zcdu.n1 = 0;
ret = rte_ring_enqueue_zc_burst_elem_start(reuse_list,
sizeof(cnt_id_t),
reset_cnt_num, &zcdu,
NULL);
MLX5_ASSERT(ret == reset_cnt_num);
ret = rte_ring_dequeue_zc_burst_elem_start(reset_list,
sizeof(cnt_id_t),
reset_cnt_num, &zcdr,
NULL);
MLX5_ASSERT(ret == reset_cnt_num);
__hws_cnt_r2rcpy(&zcdu, &zcdr, reset_cnt_num);
rte_ring_dequeue_zc_elem_finish(reset_list, reset_cnt_num);
rte_ring_enqueue_zc_elem_finish(reuse_list, reset_cnt_num);
reset_cnt_num = rte_ring_count(reset_list);
} while (reset_cnt_num > 0);
}
/**
* Release AGE parameter.
*
* @param priv
* Pointer to the port private data structure.
* @param own_cnt_index
* Counter ID that was created only for this AGE, to be released.
* Zero means there is no such counter.
* @param age_ipool
* Pointer to AGE parameter indexed pool.
* @param idx
* Index of AGE parameter in the indexed pool.
*/
static void
mlx5_hws_age_param_free(struct mlx5_priv *priv, cnt_id_t own_cnt_index,
struct mlx5_indexed_pool *age_ipool, uint32_t idx)
{
if (own_cnt_index) {
struct mlx5_hws_cnt_pool *cpool = priv->hws_cpool;
MLX5_ASSERT(mlx5_hws_cnt_is_shared(cpool, own_cnt_index));
mlx5_hws_cnt_shared_put(cpool, &own_cnt_index);
}
mlx5_ipool_free(age_ipool, idx);
}
/**
* Check for newly aged-out flows in the HWS counter pool and trigger the aging event.
*
* @param[in] priv
* Pointer to port private object.
* @param[in] cpool
* Pointer to current counter pool.
*/
static void
mlx5_hws_aging_check(struct mlx5_priv *priv, struct mlx5_hws_cnt_pool *cpool)
{
struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv);
struct flow_counter_stats *stats = cpool->raw_mng->raw;
struct mlx5_hws_age_param *param;
struct rte_ring *r;
const uint64_t curr_time = MLX5_CURR_TIME_SEC;
const uint32_t time_delta = curr_time - cpool->time_of_last_age_check;
uint32_t nb_alloc_cnts = mlx5_hws_cnt_pool_get_size(cpool);
uint16_t expected1 = HWS_AGE_CANDIDATE;
uint16_t expected2 = HWS_AGE_CANDIDATE_INSIDE_RING;
uint32_t i;
cpool->time_of_last_age_check = curr_time;
for (i = 0; i < nb_alloc_cnts; ++i) {
uint32_t age_idx = cpool->pool[i].age_idx;
uint64_t hits;
if (!cpool->pool[i].in_used || age_idx == 0)
continue;
param = mlx5_ipool_get(age_info->ages_ipool, age_idx);
if (unlikely(param == NULL)) {
/*
* When an AGE uses an indirect counter, it is the user's
* responsibility not to use this indirect counter
* without the AGE.
* If the counter is used after the AGE was freed, the
* AGE index is invalid and using it here would cause a
* segmentation fault.
*/
DRV_LOG(WARNING,
"Counter %u has lost its AGE, skipping it.", i);
continue;
}
if (param->timeout == 0)
continue;
switch (__atomic_load_n(&param->state, __ATOMIC_RELAXED)) {
case HWS_AGE_AGED_OUT_NOT_REPORTED:
case HWS_AGE_AGED_OUT_REPORTED:
/* Already aged-out, no action is needed. */
continue;
case HWS_AGE_CANDIDATE:
case HWS_AGE_CANDIDATE_INSIDE_RING:
/* This AGE is a candidate to be aged-out, go on to check it. */
break;
case HWS_AGE_FREE:
/*
* An AGE parameter with state "FREE" cannot be pointed to
* by any counter, since the counter is destroyed first.
* Fall-through.
*/
default:
MLX5_ASSERT(0);
continue;
}
hits = rte_be_to_cpu_64(stats[i].hits);
if (param->nb_cnts == 1) {
if (hits != param->accumulator_last_hits) {
__atomic_store_n(&param->sec_since_last_hit, 0,
__ATOMIC_RELAXED);
param->accumulator_last_hits = hits;
continue;
}
} else {
param->accumulator_hits += hits;
param->accumulator_cnt++;
if (param->accumulator_cnt < param->nb_cnts)
continue;
param->accumulator_cnt = 0;
if (param->accumulator_last_hits !=
param->accumulator_hits) {
__atomic_store_n(&param->sec_since_last_hit,
0, __ATOMIC_RELAXED);
param->accumulator_last_hits =
param->accumulator_hits;
param->accumulator_hits = 0;
continue;
}
param->accumulator_hits = 0;
}
if (__atomic_add_fetch(&param->sec_since_last_hit, time_delta,
__ATOMIC_RELAXED) <=
__atomic_load_n(&param->timeout, __ATOMIC_RELAXED))
continue;
/* Prepare the relevant ring for this AGE parameter */
if (priv->hws_strict_queue)
r = age_info->hw_q_age->aged_lists[param->queue_id];
else
r = age_info->hw_age.aged_list;
/* Change the state atomically and insert the entry into the ring. */
if (__atomic_compare_exchange_n(&param->state, &expected1,
HWS_AGE_AGED_OUT_NOT_REPORTED,
false, __ATOMIC_RELAXED,
__ATOMIC_RELAXED)) {
int ret = rte_ring_enqueue_burst_elem(r, &age_idx,
sizeof(uint32_t),
1, NULL);
/*
* The ring doesn't have enough room for this entry;
* put the state back so it is retried the next second.
*
* FIXME: if it gets traffic before the next second, this
* "aged out" event is lost; it will be fixed later when
* the ring filling is optimized to work in bulks.
*/
expected2 = HWS_AGE_AGED_OUT_NOT_REPORTED;
if (ret == 0 &&
!__atomic_compare_exchange_n(&param->state,
&expected2, expected1,
false,
__ATOMIC_RELAXED,
__ATOMIC_RELAXED) &&
expected2 == HWS_AGE_FREE)
mlx5_hws_age_param_free(priv,
param->own_cnt_index,
age_info->ages_ipool,
age_idx);
/* The event is irrelevant in strict queue mode. */
if (!priv->hws_strict_queue)
MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
} else {
__atomic_compare_exchange_n(&param->state, &expected2,
HWS_AGE_AGED_OUT_NOT_REPORTED,
false, __ATOMIC_RELAXED,
__ATOMIC_RELAXED);
}
}
/* The event is irrelevant in strict queue mode. */
if (!priv->hws_strict_queue)
mlx5_age_event_prepare(priv->sh);
}
static void
mlx5_hws_cnt_raw_data_free(struct mlx5_dev_ctx_shared *sh,
struct mlx5_hws_cnt_raw_data_mng *mng)
{
if (mng == NULL)
return;
sh->cdev->mr_scache.dereg_mr_cb(&mng->mr);
mlx5_free(mng->raw);
mlx5_free(mng);
}
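/**
 * Allocate the raw counter statistics buffer and register it as an MR.
 *
 * The buffer holds "n" flow_counter_stats entries, is page aligned and
 * is registered as a memory region via the device MR callback.
 *
 * @param sh
 *   Pointer to the shared device context.
 * @param n
 *   Number of counter statistics entries to allocate.
 *
 * @return
 *   Pointer to the raw data management structure, NULL on failure.
 */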
__rte_unused
static struct mlx5_hws_cnt_raw_data_mng *
mlx5_hws_cnt_raw_data_alloc(struct mlx5_dev_ctx_shared *sh, uint32_t n)
{
struct mlx5_hws_cnt_raw_data_mng *mng = NULL;
int ret;
size_t sz = n * sizeof(struct flow_counter_stats);
size_t pgsz = rte_mem_page_size();
MLX5_ASSERT(pgsz > 0);
mng = mlx5_malloc(MLX5_MEM_ANY | MLX5_MEM_ZERO, sizeof(*mng), 0,
SOCKET_ID_ANY);
if (mng == NULL)
goto error;
mng->raw = mlx5_malloc(MLX5_MEM_ANY | MLX5_MEM_ZERO, sz, pgsz,
SOCKET_ID_ANY);
if (mng->raw == NULL)
goto error;
ret = sh->cdev->mr_scache.reg_mr_cb(sh->cdev->pd, mng->raw, sz,
&mng->mr);
if (ret) {
rte_errno = errno;
goto error;
}
return mng;
error:
mlx5_hws_cnt_raw_data_free(sh, mng);
return NULL;
}
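/**
 * Background service thread for HWS counters.
 *
 * Iterates over all ports sharing this device context, runs the counter
 * service cycle for each port owning a counter pool, checks aging when
 * requested, and then sleeps for the rest of the configured query interval.
 *
 * @param opaque
 *   Pointer to the shared device context.
 *
 * @return
 *   Always NULL.
 */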
static void *
mlx5_hws_cnt_svc(void *opaque)
{
struct mlx5_dev_ctx_shared *sh =
(struct mlx5_dev_ctx_shared *)opaque;
uint64_t interval =
(uint64_t)sh->cnt_svc->query_interval * (US_PER_S / MS_PER_S);
uint16_t port_id;
uint64_t start_cycle, query_cycle = 0;
uint64_t query_us;
uint64_t sleep_us;
while (sh->cnt_svc->svc_running != 0) {
start_cycle = rte_rdtsc();
MLX5_ETH_FOREACH_DEV(port_id, sh->cdev->dev) {
struct mlx5_priv *opriv =
rte_eth_devices[port_id].data->dev_private;
if (opriv != NULL &&
opriv->sh == sh &&
opriv->hws_cpool != NULL) {
__mlx5_hws_cnt_svc(sh, opriv->hws_cpool);
if (opriv->hws_age_req)
mlx5_hws_aging_check(opriv,
opriv->hws_cpool);
}
}
query_cycle = rte_rdtsc() - start_cycle;
query_us = query_cycle / (rte_get_timer_hz() / US_PER_S);
sleep_us = interval - query_us;
if (interval > query_us)
rte_delay_us_sleep(sleep_us);
}
return NULL;
}
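/**
 * Create and initialize a counter pool.
 *
 * Allocates the pool and its per-queue caches, applies the allocation
 * factor to the requested counter number (capped by the device maximum),
 * and creates the free, wait-reset, reuse and per-queue cache rings.
 *
 * @param sh
 *   Pointer to the shared device context.
 * @param pcfg
 *   Pool configuration (name, requested counter number, allocation factor).
 * @param ccfg
 *   Per-queue cache configuration.
 *
 * @return
 *   Pointer to the new counter pool, NULL on failure.
 */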
struct mlx5_hws_cnt_pool *
mlx5_hws_cnt_pool_init(struct mlx5_dev_ctx_shared *sh,
const struct mlx5_hws_cnt_pool_cfg *pcfg,
const struct mlx5_hws_cache_param *ccfg)
{
char mz_name[RTE_MEMZONE_NAMESIZE];
struct mlx5_hws_cnt_pool *cntp;
uint64_t cnt_num = 0;
uint32_t qidx;
MLX5_ASSERT(pcfg);
MLX5_ASSERT(ccfg);
cntp = mlx5_malloc(MLX5_MEM_ANY | MLX5_MEM_ZERO, sizeof(*cntp), 0,
SOCKET_ID_ANY);
if (cntp == NULL)
return NULL;
cntp->cfg = *pcfg;
cntp->cache = mlx5_malloc(MLX5_MEM_ANY | MLX5_MEM_ZERO,
sizeof(*cntp->cache) +
sizeof(((struct mlx5_hws_cnt_pool_caches *)0)->qcache[0])
* ccfg->q_num, 0, SOCKET_ID_ANY);
if (cntp->cache == NULL)
goto error;
/* store the necessary cache parameters. */
cntp->cache->fetch_sz = ccfg->fetch_sz;
cntp->cache->preload_sz = ccfg->preload_sz;
cntp->cache->threshold = ccfg->threshold;
cntp->cache->q_num = ccfg->q_num;
if (pcfg->request_num > sh->hws_max_nb_counters) {
DRV_LOG(ERR, "Counter number %u "
"is greater than the maximum supported (%u).",
pcfg->request_num, sh->hws_max_nb_counters);
goto error;
}
cnt_num = pcfg->request_num * (100 + pcfg->alloc_factor) / 100;
if (cnt_num > UINT32_MAX) {
DRV_LOG(ERR, "counter number %"PRIu64" is out of 32bit range",
cnt_num);
goto error;
}
/*
* When the requested counter number is supported, but the allocation
* factor takes it beyond the maximum size, the factor is effectively
* reduced.
*/
cnt_num = RTE_MIN((uint32_t)cnt_num, sh->hws_max_nb_counters);
cntp->pool = mlx5_malloc(MLX5_MEM_ANY | MLX5_MEM_ZERO,
sizeof(struct mlx5_hws_cnt) * cnt_num,
0, SOCKET_ID_ANY);
if (cntp->pool == NULL)
goto error;
snprintf(mz_name, sizeof(mz_name), "%s_F_RING", pcfg->name);
cntp->free_list = rte_ring_create_elem(mz_name, sizeof(cnt_id_t),
(uint32_t)cnt_num, SOCKET_ID_ANY,
RING_F_SP_ENQ | RING_F_MC_HTS_DEQ | RING_F_EXACT_SZ);
if (cntp->free_list == NULL) {
DRV_LOG(ERR, "failed to create free list ring");
goto error;
}
snprintf(mz_name, sizeof(mz_name), "%s_R_RING", pcfg->name);
cntp->wait_reset_list = rte_ring_create_elem(mz_name, sizeof(cnt_id_t),
(uint32_t)cnt_num, SOCKET_ID_ANY,
RING_F_MP_HTS_ENQ | RING_F_SC_DEQ | RING_F_EXACT_SZ);
if (cntp->wait_reset_list == NULL) {
DRV_LOG(ERR, "failed to create wait reset list ring");
goto error;
}
snprintf(mz_name, sizeof(mz_name), "%s_U_RING", pcfg->name);
cntp->reuse_list = rte_ring_create_elem(mz_name, sizeof(cnt_id_t),
(uint32_t)cnt_num, SOCKET_ID_ANY,
RING_F_SP_ENQ | RING_F_MC_HTS_DEQ | RING_F_EXACT_SZ);
if (cntp->reuse_list == NULL) {
DRV_LOG(ERR, "failed to create reuse list ring");
goto error;
}
for (qidx = 0; qidx < ccfg->q_num; qidx++) {
snprintf(mz_name, sizeof(mz_name), "%s_cache/%u", pcfg->name,
qidx);
cntp->cache->qcache[qidx] = rte_ring_create(mz_name, ccfg->size,
SOCKET_ID_ANY,
RING_F_SP_ENQ | RING_F_SC_DEQ |
RING_F_EXACT_SZ);
if (cntp->cache->qcache[qidx] == NULL)
goto error;
}
/* Initialize the time for aging-out calculation. */
cntp->time_of_last_age_check = MLX5_CURR_TIME_SEC;
return cntp;
error:
mlx5_hws_cnt_pool_deinit(cntp);
return NULL;
}
void
mlx5_hws_cnt_pool_deinit(struct mlx5_hws_cnt_pool * const cntp)
{
uint32_t qidx = 0;
if (cntp == NULL)
return;
rte_ring_free(cntp->free_list);
rte_ring_free(cntp->wait_reset_list);
rte_ring_free(cntp->reuse_list);
if (cntp->cache) {
for (qidx = 0; qidx < cntp->cache->q_num; qidx++)
rte_ring_free(cntp->cache->qcache[qidx]);
}
mlx5_free(cntp->cache);
mlx5_free(cntp->raw_mng);
mlx5_free(cntp->pool);
mlx5_free(cntp);
}
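/**
 * Create the counter service thread and pin it to the configured core.
 *
 * @param sh
 *   Pointer to the shared device context.
 *
 * @return
 *   0 on success, -ENOSYS if the thread cannot be created.
 */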
int
mlx5_hws_cnt_service_thread_create(struct mlx5_dev_ctx_shared *sh)
{
#define CNT_THREAD_NAME_MAX 256
char name[CNT_THREAD_NAME_MAX];
rte_cpuset_t cpuset;
int ret;
uint32_t service_core = sh->cnt_svc->service_core;
CPU_ZERO(&cpuset);
sh->cnt_svc->svc_running = 1;
ret = pthread_create(&sh->cnt_svc->service_thread, NULL,
mlx5_hws_cnt_svc, sh);
if (ret != 0) {
DRV_LOG(ERR, "Failed to create HW steering's counter service thread.");
return -ENOSYS;
}
snprintf(name, CNT_THREAD_NAME_MAX - 1, "%s/svc@%d",
sh->ibdev_name, service_core);
rte_thread_setname(sh->cnt_svc->service_thread, name);
CPU_SET(service_core, &cpuset);
pthread_setaffinity_np(sh->cnt_svc->service_thread, sizeof(cpuset),
&cpuset);
return 0;
}
void
mlx5_hws_cnt_service_thread_destroy(struct mlx5_dev_ctx_shared *sh)
{
if (sh->cnt_svc->service_thread == 0)
return;
sh->cnt_svc->svc_running = 0;
pthread_join(sh->cnt_svc->service_thread, NULL);
sh->cnt_svc->service_thread = 0;
}
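/**
 * Allocate DevX flow counter objects (DCS) covering the pool size.
 *
 * The first bulk uses the largest bulk size that fits the pool; if that is
 * not enough, further bulks are allocated with decreasing sizes until the
 * pool size is covered or the DCS slots are exhausted.
 *
 * @param sh
 *   Pointer to the shared device context.
 * @param cpool
 *   Pointer to the counter pool.
 *
 * @return
 *   0 on success, -1 otherwise (all allocated objects are released).
 */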
int
mlx5_hws_cnt_pool_dcs_alloc(struct mlx5_dev_ctx_shared *sh,
struct mlx5_hws_cnt_pool *cpool)
{
struct mlx5_hca_attr *hca_attr = &sh->cdev->config.hca_attr;
uint32_t max_log_bulk_sz = sh->hws_max_log_bulk_sz;
uint32_t log_bulk_sz;
uint32_t idx, alloc_candidate, alloced = 0;
unsigned int cnt_num = mlx5_hws_cnt_pool_get_size(cpool);
struct mlx5_devx_counter_attr attr = {0};
struct mlx5_devx_obj *dcs;
if (hca_attr->flow_counter_bulk_log_max_alloc == 0) {
DRV_LOG(ERR, "Fw doesn't support bulk log max alloc");
return -1;
}
cnt_num = RTE_ALIGN_CEIL(cnt_num, 4); /* Minimum of 4 counters in a bulk. */
log_bulk_sz = RTE_MIN(max_log_bulk_sz, rte_log2_u32(cnt_num));
attr.pd = sh->cdev->pdn;
attr.pd_valid = 1;
attr.bulk_log_max_alloc = 1;
attr.flow_counter_bulk_log_size = log_bulk_sz;
idx = 0;
dcs = mlx5_devx_cmd_flow_counter_alloc_general(sh->cdev->ctx, &attr);
if (dcs == NULL)
goto error;
cpool->dcs_mng.dcs[idx].obj = dcs;
cpool->dcs_mng.dcs[idx].batch_sz = (1 << log_bulk_sz);
cpool->dcs_mng.batch_total++;
idx++;
cpool->dcs_mng.dcs[0].iidx = 0;
alloced = cpool->dcs_mng.dcs[0].batch_sz;
if (cnt_num > cpool->dcs_mng.dcs[0].batch_sz) {
while (idx < MLX5_HWS_CNT_DCS_NUM) {
attr.flow_counter_bulk_log_size = --max_log_bulk_sz;
alloc_candidate = RTE_BIT32(max_log_bulk_sz);
if (alloced + alloc_candidate > sh->hws_max_nb_counters)
continue;
dcs = mlx5_devx_cmd_flow_counter_alloc_general
(sh->cdev->ctx, &attr);
if (dcs == NULL)
goto error;
cpool->dcs_mng.dcs[idx].obj = dcs;
cpool->dcs_mng.dcs[idx].batch_sz = alloc_candidate;
cpool->dcs_mng.dcs[idx].iidx = alloced;
alloced += cpool->dcs_mng.dcs[idx].batch_sz;
cpool->dcs_mng.batch_total++;
if (alloced >= cnt_num)
break;
idx++;
}
}
return 0;
error:
DRV_LOG(DEBUG,
"Cannot alloc device counter, allocated[%" PRIu32 "] request[%" PRIu32 "]",
alloced, cnt_num);
for (idx = 0; idx < cpool->dcs_mng.batch_total; idx++) {
mlx5_devx_cmd_destroy(cpool->dcs_mng.dcs[idx].obj);
cpool->dcs_mng.dcs[idx].obj = NULL;
cpool->dcs_mng.dcs[idx].batch_sz = 0;
cpool->dcs_mng.dcs[idx].iidx = 0;
}
cpool->dcs_mng.batch_total = 0;
return -1;
}
void
mlx5_hws_cnt_pool_dcs_free(struct mlx5_dev_ctx_shared *sh,
struct mlx5_hws_cnt_pool *cpool)
{
uint32_t idx;
if (cpool == NULL)
return;
for (idx = 0; idx < MLX5_HWS_CNT_DCS_NUM; idx++)
mlx5_devx_cmd_destroy(cpool->dcs_mng.dcs[idx].obj);
if (cpool->raw_mng) {
mlx5_hws_cnt_raw_data_free(sh, cpool->raw_mng);
cpool->raw_mng = NULL;
}
}
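/**
 * Create a DR counter action for each allocated DCS bulk.
 *
 * The actions are created for RX and TX, and also for FDB when E-Switch
 * is enabled on the master port.
 *
 * @param priv
 *   Pointer to the port private data structure.
 * @param cpool
 *   Pointer to the counter pool.
 *
 * @return
 *   0 on success, -ENOSYS otherwise.
 */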
int
mlx5_hws_cnt_pool_action_create(struct mlx5_priv *priv,
struct mlx5_hws_cnt_pool *cpool)
{
uint32_t idx;
int ret = 0;
struct mlx5_hws_cnt_dcs *dcs;
uint32_t flags;
flags = MLX5DR_ACTION_FLAG_HWS_RX | MLX5DR_ACTION_FLAG_HWS_TX;
if (priv->sh->config.dv_esw_en && priv->master)
flags |= MLX5DR_ACTION_FLAG_HWS_FDB;
for (idx = 0; idx < cpool->dcs_mng.batch_total; idx++) {
dcs = &cpool->dcs_mng.dcs[idx];
dcs->dr_action = mlx5dr_action_create_counter(priv->dr_ctx,
(struct mlx5dr_devx_obj *)dcs->obj,
flags);
if (dcs->dr_action == NULL) {
mlx5_hws_cnt_pool_action_destroy(cpool);
ret = -ENOSYS;
break;
}
}
return ret;
}
void
mlx5_hws_cnt_pool_action_destroy(struct mlx5_hws_cnt_pool *cpool)
{
uint32_t idx;
struct mlx5_hws_cnt_dcs *dcs;
for (idx = 0; idx < cpool->dcs_mng.batch_total; idx++) {
dcs = &cpool->dcs_mng.dcs[idx];
if (dcs->dr_action != NULL) {
mlx5dr_action_destroy(dcs->dr_action);
dcs->dr_action = NULL;
}
}
}
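/**
 * Create the per-port counter pool for HW steering.
 *
 * Initializes the shared counter service on first use, creates the pool
 * with the default cache parameters, allocates the DevX counter objects
 * and the raw statistics buffer, pre-loads the counter IDs and creates
 * the DR counter actions.
 *
 * @param dev
 *   Pointer to the Ethernet device structure.
 * @param pattr
 *   Port attributes, used for the requested counter number.
 * @param nb_queue
 *   Number of flow queues, used as the number of caches.
 *
 * @return
 *   Pointer to the new counter pool, NULL on failure.
 */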
struct mlx5_hws_cnt_pool *
mlx5_hws_cnt_pool_create(struct rte_eth_dev *dev,
const struct rte_flow_port_attr *pattr, uint16_t nb_queue)
{
struct mlx5_hws_cnt_pool *cpool = NULL;
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_hws_cache_param cparam = {0};
struct mlx5_hws_cnt_pool_cfg pcfg = {0};
char *mp_name;
int ret = 0;
size_t sz;
/* Initialize the counter service if it is not initialized yet. */
if (priv->sh->cnt_svc == NULL) {
ret = mlx5_hws_cnt_svc_init(priv->sh);
if (ret != 0)
return NULL;
}
cparam.fetch_sz = HWS_CNT_CACHE_FETCH_DEFAULT;
cparam.preload_sz = HWS_CNT_CACHE_PRELOAD_DEFAULT;
cparam.q_num = nb_queue;
cparam.threshold = HWS_CNT_CACHE_THRESHOLD_DEFAULT;
cparam.size = HWS_CNT_CACHE_SZ_DEFAULT;
pcfg.alloc_factor = HWS_CNT_ALLOC_FACTOR_DEFAULT;
mp_name = mlx5_malloc(MLX5_MEM_ZERO, RTE_MEMZONE_NAMESIZE, 0,
SOCKET_ID_ANY);
if (mp_name == NULL)
goto error;
snprintf(mp_name, RTE_MEMZONE_NAMESIZE, "MLX5_HWS_CNT_POOL_%u",
dev->data->port_id);
pcfg.name = mp_name;
pcfg.request_num = pattr->nb_counters;
cpool = mlx5_hws_cnt_pool_init(priv->sh, &pcfg, &cparam);
if (cpool == NULL)
goto error;
ret = mlx5_hws_cnt_pool_dcs_alloc(priv->sh, cpool);
if (ret != 0)
goto error;
sz = RTE_ALIGN_CEIL(mlx5_hws_cnt_pool_get_size(cpool), 4);
cpool->raw_mng = mlx5_hws_cnt_raw_data_alloc(priv->sh, sz);
if (cpool->raw_mng == NULL)
goto error;
__hws_cnt_id_load(cpool);
/*
* Bump the query generation right after pool creation so that the
* pre-loaded counters can be used directly: they already have their
* initial values, so there is no need to wait for a query.
*/
cpool->query_gen = 1;
ret = mlx5_hws_cnt_pool_action_create(priv, cpool);
if (ret != 0)
goto error;
priv->sh->cnt_svc->refcnt++;
return cpool;
error:
mlx5_hws_cnt_pool_destroy(priv->sh, cpool);
return NULL;
}
void
mlx5_hws_cnt_pool_destroy(struct mlx5_dev_ctx_shared *sh,
struct mlx5_hws_cnt_pool *cpool)
{
if (cpool == NULL)
return;
if (--sh->cnt_svc->refcnt == 0)
mlx5_hws_cnt_svc_deinit(sh);
mlx5_hws_cnt_pool_action_destroy(cpool);
mlx5_hws_cnt_pool_dcs_free(sh, cpool);
mlx5_hws_cnt_raw_data_free(sh, cpool->raw_mng);
mlx5_free((void *)cpool->cfg.name);
mlx5_hws_cnt_pool_deinit(cpool);
}
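/**
 * Initialize the shared counter service.
 *
 * Allocates the service context, takes the query interval and service
 * core from the device configuration, initializes the ASO counter queue
 * and starts the service thread.
 *
 * @param sh
 *   Pointer to the shared device context.
 *
 * @return
 *   0 on success, -1 otherwise.
 */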
int
mlx5_hws_cnt_svc_init(struct mlx5_dev_ctx_shared *sh)
{
int ret;
sh->cnt_svc = mlx5_malloc(MLX5_MEM_ANY | MLX5_MEM_ZERO,
sizeof(*sh->cnt_svc), 0, SOCKET_ID_ANY);
if (sh->cnt_svc == NULL)
return -1;
sh->cnt_svc->query_interval = sh->config.cnt_svc.cycle_time;
sh->cnt_svc->service_core = sh->config.cnt_svc.service_core;
ret = mlx5_aso_cnt_queue_init(sh);
if (ret != 0) {
mlx5_free(sh->cnt_svc);
sh->cnt_svc = NULL;
return -1;
}
ret = mlx5_hws_cnt_service_thread_create(sh);
if (ret != 0) {
mlx5_aso_cnt_queue_uninit(sh);
mlx5_free(sh->cnt_svc);
sh->cnt_svc = NULL;
return -1;
}
return 0;
}
void
mlx5_hws_cnt_svc_deinit(struct mlx5_dev_ctx_shared *sh)
{
if (sh->cnt_svc == NULL)
return;
mlx5_hws_cnt_service_thread_destroy(sh);
mlx5_aso_cnt_queue_uninit(sh);
mlx5_free(sh->cnt_svc);
sh->cnt_svc = NULL;
}
/**
* Destroy AGE action.
*
* @param priv
* Pointer to the port private data structure.
* @param idx
* Index of AGE parameter.
* @param error
* Pointer to error structure.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_hws_age_action_destroy(struct mlx5_priv *priv, uint32_t idx,
struct rte_flow_error *error)
{
struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv);
struct mlx5_indexed_pool *ipool = age_info->ages_ipool;
struct mlx5_hws_age_param *param = mlx5_ipool_get(ipool, idx);
if (param == NULL)
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
"invalid AGE parameter index");
switch (__atomic_exchange_n(&param->state, HWS_AGE_FREE,
__ATOMIC_RELAXED)) {
case HWS_AGE_CANDIDATE:
case HWS_AGE_AGED_OUT_REPORTED:
mlx5_hws_age_param_free(priv, param->own_cnt_index, ipool, idx);
break;
case HWS_AGE_AGED_OUT_NOT_REPORTED:
case HWS_AGE_CANDIDATE_INSIDE_RING:
/*
* In both cases the AGE is inside the ring. Change the state here
* and destroy it later when it is taken out of the ring.
*/
break;
case HWS_AGE_FREE:
/*
* If the index is valid and the state is FREE, it means this AGE
* has been freed for the user but not yet by the PMD, since it is
* still inside the ring.
*/
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
"this AGE has already been released");
default:
MLX5_ASSERT(0);
break;
}
return 0;
}
/**
* Create AGE action parameter.
*
* @param[in] priv
* Pointer to the port private data structure.
* @param[in] queue_id
* Which HWS queue to be used.
* @param[in] shared
* Whether it is an indirect AGE action.
* @param[in] age
* Pointer to the aging action configuration.
* @param[in] flow_idx
* Flow index from the indexed pool.
* For an indirect AGE action it has no effect.
* @param[out] error
* Pointer to error structure.
*
* @return
* Index to AGE action parameter on success, 0 otherwise.
*/
uint32_t
mlx5_hws_age_action_create(struct mlx5_priv *priv, uint32_t queue_id,
bool shared, const struct rte_flow_action_age *age,
uint32_t flow_idx, struct rte_flow_error *error)
{
struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv);
struct mlx5_indexed_pool *ipool = age_info->ages_ipool;
struct mlx5_hws_age_param *param;
uint32_t age_idx;
param = mlx5_ipool_malloc(ipool, &age_idx);
if (param == NULL) {
rte_flow_error_set(error, ENOMEM,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
"cannot allocate AGE parameter");
return 0;
}
MLX5_ASSERT(__atomic_load_n(&param->state,
__ATOMIC_RELAXED) == HWS_AGE_FREE);
if (shared) {
param->nb_cnts = 0;
param->accumulator_hits = 0;
param->accumulator_cnt = 0;
flow_idx = age_idx;
} else {
param->nb_cnts = 1;
}
param->context = age->context ? age->context :
(void *)(uintptr_t)flow_idx;
param->timeout = age->timeout;
param->queue_id = queue_id;
param->accumulator_last_hits = 0;
param->own_cnt_index = 0;
param->sec_since_last_hit = 0;
param->state = HWS_AGE_CANDIDATE;
return age_idx;
}
/**
* Update indirect AGE action parameter.
*
* @param[in] priv
* Pointer to the port private data structure.
* @param[in] idx
* Index of AGE parameter.
* @param[in] update
* Update value.
* @param[out] error
* Pointer to error structure.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_hws_age_action_update(struct mlx5_priv *priv, uint32_t idx,
const void *update, struct rte_flow_error *error)
{
const struct rte_flow_update_age *update_ade = update;
struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv);
struct mlx5_indexed_pool *ipool = age_info->ages_ipool;
struct mlx5_hws_age_param *param = mlx5_ipool_get(ipool, idx);
bool sec_since_last_hit_reset = false;
bool state_update = false;
if (param == NULL)
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
"invalid AGE parameter index");
if (update_ade->timeout_valid) {
uint32_t old_timeout = __atomic_exchange_n(&param->timeout,
update_ade->timeout,
__ATOMIC_RELAXED);
if (old_timeout == 0)
sec_since_last_hit_reset = true;
else if (old_timeout < update_ade->timeout ||
update_ade->timeout == 0)
/*
* When the timeout is increased, aged-out flows might become
* active again, so their state should be updated accordingly.
* When the new timeout is 0, the state is updated so the flow
* is no longer reported as aged-out.
*/
state_update = true;
}
if (update_ade->touch) {
sec_since_last_hit_reset = true;
state_update = true;
}
if (sec_since_last_hit_reset)
__atomic_store_n(&param->sec_since_last_hit, 0,
__ATOMIC_RELAXED);
if (state_update) {
uint16_t expected = HWS_AGE_AGED_OUT_NOT_REPORTED;
/*
* Change states of aged-out flows to active:
* - AGED_OUT_NOT_REPORTED -> CANDIDATE_INSIDE_RING
* - AGED_OUT_REPORTED -> CANDIDATE
*/
if (!__atomic_compare_exchange_n(&param->state, &expected,
HWS_AGE_CANDIDATE_INSIDE_RING,
false, __ATOMIC_RELAXED,
__ATOMIC_RELAXED) &&
expected == HWS_AGE_AGED_OUT_REPORTED)
__atomic_store_n(&param->state, HWS_AGE_CANDIDATE,
__ATOMIC_RELAXED);
}
return 0;
}
/**
* Get the AGE context if the aged-out index is still valid.
*
* @param priv
* Pointer to the port private data structure.
* @param idx
* Index of AGE parameter.
*
* @return
* AGE context if the index is still aged-out, NULL otherwise.
*/
void *
mlx5_hws_age_context_get(struct mlx5_priv *priv, uint32_t idx)
{
struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv);
struct mlx5_indexed_pool *ipool = age_info->ages_ipool;
struct mlx5_hws_age_param *param = mlx5_ipool_get(ipool, idx);
uint16_t expected = HWS_AGE_AGED_OUT_NOT_REPORTED;
MLX5_ASSERT(param != NULL);
if (__atomic_compare_exchange_n(&param->state, &expected,
HWS_AGE_AGED_OUT_REPORTED, false,
__ATOMIC_RELAXED, __ATOMIC_RELAXED))
return param->context;
switch (expected) {
case HWS_AGE_FREE:
/*
* This AGE could not be destroyed while it was inside
* the ring. Its state has now been updated to FREE, so it is
* actually destroyed here.
*/
mlx5_hws_age_param_free(priv, param->own_cnt_index, ipool, idx);
break;
case HWS_AGE_CANDIDATE_INSIDE_RING:
__atomic_store_n(&param->state, HWS_AGE_CANDIDATE,
__ATOMIC_RELAXED);
break;
case HWS_AGE_CANDIDATE:
/*
* Only the background thread pushes to the ring, and it never
* pushes this state. When an AGE inside the ring becomes a
* candidate, it gets the special state
* HWS_AGE_CANDIDATE_INSIDE_RING.
* Fall-through.
*/
case HWS_AGE_AGED_OUT_REPORTED:
/*
* Only this thread (doing the query) may write this state, and
* it does so only after taking the entry out of the ring.
* Fall-through.
*/
case HWS_AGE_AGED_OUT_NOT_REPORTED:
/*
* In this case the compare-exchange above returns true and the
* function returns the context immediately.
* Fall-through.
*/
default:
MLX5_ASSERT(0);
break;
}
return NULL;
}
#ifdef RTE_ARCH_64
#define MLX5_HWS_AGED_OUT_RING_SIZE_MAX UINT32_MAX
#else
#define MLX5_HWS_AGED_OUT_RING_SIZE_MAX RTE_BIT32(8)
#endif
/**
* Get the size of aged out ring list for each queue.
*
* The size is one percent of nb_counters divided by nb_queues.
* The ring size must be a power of 2, so it is aligned up to the next
* power of 2.
* On 32-bit systems, the size is limited to 256.
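* For example, with 800000 counters and 4 queues, the base size is
* 800000 / 100 / 4 = 2000, aligned up to 2048.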
*
* This function is called when RTE_FLOW_PORT_FLAG_STRICT_QUEUE is on.
*
* @param nb_counters
* Final number of allocated counters in the pool.
* @param nb_queues
* Number of HWS queues in this port.
*
* @return
* Size of aged out ring per queue.
*/
static __rte_always_inline uint32_t
mlx5_hws_aged_out_q_ring_size_get(uint32_t nb_counters, uint32_t nb_queues)
{
uint32_t size = rte_align32pow2((nb_counters / 100) / nb_queues);
uint32_t max_size = MLX5_HWS_AGED_OUT_RING_SIZE_MAX;
return RTE_MIN(size, max_size);
}
/**
* Get the size of the aged out ring list.
*
* The size is one percent of nb_counters.
* The ring size must be a power of 2, so it is aligned up to the next
* power of 2.
* On 32-bit systems, the size is limited to 256.
*
* This function is called when RTE_FLOW_PORT_FLAG_STRICT_QUEUE is off.
*
* @param nb_counters
* Final number of allocated counters in the pool.
*
* @return
* Size of the aged out ring list.
*/
static __rte_always_inline uint32_t
mlx5_hws_aged_out_ring_size_get(uint32_t nb_counters)
{
uint32_t size = rte_align32pow2(nb_counters / 100);
uint32_t max_size = MLX5_HWS_AGED_OUT_RING_SIZE_MAX;
return RTE_MIN(size, max_size);
}
/**
* Initialize the shared aging list information per port.
*
* @param dev
* Pointer to the rte_eth_dev structure.
* @param nb_queues
* Number of HWS queues.
* @param strict_queue
* Indicator whether strict_queue mode is enabled.
* @param ring_size
* Size of aged-out ring for creation.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
mlx5_hws_age_info_init(struct rte_eth_dev *dev, uint16_t nb_queues,
bool strict_queue, uint32_t ring_size)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv);
uint32_t flags = RING_F_SP_ENQ | RING_F_SC_DEQ | RING_F_EXACT_SZ;
char mz_name[RTE_MEMZONE_NAMESIZE];
struct rte_ring *r = NULL;
uint32_t qidx;
age_info->flags = 0;
if (strict_queue) {
size_t size = sizeof(*age_info->hw_q_age) +
sizeof(struct rte_ring *) * nb_queues;
age_info->hw_q_age = mlx5_malloc(MLX5_MEM_ANY | MLX5_MEM_ZERO,
size, 0, SOCKET_ID_ANY);
if (age_info->hw_q_age == NULL)
return -ENOMEM;
for (qidx = 0; qidx < nb_queues; ++qidx) {
snprintf(mz_name, sizeof(mz_name),
"port_%u_queue_%u_aged_out_ring",
dev->data->port_id, qidx);
r = rte_ring_create(mz_name, ring_size, SOCKET_ID_ANY,
flags);
if (r == NULL) {
DRV_LOG(ERR, "\"%s\" creation failed: %s",
mz_name, rte_strerror(rte_errno));
goto error;
}
age_info->hw_q_age->aged_lists[qidx] = r;
DRV_LOG(DEBUG,
"\"%s\" is successfully created (size=%u).",
mz_name, ring_size);
}
age_info->hw_q_age->nb_rings = nb_queues;
} else {
snprintf(mz_name, sizeof(mz_name), "port_%u_aged_out_ring",
dev->data->port_id);
r = rte_ring_create(mz_name, ring_size, SOCKET_ID_ANY, flags);
if (r == NULL) {
DRV_LOG(ERR, "\"%s\" creation failed: %s", mz_name,
rte_strerror(rte_errno));
return -rte_errno;
}
age_info->hw_age.aged_list = r;
DRV_LOG(DEBUG, "\"%s\" is successfully created (size=%u).",
mz_name, ring_size);
/* In non "strict_queue" mode, initialize the event. */
MLX5_AGE_SET(age_info, MLX5_AGE_TRIGGER);
}
return 0;
error:
MLX5_ASSERT(strict_queue);
while (qidx--)
rte_ring_free(age_info->hw_q_age->aged_lists[qidx]);
mlx5_free(age_info->hw_q_age);
return -1;
}
/**
* Cleanup aged-out ring before destroying.
*
* @param priv
* Pointer to port private object.
* @param r
* Pointer to aged-out ring object.
*/
static void
mlx5_hws_aged_out_ring_cleanup(struct mlx5_priv *priv, struct rte_ring *r)
{
int ring_size = rte_ring_count(r);
while (ring_size > 0) {
uint32_t age_idx = 0;
if (rte_ring_dequeue_elem(r, &age_idx, sizeof(uint32_t)) < 0)
break;
/* get the AGE context if the aged-out index is still valid. */
mlx5_hws_age_context_get(priv, age_idx);
ring_size--;
}
rte_ring_free(r);
}
/**
* Destroy the shared aging list information per port.
*
* @param priv
* Pointer to port private object.
*/
static void
mlx5_hws_age_info_destroy(struct mlx5_priv *priv)
{
struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv);
uint16_t nb_queues = age_info->hw_q_age->nb_rings;
struct rte_ring *r;
if (priv->hws_strict_queue) {
uint32_t qidx;
for (qidx = 0; qidx < nb_queues; ++qidx) {
r = age_info->hw_q_age->aged_lists[qidx];
mlx5_hws_aged_out_ring_cleanup(priv, r);
}
mlx5_free(age_info->hw_q_age);
} else {
r = age_info->hw_age.aged_list;
mlx5_hws_aged_out_ring_cleanup(priv, r);
}
}
/**
* Initialize the aging mechanism per port.
*
* @param dev
* Pointer to the rte_eth_dev structure.
* @param attr
* Port configuration attributes.
* @param nb_queues
* Number of HWS queues.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_hws_age_pool_init(struct rte_eth_dev *dev,
const struct rte_flow_port_attr *attr,
uint16_t nb_queues)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv);
struct mlx5_indexed_pool_config cfg = {
.size =
RTE_CACHE_LINE_ROUNDUP(sizeof(struct mlx5_hws_age_param)),
.trunk_size = 1 << 12,
.per_core_cache = 1 << 13,
.need_lock = 1,
.release_mem_en = !!priv->sh->config.reclaim_mode,
.malloc = mlx5_malloc,
.free = mlx5_free,
.type = "mlx5_hws_age_pool",
};
bool strict_queue = false;
uint32_t nb_alloc_cnts;
uint32_t rsize;
uint32_t nb_ages_updated;
int ret;
strict_queue = !!(attr->flags & RTE_FLOW_PORT_FLAG_STRICT_QUEUE);
MLX5_ASSERT(priv->hws_cpool);
nb_alloc_cnts = mlx5_hws_cnt_pool_get_size(priv->hws_cpool);
if (strict_queue) {
rsize = mlx5_hws_aged_out_q_ring_size_get(nb_alloc_cnts,
nb_queues);
nb_ages_updated = rsize * nb_queues + attr->nb_aging_objects;
} else {
rsize = mlx5_hws_aged_out_ring_size_get(nb_alloc_cnts);
nb_ages_updated = rsize + attr->nb_aging_objects;
}
ret = mlx5_hws_age_info_init(dev, nb_queues, strict_queue, rsize);
if (ret < 0)
return ret;
cfg.max_idx = rte_align32pow2(nb_ages_updated);
if (cfg.max_idx <= cfg.trunk_size) {
cfg.per_core_cache = 0;
cfg.trunk_size = cfg.max_idx;
} else if (cfg.max_idx <= MLX5_HW_IPOOL_SIZE_THRESHOLD) {
cfg.per_core_cache = MLX5_HW_IPOOL_CACHE_MIN;
}
age_info->ages_ipool = mlx5_ipool_create(&cfg);
if (age_info->ages_ipool == NULL) {
mlx5_hws_age_info_destroy(priv);
rte_errno = ENOMEM;
return -rte_errno;
}
priv->hws_age_req = 1;
return 0;
}
/**
* Cleanup all aging resources per port.
*
* @param priv
* Pointer to port private object.
*/
void
mlx5_hws_age_pool_destroy(struct mlx5_priv *priv)
{
struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv);
MLX5_ASSERT(priv->hws_age_req);
mlx5_hws_age_info_destroy(priv);
mlx5_ipool_destroy(age_info->ages_ipool);
age_info->ages_ipool = NULL;
priv->hws_age_req = 0;
}
#endif