net/mlx5: fix race condition in counter pool resizing

The counter management structure holds an array of counter pools. This
array is left unallocated at management structure initialization and
grows on demand.

The resizing includes:
1. Allocate memory for the new size.
2. Copy the existing data to the new memory.
3. Move the pointer to the new memory.
4. Free the old memory.

Nothing in this function prevents the third step from being performed
before the second one, and the compiler may reorder them that way. In
that case another thread might read the new pointer before the copying
is done, and read invalid data or even crash.

This patch allocates the memory for this array once, during management
structure initialization, and limits the number of counters to 16M.

Fixes: 3aa279157f ("net/mlx5: synchronize flow counter pool creation")
Cc: stable@dpdk.org

Signed-off-by: Michael Baum <michaelba@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
This commit is contained in:
Michael Baum 2022-10-31 18:08:20 +02:00 committed by Raslan Darawsheh
parent d114dbee28
commit a94e89e47b
5 changed files with 52 additions and 83 deletions

View File

@ -561,18 +561,34 @@ mlx5_flow_counter_mode_config(struct rte_eth_dev *dev __rte_unused)
*
* @param[in] sh
* Pointer to mlx5_dev_ctx_shared object to free
*
* @return
* 0 on success, otherwise negative errno value and rte_errno is set.
*/
static void
static int
mlx5_flow_counters_mng_init(struct mlx5_dev_ctx_shared *sh)
{
int i, j;
if (sh->config.dv_flow_en < 2) {
void *pools;
pools = mlx5_malloc(MLX5_MEM_ZERO,
sizeof(struct mlx5_flow_counter_pool *) *
MLX5_COUNTER_POOLS_MAX_NUM,
0, SOCKET_ID_ANY);
if (!pools) {
DRV_LOG(ERR,
"Counter management allocation was failed.");
rte_errno = ENOMEM;
return -rte_errno;
}
memset(&sh->sws_cmng, 0, sizeof(sh->sws_cmng));
TAILQ_INIT(&sh->sws_cmng.flow_counters);
sh->sws_cmng.min_id = MLX5_CNT_BATCH_OFFSET;
sh->sws_cmng.max_id = -1;
sh->sws_cmng.last_pool_idx = POOL_IDX_INVALID;
sh->sws_cmng.pools = pools;
rte_spinlock_init(&sh->sws_cmng.pool_update_sl);
for (i = 0; i < MLX5_COUNTER_TYPE_MAX; i++) {
TAILQ_INIT(&sh->sws_cmng.counters[i]);
@ -598,6 +614,7 @@ mlx5_flow_counters_mng_init(struct mlx5_dev_ctx_shared *sh)
sh->hws_max_log_bulk_sz = log_dcs;
sh->hws_max_nb_counters = max_nb_cnts;
}
return 0;
}
/**
@ -655,8 +672,7 @@ mlx5_flow_counters_mng_close(struct mlx5_dev_ctx_shared *sh)
claim_zero
(mlx5_flow_os_destroy_flow_action
(cnt->action));
if (fallback && MLX5_POOL_GET_CNT
(pool, j)->dcs_when_free)
if (fallback && cnt->dcs_when_free)
claim_zero(mlx5_devx_cmd_destroy
(cnt->dcs_when_free));
}
@ -1572,8 +1588,12 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
if (err)
goto error;
}
err = mlx5_flow_counters_mng_init(sh);
if (err) {
DRV_LOG(ERR, "Fail to initialize counters manage.");
goto error;
}
mlx5_flow_aging_init(sh);
mlx5_flow_counters_mng_init(sh);
mlx5_flow_ipool_create(sh);
/* Add context to the global device list. */
LIST_INSERT_HEAD(&mlx5_dev_ctx_list, sh, next);

View File

@ -386,11 +386,10 @@ struct mlx5_hw_q {
} __rte_cache_aligned;
#define MLX5_COUNTER_POOLS_MAX_NUM (1 << 15)
#define MLX5_COUNTERS_PER_POOL 512
#define MLX5_MAX_PENDING_QUERIES 4
#define MLX5_CNT_CONTAINER_RESIZE 64
#define MLX5_CNT_MR_ALLOC_BULK 64
#define MLX5_CNT_SHARED_OFFSET 0x80000000
#define IS_BATCH_CNT(cnt) (((cnt) & (MLX5_CNT_SHARED_OFFSET - 1)) >= \
MLX5_CNT_BATCH_OFFSET)
@ -549,7 +548,6 @@ TAILQ_HEAD(mlx5_counter_pools, mlx5_flow_counter_pool);
/* Counter global management structure. */
struct mlx5_flow_counter_mng {
volatile uint16_t n_valid; /* Number of valid pools. */
uint16_t n; /* Number of pools. */
uint16_t last_pool_idx; /* Last used pool index */
int min_id; /* The minimum counter ID in the pools. */
int max_id; /* The maximum counter ID in the pools. */
@ -621,6 +619,7 @@ struct mlx5_aso_age_action {
};
#define MLX5_ASO_AGE_ACTIONS_PER_POOL 512
#define MLX5_ASO_AGE_CONTAINER_RESIZE 64
struct mlx5_aso_age_pool {
struct mlx5_devx_obj *flow_hit_aso_obj;

View File

@ -9061,7 +9061,7 @@ mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
{
struct mlx5_counter_stats_mem_mng *mem_mng;
volatile struct flow_counter_stats *raw_data;
int raws_n = MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES;
int raws_n = MLX5_CNT_MR_ALLOC_BULK + MLX5_MAX_PENDING_QUERIES;
int size = (sizeof(struct flow_counter_stats) *
MLX5_COUNTERS_PER_POOL +
sizeof(struct mlx5_counter_stats_raw)) * raws_n +
@ -9099,7 +9099,7 @@ mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
}
for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
LIST_INSERT_HEAD(&sh->sws_cmng.free_stat_raws,
mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i,
mem_mng->raws + MLX5_CNT_MR_ALLOC_BULK + i,
next);
LIST_INSERT_HEAD(&sh->sws_cmng.mem_mngs, mem_mng, next);
sh->sws_cmng.mem_mng = mem_mng;
@ -9123,14 +9123,13 @@ mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh,
{
struct mlx5_flow_counter_mng *cmng = &sh->sws_cmng;
/* Resize statistic memory once used out. */
if (!(pool->index % MLX5_CNT_CONTAINER_RESIZE) &&
if (!(pool->index % MLX5_CNT_MR_ALLOC_BULK) &&
mlx5_flow_create_counter_stat_mem_mng(sh)) {
DRV_LOG(ERR, "Cannot resize counter stat mem.");
return -1;
}
rte_spinlock_lock(&pool->sl);
pool->raw = cmng->mem_mng->raws + pool->index %
MLX5_CNT_CONTAINER_RESIZE;
pool->raw = cmng->mem_mng->raws + pool->index % MLX5_CNT_MR_ALLOC_BULK;
rte_spinlock_unlock(&pool->sl);
pool->raw_hw = NULL;
return 0;
@ -9172,13 +9171,13 @@ void
mlx5_flow_query_alarm(void *arg)
{
struct mlx5_dev_ctx_shared *sh = arg;
int ret;
uint16_t pool_index = sh->sws_cmng.pool_index;
struct mlx5_flow_counter_mng *cmng = &sh->sws_cmng;
uint16_t pool_index = cmng->pool_index;
struct mlx5_flow_counter_pool *pool;
uint16_t n_valid;
int ret;
if (sh->sws_cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
if (cmng->pending_queries >= MLX5_MAX_PENDING_QUERIES)
goto set_alarm;
rte_spinlock_lock(&cmng->pool_update_sl);
pool = cmng->pools[pool_index];
@ -9190,8 +9189,7 @@ mlx5_flow_query_alarm(void *arg)
if (pool->raw_hw)
/* There is a pool query in progress. */
goto set_alarm;
pool->raw_hw =
LIST_FIRST(&sh->sws_cmng.free_stat_raws);
pool->raw_hw = LIST_FIRST(&cmng->free_stat_raws);
if (!pool->raw_hw)
/* No free counter statistics raw memory. */
goto set_alarm;
@ -9217,12 +9215,12 @@ mlx5_flow_query_alarm(void *arg)
goto set_alarm;
}
LIST_REMOVE(pool->raw_hw, next);
sh->sws_cmng.pending_queries++;
cmng->pending_queries++;
pool_index++;
if (pool_index >= n_valid)
pool_index = 0;
set_alarm:
sh->sws_cmng.pool_index = pool_index;
cmng->pool_index = pool_index;
mlx5_set_query_alarm(sh);
}
@ -9753,7 +9751,7 @@ mlx5_flow_dev_dump_sh_all(struct rte_eth_dev *dev,
}
/* get counter */
MLX5_ASSERT(cmng->n_valid <= cmng->n);
MLX5_ASSERT(cmng->n_valid <= MLX5_COUNTER_POOLS_MAX_NUM);
max = MLX5_COUNTERS_PER_POOL * cmng->n_valid;
for (j = 1; j <= max; j++) {
action = NULL;

View File

@ -6091,7 +6091,7 @@ flow_dv_counter_get_by_idx(struct rte_eth_dev *dev,
/* Decrease to original index and clear shared bit. */
idx = (idx - 1) & (MLX5_CNT_SHARED_OFFSET - 1);
MLX5_ASSERT(idx / MLX5_COUNTERS_PER_POOL < cmng->n);
MLX5_ASSERT(idx / MLX5_COUNTERS_PER_POOL < MLX5_COUNTER_POOLS_MAX_NUM);
pool = cmng->pools[idx / MLX5_COUNTERS_PER_POOL];
MLX5_ASSERT(pool);
if (ppool)
@ -6167,39 +6167,6 @@ flow_dv_find_pool_by_id(struct mlx5_flow_counter_mng *cmng, int id)
return pool;
}
/**
* Resize a counter container.
*
* @param[in] dev
* Pointer to the Ethernet device structure.
*
* @return
* 0 on success, otherwise negative errno value and rte_errno is set.
*/
static int
flow_dv_container_resize(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_flow_counter_mng *cmng = &priv->sh->sws_cmng;
void *old_pools = cmng->pools;
uint32_t resize = cmng->n + MLX5_CNT_CONTAINER_RESIZE;
uint32_t mem_size = sizeof(struct mlx5_flow_counter_pool *) * resize;
void *pools = mlx5_malloc(MLX5_MEM_ZERO, mem_size, 0, SOCKET_ID_ANY);
if (!pools) {
rte_errno = ENOMEM;
return -ENOMEM;
}
if (old_pools)
memcpy(pools, old_pools, cmng->n *
sizeof(struct mlx5_flow_counter_pool *));
cmng->n = resize;
cmng->pools = pools;
if (old_pools)
mlx5_free(old_pools);
return 0;
}
/**
* Query a devx flow counter.
*
@ -6251,8 +6218,6 @@ _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts,
* The devX counter handle.
* @param[in] age
* Whether the pool is for counter that was allocated for aging.
* @param[in/out] cont_cur
* Pointer to the container pointer, it will be update in pool resize.
*
* @return
* The pool container pointer on success, NULL otherwise and rte_errno is set.
@ -6264,9 +6229,14 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_flow_counter_pool *pool;
struct mlx5_flow_counter_mng *cmng = &priv->sh->sws_cmng;
bool fallback = priv->sh->sws_cmng.counter_fallback;
bool fallback = cmng->counter_fallback;
uint32_t size = sizeof(*pool);
if (cmng->n_valid == MLX5_COUNTER_POOLS_MAX_NUM) {
DRV_LOG(ERR, "All counter is in used, try again later.");
rte_errno = EAGAIN;
return NULL;
}
size += MLX5_COUNTERS_PER_POOL * MLX5_CNT_SIZE;
size += (!age ? 0 : MLX5_COUNTERS_PER_POOL * MLX5_AGE_SIZE);
pool = mlx5_malloc(MLX5_MEM_ZERO, size, 0, SOCKET_ID_ANY);
@ -6285,11 +6255,6 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
pool->time_of_last_age_check = MLX5_CURR_TIME_SEC;
rte_spinlock_lock(&cmng->pool_update_sl);
pool->index = cmng->n_valid;
if (pool->index == cmng->n && flow_dv_container_resize(dev)) {
mlx5_free(pool);
rte_spinlock_unlock(&cmng->pool_update_sl);
return NULL;
}
cmng->pools[pool->index] = pool;
cmng->n_valid++;
if (unlikely(fallback)) {
@ -12511,7 +12476,7 @@ flow_dv_aso_age_release(struct rte_eth_dev *dev, uint32_t age_idx)
}
/**
* Resize the ASO age pools array by MLX5_CNT_CONTAINER_RESIZE pools.
* Resize the ASO age pools array by MLX5_ASO_AGE_CONTAINER_RESIZE pools.
*
* @param[in] dev
* Pointer to the Ethernet device structure.
@ -12525,7 +12490,7 @@ flow_dv_aso_age_pools_resize(struct rte_eth_dev *dev)
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_aso_age_mng *mng = priv->sh->aso_age_mng;
void *old_pools = mng->pools;
uint32_t resize = mng->n + MLX5_CNT_CONTAINER_RESIZE;
uint32_t resize = mng->n + MLX5_ASO_AGE_CONTAINER_RESIZE;
uint32_t mem_size = sizeof(struct mlx5_aso_age_pool *) * resize;
void *pools = mlx5_malloc(MLX5_MEM_ZERO, mem_size, 0, SOCKET_ID_ANY);

View File

@ -232,27 +232,14 @@ flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t id __rte_unused)
break;
}
if (!cnt) {
struct mlx5_flow_counter_pool **pools;
uint32_t size;
if (n_valid == cmng->n) {
/* Resize the container pool array. */
size = sizeof(struct mlx5_flow_counter_pool *) *
(n_valid + MLX5_CNT_CONTAINER_RESIZE);
pools = mlx5_malloc(MLX5_MEM_ZERO, size, 0,
SOCKET_ID_ANY);
if (!pools)
return 0;
if (n_valid) {
memcpy(pools, cmng->pools,
sizeof(struct mlx5_flow_counter_pool *) *
n_valid);
mlx5_free(cmng->pools);
}
cmng->pools = pools;
cmng->n += MLX5_CNT_CONTAINER_RESIZE;
if (n_valid == MLX5_COUNTER_POOLS_MAX_NUM) {
DRV_LOG(ERR, "All counter is in used, try again later.");
rte_errno = EAGAIN;
return 0;
}
/* Allocate memory for new pool*/
/* Allocate memory for new pool */
size = sizeof(*pool) + sizeof(*cnt) * MLX5_COUNTERS_PER_POOL;
pool = mlx5_malloc(MLX5_MEM_ZERO, size, 0, SOCKET_ID_ANY);
if (!pool)