net/mlx5: fix flow counter container resize

The design of counter container resize used double buffer algorithm in
order to synchronize between the query thread to the control thread.
When the control thread detected resize need, it created new bigger
buffer for the counter pools in a new container and change the container
index atomically.
In case the query thread had not detect the previous resize before a new
one need was detected by the control thread, the control thread returned
EAGAIN to the flow creation API used a COUNT action.

The rte_flow API doesn't allow unblocked commands and doesn't expect to
get EAGAIN error type.

So, when a lot of flows were created between 2 different periodic
queries, 2 different resizes might try to be created and caused EAGAIN
error.
This behavior may blame flow creations.

Change the synchronization way to use lock instead of double buffer
algorithm.

The critical section of this lock is very small, so flow insertion
rate should not be decreased.

Fixes: ebbac312e4 ("net/mlx5: resize a full counter container")
Cc: stable@dpdk.org

Signed-off-by: Matan Azrad <matan@mellanox.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
This commit is contained in:
Matan Azrad 2020-05-12 12:52:13 +00:00 committed by Ferruh Yigit
parent f772cc424c
commit 5af61440dd
6 changed files with 104 additions and 148 deletions

View File

@ -467,13 +467,13 @@ mlx5_flow_aging_init(struct mlx5_ibv_shared *sh)
static void
mlx5_flow_counters_mng_init(struct mlx5_ibv_shared *sh)
{
uint8_t i, age;
int i;
sh->cmng.age = 0;
memset(&sh->cmng, 0, sizeof(sh->cmng));
TAILQ_INIT(&sh->cmng.flow_counters);
for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) {
for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i)
TAILQ_INIT(&sh->cmng.ccont[i][age].pool_list);
for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i) {
TAILQ_INIT(&sh->cmng.ccont[i].pool_list);
rte_spinlock_init(&sh->cmng.ccont[i].resize_sl);
}
}
@ -504,7 +504,7 @@ static void
mlx5_flow_counters_mng_close(struct mlx5_ibv_shared *sh)
{
struct mlx5_counter_stats_mem_mng *mng;
uint8_t i, age = 0;
int i;
int j;
int retries = 1024;
@ -515,42 +515,34 @@ mlx5_flow_counters_mng_close(struct mlx5_ibv_shared *sh)
break;
rte_pause();
}
for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i) {
struct mlx5_flow_counter_pool *pool;
uint32_t batch = !!(i > 1);
for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) {
for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i) {
struct mlx5_flow_counter_pool *pool;
uint32_t batch = !!(i % 2);
if (!sh->cmng.ccont[i][age].pools)
continue;
pool = TAILQ_FIRST(&sh->cmng.ccont[i][age].pool_list);
while (pool) {
if (batch) {
if (pool->min_dcs)
claim_zero
(mlx5_devx_cmd_destroy
(pool->min_dcs));
}
for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) {
if (MLX5_POOL_GET_CNT(pool, j)->action)
claim_zero
(mlx5_glue->destroy_flow_action
(MLX5_POOL_GET_CNT
(pool, j)->action));
if (!batch && MLX5_GET_POOL_CNT_EXT
(pool, j)->dcs)
claim_zero(mlx5_devx_cmd_destroy
(MLX5_GET_POOL_CNT_EXT
(pool, j)->dcs));
}
TAILQ_REMOVE(&sh->cmng.ccont[i][age].pool_list,
pool, next);
rte_free(pool);
pool = TAILQ_FIRST
(&sh->cmng.ccont[i][age].pool_list);
if (!sh->cmng.ccont[i].pools)
continue;
pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list);
while (pool) {
if (batch && pool->min_dcs)
claim_zero(mlx5_devx_cmd_destroy
(pool->min_dcs));
for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) {
if (MLX5_POOL_GET_CNT(pool, j)->action)
claim_zero
(mlx5_glue->destroy_flow_action
(MLX5_POOL_GET_CNT
(pool, j)->action));
if (!batch && MLX5_GET_POOL_CNT_EXT
(pool, j)->dcs)
claim_zero(mlx5_devx_cmd_destroy
(MLX5_GET_POOL_CNT_EXT
(pool, j)->dcs));
}
rte_free(sh->cmng.ccont[i][age].pools);
TAILQ_REMOVE(&sh->cmng.ccont[i].pool_list, pool, next);
rte_free(pool);
pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list);
}
rte_free(sh->cmng.ccont[i].pools);
}
mng = LIST_FIRST(&sh->cmng.mem_mngs);
while (mng) {

View File

@ -227,13 +227,11 @@ struct mlx5_drop {
#define CNTEXT_SIZE (sizeof(struct mlx5_flow_counter_ext))
#define AGE_SIZE (sizeof(struct mlx5_age_param))
#define MLX5_AGING_TIME_DELAY 7
#define CNT_POOL_TYPE_EXT (1 << 0)
#define CNT_POOL_TYPE_AGE (1 << 1)
#define IS_EXT_POOL(pool) (((pool)->type) & CNT_POOL_TYPE_EXT)
#define IS_AGE_POOL(pool) (((pool)->type) & CNT_POOL_TYPE_AGE)
#define MLX_CNT_IS_AGE(counter) ((counter) & MLX5_CNT_AGE_OFFSET ? 1 : 0)
#define MLX5_CNT_LEN(pool) \
(CNT_SIZE + \
(IS_AGE_POOL(pool) ? AGE_SIZE : 0) + \
@ -270,6 +268,17 @@ enum {
AGE_TMOUT, /* Timeout, wait for rte_flow_get_aged_flows and destroy. */
};
#define MLX5_CNT_CONTAINER(sh, batch, age) (&(sh)->cmng.ccont \
[(batch) * 2 + (age)])
enum {
MLX5_CCONT_TYPE_SINGLE,
MLX5_CCONT_TYPE_SINGLE_FOR_AGE,
MLX5_CCONT_TYPE_BATCH,
MLX5_CCONT_TYPE_BATCH_FOR_AGE,
MLX5_CCONT_TYPE_MAX,
};
/* Counter age parameter. */
struct mlx5_age_param {
rte_atomic16_t state; /**< Age state. */
@ -313,7 +322,6 @@ struct mlx5_flow_counter_ext {
};
};
TAILQ_HEAD(mlx5_counters, mlx5_flow_counter);
/* Generic counter pool structure - query is in pool resolution. */
@ -358,17 +366,16 @@ TAILQ_HEAD(mlx5_counter_pools, mlx5_flow_counter_pool);
struct mlx5_pools_container {
rte_atomic16_t n_valid; /* Number of valid pools. */
uint16_t n; /* Number of pools. */
rte_spinlock_t resize_sl; /* The resize lock. */
struct mlx5_counter_pools pool_list; /* Counter pool list. */
struct mlx5_flow_counter_pool **pools; /* Counter pool array. */
struct mlx5_counter_stats_mem_mng *init_mem_mng;
struct mlx5_counter_stats_mem_mng *mem_mng;
/* Hold the memory management for the next allocated pools raws. */
};
/* Counter global management structure. */
struct mlx5_flow_counter_mng {
uint8_t mhi[2][2]; /* master \ host and age \ no age container index. */
struct mlx5_pools_container ccont[2 * 2][2];
/* master \ host and age \ no age pools container. */
struct mlx5_pools_container ccont[MLX5_CCONT_TYPE_MAX];
struct mlx5_counters flow_counters; /* Legacy flow counter list. */
uint8_t pending_queries;
uint8_t batch;
@ -378,6 +385,7 @@ struct mlx5_flow_counter_mng {
LIST_HEAD(mem_mngs, mlx5_counter_stats_mem_mng) mem_mngs;
LIST_HEAD(stat_raws, mlx5_counter_stats_raw) free_stat_raws;
};
#define MLX5_AGE_EVENT_NEW 1
#define MLX5_AGE_TRIGGER 2
#define MLX5_AGE_SET(age_info, BIT) \
@ -393,6 +401,7 @@ struct mlx5_age_info {
struct mlx5_counters aged_counters; /* Aged flow counter list. */
rte_spinlock_t aged_sl; /* Aged flow counter list lock. */
};
/* Per port data of shared IB device. */
struct mlx5_ibv_shared_port {
uint32_t ih_port_id;

View File

@ -5801,16 +5801,11 @@ mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
static uint32_t
mlx5_get_all_valid_pool_count(struct mlx5_ibv_shared *sh)
{
uint8_t age, i;
int i;
uint32_t pools_n = 0;
struct mlx5_pools_container *cont;
for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) {
for (i = 0; i < 2 ; ++i) {
cont = MLX5_CNT_CONTAINER(sh, i, 0, age);
pools_n += rte_atomic16_read(&cont->n_valid);
}
}
for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i)
pools_n += rte_atomic16_read(&sh->cmng.ccont[i].n_valid);
return pools_n;
}
@ -5855,32 +5850,19 @@ mlx5_flow_query_alarm(void *arg)
uint8_t age = sh->cmng.age;
uint16_t pool_index = sh->cmng.pool_index;
struct mlx5_pools_container *cont;
struct mlx5_pools_container *mcont;
struct mlx5_flow_counter_pool *pool;
int cont_loop = MLX5_CCONT_TYPE_MAX;
if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
goto set_alarm;
next_container:
cont = MLX5_CNT_CONTAINER(sh, batch, 1, age);
mcont = MLX5_CNT_CONTAINER(sh, batch, 0, age);
/* Check if resize was done and need to flip a container. */
if (cont != mcont) {
if (cont->pools) {
/* Clean the old container. */
rte_free(cont->pools);
memset(cont, 0, sizeof(*cont));
}
rte_cio_wmb();
/* Flip the host container. */
sh->cmng.mhi[batch][age] ^= (uint8_t)2;
cont = mcont;
}
cont = MLX5_CNT_CONTAINER(sh, batch, age);
rte_spinlock_lock(&cont->resize_sl);
if (!cont->pools) {
/* 2 empty containers case is unexpected. */
if (unlikely(batch != sh->cmng.batch) &&
unlikely(age != sh->cmng.age)) {
rte_spinlock_unlock(&cont->resize_sl);
/* Check if all the containers are empty. */
if (unlikely(--cont_loop == 0))
goto set_alarm;
}
batch ^= 0x1;
pool_index = 0;
if (batch == 0 && pool_index == 0) {
@ -5891,6 +5873,7 @@ mlx5_flow_query_alarm(void *arg)
goto next_container;
}
pool = cont->pools[pool_index];
rte_spinlock_unlock(&cont->resize_sl);
if (pool->raw_hw)
/* There is a pool query in progress. */
goto set_alarm;

View File

@ -900,12 +900,6 @@ struct mlx5_flow_driver_ops {
mlx5_flow_get_aged_flows_t get_aged_flows;
};
#define MLX5_CNT_CONTAINER(sh, batch, thread, age) (&(sh)->cmng.ccont \
[(((sh)->cmng.mhi[batch][age] >> (thread)) & 0x1) * 2 + (batch)][age])
#define MLX5_CNT_CONTAINER_UNUSED(sh, batch, thread, age) (&(sh)->cmng.ccont \
[(~((sh)->cmng.mhi[batch][age] >> (thread)) & 0x1) * 2 + (batch)][age])
/* mlx5_flow.c */
struct mlx5_flow_id_pool *mlx5_flow_id_pool_alloc(uint32_t max_id);

View File

@ -4038,7 +4038,7 @@ flow_dv_counter_get_by_idx(struct rte_eth_dev *dev,
idx -= MLX5_CNT_BATCH_OFFSET;
batch = 1;
}
cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0, age);
cont = MLX5_CNT_CONTAINER(priv->sh, batch, age);
MLX5_ASSERT(idx / MLX5_COUNTERS_PER_POOL < cont->n);
pool = cont->pools[idx / MLX5_COUNTERS_PER_POOL];
MLX5_ASSERT(pool);
@ -4165,69 +4165,55 @@ flow_dv_create_counter_stat_mem_mng(struct rte_eth_dev *dev, int raws_n)
* Whether the pool is for Aging counter.
*
* @return
* The new container pointer on success, otherwise NULL and rte_errno is set.
* 0 on success, otherwise negative errno value and rte_errno is set.
*/
static struct mlx5_pools_container *
static int
flow_dv_container_resize(struct rte_eth_dev *dev,
uint32_t batch, uint32_t age)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_pools_container *cont =
MLX5_CNT_CONTAINER(priv->sh, batch, 0, age);
struct mlx5_pools_container *new_cont =
MLX5_CNT_CONTAINER_UNUSED(priv->sh, batch, 0, age);
struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch,
age);
struct mlx5_counter_stats_mem_mng *mem_mng = NULL;
void *old_pools = cont->pools;
uint32_t resize = cont->n + MLX5_CNT_CONTAINER_RESIZE;
uint32_t mem_size = sizeof(struct mlx5_flow_counter_pool *) * resize;
int i;
void *pools = rte_calloc(__func__, 1, mem_size, 0);
/* Fallback mode has no background thread. Skip the check. */
if (!priv->counter_fallback &&
cont != MLX5_CNT_CONTAINER(priv->sh, batch, 1, age)) {
/* The last resize still hasn't detected by the host thread. */
rte_errno = EAGAIN;
return NULL;
}
new_cont->pools = rte_calloc(__func__, 1, mem_size, 0);
if (!new_cont->pools) {
if (!pools) {
rte_errno = ENOMEM;
return NULL;
return -ENOMEM;
}
if (cont->n)
memcpy(new_cont->pools, cont->pools, cont->n *
sizeof(struct mlx5_flow_counter_pool *));
if (old_pools)
memcpy(pools, old_pools, cont->n *
sizeof(struct mlx5_flow_counter_pool *));
/*
* Fallback mode query the counter directly, no background query
* resources are needed.
*/
if (!priv->counter_fallback) {
int i;
mem_mng = flow_dv_create_counter_stat_mem_mng(dev,
MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES);
MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES);
if (!mem_mng) {
rte_free(new_cont->pools);
return NULL;
rte_free(pools);
return -ENOMEM;
}
for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
LIST_INSERT_HEAD(&priv->sh->cmng.free_stat_raws,
mem_mng->raws +
MLX5_CNT_CONTAINER_RESIZE +
i, next);
} else {
/*
* Release the old container pools directly as no background
* thread helps that.
*/
rte_free(cont->pools);
}
new_cont->n = resize;
rte_atomic16_set(&new_cont->n_valid, rte_atomic16_read(&cont->n_valid));
TAILQ_INIT(&new_cont->pool_list);
TAILQ_CONCAT(&new_cont->pool_list, &cont->pool_list, next);
new_cont->init_mem_mng = mem_mng;
rte_cio_wmb();
/* Flip the master container. */
priv->sh->cmng.mhi[batch][age] ^= (uint8_t)1;
return new_cont;
rte_spinlock_lock(&cont->resize_sl);
cont->n = resize;
cont->mem_mng = mem_mng;
cont->pools = pools;
rte_spinlock_unlock(&cont->resize_sl);
if (old_pools)
rte_free(old_pools);
return 0;
}
/**
@ -4299,22 +4285,19 @@ _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts,
* @return
* The pool container pointer on success, NULL otherwise and rte_errno is set.
*/
static struct mlx5_pools_container *
static struct mlx5_flow_counter_pool *
flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
uint32_t batch, uint32_t age)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_flow_counter_pool *pool;
struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch,
0, age);
age);
int16_t n_valid = rte_atomic16_read(&cont->n_valid);
uint32_t size = sizeof(*pool);
if (cont->n == n_valid) {
cont = flow_dv_container_resize(dev, batch, age);
if (!cont)
return NULL;
}
if (cont->n == n_valid && flow_dv_container_resize(dev, batch, age))
return NULL;
size += MLX5_COUNTERS_PER_POOL * CNT_SIZE;
size += (batch ? 0 : MLX5_COUNTERS_PER_POOL * CNTEXT_SIZE);
size += (!age ? 0 : MLX5_COUNTERS_PER_POOL * AGE_SIZE);
@ -4325,8 +4308,8 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
}
pool->min_dcs = dcs;
if (!priv->counter_fallback)
pool->raw = cont->init_mem_mng->raws + n_valid %
MLX5_CNT_CONTAINER_RESIZE;
pool->raw = cont->mem_mng->raws + n_valid %
MLX5_CNT_CONTAINER_RESIZE;
pool->raw_hw = NULL;
pool->type = 0;
pool->type |= (batch ? 0 : CNT_POOL_TYPE_EXT);
@ -4354,7 +4337,7 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
/* Pool initialization must be updated before host thread access. */
rte_cio_wmb();
rte_atomic16_add(&cont->n_valid, 1);
return cont;
return pool;
}
/**
@ -4378,7 +4361,7 @@ flow_dv_counter_update_min_dcs(struct rte_eth_dev *dev,
struct mlx5_flow_counter_pool *other;
struct mlx5_pools_container *cont;
cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0, (age ^ 0x1));
cont = MLX5_CNT_CONTAINER(priv->sh, batch, (age ^ 0x1));
other = flow_dv_find_pool_by_id(cont, pool->min_dcs->id);
if (!other)
return;
@ -4403,10 +4386,10 @@ flow_dv_counter_update_min_dcs(struct rte_eth_dev *dev,
* Whether the pool is for counter that was allocated for aging.
*
* @return
* The counter container pointer and @p cnt_free is set on success,
* The counter pool pointer and @p cnt_free is set on success,
* NULL otherwise and rte_errno is set.
*/
static struct mlx5_pools_container *
static struct mlx5_flow_counter_pool *
flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
struct mlx5_flow_counter **cnt_free,
uint32_t batch, uint32_t age)
@ -4418,7 +4401,7 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
struct mlx5_flow_counter *cnt;
uint32_t i;
cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0, age);
cont = MLX5_CNT_CONTAINER(priv->sh, batch, age);
if (!batch) {
/* bulk_bitmap must be 0 for single counter allocation. */
dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0);
@ -4426,12 +4409,11 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
return NULL;
pool = flow_dv_find_pool_by_id(cont, dcs->id);
if (!pool) {
cont = flow_dv_pool_create(dev, dcs, batch, age);
if (!cont) {
pool = flow_dv_pool_create(dev, dcs, batch, age);
if (!pool) {
mlx5_devx_cmd_destroy(dcs);
return NULL;
}
pool = TAILQ_FIRST(&cont->pool_list);
} else if (dcs->id < pool->min_dcs->id) {
rte_atomic64_set(&pool->a64_dcs,
(int64_t)(uintptr_t)dcs);
@ -4443,7 +4425,7 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
MLX5_GET_POOL_CNT_EXT(pool, i)->dcs = dcs;
*cnt_free = cnt;
return cont;
return pool;
}
/* bulk_bitmap is in 128 counters units. */
if (priv->config.hca_attr.flow_counter_bulk_alloc_bitmap & 0x4)
@ -4452,18 +4434,17 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
rte_errno = ENODATA;
return NULL;
}
cont = flow_dv_pool_create(dev, dcs, batch, age);
if (!cont) {
pool = flow_dv_pool_create(dev, dcs, batch, age);
if (!pool) {
mlx5_devx_cmd_destroy(dcs);
return NULL;
}
pool = TAILQ_FIRST(&cont->pool_list);
for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
cnt = MLX5_POOL_GET_CNT(pool, i);
TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
}
*cnt_free = MLX5_POOL_GET_CNT(pool, 0);
return cont;
return pool;
}
/**
@ -4537,7 +4518,7 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
*/
uint32_t batch = (group && !shared && !priv->counter_fallback) ? 1 : 0;
struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch,
0, age);
age);
uint32_t cnt_idx;
if (!priv->config.devx) {
@ -4576,10 +4557,9 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
cnt_free = NULL;
}
if (!cnt_free) {
cont = flow_dv_counter_pool_prepare(dev, &cnt_free, batch, age);
if (!cont)
pool = flow_dv_counter_pool_prepare(dev, &cnt_free, batch, age);
if (!pool)
return 0;
pool = TAILQ_FIRST(&cont->pool_list);
}
if (!batch)
cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt_free);

View File

@ -56,8 +56,7 @@ flow_verbs_counter_get_by_idx(struct rte_eth_dev *dev,
struct mlx5_flow_counter_pool **ppool)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0,
0);
struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0);
struct mlx5_flow_counter_pool *pool;
idx--;
@ -152,8 +151,7 @@ static uint32_t
flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0,
0);
struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0);
struct mlx5_flow_counter_pool *pool = NULL;
struct mlx5_flow_counter_ext *cnt_ext = NULL;
struct mlx5_flow_counter *cnt = NULL;