net/mlx5: make three level table thread safe

This commit adds thread safety support to the three-level table using a
spinlock and a reference counter for each table entry.

A new mlx5_l3t_prepare_entry() function is added in order to support
multi-threaded operation.

Signed-off-by: Suanming Mou <suanmingm@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
This commit is contained in:
Suanming Mou 2020-10-20 11:02:25 +08:00 committed by Ferruh Yigit
parent 3aa279157f
commit 0796c7b1de
2 changed files with 230 additions and 54 deletions

View File

@ -551,26 +551,23 @@ mlx5_l3t_create(enum mlx5_l3t_type type)
tbl->type = type;
switch (type) {
case MLX5_L3T_TYPE_WORD:
l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_word) +
sizeof(uint16_t) * MLX5_L3T_ET_SIZE;
l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_word);
l3t_ip_cfg.type = "mlx5_l3t_e_tbl_w";
break;
case MLX5_L3T_TYPE_DWORD:
l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_dword) +
sizeof(uint32_t) * MLX5_L3T_ET_SIZE;
l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_dword);
l3t_ip_cfg.type = "mlx5_l3t_e_tbl_dw";
break;
case MLX5_L3T_TYPE_QWORD:
l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_qword) +
sizeof(uint64_t) * MLX5_L3T_ET_SIZE;
l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_qword);
l3t_ip_cfg.type = "mlx5_l3t_e_tbl_qw";
break;
default:
l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_ptr) +
sizeof(void *) * MLX5_L3T_ET_SIZE;
l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_ptr);
l3t_ip_cfg.type = "mlx5_l3t_e_tbl_tpr";
break;
}
rte_spinlock_init(&tbl->sl);
tbl->eip = mlx5_ipool_create(&l3t_ip_cfg);
if (!tbl->eip) {
rte_errno = ENOMEM;
@ -620,11 +617,15 @@ mlx5_l3t_destroy(struct mlx5_l3t_tbl *tbl)
mlx5_free(tbl);
}
uint32_t
mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
union mlx5_l3t_data *data)
static int32_t
__l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
union mlx5_l3t_data *data)
{
struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
struct mlx5_l3t_entry_word *w_e_tbl;
struct mlx5_l3t_entry_dword *dw_e_tbl;
struct mlx5_l3t_entry_qword *qw_e_tbl;
struct mlx5_l3t_entry_ptr *ptr_e_tbl;
void *e_tbl;
uint32_t entry_idx;
@ -640,26 +641,46 @@ mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
entry_idx = idx & MLX5_L3T_ET_MASK;
switch (tbl->type) {
case MLX5_L3T_TYPE_WORD:
data->word = ((struct mlx5_l3t_entry_word *)e_tbl)->entry
[entry_idx];
w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
data->word = w_e_tbl->entry[entry_idx].data;
if (w_e_tbl->entry[entry_idx].data)
w_e_tbl->entry[entry_idx].ref_cnt++;
break;
case MLX5_L3T_TYPE_DWORD:
data->dword = ((struct mlx5_l3t_entry_dword *)e_tbl)->entry
[entry_idx];
dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
data->dword = dw_e_tbl->entry[entry_idx].data;
if (dw_e_tbl->entry[entry_idx].data)
dw_e_tbl->entry[entry_idx].ref_cnt++;
break;
case MLX5_L3T_TYPE_QWORD:
data->qword = ((struct mlx5_l3t_entry_qword *)e_tbl)->entry
[entry_idx];
qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
data->qword = qw_e_tbl->entry[entry_idx].data;
if (qw_e_tbl->entry[entry_idx].data)
qw_e_tbl->entry[entry_idx].ref_cnt++;
break;
default:
data->ptr = ((struct mlx5_l3t_entry_ptr *)e_tbl)->entry
[entry_idx];
ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
data->ptr = ptr_e_tbl->entry[entry_idx].data;
if (ptr_e_tbl->entry[entry_idx].data)
ptr_e_tbl->entry[entry_idx].ref_cnt++;
break;
}
return 0;
}
void
/*
 * Locked front end for reading an entry of the three-level table.
 *
 * Holds the table spinlock around the call to __l3t_get_entry() and
 * hands back that helper's return value unchanged.
 */
int32_t
mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
		   union mlx5_l3t_data *data)
{
	int rc;

	rte_spinlock_lock(&tbl->sl);
	rc = __l3t_get_entry(tbl, idx, data);
	rte_spinlock_unlock(&tbl->sl);
	return rc;
}
int32_t
mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx)
{
struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
@ -670,36 +691,54 @@ mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx)
void *e_tbl;
uint32_t entry_idx;
uint64_t ref_cnt;
int32_t ret = -1;
rte_spinlock_lock(&tbl->sl);
g_tbl = tbl->tbl;
if (!g_tbl)
return;
goto out;
m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
if (!m_tbl)
return;
goto out;
e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
if (!e_tbl)
return;
goto out;
entry_idx = idx & MLX5_L3T_ET_MASK;
switch (tbl->type) {
case MLX5_L3T_TYPE_WORD:
w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
w_e_tbl->entry[entry_idx] = 0;
MLX5_ASSERT(w_e_tbl->entry[entry_idx].ref_cnt);
ret = --w_e_tbl->entry[entry_idx].ref_cnt;
if (ret)
goto out;
w_e_tbl->entry[entry_idx].data = 0;
ref_cnt = --w_e_tbl->ref_cnt;
break;
case MLX5_L3T_TYPE_DWORD:
dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
dw_e_tbl->entry[entry_idx] = 0;
MLX5_ASSERT(dw_e_tbl->entry[entry_idx].ref_cnt);
ret = --dw_e_tbl->entry[entry_idx].ref_cnt;
if (ret)
goto out;
dw_e_tbl->entry[entry_idx].data = 0;
ref_cnt = --dw_e_tbl->ref_cnt;
break;
case MLX5_L3T_TYPE_QWORD:
qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
qw_e_tbl->entry[entry_idx] = 0;
MLX5_ASSERT(qw_e_tbl->entry[entry_idx].ref_cnt);
ret = --qw_e_tbl->entry[entry_idx].ref_cnt;
if (ret)
goto out;
qw_e_tbl->entry[entry_idx].data = 0;
ref_cnt = --qw_e_tbl->ref_cnt;
break;
default:
ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
ptr_e_tbl->entry[entry_idx] = NULL;
MLX5_ASSERT(ptr_e_tbl->entry[entry_idx].ref_cnt);
ret = --ptr_e_tbl->entry[entry_idx].ref_cnt;
if (ret)
goto out;
ptr_e_tbl->entry[entry_idx].data = NULL;
ref_cnt = --ptr_e_tbl->ref_cnt;
break;
}
@ -718,11 +757,14 @@ mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx)
}
}
}
out:
rte_spinlock_unlock(&tbl->sl);
return ret;
}
uint32_t
mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
union mlx5_l3t_data *data)
static int32_t
__l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
union mlx5_l3t_data *data)
{
struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
struct mlx5_l3t_entry_word *w_e_tbl;
@ -783,24 +825,105 @@ mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
switch (tbl->type) {
case MLX5_L3T_TYPE_WORD:
w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
w_e_tbl->entry[entry_idx] = data->word;
if (w_e_tbl->entry[entry_idx].data) {
data->word = w_e_tbl->entry[entry_idx].data;
w_e_tbl->entry[entry_idx].ref_cnt++;
rte_errno = EEXIST;
return -1;
}
w_e_tbl->entry[entry_idx].data = data->word;
w_e_tbl->entry[entry_idx].ref_cnt = 1;
w_e_tbl->ref_cnt++;
break;
case MLX5_L3T_TYPE_DWORD:
dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
dw_e_tbl->entry[entry_idx] = data->dword;
if (dw_e_tbl->entry[entry_idx].data) {
data->dword = dw_e_tbl->entry[entry_idx].data;
dw_e_tbl->entry[entry_idx].ref_cnt++;
rte_errno = EEXIST;
return -1;
}
dw_e_tbl->entry[entry_idx].data = data->dword;
dw_e_tbl->entry[entry_idx].ref_cnt = 1;
dw_e_tbl->ref_cnt++;
break;
case MLX5_L3T_TYPE_QWORD:
qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
qw_e_tbl->entry[entry_idx] = data->qword;
if (qw_e_tbl->entry[entry_idx].data) {
data->qword = qw_e_tbl->entry[entry_idx].data;
qw_e_tbl->entry[entry_idx].ref_cnt++;
rte_errno = EEXIST;
return -1;
}
qw_e_tbl->entry[entry_idx].data = data->qword;
qw_e_tbl->entry[entry_idx].ref_cnt = 1;
qw_e_tbl->ref_cnt++;
break;
default:
ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
ptr_e_tbl->entry[entry_idx] = data->ptr;
if (ptr_e_tbl->entry[entry_idx].data) {
data->ptr = ptr_e_tbl->entry[entry_idx].data;
ptr_e_tbl->entry[entry_idx].ref_cnt++;
rte_errno = EEXIST;
return -1;
}
ptr_e_tbl->entry[entry_idx].data = data->ptr;
ptr_e_tbl->entry[entry_idx].ref_cnt = 1;
ptr_e_tbl->ref_cnt++;
break;
}
return 0;
}
/*
 * Locked front end for storing an entry into the three-level table.
 *
 * Holds the table spinlock around the call to __l3t_set_entry() and
 * hands back that helper's return value unchanged.
 */
int32_t
mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
		   union mlx5_l3t_data *data)
{
	int rc;

	rte_spinlock_lock(&tbl->sl);
	rc = __l3t_set_entry(tbl, idx, data);
	rte_spinlock_unlock(&tbl->sl);
	return rc;
}
/*
 * Get the entry at idx, creating it through the user callback when the
 * lookup fails or yields zero/NULL data.
 *
 * The lookup, the callback and the store all run with the table
 * spinlock held.
 *
 * Returns 0 when the lookup found non-zero data; otherwise the callback's
 * non-zero return value, or the result of __l3t_set_entry() when the
 * callback returned 0.
 */
int32_t
mlx5_l3t_prepare_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
		       union mlx5_l3t_data *data,
		       mlx5_l3t_alloc_callback_fn cb, void *ctx)
{
	int32_t rc;
	int ready = 0;

	rte_spinlock_lock(&tbl->sl);
	/* Look the entry up first; it counts as ready only if non-zero. */
	rc = __l3t_get_entry(tbl, idx, data);
	if (rc == 0) {
		switch (tbl->type) {
		case MLX5_L3T_TYPE_WORD:
			ready = data->word ? 1 : 0;
			break;
		case MLX5_L3T_TYPE_DWORD:
			ready = data->dword ? 1 : 0;
			break;
		case MLX5_L3T_TYPE_QWORD:
			ready = data->qword ? 1 : 0;
			break;
		default:
			ready = data->ptr ? 1 : 0;
			break;
		}
	}
	if (!ready) {
		/* No usable data yet: let the user callback create it. */
		rc = cb(ctx, data);
		/* Store the newly created data only if the callback succeeded. */
		if (rc == 0)
			rc = __l3t_set_entry(tbl, idx, data);
	}
	rte_spinlock_unlock(&tbl->sl);
	return rc;
}

View File

@ -118,29 +118,41 @@ struct mlx5_l3t_level_tbl {
struct mlx5_l3t_entry_word {
uint32_t idx; /* Table index. */
uint64_t ref_cnt; /* Table ref_cnt. */
uint16_t entry[]; /* Entry array. */
};
struct {
uint16_t data;
uint32_t ref_cnt;
} entry[MLX5_L3T_ET_SIZE]; /* Entry array */
} __rte_packed;
/* L3 double word entry table data structure. */
struct mlx5_l3t_entry_dword {
uint32_t idx; /* Table index. */
uint64_t ref_cnt; /* Table ref_cnt. */
uint32_t entry[]; /* Entry array. */
};
struct {
uint32_t data;
int32_t ref_cnt;
} entry[MLX5_L3T_ET_SIZE]; /* Entry array */
} __rte_packed;
/* L3 quad word entry table data structure. */
struct mlx5_l3t_entry_qword {
uint32_t idx; /* Table index. */
uint64_t ref_cnt; /* Table ref_cnt. */
uint64_t entry[]; /* Entry array. */
};
struct {
uint64_t data;
uint32_t ref_cnt;
} entry[MLX5_L3T_ET_SIZE]; /* Entry array */
} __rte_packed;
/* L3 pointer entry table data structure. */
struct mlx5_l3t_entry_ptr {
uint32_t idx; /* Table index. */
uint64_t ref_cnt; /* Table ref_cnt. */
void *entry[]; /* Entry array. */
};
struct {
void *data;
uint32_t ref_cnt;
} entry[MLX5_L3T_ET_SIZE]; /* Entry array */
} __rte_packed;
/* L3 table data structure. */
struct mlx5_l3t_tbl {
@ -148,8 +160,13 @@ struct mlx5_l3t_tbl {
struct mlx5_indexed_pool *eip;
/* Table index pool handles. */
struct mlx5_l3t_level_tbl *tbl; /* Global table index. */
rte_spinlock_t sl; /* The table lock. */
};
/** Type of callback function that is used to allocate new entry data. */
typedef int32_t (*mlx5_l3t_alloc_callback_fn)(void *ctx,
union mlx5_l3t_data *data);
/*
* The indexed memory entry index is made up of trunk index and offset of
* the entry in the trunk. Since the entry index is 32 bits, in case user
@ -535,32 +552,68 @@ void mlx5_l3t_destroy(struct mlx5_l3t_tbl *tbl);
* 0 if success, -1 on error.
*/
uint32_t mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
int32_t mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
union mlx5_l3t_data *data);
/**
* This function clears the index entry from Three-level table.
*
* @param tbl
* Pointer to the l3t.
* @param idx
* Index to the entry.
*/
void mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx);
/**
* This function gets the index entry from Three-level table.
*
* If the index entry is not available, allocate new one by callback
* function and fill in the entry.
*
* @param tbl
* Pointer to the l3t.
* @param idx
* Index to the entry.
* @param data
* Pointer to the memory which contains the entry data save to l3t.
* Pointer to the memory which saves the entry data.
* When function call returns 0, data contains the entry data get from
* l3t.
* When function call returns -1, data is not modified.
* @param cb
* Callback function to allocate new data.
* @param ctx
* Context for callback function.
*
* @return
* 0 if success, -1 on error.
*/
uint32_t mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
int32_t mlx5_l3t_prepare_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
union mlx5_l3t_data *data,
mlx5_l3t_alloc_callback_fn cb, void *ctx);
/**
* This function decrements the reference counter of the index entry and
* clears the entry from the Three-level table when the counter reaches 0.
*
* @param tbl
* Pointer to the l3t.
* @param idx
* Index to the entry.
*
* @return
* The remaining reference count; 0 means the entry was cleared, -1 on error.
*/
int32_t mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx);
/**
* This function sets the index entry to Three-level table.
* If the entry is already set, the EEXIST errno will be given, and
* the set data will be filled to the data.
*
* @param[in] tbl
* Pointer to the l3t.
* @param[in] idx
* Index to the entry.
* @param[in,out] data
* Pointer to the memory which contains the entry data to save to l3t.
* If the entry is already set, the existing data will be filled in.
*
* @return
* 0 if success, -1 on error.
*/
int32_t mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
union mlx5_l3t_data *data);
/*