Widen EPOCH(9) usage in mlx5en(4).

Make the completion event path mostly lockless using EPOCH(9).

Implement a mechanism using EPOCH(9) which allows us to make
the callback path for completion events mostly lockless.

Simplify draining of callback events using epoch_wait().

While at it, make sure all receive completion callbacks are
covered by the network EPOCH(9), because this is required
when calling if_input() and ether_input() after r357012.

Sponsored by:	Mellanox Technologies
This commit is contained in:
Hans Petter Selasky 2020-01-30 12:35:13 +00:00
parent 780c568fec
commit e48813009c
4 changed files with 91 additions and 102 deletions

View File

@ -38,8 +38,6 @@ struct mlx5_core_cq {
int cqe_sz; int cqe_sz;
__be32 *set_ci_db; __be32 *set_ci_db;
__be32 *arm_db; __be32 *arm_db;
atomic_t refcount;
struct completion free;
unsigned vector; unsigned vector;
int irqn; int irqn;
void (*comp) (struct mlx5_core_cq *); void (*comp) (struct mlx5_core_cq *);

View File

@ -514,21 +514,17 @@ struct mlx5_core_health {
struct workqueue_struct *wq_cmd; struct workqueue_struct *wq_cmd;
}; };
#ifdef RATELIMIT
#define MLX5_CQ_LINEAR_ARRAY_SIZE (128 * 1024)
#else
#define MLX5_CQ_LINEAR_ARRAY_SIZE 1024 #define MLX5_CQ_LINEAR_ARRAY_SIZE 1024
#endif
struct mlx5_cq_linear_array_entry { struct mlx5_cq_linear_array_entry {
spinlock_t lock;
struct mlx5_core_cq * volatile cq; struct mlx5_core_cq * volatile cq;
}; };
struct mlx5_cq_table { struct mlx5_cq_table {
/* protect radix tree /* protect radix tree
*/ */
spinlock_t lock; spinlock_t writerlock;
atomic_t writercount;
struct radix_tree_root tree; struct radix_tree_root tree;
struct mlx5_cq_linear_array_entry linear_array[MLX5_CQ_LINEAR_ARRAY_SIZE]; struct mlx5_cq_linear_array_entry linear_array[MLX5_CQ_LINEAR_ARRAY_SIZE];
}; };

View File

@ -33,72 +33,91 @@
#include <dev/mlx5/cq.h> #include <dev/mlx5/cq.h>
#include "mlx5_core.h" #include "mlx5_core.h"
#include <sys/epoch.h>
static void
mlx5_cq_table_write_lock(struct mlx5_cq_table *table)
{
atomic_inc(&table->writercount);
/* make sure all see the updated writercount */
NET_EPOCH_WAIT();
spin_lock(&table->writerlock);
}
static void
mlx5_cq_table_write_unlock(struct mlx5_cq_table *table)
{
spin_unlock(&table->writerlock);
atomic_dec(&table->writercount);
/* drain all pending CQ callers */
NET_EPOCH_WAIT();
}
void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn) void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn)
{ {
struct mlx5_core_cq *cq;
struct mlx5_cq_table *table = &dev->priv.cq_table; struct mlx5_cq_table *table = &dev->priv.cq_table;
struct mlx5_core_cq *cq;
struct epoch_tracker et;
bool do_lock;
if (cqn < MLX5_CQ_LINEAR_ARRAY_SIZE) { NET_EPOCH_ENTER(et);
struct mlx5_cq_linear_array_entry *entry;
entry = &table->linear_array[cqn]; do_lock = atomic_read(&table->writercount) != 0;
spin_lock(&entry->lock); if (unlikely(do_lock))
cq = entry->cq; spin_lock(&table->writerlock);
if (cq == NULL) {
mlx5_core_warn(dev, if (likely(cqn < MLX5_CQ_LINEAR_ARRAY_SIZE))
"Completion event for bogus CQ 0x%x\n", cqn); cq = table->linear_array[cqn].cq;
} else { else
++cq->arm_sn; cq = radix_tree_lookup(&table->tree, cqn);
cq->comp(cq);
} if (unlikely(do_lock))
spin_unlock(&entry->lock); spin_unlock(&table->writerlock);
return;
if (likely(cq != NULL)) {
++cq->arm_sn;
cq->comp(cq);
} else {
mlx5_core_warn(dev,
"Completion event for bogus CQ 0x%x\n", cqn);
} }
spin_lock(&table->lock); NET_EPOCH_EXIT(et);
cq = radix_tree_lookup(&table->tree, cqn);
if (likely(cq))
atomic_inc(&cq->refcount);
spin_unlock(&table->lock);
if (!cq) {
mlx5_core_warn(dev, "Completion event for bogus CQ 0x%x\n", cqn);
return;
}
++cq->arm_sn;
cq->comp(cq);
if (atomic_dec_and_test(&cq->refcount))
complete(&cq->free);
} }
void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type) void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type)
{ {
struct mlx5_cq_table *table = &dev->priv.cq_table; struct mlx5_cq_table *table = &dev->priv.cq_table;
struct mlx5_core_cq *cq; struct mlx5_core_cq *cq;
struct epoch_tracker et;
bool do_lock;
spin_lock(&table->lock); NET_EPOCH_ENTER(et);
cq = radix_tree_lookup(&table->tree, cqn); do_lock = atomic_read(&table->writercount) != 0;
if (cq) if (unlikely(do_lock))
atomic_inc(&cq->refcount); spin_lock(&table->writerlock);
spin_unlock(&table->lock); if (likely(cqn < MLX5_CQ_LINEAR_ARRAY_SIZE))
cq = table->linear_array[cqn].cq;
else
cq = radix_tree_lookup(&table->tree, cqn);
if (!cq) { if (unlikely(do_lock))
mlx5_core_warn(dev, "Async event for bogus CQ 0x%x\n", cqn); spin_unlock(&table->writerlock);
return;
if (likely(cq != NULL)) {
cq->event(cq, event_type);
} else {
mlx5_core_warn(dev,
"Asynchronous event for bogus CQ 0x%x\n", cqn);
} }
cq->event(cq, event_type); NET_EPOCH_EXIT(et);
if (atomic_dec_and_test(&cq->refcount))
complete(&cq->free);
} }
int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
u32 *in, int inlen) u32 *in, int inlen)
{ {
@ -116,24 +135,16 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
cq->cqn = MLX5_GET(create_cq_out, out, cqn); cq->cqn = MLX5_GET(create_cq_out, out, cqn);
cq->cons_index = 0; cq->cons_index = 0;
cq->arm_sn = 0; cq->arm_sn = 0;
atomic_set(&cq->refcount, 1);
init_completion(&cq->free);
spin_lock_irq(&table->lock); mlx5_cq_table_write_lock(table);
err = radix_tree_insert(&table->tree, cq->cqn, cq); err = radix_tree_insert(&table->tree, cq->cqn, cq);
spin_unlock_irq(&table->lock); if (likely(err == 0 && cq->cqn < MLX5_CQ_LINEAR_ARRAY_SIZE))
table->linear_array[cq->cqn].cq = cq;
mlx5_cq_table_write_unlock(table);
if (err) if (err)
goto err_cmd; goto err_cmd;
if (cq->cqn < MLX5_CQ_LINEAR_ARRAY_SIZE) {
struct mlx5_cq_linear_array_entry *entry;
entry = &table->linear_array[cq->cqn];
spin_lock_irq(&entry->lock);
entry->cq = cq;
spin_unlock_irq(&entry->lock);
}
cq->pid = curthread->td_proc->p_pid; cq->pid = curthread->td_proc->p_pid;
return 0; return 0;
@ -152,44 +163,24 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
u32 out[MLX5_ST_SZ_DW(destroy_cq_out)] = {0}; u32 out[MLX5_ST_SZ_DW(destroy_cq_out)] = {0};
u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0}; u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0};
struct mlx5_core_cq *tmp; struct mlx5_core_cq *tmp;
int err;
if (cq->cqn < MLX5_CQ_LINEAR_ARRAY_SIZE) { mlx5_cq_table_write_lock(table);
struct mlx5_cq_linear_array_entry *entry; if (likely(cq->cqn < MLX5_CQ_LINEAR_ARRAY_SIZE))
table->linear_array[cq->cqn].cq = NULL;
entry = &table->linear_array[cq->cqn];
spin_lock_irq(&entry->lock);
entry->cq = NULL;
spin_unlock_irq(&entry->lock);
}
spin_lock_irq(&table->lock);
tmp = radix_tree_delete(&table->tree, cq->cqn); tmp = radix_tree_delete(&table->tree, cq->cqn);
spin_unlock_irq(&table->lock); mlx5_cq_table_write_unlock(table);
if (!tmp) {
if (unlikely(tmp == NULL)) {
mlx5_core_warn(dev, "cq 0x%x not found in tree\n", cq->cqn); mlx5_core_warn(dev, "cq 0x%x not found in tree\n", cq->cqn);
return -EINVAL; return -EINVAL;
} } else if (unlikely(tmp != cq)) {
if (tmp != cq) { mlx5_core_warn(dev, "corrupted cqn 0x%x\n", cq->cqn);
mlx5_core_warn(dev, "corruption on srqn 0x%x\n", cq->cqn);
return -EINVAL; return -EINVAL;
} }
MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ); MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ);
MLX5_SET(destroy_cq_in, in, cqn, cq->cqn); MLX5_SET(destroy_cq_in, in, cqn, cq->cqn);
err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
if (err)
goto out;
synchronize_irq(cq->irqn);
if (atomic_dec_and_test(&cq->refcount))
complete(&cq->free);
wait_for_completion(&cq->free);
out:
return err;
} }
EXPORT_SYMBOL(mlx5_core_destroy_cq); EXPORT_SYMBOL(mlx5_core_destroy_cq);
@ -259,17 +250,12 @@ int mlx5_core_modify_cq_moderation_mode(struct mlx5_core_dev *dev,
int mlx5_init_cq_table(struct mlx5_core_dev *dev) int mlx5_init_cq_table(struct mlx5_core_dev *dev)
{ {
struct mlx5_cq_table *table = &dev->priv.cq_table; struct mlx5_cq_table *table = &dev->priv.cq_table;
int err;
int x;
memset(table, 0, sizeof(*table)); memset(table, 0, sizeof(*table));
spin_lock_init(&table->lock); spin_lock_init(&table->writerlock);
for (x = 0; x != MLX5_CQ_LINEAR_ARRAY_SIZE; x++)
spin_lock_init(&table->linear_array[x].lock);
INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
err = 0;
return err; return 0;
} }
void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev) void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev)

View File

@ -2182,6 +2182,7 @@ mlx5e_open_channel(struct mlx5e_priv *priv,
struct mlx5e_channel_param *cparam, struct mlx5e_channel_param *cparam,
struct mlx5e_channel *c) struct mlx5e_channel *c)
{ {
struct epoch_tracker et;
int i, err; int i, err;
/* zero non-persistant data */ /* zero non-persistant data */
@ -2209,7 +2210,9 @@ mlx5e_open_channel(struct mlx5e_priv *priv,
goto err_close_sqs; goto err_close_sqs;
/* poll receive queue initially */ /* poll receive queue initially */
NET_EPOCH_ENTER(et);
c->rq.cq.mcq.comp(&c->rq.cq.mcq); c->rq.cq.mcq.comp(&c->rq.cq.mcq);
NET_EPOCH_EXIT(et);
return (0); return (0);
@ -3746,6 +3749,7 @@ static void
mlx5e_disable_rx_dma(struct mlx5e_channel *ch) mlx5e_disable_rx_dma(struct mlx5e_channel *ch)
{ {
struct mlx5e_rq *rq = &ch->rq; struct mlx5e_rq *rq = &ch->rq;
struct epoch_tracker et;
int err; int err;
mtx_lock(&rq->mtx); mtx_lock(&rq->mtx);
@ -3761,7 +3765,9 @@ mlx5e_disable_rx_dma(struct mlx5e_channel *ch)
while (!mlx5_wq_ll_is_empty(&rq->wq)) { while (!mlx5_wq_ll_is_empty(&rq->wq)) {
msleep(1); msleep(1);
NET_EPOCH_ENTER(et);
rq->cq.mcq.comp(&rq->cq.mcq); rq->cq.mcq.comp(&rq->cq.mcq);
NET_EPOCH_EXIT(et);
} }
/* /*
@ -3779,6 +3785,7 @@ static void
mlx5e_enable_rx_dma(struct mlx5e_channel *ch) mlx5e_enable_rx_dma(struct mlx5e_channel *ch)
{ {
struct mlx5e_rq *rq = &ch->rq; struct mlx5e_rq *rq = &ch->rq;
struct epoch_tracker et;
int err; int err;
rq->wq.wqe_ctr = 0; rq->wq.wqe_ctr = 0;
@ -3791,7 +3798,9 @@ mlx5e_enable_rx_dma(struct mlx5e_channel *ch)
rq->enabled = 1; rq->enabled = 1;
NET_EPOCH_ENTER(et);
rq->cq.mcq.comp(&rq->cq.mcq); rq->cq.mcq.comp(&rq->cq.mcq);
NET_EPOCH_EXIT(et);
} }
void void