From e48813009c6adbc8b461d24074ab3e926d9ae3df Mon Sep 17 00:00:00 2001
From: Hans Petter Selasky
Date: Thu, 30 Jan 2020 12:35:13 +0000
Subject: [PATCH] Widen EPOCH(9) usage in mlx5en(4).

Make completion event path mostly lockless using EPOCH(9).

Implement a mechanism using EPOCH(9) which allows us to make the
callback path for completion events mostly lockless.

Simplify draining callback events using epoch_wait().

While at it make sure all receive completion callbacks are covered by
the network EPOCH(9), because this is required when calling if_input()
and ether_input() after r357012.

Sponsored by:	Mellanox Technologies
---
 sys/dev/mlx5/cq.h                   |   2 -
 sys/dev/mlx5/driver.h               |   8 +-
 sys/dev/mlx5/mlx5_core/mlx5_cq.c    | 174 +++++++++++++---------------
 sys/dev/mlx5/mlx5_en/mlx5_en_main.c |   9 ++
 4 files changed, 91 insertions(+), 102 deletions(-)

diff --git a/sys/dev/mlx5/cq.h b/sys/dev/mlx5/cq.h
index 161fbe13001a..9e22f8dbd497 100644
--- a/sys/dev/mlx5/cq.h
+++ b/sys/dev/mlx5/cq.h
@@ -38,8 +38,6 @@ struct mlx5_core_cq {
 	int cqe_sz;
 	__be32 *set_ci_db;
 	__be32 *arm_db;
-	atomic_t refcount;
-	struct completion free;
 	unsigned vector;
 	int irqn;
 	void (*comp) (struct mlx5_core_cq *);
diff --git a/sys/dev/mlx5/driver.h b/sys/dev/mlx5/driver.h
index 5ea37307b8b6..e2a68725751b 100644
--- a/sys/dev/mlx5/driver.h
+++ b/sys/dev/mlx5/driver.h
@@ -514,21 +514,17 @@ struct mlx5_core_health {
 	struct workqueue_struct *wq_cmd;
 };
 
-#ifdef RATELIMIT
-#define MLX5_CQ_LINEAR_ARRAY_SIZE (128 * 1024)
-#else
 #define MLX5_CQ_LINEAR_ARRAY_SIZE 1024
-#endif
 
 struct mlx5_cq_linear_array_entry {
-	spinlock_t lock;
 	struct mlx5_core_cq * volatile cq;
 };
 
 struct mlx5_cq_table {
 	/* protect radix tree */
-	spinlock_t lock;
+	spinlock_t writerlock;
+	atomic_t writercount;
 	struct radix_tree_root tree;
 	struct mlx5_cq_linear_array_entry linear_array[MLX5_CQ_LINEAR_ARRAY_SIZE];
 };
 
diff --git a/sys/dev/mlx5/mlx5_core/mlx5_cq.c b/sys/dev/mlx5/mlx5_core/mlx5_cq.c
index cd2093924242..4cbc4eb61377 100644
--- a/sys/dev/mlx5/mlx5_core/mlx5_cq.c
+++ b/sys/dev/mlx5/mlx5_core/mlx5_cq.c
@@ -33,72 +33,91 @@
 #include 
 #include "mlx5_core.h"
+#include 
+
+static void
+mlx5_cq_table_write_lock(struct mlx5_cq_table *table)
+{
+
+	atomic_inc(&table->writercount);
+	/* make sure all see the updated writercount */
+	NET_EPOCH_WAIT();
+	spin_lock(&table->writerlock);
+}
+
+static void
+mlx5_cq_table_write_unlock(struct mlx5_cq_table *table)
+{
+
+	spin_unlock(&table->writerlock);
+	atomic_dec(&table->writercount);
+	/* drain all pending CQ callers */
+	NET_EPOCH_WAIT();
+}
+
 void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn)
 {
-	struct mlx5_core_cq *cq;
 	struct mlx5_cq_table *table = &dev->priv.cq_table;
+	struct mlx5_core_cq *cq;
+	struct epoch_tracker et;
+	bool do_lock;
 
-	if (cqn < MLX5_CQ_LINEAR_ARRAY_SIZE) {
-		struct mlx5_cq_linear_array_entry *entry;
+	NET_EPOCH_ENTER(et);
 
-		entry = &table->linear_array[cqn];
-		spin_lock(&entry->lock);
-		cq = entry->cq;
-		if (cq == NULL) {
-			mlx5_core_warn(dev,
-			    "Completion event for bogus CQ 0x%x\n", cqn);
-		} else {
-			++cq->arm_sn;
-			cq->comp(cq);
-		}
-		spin_unlock(&entry->lock);
-		return;
+	do_lock = atomic_read(&table->writercount) != 0;
+	if (unlikely(do_lock))
+		spin_lock(&table->writerlock);
+
+	if (likely(cqn < MLX5_CQ_LINEAR_ARRAY_SIZE))
+		cq = table->linear_array[cqn].cq;
+	else
+		cq = radix_tree_lookup(&table->tree, cqn);
+
+	if (unlikely(do_lock))
+		spin_unlock(&table->writerlock);
+
+	if (likely(cq != NULL)) {
+		++cq->arm_sn;
+		cq->comp(cq);
+	} else {
+		mlx5_core_warn(dev,
+		    "Completion event for bogus CQ 0x%x\n", cqn);
 	}
-	spin_lock(&table->lock);
-	cq = radix_tree_lookup(&table->tree, cqn);
-	if (likely(cq))
-		atomic_inc(&cq->refcount);
-	spin_unlock(&table->lock);
-
-	if (!cq) {
-		mlx5_core_warn(dev, "Completion event for bogus CQ 0x%x\n", cqn);
-		return;
-	}
-
-	++cq->arm_sn;
-
-	cq->comp(cq);
-
-	if (atomic_dec_and_test(&cq->refcount))
-		complete(&cq->free);
+	NET_EPOCH_EXIT(et);
 }
 
 void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type)
 {
 	struct mlx5_cq_table *table = &dev->priv.cq_table;
 	struct mlx5_core_cq *cq;
+	struct epoch_tracker et;
+	bool do_lock;
 
-	spin_lock(&table->lock);
+	NET_EPOCH_ENTER(et);
 
-	cq = radix_tree_lookup(&table->tree, cqn);
-	if (cq)
-		atomic_inc(&cq->refcount);
+	do_lock = atomic_read(&table->writercount) != 0;
+	if (unlikely(do_lock))
+		spin_lock(&table->writerlock);
 
-	spin_unlock(&table->lock);
+	if (likely(cqn < MLX5_CQ_LINEAR_ARRAY_SIZE))
+		cq = table->linear_array[cqn].cq;
+	else
+		cq = radix_tree_lookup(&table->tree, cqn);
 
-	if (!cq) {
-		mlx5_core_warn(dev, "Async event for bogus CQ 0x%x\n", cqn);
-		return;
+	if (unlikely(do_lock))
+		spin_unlock(&table->writerlock);
+
+	if (likely(cq != NULL)) {
+		cq->event(cq, event_type);
+	} else {
+		mlx5_core_warn(dev,
+		    "Asynchronous event for bogus CQ 0x%x\n", cqn);
 	}
 
-	cq->event(cq, event_type);
-
-	if (atomic_dec_and_test(&cq->refcount))
-		complete(&cq->free);
+	NET_EPOCH_EXIT(et);
 }
 
-
 int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 			u32 *in, int inlen)
 {
@@ -116,24 +135,16 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 	cq->cqn = MLX5_GET(create_cq_out, out, cqn);
 	cq->cons_index = 0;
 	cq->arm_sn = 0;
-	atomic_set(&cq->refcount, 1);
-	init_completion(&cq->free);
 
-	spin_lock_irq(&table->lock);
+	mlx5_cq_table_write_lock(table);
 	err = radix_tree_insert(&table->tree, cq->cqn, cq);
-	spin_unlock_irq(&table->lock);
+	if (likely(err == 0 && cq->cqn < MLX5_CQ_LINEAR_ARRAY_SIZE))
+		table->linear_array[cq->cqn].cq = cq;
+	mlx5_cq_table_write_unlock(table);
+
 	if (err)
 		goto err_cmd;
 
-	if (cq->cqn < MLX5_CQ_LINEAR_ARRAY_SIZE) {
-		struct mlx5_cq_linear_array_entry *entry;
-
-		entry = &table->linear_array[cq->cqn];
-		spin_lock_irq(&entry->lock);
-		entry->cq = cq;
-		spin_unlock_irq(&entry->lock);
-	}
-
 	cq->pid = curthread->td_proc->p_pid;
 
 	return 0;
@@ -152,44 +163,24 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
 	u32 out[MLX5_ST_SZ_DW(destroy_cq_out)] = {0};
 	u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0};
 	struct mlx5_core_cq *tmp;
-	int err;
 
-	if (cq->cqn < MLX5_CQ_LINEAR_ARRAY_SIZE) {
-		struct mlx5_cq_linear_array_entry *entry;
-
-		entry = &table->linear_array[cq->cqn];
-		spin_lock_irq(&entry->lock);
-		entry->cq = NULL;
-		spin_unlock_irq(&entry->lock);
-	}
-
-	spin_lock_irq(&table->lock);
+	mlx5_cq_table_write_lock(table);
+	if (likely(cq->cqn < MLX5_CQ_LINEAR_ARRAY_SIZE))
+		table->linear_array[cq->cqn].cq = NULL;
 	tmp = radix_tree_delete(&table->tree, cq->cqn);
-	spin_unlock_irq(&table->lock);
-	if (!tmp) {
+	mlx5_cq_table_write_unlock(table);
+
+	if (unlikely(tmp == NULL)) {
 		mlx5_core_warn(dev, "cq 0x%x not found in tree\n", cq->cqn);
 		return -EINVAL;
-	}
-	if (tmp != cq) {
-		mlx5_core_warn(dev, "corruption on srqn 0x%x\n", cq->cqn);
+	} else if (unlikely(tmp != cq)) {
+		mlx5_core_warn(dev, "corrupted cqn 0x%x\n", cq->cqn);
 		return -EINVAL;
 	}
 
 	MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ);
 	MLX5_SET(destroy_cq_in, in, cqn, cq->cqn);
-	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-	if (err)
-		goto out;
-
-	synchronize_irq(cq->irqn);
-
-	if (atomic_dec_and_test(&cq->refcount))
-		complete(&cq->free);
-	wait_for_completion(&cq->free);
-
-out:
-
-	return err;
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_destroy_cq);
 
@@ -259,17 +250,12 @@ int mlx5_core_modify_cq_moderation_mode(struct mlx5_core_dev *dev,
 int mlx5_init_cq_table(struct mlx5_core_dev *dev)
 {
 	struct mlx5_cq_table *table = &dev->priv.cq_table;
-	int err;
-	int x;
 
 	memset(table, 0, sizeof(*table));
-	spin_lock_init(&table->lock);
-	for (x = 0; x != MLX5_CQ_LINEAR_ARRAY_SIZE; x++)
-		spin_lock_init(&table->linear_array[x].lock);
+	spin_lock_init(&table->writerlock);
 	INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
-	err = 0;
 
-	return err;
+	return 0;
 }
 
 void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev)
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
index 2c8b46af12cd..504f6c01591e 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
@@ -2182,6 +2182,7 @@ mlx5e_open_channel(struct mlx5e_priv *priv,
 	struct mlx5e_channel_param *cparam,
 	struct mlx5e_channel *c)
 {
+	struct epoch_tracker et;
 	int i, err;
 
 	/* zero non-persistant data */
@@ -2209,7 +2210,9 @@ mlx5e_open_channel(struct mlx5e_priv *priv,
 		goto err_close_sqs;
 
 	/* poll receive queue initially */
+	NET_EPOCH_ENTER(et);
 	c->rq.cq.mcq.comp(&c->rq.cq.mcq);
+	NET_EPOCH_EXIT(et);
 
 	return (0);
 
@@ -3746,6 +3749,7 @@ static void
 mlx5e_disable_rx_dma(struct mlx5e_channel *ch)
 {
 	struct mlx5e_rq *rq = &ch->rq;
+	struct epoch_tracker et;
 	int err;
 
 	mtx_lock(&rq->mtx);
@@ -3761,7 +3765,9 @@ mlx5e_disable_rx_dma(struct mlx5e_channel *ch)
 
 	while (!mlx5_wq_ll_is_empty(&rq->wq)) {
 		msleep(1);
+		NET_EPOCH_ENTER(et);
 		rq->cq.mcq.comp(&rq->cq.mcq);
+		NET_EPOCH_EXIT(et);
 	}
 
 	/*
@@ -3779,6 +3785,7 @@ static void
 mlx5e_enable_rx_dma(struct mlx5e_channel *ch)
 {
 	struct mlx5e_rq *rq = &ch->rq;
+	struct epoch_tracker et;
 	int err;
 
 	rq->wq.wqe_ctr = 0;
@@ -3791,7 +3798,9 @@ mlx5e_enable_rx_dma(struct mlx5e_channel *ch)
 
 	rq->enabled = 1;
 
+	NET_EPOCH_ENTER(et);
 	rq->cq.mcq.comp(&rq->cq.mcq);
+	NET_EPOCH_EXIT(et);
 }
 
 void
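
Note (not part of the patch): the heart of the change is the read-mostly CQ table scheme above. Completion and async-event callbacks look up the CQ without taking any lock unless a writer is active, and writers use NET_EPOCH_WAIT() twice: once to publish their presence to lock-free readers, and once to drain readers that may still reference the removed entry. The sketch below restates that scheme with the native FreeBSD epoch(9), mutex(9) and atomic(9) KPIs instead of the linuxkpi wrappers used by mlx5. All names (demo_table, demo_obj, demo_dispatch, demo_update) are hypothetical and the code is illustrative only, not driver code.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/epoch.h>		/* NET_EPOCH_ENTER/EXIT/WAIT */
#include <machine/atomic.h>

#define	DEMO_SLOTS	1024

struct demo_obj {
	void	(*handler)(struct demo_obj *);
};

struct demo_table {
	struct mtx	writerlock;	/* serializes writers only */
	volatile u_int	writercount;	/* non-zero while an update is pending */
	struct demo_obj * volatile slot[DEMO_SLOTS]; /* read-mostly array */
};

static void
demo_init(struct demo_table *t)
{
	memset(t, 0, sizeof(*t));
	mtx_init(&t->writerlock, "demo_writerlock", NULL, MTX_DEF);
}

/*
 * Reader, e.g. a completion handler: normally lock-free, it falls back
 * to the writer lock only while an update is in progress.
 */
static void
demo_dispatch(struct demo_table *t, unsigned int idx)
{
	struct epoch_tracker et;
	struct demo_obj *obj;
	bool do_lock;

	NET_EPOCH_ENTER(et);
	do_lock = (atomic_load_acq_int(&t->writercount) != 0);
	if (__predict_false(do_lock))
		mtx_lock(&t->writerlock);
	obj = t->slot[idx];
	if (__predict_false(do_lock))
		mtx_unlock(&t->writerlock);
	/*
	 * The object may be used here; the second NET_EPOCH_WAIT() in
	 * demo_update() keeps it valid until we leave the epoch.
	 */
	if (__predict_true(obj != NULL))
		obj->handler(obj);
	NET_EPOCH_EXIT(et);
}

/*
 * Writer: announce itself, wait for lock-free readers to notice, update
 * under the lock, then wait again so no reader still uses the old pointer.
 */
static void
demo_update(struct demo_table *t, unsigned int idx, struct demo_obj *obj)
{
	atomic_add_int(&t->writercount, 1);
	/* Readers entering from now on will take the writer lock. */
	NET_EPOCH_WAIT();
	mtx_lock(&t->writerlock);
	t->slot[idx] = obj;
	mtx_unlock(&t->writerlock);
	atomic_subtract_int(&t->writercount, 1);
	/* Drain readers that may still reference the previous pointer. */
	NET_EPOCH_WAIT();
}

The second NET_EPOCH_WAIT() in demo_update() plays the same role as the one in mlx5_cq_table_write_unlock(): once it returns, no completion callback can still be running on a removed entry, which is what lets the patch delete the per-CQ refcount and completion used by the old mlx5_core_destroy_cq() path.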