From f2d43ff54d2f82b0ad6fc524e1cdcf331a42565f Mon Sep 17 00:00:00 2001 From: Dariusz Sosnowski Date: Thu, 6 Oct 2022 11:01:02 +0000 Subject: [PATCH] net/mlx5: allow hairpin Rx queue in locked memory This patch adds a capability to place hairpin Rx queue in locked device memory. This capability is equivalent to storing hairpin RQ's data buffers in locked internal device memory. Hairpin Rx queue creation is extended with requesting that RQ is allocated in locked internal device memory. If allocation fails and force_memory hairpin configuration is set, then hairpin queue creation (and, as a result, device start) fails. If force_memory is unset, then PMD will fall back to allocating memory for hairpin RQ in unlocked internal device memory. To allow such allocation, the user must set HAIRPIN_DATA_BUFFER_LOCK flag in FW using mlxconfig tool. Signed-off-by: Dariusz Sosnowski Acked-by: Viacheslav Ovsiienko --- doc/guides/nics/mlx5.rst | 14 ++++++++++ doc/guides/platform/mlx5.rst | 5 ++++ drivers/net/mlx5/mlx5_devx.c | 51 ++++++++++++++++++++++++++++------ drivers/net/mlx5/mlx5_ethdev.c | 2 ++ 4 files changed, 63 insertions(+), 9 deletions(-) diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst index 3525246c47..bb436892a0 100644 --- a/doc/guides/nics/mlx5.rst +++ b/doc/guides/nics/mlx5.rst @@ -1517,6 +1517,20 @@ which is shared with other resources (e.g. flow rules). Starting with DPDK 22.11 and NVIDIA MLNX_OFED 5.8, applications are allowed to: +#. Place data buffers and Rx packet descriptors in dedicated device memory. + Application can request that configuration + through ``use_locked_device_memory`` configuration option. + + Placing data buffers and Rx packet descriptors in dedicated device memory + can decrease latency on hairpinned traffic, + since traffic processing for the hairpin queue will not be memory starved. 
+ + However, reserving device memory for hairpin Rx queues + may decrease throughput under heavy load, + since fewer resources will be available on device. + + This option is supported only for Rx hairpin queues. + #. Place Tx packet descriptors in host memory. Application can request that configuration through ``use_rte_memory`` configuration option. diff --git a/doc/guides/platform/mlx5.rst b/doc/guides/platform/mlx5.rst index 46b394c4ee..3cc1dd29e2 100644 --- a/doc/guides/platform/mlx5.rst +++ b/doc/guides/platform/mlx5.rst @@ -555,6 +555,11 @@ Below are some firmware configurations listed. REAL_TIME_CLOCK_ENABLE=1 +- allow locking hairpin RQ data buffer in device memory:: + + HAIRPIN_DATA_BUFFER_LOCK=1 + MEMIC_SIZE_LIMIT=0 + .. _mlx5_common_driver_options: diff --git a/drivers/net/mlx5/mlx5_devx.c b/drivers/net/mlx5/mlx5_devx.c index c61c34bd99..fe303a73bb 100644 --- a/drivers/net/mlx5/mlx5_devx.c +++ b/drivers/net/mlx5/mlx5_devx.c @@ -468,14 +468,16 @@ mlx5_rxq_obj_hairpin_new(struct mlx5_rxq_priv *rxq) { uint16_t idx = rxq->idx; struct mlx5_priv *priv = rxq->priv; + struct mlx5_hca_attr *hca_attr __rte_unused = &priv->sh->cdev->config.hca_attr; struct mlx5_rxq_ctrl *rxq_ctrl = rxq->ctrl; - struct mlx5_devx_create_rq_attr attr = { 0 }; + struct mlx5_devx_create_rq_attr unlocked_attr = { 0 }; + struct mlx5_devx_create_rq_attr locked_attr = { 0 }; struct mlx5_rxq_obj *tmpl = rxq_ctrl->obj; uint32_t max_wq_data; MLX5_ASSERT(rxq != NULL && rxq->ctrl != NULL && tmpl != NULL); tmpl->rxq_ctrl = rxq_ctrl; - attr.hairpin = 1; + unlocked_attr.hairpin = 1; max_wq_data = priv->sh->cdev->config.hca_attr.log_max_hairpin_wq_data_sz; /* Jumbo frames > 9KB should be supported, and more packets. 
*/ @@ -487,20 +489,50 @@ mlx5_rxq_obj_hairpin_new(struct mlx5_rxq_priv *rxq) rte_errno = ERANGE; return -rte_errno; } - attr.wq_attr.log_hairpin_data_sz = priv->config.log_hp_size; + unlocked_attr.wq_attr.log_hairpin_data_sz = priv->config.log_hp_size; } else { - attr.wq_attr.log_hairpin_data_sz = + unlocked_attr.wq_attr.log_hairpin_data_sz = (max_wq_data < MLX5_HAIRPIN_JUMBO_LOG_SIZE) ? max_wq_data : MLX5_HAIRPIN_JUMBO_LOG_SIZE; } /* Set the packets number to the maximum value for performance. */ - attr.wq_attr.log_hairpin_num_packets = - attr.wq_attr.log_hairpin_data_sz - + unlocked_attr.wq_attr.log_hairpin_num_packets = + unlocked_attr.wq_attr.log_hairpin_data_sz - MLX5_HAIRPIN_QUEUE_STRIDE; - attr.counter_set_id = priv->counter_set_id; + unlocked_attr.counter_set_id = priv->counter_set_id; rxq_ctrl->rxq.delay_drop = priv->config.hp_delay_drop; - attr.delay_drop_en = priv->config.hp_delay_drop; - tmpl->rq = mlx5_devx_cmd_create_rq(priv->sh->cdev->ctx, &attr, + unlocked_attr.delay_drop_en = priv->config.hp_delay_drop; + unlocked_attr.hairpin_data_buffer_type = + MLX5_RQC_HAIRPIN_DATA_BUFFER_TYPE_UNLOCKED_INTERNAL_BUFFER; + if (rxq->hairpin_conf.use_locked_device_memory) { + /* + * It is assumed that configuration is verified against capabilities + * during queue setup. + */ + MLX5_ASSERT(hca_attr->hairpin_data_buffer_locked); + rte_memcpy(&locked_attr, &unlocked_attr, sizeof(locked_attr)); + locked_attr.hairpin_data_buffer_type = + MLX5_RQC_HAIRPIN_DATA_BUFFER_TYPE_LOCKED_INTERNAL_BUFFER; + tmpl->rq = mlx5_devx_cmd_create_rq(priv->sh->cdev->ctx, &locked_attr, + rxq_ctrl->socket); + if (!tmpl->rq && rxq->hairpin_conf.force_memory) { + DRV_LOG(ERR, "Port %u Rx hairpin queue %u can't create RQ object" + " with locked memory buffer", + priv->dev_data->port_id, idx); + return -rte_errno; + } else if (!tmpl->rq && !rxq->hairpin_conf.force_memory) { + DRV_LOG(WARNING, "Port %u Rx hairpin queue %u can't create RQ object" + " with locked memory buffer. 
Falling back to unlocked" + " device memory.", + priv->dev_data->port_id, idx); + rte_errno = 0; + goto create_rq_unlocked; + } + goto create_rq_set_state; + } + +create_rq_unlocked: + tmpl->rq = mlx5_devx_cmd_create_rq(priv->sh->cdev->ctx, &unlocked_attr, rxq_ctrl->socket); if (!tmpl->rq) { DRV_LOG(ERR, @@ -509,6 +541,7 @@ mlx5_rxq_obj_hairpin_new(struct mlx5_rxq_priv *rxq) rte_errno = errno; return -rte_errno; } +create_rq_set_state: priv->dev_data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_HAIRPIN; return 0; } diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c index c59005ea2b..4a85415ff3 100644 --- a/drivers/net/mlx5/mlx5_ethdev.c +++ b/drivers/net/mlx5/mlx5_ethdev.c @@ -740,6 +740,8 @@ mlx5_hairpin_cap_get(struct rte_eth_dev *dev, struct rte_eth_hairpin_cap *cap) cap->max_tx_2_rx = 1; cap->max_nb_desc = 8192; hca_attr = &priv->sh->cdev->config.hca_attr; + cap->rx_cap.locked_device_memory = hca_attr->hairpin_data_buffer_locked; + cap->rx_cap.rte_memory = 0; cap->tx_cap.locked_device_memory = 0; cap->tx_cap.rte_memory = hca_attr->hairpin_sq_wq_in_host_mem; return 0;