net/mlx5: recover secondary process Rx errors

The RQ error recovery mechanism in the PMD invokes Verbs functions to
modify the RQ state in order to reset the RQ and reactivate it.

These Verbs functions cannot be invoked from a secondary process, so the
PMD skips the recovery when the error is captured on queues polled by
secondary processes.

Using the DPDK IPC mechanism, the secondary process can request Verbs
queue state modifications to be performed synchronously by the primary
process.

Add support for secondary process Rx errors recovery.

Cc: stable@dpdk.org

Signed-off-by: Matan Azrad <matan@mellanox.com>
Acked-by: Shahaf Shuler <shahafs@mellanox.com>
Matan Azrad 2019-05-30 10:20:38 +00:00 committed by Ferruh Yigit
parent 957e45fb7b
commit 2d77cb615b
5 changed files with 141 additions and 18 deletions
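
As a quick orientation before the diffs, here is a condensed, illustrative sketch (not a literal excerpt; the wrapper name is made up, error handling and the CQE dump logic are omitted): the Rx error handler fills the new state-modify argument and calls a process-agnostic dispatcher; on the primary the dispatcher performs the Verbs call directly, on a secondary it forwards the request to the primary over DPDK IPC. Names are taken from the diffs below; mlx5_queue_state_modify() itself is a static helper in the Rx datapath file, so this fragment assumes the driver-internal definitions are in scope.

static int
rxq_recover_sketch(struct rte_eth_dev *dev, struct mlx5_rxq_data *rxq)
{
	struct mlx5_mp_arg_queue_state_modify sm = {
		.is_wq = 1,             /* An Rx WQ, not an SQ. */
		.queue_id = rxq->idx,   /* DPDK Rx queue index. */
		.state = IBV_WQS_RESET, /* Later moved back to IBV_WQS_RDY. */
	};

	/* Primary: direct Verbs call via mlx5_queue_state_modify_primary().
	 * Secondary: synchronous MLX5_MP_REQ_QUEUE_STATE_MODIFY IPC request.
	 */
	return mlx5_queue_state_modify(dev, &sm);
}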

@@ -61,6 +61,13 @@ enum mlx5_mp_req_type {
MLX5_MP_REQ_CREATE_MR,
MLX5_MP_REQ_START_RXTX,
MLX5_MP_REQ_STOP_RXTX,
MLX5_MP_REQ_QUEUE_STATE_MODIFY,
};
struct mlx5_mp_arg_queue_state_modify {
uint8_t is_wq; /* Set if WQ. */
uint16_t queue_id; /* DPDK queue ID. */
enum ibv_wq_state state; /* WQ requested state. */
};
/* Parameters for IPC. */
@@ -71,6 +78,8 @@ struct mlx5_mp_param {
RTE_STD_C11
union {
uintptr_t addr; /* MLX5_MP_REQ_CREATE_MR */
struct mlx5_mp_arg_queue_state_modify state_modify;
/* MLX5_MP_REQ_QUEUE_STATE_MODIFY */
} args;
};
@@ -546,6 +555,8 @@ void mlx5_mp_req_start_rxtx(struct rte_eth_dev *dev);
void mlx5_mp_req_stop_rxtx(struct rte_eth_dev *dev);
int mlx5_mp_req_mr_create(struct rte_eth_dev *dev, uintptr_t addr);
int mlx5_mp_req_verbs_cmd_fd(struct rte_eth_dev *dev);
int mlx5_mp_req_queue_state_modify(struct rte_eth_dev *dev,
struct mlx5_mp_arg_queue_state_modify *sm);
int mlx5_mp_init_primary(void);
void mlx5_mp_uninit_primary(void);
int mlx5_mp_init_secondary(void);

@@ -85,6 +85,12 @@ mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
res->result = 0;
ret = rte_mp_reply(&mp_res, peer);
break;
case MLX5_MP_REQ_QUEUE_STATE_MODIFY:
mp_init_msg(dev, &mp_res, param->type);
res->result = mlx5_queue_state_modify_primary
(dev, &param->args.state_modify);
ret = rte_mp_reply(&mp_res, peer);
break;
default:
rte_errno = EINVAL;
DRV_LOG(ERR, "port %u invalid mp request type",
@@ -271,6 +277,46 @@ mlx5_mp_req_mr_create(struct rte_eth_dev *dev, uintptr_t addr)
return ret;
}
/**
* Request Verbs queue state modification to the primary process.
*
* @param[in] dev
* Pointer to Ethernet structure.
* @param sm
* State modify parameters.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
mlx5_mp_req_queue_state_modify(struct rte_eth_dev *dev,
struct mlx5_mp_arg_queue_state_modify *sm)
{
struct rte_mp_msg mp_req;
struct rte_mp_msg *mp_res;
struct rte_mp_reply mp_rep;
struct mlx5_mp_param *req = (struct mlx5_mp_param *)mp_req.param;
struct mlx5_mp_param *res;
struct timespec ts = {.tv_sec = MLX5_MP_REQ_TIMEOUT_SEC, .tv_nsec = 0};
int ret;
assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
mp_init_msg(dev, &mp_req, MLX5_MP_REQ_QUEUE_STATE_MODIFY);
req->args.state_modify = *sm;
ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts);
if (ret) {
DRV_LOG(ERR, "port %u request to primary process failed",
dev->data->port_id);
return -rte_errno;
}
assert(mp_rep.nb_received == 1);
mp_res = &mp_rep.msgs[0];
res = (struct mlx5_mp_param *)mp_res->param;
ret = res->result;
free(mp_rep.msgs);
return ret;
}
/**
* Request Verbs command file descriptor for mmap to the primary process.
*
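
For context (not part of this commit): the new MLX5_MP_REQ_QUEUE_STATE_MODIFY case is reached because the primary registers mp_primary_handle() as an rte_mp action at multi-process init (mlx5_mp_init_primary(), declared in the header hunk above). A rough sketch of that registration pattern follows, assuming DPDK's rte_mp API; the action name and the stub handler are placeholders, not the driver's real ones.

#include <errno.h>
#include <rte_eal.h>    /* rte_mp_action_register(), struct rte_mp_msg */
#include <rte_errno.h>

/* Stub standing in for mp_primary_handle(): decode the mlx5_mp_param
 * carried in msg->param, dispatch on its type and answer via rte_mp_reply().
 */
static int
mp_handler_stub(const struct rte_mp_msg *msg, const void *peer)
{
	(void)msg;
	(void)peer;
	return 0;
}

int
mp_init_primary_sketch(void)
{
	/* "example_mp" is a placeholder, not the driver's real action name. */
	if (rte_mp_action_register("example_mp", mp_handler_stub) &&
	    rte_errno != ENOTSUP)
		return -1;
	/* ENOTSUP: IPC disabled (single-process mode); nothing to do. */
	return 0;
}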

@@ -2030,6 +2030,75 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
}
/**
* Modify a Verbs queue state.
* This must be called from the primary process.
*
* @param dev
* Pointer to Ethernet device.
* @param sm
* State modify request parameters.
*
* @return
* 0 in case of success else non-zero value and rte_errno is set.
*/
int
mlx5_queue_state_modify_primary(struct rte_eth_dev *dev,
const struct mlx5_mp_arg_queue_state_modify *sm)
{
int ret;
struct mlx5_priv *priv = dev->data->dev_private;
if (sm->is_wq) {
struct ibv_wq_attr mod = {
.attr_mask = IBV_WQ_ATTR_STATE,
.wq_state = sm->state,
};
struct mlx5_rxq_data *rxq = (*priv->rxqs)[sm->queue_id];
struct mlx5_rxq_ctrl *rxq_ctrl =
container_of(rxq, struct mlx5_rxq_ctrl, rxq);
ret = mlx5_glue->modify_wq(rxq_ctrl->ibv->wq, &mod);
if (ret) {
DRV_LOG(ERR, "Cannot change Rx WQ state to %u - %s\n",
sm->state, strerror(errno));
rte_errno = errno;
return ret;
}
}
return 0;
}
/**
* Modify a Verbs queue state.
*
* @param dev
* Pointer to Ethernet device.
* @param sm
* State modify request parameters.
*
* @return
* 0 in case of success else non-zero value.
*/
static int
mlx5_queue_state_modify(struct rte_eth_dev *dev,
struct mlx5_mp_arg_queue_state_modify *sm)
{
int ret = 0;
switch (rte_eal_process_type()) {
case RTE_PROC_PRIMARY:
ret = mlx5_queue_state_modify_primary(dev, sm);
break;
case RTE_PROC_SECONDARY:
ret = mlx5_mp_req_queue_state_modify(dev, sm);
break;
default:
break;
}
return ret;
}
/**
* Handle a Rx error.
* The function inserts the RQ state to reset when the first error CQE is
@@ -2053,15 +2122,13 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t mbuf_prepare)
const unsigned int wqe_n = 1 << rxq->elts_n;
struct mlx5_rxq_ctrl *rxq_ctrl =
container_of(rxq, struct mlx5_rxq_ctrl, rxq);
struct ibv_wq_attr mod = {
.attr_mask = IBV_WQ_ATTR_STATE,
};
union {
volatile struct mlx5_cqe *cqe;
volatile struct mlx5_err_cqe *err_cqe;
} u = {
.cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask],
};
struct mlx5_mp_arg_queue_state_modify sm;
int ret;
switch (rxq->err_state) {
@@ -2069,21 +2136,17 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t mbuf_prepare)
rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET;
/* Fall-through */
case MLX5_RXQ_ERR_STATE_NEED_RESET:
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
sm.is_wq = 1;
sm.queue_id = rxq->idx;
sm.state = IBV_WQS_RESET;
if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), &sm))
return -1;
mod.wq_state = IBV_WQS_RESET;
ret = mlx5_glue->modify_wq(rxq_ctrl->ibv->wq, &mod);
if (ret) {
DRV_LOG(ERR, "Cannot change Rx WQ state to RESET %s\n",
strerror(errno));
return -1;
}
if (rxq_ctrl->dump_file_n <
rxq_ctrl->priv->config.max_dump_files_num) {
MKSTR(err_str, "Unexpected CQE error syndrome "
"0x%02x CQN = %u RQN = %u wqe_counter = %u"
" rq_ci = %u cq_ci = %u", u.err_cqe->syndrome,
rxq->cqn, rxq_ctrl->ibv->wq->wq_num,
rxq->cqn, rxq_ctrl->wqn,
rte_be_to_cpu_16(u.err_cqe->wqe_counter),
rxq->rq_ci << rxq->sges_n, rxq->cq_ci);
MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u",
@@ -2113,13 +2176,12 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t mbuf_prepare)
*/
*rxq->rq_db = rte_cpu_to_be_32(0);
rte_cio_wmb();
mod.wq_state = IBV_WQS_RDY;
ret = mlx5_glue->modify_wq(rxq_ctrl->ibv->wq, &mod);
if (ret) {
DRV_LOG(ERR, "Cannot change Rx WQ state to RDY"
" %s\n", strerror(errno));
sm.is_wq = 1;
sm.queue_id = rxq->idx;
sm.state = IBV_WQS_RDY;
if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv),
&sm))
return -1;
}
if (mbuf_prepare) {
const uint16_t q_mask = wqe_n - 1;
uint16_t elt_idx;

@@ -161,6 +161,7 @@ struct mlx5_rxq_ctrl {
unsigned int irq:1; /* Whether IRQ is enabled. */
uint32_t flow_mark_n; /* Number of Mark/Flag flows using this Queue. */
uint32_t flow_tunnels_n[MLX5_FLOW_TUNNEL]; /* Tunnels counters. */
uint32_t wqn; /* WQ number. */
uint16_t dump_file_n; /* Number of dump files. */
};
@@ -355,6 +356,8 @@ int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset);
uint32_t mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id);
void mlx5_dump_debug_information(const char *path, const char *title,
const void *buf, unsigned int len);
int mlx5_queue_state_modify_primary(struct rte_eth_dev *dev,
const struct mlx5_mp_arg_queue_state_modify *sm);
/* Vectorized version of mlx5_rxtx.c */
int mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev);

@@ -126,6 +126,7 @@ mlx5_rxq_start(struct rte_eth_dev *dev)
rxq_ctrl->ibv = mlx5_rxq_ibv_new(dev, i);
if (!rxq_ctrl->ibv)
goto error;
rxq_ctrl->wqn = rxq_ctrl->ibv->wq->wq_num;
}
return 0;
error:
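
One inference worth spelling out (it is implied by the diffs, not stated in the commit message): the error log in mlx5_rx_err_handle() now reports rxq_ctrl->wqn instead of dereferencing rxq_ctrl->ibv->wq, because the Verbs objects are owned by the primary process while rxq_ctrl lives in shared memory. Caching the number once at Rx queue start, as the last hunk does, keeps the log meaningful from a secondary process. The line below is the hunk's own assignment, repeated here only to carry the explanatory comment:

/* Primary process, at Rx queue start: remember the WQ number in shared
 * memory so the recovery/log path never has to touch Verbs objects that
 * only the primary process can dereference.
 */
rxq_ctrl->wqn = rxq_ctrl->ibv->wq->wq_num;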