mlx5en: Improve CQE error debugging.

MFC after:	1 week
Sponsored by:	NVIDIA Networking
This commit is contained in:
Hans Petter Selasky 2022-02-17 12:50:22 +01:00
parent 015f22f5d0
commit bc531a1faa
4 changed files with 31 additions and 1 deletions

View File

@ -1205,6 +1205,8 @@ int mlx5e_open_locked(struct ifnet *);
int mlx5e_close_locked(struct ifnet *);
void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, int event);
void mlx5e_dump_err_cqe(struct mlx5e_cq *, u32, const struct mlx5_err_cqe *);
mlx5e_cq_comp_t mlx5e_rx_cq_comp;
mlx5e_cq_comp_t mlx5e_tx_cq_comp;
struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq);

View File

@ -495,6 +495,7 @@ mlx5e_poll_rx_cq(struct mlx5e_rq *rq, int budget)
BUS_DMASYNC_POSTREAD);
if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
mlx5e_dump_err_cqe(&rq->cq, rq->rqn, (const void *)cqe);
rq->stats.wqe_err++;
goto wq_ll_pop;
}

View File

@ -1045,8 +1045,10 @@ mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget)
mlx5_cqwq_pop(&sq->cq.wq);
/* check if the completion event indicates an error */
if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ))
if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
mlx5e_dump_err_cqe(&sq->cq, sq->sqn, (const void *)cqe);
sq->stats.cqe_err++;
}
/* setup local variables */
sqcc_this = be16toh(cqe->wqe_counter);

View File

@ -28,6 +28,8 @@
#include "opt_rss.h"
#include "opt_ratelimit.h"
#include <linux/printk.h>
#include <dev/mlx5/mlx5_en/en.h>
struct mlx5_cqe64 *
@ -54,3 +56,26 @@ mlx5e_cq_error_event(struct mlx5_core_cq *mcq, int event)
mlx5_en_err(cq->priv->ifp, "cqn=0x%.6x event=0x%.2x\n",
mcq->cqn, event);
}
/*
 * Report an error completion queue entry (CQE) on the interface log and
 * follow it with a hex dump of the raw entry.
 *
 * cq      - completion queue the entry was taken from
 * qn      - queue number printed in the message, as supplied by the caller
 * err_cqe - the completion entry, viewed as an error CQE
 *
 * Entries with syndrome 0x05 and a vendor syndrome of 0xf9 or 0xf5 are
 * expected and are dropped without any output.
 */
void
mlx5e_dump_err_cqe(struct mlx5e_cq *cq, u32 qn, const struct mlx5_err_cqe *err_cqe)
{
	u32 cqe_index;

	if (err_cqe->syndrome == 0x05) {
		switch (err_cqe->vendor_err_synd) {
		case 0xf9:
			/* Flushed in error: not worth reporting. */
			return;
		case 0xf5:
			/* Queue was put into error state by software. */
			return;
		default:
			break;
		}
	}

	/* Slot just before the current consumer counter, wrapped by the size mask. */
	cqe_index = (cq->wq.cc - 1) & cq->wq.sz_m1;

	mlx5_en_err(cq->priv->ifp,
	    "Error CQE on CQN 0x%x, CI 0x%x, QN 0x%x, OPCODE 0x%x, SYNDROME 0x%x, VENDOR SYNDROME 0x%x\n",
	    cq->mcq.cqn, cqe_index, qn, err_cqe->op_own >> 4,
	    err_cqe->syndrome, err_cqe->vendor_err_synd);

	/* Raw bytes of the entry, 16 per row, offset-prefixed, no ASCII column. */
	print_hex_dump(NULL, NULL, DUMP_PREFIX_OFFSET,
	    16, 1, err_cqe, sizeof(*err_cqe), false);
}