Add port module event software counters in mlx5core.
While at it, fix up the PME definitions based on the latest PRM defines. Submitted by: slavash@ MFC after: 3 days Sponsored by: Mellanox Technologies
This commit is contained in:
parent
980e2c987e
commit
d6e923c7f0
@ -537,7 +537,7 @@ enum {
|
||||
MLX5_MODULE_STATUS_PLUGGED_ENABLED = 0x1,
|
||||
MLX5_MODULE_STATUS_UNPLUGGED = 0x2,
|
||||
MLX5_MODULE_STATUS_ERROR = 0x3,
|
||||
MLX5_MODULE_STATUS_PLUGGED_DISABLED = 0x4,
|
||||
MLX5_MODULE_STATUS_NUM ,
|
||||
};
|
||||
|
||||
enum {
|
||||
@ -549,7 +549,7 @@ enum {
|
||||
MLX5_MODULE_EVENT_ERROR_UNSUPPORTED_CABLE = 0x5,
|
||||
MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE = 0x6,
|
||||
MLX5_MODULE_EVENT_ERROR_CABLE_IS_SHORTED = 0x7,
|
||||
MLX5_MODULE_EVENT_ERROR_PCIE_SYSTEM_POWER_SLOT_EXCEEDED = 0xc,
|
||||
MLX5_MODULE_EVENT_ERROR_NUM ,
|
||||
};
|
||||
|
||||
struct mlx5_eqe_port_module_event {
|
||||
|
@ -569,6 +569,11 @@ struct mlx5_rl_table {
|
||||
};
|
||||
#endif
|
||||
|
||||
/*
 * Software counters for port module events (PME).
 *
 * status_counters is indexed by the module status value and error_counters
 * by the module event error type. The event handler increments an entry only
 * when the index is below MLX5_MODULE_STATUS_NUM or
 * MLX5_MODULE_EVENT_ERROR_NUM respectively, so out-of-range values reported
 * by the device are not counted.
 */
struct mlx5_pme_stats {
|
||||
u64 status_counters[MLX5_MODULE_STATUS_NUM];
|
||||
u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM];
|
||||
};
|
||||
|
||||
struct mlx5_priv {
|
||||
char name[MLX5_MAX_NAME_LEN];
|
||||
struct mlx5_eq_table eq_table;
|
||||
@ -624,6 +629,7 @@ struct mlx5_priv {
|
||||
#ifdef RATELIMIT
|
||||
struct mlx5_rl_table rl_table;
|
||||
#endif
|
||||
struct mlx5_pme_stats pme_stats;
|
||||
};
|
||||
|
||||
enum mlx5_device_state {
|
||||
|
@ -639,9 +639,9 @@ static const char *mlx5_port_module_event_error_type_to_string(u8 error_type)
|
||||
{
|
||||
switch (error_type) {
|
||||
case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED:
|
||||
return "Power Budget Exceeded";
|
||||
return "Power budget exceeded";
|
||||
case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE:
|
||||
return "Long Range for non MLNX cable/module";
|
||||
return "Long Range for non MLNX cable";
|
||||
case MLX5_MODULE_EVENT_ERROR_BUS_STUCK:
|
||||
return "Bus stuck(I2C or data shorted)";
|
||||
case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT:
|
||||
@ -649,18 +649,11 @@ static const char *mlx5_port_module_event_error_type_to_string(u8 error_type)
|
||||
case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST:
|
||||
return "Enforce part number list";
|
||||
case MLX5_MODULE_EVENT_ERROR_UNSUPPORTED_CABLE:
|
||||
return "Unsupported Cable";
|
||||
return "Unknown identifier";
|
||||
case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE:
|
||||
return "High Temperature";
|
||||
case MLX5_MODULE_EVENT_ERROR_CABLE_IS_SHORTED:
|
||||
return "Cable is shorted";
|
||||
case MLX5_MODULE_EVENT_ERROR_PCIE_SYSTEM_POWER_SLOT_EXCEEDED:
|
||||
return "One or more network ports have been powered "
|
||||
"down due to insufficient/unadvertised power on "
|
||||
"the PCIe slot. Please refer to the card's user "
|
||||
"manual for power specifications or contact "
|
||||
"Mellanox support.";
|
||||
|
||||
return "Bad or shorted cable/module";
|
||||
default:
|
||||
return "Unknown error type";
|
||||
}
|
||||
@ -686,29 +679,36 @@ static void mlx5_port_module_event(struct mlx5_core_dev *dev,
|
||||
|
||||
module_num = (unsigned int)module_event_eqe->module;
|
||||
module_status = (unsigned int)module_event_eqe->module_status &
|
||||
PORT_MODULE_EVENT_MODULE_STATUS_MASK;
|
||||
PORT_MODULE_EVENT_MODULE_STATUS_MASK;
|
||||
error_type = (unsigned int)module_event_eqe->error_type &
|
||||
PORT_MODULE_EVENT_ERROR_TYPE_MASK;
|
||||
PORT_MODULE_EVENT_ERROR_TYPE_MASK;
|
||||
|
||||
if (module_status < MLX5_MODULE_STATUS_NUM)
|
||||
dev->priv.pme_stats.status_counters[module_status]++;
|
||||
switch (module_status) {
|
||||
case MLX5_MODULE_STATUS_PLUGGED_ENABLED:
|
||||
device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, status: plugged and enabled\n", module_num);
|
||||
device_printf((&pdev->dev)->bsddev,
|
||||
"INFO: Module %u, status: plugged and enabled\n",
|
||||
module_num);
|
||||
break;
|
||||
|
||||
case MLX5_MODULE_STATUS_UNPLUGGED:
|
||||
device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, status: unplugged\n", module_num);
|
||||
device_printf((&pdev->dev)->bsddev,
|
||||
"INFO: Module %u, status: unplugged\n", module_num);
|
||||
break;
|
||||
|
||||
case MLX5_MODULE_STATUS_ERROR:
|
||||
device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, status: error, %s\n", module_num, mlx5_port_module_event_error_type_to_string(error_type));
|
||||
break;
|
||||
|
||||
case MLX5_MODULE_STATUS_PLUGGED_DISABLED:
|
||||
device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, status: plugged but disabled\n", module_num);
|
||||
device_printf((&pdev->dev)->bsddev,
|
||||
"ERROR: Module %u, status: error, %s\n",
|
||||
module_num,
|
||||
mlx5_port_module_event_error_type_to_string(error_type));
|
||||
if (error_type < MLX5_MODULE_EVENT_ERROR_NUM)
|
||||
dev->priv.pme_stats.error_counters[error_type]++;
|
||||
break;
|
||||
|
||||
default:
|
||||
device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, unknown status\n", module_num);
|
||||
device_printf((&pdev->dev)->bsddev,
|
||||
"INFO: Module %u, unknown status\n", module_num);
|
||||
}
|
||||
/* store module status */
|
||||
if (module_num < MLX5_MAX_PORTS)
|
||||
|
@ -1244,13 +1244,31 @@ struct mlx5_core_event_handler {
|
||||
void *data);
|
||||
};
|
||||
|
||||
/*
 * Selector for statistics X-macro lists: expands to the fourth and fifth
 * arguments only, each followed by a comma, and ignores all the others.
 * Intended for use inside an array initializer.
 */
#define MLX5_STATS_DESC(a, b, c, d, e, ...) d, e,
|
||||
|
||||
/*
 * X-macro list of port module error counters. Invokes m() once per counter
 * with five arguments; the last two are the name and description strings that
 * MLX5_STATS_DESC extracts.
 *
 * init_one() walks the resulting table with the same index it uses for
 * pme_stats.error_counters[], so the entry order here has to follow the
 * numeric order of the MLX5_MODULE_EVENT_ERROR_* values.
 * NOTE(review): the meaning of the first three arguments (+1, type, field
 * name) is not visible in this chunk - confirm against other users.
 */
#define MLX5_PORT_MODULE_ERROR_STATS(m) \
|
||||
m(+1, u64, power_budget_exceeded, "power_budget", "Module Power Budget Exceeded") \
|
||||
m(+1, u64, long_range, "long_range", "Module Long Range for non MLNX cable/module") \
|
||||
m(+1, u64, bus_stuck, "bus_stuck", "Module Bus stuck(I2C or data shorted)") \
|
||||
m(+1, u64, no_eeprom, "no_eeprom", "No EEPROM/retry timeout") \
|
||||
m(+1, u64, enforce_part_number, "enforce_part_number", "Module Enforce part number list") \
|
||||
m(+1, u64, unknown_id, "unknown_id", "Module Unknown identifier") \
|
||||
m(+1, u64, high_temp, "high_temp", "Module High Temperature") \
|
||||
m(+1, u64, cable_shorted, "cable_shorted", "Module Cable is shorted")
|
||||
|
||||
/*
 * Flat table of string pairs generated from MLX5_PORT_MODULE_ERROR_STATS:
 * element 2 * i is the sysctl leaf name and element 2 * i + 1 is the
 * description of error counter i, as consumed by init_one().
 */
static const char *mlx5_pme_err_desc[] = {
|
||||
MLX5_PORT_MODULE_ERROR_STATS(MLX5_STATS_DESC)
|
||||
};
|
||||
|
||||
static int init_one(struct pci_dev *pdev,
|
||||
const struct pci_device_id *id)
|
||||
{
|
||||
struct mlx5_core_dev *dev;
|
||||
struct mlx5_priv *priv;
|
||||
device_t bsddev = pdev->dev.bsddev;
|
||||
int err;
|
||||
int i,err;
|
||||
struct sysctl_oid *pme_sysctl_node;
|
||||
struct sysctl_oid *pme_err_sysctl_node;
|
||||
|
||||
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
|
||||
priv = &dev->priv;
|
||||
@ -1282,6 +1300,41 @@ static int init_one(struct pci_dev *pdev,
|
||||
OID_AUTO, "power_value", CTLFLAG_RD, &dev->pwr_value, 0,
|
||||
"Current power value in Watts");
|
||||
|
||||
pme_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx,
|
||||
SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
|
||||
OID_AUTO, "pme_stats", CTLFLAG_RD, NULL,
|
||||
"Port module event statistics");
|
||||
if (pme_sysctl_node == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto clean_sysctl_ctx;
|
||||
}
|
||||
pme_err_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx,
|
||||
SYSCTL_CHILDREN(pme_sysctl_node),
|
||||
OID_AUTO, "errors", CTLFLAG_RD, NULL,
|
||||
"Port module event error statistics");
|
||||
if (pme_err_sysctl_node == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto clean_sysctl_ctx;
|
||||
}
|
||||
SYSCTL_ADD_U64(&dev->sysctl_ctx,
|
||||
SYSCTL_CHILDREN(pme_sysctl_node), OID_AUTO,
|
||||
"module_plug", CTLFLAG_RD | CTLFLAG_MPSAFE,
|
||||
&dev->priv.pme_stats.status_counters[MLX5_MODULE_STATUS_PLUGGED_ENABLED],
|
||||
0, "Number of time module plugged");
|
||||
SYSCTL_ADD_U64(&dev->sysctl_ctx,
|
||||
SYSCTL_CHILDREN(pme_sysctl_node), OID_AUTO,
|
||||
"module_unplug", CTLFLAG_RD | CTLFLAG_MPSAFE,
|
||||
&dev->priv.pme_stats.status_counters[MLX5_MODULE_STATUS_UNPLUGGED],
|
||||
0, "Number of time module unplugged");
|
||||
for (i = 0 ; i < MLX5_MODULE_EVENT_ERROR_NUM; i++) {
|
||||
SYSCTL_ADD_U64(&dev->sysctl_ctx,
|
||||
SYSCTL_CHILDREN(pme_err_sysctl_node), OID_AUTO,
|
||||
mlx5_pme_err_desc[2 * i], CTLFLAG_RD | CTLFLAG_MPSAFE,
|
||||
&dev->priv.pme_stats.error_counters[i],
|
||||
0, mlx5_pme_err_desc[2 * i + 1]);
|
||||
}
|
||||
|
||||
|
||||
INIT_LIST_HEAD(&priv->ctx_list);
|
||||
spin_lock_init(&priv->ctx_lock);
|
||||
mutex_init(&dev->pci_status_mutex);
|
||||
@ -1320,8 +1373,9 @@ clean_health:
|
||||
close_pci:
|
||||
mlx5_pci_close(dev, priv);
|
||||
clean_dev:
|
||||
sysctl_ctx_free(&dev->sysctl_ctx);
|
||||
mtx_destroy(&dev->dump_lock);
|
||||
clean_sysctl_ctx:
|
||||
sysctl_ctx_free(&dev->sysctl_ctx);
|
||||
kfree(dev);
|
||||
return err;
|
||||
}
|
||||
|
@ -3389,8 +3389,7 @@ out:
|
||||
}
|
||||
/* Check if module is present before doing an access */
|
||||
module_status = mlx5_query_module_status(priv->mdev, module_num);
|
||||
if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED &&
|
||||
module_status != MLX5_MODULE_STATUS_PLUGGED_DISABLED) {
|
||||
if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED) {
|
||||
error = EINVAL;
|
||||
goto err_i2c;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user