Add port module event software counters in mlx5core.
While at it, fix up PME based on the latest PRM defines.

Submitted by: slavash@
MFC after: 3 days
Sponsored by: Mellanox Technologies
This commit is contained in:
parent
55221653c0
commit
111b57c359
@ -537,7 +537,7 @@ enum {
|
|||||||
MLX5_MODULE_STATUS_PLUGGED_ENABLED = 0x1,
|
MLX5_MODULE_STATUS_PLUGGED_ENABLED = 0x1,
|
||||||
MLX5_MODULE_STATUS_UNPLUGGED = 0x2,
|
MLX5_MODULE_STATUS_UNPLUGGED = 0x2,
|
||||||
MLX5_MODULE_STATUS_ERROR = 0x3,
|
MLX5_MODULE_STATUS_ERROR = 0x3,
|
||||||
MLX5_MODULE_STATUS_PLUGGED_DISABLED = 0x4,
|
MLX5_MODULE_STATUS_NUM ,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
@ -549,7 +549,7 @@ enum {
|
|||||||
MLX5_MODULE_EVENT_ERROR_UNSUPPORTED_CABLE = 0x5,
|
MLX5_MODULE_EVENT_ERROR_UNSUPPORTED_CABLE = 0x5,
|
||||||
MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE = 0x6,
|
MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE = 0x6,
|
||||||
MLX5_MODULE_EVENT_ERROR_CABLE_IS_SHORTED = 0x7,
|
MLX5_MODULE_EVENT_ERROR_CABLE_IS_SHORTED = 0x7,
|
||||||
MLX5_MODULE_EVENT_ERROR_PCIE_SYSTEM_POWER_SLOT_EXCEEDED = 0xc,
|
MLX5_MODULE_EVENT_ERROR_NUM ,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct mlx5_eqe_port_module_event {
|
struct mlx5_eqe_port_module_event {
|
||||||
|
@ -569,6 +569,11 @@ struct mlx5_rl_table {
|
|||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
struct mlx5_pme_stats {
|
||||||
|
u64 status_counters[MLX5_MODULE_STATUS_NUM];
|
||||||
|
u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM];
|
||||||
|
};
|
||||||
|
|
||||||
struct mlx5_priv {
|
struct mlx5_priv {
|
||||||
char name[MLX5_MAX_NAME_LEN];
|
char name[MLX5_MAX_NAME_LEN];
|
||||||
struct mlx5_eq_table eq_table;
|
struct mlx5_eq_table eq_table;
|
||||||
@ -624,6 +629,7 @@ struct mlx5_priv {
|
|||||||
#ifdef RATELIMIT
|
#ifdef RATELIMIT
|
||||||
struct mlx5_rl_table rl_table;
|
struct mlx5_rl_table rl_table;
|
||||||
#endif
|
#endif
|
||||||
|
struct mlx5_pme_stats pme_stats;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum mlx5_device_state {
|
enum mlx5_device_state {
|
||||||
|
@ -639,9 +639,9 @@ static const char *mlx5_port_module_event_error_type_to_string(u8 error_type)
|
|||||||
{
|
{
|
||||||
switch (error_type) {
|
switch (error_type) {
|
||||||
case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED:
|
case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED:
|
||||||
return "Power Budget Exceeded";
|
return "Power budget exceeded";
|
||||||
case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE:
|
case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE:
|
||||||
return "Long Range for non MLNX cable/module";
|
return "Long Range for non MLNX cable";
|
||||||
case MLX5_MODULE_EVENT_ERROR_BUS_STUCK:
|
case MLX5_MODULE_EVENT_ERROR_BUS_STUCK:
|
||||||
return "Bus stuck(I2C or data shorted)";
|
return "Bus stuck(I2C or data shorted)";
|
||||||
case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT:
|
case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT:
|
||||||
@ -649,18 +649,11 @@ static const char *mlx5_port_module_event_error_type_to_string(u8 error_type)
|
|||||||
case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST:
|
case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST:
|
||||||
return "Enforce part number list";
|
return "Enforce part number list";
|
||||||
case MLX5_MODULE_EVENT_ERROR_UNSUPPORTED_CABLE:
|
case MLX5_MODULE_EVENT_ERROR_UNSUPPORTED_CABLE:
|
||||||
return "Unsupported Cable";
|
return "Unknown identifier";
|
||||||
case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE:
|
case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE:
|
||||||
return "High Temperature";
|
return "High Temperature";
|
||||||
case MLX5_MODULE_EVENT_ERROR_CABLE_IS_SHORTED:
|
case MLX5_MODULE_EVENT_ERROR_CABLE_IS_SHORTED:
|
||||||
return "Cable is shorted";
|
return "Bad or shorted cable/module";
|
||||||
case MLX5_MODULE_EVENT_ERROR_PCIE_SYSTEM_POWER_SLOT_EXCEEDED:
|
|
||||||
return "One or more network ports have been powered "
|
|
||||||
"down due to insufficient/unadvertised power on "
|
|
||||||
"the PCIe slot. Please refer to the card's user "
|
|
||||||
"manual for power specifications or contact "
|
|
||||||
"Mellanox support.";
|
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return "Unknown error type";
|
return "Unknown error type";
|
||||||
}
|
}
|
||||||
@ -686,29 +679,36 @@ static void mlx5_port_module_event(struct mlx5_core_dev *dev,
|
|||||||
|
|
||||||
module_num = (unsigned int)module_event_eqe->module;
|
module_num = (unsigned int)module_event_eqe->module;
|
||||||
module_status = (unsigned int)module_event_eqe->module_status &
|
module_status = (unsigned int)module_event_eqe->module_status &
|
||||||
PORT_MODULE_EVENT_MODULE_STATUS_MASK;
|
PORT_MODULE_EVENT_MODULE_STATUS_MASK;
|
||||||
error_type = (unsigned int)module_event_eqe->error_type &
|
error_type = (unsigned int)module_event_eqe->error_type &
|
||||||
PORT_MODULE_EVENT_ERROR_TYPE_MASK;
|
PORT_MODULE_EVENT_ERROR_TYPE_MASK;
|
||||||
|
|
||||||
|
if (module_status < MLX5_MODULE_STATUS_NUM)
|
||||||
|
dev->priv.pme_stats.status_counters[module_status]++;
|
||||||
switch (module_status) {
|
switch (module_status) {
|
||||||
case MLX5_MODULE_STATUS_PLUGGED_ENABLED:
|
case MLX5_MODULE_STATUS_PLUGGED_ENABLED:
|
||||||
device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, status: plugged and enabled\n", module_num);
|
device_printf((&pdev->dev)->bsddev,
|
||||||
|
"INFO: Module %u, status: plugged and enabled\n",
|
||||||
|
module_num);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case MLX5_MODULE_STATUS_UNPLUGGED:
|
case MLX5_MODULE_STATUS_UNPLUGGED:
|
||||||
device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, status: unplugged\n", module_num);
|
device_printf((&pdev->dev)->bsddev,
|
||||||
|
"INFO: Module %u, status: unplugged\n", module_num);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case MLX5_MODULE_STATUS_ERROR:
|
case MLX5_MODULE_STATUS_ERROR:
|
||||||
device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, status: error, %s\n", module_num, mlx5_port_module_event_error_type_to_string(error_type));
|
device_printf((&pdev->dev)->bsddev,
|
||||||
break;
|
"ERROR: Module %u, status: error, %s\n",
|
||||||
|
module_num,
|
||||||
case MLX5_MODULE_STATUS_PLUGGED_DISABLED:
|
mlx5_port_module_event_error_type_to_string(error_type));
|
||||||
device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, status: plugged but disabled\n", module_num);
|
if (error_type < MLX5_MODULE_EVENT_ERROR_NUM)
|
||||||
|
dev->priv.pme_stats.error_counters[error_type]++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, unknown status\n", module_num);
|
device_printf((&pdev->dev)->bsddev,
|
||||||
|
"INFO: Module %u, unknown status\n", module_num);
|
||||||
}
|
}
|
||||||
/* store module status */
|
/* store module status */
|
||||||
if (module_num < MLX5_MAX_PORTS)
|
if (module_num < MLX5_MAX_PORTS)
|
||||||
|
@ -1244,13 +1244,31 @@ struct mlx5_core_event_handler {
|
|||||||
void *data);
|
void *data);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define MLX5_STATS_DESC(a, b, c, d, e, ...) d, e,
|
||||||
|
|
||||||
|
#define MLX5_PORT_MODULE_ERROR_STATS(m) \
|
||||||
|
m(+1, u64, power_budget_exceeded, "power_budget", "Module Power Budget Exceeded") \
|
||||||
|
m(+1, u64, long_range, "long_range", "Module Long Range for non MLNX cable/module") \
|
||||||
|
m(+1, u64, bus_stuck, "bus_stuck", "Module Bus stuck(I2C or data shorted)") \
|
||||||
|
m(+1, u64, no_eeprom, "no_eeprom", "No EEPROM/retry timeout") \
|
||||||
|
m(+1, u64, enforce_part_number, "enforce_part_number", "Module Enforce part number list") \
|
||||||
|
m(+1, u64, unknown_id, "unknown_id", "Module Unknown identifier") \
|
||||||
|
m(+1, u64, high_temp, "high_temp", "Module High Temperature") \
|
||||||
|
m(+1, u64, cable_shorted, "cable_shorted", "Module Cable is shorted")
|
||||||
|
|
||||||
|
static const char *mlx5_pme_err_desc[] = {
|
||||||
|
MLX5_PORT_MODULE_ERROR_STATS(MLX5_STATS_DESC)
|
||||||
|
};
|
||||||
|
|
||||||
static int init_one(struct pci_dev *pdev,
|
static int init_one(struct pci_dev *pdev,
|
||||||
const struct pci_device_id *id)
|
const struct pci_device_id *id)
|
||||||
{
|
{
|
||||||
struct mlx5_core_dev *dev;
|
struct mlx5_core_dev *dev;
|
||||||
struct mlx5_priv *priv;
|
struct mlx5_priv *priv;
|
||||||
device_t bsddev = pdev->dev.bsddev;
|
device_t bsddev = pdev->dev.bsddev;
|
||||||
int err;
|
int i,err;
|
||||||
|
struct sysctl_oid *pme_sysctl_node;
|
||||||
|
struct sysctl_oid *pme_err_sysctl_node;
|
||||||
|
|
||||||
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
|
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
|
||||||
priv = &dev->priv;
|
priv = &dev->priv;
|
||||||
@ -1282,6 +1300,41 @@ static int init_one(struct pci_dev *pdev,
|
|||||||
OID_AUTO, "power_value", CTLFLAG_RD, &dev->pwr_value, 0,
|
OID_AUTO, "power_value", CTLFLAG_RD, &dev->pwr_value, 0,
|
||||||
"Current power value in Watts");
|
"Current power value in Watts");
|
||||||
|
|
||||||
|
pme_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx,
|
||||||
|
SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
|
||||||
|
OID_AUTO, "pme_stats", CTLFLAG_RD, NULL,
|
||||||
|
"Port module event statistics");
|
||||||
|
if (pme_sysctl_node == NULL) {
|
||||||
|
err = -ENOMEM;
|
||||||
|
goto clean_sysctl_ctx;
|
||||||
|
}
|
||||||
|
pme_err_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx,
|
||||||
|
SYSCTL_CHILDREN(pme_sysctl_node),
|
||||||
|
OID_AUTO, "errors", CTLFLAG_RD, NULL,
|
||||||
|
"Port module event error statistics");
|
||||||
|
if (pme_err_sysctl_node == NULL) {
|
||||||
|
err = -ENOMEM;
|
||||||
|
goto clean_sysctl_ctx;
|
||||||
|
}
|
||||||
|
SYSCTL_ADD_U64(&dev->sysctl_ctx,
|
||||||
|
SYSCTL_CHILDREN(pme_sysctl_node), OID_AUTO,
|
||||||
|
"module_plug", CTLFLAG_RD | CTLFLAG_MPSAFE,
|
||||||
|
&dev->priv.pme_stats.status_counters[MLX5_MODULE_STATUS_PLUGGED_ENABLED],
|
||||||
|
0, "Number of time module plugged");
|
||||||
|
SYSCTL_ADD_U64(&dev->sysctl_ctx,
|
||||||
|
SYSCTL_CHILDREN(pme_sysctl_node), OID_AUTO,
|
||||||
|
"module_unplug", CTLFLAG_RD | CTLFLAG_MPSAFE,
|
||||||
|
&dev->priv.pme_stats.status_counters[MLX5_MODULE_STATUS_UNPLUGGED],
|
||||||
|
0, "Number of time module unplugged");
|
||||||
|
for (i = 0 ; i < MLX5_MODULE_EVENT_ERROR_NUM; i++) {
|
||||||
|
SYSCTL_ADD_U64(&dev->sysctl_ctx,
|
||||||
|
SYSCTL_CHILDREN(pme_err_sysctl_node), OID_AUTO,
|
||||||
|
mlx5_pme_err_desc[2 * i], CTLFLAG_RD | CTLFLAG_MPSAFE,
|
||||||
|
&dev->priv.pme_stats.error_counters[i],
|
||||||
|
0, mlx5_pme_err_desc[2 * i + 1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
INIT_LIST_HEAD(&priv->ctx_list);
|
INIT_LIST_HEAD(&priv->ctx_list);
|
||||||
spin_lock_init(&priv->ctx_lock);
|
spin_lock_init(&priv->ctx_lock);
|
||||||
mutex_init(&dev->pci_status_mutex);
|
mutex_init(&dev->pci_status_mutex);
|
||||||
@ -1320,8 +1373,9 @@ static int init_one(struct pci_dev *pdev,
|
|||||||
close_pci:
|
close_pci:
|
||||||
mlx5_pci_close(dev, priv);
|
mlx5_pci_close(dev, priv);
|
||||||
clean_dev:
|
clean_dev:
|
||||||
sysctl_ctx_free(&dev->sysctl_ctx);
|
|
||||||
mtx_destroy(&dev->dump_lock);
|
mtx_destroy(&dev->dump_lock);
|
||||||
|
clean_sysctl_ctx:
|
||||||
|
sysctl_ctx_free(&dev->sysctl_ctx);
|
||||||
kfree(dev);
|
kfree(dev);
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
@ -3389,8 +3389,7 @@ mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
|
|||||||
}
|
}
|
||||||
/* Check if module is present before doing an access */
|
/* Check if module is present before doing an access */
|
||||||
module_status = mlx5_query_module_status(priv->mdev, module_num);
|
module_status = mlx5_query_module_status(priv->mdev, module_num);
|
||||||
if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED &&
|
if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED) {
|
||||||
module_status != MLX5_MODULE_STATUS_PLUGGED_DISABLED) {
|
|
||||||
error = EINVAL;
|
error = EINVAL;
|
||||||
goto err_i2c;
|
goto err_i2c;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user