From 45fe161a52804a83f52fd8e9bdb6369e535ef161 Mon Sep 17 00:00:00 2001
From: hselasky
Date: Wed, 7 Mar 2018 15:17:36 +0000
Subject: [PATCH] Implement rate limit per traffic class in mlx5core.

Add support for rate limiting traffic class via sysctl.

Submitted by: Slava Shwartsman
MFC after: 1 week
Sponsored by: Mellanox Technologies
---
Review notes (this section is not part of the commit message):
 - The QETCR query/set helpers now size their scratch buffers from
   qetc_reg, the register layout added to mlx5_ifc.h by this patch and
   used by their callers, instead of qtct_reg.
 - mlx5e_create_ethtool() now tests qos_node, the value it just assigned,
   rather than node, so a failed "qos" node creation is detected.
 - The tc_N_max_rate description is built from adjacent string literals;
   the previous backslash continuation embedded the next source line's
   indentation in the description text.
 - Fixed the "Prioriry" comment typo.
 - All changes are in-place edits of added lines; hunk sizes and the
   diffstat are unchanged.
 - The #include targets in the first en.h hunk's context were already
   missing from the reviewed copy and have not been reconstructed.

 sys/dev/mlx5/mlx5_core/mlx5_port.c     |  83 ++++++++++++++++++
 sys/dev/mlx5/mlx5_en/en.h              |  26 ++++++
 sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c | 116 ++++++++++++++++++++++++-
 sys/dev/mlx5/mlx5_ifc.h                |   9 ++
 sys/dev/mlx5/port.h                    |   7 ++
 5 files changed, 240 insertions(+), 1 deletion(-)

diff --git a/sys/dev/mlx5/mlx5_core/mlx5_port.c b/sys/dev/mlx5/mlx5_core/mlx5_port.c
index 597f3a9210fd..4a9243617ef3 100644
--- a/sys/dev/mlx5/mlx5_core/mlx5_port.c
+++ b/sys/dev/mlx5/mlx5_core/mlx5_port.c
@@ -860,6 +860,89 @@ int mlx5_query_port_cong_params(struct mlx5_core_dev *mdev, int protocol,
 				    out, out_size);
 }
 
+static int mlx5_query_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *out,
+				     int outlen)
+{
+	u32 in[MLX5_ST_SZ_DW(qetc_reg)];
+
+	if (!MLX5_CAP_GEN(mdev, ets))
+		return -ENOTSUPP;
+
+	memset(in, 0, sizeof(in));
+	return mlx5_core_access_reg(mdev, in, sizeof(in), out, outlen,
+				    MLX5_REG_QETCR, 0, 0);
+}
+
+int mlx5_max_tc(struct mlx5_core_dev *mdev)
+{
+	u8 num_tc = MLX5_CAP_GEN(mdev, max_tc) ? : 8;
+
+	return num_tc - 1;
+}
+EXPORT_SYMBOL_GPL(mlx5_max_tc);
+
+static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in,
+				   int inlen)
+{
+	u32 out[MLX5_ST_SZ_DW(qetc_reg)];
+
+	if (!MLX5_CAP_GEN(mdev, ets))
+		return -ENOTSUPP;
+
+	return mlx5_core_access_reg(mdev, in, inlen, out, sizeof(out),
+				    MLX5_REG_QETCR, 0, 1);
+}
+
+int mlx5_query_port_tc_rate_limit(struct mlx5_core_dev *mdev,
+				   u8 *max_bw_value,
+				   u8 *max_bw_units)
+{
+	u32 out[MLX5_ST_SZ_DW(qetc_reg)];
+	void *ets_tcn_conf;
+	int err;
+	int i;
+
+	err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out));
+	if (err)
+		return err;
+
+	for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+		ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out, tc_configuration[i]);
+
+		max_bw_value[i] = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf,
+					   max_bw_value);
+		max_bw_units[i] = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf,
+					   max_bw_units);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_tc_rate_limit);
+
+int mlx5_modify_port_tc_rate_limit(struct mlx5_core_dev *mdev,
+				    const u8 *max_bw_value,
+				    const u8 *max_bw_units)
+{
+	u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {};
+	void *ets_tcn_conf;
+	int i;
+
+	MLX5_SET(qetc_reg, in, port_number, 1);
+
+	for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+		ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, in, tc_configuration[i]);
+
+		MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, r, 1);
+		MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, max_bw_units,
+			 max_bw_units[i]);
+		MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, max_bw_value,
+			 max_bw_value[i]);
+	}
+
+	return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in));
+}
+EXPORT_SYMBOL_GPL(mlx5_modify_port_tc_rate_limit);
+
 int mlx5_modify_port_cong_params(struct mlx5_core_dev *mdev,
 				 void *in, int in_size)
 {
diff --git a/sys/dev/mlx5/mlx5_en/en.h b/sys/dev/mlx5/mlx5_en/en.h
index 908ea3354361..fcb07dcc08e6 100644
--- a/sys/dev/mlx5/mlx5_en/en.h
+++ b/sys/dev/mlx5/mlx5_en/en.h
@@ -70,6 +70,8 @@
 #include
 #include
 
+#define IEEE_8021QAZ_MAX_TCS 8
+
 #define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE 0x7
 #define MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE 0xa
 #define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE 0xe
@@ -114,6 +116,9 @@
 	(MLX5E_MAX_TX_HEADER - sizeof(struct mlx5e_tx_wqe) + \
 	sizeof(((struct mlx5e_tx_wqe *)0)->eth.inline_hdr_start)) /* bytes */
 
+#define MLX5E_100MB (100000)
+#define MLX5E_1GB (1000000)
+
 MALLOC_DECLARE(M_MLX5EN);
 
 struct mlx5_core_dev;
@@ -417,11 +422,13 @@ struct mlx5e_params {
   m(+1, u64 mc_local_lb, "mc_local_lb", "0: Local multicast loopback enabled 1: Disabled") \
   m(+1, u64 uc_local_lb, "uc_local_lb", "0: Local unicast loopback enabled 1: Disabled")
 
+
 #define MLX5E_PARAMS_NUM (0 MLX5E_PARAMS(MLX5E_STATS_COUNT))
 
 struct mlx5e_params_ethtool {
 	u64 arg [0];
 	MLX5E_PARAMS(MLX5E_STATS_VAR)
+	u64 max_bw_value[IEEE_8021QAZ_MAX_TCS];
 };
 
 /* EEPROM Standards for plug in modules */
@@ -629,6 +636,12 @@ enum {
 	MLX5E_STATE_OPENED,
 };
 
+enum {
+	MLX5_BW_NO_LIMIT = 0,
+	MLX5_100_MBPS_UNIT = 3,
+	MLX5_GBPS_UNIT = 4,
+};
+
 struct mlx5e_vlan_db {
 	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
 	struct mlx5_flow_rule *active_vlans_ft_rule[VLAN_N_VID];
@@ -748,6 +761,19 @@ struct mlx5e_eeprom {
 	u32 *data;
 };
 
+/*
+ * This structure contains rate limit extension to the IEEE 802.1Qaz ETS
+ * managed object.
+ * Values are 64 bits long and specified in Kbps to enable usage over both
+ * slow and very fast networks.
+ *
+ * @tc_maxrate: maximal tc tx bandwidth indexed by traffic class
+ */
+struct ieee_maxrate {
+	__u64 tc_maxrate[IEEE_8021QAZ_MAX_TCS];
+};
+
+
 #define MLX5E_FLD_MAX(typ, fld) ((1ULL << __mlx5_bit_sz(typ, fld)) - 1ULL)
 
 int mlx5e_xmit(struct ifnet *, struct mbuf *);
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c b/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c
index 2770d1674cdc..1cfef3954b4f 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c
@@ -84,6 +84,97 @@ mlx5e_ethtool_sync_tx_completion_fact(struct mlx5e_priv *priv)
 	priv->params_ethtool.tx_completion_fact = max;
 }
 
+static int
+mlx5e_getmaxrate(struct mlx5e_priv *priv)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS];
+	u8 max_bw_value[IEEE_8021QAZ_MAX_TCS];
+	int err;
+	int i;
+
+	PRIV_LOCK(priv);
+	err = -mlx5_query_port_tc_rate_limit(mdev, max_bw_value, max_bw_unit);
+	if (err)
+		goto done;
+
+	for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+		switch (max_bw_unit[i]) {
+		case MLX5_100_MBPS_UNIT:
+			priv->params_ethtool.max_bw_value[i] = max_bw_value[i] * MLX5E_100MB;
+			break;
+		case MLX5_GBPS_UNIT:
+			priv->params_ethtool.max_bw_value[i] = max_bw_value[i] * MLX5E_1GB;
+			break;
+		case MLX5_BW_NO_LIMIT:
+			priv->params_ethtool.max_bw_value[i] = 0;
+			break;
+		default:
+			priv->params_ethtool.max_bw_value[i] = -1;
+			WARN_ONCE(true, "non-supported BW unit");
+			break;
+		}
+	}
+done:
+	PRIV_UNLOCK(priv);
+	return (err);
+}
+
+static int
+mlx5e_tc_maxrate_handler(SYSCTL_HANDLER_ARGS)
+{
+	struct mlx5e_priv *priv = arg1;
+	int prio_index = arg2;
+	struct mlx5_core_dev *mdev = priv->mdev;
+	u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS];
+	u8 max_bw_value[IEEE_8021QAZ_MAX_TCS];
+	int i, err;
+	u64 bw_val;
+	u64 result = priv->params_ethtool.max_bw_value[prio_index];
+	const u64 upper_limit_mbps = 255 * MLX5E_100MB;
+	const u64 upper_limit_gbps = 255 * MLX5E_1GB;
+
+	PRIV_LOCK(priv);
+	err = sysctl_handle_64(oidp, &result, 0, req);
+	if (err || !req->newptr ||
+	    result == priv->params_ethtool.max_bw_value[prio_index])
+		goto done;
+
+	if (result % MLX5E_100MB) {
+		err = ERANGE;
+		goto done;
+	}
+
+	memset(max_bw_value, 0, sizeof(max_bw_value));
+	memset(max_bw_unit, 0, sizeof(max_bw_unit));
+
+	for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+		bw_val = (i == prio_index) ? result : priv->params_ethtool.max_bw_value[i];
+
+		if (!bw_val) {
+			max_bw_unit[i] = MLX5_BW_NO_LIMIT;
+		} else if (bw_val > upper_limit_gbps) {
+			result = 0;
+			max_bw_unit[i] = MLX5_BW_NO_LIMIT;
+		} else if (bw_val <= upper_limit_mbps) {
+			max_bw_value[i] = howmany(bw_val, MLX5E_100MB);
+			max_bw_unit[i] = MLX5_100_MBPS_UNIT;
+		} else {
+			max_bw_value[i] = howmany(bw_val, MLX5E_1GB);
+			max_bw_unit[i] = MLX5_GBPS_UNIT;
+		}
+	}
+
+	err = -mlx5_modify_port_tc_rate_limit(mdev, max_bw_value, max_bw_unit);
+	if (err)
+		goto done;
+
+	priv->params_ethtool.max_bw_value[prio_index] = result;
+done:
+	PRIV_UNLOCK(priv);
+	return (err);
+}
+
 #define MLX5_PARAM_OFFSET(n) \
     __offsetof(struct mlx5e_priv, params_ethtool.n)
 
@@ -734,9 +825,11 @@ mlx5e_create_diagnostics(struct mlx5e_priv *priv)
 void
 mlx5e_create_ethtool(struct mlx5e_priv *priv)
 {
-	struct sysctl_oid *node;
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct sysctl_oid *node, *qos_node;
 	const char *pnameunit;
 	unsigned x;
+	int i;
 
 	/* set some defaults */
 	priv->params_ethtool.tx_queue_size_max = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE;
@@ -830,4 +923,25 @@ mlx5e_create_ethtool(struct mlx5e_priv *priv)
 
 	/* Diagnostics support */
 	mlx5e_create_diagnostics(priv);
+
+	/* create qos node */
+	qos_node = SYSCTL_ADD_NODE(&priv->sysctl_ctx,
+	    SYSCTL_CHILDREN(node), OID_AUTO,
+	    "qos", CTLFLAG_RW, NULL, "Quality Of Service configuration");
+	if (qos_node == NULL)
+		return;
+
+	/* Priority rate limit support */
+	if (mlx5e_getmaxrate(priv))
+		return;
+
+	for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+		char name[32];
+		snprintf(name, sizeof(name), "tc_%d_max_rate", i);
+		SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(qos_node),
+				OID_AUTO, name, CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE,
+				priv, i, mlx5e_tc_maxrate_handler, "QU",
+				"Max rate for priority, specified in kilobits, where kilo=1000, "
+				"max_rate must be divisible by 100000");
+	}
 }
diff --git a/sys/dev/mlx5/mlx5_ifc.h b/sys/dev/mlx5/mlx5_ifc.h
index 267a79a22f57..25d397627083 100644
--- a/sys/dev/mlx5/mlx5_ifc.h
+++ b/sys/dev/mlx5/mlx5_ifc.h
@@ -9096,6 +9096,15 @@ struct mlx5_ifc_ets_global_config_reg_bits {
 	u8 max_bw_value[0x8];
 };
 
+struct mlx5_ifc_qetc_reg_bits {
+	u8 reserved_at_0[0x8];
+	u8 port_number[0x8];
+	u8 reserved_at_10[0x30];
+
+	struct mlx5_ifc_ets_tcn_config_reg_bits tc_configuration[0x8];
+	struct mlx5_ifc_ets_global_config_reg_bits global_configuration;
+};
+
 struct mlx5_ifc_nodnic_mac_filters_bits {
 	struct mlx5_ifc_mac_address_layout_bits mac_filter0;
 
diff --git a/sys/dev/mlx5/port.h b/sys/dev/mlx5/port.h
index 15aee3ed3fc7..51419ca9dd9b 100644
--- a/sys/dev/mlx5/port.h
+++ b/sys/dev/mlx5/port.h
@@ -144,4 +144,11 @@ int mlx5_query_eeprom(struct mlx5_core_dev *dev, int i2c_addr, int page_num,
 		      int device_addr, int size, int module_num, u32 *data,
 		      int *size_read);
 
+int mlx5_max_tc(struct mlx5_core_dev *mdev);
+int mlx5_query_port_tc_rate_limit(struct mlx5_core_dev *mdev,
+				   u8 *max_bw_value,
+				   u8 *max_bw_units);
+int mlx5_modify_port_tc_rate_limit(struct mlx5_core_dev *mdev,
+				    const u8 *max_bw_value,
+				    const u8 *max_bw_units);
 #endif /* __MLX5_PORT_H__ */