Add support for hardware rate limiting to mlx5en(4).

The hardware rate limiting feature is enabled by the RATELIMIT kernel
option. Please refer to ifconfig(8) and its txrtlmt option, and to the
SO_MAX_PACING_RATE socket option, for more information. This
feature is compatible with hardware TCP segmentation offload, TSO.

A set of sysctl(8) knobs under dev.mce.<N>.rate_limit is provided to
set up the ratelimit table and also to fine tune various rate limit
related parameters.

Sponsored by:	Mellanox Technologies
This commit is contained in:
Hans Petter Selasky 2018-05-29 14:04:57 +00:00
parent 9c7c97c0ff
commit 38535d6cab
14 changed files with 2070 additions and 9 deletions

View File

@ -24,18 +24,19 @@
.\"
.\" $FreeBSD$
.\"
.Dd December 3, 2015
.Dd May 29, 2018
.Dt MLX5EN 4
.Os
.Sh NAME
.Nm mlx5en
.Nd "Mellanox ConnectX-4 and ConnectX-4 LX based 100Gb, 50Gb, 40Gb, 25Gb and 10Gb Ethernet adapter driver"
.Nd "Mellanox ConnectX-4, ConnectX-4 LX and ConnectX-5 based 100Gb, 50Gb, 40Gb, 25Gb and 10Gb Ethernet adapter driver"
.Sh SYNOPSIS
To compile this driver into the kernel,
place the following lines in your
kernel configuration file:
.Bd -ragged -offset indent
.Cd "options COMPAT_LINUXKPI"
.Cd "options RATELIMIT"
.Cd "device mlx5"
.Cd "device mlx5en"
.Ed
@ -56,11 +57,12 @@ mlx5en_load="YES"
The
.Nm
driver provides support for PCI Express Ethernet adapters based on
ConnectX-4 and ConnectX-4 LX.
ConnectX-4, ConnectX-4 LX and ConnectX-5.
The driver supports Jumbo Frames, Transmit/Receive checksum offload,
TCP segmentation offload (TSO), Large Receive Offload (LRO),
HW Large Receive Offload (HW LRO), VLAN tag insertion/extraction,
VLAN checksum offload, VLAN TSO, and Receive Side Steering (RSS).
VLAN checksum offload, VLAN TSO, hardware rate limiting (TXRTLMT)
and Receive Side Steering (RSS).
.br
The network interface is named mce.
.br
@ -74,6 +76,7 @@ For more information on configuring this device, see
The
.Nm
driver supports 100Gb, 50Gb, 40Gb, 25Gb and 10Gb Ethernet adapters.
ConnectX-5 supports:10/20/25/40/50/56/100Gb/s speeds.
ConnectX-4 supports:10/20/25/40/50/56/100Gb/s speeds.
ConnectX-4 LX supports:10/25/40/50Gb/s speeds (and reduced power consumption) :
.Pp

View File

@ -4775,6 +4775,8 @@ dev/mlx5/mlx5_core/mlx5_port.c optional mlx5 pci \
compile-with "${OFED_C}"
dev/mlx5/mlx5_core/mlx5_qp.c optional mlx5 pci \
compile-with "${OFED_C}"
dev/mlx5/mlx5_core/mlx5_rl.c optional mlx5 pci \
compile-with "${OFED_C}"
dev/mlx5/mlx5_core/mlx5_srq.c optional mlx5 pci \
compile-with "${OFED_C}"
dev/mlx5/mlx5_core/mlx5_transobj.c optional mlx5 pci \
@ -4798,6 +4800,8 @@ dev/mlx5/mlx5_en/mlx5_en_flow_table.c optional mlx5en pci inet inet6 \
compile-with "${OFED_C}"
dev/mlx5/mlx5_en/mlx5_en_rx.c optional mlx5en pci inet inet6 \
compile-with "${OFED_C}"
dev/mlx5/mlx5_en/mlx5_en_rl.c optional mlx5en pci inet inet6 \
compile-with "${OFED_C}"
dev/mlx5/mlx5_en/mlx5_en_txrx.c optional mlx5en pci inet inet6 \
compile-with "${OFED_C}"

View File

@ -420,6 +420,7 @@ BOOTP_WIRED_TO opt_bootp.h
DEVICE_POLLING
DUMMYNET opt_ipdn.h
RATELIMIT opt_ratelimit.h
RATELIMIT_DEBUG opt_ratelimit.h
INET opt_inet.h
INET6 opt_inet6.h
IPDIVERT

View File

@ -28,6 +28,8 @@
#ifndef MLX5_DRIVER_H
#define MLX5_DRIVER_H
#include "opt_ratelimit.h"
#include <linux/kernel.h>
#include <linux/completion.h>
#include <linux/pci.h>
@ -500,7 +502,11 @@ struct mlx5_core_health {
struct delayed_work recover_work;
};
#ifdef RATELIMIT
#define MLX5_CQ_LINEAR_ARRAY_SIZE (128 * 1024)
#else
#define MLX5_CQ_LINEAR_ARRAY_SIZE 1024
#endif
struct mlx5_cq_linear_array_entry {
spinlock_t lock;
@ -540,6 +546,23 @@ struct mlx5_irq_info {
char name[MLX5_MAX_IRQ_NAME];
};
#ifdef RATELIMIT
/*
 * One slot of the software shadow of the rate limit table.
 */
struct mlx5_rl_entry {
/* configured rate; zero marks the slot as unused (see find_rl_entry()) */
u32 rate;
/* burst value that was configured together with the rate */
u16 burst;
/* index of this slot in the hardware table; set to array position + 1 */
u16 index;
/* number of mlx5_rl_add_rate() references currently held on this slot */
u32 refcount;
};
/*
 * Per-device rate limit table state, embedded in struct mlx5_priv.
 */
struct mlx5_rl_table {
/* held while rates are added to or removed from the table */
struct mutex rl_lock;
/* number of entries in rl_entry; zero means rate limiting is unsupported */
u16 max_size;
/* inclusive rate bounds checked by mlx5_rl_is_in_range() */
u32 max_rate;
u32 min_rate;
/* array of max_size entries, allocated by mlx5_init_rl_table() */
struct mlx5_rl_entry *rl_entry;
};
#endif
struct mlx5_priv {
char name[MLX5_MAX_NAME_LEN];
struct mlx5_eq_table eq_table;
@ -592,6 +615,9 @@ struct mlx5_priv {
struct list_head ctx_list;
spinlock_t ctx_lock;
unsigned long pci_dev_data;
#ifdef RATELIMIT
struct mlx5_rl_table rl_table;
#endif
};
enum mlx5_device_state {
@ -1084,5 +1110,17 @@ static inline int mlx5_core_is_pf(struct mlx5_core_dev *dev)
{
return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF);
}
#ifdef RATELIMIT
/* Set up the per-device rate table; returns 0 or a negative errno value. */
int mlx5_init_rl_table(struct mlx5_core_dev *dev);
/* Clear every configured rate in the device and free the rate table. */
void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev);
/*
 * Take a reference on the (rate, burst) pair, configuring it in the
 * device if needed. On success returns 0 and stores the hardware table
 * index in *index; otherwise returns a negative errno value.
 */
int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u32 burst, u16 *index);
/* Drop one reference on the (rate, burst) pair; a rate of 0 is ignored. */
void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate, u32 burst);
/* True when rate is within the table's min/max and burst is at most 65535. */
bool mlx5_rl_is_in_range(const struct mlx5_core_dev *dev, u32 rate, u32 burst);
/*
 * Tell whether the device has a usable rate limit table.
 *
 * The table size is left at zero when rate limiting is not available,
 * so a non-zero size is the "supported" indicator.
 */
static inline bool mlx5_rl_is_supported(struct mlx5_core_dev *dev)
{
	return (dev->priv.rl_table.max_size != 0);
}
#endif
#endif /* MLX5_DRIVER_H */

View File

@ -905,8 +905,23 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
mlx5_init_srq_table(dev);
mlx5_init_mr_table(dev);
#ifdef RATELIMIT
err = mlx5_init_rl_table(dev);
if (err) {
dev_err(&pdev->dev, "Failed to init rate limiting\n");
goto err_tables_cleanup;
}
#endif
return 0;
#ifdef RATELIMIT
err_tables_cleanup:
mlx5_cleanup_mr_table(dev);
mlx5_cleanup_srq_table(dev);
mlx5_cleanup_qp_table(dev);
mlx5_cleanup_cq_table(dev);
#endif
err_eq_cleanup:
mlx5_eq_cleanup(dev);
@ -916,6 +931,9 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
{
#ifdef RATELIMIT
mlx5_cleanup_rl_table(dev);
#endif
mlx5_cleanup_mr_table(dev);
mlx5_cleanup_srq_table(dev);
mlx5_cleanup_qp_table(dev);

View File

@ -0,0 +1,206 @@
/*-
* Copyright (c) 2013-2017, Mellanox Technologies, Ltd. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <dev/mlx5/driver.h>
#include "mlx5_core.h"
#ifdef RATELIMIT
/*
 * Pick the table slot to use for a (rate, burst) pair.
 *
 * All max_size slots are scanned in order. A slot whose rate and burst
 * both match is returned right away. Failing that, the lowest-numbered
 * slot with a zero rate (i.e. an unused one) is returned. NULL is
 * returned when there is neither a match nor an unused slot.
 */
static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table,
					   u32 rate, u16 burst)
{
	struct mlx5_rl_entry *first_free = NULL;
	struct mlx5_rl_entry *cur;
	u16 slot;

	for (slot = 0; slot < table->max_size; slot++) {
		cur = &table->rl_entry[slot];

		/* an exact match always wins over a free slot */
		if (cur->rate == rate && cur->burst == burst)
			return cur;

		/* remember only the first unused slot we come across */
		if (cur->rate == 0 && first_free == NULL)
			first_free = cur;
	}

	return first_free;
}
/*
 * Execute a SET_RATE_LIMIT command for one hardware rate table index.
 *
 * The burst upper bound is only written into the command when the
 * device reports the packet_pacing_burst_bound QoS capability.
 *
 * Returns whatever mlx5_cmd_exec() returns.
 */
static int mlx5_set_rate_limit_cmd(struct mlx5_core_dev *dev,
				   u32 rate, u32 burst, u16 index)
{
	u32 cmd_out[MLX5_ST_SZ_DW(set_rate_limit_out)] = {0};
	u32 cmd_in[MLX5_ST_SZ_DW(set_rate_limit_in)] = {0};

	MLX5_SET(set_rate_limit_in, cmd_in, opcode,
		 MLX5_CMD_OP_SET_RATE_LIMIT);
	MLX5_SET(set_rate_limit_in, cmd_in, rate_limit, rate);
	MLX5_SET(set_rate_limit_in, cmd_in, rate_limit_index, index);

	if (MLX5_CAP_QOS(dev, packet_pacing_burst_bound) != 0) {
		MLX5_SET(set_rate_limit_in, cmd_in, burst_upper_bound,
			 burst);
	}

	return mlx5_cmd_exec(dev, cmd_in, sizeof(cmd_in),
			     cmd_out, sizeof(cmd_out));
}
/*
 * Check a (rate, burst) pair against the limits of the device's table.
 *
 * Returns true when rate lies within [min_rate, max_rate] and burst
 * does not exceed 65535; false otherwise.
 */
bool mlx5_rl_is_in_range(const struct mlx5_core_dev *dev, u32 rate, u32 burst)
{
	const struct mlx5_rl_table *limits = &dev->priv.rl_table;

	if (rate > limits->max_rate)
		return false;
	if (rate < limits->min_rate)
		return false;

	return burst <= 65535;
}
EXPORT_SYMBOL(mlx5_rl_is_in_range);
/*
 * Take a reference on a (rate, burst) pair.
 *
 * If the pair is already present in the table its reference count is
 * bumped; otherwise an unused slot is programmed into the device and
 * starts out with one reference. The whole operation runs under the
 * table lock.
 *
 * On success 0 is returned and the hardware table index of the slot is
 * stored in *index. Errors:
 *   -ERANGE  rate is zero or the pair fails mlx5_rl_is_in_range()
 *   -ENOSPC  no matching and no unused slot is left
 *   -ENOMEM  the slot's reference counter is saturated
 *   other    error from the firmware command
 */
int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u32 burst, u16 *index)
{
	struct mlx5_rl_table *table = &dev->priv.rl_table;
	struct mlx5_rl_entry *slot;
	int ret = 0;

	mutex_lock(&table->rl_lock);

	if (rate == 0 || !mlx5_rl_is_in_range(dev, rate, burst)) {
		mlx5_core_err(dev, "Invalid rate: %u, should be %u to %u\n",
			      rate, table->min_rate, table->max_rate);
		ret = -ERANGE;
		goto unlock;
	}

	slot = find_rl_entry(table, rate, burst);
	if (slot == NULL) {
		mlx5_core_err(dev, "Max number of %u rates reached\n",
			      table->max_size);
		ret = -ENOSPC;
		goto unlock;
	}

	/* out of refcounts */
	if (slot->refcount == 0xFFFFFFFFU) {
		ret = -ENOMEM;
		goto unlock;
	}

	if (slot->refcount == 0) {
		/* unused slot: program the new rate into the device first */
		ret = mlx5_set_rate_limit_cmd(dev, rate, burst, slot->index);
		if (ret != 0) {
			mlx5_core_err(dev, "Failed configuring rate: %u (%d)\n",
				      rate, ret);
			goto unlock;
		}
		slot->rate = rate;
		slot->burst = burst;
		slot->refcount = 1;
	} else {
		/* rate already configured: just share it */
		slot->refcount++;
	}

	*index = slot->index;

unlock:
	mutex_unlock(&table->rl_lock);
	return ret;
}
EXPORT_SYMBOL(mlx5_rl_add_rate);
/*
 * Drop one reference on a (rate, burst) pair.
 *
 * When the last reference goes away the slot is cleared in the device
 * and marked unused in the software table. A rate of 0 means "unlimited"
 * and is never stored in the table, so it is ignored. Asking to remove a
 * pair that is not configured only logs a warning.
 */
void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate, u32 burst)
{
	struct mlx5_rl_table *table = &dev->priv.rl_table;
	struct mlx5_rl_entry *entry = NULL;
	int err;

	/* 0 is a reserved value for unlimited rate */
	if (rate == 0)
		return;

	mutex_lock(&table->rl_lock);

	/*
	 * find_rl_entry() takes a 16-bit burst. A larger value can never
	 * have been added (mlx5_rl_add_rate() rejects it), so do not let
	 * it be truncated into a value that matches some other entry.
	 */
	if (burst <= 65535)
		entry = find_rl_entry(table, rate, burst);

	/* a free slot returned by the lookup has a zero refcount */
	if (!entry || !entry->refcount) {
		mlx5_core_warn(dev, "Rate %u is not configured\n", rate);
		goto out;
	}

	entry->refcount--;
	if (!entry->refcount) {
		/* need to remove rate */
		err = mlx5_set_rate_limit_cmd(dev, 0, 0, entry->index);
		if (err)
			mlx5_core_warn(dev, "Failed clearing rate: %u (%d)\n",
				       rate, err);
		/* release the slot regardless; nobody references it anymore */
		entry->rate = 0;
		entry->burst = 0;
	}
out:
	mutex_unlock(&table->rl_lock);
}
EXPORT_SYMBOL(mlx5_rl_remove_rate);
int mlx5_init_rl_table(struct mlx5_core_dev *dev)
{
struct mlx5_rl_table *table = &dev->priv.rl_table;
int i;
mutex_init(&table->rl_lock);
if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, packet_pacing)) {
table->max_size = 0;
return 0;
}
/* First entry is reserved for unlimited rate */
table->max_size = MLX5_CAP_QOS(dev, packet_pacing_rate_table_size) - 1;
table->max_rate = MLX5_CAP_QOS(dev, packet_pacing_max_rate);
table->min_rate = MLX5_CAP_QOS(dev, packet_pacing_min_rate);
table->rl_entry = kcalloc(table->max_size, sizeof(struct mlx5_rl_entry),
GFP_KERNEL);
if (!table->rl_entry)
return -ENOMEM;
/* The index represents the index in HW rate limit table
* Index 0 is reserved for unlimited rate
*/
for (i = 0; i < table->max_size; i++)
table->rl_entry[i].index = i + 1;
return 0;
}
void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev)
{
struct mlx5_rl_table *table = &dev->priv.rl_table;
int i;
/* Clear all configured rates */
for (i = 0; i < table->max_size; i++)
if (table->rl_entry[i].rate)
mlx5_set_rate_limit_cmd(dev, 0, 0,
table->rl_entry[i].index);
kfree(dev->priv.rl_table.rl_entry);
}
#endif

View File

@ -49,6 +49,7 @@
#include <netinet/udp.h>
#include <net/ethernet.h>
#include <sys/buf_ring.h>
#include <sys/kthread.h>
#include "opt_rss.h"
@ -711,6 +712,10 @@ struct mlx5e_flow_tables {
struct mlx5e_flow_table inner_rss;
};
#ifdef RATELIMIT
#include "en_rl.h"
#endif
#define MLX5E_TSTMP_PREC 10
struct mlx5e_clbr_point {
@ -778,6 +783,9 @@ struct mlx5e_priv {
int media_active_last;
struct callout watchdog;
#ifdef RATELIMIT
struct mlx5e_rl_priv_data rl;
#endif
struct callout tstmp_clbr;
int clbr_done;

View File

@ -0,0 +1,174 @@
/*-
* Copyright (c) 2016 Mellanox Technologies. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef __MLX5_EN_RL_H__
#define __MLX5_EN_RL_H__
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sx.h>
#include <sys/proc.h>
#include <sys/condvar.h>
#include <sys/interrupt.h>
#include <sys/unistd.h>
#include <sys/queue.h>
#define MLX5E_RL_MAX_WORKERS 128 /* limited by Toeplitz hash */
#define MLX5E_RL_MAX_TX_RATES (64 * 1024) /* software limit */
#define MLX5E_RL_DEF_SQ_PER_WORKER (12 * 1024) /* software limit */
#define MLX5E_RL_MAX_SQS (120 * 1024) /* software limit */
#define MLX5E_RL_TX_COAL_USEC_DEFAULT 32
#define MLX5E_RL_TX_COAL_PKTS_DEFAULT 4
#define MLX5E_RL_TX_COAL_MODE_DEFAULT 0
#define MLX5E_RL_TX_COMP_FACT_DEFAULT 1
/* Lock/unlock the per-worker mutex (struct mlx5e_rl_worker, field mtx). */
#define MLX5E_RL_WORKER_LOCK(rlw) mtx_lock(&(rlw)->mtx)
#define MLX5E_RL_WORKER_UNLOCK(rlw) mtx_unlock(&(rlw)->mtx)
/*
 * Shared (R) and exclusive (W) acquisition of the sx lock in
 * struct mlx5e_rl_priv_data (field rl_sxlock).
 */
#define MLX5E_RL_RLOCK(rl) sx_slock(&(rl)->rl_sxlock)
#define MLX5E_RL_RUNLOCK(rl) sx_sunlock(&(rl)->rl_sxlock)
#define MLX5E_RL_WLOCK(rl) sx_xlock(&(rl)->rl_sxlock)
#define MLX5E_RL_WUNLOCK(rl) sx_xunlock(&(rl)->rl_sxlock)
/*
 * List of rate limit parameters. Each row invokes the caller-supplied
 * macro "m" with: a "+1" token, a u64 field declaration, the parameter's
 * name string and a description string.
 *
 * MLX5E_RL_PARAMS_NUM expands the list with MLX5E_STATS_COUNT, which is
 * defined outside this file; presumably it keeps only the "+1" so the
 * expression evaluates to the number of rows - TODO confirm.
 */
#define MLX5E_RL_PARAMS(m) \
m(+1, u64 tx_queue_size, "tx_queue_size", "Default send queue size") \
m(+1, u64 tx_coalesce_usecs, "tx_coalesce_usecs", "Limit in usec for joining TX packets") \
m(+1, u64 tx_coalesce_pkts, "tx_coalesce_pkts", "Maximum number of TX packets to join") \
m(+1, u64 tx_coalesce_mode, "tx_coalesce_mode", "0: EQE mode 1: CQE mode") \
m(+1, u64 tx_completion_fact, "tx_completion_fact", "1..MAX: Completion event ratio") \
m(+1, u64 tx_completion_fact_max, "tx_completion_fact_max", "Maximum completion event ratio") \
m(+1, u64 tx_worker_threads_max, "tx_worker_threads_max", "Max number of TX worker threads") \
m(+1, u64 tx_worker_threads_def, "tx_worker_threads_def", "Default number of TX worker threads") \
m(+1, u64 tx_channels_per_worker_max, "tx_channels_per_worker_max", "Max number of TX channels per worker") \
m(+1, u64 tx_channels_per_worker_def, "tx_channels_per_worker_def", "Default number of TX channels per worker") \
m(+1, u64 tx_rates_max, "tx_rates_max", "Max number of TX rates") \
m(+1, u64 tx_rates_def, "tx_rates_def", "Default number of TX rates") \
m(+1, u64 tx_limit_min, "tx_limit_min", "Minimum TX rate in bits/s") \
m(+1, u64 tx_limit_max, "tx_limit_max", "Maximum TX rate in bits/s") \
m(+1, u64 tx_burst_size, "tx_burst_size", "Current burst size in number of packets. A value of zero means use firmware default.") \
m(+1, u64 tx_burst_size_max, "tx_burst_size_max", "Maximum burst size in number of packets") \
m(+1, u64 tx_burst_size_min, "tx_burst_size_min", "Minimum burst size in number of packets")
#define MLX5E_RL_PARAMS_NUM (0 MLX5E_RL_PARAMS(MLX5E_STATS_COUNT))
/*
 * List of rate limit statistics counters, in the same row format as
 * MLX5E_RL_PARAMS: "+1" token, u64 field declaration, name string and
 * description string, all handed to the caller-supplied macro "m".
 *
 * MLX5E_RL_STATS_NUM is presumably the number of rows; it depends on
 * MLX5E_STATS_COUNT, which is defined outside this file - TODO confirm.
 */
#define MLX5E_RL_STATS(m) \
m(+1, u64 tx_allocate_resource_failure, "tx_allocate_resource_failure", "Number of times firmware resource allocation failed") \
m(+1, u64 tx_add_new_rate_failure, "tx_add_new_rate_failure", "Number of times adding a new firmware rate failed") \
m(+1, u64 tx_modify_rate_failure, "tx_modify_rate_failure", "Number of times modifying a firmware rate failed") \
m(+1, u64 tx_active_connections, "tx_active_connections", "Number of active connections") \
m(+1, u64 tx_open_queues, "tx_open_queues", "Number of open TX queues") \
m(+1, u64 tx_available_resource_failure, "tx_available_resource_failure", "Number of times TX resources were not available")
#define MLX5E_RL_STATS_NUM (0 MLX5E_RL_STATS(MLX5E_STATS_COUNT))
/*
 * List of parameters that act on the rate table itself, in the same row
 * format as MLX5E_RL_PARAMS. These rows become the fields that follow
 * "table_arg" in struct mlx5e_rl_params.
 *
 * MLX5E_RL_TABLE_PARAMS_NUM is presumably the number of rows; it depends
 * on MLX5E_STATS_COUNT, which is defined outside this file - TODO confirm.
 */
#define MLX5E_RL_TABLE_PARAMS(m) \
m(+1, u64 tx_limit_add, "tx_limit_add", "Add TX rate limit in bits/s to empty slot") \
m(+1, u64 tx_limit_clr, "tx_limit_clr", "Clear all TX rates in table") \
m(+1, u64 tx_allowed_deviation, "tx_allowed_deviation", "Relative rate deviation allowed in 1/1000") \
m(+1, u64 tx_allowed_deviation_min, "tx_allowed_deviation_min", "Minimum allowed rate deviation in 1/1000") \
m(+1, u64 tx_allowed_deviation_max, "tx_allowed_deviation_max", "Maximum allowed rate deviation in 1/1000")
#define MLX5E_RL_TABLE_PARAMS_NUM (0 MLX5E_RL_TABLE_PARAMS(MLX5E_STATS_COUNT))
/*
 * Position of field "n" inside struct mlx5e_rl_params, counted in
 * uint64_t-sized units from the start of the structure.
 */
#define MLX5E_RL_PARAMS_INDEX(n) \
(__offsetof(struct mlx5e_rl_params, n) / sizeof(uint64_t))
struct mlx5e_priv;
/*
 * Indicates channel's state.
 *
 * NOTE(review): presumably the values stored in the "state" field of
 * struct mlx5e_rl_channel; the transitions between them are not visible
 * in this header - confirm against mlx5_en_rl.c.
 */
enum {
MLX5E_RL_ST_FREE,
MLX5E_RL_ST_USED,
MLX5E_RL_ST_MODIFY,
MLX5E_RL_ST_DESTROY,
};
/*
 * Rate limit statistics. The fields are generated from the
 * MLX5E_RL_STATS list through MLX5E_STATS_VAR (defined elsewhere).
 */
struct mlx5e_rl_stats {
/*
 * Zero-length array placed in front of the generated fields;
 * presumably used to address them by index - TODO confirm.
 */
u64 arg [0];
MLX5E_RL_STATS(MLX5E_STATS_VAR)
};
/*
 * Rate limit parameters. The fields are generated from the
 * MLX5E_RL_PARAMS list followed by the MLX5E_RL_TABLE_PARAMS list,
 * both through MLX5E_STATS_VAR (defined elsewhere).
 */
struct mlx5e_rl_params {
/* zero-length marker in front of the MLX5E_RL_PARAMS fields */
u64 arg [0];
MLX5E_RL_PARAMS(MLX5E_STATS_VAR)
/* zero-length marker in front of the MLX5E_RL_TABLE_PARAMS fields */
u64 table_arg [0];
MLX5E_RL_TABLE_PARAMS(MLX5E_STATS_VAR)
};
/*
 * Send queue and completion queue parameters grouped for a rate limit
 * channel; kept as "chan_param" in struct mlx5e_rl_priv_data.
 */
struct mlx5e_rl_channel_param {
struct mlx5e_sq_param sq;
struct mlx5e_cq_param cq;
};
/*
 * One rate limit channel.
 */
struct mlx5e_rl_channel {
/*
 * Embedded send tag. mlx5e_select_queue() gets from an mbuf's snd_tag
 * back to the channel with container_of() on this field.
 */
struct m_snd_tag m_snd_tag;
/* linkage for the worker's STAILQ lists */
STAILQ_ENTRY(mlx5e_rl_channel) entry;
/* send queue of this channel; the transmit path checks it for NULL */
struct mlx5e_sq * volatile sq;
/* worker this channel is associated with */
struct mlx5e_rl_worker *worker;
/* NOTE(review): rate bookkeeping; exact meaning of each field is not visible here */
uint64_t new_rate;
uint64_t init_rate;
uint64_t last_rate;
uint16_t last_burst;
/* presumably one of the MLX5E_RL_ST_* values - TODO confirm */
uint16_t state;
};
/*
 * Per-worker state for rate limit channels.
 */
struct mlx5e_rl_worker {
/* taken via MLX5E_RL_WORKER_LOCK() / MLX5E_RL_WORKER_UNLOCK() */
struct mtx mtx;
/* condition variable; presumably paired with mtx - TODO confirm */
struct cv cv;
/* two lists of channels, linked through mlx5e_rl_channel's "entry" field */
STAILQ_HEAD(, mlx5e_rl_channel) index_list_head;
STAILQ_HEAD(, mlx5e_rl_channel) process_head;
/* back pointer to the owning interface's private data */
struct mlx5e_priv *priv;
/* channels belonging to this worker; presumably an array - TODO confirm */
struct mlx5e_rl_channel *channels;
/* NOTE(review): looks like a completion flag for worker shutdown - confirm */
unsigned worker_done;
};
/*
 * Rate limit state of one interface; embedded as "rl" in
 * struct mlx5e_priv when RATELIMIT is enabled.
 */
struct mlx5e_rl_priv_data {
/* taken via MLX5E_RL_RLOCK() / MLX5E_RL_WLOCK() and their unlock forms */
struct sx rl_sxlock;
/* sysctl context; presumably owns the rate_limit sysctl nodes - TODO confirm */
struct sysctl_ctx_list ctx;
struct mlx5e_rl_channel_param chan_param;
struct mlx5e_rl_params param;
/*
 * Statistics; mlx5e_destroy_ifp() polls stats.tx_active_connections
 * and waits for it to drop to zero before detaching.
 */
struct mlx5e_rl_stats stats;
struct mlx5_uar sq_uar;
struct mlx5e_rl_worker *workers;
/* back pointer to the owning interface's private data */
struct mlx5e_priv *priv;
uint64_t *rate_limit_table;
unsigned opened;
uint32_t tisn;
};
/*
 * Set up rate limit support for an interface. Called from
 * mlx5e_create_ifp(), which treats a non-zero return value as failure.
 */
int mlx5e_rl_init(struct mlx5e_priv *priv);
/* Undo mlx5e_rl_init(); called from mlx5e_destroy_ifp(). */
void mlx5e_rl_cleanup(struct mlx5e_priv *priv);
/*
 * Send tag handlers; mlx5e_create_ifp() installs them as the interface's
 * if_snd_tag_alloc, if_snd_tag_modify, if_snd_tag_query and
 * if_snd_tag_free callbacks.
 */
if_snd_tag_alloc_t mlx5e_rl_snd_tag_alloc;
if_snd_tag_modify_t mlx5e_rl_snd_tag_modify;
if_snd_tag_query_t mlx5e_rl_snd_tag_query;
if_snd_tag_free_t mlx5e_rl_snd_tag_free;
#endif /* __MLX5_EN_RL_H__ */

View File

@ -3507,6 +3507,13 @@ mlx5e_create_ifp(struct mlx5_core_dev *mdev)
ifp->if_capabilities |= IFCAP_LRO;
ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;
ifp->if_capabilities |= IFCAP_HWSTATS | IFCAP_HWRXTSTMP;
#ifdef RATELIMIT
ifp->if_capabilities |= IFCAP_TXRTLMT;
ifp->if_snd_tag_alloc = mlx5e_rl_snd_tag_alloc;
ifp->if_snd_tag_free = mlx5e_rl_snd_tag_free;
ifp->if_snd_tag_modify = mlx5e_rl_snd_tag_modify;
ifp->if_snd_tag_query = mlx5e_rl_snd_tag_query;
#endif
/* set TSO limits so that we don't have to drop TX packets */
ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
@ -3588,6 +3595,14 @@ mlx5e_create_ifp(struct mlx5_core_dev *mdev)
random_ether_addr(dev_addr);
if_printf(ifp, "Assigned random MAC address\n");
}
#ifdef RATELIMIT
err = mlx5e_rl_init(priv);
if (err) {
if_printf(ifp, "%s: mlx5e_rl_init failed, %d\n",
__func__, err);
goto err_create_mkey;
}
#endif
/* set default MTU */
mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu);
@ -3673,6 +3688,10 @@ mlx5e_create_ifp(struct mlx5_core_dev *mdev)
return (priv);
#ifdef RATELIMIT
err_create_mkey:
mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
#endif
err_dealloc_transport_domain:
mlx5_dealloc_transport_domain(mdev, priv->tdn);
@ -3715,6 +3734,18 @@ mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
/* XXX wait a bit to allow IOCTL handlers to complete */
pause("W", hz);
#ifdef RATELIMIT
/*
* The kernel can have reference(s) via the m_snd_tag's into
* the ratelimit channels, and these must go away before
* detaching:
*/
while (READ_ONCE(priv->rl.stats.tx_active_connections) != 0) {
if_printf(priv->ifp, "Waiting for all ratelimit connections "
"to terminate\n");
pause("W", hz);
}
#endif
/* stop watchdog timer */
callout_drain(&priv->watchdog);
@ -3735,6 +3766,9 @@ mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
ether_ifdetach(ifp);
if_free(ifp);
#ifdef RATELIMIT
mlx5e_rl_cleanup(priv);
#endif
/* destroy all remaining sysctl nodes */
if (priv->sysctl_debug)
sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);

File diff suppressed because it is too large Load Diff

View File

@ -103,6 +103,25 @@ mlx5e_select_queue(struct ifnet *ifp, struct mbuf *mb)
ch = priv->params.num_channels;
#ifdef RATELIMIT
if (mb->m_pkthdr.snd_tag != NULL) {
struct mlx5e_sq *sq;
/* check for route change */
if (mb->m_pkthdr.snd_tag->ifp != ifp)
return (NULL);
/* get pointer to sendqueue */
sq = container_of(mb->m_pkthdr.snd_tag,
struct mlx5e_rl_channel, m_snd_tag)->sq;
/* check if valid */
if (sq != NULL && sq->stopped == 0)
return (sq);
/* FALLTHROUGH */
}
#endif
/* check if flowid is set */
if (M_HASHTYPE_GET(mb) != M_HASHTYPE_NONE) {
#ifdef RSS
@ -540,8 +559,24 @@ mlx5e_xmit(struct ifnet *ifp, struct mbuf *mb)
sq = mlx5e_select_queue(ifp, mb);
if (unlikely(sq == NULL)) {
/* Invalid send queue */
#ifdef RATELIMIT
/* Check for route change */
if (mb->m_pkthdr.snd_tag != NULL &&
mb->m_pkthdr.snd_tag->ifp != ifp) {
/* Free mbuf */
m_freem(mb);
/*
* Tell upper layers about route change and to
* re-transmit this packet:
*/
return (EAGAIN);
}
#endif
/* Free mbuf */
m_freem(mb);
/* Invalid send queue */
return (ENXIO);
}

View File

@ -23,6 +23,7 @@ mlx5_pagealloc.c \
mlx5_pd.c \
mlx5_port.c \
mlx5_qp.c \
mlx5_rl.c \
mlx5_srq.c \
mlx5_transobj.c \
mlx5_uar.c \
@ -30,7 +31,7 @@ mlx5_vport.c \
mlx5_vsc.c \
mlx5_wq.c \
device_if.h bus_if.h vnode_if.h pci_if.h \
opt_inet.h opt_inet6.h opt_rss.h
opt_inet.h opt_inet6.h opt_rss.h opt_ratelimit.h
CFLAGS+= -I${SRCTOP}/sys/ofed/include
CFLAGS+= -I${SRCTOP}/sys/compat/linuxkpi/common/include

View File

@ -8,9 +8,10 @@ mlx5_en_main.c \
mlx5_en_tx.c \
mlx5_en_flow_table.c \
mlx5_en_rx.c \
mlx5_en_rl.c \
mlx5_en_txrx.c \
device_if.h bus_if.h vnode_if.h pci_if.h \
opt_inet.h opt_inet6.h opt_rss.h
opt_inet.h opt_inet6.h opt_rss.h opt_ratelimit.h
.if defined(HAVE_PER_CQ_EVENT_PACKET)
CFLAGS+= -DHAVE_PER_CQ_EVENT_PACKET

View File

@ -16,13 +16,12 @@ mlx5_ib_qp.c \
mlx5_ib_srq.c \
mlx5_ib_virt.c \
device_if.h bus_if.h vnode_if.h pci_if.h \
opt_inet.h opt_inet6.h
opt_inet.h opt_inet6.h opt_ratelimit.h
CFLAGS+= -I${SRCTOP}/sys/ofed/include
CFLAGS+= -I${SRCTOP}/sys/ofed/include/uapi
CFLAGS+= -I${SRCTOP}/sys/compat/linuxkpi/common/include
CFLAGS+= -DCONFIG_INFINIBAND_USER_MEM
CFLAGS+= -DINET -DINET6
.include <bsd.kmod.mk>