Improve the TCP segmentation offload (TSO) algorithm in general.
The current TSO limitation feature only takes the total number of bytes in an mbuf chain into account and does not limit by the number of mbufs in a chain. Some kinds of hardware are limited by two factors: one is the fragment length and the other is the fragment count. Both of these limits need to be taken into account when doing TSO; otherwise some kinds of hardware might have to drop completely valid mbuf chains because they cannot be loaded into the given hardware's DMA engine. The new way of doing TSO limitation has been made backwards compatible, following input from other FreeBSD developers, and will use defaults for values not set. MFC after: 1 week Sponsored by: Mellanox Technologies
This commit is contained in:
parent
d38ee29868
commit
3d04a989df
@ -1731,7 +1731,10 @@ oce_attach_ifp(POCE_SOFTC sc)
|
||||
sc->ifp->if_baudrate = IF_Gbps(10);
|
||||
|
||||
#if __FreeBSD_version >= 1000000
|
||||
sc->ifp->if_hw_tsomax = OCE_MAX_TSO_SIZE;
|
||||
sc->ifp->if_hw_tsomax = IF_HW_TSOMAX_BUILD_VALUE(
|
||||
65535 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN) /* bytes */,
|
||||
OCE_MAX_TX_ELEMENTS /* maximum frag count */,
|
||||
12 /* 4K frag size */);
|
||||
#endif
|
||||
|
||||
ether_ifattach(sc->ifp, sc->macaddr.mac_addr);
|
||||
|
@ -152,7 +152,6 @@ extern int mp_ncpus; /* system's total active cpu cores */
|
||||
#define OCE_MAX_TX_ELEMENTS 29
|
||||
#define OCE_MAX_TX_DESC 1024
|
||||
#define OCE_MAX_TX_SIZE 65535
|
||||
#define OCE_MAX_TSO_SIZE (65535 - ETHER_HDR_LEN)
|
||||
#define OCE_MAX_RX_SIZE 4096
|
||||
#define OCE_MAX_RQ_POSTS 255
|
||||
#define OCE_DEFAULT_PROMISCUOUS 0
|
||||
|
@ -1722,7 +1722,11 @@ vmxnet3_setup_interface(struct vmxnet3_softc *sc)
|
||||
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
|
||||
ifp->if_init = vmxnet3_init;
|
||||
ifp->if_ioctl = vmxnet3_ioctl;
|
||||
ifp->if_hw_tsomax = VMXNET3_TSO_MAXSIZE;
|
||||
|
||||
ifp->if_hw_tsomax = IF_HW_TSOMAX_BUILD_VALUE(
|
||||
65535 - sizeof(struct ether_vlan_header) /* bytes */,
|
||||
VMXNET3_TX_MAXSEGS /* maximum frag count */,
|
||||
VMXNET3_TX_MAXSEGSHIFT /* frag size */);
|
||||
|
||||
#ifdef VMXNET3_LEGACY_TX
|
||||
ifp->if_start = vmxnet3_start;
|
||||
|
@ -277,14 +277,13 @@ struct vmxnet3_softc {
|
||||
*/
|
||||
#define VMXNET3_TX_MAXSEGS 32
|
||||
#define VMXNET3_TX_MAXSIZE (VMXNET3_TX_MAXSEGS * MCLBYTES)
|
||||
#define VMXNET3_TSO_MAXSIZE \
|
||||
(VMXNET3_TX_MAXSIZE - sizeof(struct ether_vlan_header))
|
||||
|
||||
/*
|
||||
 * Maximum supported Tx segment size. The length field in the
|
||||
* Tx descriptor is 14 bits.
|
||||
*/
|
||||
#define VMXNET3_TX_MAXSEGSIZE (1 << 14)
|
||||
#define VMXNET3_TX_MAXSEGSHIFT 14
|
||||
#define VMXNET3_TX_MAXSEGSIZE (1 << VMXNET3_TX_MAXSEGSHIFT)
|
||||
|
||||
/*
|
||||
* The maximum number of Rx segments we accept. When LRO is enabled,
|
||||
|
@ -134,7 +134,6 @@ static const int MODPARM_rx_flip = 0;
|
||||
* to mirror the Linux MAX_SKB_FRAGS constant.
|
||||
*/
|
||||
#define MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2)
|
||||
#define NF_TSO_MAXBURST ((IP_MAXPACKET / PAGE_SIZE) * MCLBYTES)
|
||||
|
||||
#define RX_COPY_THRESHOLD 256
|
||||
|
||||
@ -2102,7 +2101,10 @@ create_netdev(device_t dev)
|
||||
|
||||
ifp->if_hwassist = XN_CSUM_FEATURES;
|
||||
ifp->if_capabilities = IFCAP_HWCSUM;
|
||||
ifp->if_hw_tsomax = NF_TSO_MAXBURST;
|
||||
ifp->if_hw_tsomax = IF_HW_TSOMAX_BUILD_VALUE(
|
||||
65535 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN) /* bytes */,
|
||||
MAX_TX_REQ_FRAGS /* maximum frag count */,
|
||||
PAGE_SHIFT /* PAGE_SIZE frag size */);
|
||||
|
||||
ether_ifattach(ifp, np->mac);
|
||||
callout_init(&np->xn_stat_ch, CALLOUT_MPSAFE);
|
||||
|
60
sys/net/if.c
60
sys/net/if.c
@ -422,6 +422,52 @@ if_grow(void)
|
||||
V_ifindex_table = e;
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute the least common value of two "if_hw_tsomax" values:
|
||||
*/
|
||||
u_int
|
||||
if_hw_tsomax_common(u_int a, u_int b)
|
||||
{
|
||||
u_int a_bytes = IF_HW_TSOMAX_GET_BYTES(a);
|
||||
u_int a_frag_count = IF_HW_TSOMAX_GET_FRAG_COUNT(a);
|
||||
u_int a_frag_size = IF_HW_TSOMAX_GET_FRAG_SIZE(a);
|
||||
u_int b_bytes = IF_HW_TSOMAX_GET_BYTES(b);
|
||||
u_int b_frag_count = IF_HW_TSOMAX_GET_FRAG_COUNT(b);
|
||||
u_int b_frag_size = IF_HW_TSOMAX_GET_FRAG_SIZE(b);
|
||||
|
||||
return (IF_HW_TSOMAX_BUILD_VALUE(min(a_bytes, b_bytes),
|
||||
min(a_frag_count, b_frag_count),
|
||||
min(a_frag_size, b_frag_size)));
|
||||
}
|
||||
|
||||
/*
|
||||
* Range check the "if_hw_tsomax" value:
|
||||
*/
|
||||
u_int
|
||||
if_hw_tsomax_range_check(u_int a)
|
||||
{
|
||||
u_int a_bytes = IF_HW_TSOMAX_GET_BYTES(a);
|
||||
u_int a_frag_count = IF_HW_TSOMAX_GET_FRAG_COUNT(a);
|
||||
u_int a_frag_size = IF_HW_TSOMAX_GET_FRAG_SIZE(a);
|
||||
|
||||
/* round down to nearest 4 bytes */
|
||||
a_bytes &= 0xFFFC;
|
||||
|
||||
/* use default, if zero */
|
||||
if (a_bytes == 0)
|
||||
a_bytes = IF_HW_TSOMAX_DEFAULT_BYTES;
|
||||
|
||||
/* use default, if zero */
|
||||
if (a_frag_count == 0)
|
||||
a_frag_count = IF_HW_TSOMAX_DEFAULT_FRAG_COUNT;
|
||||
|
||||
/* use default, if zero */
|
||||
if (a_frag_size == 0)
|
||||
a_frag_size = IF_HW_TSOMAX_DEFAULT_FRAG_SIZE;
|
||||
|
||||
return (IF_HW_TSOMAX_BUILD_VALUE(a_bytes, a_frag_count, a_frag_size));
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate a struct ifnet and an index for an interface. A layer 2
|
||||
* common structure will also be allocated if an allocation routine is
|
||||
@ -445,6 +491,7 @@ if_alloc(u_char type)
|
||||
ifp->if_index = idx;
|
||||
ifp->if_type = type;
|
||||
ifp->if_alloctype = type;
|
||||
ifp->if_hw_tsomax = IF_HW_TSOMAX_DEFAULT_VALUE();
|
||||
if (if_com_alloc[type] != NULL) {
|
||||
ifp->if_l2com = if_com_alloc[type](type, ifp);
|
||||
if (ifp->if_l2com == NULL) {
|
||||
@ -657,16 +704,9 @@ if_attach_internal(struct ifnet *ifp, int vmove)
|
||||
TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
|
||||
/* Reliably crash if used uninitialized. */
|
||||
ifp->if_broadcastaddr = NULL;
|
||||
|
||||
#if defined(INET) || defined(INET6)
|
||||
/* Initialize to max value. */
|
||||
if (ifp->if_hw_tsomax == 0)
|
||||
ifp->if_hw_tsomax = min(IP_MAXPACKET, 32 * MCLBYTES -
|
||||
(ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
|
||||
KASSERT(ifp->if_hw_tsomax <= IP_MAXPACKET &&
|
||||
ifp->if_hw_tsomax >= IP_MAXPACKET / 8,
|
||||
("%s: tsomax outside of range", __func__));
|
||||
#endif
|
||||
/* range check TSO value */
|
||||
ifp->if_hw_tsomax =
|
||||
if_hw_tsomax_range_check(ifp->if_hw_tsomax);
|
||||
}
|
||||
#ifdef VIMAGE
|
||||
else {
|
||||
|
@ -445,11 +445,7 @@ lagg_capabilities(struct lagg_softc *sc)
|
||||
struct lagg_port *lp;
|
||||
int cap = ~0, ena = ~0;
|
||||
u_long hwa = ~0UL;
|
||||
#if defined(INET) || defined(INET6)
|
||||
u_int hw_tsomax = IP_MAXPACKET; /* Initialize to the maximum value. */
|
||||
#else
|
||||
u_int hw_tsomax = ~0; /* if_hw_tsomax is only for INET/INET6, but.. */
|
||||
#endif
|
||||
u_int hw_tsomax = IF_HW_TSOMAX_DEFAULT_VALUE();
|
||||
|
||||
LAGG_WLOCK_ASSERT(sc);
|
||||
|
||||
@ -458,10 +454,9 @@ lagg_capabilities(struct lagg_softc *sc)
|
||||
cap &= lp->lp_ifp->if_capabilities;
|
||||
ena &= lp->lp_ifp->if_capenable;
|
||||
hwa &= lp->lp_ifp->if_hwassist;
|
||||
/* Set to the minimum value of the lagg ports. */
|
||||
if (lp->lp_ifp->if_hw_tsomax < hw_tsomax &&
|
||||
lp->lp_ifp->if_hw_tsomax > 0)
|
||||
hw_tsomax = lp->lp_ifp->if_hw_tsomax;
|
||||
/* Set to the common value of the lagg ports. */
|
||||
hw_tsomax = if_hw_tsomax_common(hw_tsomax,
|
||||
lp->lp_ifp->if_hw_tsomax);
|
||||
}
|
||||
cap = (cap == ~0 ? 0 : cap);
|
||||
ena = (ena == ~0 ? 0 : ena);
|
||||
|
@ -119,6 +119,43 @@ typedef void (*if_qflush_fn_t)(if_t);
|
||||
typedef int (*if_transmit_fn_t)(if_t, struct mbuf *);
|
||||
typedef uint64_t (*if_get_counter_t)(if_t, ifnet_counter);
|
||||
|
||||
/*
 * Macros defining how to decode the "if_hw_tsomax" field. The value
 * is split into three fields:
 *
 *   bits  0..15  maximum number of bytes in a TSO job
 *   bits 16..23  maximum number of fragments in a TSO job
 *   bits 24..31  logarithm in base 2 of the maximum fragment size
 */
#define	IF_HW_TSOMAX_GET_BYTES(x) \
	((uint16_t)(x))			/* 32..65535 */

#define	IF_HW_TSOMAX_GET_FRAG_COUNT(x) \
	((uint8_t)((x) >> 16))		/* 1..255 */

#define	IF_HW_TSOMAX_GET_FRAG_SIZE(x) \
	((uint8_t)((x) >> 24))		/* 12..16 */

/*
 * The following macro defines how to build the "if_hw_tsomax"
 * field. The "bytes" field has unit 1 bytes and declares the maximum
 * number of bytes which can be transferred by a single TCP
 * segmentation offload, TSO, job. The "bytes" field is rounded down
 * to the nearest 4 bytes to avoid having the hardware do unaligned
 * memory accesses. The "frag_count" field has unit 1 fragment and
 * declares the maximum number of fragments a TSO job can contain. The
 * "frag_size" field has unit logarithm in base 2 of the actual value
 * in bytes and declares the maximum size of a fragment.
 *
 * Every field is converted to uint32_t before it is shifted, so that
 * the shifts never overflow a signed int and the result has the same
 * type regardless of the argument types, for example when "bytes" is
 * computed using sizeof().
 */
#define	IF_HW_TSOMAX_BUILD_VALUE(bytes, frag_count, frag_size) \
	((uint32_t)((bytes) & 0xFFFC) | \
	((uint32_t)((frag_count) & 0xFF) << 16) | \
	((uint32_t)((frag_size) & 0xFF) << 24))

#define	IF_HW_TSOMAX_DEFAULT_BYTES (65536 - 4)
#define	IF_HW_TSOMAX_DEFAULT_FRAG_COUNT 255
#define	IF_HW_TSOMAX_DEFAULT_FRAG_SIZE 16

/* All three fields set to their default values. */
#define	IF_HW_TSOMAX_DEFAULT_VALUE() \
	IF_HW_TSOMAX_BUILD_VALUE( \
	IF_HW_TSOMAX_DEFAULT_BYTES, \
	IF_HW_TSOMAX_DEFAULT_FRAG_COUNT, \
	IF_HW_TSOMAX_DEFAULT_FRAG_SIZE)
|
||||
|
||||
/*
|
||||
* Structure defining a network interface.
|
||||
*
|
||||
@ -222,8 +259,7 @@ struct ifnet {
|
||||
if_get_counter_t if_get_counter; /* get counter values */
|
||||
|
||||
/* Stuff that's only temporary and doesn't belong here. */
|
||||
u_int if_hw_tsomax; /* tso burst length limit, the minimum
|
||||
* is (IP_MAXPACKET / 8).
|
||||
u_int if_hw_tsomax; /* TSO burst length limits.
|
||||
* XXXAO: Have to find a better place
|
||||
* for it eventually. */
|
||||
/*
|
||||
@ -608,6 +644,10 @@ void if_setioctlfn(if_t ifp, int (*)(if_t, u_long, caddr_t));
|
||||
void if_setstartfn(if_t ifp, void (*)(if_t));
|
||||
void if_settransmitfn(if_t ifp, if_transmit_fn_t);
|
||||
void if_setqflushfn(if_t ifp, if_qflush_fn_t);
|
||||
|
||||
/* "if_hw_tsomax" related functions */
|
||||
u_int if_hw_tsomax_common(u_int, u_int);
|
||||
u_int if_hw_tsomax_range_check(u_int);
|
||||
|
||||
/* Revisit the below. These are inline functions originally */
|
||||
int drbr_inuse_drv(if_t ifp, struct buf_ring *br);
|
||||
|
@ -1511,8 +1511,8 @@ vlan_capabilities(struct ifvlan *ifv)
|
||||
* propagate the hardware-assisted flag. TSO on VLANs
|
||||
* does not necessarily require hardware VLAN tagging.
|
||||
*/
|
||||
if (p->if_hw_tsomax > 0)
|
||||
ifp->if_hw_tsomax = p->if_hw_tsomax;
|
||||
ifp->if_hw_tsomax = if_hw_tsomax_common(ifp->if_hw_tsomax,
|
||||
p->if_hw_tsomax);
|
||||
if (p->if_capabilities & IFCAP_VLAN_HWTSO)
|
||||
ifp->if_capabilities |= p->if_capabilities & IFCAP_TSO;
|
||||
if (p->if_capenable & IFCAP_VLAN_HWTSO) {
|
||||
|
@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/sysctl.h>
|
||||
|
||||
#include <net/if.h>
|
||||
#include <net/if_var.h>
|
||||
#include <net/route.h>
|
||||
#include <net/vnet.h>
|
||||
|
||||
@ -767,18 +768,88 @@ send:
|
||||
flags &= ~TH_FIN;
|
||||
|
||||
if (tso) {
|
||||
u_int if_hw_tsomax_bytes;
|
||||
u_int if_hw_tsomax_frag_count;
|
||||
u_int if_hw_tsomax_frag_size;
|
||||
struct mbuf *mb;
|
||||
u_int moff;
|
||||
int max_len;
|
||||
|
||||
/* extract TSO information */
|
||||
if_hw_tsomax_bytes =
|
||||
IF_HW_TSOMAX_GET_BYTES(tp->t_tsomax);
|
||||
if_hw_tsomax_frag_count =
|
||||
IF_HW_TSOMAX_GET_FRAG_COUNT(tp->t_tsomax);
|
||||
if_hw_tsomax_frag_size =
|
||||
IF_HW_TSOMAX_GET_FRAG_SIZE(tp->t_tsomax);
|
||||
|
||||
/* compute maximum TSO length */
|
||||
max_len = (if_hw_tsomax_bytes - hdrlen);
|
||||
|
||||
/* clamp maximum length value */
|
||||
if (max_len > IP_MAXPACKET)
|
||||
max_len = IP_MAXPACKET;
|
||||
else if (max_len < 0)
|
||||
max_len = 0;
|
||||
|
||||
/* get smallest length */
|
||||
if (len > (u_int)max_len) {
|
||||
if (max_len != 0)
|
||||
sendalot = 1;
|
||||
len = (u_int)max_len;
|
||||
}
|
||||
|
||||
KASSERT(ipoptlen == 0,
|
||||
("%s: TSO can't do IP options", __func__));
|
||||
|
||||
max_len = 0;
|
||||
mb = sbsndptr(&so->so_snd, off, len, &moff);
|
||||
|
||||
/* now make sure the number of fragments fit too */
|
||||
while (mb != NULL && (u_int)max_len < len) {
|
||||
u_int cur_length;
|
||||
u_int cur_frags;
|
||||
|
||||
/*
|
||||
* Get length of mbuf fragment and how
|
||||
* many hardware frags, rounded up, it
|
||||
* would use:
|
||||
*/
|
||||
cur_length = (mb->m_len - moff);
|
||||
cur_frags = (cur_length +
|
||||
(1 << if_hw_tsomax_frag_size) - 1) >>
|
||||
if_hw_tsomax_frag_size;
|
||||
|
||||
/* Handle special case: Zero Length Mbuf */
|
||||
if (cur_frags == 0)
|
||||
cur_frags = 1;
|
||||
|
||||
/*
|
||||
* Check if the fragment limit will be
|
||||
* reached or exceeded:
|
||||
*/
|
||||
if (cur_frags >= if_hw_tsomax_frag_count) {
|
||||
max_len += min(cur_length,
|
||||
if_hw_tsomax_frag_count <<
|
||||
if_hw_tsomax_frag_size);
|
||||
break;
|
||||
}
|
||||
max_len += cur_length;
|
||||
if_hw_tsomax_frag_count -= cur_frags;
|
||||
moff = 0;
|
||||
mb = mb->m_next;
|
||||
}
|
||||
|
||||
/*
|
||||
* Limit a burst to t_tsomax minus IP,
|
||||
* TCP and options length to keep ip->ip_len
|
||||
* from overflowing or exceeding the maximum
|
||||
* length allowed by the network interface.
|
||||
*/
|
||||
if (len > tp->t_tsomax - hdrlen) {
|
||||
len = tp->t_tsomax - hdrlen;
|
||||
sendalot = 1;
|
||||
if (len > (u_int)max_len) {
|
||||
if (max_len != 0)
|
||||
sendalot = 1;
|
||||
len = (u_int)max_len;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -673,6 +673,12 @@ int mlx4_en_do_start_port(struct net_device *dev)
|
||||
else
|
||||
priv->rx_csum = 0;
|
||||
|
||||
/* set TSO limits so that we don't have to drop TX packets */
|
||||
dev->if_hw_tsomax = IF_HW_TSOMAX_BUILD_VALUE(
|
||||
65535 - sizeof(struct ether_vlan_header) /* bytes */,
|
||||
16 /* maximum frag count */,
|
||||
16 /* can do up to 4GByte */);
|
||||
|
||||
err = mlx4_wol_read(priv->mdev->dev, &config, priv->port);
|
||||
if (err) {
|
||||
en_err(priv, "Failed to get WoL info, unable to modify\n");
|
||||
|
Loading…
x
Reference in New Issue
Block a user