cxgbe(4): add support for stateless offloads for VXLAN traffic.

Hardware assistance includes checksumming (tx and rx), TSO, and RSS on
the inner traffic in a VXLAN tunnel.
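
For example, with an ifconfig(8) that has the vxlanhwcsum/vxlanhwtso knobs
(interface name below is illustrative; T6 ports typically attach as cc<N>),
the new offloads can be toggled with:

  # ifconfig cc0 vxlanhwcsum vxlanhwtso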

Relnotes:	Yes
Sponsored by:	Chelsio Communications
commit a4a4ad2dd9 (parent b092fd6c97)
Navdeep Parhar, 2020-09-18 03:01:47 +00:00
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=365871
7 changed files with 684 additions and 185 deletions

View File

@ -31,7 +31,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd Dec 10, 2019
.Dd September 17, 2020
.Dt CXGBE 4
.Os
.Sh NAME
@ -61,8 +61,8 @@ driver provides support for PCI Express Ethernet adapters based on
the Chelsio Terminator 4, Terminator 5, and Terminator 6 ASICs (T4, T5, and T6).
The driver supports Jumbo Frames, Transmit/Receive checksum offload,
TCP segmentation offload (TSO), Large Receive Offload (LRO), VLAN
tag insertion/extraction, VLAN checksum offload, VLAN TSO, and
Receive Side Steering (RSS).
tag insertion/extraction, VLAN checksum offload, VLAN TSO, VXLAN checksum
offload, VXLAN TSO, and Receive Side Steering (RSS).
For further hardware information and questions related to hardware
requirements, see
.Pa http://www.chelsio.com/ .

View File

@ -119,6 +119,7 @@ enum {
TX_SGL_SEGS = 39,
TX_SGL_SEGS_TSO = 38,
TX_SGL_SEGS_EO_TSO = 30, /* XXX: lower for IPv6. */
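/* One less than TX_SGL_SEGS_TSO: cpl_tx_tnl_lso is larger than cpl_tx_pkt_lso_core. */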
TX_SGL_SEGS_VXLAN_TSO = 37,
TX_WR_FLITS = SGE_MAX_WR_LEN / 8
};
@ -286,6 +287,7 @@ struct port_info {
int nvi;
int up_vis;
int uld_vis;
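/* true once a match-all MAC entry for VXLAN rx is installed on this port */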
bool vxlan_tcam_entry;
struct tx_sched_params *sched_params;
@ -593,6 +595,8 @@ struct sge_txq {
uint64_t txpkts0_pkts; /* # of frames in type0 coalesced tx WRs */
uint64_t txpkts1_pkts; /* # of frames in type1 coalesced tx WRs */
uint64_t raw_wrs; /* # of raw work requests (alloc_wr_mbuf) */
uint64_t vxlan_tso_wrs; /* # of VXLAN TSO work requests */
uint64_t vxlan_txcsum;
uint64_t kern_tls_records;
uint64_t kern_tls_short;
@ -625,6 +629,7 @@ struct sge_rxq {
uint64_t rxcsum; /* # of times hardware assisted with checksum */
uint64_t vlan_extraction;/* # of times VLAN tag was extracted */
uint64_t vxlan_rxcsum;
/* stats for not-that-common events */
@ -848,6 +853,11 @@ struct adapter {
int lro_timeout;
int sc_do_rxcopy;
int vxlan_port;
u_int vxlan_refcount;
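/* Raw MAC entries reserved by the firmware; one per port is used for VXLAN. */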
int rawf_base;
int nrawf;
struct taskqueue *tq[MAX_NCHAN]; /* General purpose taskqueues */
struct task async_event_task;
struct port_info *port[MAX_NPORTS];

View File

@ -249,7 +249,7 @@ struct tp_params {
uint32_t max_rx_pdu;
uint32_t max_tx_pdu;
uint64_t hash_filter_mask;
__be16 err_vec_mask;
bool rx_pkt_encap;
int8_t fcoe_shift;
int8_t port_shift;

View File

@ -9647,19 +9647,11 @@ int t4_init_tp_params(struct adapter *adap, bool sleep_ok)
read_filter_mode_and_ingress_config(adap, sleep_ok);
/*
* Cache a mask of the bits that represent the error vector portion of
* rx_pkt.err_vec. T6+ can use a compressed error vector to make room
* for information about outer encapsulation (GENEVE/VXLAN/NVGRE).
*/
tpp->err_vec_mask = htobe16(0xffff);
if (chip_id(adap) > CHELSIO_T5) {
v = t4_read_reg(adap, A_TP_OUT_CONFIG);
if (v & F_CRXPKTENC) {
tpp->err_vec_mask =
htobe16(V_T6_COMPR_RXERR_VEC(M_T6_COMPR_RXERR_VEC));
}
}
tpp->rx_pkt_encap = v & F_CRXPKTENC;
} else
tpp->rx_pkt_encap = false;
rx_len = t4_read_reg(adap, A_TP_PMM_RX_PAGE_SIZE);
tx_len = t4_read_reg(adap, A_TP_PMM_TX_PAGE_SIZE);

View File

@ -146,7 +146,8 @@
nethctrl = 1024
neq = 2048
nqpcq = 8192
nexactf = 456
nexactf = 454
nrawf = 2
cmask = all
pmask = all
ncrypto_lookaside = 16
@ -272,7 +273,7 @@
[fini]
version = 0x1
checksum = 0x13640470
checksum = 0xa92352a8
#
# $FreeBSD$
#

View File

@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/eventhandler.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/queue.h>
@ -1069,6 +1070,8 @@ t4_attach(device_t dev)
TASK_INIT(&sc->async_event_task, 0, t4_async_event, sc);
#endif
refcount_init(&sc->vxlan_refcount, 0);
rc = t4_map_bars_0_and_4(sc);
if (rc != 0)
goto done; /* error message displayed already */
@ -1716,6 +1719,7 @@ cxgbe_vi_attach(device_t dev, struct vi_info *vi)
struct ifnet *ifp;
struct sbuf *sb;
struct pfil_head_args pa;
struct adapter *sc = vi->adapter;
vi->xact_addr_filt = -1;
callout_init(&vi->tick, 1);
@ -1749,28 +1753,36 @@ cxgbe_vi_attach(device_t dev, struct vi_info *vi)
ifp->if_capabilities = T4_CAP;
ifp->if_capenable = T4_CAP_ENABLE;
ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
if (chip_id(sc) >= CHELSIO_T6) {
ifp->if_capabilities |= IFCAP_VXLAN_HWCSUM | IFCAP_VXLAN_HWTSO;
ifp->if_capenable |= IFCAP_VXLAN_HWCSUM | IFCAP_VXLAN_HWTSO;
ifp->if_hwassist |= CSUM_INNER_IP6_UDP | CSUM_INNER_IP6_TCP |
CSUM_INNER_IP6_TSO | CSUM_INNER_IP | CSUM_INNER_IP_UDP |
CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO | CSUM_ENCAP_VXLAN;
}
#ifdef TCP_OFFLOAD
if (vi->nofldrxq != 0 && (vi->adapter->flags & KERN_TLS_OK) == 0)
if (vi->nofldrxq != 0 && (sc->flags & KERN_TLS_OK) == 0)
ifp->if_capabilities |= IFCAP_TOE;
#endif
#ifdef RATELIMIT
if (is_ethoffload(vi->adapter) && vi->nofldtxq != 0) {
if (is_ethoffload(sc) && vi->nofldtxq != 0) {
ifp->if_capabilities |= IFCAP_TXRTLMT;
ifp->if_capenable |= IFCAP_TXRTLMT;
}
#endif
ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
ifp->if_hw_tsomax = IP_MAXPACKET;
ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_TSO;
#ifdef RATELIMIT
if (is_ethoffload(vi->adapter) && vi->nofldtxq != 0)
if (is_ethoffload(sc) && vi->nofldtxq != 0)
ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_EO_TSO;
#endif
ifp->if_hw_tsomaxsegsize = 65536;
#ifdef KERN_TLS
if (vi->adapter->flags & KERN_TLS_OK) {
if (sc->flags & KERN_TLS_OK) {
ifp->if_capabilities |= IFCAP_TXTLS;
ifp->if_capenable |= IFCAP_TXTLS;
}
@ -2100,6 +2112,17 @@ cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data)
if (mask & IFCAP_TXTLS)
ifp->if_capenable ^= (mask & IFCAP_TXTLS);
#endif
if (mask & IFCAP_VXLAN_HWCSUM) {
ifp->if_capenable ^= IFCAP_VXLAN_HWCSUM;
ifp->if_hwassist ^= CSUM_INNER_IP6_UDP |
CSUM_INNER_IP6_TCP | CSUM_INNER_IP |
CSUM_INNER_IP_UDP | CSUM_INNER_IP_TCP;
}
if (mask & IFCAP_VXLAN_HWTSO) {
ifp->if_capenable ^= IFCAP_VXLAN_HWTSO;
ifp->if_hwassist ^= CSUM_INNER_IP6_TSO |
CSUM_INNER_IP_TSO;
}
#ifdef VLAN_CAPABILITIES
VLAN_CAPABILITIES(ifp);
@ -4411,6 +4434,19 @@ get_params__post_init(struct adapter *sc)
MPASS(sc->tids.hpftid_base == 0);
MPASS(sc->tids.tid_base == sc->tids.nhpftids);
}
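/* Raw MAC filter entries, used here for the VXLAN match-all entries. */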
param[0] = FW_PARAM_PFVF(RAWF_START);
param[1] = FW_PARAM_PFVF(RAWF_END);
rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
if (rc != 0) {
device_printf(sc->dev,
"failed to query rawf parameters: %d.\n", rc);
return (rc);
}
if ((int)val[1] > (int)val[0]) {
sc->rawf_base = val[0];
sc->nrawf = val[1] - val[0] + 1;
}
}
/*
@ -5142,6 +5178,7 @@ update_mac_settings(struct ifnet *ifp, int flags)
struct port_info *pi = vi->pi;
struct adapter *sc = pi->adapter;
int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1;
uint8_t match_all_mac[ETHER_ADDR_LEN] = {0};
ASSERT_SYNCHRONIZED_OP(sc);
KASSERT(flags, ("%s: not told what to update.", __func__));
@ -5215,7 +5252,7 @@ update_mac_settings(struct ifnet *ifp, int flags)
rc = -rc;
for (j = 0; j < ctx.i; j++) {
if_printf(ifp,
"failed to add mc address"
"failed to add mcast address"
" %02x:%02x:%02x:"
"%02x:%02x:%02x rc=%d\n",
ctx.mcaddr[j][0], ctx.mcaddr[j][1],
@ -5225,12 +5262,34 @@ update_mac_settings(struct ifnet *ifp, int flags)
}
return (rc);
}
ctx.del = 0;
} else
NET_EPOCH_EXIT(et);
rc = -t4_set_addr_hash(sc, sc->mbox, vi->viid, 0, ctx.hash, 0);
if (rc != 0)
if_printf(ifp, "failed to set mc address hash: %d", rc);
if_printf(ifp, "failed to set mcast address hash: %d\n",
rc);
if (ctx.del == 0) {
/* We clobbered the VXLAN entry if there was one. */
pi->vxlan_tcam_entry = false;
}
}
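/*
 * VXLAN is active but this port's match-all entry is missing or was
 * just clobbered above; (re)install it.
 */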
if (IS_MAIN_VI(vi) && sc->vxlan_refcount > 0 &&
pi->vxlan_tcam_entry == false) {
rc = t4_alloc_raw_mac_filt(sc, vi->viid, match_all_mac,
match_all_mac, sc->rawf_base + pi->port_id, 1, pi->port_id,
true);
if (rc < 0) {
rc = -rc;
if_printf(ifp, "failed to add VXLAN TCAM entry: %d.\n",
rc);
} else {
MPASS(rc == sc->rawf_base + pi->port_id);
rc = 0;
pi->vxlan_tcam_entry = true;
}
}
return (rc);
@ -10407,6 +10466,7 @@ clear_stats(struct adapter *sc, u_int port_id)
#endif
rxq->rxcsum = 0;
rxq->vlan_extraction = 0;
rxq->vxlan_rxcsum = 0;
rxq->fl.cl_allocated = 0;
rxq->fl.cl_recycled = 0;
@ -10425,6 +10485,8 @@ clear_stats(struct adapter *sc, u_int port_id)
txq->txpkts0_pkts = 0;
txq->txpkts1_pkts = 0;
txq->raw_wrs = 0;
txq->vxlan_tso_wrs = 0;
txq->vxlan_txcsum = 0;
txq->kern_tls_records = 0;
txq->kern_tls_short = 0;
txq->kern_tls_partial = 0;
@ -11235,6 +11297,116 @@ DB_FUNC(tcb, db_show_t4tcb, db_t4_table, CS_OWN, NULL)
}
#endif
static eventhandler_tag vxlan_start_evtag;
static eventhandler_tag vxlan_stop_evtag;
struct vxlan_evargs {
struct ifnet *ifp;
uint16_t port;
};
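/*
 * The handlers below are invoked by if_vxlan(4) when a vxlan interface
 * starts or stops using a UDP port, along the lines of
 * EVENTHANDLER_INVOKE(vxlan_start, ifp, family, port).
 */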
static void
t4_vxlan_start(struct adapter *sc, void *arg)
{
struct vxlan_evargs *v = arg;
struct port_info *pi;
uint8_t match_all_mac[ETHER_ADDR_LEN] = {0};
int i, rc;
if (sc->nrawf == 0 || chip_id(sc) <= CHELSIO_T5)
return;
if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4vxst") != 0)
return;
if (sc->vxlan_refcount == 0) {
sc->vxlan_port = v->port;
sc->vxlan_refcount = 1;
t4_write_reg(sc, A_MPS_RX_VXLAN_TYPE,
V_VXLAN(v->port) | F_VXLAN_EN);
for_each_port(sc, i) {
pi = sc->port[i];
if (pi->vxlan_tcam_entry == true)
continue;
rc = t4_alloc_raw_mac_filt(sc, pi->vi[0].viid,
match_all_mac, match_all_mac,
sc->rawf_base + pi->port_id, 1, pi->port_id, true);
if (rc < 0) {
rc = -rc;
log(LOG_ERR,
"%s: failed to add VXLAN TCAM entry: %d.\n",
device_get_name(pi->vi[0].dev), rc);
} else {
MPASS(rc == sc->rawf_base + pi->port_id);
rc = 0;
pi->vxlan_tcam_entry = true;
}
}
} else if (sc->vxlan_port == v->port) {
sc->vxlan_refcount++;
} else {
log(LOG_ERR, "%s: VXLAN already configured on port %d; "
"ignoring attempt to configure it on port %d\n",
device_get_nameunit(sc->dev), sc->vxlan_port, v->port);
}
end_synchronized_op(sc, 0);
}
static void
t4_vxlan_stop(struct adapter *sc, void *arg)
{
struct vxlan_evargs *v = arg;
if (sc->nrawf == 0 || chip_id(sc) <= CHELSIO_T5)
return;
if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4vxsp") != 0)
return;
/*
* VXLANs may have been configured before the driver was loaded so we
* may see more stops than starts. This is not handled cleanly but at
* least we keep the refcount sane.
*/
if (sc->vxlan_port != v->port)
goto done;
if (sc->vxlan_refcount == 0) {
log(LOG_ERR,
"%s: VXLAN operation on port %d was stopped earlier; "
"ignoring attempt to stop it again.\n",
device_get_nameunit(sc->dev), sc->vxlan_port);
} else if (--sc->vxlan_refcount == 0) {
t4_set_reg_field(sc, A_MPS_RX_VXLAN_TYPE, F_VXLAN_EN, 0);
}
done:
end_synchronized_op(sc, 0);
}
static void
t4_vxlan_start_handler(void *arg __unused, struct ifnet *ifp,
sa_family_t family, u_int port)
{
struct vxlan_evargs v;
MPASS(family == AF_INET || family == AF_INET6);
v.ifp = ifp;
v.port = port;
t4_iterate(t4_vxlan_start, &v);
}
static void
t4_vxlan_stop_handler(void *arg __unused, struct ifnet *ifp, sa_family_t family,
u_int port)
{
struct vxlan_evargs v;
MPASS(family == AF_INET || family == AF_INET6);
v.ifp = ifp;
v.port = port;
t4_iterate(t4_vxlan_stop, &v);
}
static struct sx mlu; /* mod load unload */
SX_SYSINIT(cxgbe_mlu, &mlu, "cxgbe mod load/unload");
@ -11278,6 +11450,14 @@ mod_event(module_t mod, int cmd, void *arg)
#endif
t4_tracer_modload();
tweak_tunables();
vxlan_start_evtag =
EVENTHANDLER_REGISTER(vxlan_start,
t4_vxlan_start_handler, NULL,
EVENTHANDLER_PRI_ANY);
vxlan_stop_evtag =
EVENTHANDLER_REGISTER(vxlan_stop,
t4_vxlan_stop_handler, NULL,
EVENTHANDLER_PRI_ANY);
}
sx_xunlock(&mlu);
break;
@ -11314,6 +11494,10 @@ mod_event(module_t mod, int cmd, void *arg)
sx_sunlock(&t4_list_lock);
if (t4_sge_extfree_refs() == 0) {
EVENTHANDLER_DEREGISTER(vxlan_start,
vxlan_start_evtag);
EVENTHANDLER_DEREGISTER(vxlan_stop,
vxlan_stop_evtag);
t4_tracer_modunload();
#ifdef KERN_TLS
t6_ktls_modunload();

View File

@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$");
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>
#include <net/if_vxlan.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
@ -266,8 +267,9 @@ static int find_refill_source(struct adapter *, int, bool);
static void add_fl_to_sfl(struct adapter *, struct sge_fl *);
static inline void get_pkt_gl(struct mbuf *, struct sglist *);
static inline u_int txpkt_len16(u_int, u_int);
static inline u_int txpkt_vm_len16(u_int, u_int);
static inline u_int txpkt_len16(u_int, const u_int);
static inline u_int txpkt_vm_len16(u_int, const u_int);
static inline void calculate_mbuf_len16(struct adapter *, struct mbuf *);
static inline u_int txpkts0_len16(u_int);
static inline u_int txpkts1_len16(void);
static u_int write_raw_wr(struct sge_txq *, void *, struct mbuf *, u_int);
@ -1917,12 +1919,41 @@ eth_rx(struct adapter *sc, struct sge_rxq *rxq, const struct iq_desc *d,
#if defined(INET) || defined(INET6)
struct lro_ctrl *lro = &rxq->lro;
#endif
uint16_t err_vec, tnl_type, tnlhdr_len;
static const int sw_hashtype[4][2] = {
{M_HASHTYPE_NONE, M_HASHTYPE_NONE},
{M_HASHTYPE_RSS_IPV4, M_HASHTYPE_RSS_IPV6},
{M_HASHTYPE_RSS_TCP_IPV4, M_HASHTYPE_RSS_TCP_IPV6},
{M_HASHTYPE_RSS_UDP_IPV4, M_HASHTYPE_RSS_UDP_IPV6},
};
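/* Indexed as sw_csum_flags[outer is IPv6][inner is IPv6] for good VXLAN rx. */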
static const int sw_csum_flags[2][2] = {
{
/* IP, inner IP */
CSUM_ENCAP_VXLAN |
CSUM_L3_CALC | CSUM_L3_VALID |
CSUM_L4_CALC | CSUM_L4_VALID |
CSUM_INNER_L3_CALC | CSUM_INNER_L3_VALID |
CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID,
/* IP, inner IP6 */
CSUM_ENCAP_VXLAN |
CSUM_L3_CALC | CSUM_L3_VALID |
CSUM_L4_CALC | CSUM_L4_VALID |
CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID,
},
{
/* IP6, inner IP */
CSUM_ENCAP_VXLAN |
CSUM_L4_CALC | CSUM_L4_VALID |
CSUM_INNER_L3_CALC | CSUM_INNER_L3_VALID |
CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID,
/* IP6, inner IP6 */
CSUM_ENCAP_VXLAN |
CSUM_L4_CALC | CSUM_L4_VALID |
CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID,
},
};
MPASS(plen > sc->params.sge.fl_pktshift);
if (vi->pfil != NULL && PFIL_HOOKED_IN(vi->pfil) &&
@ -1963,23 +1994,73 @@ eth_rx(struct adapter *sc, struct sge_rxq *rxq, const struct iq_desc *d,
m0->m_pkthdr.flowid = be32toh(d->rss.hash_val);
cpl = (const void *)(&d->rss + 1);
if (cpl->csum_calc && !(cpl->err_vec & sc->params.tp.err_vec_mask)) {
if (ifp->if_capenable & IFCAP_RXCSUM &&
cpl->l2info & htobe32(F_RXF_IP)) {
m0->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
rxq->rxcsum++;
} else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
cpl->l2info & htobe32(F_RXF_IP6)) {
m0->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
CSUM_PSEUDO_HDR);
rxq->rxcsum++;
}
if (sc->params.tp.rx_pkt_encap) {
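/*
 * T6+ compressed error vector: err_vec also carries the tunnel
 * (outer encapsulation) type and the tunnel header length.
 */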
const uint16_t ev = be16toh(cpl->err_vec);
if (__predict_false(cpl->ip_frag))
m0->m_pkthdr.csum_data = be16toh(cpl->csum);
else
m0->m_pkthdr.csum_data = 0xffff;
err_vec = G_T6_COMPR_RXERR_VEC(ev);
tnl_type = G_T6_RX_TNL_TYPE(ev);
tnlhdr_len = G_T6_RX_TNLHDR_LEN(ev);
} else {
err_vec = be16toh(cpl->err_vec);
tnl_type = 0;
tnlhdr_len = 0;
}
if (cpl->csum_calc && err_vec == 0) {
int ipv6 = !!(cpl->l2info & htobe32(F_RXF_IP6));
/* checksum(s) calculated and found to be correct. */
MPASS((cpl->l2info & htobe32(F_RXF_IP)) ^
(cpl->l2info & htobe32(F_RXF_IP6)));
m0->m_pkthdr.csum_data = be16toh(cpl->csum);
if (tnl_type == 0) {
if (!ipv6 && ifp->if_capenable & IFCAP_RXCSUM) {
m0->m_pkthdr.csum_flags = CSUM_L3_CALC |
CSUM_L3_VALID | CSUM_L4_CALC |
CSUM_L4_VALID;
} else if (ipv6 && ifp->if_capenable & IFCAP_RXCSUM_IPV6) {
m0->m_pkthdr.csum_flags = CSUM_L4_CALC |
CSUM_L4_VALID;
}
rxq->rxcsum++;
} else {
MPASS(tnl_type == RX_PKT_TNL_TYPE_VXLAN);
if (__predict_false(cpl->ip_frag)) {
/*
* csum_data is for the inner frame (which is an
* IP fragment) and is not 0xffff. There is no
* way to pass the inner csum_data to the stack.
* We don't want the stack to use the inner
* csum_data to validate the outer frame or it
* will get rejected. So we fix csum_data here
* and let sw do the checksum of inner IP
* fragments.
*
* XXX: Need 32b for csum_data2 in an rx mbuf.
* Maybe stuff it into rcv_tstmp?
*/
m0->m_pkthdr.csum_data = 0xffff;
if (ipv6) {
m0->m_pkthdr.csum_flags = CSUM_L4_CALC |
CSUM_L4_VALID;
} else {
m0->m_pkthdr.csum_flags = CSUM_L3_CALC |
CSUM_L3_VALID | CSUM_L4_CALC |
CSUM_L4_VALID;
}
} else {
int outer_ipv6;
MPASS(m0->m_pkthdr.csum_data == 0xffff);
outer_ipv6 = tnlhdr_len >=
sizeof(struct ether_header) +
sizeof(struct ip6_hdr);
m0->m_pkthdr.csum_flags =
sw_csum_flags[outer_ipv6][ipv6];
}
rxq->vxlan_rxcsum++;
}
}
if (cpl->vlan_ex) {
@ -2007,7 +2088,7 @@ eth_rx(struct adapter *sc, struct sge_rxq *rxq, const struct iq_desc *d,
m0->m_pkthdr.numa_domain = ifp->if_numa_domain;
#endif
#if defined(INET) || defined(INET6)
if (rxq->iq.flags & IQ_LRO_ENABLED &&
if (rxq->iq.flags & IQ_LRO_ENABLED && tnl_type == 0 &&
(M_HASHTYPE_GET(m0) == M_HASHTYPE_RSS_TCP_IPV4 ||
M_HASHTYPE_GET(m0) == M_HASHTYPE_RSS_TCP_IPV6)) {
if (sort_before_lro(lro)) {
@ -2179,10 +2260,10 @@ mbuf_nsegs(struct mbuf *m)
{
M_ASSERTPKTHDR(m);
KASSERT(m->m_pkthdr.l5hlen > 0,
KASSERT(m->m_pkthdr.inner_l5hlen > 0,
("%s: mbuf %p missing information on # of segments.", __func__, m));
return (m->m_pkthdr.l5hlen);
return (m->m_pkthdr.inner_l5hlen);
}
static inline void
@ -2190,7 +2271,7 @@ set_mbuf_nsegs(struct mbuf *m, uint8_t nsegs)
{
M_ASSERTPKTHDR(m);
m->m_pkthdr.l5hlen = nsegs;
m->m_pkthdr.inner_l5hlen = nsegs;
}
static inline int
@ -2316,63 +2397,108 @@ alloc_wr_mbuf(int len, int how)
return (m);
}
static inline int
static inline bool
needs_hwcsum(struct mbuf *m)
{
const uint32_t csum_flags = CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP |
CSUM_IP_TSO | CSUM_INNER_IP | CSUM_INNER_IP_UDP |
CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO | CSUM_IP6_UDP |
CSUM_IP6_TCP | CSUM_IP6_TSO | CSUM_INNER_IP6_UDP |
CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_TSO;
M_ASSERTPKTHDR(m);
return (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_IP |
CSUM_TSO | CSUM_UDP_IPV6 | CSUM_TCP_IPV6));
return (m->m_pkthdr.csum_flags & csum_flags);
}
static inline int
static inline bool
needs_tso(struct mbuf *m)
{
const uint32_t csum_flags = CSUM_IP_TSO | CSUM_IP6_TSO |
CSUM_INNER_IP_TSO | CSUM_INNER_IP6_TSO;
M_ASSERTPKTHDR(m);
return (m->m_pkthdr.csum_flags & CSUM_TSO);
return (m->m_pkthdr.csum_flags & csum_flags);
}
static inline int
static inline bool
needs_vxlan_csum(struct mbuf *m)
{
M_ASSERTPKTHDR(m);
return (m->m_pkthdr.csum_flags & CSUM_ENCAP_VXLAN);
}
static inline bool
needs_vxlan_tso(struct mbuf *m)
{
const uint32_t csum_flags = CSUM_ENCAP_VXLAN | CSUM_INNER_IP_TSO |
CSUM_INNER_IP6_TSO;
M_ASSERTPKTHDR(m);
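/*
 * True only when CSUM_ENCAP_VXLAN is set together with at least one
 * inner TSO flag; CSUM_ENCAP_VXLAN alone means checksum offload only.
 */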
return ((m->m_pkthdr.csum_flags & csum_flags) != 0 &&
(m->m_pkthdr.csum_flags & csum_flags) != CSUM_ENCAP_VXLAN);
}
static inline bool
needs_inner_tcp_csum(struct mbuf *m)
{
const uint32_t csum_flags = CSUM_INNER_IP_TSO | CSUM_INNER_IP6_TSO;
M_ASSERTPKTHDR(m);
return (m->m_pkthdr.csum_flags & csum_flags);
}
static inline bool
needs_l3_csum(struct mbuf *m)
{
const uint32_t csum_flags = CSUM_IP | CSUM_IP_TSO | CSUM_INNER_IP |
CSUM_INNER_IP_TSO;
M_ASSERTPKTHDR(m);
return (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO));
return (m->m_pkthdr.csum_flags & csum_flags);
}
static inline int
needs_tcp_csum(struct mbuf *m)
static inline bool
needs_outer_tcp_csum(struct mbuf *m)
{
const uint32_t csum_flags = CSUM_IP_TCP | CSUM_IP_TSO | CSUM_IP6_TCP |
CSUM_IP6_TSO;
M_ASSERTPKTHDR(m);
return (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_TCP_IPV6 | CSUM_TSO));
return (m->m_pkthdr.csum_flags & csum_flags);
}
#ifdef RATELIMIT
static inline int
needs_l4_csum(struct mbuf *m)
static inline bool
needs_outer_l4_csum(struct mbuf *m)
{
const uint32_t csum_flags = CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP_TSO |
CSUM_IP6_UDP | CSUM_IP6_TCP | CSUM_IP6_TSO;
M_ASSERTPKTHDR(m);
return (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 |
CSUM_TCP_IPV6 | CSUM_TSO));
return (m->m_pkthdr.csum_flags & csum_flags);
}
static inline int
needs_udp_csum(struct mbuf *m)
static inline bool
needs_outer_udp_csum(struct mbuf *m)
{
const uint32_t csum_flags = CSUM_IP_UDP | CSUM_IP6_UDP;
M_ASSERTPKTHDR(m);
return (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_UDP_IPV6));
return (m->m_pkthdr.csum_flags & csum_flags);
}
#endif
static inline int
static inline bool
needs_vlan_insertion(struct mbuf *m)
{
@ -2512,6 +2638,23 @@ count_mbuf_nsegs(struct mbuf *m, int skip, uint8_t *cflags)
return (nsegs);
}
/*
* The maximum number of segments that can fit in a WR.
*/
static int
max_nsegs_allowed(struct mbuf *m)
{
if (needs_tso(m)) {
if (needs_vxlan_tso(m))
return (TX_SGL_SEGS_VXLAN_TSO);
else
return (TX_SGL_SEGS_TSO);
}
return (TX_SGL_SEGS);
}
/*
* Analyze the mbuf to determine its tx needs. The mbuf passed in may change:
* a) caller can assume it's been freed if this function returns with an error.
@ -2570,7 +2713,7 @@ parse_pkt(struct adapter *sc, struct mbuf **mp)
return (0);
}
#endif
if (nsegs > (needs_tso(m0) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS)) {
if (nsegs > max_nsegs_allowed(m0)) {
if (defragged++ > 0 || (m = m_defrag(m0, M_NOWAIT)) == NULL) {
rc = EFBIG;
goto fail;
@ -2592,18 +2735,15 @@ parse_pkt(struct adapter *sc, struct mbuf **mp)
}
set_mbuf_nsegs(m0, nsegs);
set_mbuf_cflags(m0, cflags);
if (sc->flags & IS_VF)
set_mbuf_len16(m0, txpkt_vm_len16(nsegs, needs_tso(m0)));
else
set_mbuf_len16(m0, txpkt_len16(nsegs, needs_tso(m0)));
calculate_mbuf_len16(sc, m0);
#ifdef RATELIMIT
/*
* Ethofld is limited to TCP and UDP for now, and only when L4 hw
* checksumming is enabled. needs_l4_csum happens to check for all the
* right things.
* checksumming is enabled. needs_outer_l4_csum happens to check for
* all the right things.
*/
if (__predict_false(needs_eo(cst) && !needs_l4_csum(m0))) {
if (__predict_false(needs_eo(cst) && !needs_outer_l4_csum(m0))) {
m_snd_tag_rele(m0->m_pkthdr.snd_tag);
m0->m_pkthdr.snd_tag = NULL;
m0->m_pkthdr.csum_flags &= ~CSUM_SND_TAG;
@ -2635,21 +2775,27 @@ parse_pkt(struct adapter *sc, struct mbuf **mp)
switch (eh_type) {
#ifdef INET6
case ETHERTYPE_IPV6:
{
struct ip6_hdr *ip6 = l3hdr;
MPASS(!needs_tso(m0) || ip6->ip6_nxt == IPPROTO_TCP);
m0->m_pkthdr.l3hlen = sizeof(*ip6);
m0->m_pkthdr.l3hlen = sizeof(struct ip6_hdr);
break;
}
#endif
#ifdef INET
case ETHERTYPE_IP:
{
struct ip *ip = l3hdr;
m0->m_pkthdr.l3hlen = ip->ip_hl * 4;
if (needs_vxlan_csum(m0)) {
/* Driver will do the outer IP hdr checksum. */
ip->ip_sum = 0;
if (needs_vxlan_tso(m0)) {
const uint16_t ipl = ip->ip_len;
ip->ip_len = 0;
ip->ip_sum = ~in_cksum_hdr(ip);
ip->ip_len = ipl;
} else
ip->ip_sum = in_cksum_hdr(ip);
}
m0->m_pkthdr.l3hlen = ip->ip_hl << 2;
break;
}
#endif
@ -2659,8 +2805,59 @@ parse_pkt(struct adapter *sc, struct mbuf **mp)
__func__, eh_type);
}
if (needs_vxlan_csum(m0)) {
m0->m_pkthdr.l4hlen = sizeof(struct udphdr);
m0->m_pkthdr.l5hlen = sizeof(struct vxlan_header);
/* Inner headers. */
eh = m_advance(&m, &offset, m0->m_pkthdr.l3hlen +
sizeof(struct udphdr) + sizeof(struct vxlan_header));
eh_type = ntohs(eh->ether_type);
if (eh_type == ETHERTYPE_VLAN) {
struct ether_vlan_header *evh = (void *)eh;
eh_type = ntohs(evh->evl_proto);
m0->m_pkthdr.inner_l2hlen = sizeof(*evh);
} else
m0->m_pkthdr.inner_l2hlen = sizeof(*eh);
l3hdr = m_advance(&m, &offset, m0->m_pkthdr.inner_l2hlen);
switch (eh_type) {
#ifdef INET6
case ETHERTYPE_IPV6:
m0->m_pkthdr.inner_l3hlen = sizeof(struct ip6_hdr);
break;
#endif
#ifdef INET
case ETHERTYPE_IP:
{
struct ip *ip = l3hdr;
m0->m_pkthdr.inner_l3hlen = ip->ip_hl << 2;
break;
}
#endif
default:
panic("%s: VXLAN hw offload requested with unknown "
"ethertype 0x%04x. if_cxgbe must be compiled"
" with the same INET/INET6 options as the kernel.",
__func__, eh_type);
}
#if defined(INET) || defined(INET6)
if (needs_tcp_csum(m0)) {
if (needs_inner_tcp_csum(m0)) {
tcp = m_advance(&m, &offset, m0->m_pkthdr.inner_l3hlen);
m0->m_pkthdr.inner_l4hlen = tcp->th_off * 4;
}
#endif
MPASS((m0->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
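/*
 * Outer csum offload flags are dropped: the outer IP checksum was
 * handled above and the hardware deals with the rest of the
 * encapsulation from the inner flags.
 */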
m0->m_pkthdr.csum_flags &= CSUM_INNER_IP6_UDP |
CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_TSO | CSUM_INNER_IP |
CSUM_INNER_IP_UDP | CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO |
CSUM_ENCAP_VXLAN;
}
#if defined(INET) || defined(INET6)
if (needs_outer_tcp_csum(m0)) {
tcp = m_advance(&m, &offset, m0->m_pkthdr.l3hlen);
m0->m_pkthdr.l4hlen = tcp->th_off * 4;
#ifdef RATELIMIT
@ -2670,7 +2867,7 @@ parse_pkt(struct adapter *sc, struct mbuf **mp)
V_FW_ETH_TX_EO_WR_TSOFF(sizeof(*tcp) / 2 + 1));
} else
set_mbuf_eo_tsclk_tsoff(m0, 0);
} else if (needs_udp_csum(m0)) {
} else if (needs_outer_udp_csum(m0)) {
m0->m_pkthdr.l4hlen = sizeof(struct udphdr);
#endif
}
@ -3627,6 +3824,9 @@ alloc_rxq(struct vi_info *vi, struct sge_rxq *rxq, int intr_idx, int idx,
SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vlan_extraction",
CTLFLAG_RD, &rxq->vlan_extraction,
"# of times hardware extracted 802.1Q tag");
SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vxlan_rxcsum",
CTLFLAG_RD, &rxq->vxlan_rxcsum,
"# of times hardware assisted with inner checksum (VXLAN) ");
add_fl_sysctls(sc, &vi->ctx, oid, &rxq->fl);
@ -4281,6 +4481,11 @@ alloc_txq(struct vi_info *vi, struct sge_txq *txq, int idx,
"# of frames tx'd using type1 txpkts work requests");
SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "raw_wrs", CTLFLAG_RD,
&txq->raw_wrs, "# of raw work requests (non-packets)");
SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vxlan_tso_wrs",
CTLFLAG_RD, &txq->vxlan_tso_wrs, "# of VXLAN TSO work requests");
SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vxlan_txcsum",
CTLFLAG_RD, &txq->vxlan_txcsum,
"# of times hardware assisted with inner checksums (VXLAN)");
#ifdef KERN_TLS
if (sc->flags & KERN_TLS_OK) {
@ -4570,27 +4775,25 @@ get_pkt_gl(struct mbuf *m, struct sglist *gl)
KASSERT(gl->sg_nseg == mbuf_nsegs(m),
("%s: nsegs changed for mbuf %p from %d to %d", __func__, m,
mbuf_nsegs(m), gl->sg_nseg));
KASSERT(gl->sg_nseg > 0 &&
gl->sg_nseg <= (needs_tso(m) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS),
KASSERT(gl->sg_nseg > 0 && gl->sg_nseg <= max_nsegs_allowed(m),
("%s: %d segments, should have been 1 <= nsegs <= %d", __func__,
gl->sg_nseg, needs_tso(m) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS));
gl->sg_nseg, max_nsegs_allowed(m)));
}
/*
* len16 for a txpkt WR with a GL. Includes the firmware work request header.
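* "extra" is the size of any TSO CPL that is part of the WR (0 if none).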
*/
static inline u_int
txpkt_len16(u_int nsegs, u_int tso)
txpkt_len16(u_int nsegs, const u_int extra)
{
u_int n;
MPASS(nsegs > 0);
nsegs--; /* first segment is part of ulptx_sgl */
n = sizeof(struct fw_eth_tx_pkt_wr) + sizeof(struct cpl_tx_pkt_core) +
n = extra + sizeof(struct fw_eth_tx_pkt_wr) +
sizeof(struct cpl_tx_pkt_core) +
sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1));
if (tso)
n += sizeof(struct cpl_tx_pkt_lso_core);
return (howmany(n, 16));
}
@ -4600,22 +4803,43 @@ txpkt_len16(u_int nsegs, u_int tso)
* request header.
*/
static inline u_int
txpkt_vm_len16(u_int nsegs, u_int tso)
txpkt_vm_len16(u_int nsegs, const u_int extra)
{
u_int n;
MPASS(nsegs > 0);
nsegs--; /* first segment is part of ulptx_sgl */
n = sizeof(struct fw_eth_tx_pkt_vm_wr) +
n = extra + sizeof(struct fw_eth_tx_pkt_vm_wr) +
sizeof(struct cpl_tx_pkt_core) +
sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1));
if (tso)
n += sizeof(struct cpl_tx_pkt_lso_core);
return (howmany(n, 16));
}
static inline void
calculate_mbuf_len16(struct adapter *sc, struct mbuf *m)
{
const int lso = sizeof(struct cpl_tx_pkt_lso_core);
const int tnl_lso = sizeof(struct cpl_tx_tnl_lso);
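/* Pick the len16 that accounts for the TSO CPL (if any) in the WR. */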
if (sc->flags & IS_VF) {
if (needs_tso(m))
set_mbuf_len16(m, txpkt_vm_len16(mbuf_nsegs(m), lso));
else
set_mbuf_len16(m, txpkt_vm_len16(mbuf_nsegs(m), 0));
return;
}
if (needs_tso(m)) {
if (needs_vxlan_tso(m))
set_mbuf_len16(m, txpkt_len16(mbuf_nsegs(m), tnl_lso));
else
set_mbuf_len16(m, txpkt_len16(mbuf_nsegs(m), lso));
} else
set_mbuf_len16(m, txpkt_len16(mbuf_nsegs(m), 0));
}
/*
* len16 for a txpkts type 0 WR with a GL. Does not include the firmware work
* request header.
@ -4664,51 +4888,162 @@ static inline uint64_t
csum_to_ctrl(struct adapter *sc, struct mbuf *m)
{
uint64_t ctrl;
int csum_type;
int csum_type, l2hlen, l3hlen;
int x, y;
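/* csum_types[x][y]: x selects the L4 protocol (TCP, UDP, none), y the IP version. */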
static const int csum_types[3][2] = {
{TX_CSUM_TCPIP, TX_CSUM_TCPIP6},
{TX_CSUM_UDPIP, TX_CSUM_UDPIP6},
{TX_CSUM_IP, 0}
};
M_ASSERTPKTHDR(m);
if (needs_hwcsum(m) == 0)
if (!needs_hwcsum(m))
return (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS);
ctrl = 0;
if (needs_l3_csum(m) == 0)
ctrl |= F_TXPKT_IPCSUM_DIS;
switch (m->m_pkthdr.csum_flags &
(CSUM_IP_TCP | CSUM_IP_UDP | CSUM_IP6_TCP | CSUM_IP6_UDP)) {
case CSUM_IP_TCP:
csum_type = TX_CSUM_TCPIP;
break;
case CSUM_IP_UDP:
csum_type = TX_CSUM_UDPIP;
break;
case CSUM_IP6_TCP:
csum_type = TX_CSUM_TCPIP6;
break;
case CSUM_IP6_UDP:
csum_type = TX_CSUM_UDPIP6;
break;
default:
/* needs_hwcsum told us that at least some hwcsum is needed. */
MPASS(ctrl == 0);
MPASS(m->m_pkthdr.csum_flags & CSUM_IP);
ctrl |= F_TXPKT_L4CSUM_DIS;
csum_type = TX_CSUM_IP;
break;
MPASS(m->m_pkthdr.l2hlen >= ETHER_HDR_LEN);
MPASS(m->m_pkthdr.l3hlen >= sizeof(struct ip));
if (needs_vxlan_csum(m)) {
MPASS(m->m_pkthdr.l4hlen > 0);
MPASS(m->m_pkthdr.l5hlen > 0);
MPASS(m->m_pkthdr.inner_l2hlen >= ETHER_HDR_LEN);
MPASS(m->m_pkthdr.inner_l3hlen >= sizeof(struct ip));
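/*
 * VXLAN: everything up to and including the inner Ethernet header is
 * presented to the hardware as the "Ethernet header" so the checksum
 * offsets land on the inner L3/L4 headers.
 */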
l2hlen = m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen +
m->m_pkthdr.l4hlen + m->m_pkthdr.l5hlen +
m->m_pkthdr.inner_l2hlen - ETHER_HDR_LEN;
l3hlen = m->m_pkthdr.inner_l3hlen;
} else {
l2hlen = m->m_pkthdr.l2hlen - ETHER_HDR_LEN;
l3hlen = m->m_pkthdr.l3hlen;
}
MPASS(m->m_pkthdr.l2hlen > 0);
MPASS(m->m_pkthdr.l3hlen > 0);
ctrl |= V_TXPKT_CSUM_TYPE(csum_type) |
V_TXPKT_IPHDR_LEN(m->m_pkthdr.l3hlen);
if (chip_id(sc) <= CHELSIO_T5)
ctrl |= V_TXPKT_ETHHDR_LEN(m->m_pkthdr.l2hlen - ETHER_HDR_LEN);
ctrl = 0;
if (!needs_l3_csum(m))
ctrl |= F_TXPKT_IPCSUM_DIS;
if (m->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_INNER_IP_TCP |
CSUM_IP6_TCP | CSUM_INNER_IP6_TCP))
x = 0; /* TCP */
else if (m->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_INNER_IP_UDP |
CSUM_IP6_UDP | CSUM_INNER_IP6_UDP))
x = 1; /* UDP */
else
ctrl |= V_T6_TXPKT_ETHHDR_LEN(m->m_pkthdr.l2hlen - ETHER_HDR_LEN);
x = 2;
if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP |
CSUM_INNER_IP | CSUM_INNER_IP_TCP | CSUM_INNER_IP_UDP))
y = 0; /* IPv4 */
else {
MPASS(m->m_pkthdr.csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP |
CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_UDP));
y = 1; /* IPv6 */
}
/*
* needs_hwcsum returned true earlier so there must be some kind of
* checksum to calculate.
*/
csum_type = csum_types[x][y];
MPASS(csum_type != 0);
if (csum_type == TX_CSUM_IP)
ctrl |= F_TXPKT_L4CSUM_DIS;
ctrl |= V_TXPKT_CSUM_TYPE(csum_type) | V_TXPKT_IPHDR_LEN(l3hlen);
if (chip_id(sc) <= CHELSIO_T5)
ctrl |= V_TXPKT_ETHHDR_LEN(l2hlen);
else
ctrl |= V_T6_TXPKT_ETHHDR_LEN(l2hlen);
return (ctrl);
}
static inline void *
write_lso_cpl(void *cpl, struct mbuf *m0)
{
struct cpl_tx_pkt_lso_core *lso;
uint32_t ctrl;
KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 &&
m0->m_pkthdr.l4hlen > 0,
("%s: mbuf %p needs TSO but missing header lengths",
__func__, m0));
ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) |
F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE |
V_LSO_ETHHDR_LEN((m0->m_pkthdr.l2hlen - ETHER_HDR_LEN) >> 2) |
V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) |
V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2);
if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr))
ctrl |= F_LSO_IPV6;
lso = cpl;
lso->lso_ctrl = htobe32(ctrl);
lso->ipid_ofst = htobe16(0);
lso->mss = htobe16(m0->m_pkthdr.tso_segsz);
lso->seqno_offset = htobe32(0);
lso->len = htobe32(m0->m_pkthdr.len);
return (lso + 1);
}
static void *
write_tnl_lso_cpl(void *cpl, struct mbuf *m0)
{
struct cpl_tx_tnl_lso *tnl_lso = cpl;
uint32_t ctrl;
KASSERT(m0->m_pkthdr.inner_l2hlen > 0 &&
m0->m_pkthdr.inner_l3hlen > 0 && m0->m_pkthdr.inner_l4hlen > 0 &&
m0->m_pkthdr.inner_l5hlen > 0,
("%s: mbuf %p needs VXLAN_TSO but missing inner header lengths",
__func__, m0));
KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 &&
m0->m_pkthdr.l4hlen > 0 && m0->m_pkthdr.l5hlen > 0,
("%s: mbuf %p needs VXLAN_TSO but missing outer header lengths",
__func__, m0));
/* Outer headers. */
ctrl = V_CPL_TX_TNL_LSO_OPCODE(CPL_TX_TNL_LSO) |
F_CPL_TX_TNL_LSO_FIRST | F_CPL_TX_TNL_LSO_LAST |
V_CPL_TX_TNL_LSO_ETHHDRLENOUT(
(m0->m_pkthdr.l2hlen - ETHER_HDR_LEN) >> 2) |
V_CPL_TX_TNL_LSO_IPHDRLENOUT(m0->m_pkthdr.l3hlen >> 2) |
F_CPL_TX_TNL_LSO_IPLENSETOUT;
if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr))
ctrl |= F_CPL_TX_TNL_LSO_IPV6OUT;
else {
ctrl |= F_CPL_TX_TNL_LSO_IPHDRCHKOUT |
F_CPL_TX_TNL_LSO_IPIDINCOUT;
}
tnl_lso->op_to_IpIdSplitOut = htobe32(ctrl);
tnl_lso->IpIdOffsetOut = 0;
tnl_lso->UdpLenSetOut_to_TnlHdrLen =
htobe16(F_CPL_TX_TNL_LSO_UDPCHKCLROUT |
F_CPL_TX_TNL_LSO_UDPLENSETOUT |
V_CPL_TX_TNL_LSO_TNLHDRLEN(m0->m_pkthdr.l2hlen +
m0->m_pkthdr.l3hlen + m0->m_pkthdr.l4hlen +
m0->m_pkthdr.l5hlen) |
V_CPL_TX_TNL_LSO_TNLTYPE(TX_TNL_TYPE_VXLAN));
tnl_lso->r1 = 0;
/* Inner headers. */
ctrl = V_CPL_TX_TNL_LSO_ETHHDRLEN(
(m0->m_pkthdr.inner_l2hlen - ETHER_HDR_LEN) >> 2) |
V_CPL_TX_TNL_LSO_IPHDRLEN(m0->m_pkthdr.inner_l3hlen >> 2) |
V_CPL_TX_TNL_LSO_TCPHDRLEN(m0->m_pkthdr.inner_l4hlen >> 2);
if (m0->m_pkthdr.inner_l3hlen == sizeof(struct ip6_hdr))
ctrl |= F_CPL_TX_TNL_LSO_IPV6;
tnl_lso->Flow_to_TcpHdrLen = htobe32(ctrl);
tnl_lso->IpIdOffset = 0;
tnl_lso->IpIdSplit_to_Mss =
htobe16(V_CPL_TX_TNL_LSO_MSS(m0->m_pkthdr.tso_segsz));
tnl_lso->TCPSeqOffset = 0;
tnl_lso->EthLenOffset_Size =
htobe32(V_CPL_TX_TNL_LSO_SIZE(m0->m_pkthdr.len));
return (tnl_lso + 1);
}
#define VM_TX_L2HDR_LEN 16 /* ethmacdst to vlantci */
/*
@ -4762,29 +5097,7 @@ write_txpkt_vm_wr(struct adapter *sc, struct sge_txq *txq, struct mbuf *m0)
m_copydata(m0, 0, VM_TX_L2HDR_LEN, wr->ethmacdst);
if (needs_tso(m0)) {
struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 &&
m0->m_pkthdr.l4hlen > 0,
("%s: mbuf %p needs TSO but missing header lengths",
__func__, m0));
ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
F_LSO_LAST_SLICE | V_LSO_ETHHDR_LEN((m0->m_pkthdr.l2hlen -
ETHER_HDR_LEN) >> 2) |
V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) |
V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2);
if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr))
ctrl |= F_LSO_IPV6;
lso->lso_ctrl = htobe32(ctrl);
lso->ipid_ofst = htobe16(0);
lso->mss = htobe16(m0->m_pkthdr.tso_segsz);
lso->seqno_offset = htobe32(0);
lso->len = htobe32(pktlen);
cpl = (void *)(lso + 1);
cpl = write_lso_cpl(wr + 1, m0);
txq->tso_wrs++;
} else
cpl = (void *)(wr + 1);
@ -4892,9 +5205,12 @@ write_txpkt_wr(struct adapter *sc, struct sge_txq *txq, struct mbuf *m0,
nsegs = mbuf_nsegs(m0);
pktlen = m0->m_pkthdr.len;
ctrl = sizeof(struct cpl_tx_pkt_core);
if (needs_tso(m0))
ctrl += sizeof(struct cpl_tx_pkt_lso_core);
else if (!(mbuf_cflags(m0) & MC_NOMAP) && pktlen <= imm_payload(2) &&
if (needs_tso(m0)) {
if (needs_vxlan_tso(m0))
ctrl += sizeof(struct cpl_tx_tnl_lso);
else
ctrl += sizeof(struct cpl_tx_pkt_lso_core);
} else if (!(mbuf_cflags(m0) & MC_NOMAP) && pktlen <= imm_payload(2) &&
available >= 2) {
/* Immediate data. Recalculate len16 and set nsegs to 0. */
ctrl += pktlen;
@ -4916,41 +5232,30 @@ write_txpkt_wr(struct adapter *sc, struct sge_txq *txq, struct mbuf *m0,
wr->r3 = 0;
if (needs_tso(m0)) {
struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 &&
m0->m_pkthdr.l4hlen > 0,
("%s: mbuf %p needs TSO but missing header lengths",
__func__, m0));
ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
F_LSO_LAST_SLICE | V_LSO_ETHHDR_LEN((m0->m_pkthdr.l2hlen -
ETHER_HDR_LEN) >> 2) |
V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) |
V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2);
if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr))
ctrl |= F_LSO_IPV6;
lso->lso_ctrl = htobe32(ctrl);
lso->ipid_ofst = htobe16(0);
lso->mss = htobe16(m0->m_pkthdr.tso_segsz);
lso->seqno_offset = htobe32(0);
lso->len = htobe32(pktlen);
cpl = (void *)(lso + 1);
txq->tso_wrs++;
if (needs_vxlan_tso(m0)) {
cpl = write_tnl_lso_cpl(wr + 1, m0);
txq->vxlan_tso_wrs++;
} else {
cpl = write_lso_cpl(wr + 1, m0);
txq->tso_wrs++;
}
} else
cpl = (void *)(wr + 1);
/* Checksum offload */
ctrl1 = csum_to_ctrl(sc, m0);
if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS))
txq->txcsum++; /* some hardware assistance provided */
if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS)) {
/* some hardware assistance provided */
if (needs_vxlan_csum(m0))
txq->vxlan_txcsum++;
else
txq->txcsum++;
}
/* VLAN tag insertion */
if (needs_vlan_insertion(m0)) {
ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
ctrl1 |= F_TXPKT_VLAN_VLD |
V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
txq->vlan_insertion++;
}
@ -4962,6 +5267,8 @@ write_txpkt_wr(struct adapter *sc, struct sge_txq *txq, struct mbuf *m0,
/* SGL */
dst = (void *)(cpl + 1);
if (__predict_false((uintptr_t)dst == (uintptr_t)&eq->desc[eq->sidx]))
dst = (caddr_t)&eq->desc[0];
if (nsegs > 0) {
write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx);
@ -5207,8 +5514,13 @@ write_txpkts_wr(struct adapter *sc, struct sge_txq *txq)
/* Checksum offload */
ctrl1 = csum_to_ctrl(sc, m);
if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS))
txq->txcsum++; /* some hardware assistance provided */
if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS)) {
/* some hardware assistance provided */
if (needs_vxlan_csum(m))
txq->vxlan_txcsum++;
else
txq->txcsum++;
}
/* VLAN tag insertion */
if (needs_vlan_insertion(m)) {
@ -5967,7 +6279,7 @@ write_ethofld_wr(struct cxgbe_rate_tag *cst, struct fw_eth_tx_eo_wr *wr,
wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(len16) |
V_FW_WR_FLOWID(cst->etid));
wr->r3 = 0;
if (needs_udp_csum(m0)) {
if (needs_outer_udp_csum(m0)) {
wr->u.udpseg.type = FW_ETH_TX_EO_TYPE_UDPSEG;
wr->u.udpseg.ethlen = m0->m_pkthdr.l2hlen;
wr->u.udpseg.iplen = htobe16(m0->m_pkthdr.l3hlen);
@ -5979,7 +6291,7 @@ write_ethofld_wr(struct cxgbe_rate_tag *cst, struct fw_eth_tx_eo_wr *wr,
wr->u.udpseg.plen = htobe32(pktlen - immhdrs);
cpl = (void *)(wr + 1);
} else {
MPASS(needs_tcp_csum(m0));
MPASS(needs_outer_tcp_csum(m0));
wr->u.tcpseg.type = FW_ETH_TX_EO_TYPE_TCPSEG;
wr->u.tcpseg.ethlen = m0->m_pkthdr.l2hlen;
wr->u.tcpseg.iplen = htobe16(m0->m_pkthdr.l3hlen);
@ -6016,7 +6328,7 @@ write_ethofld_wr(struct cxgbe_rate_tag *cst, struct fw_eth_tx_eo_wr *wr,
}
/* Checksum offload must be requested for ethofld. */
MPASS(needs_l4_csum(m0));
MPASS(needs_outer_l4_csum(m0));
ctrl1 = csum_to_ctrl(cst->adapter, m0);
/* VLAN tag insertion */