freebsd-dev/sys/dev/e1000/igb_txrx.c
Vincenzo Maffione f7926a6d0c net: iflib: fix vlan processing in the drivers
The logic that sets iri_vtag and M_VLANTAG does not handle the
case where the 802.11q VLAN tag is 0. Fix this issue across
the iflib drivers. While there, also improve and align the
VLAN tag check extraction, by moving it outside the RX descriptor
loop, eliminating a local variable and additional checks.

PR:             260068
Reviewed by:    kbowling, gallatin
Reported by:	erj
MFC after:      1 month
Differential Revision:  https://reviews.freebsd.org/D33156
2021-12-28 11:11:41 +00:00

572 lines
16 KiB
C

/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2016 Matthew Macy <mmacy@mattmacy.io>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* $FreeBSD$ */
#include "if_em.h"
#ifdef RSS
#include <net/rss_config.h>
#include <netinet/in_rss.h>
#endif
#ifdef VERBOSE_DEBUG
#define DPRINTF device_printf
#else
#define DPRINTF(...)
#endif
/*********************************************************************
* Local Function prototypes
*********************************************************************/
static int igb_isc_txd_encap(void *arg, if_pkt_info_t pi);
static void igb_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx);
static int igb_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear);
static void igb_isc_rxd_refill(void *arg, if_rxd_update_t iru);
static void igb_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused,
qidx_t pidx);
static int igb_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx,
qidx_t budget);
static int igb_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);
static int igb_tx_ctx_setup(struct tx_ring *txr, if_pkt_info_t pi,
uint32_t *cmd_type_len, uint32_t *olinfo_status);
static int igb_tso_setup(struct tx_ring *txr, if_pkt_info_t pi,
uint32_t *cmd_type_len, uint32_t *olinfo_status);
static void igb_rx_checksum(uint32_t staterr, if_rxd_info_t ri, uint32_t ptype);
static int igb_determine_rsstype(uint16_t pkt_info);
extern void igb_if_enable_intr(if_ctx_t ctx);
extern int em_intr(void *arg);
struct if_txrx igb_txrx = {
.ift_txd_encap = igb_isc_txd_encap,
.ift_txd_flush = igb_isc_txd_flush,
.ift_txd_credits_update = igb_isc_txd_credits_update,
.ift_rxd_available = igb_isc_rxd_available,
.ift_rxd_pkt_get = igb_isc_rxd_pkt_get,
.ift_rxd_refill = igb_isc_rxd_refill,
.ift_rxd_flush = igb_isc_rxd_flush,
.ift_legacy_intr = em_intr
};
/**********************************************************************
*
* Setup work for hardware segmentation offload (TSO) on
* adapters using advanced tx descriptors
*
**********************************************************************/
static int
igb_tso_setup(struct tx_ring *txr, if_pkt_info_t pi, uint32_t *cmd_type_len,
uint32_t *olinfo_status)
{
struct e1000_adv_tx_context_desc *TXD;
struct e1000_softc *sc = txr->sc;
uint32_t type_tucmd_mlhl = 0, vlan_macip_lens = 0;
uint32_t mss_l4len_idx = 0;
uint32_t paylen;
switch(pi->ipi_etype) {
case ETHERTYPE_IPV6:
type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
break;
case ETHERTYPE_IP:
type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
/* Tell transmit desc to also do IPv4 checksum. */
*olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
break;
default:
panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
__func__, ntohs(pi->ipi_etype));
break;
}
TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[pi->ipi_pidx];
/* This is used in the transmit desc in encap */
paylen = pi->ipi_len - pi->ipi_ehdrlen - pi->ipi_ip_hlen - pi->ipi_tcp_hlen;
/* VLAN MACLEN IPLEN */
if (pi->ipi_mflags & M_VLANTAG) {
vlan_macip_lens |= (pi->ipi_vtag << E1000_ADVTXD_VLAN_SHIFT);
}
vlan_macip_lens |= pi->ipi_ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
vlan_macip_lens |= pi->ipi_ip_hlen;
TXD->vlan_macip_lens = htole32(vlan_macip_lens);
/* ADV DTYPE TUCMD */
type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
/* MSS L4LEN IDX */
mss_l4len_idx |= (pi->ipi_tso_segsz << E1000_ADVTXD_MSS_SHIFT);
mss_l4len_idx |= (pi->ipi_tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
/* 82575 needs the queue index added */
if (sc->hw.mac.type == e1000_82575)
mss_l4len_idx |= txr->me << 4;
TXD->mss_l4len_idx = htole32(mss_l4len_idx);
TXD->u.seqnum_seed = htole32(0);
*cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
*olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
return (1);
}
/*********************************************************************
*
* Advanced Context Descriptor setup for VLAN, CSUM or TSO
*
**********************************************************************/
static int
igb_tx_ctx_setup(struct tx_ring *txr, if_pkt_info_t pi, uint32_t *cmd_type_len,
uint32_t *olinfo_status)
{
struct e1000_adv_tx_context_desc *TXD;
struct e1000_softc *sc = txr->sc;
uint32_t vlan_macip_lens, type_tucmd_mlhl;
uint32_t mss_l4len_idx;
mss_l4len_idx = vlan_macip_lens = type_tucmd_mlhl = 0;
/* First check if TSO is to be used */
if (pi->ipi_csum_flags & CSUM_TSO)
return (igb_tso_setup(txr, pi, cmd_type_len, olinfo_status));
/* Indicate the whole packet as payload when not doing TSO */
*olinfo_status |= pi->ipi_len << E1000_ADVTXD_PAYLEN_SHIFT;
/* Now ready a context descriptor */
TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[pi->ipi_pidx];
/*
** In advanced descriptors the vlan tag must
** be placed into the context descriptor. Hence
** we need to make one even if not doing offloads.
*/
if (pi->ipi_mflags & M_VLANTAG) {
vlan_macip_lens |= (pi->ipi_vtag << E1000_ADVTXD_VLAN_SHIFT);
} else if ((pi->ipi_csum_flags & IGB_CSUM_OFFLOAD) == 0) {
return (0);
}
/* Set the ether header length */
vlan_macip_lens |= pi->ipi_ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
switch(pi->ipi_etype) {
case ETHERTYPE_IP:
type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
break;
case ETHERTYPE_IPV6:
type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
break;
default:
break;
}
vlan_macip_lens |= pi->ipi_ip_hlen;
type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
switch (pi->ipi_ipproto) {
case IPPROTO_TCP:
if (pi->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) {
type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
}
break;
case IPPROTO_UDP:
if (pi->ipi_csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) {
type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
}
break;
case IPPROTO_SCTP:
if (pi->ipi_csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) {
type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
}
break;
default:
break;
}
/* 82575 needs the queue index added */
if (sc->hw.mac.type == e1000_82575)
mss_l4len_idx = txr->me << 4;
/* Now copy bits into descriptor */
TXD->vlan_macip_lens = htole32(vlan_macip_lens);
TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
TXD->u.seqnum_seed = htole32(0);
TXD->mss_l4len_idx = htole32(mss_l4len_idx);
return (1);
}
static int
igb_isc_txd_encap(void *arg, if_pkt_info_t pi)
{
struct e1000_softc *sc = arg;
if_softc_ctx_t scctx = sc->shared;
struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
struct tx_ring *txr = &que->txr;
int nsegs = pi->ipi_nsegs;
bus_dma_segment_t *segs = pi->ipi_segs;
union e1000_adv_tx_desc *txd = NULL;
int i, j, pidx_last;
uint32_t olinfo_status, cmd_type_len, txd_flags;
qidx_t ntxd;
pidx_last = olinfo_status = 0;
/* Basic descriptor defines */
cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
if (pi->ipi_mflags & M_VLANTAG)
cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
i = pi->ipi_pidx;
ntxd = scctx->isc_ntxd[0];
txd_flags = pi->ipi_flags & IPI_TX_INTR ? E1000_ADVTXD_DCMD_RS : 0;
/* Consume the first descriptor */
i += igb_tx_ctx_setup(txr, pi, &cmd_type_len, &olinfo_status);
if (i == scctx->isc_ntxd[0])
i = 0;
/* 82575 needs the queue index added */
if (sc->hw.mac.type == e1000_82575)
olinfo_status |= txr->me << 4;
for (j = 0; j < nsegs; j++) {
bus_size_t seglen;
bus_addr_t segaddr;
txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
seglen = segs[j].ds_len;
segaddr = htole64(segs[j].ds_addr);
txd->read.buffer_addr = segaddr;
txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
cmd_type_len | seglen);
txd->read.olinfo_status = htole32(olinfo_status);
pidx_last = i;
if (++i == scctx->isc_ntxd[0]) {
i = 0;
}
}
if (txd_flags) {
txr->tx_rsq[txr->tx_rs_pidx] = pidx_last;
txr->tx_rs_pidx = (txr->tx_rs_pidx+1) & (ntxd-1);
MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx);
}
txd->read.cmd_type_len |= htole32(E1000_TXD_CMD_EOP | txd_flags);
pi->ipi_new_pidx = i;
return (0);
}
static void
igb_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx)
{
struct e1000_softc *sc = arg;
struct em_tx_queue *que = &sc->tx_queues[txqid];
struct tx_ring *txr = &que->txr;
E1000_WRITE_REG(&sc->hw, E1000_TDT(txr->me), pidx);
}
static int
igb_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear)
{
struct e1000_softc *sc = arg;
if_softc_ctx_t scctx = sc->shared;
struct em_tx_queue *que = &sc->tx_queues[txqid];
struct tx_ring *txr = &que->txr;
qidx_t processed = 0;
int updated;
qidx_t cur, prev, ntxd, rs_cidx;
int32_t delta;
uint8_t status;
rs_cidx = txr->tx_rs_cidx;
if (rs_cidx == txr->tx_rs_pidx)
return (0);
cur = txr->tx_rsq[rs_cidx];
status = ((union e1000_adv_tx_desc *)&txr->tx_base[cur])->wb.status;
updated = !!(status & E1000_TXD_STAT_DD);
if (!updated)
return (0);
/* If clear is false just let caller know that there
* are descriptors to reclaim */
if (!clear)
return (1);
prev = txr->tx_cidx_processed;
ntxd = scctx->isc_ntxd[0];
do {
MPASS(prev != cur);
delta = (int32_t)cur - (int32_t)prev;
if (delta < 0)
delta += ntxd;
MPASS(delta > 0);
processed += delta;
prev = cur;
rs_cidx = (rs_cidx + 1) & (ntxd-1);
if (rs_cidx == txr->tx_rs_pidx)
break;
cur = txr->tx_rsq[rs_cidx];
status = ((union e1000_adv_tx_desc *)&txr->tx_base[cur])->wb.status;
} while ((status & E1000_TXD_STAT_DD));
txr->tx_rs_cidx = rs_cidx;
txr->tx_cidx_processed = prev;
return (processed);
}
static void
igb_isc_rxd_refill(void *arg, if_rxd_update_t iru)
{
struct e1000_softc *sc = arg;
if_softc_ctx_t scctx = sc->shared;
uint16_t rxqid = iru->iru_qsidx;
struct em_rx_queue *que = &sc->rx_queues[rxqid];
union e1000_adv_rx_desc *rxd;
struct rx_ring *rxr = &que->rxr;
uint64_t *paddrs;
uint32_t next_pidx, pidx;
uint16_t count;
int i;
paddrs = iru->iru_paddrs;
pidx = iru->iru_pidx;
count = iru->iru_count;
for (i = 0, next_pidx = pidx; i < count; i++) {
rxd = (union e1000_adv_rx_desc *)&rxr->rx_base[next_pidx];
rxd->read.pkt_addr = htole64(paddrs[i]);
if (++next_pidx == scctx->isc_nrxd[0])
next_pidx = 0;
}
}
static void
igb_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx)
{
struct e1000_softc *sc = arg;
struct em_rx_queue *que = &sc->rx_queues[rxqid];
struct rx_ring *rxr = &que->rxr;
E1000_WRITE_REG(&sc->hw, E1000_RDT(rxr->me), pidx);
}
static int
igb_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
{
struct e1000_softc *sc = arg;
if_softc_ctx_t scctx = sc->shared;
struct em_rx_queue *que = &sc->rx_queues[rxqid];
struct rx_ring *rxr = &que->rxr;
union e1000_adv_rx_desc *rxd;
uint32_t staterr = 0;
int cnt, i;
for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
rxd = (union e1000_adv_rx_desc *)&rxr->rx_base[i];
staterr = le32toh(rxd->wb.upper.status_error);
if ((staterr & E1000_RXD_STAT_DD) == 0)
break;
if (++i == scctx->isc_nrxd[0])
i = 0;
if (staterr & E1000_RXD_STAT_EOP)
cnt++;
}
return (cnt);
}
/****************************************************************
* Routine sends data which has been dma'ed into host memory
* to upper layer. Initialize ri structure.
*
* Returns 0 upon success, errno on failure
***************************************************************/
static int
igb_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
{
struct e1000_softc *sc = arg;
if_softc_ctx_t scctx = sc->shared;
struct em_rx_queue *que = &sc->rx_queues[ri->iri_qsidx];
struct rx_ring *rxr = &que->rxr;
union e1000_adv_rx_desc *rxd;
uint16_t pkt_info, len;
uint32_t ptype, staterr;
int i, cidx;
bool eop;
staterr = i = 0;
cidx = ri->iri_cidx;
do {
rxd = (union e1000_adv_rx_desc *)&rxr->rx_base[cidx];
staterr = le32toh(rxd->wb.upper.status_error);
pkt_info = le16toh(rxd->wb.lower.lo_dword.hs_rss.pkt_info);
MPASS ((staterr & E1000_RXD_STAT_DD) != 0);
len = le16toh(rxd->wb.upper.length);
ptype = le32toh(rxd->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
ri->iri_len += len;
rxr->rx_bytes += ri->iri_len;
rxd->wb.upper.status_error = 0;
eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
/* Make sure bad packets are discarded */
if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
sc->dropped_pkts++;
++rxr->rx_discarded;
return (EBADMSG);
}
ri->iri_frags[i].irf_flid = 0;
ri->iri_frags[i].irf_idx = cidx;
ri->iri_frags[i].irf_len = len;
if (++cidx == scctx->isc_nrxd[0])
cidx = 0;
#ifdef notyet
if (rxr->hdr_split == true) {
ri->iri_frags[i].irf_flid = 1;
ri->iri_frags[i].irf_idx = cidx;
if (++cidx == scctx->isc_nrxd[0])
cidx = 0;
}
#endif
i++;
} while (!eop);
rxr->rx_packets++;
if ((scctx->isc_capenable & IFCAP_RXCSUM) != 0)
igb_rx_checksum(staterr, ri, ptype);
if (staterr & E1000_RXD_STAT_VP) {
if (((sc->hw.mac.type == e1000_i350) ||
(sc->hw.mac.type == e1000_i354)) &&
(staterr & E1000_RXDEXT_STATERR_LB))
ri->iri_vtag = be16toh(rxd->wb.upper.vlan);
else
ri->iri_vtag = le16toh(rxd->wb.upper.vlan);
ri->iri_flags |= M_VLANTAG;
}
ri->iri_flowid =
le32toh(rxd->wb.lower.hi_dword.rss);
ri->iri_rsstype = igb_determine_rsstype(pkt_info);
ri->iri_nfrags = i;
return (0);
}
/*********************************************************************
*
* Verify that the hardware indicated that the checksum is valid.
* Inform the stack about the status of checksum so that stack
* doesn't spend time verifying the checksum.
*
*********************************************************************/
static void
igb_rx_checksum(uint32_t staterr, if_rxd_info_t ri, uint32_t ptype)
{
uint16_t status = (uint16_t)staterr;
uint8_t errors = (uint8_t)(staterr >> 24);
if (__predict_false(status & E1000_RXD_STAT_IXSM))
return;
/* If there is a layer 3 or 4 error we are done */
if (__predict_false(errors & (E1000_RXD_ERR_IPE | E1000_RXD_ERR_TCPE)))
return;
/* IP Checksum Good */
if (status & E1000_RXD_STAT_IPCS)
ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
/* Valid L4E checksum */
if (__predict_true(status &
(E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))) {
/* SCTP header present */
if (__predict_false((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
(ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)) {
ri->iri_csum_flags |= CSUM_SCTP_VALID;
} else {
ri->iri_csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
ri->iri_csum_data = htons(0xffff);
}
}
}
/********************************************************************
*
* Parse the packet type to determine the appropriate hash
*
******************************************************************/
static int
igb_determine_rsstype(uint16_t pkt_info)
{
switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
case E1000_RXDADV_RSSTYPE_IPV4_TCP:
return M_HASHTYPE_RSS_TCP_IPV4;
case E1000_RXDADV_RSSTYPE_IPV4:
return M_HASHTYPE_RSS_IPV4;
case E1000_RXDADV_RSSTYPE_IPV6_TCP:
return M_HASHTYPE_RSS_TCP_IPV6;
case E1000_RXDADV_RSSTYPE_IPV6_EX:
return M_HASHTYPE_RSS_IPV6_EX;
case E1000_RXDADV_RSSTYPE_IPV6:
return M_HASHTYPE_RSS_IPV6;
case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
return M_HASHTYPE_RSS_TCP_IPV6_EX;
default:
return M_HASHTYPE_OPAQUE;
}
}