freebsd-nq/sys/dev/i40e/i40e_txrx.c
2014-06-06 11:12:19 +00:00

1502 lines
39 KiB
C
Executable File

/******************************************************************************
Copyright (c) 2013-2014, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
/*$FreeBSD$*/
/*
** I40E driver TX/RX Routines:
** This was seperated to allow usage by
** both the BASE and the VF drivers.
*/
#include "opt_inet.h"
#include "opt_inet6.h"
#include "i40e.h"
/* Local Prototypes */
static void i40e_rx_checksum(struct mbuf *, u32, u32, u32);
static void i40e_refresh_mbufs(struct i40e_queue *, int);
static int i40e_xmit(struct i40e_queue *, struct mbuf **);
static int i40e_tx_setup_offload(struct i40e_queue *,
struct mbuf *, u32 *, u32 *);
static bool i40e_tso_setup(struct i40e_queue *, struct mbuf *);
static __inline void i40e_rx_discard(struct rx_ring *, int);
static __inline void i40e_rx_input(struct rx_ring *, struct ifnet *,
struct mbuf *, u32);
/*
** Multiqueue Transmit driver
**
*/
int
i40e_mq_start(struct ifnet *ifp, struct mbuf *m)
{
struct i40e_vsi *vsi = ifp->if_softc;
struct i40e_queue *que;
struct tx_ring *txr;
int err, i;
/* Which queue to use */
if ((m->m_flags & M_FLOWID) != 0)
i = m->m_pkthdr.flowid % vsi->num_queues;
else
i = curcpu % vsi->num_queues;
que = &vsi->queues[i];
txr = &que->txr;
err = drbr_enqueue(ifp, txr->br, m);
if (err)
return(err);
if (I40E_TX_TRYLOCK(txr)) {
err = i40e_mq_start_locked(ifp, txr);
I40E_TX_UNLOCK(txr);
} else
taskqueue_enqueue(que->tq, &que->tx_task);
return (err);
}
int
i40e_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
struct i40e_queue *que = txr->que;
struct i40e_vsi *vsi = que->vsi;
struct mbuf *next;
int err = 0;
if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
vsi->link_active == 0)
return (ENETDOWN);
/* Process the transmit queue */
while ((next = drbr_peek(ifp, txr->br)) != NULL) {
if ((err = i40e_xmit(que, &next)) != 0) {
if (next == NULL)
drbr_advance(ifp, txr->br);
else
drbr_putback(ifp, txr->br, next);
break;
}
drbr_advance(ifp, txr->br);
/* Send a copy of the frame to the BPF listener */
ETHER_BPF_MTAP(ifp, next);
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
break;
}
if (txr->avail < I40E_TX_CLEANUP_THRESHOLD)
i40e_txeof(que);
return (err);
}
/*
* Called from a taskqueue to drain queued transmit packets.
*/
void
i40e_deferred_mq_start(void *arg, int pending)
{
struct i40e_queue *que = arg;
struct tx_ring *txr = &que->txr;
struct i40e_vsi *vsi = que->vsi;
struct ifnet *ifp = vsi->ifp;
I40E_TX_LOCK(txr);
if (!drbr_empty(ifp, txr->br))
i40e_mq_start_locked(ifp, txr);
I40E_TX_UNLOCK(txr);
}
/*
** Flush all queue ring buffers
*/
void
i40e_qflush(struct ifnet *ifp)
{
struct i40e_vsi *vsi = ifp->if_softc;
for (int i = 0; i < vsi->num_queues; i++) {
struct i40e_queue *que = &vsi->queues[i];
struct tx_ring *txr = &que->txr;
struct mbuf *m;
I40E_TX_LOCK(txr);
while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
m_freem(m);
I40E_TX_UNLOCK(txr);
}
if_qflush(ifp);
}
/*********************************************************************
*
* This routine maps the mbufs to tx descriptors, allowing the
* TX engine to transmit the packets.
* - return 0 on success, positive on failure
*
**********************************************************************/
#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
static int
i40e_xmit(struct i40e_queue *que, struct mbuf **m_headp)
{
struct i40e_vsi *vsi = que->vsi;
struct i40e_hw *hw = vsi->hw;
struct tx_ring *txr = &que->txr;
struct i40e_tx_buf *buf;
struct i40e_tx_desc *txd = NULL;
struct mbuf *m_head;
int i, j, error, nsegs;
int first, last = 0;
u16 vtag = 0;
u32 cmd, off;
bus_dmamap_t map;
bus_dma_segment_t segs[I40E_MAX_SEGS];
cmd = off = 0;
m_head = *m_headp;
/* Grab the VLAN tag */
if (m_head->m_flags & M_VLANTAG)
vtag = htole16(m_head->m_pkthdr.ether_vtag);
/*
* Important to capture the first descriptor
* used because it will contain the index of
* the one we tell the hardware to report back
*/
first = txr->next_avail;
buf = &txr->buffers[first];
map = buf->map;
/*
* Map the packet for DMA.
*/
error = bus_dmamap_load_mbuf_sg(txr->tag, map,
*m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
if (error == EFBIG) {
struct mbuf *m;
m = m_defrag(*m_headp, M_NOWAIT);
if (m == NULL) {
que->mbuf_defrag_failed++;
m_freem(*m_headp);
*m_headp = NULL;
return (ENOBUFS);
}
*m_headp = m;
/* Try it again */
error = bus_dmamap_load_mbuf_sg(txr->tag, map,
*m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
if (error == ENOMEM) {
que->tx_dma_setup++;
return (error);
} else if (error != 0) {
que->tx_dma_setup++;
m_freem(*m_headp);
*m_headp = NULL;
return (error);
}
} else if (error == ENOMEM) {
que->tx_dma_setup++;
return (error);
} else if (error != 0) {
que->tx_dma_setup++;
m_freem(*m_headp);
*m_headp = NULL;
return (error);
}
/* Make certain there are enough descriptors */
if (nsegs > txr->avail - 2) {
txr->no_desc++;
error = ENOBUFS;
goto xmit_fail;
}
m_head = *m_headp;
/* Set up the TSO/CSUM offload */
error = i40e_tx_setup_offload(que, m_head, &cmd, &off);
if (error)
goto xmit_fail;
cmd |= I40E_TX_DESC_CMD_ICRC;
/* Add vlan tag to each descriptor */
if (m_head->m_flags & M_VLANTAG)
cmd |= I40E_TX_DESC_CMD_IL2TAG1;
i = txr->next_avail;
for (j = 0; j < nsegs; j++) {
bus_size_t seglen;
buf = &txr->buffers[i];
txd = &txr->base[i];
seglen = segs[j].ds_len;
txd->buffer_addr = htole64(segs[j].ds_addr);
txd->cmd_type_offset_bsz =
htole64(I40E_TX_DESC_DTYPE_DATA
| ((u64)cmd << I40E_TXD_QW1_CMD_SHIFT)
| ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
| ((u64)seglen << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
| ((u64)vtag << I40E_TXD_QW1_L2TAG1_SHIFT));
last = i; /* descriptor that will get completion IRQ */
if (++i == que->num_desc)
i = 0;
buf->m_head = NULL;
buf->eop_index = -1;
}
/* Set the last descriptor for report */
txd->cmd_type_offset_bsz |=
htole64(((u64)I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
txr->avail -= nsegs;
txr->next_avail = i;
buf->m_head = m_head;
/* Swap the dma map between the first and last descriptor */
txr->buffers[first].map = buf->map;
buf->map = map;
bus_dmamap_sync(txr->tag, map, BUS_DMASYNC_PREWRITE);
/* Set the index of the descriptor that will be marked done */
buf = &txr->buffers[first];
buf->eop_index = last;
bus_dmamap_sync(txr->dma.tag, txr->dma.map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/*
* Advance the Transmit Descriptor Tail (Tdt), this tells the
* hardware that this frame is available to transmit.
*/
++txr->total_packets;
wr32(hw, I40E_QTX_TAIL(que->me), i);
i40e_flush(hw);
/* Mark outstanding work */
if (que->busy == 0)
que->busy = 1;
return (0);
xmit_fail:
bus_dmamap_unload(txr->tag, buf->map);
return (error);
}
/*********************************************************************
*
* Allocate memory for tx_buffer structures. The tx_buffer stores all
* the information needed to transmit a packet on the wire. This is
* called only once at attach, setup is done every reset.
*
**********************************************************************/
int
i40e_allocate_tx_data(struct i40e_queue *que)
{
struct tx_ring *txr = &que->txr;
struct i40e_vsi *vsi = que->vsi;
device_t dev = vsi->dev;
struct i40e_tx_buf *buf;
int error = 0;
/*
* Setup DMA descriptor areas.
*/
if ((error = bus_dma_tag_create(NULL, /* parent */
1, 0, /* alignment, bounds */
BUS_SPACE_MAXADDR, /* lowaddr */
BUS_SPACE_MAXADDR, /* highaddr */
NULL, NULL, /* filter, filterarg */
I40E_TSO_SIZE, /* maxsize */
32,
PAGE_SIZE, /* maxsegsize */
0, /* flags */
NULL, /* lockfunc */
NULL, /* lockfuncarg */
&txr->tag))) {
device_printf(dev,"Unable to allocate TX DMA tag\n");
goto fail;
}
if (!(txr->buffers =
(struct i40e_tx_buf *) malloc(sizeof(struct i40e_tx_buf) *
que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
device_printf(dev, "Unable to allocate tx_buffer memory\n");
error = ENOMEM;
goto fail;
}
/* Create the descriptor buffer dma maps */
buf = txr->buffers;
for (int i = 0; i < que->num_desc; i++, buf++) {
error = bus_dmamap_create(txr->tag, 0, &buf->map);
if (error != 0) {
device_printf(dev, "Unable to create TX DMA map\n");
goto fail;
}
}
fail:
return (error);
}
/*********************************************************************
*
* (Re)Initialize a queue transmit ring.
* - called by init, it clears the descriptor ring,
* and frees any stale mbufs
*
**********************************************************************/
void
i40e_init_tx_ring(struct i40e_queue *que)
{
struct tx_ring *txr = &que->txr;
struct i40e_tx_buf *buf;
/* Clear the old ring contents */
I40E_TX_LOCK(txr);
bzero((void *)txr->base,
(sizeof(struct i40e_tx_desc)) * que->num_desc);
/* Reset indices */
txr->next_avail = 0;
txr->next_to_clean = 0;
#ifdef I40E_FDIR
/* Initialize flow director */
txr->atr_rate = i40e_atr_rate;
txr->atr_count = 0;
#endif
/* Free any existing tx mbufs. */
buf = txr->buffers;
for (int i = 0; i < que->num_desc; i++, buf++) {
if (buf->m_head != NULL) {
bus_dmamap_sync(txr->tag, buf->map,
BUS_DMASYNC_POSTWRITE);
bus_dmamap_unload(txr->tag, buf->map);
m_freem(buf->m_head);
buf->m_head = NULL;
}
/* Clear the EOP index */
buf->eop_index = -1;
}
/* Set number of descriptors available */
txr->avail = que->num_desc;
bus_dmamap_sync(txr->dma.tag, txr->dma.map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
I40E_TX_UNLOCK(txr);
}
/*********************************************************************
*
* Free transmit ring related data structures.
*
**********************************************************************/
void
i40e_free_que_tx(struct i40e_queue *que)
{
struct tx_ring *txr = &que->txr;
struct i40e_tx_buf *buf;
INIT_DEBUGOUT("i40e_free_que_tx: begin");
for (int i = 0; i < que->num_desc; i++) {
buf = &txr->buffers[i];
if (buf->m_head != NULL) {
bus_dmamap_sync(txr->tag, buf->map,
BUS_DMASYNC_POSTWRITE);
bus_dmamap_unload(txr->tag,
buf->map);
m_freem(buf->m_head);
buf->m_head = NULL;
if (buf->map != NULL) {
bus_dmamap_destroy(txr->tag,
buf->map);
buf->map = NULL;
}
} else if (buf->map != NULL) {
bus_dmamap_unload(txr->tag,
buf->map);
bus_dmamap_destroy(txr->tag,
buf->map);
buf->map = NULL;
}
}
if (txr->br != NULL)
buf_ring_free(txr->br, M_DEVBUF);
if (txr->buffers != NULL) {
free(txr->buffers, M_DEVBUF);
txr->buffers = NULL;
}
if (txr->tag != NULL) {
bus_dma_tag_destroy(txr->tag);
txr->tag = NULL;
}
return;
}
/*********************************************************************
*
* Setup descriptor for hw offloads
*
**********************************************************************/
static int
i40e_tx_setup_offload(struct i40e_queue *que,
struct mbuf *mp, u32 *cmd, u32 *off)
{
struct ether_vlan_header *eh;
struct ip *ip = NULL;
struct tcphdr *th = NULL;
struct ip6_hdr *ip6;
int elen, ip_hlen = 0, tcp_hlen;
u16 etype;
u8 ipproto = 0;
bool tso = FALSE;
/* Set up the TSO context descriptor if required */
if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
tso = i40e_tso_setup(que, mp);
if (tso)
++que->tso;
else
return (ENXIO);
}
/*
* Determine where frame payload starts.
* Jump over vlan headers if already present,
* helpful for QinQ too.
*/
eh = mtod(mp, struct ether_vlan_header *);
if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
etype = ntohs(eh->evl_proto);
elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
} else {
etype = ntohs(eh->evl_encap_proto);
elen = ETHER_HDR_LEN;
}
switch (etype) {
case ETHERTYPE_IP:
ip = (struct ip *)(mp->m_data + elen);
ip_hlen = ip->ip_hl << 2;
ipproto = ip->ip_p;
th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
/* The IP checksum must be recalculated with TSO */
if (tso)
*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
else
*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
break;
case ETHERTYPE_IPV6:
ip6 = (struct ip6_hdr *)(mp->m_data + elen);
ip_hlen = sizeof(struct ip6_hdr);
ipproto = ip6->ip6_nxt;
th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
*cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
/* Falls thru */
default:
break;
}
*off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
*off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
switch (ipproto) {
case IPPROTO_TCP:
tcp_hlen = th->th_off << 2;
if (mp->m_pkthdr.csum_flags & CSUM_TCP ||
mp->m_pkthdr.csum_flags & CSUM_TCP_IPV6) {
*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
*off |= (tcp_hlen >> 2) <<
I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
}
#ifdef I40E_FDIR
i40e_atr(que, th, etype);
#endif
break;
case IPPROTO_UDP:
if (mp->m_pkthdr.csum_flags & CSUM_UDP ||
mp->m_pkthdr.csum_flags & CSUM_UDP_IPV6) {
*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
*off |= (sizeof(struct udphdr) >> 2) <<
I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
}
break;
case IPPROTO_SCTP:
if (mp->m_pkthdr.csum_flags & CSUM_SCTP) {
*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
*off |= (sizeof(struct sctphdr) >> 2) <<
I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
}
/* Fall Thru */
default:
break;
}
return (0);
}
/**********************************************************************
*
* Setup context for hardware segmentation offload (TSO)
*
**********************************************************************/
static bool
i40e_tso_setup(struct i40e_queue *que, struct mbuf *mp)
{
struct tx_ring *txr = &que->txr;
struct i40e_tx_context_desc *TXD;
struct i40e_tx_buf *buf;
u32 cmd, mss, type, tsolen;
u16 etype;
int idx, elen, ip_hlen, tcp_hlen;
struct ether_vlan_header *eh;
struct ip *ip;
struct ip6_hdr *ip6;
struct tcphdr *th;
u64 type_cmd_tso_mss;
/*
* Determine where frame payload starts.
* Jump over vlan headers if already present
*/
eh = mtod(mp, struct ether_vlan_header *);
if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
etype = eh->evl_proto;
} else {
elen = ETHER_HDR_LEN;
etype = eh->evl_encap_proto;
}
switch (ntohs(etype)) {
#ifdef INET6
case ETHERTYPE_IPV6:
ip6 = (struct ip6_hdr *)(mp->m_data + elen);
if (ip6->ip6_nxt != IPPROTO_TCP)
return (ENXIO);
ip_hlen = sizeof(struct ip6_hdr);
th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
tcp_hlen = th->th_off << 2;
break;
#endif
#ifdef INET
case ETHERTYPE_IP:
ip = (struct ip *)(mp->m_data + elen);
if (ip->ip_p != IPPROTO_TCP)
return (ENXIO);
ip->ip_sum = 0;
ip_hlen = ip->ip_hl << 2;
th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
th->th_sum = in_pseudo(ip->ip_src.s_addr,
ip->ip_dst.s_addr, htons(IPPROTO_TCP));
tcp_hlen = th->th_off << 2;
break;
#endif
default:
panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
__func__, ntohs(etype));
break;
}
/* Ensure we have at least the IP+TCP header in the first mbuf. */
if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
return FALSE;
idx = txr->next_avail;
buf = &txr->buffers[idx];
TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
type = I40E_TX_DESC_DTYPE_CONTEXT;
cmd = I40E_TX_CTX_DESC_TSO;
mss = mp->m_pkthdr.tso_segsz;
type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
TXD->tunneling_params = htole32(0);
buf->m_head = NULL;
buf->eop_index = -1;
if (++idx == que->num_desc)
idx = 0;
txr->avail--;
txr->next_avail = idx;
return TRUE;
}
/**********************************************************************
*
* Examine each tx_buffer in the used queue. If the hardware is done
* processing the packet then free associated resources. The
* tx_buffer is put back on the free queue.
*
**********************************************************************/
bool
i40e_txeof(struct i40e_queue *que)
{
struct i40e_vsi *vsi = que->vsi;
struct ifnet *ifp = vsi->ifp;
struct tx_ring *txr = &que->txr;
u32 first, last, done, processed;
struct i40e_tx_buf *buf;
struct i40e_tx_desc *tx_desc, *eop_desc;
mtx_assert(&txr->mtx, MA_OWNED);
/* These are not the descriptors you seek, move along :) */
if (txr->avail == que->num_desc) {
que->busy = 0;
return FALSE;
}
processed = 0;
first = txr->next_to_clean;
buf = &txr->buffers[first];
tx_desc = (struct i40e_tx_desc *)&txr->base[first];
last = buf->eop_index;
if (last == -1)
return FALSE;
eop_desc = (struct i40e_tx_desc *)&txr->base[last];
/*
** Get the index of the first descriptor
** BEYOND the EOP and call that 'done'.
** I do this so the comparison in the
** inner while loop below can be simple
*/
if (++last == que->num_desc) last = 0;
done = last;
bus_dmamap_sync(txr->dma.tag, txr->dma.map,
BUS_DMASYNC_POSTREAD);
/*
** Only the EOP descriptor of a packet now has the DD
** bit set, this is what we look for...
*/
while (eop_desc->cmd_type_offset_bsz &
htole32(I40E_TX_DESC_DTYPE_DESC_DONE)) {
/* We clean the range of the packet */
while (first != done) {
tx_desc->cmd_type_offset_bsz &=
~I40E_TXD_QW1_DTYPE_MASK;
++txr->avail;
++processed;
if (buf->m_head) {
txr->bytes +=
buf->m_head->m_pkthdr.len;
bus_dmamap_sync(txr->tag,
buf->map,
BUS_DMASYNC_POSTWRITE);
bus_dmamap_unload(txr->tag,
buf->map);
m_freem(buf->m_head);
buf->m_head = NULL;
buf->map = NULL;
}
buf->eop_index = -1;
if (++first == que->num_desc)
first = 0;
buf = &txr->buffers[first];
tx_desc = &txr->base[first];
}
++txr->packets;
++ifp->if_opackets;
/* See if there is more work now */
last = buf->eop_index;
if (last != -1) {
eop_desc = &txr->base[last];
/* Get next done point */
if (++last == que->num_desc) last = 0;
done = last;
} else
break;
}
bus_dmamap_sync(txr->dma.tag, txr->dma.map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
txr->next_to_clean = first;
/*
** Hang detection, we know there's
** work outstanding or the first return
** would have been taken, so indicate an
** unsuccessful pass, in local_timer if
** the value is too great the queue will
** be considered hung. If anything has been
** cleaned then reset the state.
*/
if (!processed)
++que->busy;
else
que->busy = 1;
/*
* If there are no pending descriptors, clear the timeout.
*/
if (txr->avail == que->num_desc) {
que->busy = 0;
return FALSE;
}
return TRUE;
}
/*********************************************************************
*
* Refresh mbuf buffers for RX descriptor rings
* - now keeps its own state so discards due to resource
* exhaustion are unnecessary, if an mbuf cannot be obtained
* it just returns, keeping its placeholder, thus it can simply
* be recalled to try again.
*
**********************************************************************/
static void
i40e_refresh_mbufs(struct i40e_queue *que, int limit)
{
struct i40e_vsi *vsi = que->vsi;
struct rx_ring *rxr = &que->rxr;
bus_dma_segment_t hseg[1];
bus_dma_segment_t pseg[1];
struct i40e_rx_buf *buf;
struct mbuf *mh, *mp;
int i, j, nsegs, error;
bool refreshed = FALSE;
i = j = rxr->next_refresh;
/* Control the loop with one beyond */
if (++j == que->num_desc)
j = 0;
while (j != limit) {
buf = &rxr->buffers[i];
if (rxr->hdr_split == FALSE)
goto no_split;
if (buf->m_head == NULL) {
mh = m_gethdr(M_NOWAIT, MT_DATA);
if (mh == NULL)
goto update;
} else
mh = buf->m_head;
mh->m_pkthdr.len = mh->m_len = MHLEN;
mh->m_len = MHLEN;
mh->m_flags |= M_PKTHDR;
/* Get the memory mapping */
error = bus_dmamap_load_mbuf_sg(rxr->htag,
buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
if (error != 0) {
printf("Refresh mbufs: hdr dmamap load"
" failure - %d\n", error);
m_free(mh);
buf->m_head = NULL;
goto update;
}
buf->m_head = mh;
bus_dmamap_sync(rxr->htag, buf->hmap,
BUS_DMASYNC_PREREAD);
rxr->base[i].read.hdr_addr =
htole64(hseg[0].ds_addr);
no_split:
if (buf->m_pack == NULL) {
mp = m_getjcl(M_NOWAIT, MT_DATA,
M_PKTHDR, rxr->mbuf_sz);
if (mp == NULL)
goto update;
} else
mp = buf->m_pack;
mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
/* Get the memory mapping */
error = bus_dmamap_load_mbuf_sg(rxr->ptag,
buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
if (error != 0) {
printf("Refresh mbufs: payload dmamap load"
" failure - %d\n", error);
m_free(mp);
buf->m_pack = NULL;
goto update;
}
buf->m_pack = mp;
bus_dmamap_sync(rxr->ptag, buf->pmap,
BUS_DMASYNC_PREREAD);
rxr->base[i].read.pkt_addr =
htole64(pseg[0].ds_addr);
rxr->base[i].read.hdr_addr = 0;
refreshed = TRUE;
/* Next is precalculated */
i = j;
rxr->next_refresh = i;
if (++j == que->num_desc)
j = 0;
}
update:
if (refreshed) /* Update hardware tail index */
wr32(vsi->hw, I40E_QRX_TAIL(que->me), rxr->next_refresh);
return;
}
/*********************************************************************
*
* Allocate memory for rx_buffer structures. Since we use one
* rx_buffer per descriptor, the maximum number of rx_buffer's
* that we'll need is equal to the number of receive descriptors
* that we've defined.
*
**********************************************************************/
int
i40e_allocate_rx_data(struct i40e_queue *que)
{
struct rx_ring *rxr = &que->rxr;
struct i40e_vsi *vsi = que->vsi;
device_t dev = vsi->dev;
struct i40e_rx_buf *buf;
int i, bsize, error;
bsize = sizeof(struct i40e_rx_buf) * que->num_desc;
if (!(rxr->buffers =
(struct i40e_rx_buf *) malloc(bsize,
M_DEVBUF, M_NOWAIT | M_ZERO))) {
device_printf(dev, "Unable to allocate rx_buffer memory\n");
error = ENOMEM;
return (error);
}
if ((error = bus_dma_tag_create(NULL, /* parent */
1, 0, /* alignment, bounds */
BUS_SPACE_MAXADDR, /* lowaddr */
BUS_SPACE_MAXADDR, /* highaddr */
NULL, NULL, /* filter, filterarg */
MSIZE, /* maxsize */
1, /* nsegments */
MSIZE, /* maxsegsize */
0, /* flags */
NULL, /* lockfunc */
NULL, /* lockfuncarg */
&rxr->htag))) {
device_printf(dev, "Unable to create RX DMA htag\n");
return (error);
}
if ((error = bus_dma_tag_create(NULL, /* parent */
1, 0, /* alignment, bounds */
BUS_SPACE_MAXADDR, /* lowaddr */
BUS_SPACE_MAXADDR, /* highaddr */
NULL, NULL, /* filter, filterarg */
MJUM16BYTES, /* maxsize */
1, /* nsegments */
MJUM16BYTES, /* maxsegsize */
0, /* flags */
NULL, /* lockfunc */
NULL, /* lockfuncarg */
&rxr->ptag))) {
device_printf(dev, "Unable to create RX DMA ptag\n");
return (error);
}
for (i = 0; i < que->num_desc; i++) {
buf = &rxr->buffers[i];
error = bus_dmamap_create(rxr->htag,
BUS_DMA_NOWAIT, &buf->hmap);
if (error) {
device_printf(dev, "Unable to create RX head map\n");
break;
}
error = bus_dmamap_create(rxr->ptag,
BUS_DMA_NOWAIT, &buf->pmap);
if (error) {
device_printf(dev, "Unable to create RX pkt map\n");
break;
}
}
return (error);
}
/*********************************************************************
*
* (Re)Initialize the queue receive ring and its buffers.
*
**********************************************************************/
int
i40e_init_rx_ring(struct i40e_queue *que)
{
struct i40e_vsi *vsi = que->vsi;
struct ifnet *ifp = vsi->ifp;
struct rx_ring *rxr = &que->rxr;
struct lro_ctrl *lro = &rxr->lro;
struct i40e_rx_buf *buf;
bus_dma_segment_t pseg[1], hseg[1];
int rsize, nsegs, error = 0;
I40E_RX_LOCK(rxr);
/* Clear the ring contents */
rsize = roundup2(que->num_desc *
sizeof(union i40e_rx_desc), DBA_ALIGN);
bzero((void *)rxr->base, rsize);
/* Cleanup any existing buffers */
for (int i = 0; i < que->num_desc; i++) {
buf = &rxr->buffers[i];
if (buf->m_head != NULL) {
bus_dmamap_sync(rxr->htag, buf->hmap,
BUS_DMASYNC_POSTREAD);
bus_dmamap_unload(rxr->htag, buf->hmap);
buf->m_head->m_flags |= M_PKTHDR;
m_freem(buf->m_head);
}
if (buf->m_pack != NULL) {
bus_dmamap_sync(rxr->ptag, buf->pmap,
BUS_DMASYNC_POSTREAD);
bus_dmamap_unload(rxr->ptag, buf->pmap);
buf->m_pack->m_flags |= M_PKTHDR;
m_freem(buf->m_pack);
}
buf->m_head = NULL;
buf->m_pack = NULL;
}
rxr->hdr_split = FALSE;
/* Now replenish the mbufs */
for (int j = 0; j != que->num_desc; ++j) {
struct mbuf *mh, *mp;
buf = &rxr->buffers[j];
/*
** Don't allocate mbufs if not
** doing header split, its wasteful
*/
if (rxr->hdr_split == FALSE)
goto skip_head;
/* First the header */
buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
if (buf->m_head == NULL) {
error = ENOBUFS;
goto fail;
}
m_adj(buf->m_head, ETHER_ALIGN);
mh = buf->m_head;
mh->m_len = mh->m_pkthdr.len = MHLEN;
mh->m_flags |= M_PKTHDR;
/* Get the memory mapping */
error = bus_dmamap_load_mbuf_sg(rxr->htag,
buf->hmap, buf->m_head, hseg,
&nsegs, BUS_DMA_NOWAIT);
if (error != 0) /* Nothing elegant to do here */
goto fail;
bus_dmamap_sync(rxr->htag,
buf->hmap, BUS_DMASYNC_PREREAD);
/* Update descriptor */
rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
skip_head:
/* Now the payload cluster */
buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
M_PKTHDR, rxr->mbuf_sz);
if (buf->m_pack == NULL) {
error = ENOBUFS;
goto fail;
}
mp = buf->m_pack;
mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
/* Get the memory mapping */
error = bus_dmamap_load_mbuf_sg(rxr->ptag,
buf->pmap, mp, pseg,
&nsegs, BUS_DMA_NOWAIT);
if (error != 0)
goto fail;
bus_dmamap_sync(rxr->ptag,
buf->pmap, BUS_DMASYNC_PREREAD);
/* Update descriptor */
rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
rxr->base[j].read.hdr_addr = 0;
}
/* Setup our descriptor indices */
rxr->next_check = 0;
rxr->next_refresh = 0;
rxr->lro_enabled = FALSE;
rxr->split = 0;
rxr->bytes = 0;
rxr->discard = FALSE;
/*
** Now set up the LRO interface:
*/
if (ifp->if_capenable & IFCAP_LRO) {
int err = tcp_lro_init(lro);
if (err) {
printf("LRO Initialization failed!\n");
goto fail;
}
INIT_DEBUGOUT("RX Soft LRO Initialized\n");
rxr->lro_enabled = TRUE;
lro->ifp = vsi->ifp;
}
bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
fail:
I40E_RX_UNLOCK(rxr);
return (error);
}
/*********************************************************************
*
* Free station receive ring data structures
*
**********************************************************************/
void
i40e_free_que_rx(struct i40e_queue *que)
{
struct rx_ring *rxr = &que->rxr;
struct i40e_rx_buf *buf;
INIT_DEBUGOUT("free_que_rx: begin");
/* Cleanup any existing buffers */
if (rxr->buffers != NULL) {
for (int i = 0; i < que->num_desc; i++) {
buf = &rxr->buffers[i];
if (buf->m_head != NULL) {
bus_dmamap_sync(rxr->htag, buf->hmap,
BUS_DMASYNC_POSTREAD);
bus_dmamap_unload(rxr->htag, buf->hmap);
buf->m_head->m_flags |= M_PKTHDR;
m_freem(buf->m_head);
}
if (buf->m_pack != NULL) {
bus_dmamap_sync(rxr->ptag, buf->pmap,
BUS_DMASYNC_POSTREAD);
bus_dmamap_unload(rxr->ptag, buf->pmap);
buf->m_pack->m_flags |= M_PKTHDR;
m_freem(buf->m_pack);
}
buf->m_head = NULL;
buf->m_pack = NULL;
if (buf->hmap != NULL) {
bus_dmamap_destroy(rxr->htag, buf->hmap);
buf->hmap = NULL;
}
if (buf->pmap != NULL) {
bus_dmamap_destroy(rxr->ptag, buf->pmap);
buf->pmap = NULL;
}
}
if (rxr->buffers != NULL) {
free(rxr->buffers, M_DEVBUF);
rxr->buffers = NULL;
}
}
if (rxr->htag != NULL) {
bus_dma_tag_destroy(rxr->htag);
rxr->htag = NULL;
}
if (rxr->ptag != NULL) {
bus_dma_tag_destroy(rxr->ptag);
rxr->ptag = NULL;
}
return;
}
static __inline void
i40e_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
{
/*
* ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet
* should be computed by hardware. Also it should not have VLAN tag in
* ethernet header.
*/
if (rxr->lro_enabled &&
(ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
(m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
(CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
/*
* Send to the stack if:
** - LRO not enabled, or
** - no LRO resources, or
** - lro enqueue fails
*/
if (rxr->lro.lro_cnt != 0)
if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
return;
}
I40E_RX_UNLOCK(rxr);
(*ifp->if_input)(ifp, m);
I40E_RX_LOCK(rxr);
}
static __inline void
i40e_rx_discard(struct rx_ring *rxr, int i)
{
struct i40e_rx_buf *rbuf;
rbuf = &rxr->buffers[i];
if (rbuf->fmp != NULL) {/* Partial chain ? */
rbuf->fmp->m_flags |= M_PKTHDR;
m_freem(rbuf->fmp);
rbuf->fmp = NULL;
}
/*
** With advanced descriptors the writeback
** clobbers the buffer addrs, so its easier
** to just free the existing mbufs and take
** the normal refresh path to get new buffers
** and mapping.
*/
if (rbuf->m_head) {
m_free(rbuf->m_head);
rbuf->m_head = NULL;
}
if (rbuf->m_pack) {
m_free(rbuf->m_pack);
rbuf->m_pack = NULL;
}
return;
}
/*********************************************************************
*
* This routine executes in interrupt context. It replenishes
* the mbufs in the descriptor and sends data which has been
* dma'ed into host memory to upper layer.
*
* We loop at most count times if count is > 0, or until done if
* count < 0.
*
* Return TRUE for more work, FALSE for all clean.
*********************************************************************/
bool
i40e_rxeof(struct i40e_queue *que, int count)
{
struct i40e_vsi *vsi = que->vsi;
struct rx_ring *rxr = &que->rxr;
struct ifnet *ifp = vsi->ifp;
struct lro_ctrl *lro = &rxr->lro;
struct lro_entry *queued;
int i, nextp, processed = 0;
union i40e_rx_desc *cur;
struct i40e_rx_buf *rbuf, *nbuf;
I40E_RX_LOCK(rxr);
for (i = rxr->next_check; count != 0;) {
struct mbuf *sendmp, *mh, *mp;
u32 rsc, ptype, status, error;
u16 hlen, plen, vtag;
u64 qword;
bool eop;
/* Sync the ring. */
bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
cur = &rxr->base[i];
qword = le64toh(cur->wb.qword1.status_error_len);
status = (qword & I40E_RXD_QW1_STATUS_MASK)
>> I40E_RXD_QW1_STATUS_SHIFT;
error = (qword & I40E_RXD_QW1_ERROR_MASK)
>> I40E_RXD_QW1_ERROR_SHIFT;
plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
>> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
>> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
>> I40E_RXD_QW1_PTYPE_SHIFT;
if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
++rxr->not_done;
break;
}
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
break;
count--;
sendmp = NULL;
nbuf = NULL;
rsc = 0;
cur->wb.qword1.status_error_len = 0;
rbuf = &rxr->buffers[i];
mh = rbuf->m_head;
mp = rbuf->m_pack;
eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
else
vtag = 0;
/*
** Make sure bad packets are discarded,
** note that only EOP descriptor has valid
** error results.
*/
if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
ifp->if_ierrors++;
rxr->discarded++;
i40e_rx_discard(rxr, i);
goto next_desc;
}
/* Prefetch the next buffer */
if (!eop) {
nextp = i + 1;
if (nextp == que->num_desc)
nextp = 0;
nbuf = &rxr->buffers[nextp];
prefetch(nbuf);
}
/*
** The header mbuf is ONLY used when header
** split is enabled, otherwise we get normal
** behavior, ie, both header and payload
** are DMA'd into the payload buffer.
**
** Rather than using the fmp/lmp global pointers
** we now keep the head of a packet chain in the
** buffer struct and pass this along from one
** descriptor to the next, until we get EOP.
*/
if (rxr->hdr_split && (rbuf->fmp == NULL)) {
if (hlen > I40E_RX_HDR)
hlen = I40E_RX_HDR;
mh->m_len = hlen;
mh->m_flags |= M_PKTHDR;
mh->m_next = NULL;
mh->m_pkthdr.len = mh->m_len;
/* Null buf pointer so it is refreshed */
rbuf->m_head = NULL;
/*
** Check the payload length, this
** could be zero if its a small
** packet.
*/
if (plen > 0) {
mp->m_len = plen;
mp->m_next = NULL;
mp->m_flags &= ~M_PKTHDR;
mh->m_next = mp;
mh->m_pkthdr.len += mp->m_len;
/* Null buf pointer so it is refreshed */
rbuf->m_pack = NULL;
rxr->split++;
}
/*
** Now create the forward
** chain so when complete
** we wont have to.
*/
if (eop == 0) {
/* stash the chain head */
nbuf->fmp = mh;
/* Make forward chain */
if (plen)
mp->m_next = nbuf->m_pack;
else
mh->m_next = nbuf->m_pack;
} else {
/* Singlet, prepare to send */
sendmp = mh;
if (vtag) {
sendmp->m_pkthdr.ether_vtag = vtag;
sendmp->m_flags |= M_VLANTAG;
}
}
} else {
/*
** Either no header split, or a
** secondary piece of a fragmented
** split packet.
*/
mp->m_len = plen;
/*
** See if there is a stored head
** that determines what we are
*/
sendmp = rbuf->fmp;
rbuf->m_pack = rbuf->fmp = NULL;
if (sendmp != NULL) /* secondary frag */
sendmp->m_pkthdr.len += mp->m_len;
else {
/* first desc of a non-ps chain */
sendmp = mp;
sendmp->m_flags |= M_PKTHDR;
sendmp->m_pkthdr.len = mp->m_len;
if (vtag) {
sendmp->m_pkthdr.ether_vtag = vtag;
sendmp->m_flags |= M_VLANTAG;
}
}
/* Pass the head pointer on */
if (eop == 0) {
nbuf->fmp = sendmp;
sendmp = NULL;
mp->m_next = nbuf->m_pack;
}
}
++processed;
/* Sending this frame? */
if (eop) {
sendmp->m_pkthdr.rcvif = ifp;
/* gather stats */
ifp->if_ipackets++;
rxr->rx_packets++;
rxr->rx_bytes += sendmp->m_pkthdr.len;
/* capture data for dynamic ITR adjustment */
rxr->packets++;
rxr->bytes += sendmp->m_pkthdr.len;
if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
i40e_rx_checksum(sendmp, status, error, ptype);
sendmp->m_pkthdr.flowid = que->msix;
sendmp->m_flags |= M_FLOWID;
}
next_desc:
bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/* Advance our pointers to the next descriptor. */
if (++i == que->num_desc)
i = 0;
/* Now send to the stack or do LRO */
if (sendmp != NULL) {
rxr->next_check = i;
i40e_rx_input(rxr, ifp, sendmp, ptype);
i = rxr->next_check;
}
/* Every 8 descriptors we go to refresh mbufs */
if (processed == 8) {
i40e_refresh_mbufs(que, i);
processed = 0;
}
}
/* Refresh any remaining buf structs */
if (i40e_rx_unrefreshed(que))
i40e_refresh_mbufs(que, i);
rxr->next_check = i;
/*
* Flush any outstanding LRO work
*/
while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
SLIST_REMOVE_HEAD(&lro->lro_active, next);
tcp_lro_flush(lro, queued);
}
I40E_RX_UNLOCK(rxr);
return (FALSE);
}
/*********************************************************************
*
* Verify that the hardware indicated that the checksum is valid.
* Inform the stack about the status of checksum so that stack
* doesn't spend time verifying the checksum.
*
*********************************************************************/
static void
i40e_rx_checksum(struct mbuf * mp, u32 status, u32 error, u32 ptype)
{
/* Errors? */
if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
(1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
mp->m_pkthdr.csum_flags = 0;
return;
}
/* IP Checksum Good */
mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
mp->m_pkthdr.csum_flags |=
(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
mp->m_pkthdr.csum_data |= htons(0xffff);
}
return;
}