freebsd-dev/sys/dev/netmap/if_em_netmap.h
Sean Bruno 23c9098b2a Change EM_MULTIQUEUE to a real kernconf entry and enable support for
up to 2 rx/tx queues for the 82574.

Program the 82574 to enable 5 msix vectors, assign 1 to each rx queue,
1 to each tx queue and 1 to the link handler.

Inspired by DragonFlyBSD, enable some RSS logic for handling tx queue
handling/processing.

Move multiqueue handler functions so that they line up better in a diff
review to if_igb.c

Always enqueue tx work to be done in em_mq_start, if unable to acquire
the TX lock, then this will be processed in the background later by the
taskqueue.  Remove mbuf argument from em_start_mq_locked() as the work
is always enqueued.  (stolen from igb)

Setup TARC, TXDCTL and RXDCTL registers for better performance and stability
in multiqueue and singlequeue implementations. Handle Intel errata  3 and
generic multiqueue behavior with the initialization of TARC(0) and TARC(1)

Bind interrupt threads to cpus in order.  (stolen from igb)

Add 2 new DDB functions, one to display the queue(s) and their settings and
one to reset the adapter.  Primarily used for debugging.

In the multiqueue configuration, bump RXD and TXD ring size to max for the
adapter (4096).  Setup an RDTR of 64 and an RADV of 128 in multiqueue configuration
to cut down on the number of interrupts.  RADV was arbitrarily set to 2x RDTR
and can be adjusted as needed.

Cleanup the display in top a bit to make it clearer where the taskqueue threads
are running and what they should be doing.

Ensure that both queues are processed by em_local_timer() by writing them both
to the IMS register to generate soft interrupts.

Ensure that an soft interrupt is generated when em_msix_link() is run so that
any races between assertion of the link/status interrupt and a rx/tx interrupt
are handled.

Document existing tuneables: hw.em.eee_setting, hw.em.msix, hw.em.smart_pwr_down, hw.em.sbp

Document use of hw.em.num_queues and the new kernel option EM_MULTIQUEUE

Thanks to Intel for their continued support of FreeBSD.

Reviewed by:	erj jfv hiren gnn wblock
Obtained from:	Intel Corporation
MFC after:	2 weeks
Relnotes:	Yes
Sponsored by:	Limelight Networks
Differential Revision:	https://reviews.freebsd.org/D1994
2015-06-03 18:01:09 +00:00

335 lines
9.4 KiB
C

/*
* Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $FreeBSD$
*
* netmap support for: em.
*
* For more details on netmap support please see ixgbe_netmap.h
*/
#include <net/netmap.h>
#include <sys/selinfo.h>
#include <vm/vm.h>
#include <vm/pmap.h> /* vtophys ? */
#include <dev/netmap/netmap_kern.h>
// XXX do we need to block/unblock the tasks ?
static void
em_netmap_block_tasks(struct adapter *adapter)
{
if (adapter->msix > 1) { /* MSIX */
int i;
struct tx_ring *txr = adapter->tx_rings;
struct rx_ring *rxr = adapter->rx_rings;
for (i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
taskqueue_block(txr->tq);
taskqueue_drain(txr->tq, &txr->tx_task);
taskqueue_block(rxr->tq);
taskqueue_drain(rxr->tq, &rxr->rx_task);
}
} else { /* legacy */
taskqueue_block(adapter->tq);
taskqueue_drain(adapter->tq, &adapter->link_task);
taskqueue_drain(adapter->tq, &adapter->que_task);
}
}
static void
em_netmap_unblock_tasks(struct adapter *adapter)
{
if (adapter->msix > 1) {
struct tx_ring *txr = adapter->tx_rings;
struct rx_ring *rxr = adapter->rx_rings;
int i;
for (i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
taskqueue_unblock(txr->tq);
taskqueue_unblock(rxr->tq);
}
} else { /* legacy */
taskqueue_unblock(adapter->tq);
}
}
/*
* Register/unregister. We are already under netmap lock.
*/
static int
em_netmap_reg(struct netmap_adapter *na, int onoff)
{
struct ifnet *ifp = na->ifp;
struct adapter *adapter = ifp->if_softc;
EM_CORE_LOCK(adapter);
em_disable_intr(adapter);
/* Tell the stack that the interface is no longer active */
ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
em_netmap_block_tasks(adapter);
/* enable or disable flags and callbacks in na and ifp */
if (onoff) {
nm_set_native_flags(na);
} else {
nm_clear_native_flags(na);
}
em_init_locked(adapter); /* also enable intr */
em_netmap_unblock_tasks(adapter);
EM_CORE_UNLOCK(adapter);
return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}
/*
* Reconcile kernel and user view of the transmit ring.
*/
static int
em_netmap_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
/* generate an interrupt approximately every half ring */
u_int report_frequency = kring->nkr_num_slots >> 1;
/* device-specific */
struct adapter *adapter = ifp->if_softc;
struct tx_ring *txr = &adapter->tx_rings[kring->ring_id];
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
/*
* First part: process new packets to send.
*/
nm_i = kring->nr_hwcur;
if (nm_i != head) { /* we have new packets to send */
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(na, slot, &paddr);
/* device-specific */
struct e1000_tx_desc *curr = &txr->tx_base[nic_i];
struct em_buffer *txbuf = &txr->tx_buffers[nic_i];
int flags = (slot->flags & NS_REPORT ||
nic_i == 0 || nic_i == report_frequency) ?
E1000_TXD_CMD_RS : 0;
NM_CHECK_ADDR_LEN(na, addr, len);
if (slot->flags & NS_BUF_CHANGED) {
curr->buffer_addr = htole64(paddr);
/* buffer has changed, reload map */
netmap_reload_map(na, txr->txtag, txbuf->map, addr);
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
/* Fill the slot in the NIC ring. */
curr->upper.data = 0;
curr->lower.data = htole32(adapter->txd_cmd | len |
(E1000_TXD_CMD_EOP | flags) );
bus_dmamap_sync(txr->txtag, txbuf->map,
BUS_DMASYNC_PREWRITE);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = head;
/* synchronize the NIC ring */
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/* (re)start the tx unit up to slot nic_i (excluded) */
E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), nic_i);
}
/*
* Second part: reclaim buffers for completed transmissions.
*/
if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
/* record completed transmissions using TDH */
nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(kring->ring_id));
if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", nic_i);
nic_i -= kring->nkr_num_slots;
}
if (nic_i != txr->next_to_clean) {
txr->next_to_clean = nic_i;
kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
}
}
nm_txsync_finalize(kring);
return 0;
}
/*
* Reconcile kernel and user view of the receive ring.
*/
static int
em_netmap_rxsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = nm_rxsync_prologue(kring);
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
struct adapter *adapter = ifp->if_softc;
struct rx_ring *rxr = &adapter->rx_rings[kring->ring_id];
if (head > lim)
return netmap_ring_reinit(kring);
/* XXX check sync modes */
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
/*
* First part: import newly received packets.
*/
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
nic_i = rxr->next_to_check;
nm_i = netmap_idx_n2k(kring, nic_i);
for (n = 0; ; n++) { // XXX no need to count
struct e1000_rx_desc *curr = &rxr->rx_base[nic_i];
uint32_t staterr = le32toh(curr->status);
if ((staterr & E1000_RXD_STAT_DD) == 0)
break;
ring->slot[nm_i].len = le16toh(curr->length);
ring->slot[nm_i].flags = slot_flags;
bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[nic_i].map,
BUS_DMASYNC_POSTREAD);
nm_i = nm_next(nm_i, lim);
/* make sure next_to_refresh follows next_to_check */
rxr->next_to_refresh = nic_i; // XXX
nic_i = nm_next(nic_i, lim);
}
if (n) { /* update the state variables */
rxr->next_to_check = nic_i;
kring->nr_hwtail = nm_i;
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
/*
* Second part: skip past packets that userspace has released.
*/
nm_i = kring->nr_hwcur;
if (nm_i != head) {
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(na, slot, &paddr);
struct e1000_rx_desc *curr = &rxr->rx_base[nic_i];
struct em_buffer *rxbuf = &rxr->rx_buffers[nic_i];
if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
goto ring_reset;
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
curr->buffer_addr = htole64(paddr);
netmap_reload_map(na, rxr->rxtag, rxbuf->map, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
curr->status = 0;
bus_dmamap_sync(rxr->rxtag, rxbuf->map,
BUS_DMASYNC_PREREAD);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = head;
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/*
* IMPORTANT: we must leave one free slot in the ring,
* so move nic_i back by one unit
*/
nic_i = nm_prev(nic_i, lim);
E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), nic_i);
}
/* tell userspace that there might be new packets */
nm_rxsync_finalize(kring);
return 0;
ring_reset:
return netmap_ring_reinit(kring);
}
static void
em_netmap_attach(struct adapter *adapter)
{
struct netmap_adapter na;
bzero(&na, sizeof(na));
na.ifp = adapter->ifp;
na.na_flags = NAF_BDG_MAYSLEEP;
na.num_tx_desc = adapter->num_tx_desc;
na.num_rx_desc = adapter->num_rx_desc;
na.nm_txsync = em_netmap_txsync;
na.nm_rxsync = em_netmap_rxsync;
na.nm_register = em_netmap_reg;
na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
netmap_attach(&na);
}
/* end of file */