Convert vmx(4) to being an iflib driver.

Also, expose IFLIB_MAX_RX_SEGS to iflib drivers and add
iflib_dma_alloc_align() to the iflib API.

Performance is generally better with the tunable/sysctl
dev.vmx.<index>.iflib.tx_abdicate=1.
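
For example, to enable it at runtime on the first vmx interface (the
device index 0 here is illustrative):

	# sysctl dev.vmx.0.iflib.tx_abdicate=1

The same setting can be made persistent via /etc/sysctl.conf.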

Reviewed by:	shurd
MFC after:	1 week
Relnotes:	yes
Sponsored by:	RG Nets
Differential Revision:	https://reviews.freebsd.org/D18761
commit 8f82136aec
parent 7f3eb9dab3
Author:	Patrick Kelsey
Date:	2019-01-22 01:11:17 +00:00

4 changed files with 1236 additions and 2911 deletions

[File diff suppressed because it is too large]

sys/dev/vmware/vmxnet3/if_vmxvar.h

@@ -1,6 +1,7 @@
 /*-
  * Copyright (c) 2013 Tsubai Masanari
  * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
+ * Copyright (c) 2018 Patrick Kelsey
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -22,14 +23,6 @@
 struct vmxnet3_softc;
 
-struct vmxnet3_dma_alloc {
-	bus_addr_t	dma_paddr;
-	caddr_t		dma_vaddr;
-	bus_dma_tag_t	dma_tag;
-	bus_dmamap_t	dma_map;
-	bus_size_t	dma_size;
-};
-
 /*
  * The number of Rx/Tx queues this driver prefers.
  */
@@ -57,153 +50,68 @@ struct vmxnet3_dma_alloc {
 #define VMXNET3_MAX_RX_NCOMPDESC \
     (VMXNET3_MAX_RX_NDESC * VMXNET3_RXRINGS_PERQ)
 
-struct vmxnet3_txbuf {
-	bus_dmamap_t	 vtxb_dmamap;
-	struct mbuf	*vtxb_m;
-};
-
 struct vmxnet3_txring {
-	struct vmxnet3_txbuf	*vxtxr_txbuf;
-	u_int			 vxtxr_head;
 	u_int			 vxtxr_next;
 	u_int			 vxtxr_ndesc;
 	int			 vxtxr_gen;
-	bus_dma_tag_t		 vxtxr_txtag;
 	struct vmxnet3_txdesc	*vxtxr_txd;
-	struct vmxnet3_dma_alloc vxtxr_dma;
+	bus_addr_t		 vxtxr_paddr;
 };
 
-static inline int
-VMXNET3_TXRING_AVAIL(struct vmxnet3_txring *txr)
-{
-	int avail = txr->vxtxr_next - txr->vxtxr_head - 1;
-	return (avail < 0 ? txr->vxtxr_ndesc + avail : avail);
-}
-
-struct vmxnet3_rxbuf {
-	bus_dmamap_t	 vrxb_dmamap;
-	struct mbuf	*vrxb_m;
-};
-
 struct vmxnet3_rxring {
-	struct vmxnet3_rxbuf	*vxrxr_rxbuf;
 	struct vmxnet3_rxdesc	*vxrxr_rxd;
-	u_int			 vxrxr_fill;
 	u_int			 vxrxr_ndesc;
 	int			 vxrxr_gen;
-	int			 vxrxr_rid;
-	bus_dma_tag_t		 vxrxr_rxtag;
-	struct vmxnet3_dma_alloc vxrxr_dma;
-	bus_dmamap_t		 vxrxr_spare_dmap;
+	bus_addr_t		 vxrxr_paddr;
 };
 
-static inline void
-vmxnet3_rxr_increment_fill(struct vmxnet3_rxring *rxr)
-{
-	if (++rxr->vxrxr_fill == rxr->vxrxr_ndesc) {
-		rxr->vxrxr_fill = 0;
-		rxr->vxrxr_gen ^= 1;
-	}
-}
-
 struct vmxnet3_comp_ring {
 	union {
 		struct vmxnet3_txcompdesc *txcd;
 		struct vmxnet3_rxcompdesc *rxcd;
 	}		 vxcr_u;
+	/*
+	 * vxcr_next is used on the transmit side to track the next index to
+	 * begin cleaning at.  It is not used on the receive side.
	 */
 	u_int		 vxcr_next;
 	u_int		 vxcr_ndesc;
 	int		 vxcr_gen;
-	struct vmxnet3_dma_alloc vxcr_dma;
-};
-
-struct vmxnet3_txq_stats {
-	uint64_t	vmtxs_opackets;	/* if_opackets */
-	uint64_t	vmtxs_obytes;	/* if_obytes */
-	uint64_t	vmtxs_omcasts;	/* if_omcasts */
-	uint64_t	vmtxs_csum;
-	uint64_t	vmtxs_tso;
-	uint64_t	vmtxs_full;
-	uint64_t	vmtxs_offload_failed;
+	bus_addr_t	 vxcr_paddr;
 };
 
 struct vmxnet3_txqueue {
-	struct mtx		 vxtxq_mtx;
 	struct vmxnet3_softc	*vxtxq_sc;
-#ifndef VMXNET3_LEGACY_TX
-	struct buf_ring		*vxtxq_br;
-#endif
 	int			 vxtxq_id;
-	int			 vxtxq_last_flush;
 	int			 vxtxq_intr_idx;
-	int			 vxtxq_watchdog;
 	struct vmxnet3_txring	 vxtxq_cmd_ring;
 	struct vmxnet3_comp_ring vxtxq_comp_ring;
-	struct vmxnet3_txq_stats vxtxq_stats;
 	struct vmxnet3_txq_shared *vxtxq_ts;
 	struct sysctl_oid_list	*vxtxq_sysctl;
-#ifndef VMXNET3_LEGACY_TX
-	struct task		 vxtxq_defrtask;
-#endif
 	char			 vxtxq_name[16];
 } __aligned(CACHE_LINE_SIZE);
 
-#define VMXNET3_TXQ_LOCK(_txq)		mtx_lock(&(_txq)->vxtxq_mtx)
-#define VMXNET3_TXQ_TRYLOCK(_txq)	mtx_trylock(&(_txq)->vxtxq_mtx)
-#define VMXNET3_TXQ_UNLOCK(_txq)	mtx_unlock(&(_txq)->vxtxq_mtx)
-#define VMXNET3_TXQ_LOCK_ASSERT(_txq) \
-    mtx_assert(&(_txq)->vxtxq_mtx, MA_OWNED)
-#define VMXNET3_TXQ_LOCK_ASSERT_NOTOWNED(_txq) \
-    mtx_assert(&(_txq)->vxtxq_mtx, MA_NOTOWNED)
-
-struct vmxnet3_rxq_stats {
-	uint64_t	vmrxs_ipackets;	/* if_ipackets */
-	uint64_t	vmrxs_ibytes;	/* if_ibytes */
-	uint64_t	vmrxs_iqdrops;	/* if_iqdrops */
-	uint64_t	vmrxs_ierrors;	/* if_ierrors */
-};
-
 struct vmxnet3_rxqueue {
-	struct mtx		 vxrxq_mtx;
 	struct vmxnet3_softc	*vxrxq_sc;
 	int			 vxrxq_id;
 	int			 vxrxq_intr_idx;
-	struct mbuf		*vxrxq_mhead;
-	struct mbuf		*vxrxq_mtail;
+	struct if_irq		 vxrxq_irq;
 	struct vmxnet3_rxring	 vxrxq_cmd_ring[VMXNET3_RXRINGS_PERQ];
 	struct vmxnet3_comp_ring vxrxq_comp_ring;
-	struct vmxnet3_rxq_stats vxrxq_stats;
 	struct vmxnet3_rxq_shared *vxrxq_rs;
 	struct sysctl_oid_list	*vxrxq_sysctl;
 	char			 vxrxq_name[16];
 } __aligned(CACHE_LINE_SIZE);
 
-#define VMXNET3_RXQ_LOCK(_rxq)		mtx_lock(&(_rxq)->vxrxq_mtx)
-#define VMXNET3_RXQ_UNLOCK(_rxq)	mtx_unlock(&(_rxq)->vxrxq_mtx)
-#define VMXNET3_RXQ_LOCK_ASSERT(_rxq) \
-    mtx_assert(&(_rxq)->vxrxq_mtx, MA_OWNED)
-#define VMXNET3_RXQ_LOCK_ASSERT_NOTOWNED(_rxq) \
-    mtx_assert(&(_rxq)->vxrxq_mtx, MA_NOTOWNED)
-
-struct vmxnet3_statistics {
-	uint32_t	vmst_defragged;
-	uint32_t	vmst_defrag_failed;
-	uint32_t	vmst_mgetcl_failed;
-	uint32_t	vmst_mbuf_load_failed;
-};
-
-struct vmxnet3_interrupt {
-	struct resource		*vmxi_irq;
-	int			 vmxi_rid;
-	void			*vmxi_handler;
-};
-
 struct vmxnet3_softc {
 	device_t		 vmx_dev;
+	if_ctx_t		 vmx_ctx;
+	if_shared_ctx_t		 vmx_sctx;
+	if_softc_ctx_t		 vmx_scctx;
 	struct ifnet		*vmx_ifp;
 	struct vmxnet3_driver_shared *vmx_ds;
 	uint32_t		 vmx_flags;
-#define VMXNET3_FLAG_NO_MSIX	0x0001
 #define VMXNET3_FLAG_RSS	0x0002
 
 	struct vmxnet3_rxqueue	*vmx_rxq;
@@ -215,56 +123,24 @@ struct vmxnet3_softc {
 	struct resource		*vmx_res1;
 	bus_space_tag_t		 vmx_iot1;
 	bus_space_handle_t	 vmx_ioh1;
-	struct resource		*vmx_msix_res;
 
 	int			 vmx_link_active;
-	int			 vmx_link_speed;
-	int			 vmx_if_flags;
 
-	int			 vmx_ntxqueues;
-	int			 vmx_nrxqueues;
-	int			 vmx_ntxdescs;
-	int			 vmx_nrxdescs;
-	int			 vmx_max_rxsegs;
-	int			 vmx_rx_max_chain;
-
-	struct vmxnet3_statistics vmx_stats;
-
-	int			 vmx_intr_type;
 	int			 vmx_intr_mask_mode;
 	int			 vmx_event_intr_idx;
-	int			 vmx_nintrs;
-	struct vmxnet3_interrupt vmx_intrs[VMXNET3_MAX_INTRS];
+	struct if_irq		 vmx_event_intr_irq;
 
-	struct mtx		 vmx_mtx;
-#ifndef VMXNET3_LEGACY_TX
-	struct taskqueue	*vmx_tq;
-#endif
 	uint8_t			*vmx_mcast;
-	void			*vmx_qs;
 	struct vmxnet3_rss_shared *vmx_rss;
-	struct callout		 vmx_tick;
-	struct vmxnet3_dma_alloc vmx_ds_dma;
-	struct vmxnet3_dma_alloc vmx_qs_dma;
-	struct vmxnet3_dma_alloc vmx_mcast_dma;
-	struct vmxnet3_dma_alloc vmx_rss_dma;
-	struct ifmedia		 vmx_media;
-	int			 vmx_max_ntxqueues;
-	int			 vmx_max_nrxqueues;
-	eventhandler_tag	 vmx_vlan_attach;
-	eventhandler_tag	 vmx_vlan_detach;
+	struct iflib_dma_info	 vmx_ds_dma;
+	struct iflib_dma_info	 vmx_qs_dma;
+	struct iflib_dma_info	 vmx_mcast_dma;
+	struct iflib_dma_info	 vmx_rss_dma;
+	struct ifmedia		*vmx_media;
 	uint32_t		 vmx_vlan_filter[4096/32];
 	uint8_t			 vmx_lladdr[ETHER_ADDR_LEN];
 };
 
-#define VMXNET3_CORE_LOCK_INIT(_sc, _name) \
-    mtx_init(&(_sc)->vmx_mtx, _name, "VMXNET3 Lock", MTX_DEF)
-#define VMXNET3_CORE_LOCK_DESTROY(_sc)	mtx_destroy(&(_sc)->vmx_mtx)
-#define VMXNET3_CORE_LOCK(_sc)		mtx_lock(&(_sc)->vmx_mtx)
-#define VMXNET3_CORE_UNLOCK(_sc)	mtx_unlock(&(_sc)->vmx_mtx)
-#define VMXNET3_CORE_LOCK_ASSERT(_sc)	mtx_assert(&(_sc)->vmx_mtx, MA_OWNED)
-#define VMXNET3_CORE_LOCK_ASSERT_NOTOWNED(_sc) \
-    mtx_assert(&(_sc)->vmx_mtx, MA_NOTOWNED)
-
 /*
  * Our driver version we report to the hypervisor; we just keep
  * this value constant.
@@ -275,21 +151,28 @@ struct vmxnet3_softc {
  * Max descriptors per Tx packet.  We must limit the size of the
  * any TSO packets based on the number of segments.
  */
-#define VMXNET3_TX_MAXSEGS		32
+#define VMXNET3_TX_MAXSEGS		32	/* 64K @ 2K segment size */
 #define VMXNET3_TX_MAXSIZE		(VMXNET3_TX_MAXSEGS * MCLBYTES)
-#define VMXNET3_TSO_MAXSIZE		(VMXNET3_TX_MAXSIZE - ETHER_VLAN_ENCAP_LEN)
 
 /*
- * Maximum support Tx segments size. The length field in the
+ * Maximum supported Tx segment size.  The length field in the
  * Tx descriptor is 14 bits.
+ *
+ * XXX It's possible a descriptor length field of 0 means 2^14, but this
+ * isn't confirmed, so limit to 2^14 - 1 for now.
  */
-#define VMXNET3_TX_MAXSEGSIZE		(1 << 14)
+#define VMXNET3_TX_MAXSEGSIZE		((1 << 14) - 1)
 
 /*
- * The maximum number of Rx segments we accept. When LRO is enabled,
- * this allows us to receive the maximum sized frame with one MCLBYTES
- * cluster followed by 16 MJUMPAGESIZE clusters.
+ * Maximum supported Rx segment size.  The length field in the
+ * Rx descriptor is 14 bits.
+ *
+ * The reference drivers skip zero-length descriptors, which seems to be a
+ * strong indication that on the receive side, a descriptor length field of
+ * zero does not mean 2^14.
  */
-#define VMXNET3_MAX_RX_SEGS		17
+#define VMXNET3_RX_MAXSEGSIZE		((1 << 14) - 1)
 
 /*
  * Predetermined size of the multicast MACs filter table.  If the
@@ -298,17 +181,6 @@ struct vmxnet3_softc {
  */
 #define VMXNET3_MULTICAST_MAX	32
 
-/*
- * Our Tx watchdog timeout.
- */
-#define VMXNET3_WATCHDOG_TIMEOUT	5
-
-/*
- * Number of slots in the Tx bufrings.  This value matches most other
- * multiqueue drivers.
- */
-#define VMXNET3_DEF_BUFRING_SIZE	4096
-
 /*
  * IP protocols that we can perform Tx checksum offloading of.
  */
@@ -318,28 +190,4 @@ struct vmxnet3_softc {
 #define VMXNET3_CSUM_ALL_OFFLOAD \
     (VMXNET3_CSUM_OFFLOAD | VMXNET3_CSUM_OFFLOAD_IPV6 | CSUM_TSO)
 
-/*
- * Compat macros to keep this driver compiling on old releases.
- */
-
-#if !defined(SYSCTL_ADD_UQUAD)
-#define SYSCTL_ADD_UQUAD	SYSCTL_ADD_QUAD
-#endif
-
-#if !defined(IFCAP_TXCSUM_IPV6)
-#define IFCAP_TXCSUM_IPV6	0
-#endif
-
-#if !defined(IFCAP_RXCSUM_IPV6)
-#define IFCAP_RXCSUM_IPV6	0
-#endif
-
-#if !defined(CSUM_TCP_IPV6)
-#define CSUM_TCP_IPV6	0
-#endif
-
-#if !defined(CSUM_UDP_IPV6)
-#define CSUM_UDP_IPV6	0
-#endif
-
 #endif /* _IF_VMXVAR_H */
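
As an aside to the 14-bit length-field reasoning in the comments above, the
new segment-size limits are simply the largest value a 14-bit field can
encode; an illustrative check (not part of this commit):

	/* Illustrative only: a 14-bit length field encodes at most 16383. */
	_Static_assert(((1 << 14) - 1) == 16383, "14-bit descriptor length limit");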

sys/net/iflib.c

@@ -289,8 +289,6 @@ typedef struct iflib_sw_tx_desc_array {
 /* magic number that should be high enough for any hardware */
 #define IFLIB_MAX_TX_SEGS		128
-/* bnxt supports 64 with hardware LRO enabled */
-#define IFLIB_MAX_RX_SEGS		64
 #define IFLIB_RX_COPY_THRESH		128
 #define IFLIB_MAX_RX_REFRESH		32
 /* The minimum descriptors per second before we start coalescing */
@@ -1327,16 +1325,13 @@ _iflib_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err)
 }
 
 int
-iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags)
+iflib_dma_alloc_align(if_ctx_t ctx, int size, int align, iflib_dma_info_t dma, int mapflags)
 {
 	int err;
-	if_shared_ctx_t sctx = ctx->ifc_sctx;
 	device_t dev = ctx->ifc_dev;
 
-	KASSERT(sctx->isc_q_align != 0, ("alignment value not initialized"));
-
 	err = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
-				sctx->isc_q_align, 0,	/* alignment, bounds */
+				align, 0,		/* alignment, bounds */
 				BUS_SPACE_MAXADDR,	/* lowaddr */
 				BUS_SPACE_MAXADDR,	/* highaddr */
 				NULL, NULL,		/* filter, filterarg */
@@ -1386,6 +1381,16 @@ fail_0:
 	return (err);
 }
 
+int
+iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags)
+{
+	if_shared_ctx_t sctx = ctx->ifc_sctx;
+
+	KASSERT(sctx->isc_q_align != 0, ("alignment value not initialized"));
+
+	return (iflib_dma_alloc_align(ctx, size, sctx->isc_q_align, dma, mapflags));
+}
+
 int
 iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count)
 {

sys/net/iflib.h

@@ -69,6 +69,9 @@ typedef struct if_rxd_frag {
 	uint16_t irf_len;
 } *if_rxd_frag_t;
 
+/* bnxt supports 64 with hardware LRO enabled */
+#define IFLIB_MAX_RX_SEGS	64
+
 typedef struct if_rxd_info {
 	/* set by iflib */
 	uint16_t iri_qsidx;		/* qset index */
@@ -428,6 +431,7 @@ void iflib_iov_intr_deferred(if_ctx_t ctx);
 void iflib_link_state_change(if_ctx_t ctx, int linkstate, uint64_t baudrate);
 int iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags);
+int iflib_dma_alloc_align(if_ctx_t ctx, int size, int align, iflib_dma_info_t dma, int mapflags);
 void iflib_dma_free(iflib_dma_info_t dma);
 int iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count);
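
A minimal sketch of how a driver might use the new allocator, assuming an
if_ctx_t obtained at iflib attach time and <net/iflib.h> in scope; the 4096
size, 128-byte alignment, and function name here are illustrative, not from
this commit:

	/*
	 * Illustrative sketch: allocate a DMA-coherent region with an
	 * explicit 128-byte alignment, bypassing the shared-ctx isc_q_align
	 * default that plain iflib_dma_alloc() uses.
	 */
	static int
	example_alloc_region(if_ctx_t ctx, iflib_dma_info_t dma)
	{
		int error;

		error = iflib_dma_alloc_align(ctx, 4096, 128, dma, BUS_DMA_NOWAIT);
		if (error != 0)
			return (error);
		/* dma->idi_vaddr (CPU) and dma->idi_paddr (device) are now valid. */
		return (0);
	}

The region is released with iflib_dma_free(dma) when no longer needed.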