axgbe: driver changes for netmap support

AMD 10GbE hardware provides two buffers per receive descriptor in order to
support the split header feature. To match this, the driver was designed to
use two iflib freelists per receive queue, so that buffers from both
freelists are used to refill a single receive descriptor. This design works
well for regular data traffic.
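
For illustration, a minimal sketch of how one descriptor is refilled from two
buffers, modeled on the xgbe_ring_desc usage in the refill hunk below (the
helper is illustrative only; the driver stores these fields in little-endian
form via cpu_to_le32(), which is omitted here):

	#include <stdint.h>

	/* 16-byte receive descriptor: desc0/1 carry the header (buffer 1)
	 * address, desc2/3 the data (buffer 2) address. */
	struct xgbe_ring_desc {
		uint32_t desc0;	/* header address, low 32 bits */
		uint32_t desc1;	/* header address, high 32 bits */
		uint32_t desc2;	/* data address, low 32 bits */
		uint32_t desc3;	/* data address, high 32 bits */
	};

	/* Illustrative helper: one buffer from each freelist fills
	 * one descriptor. */
	static void
	rxd_fill_split(struct xgbe_ring_desc *rdesc, uint64_t hdr_pa,
	    uint64_t data_pa)
	{
		rdesc->desc0 = (uint32_t)hdr_pa;	 /* lower_32_bits() */
		rdesc->desc1 = (uint32_t)(hdr_pa >> 32); /* upper_32_bits() */
		rdesc->desc2 = (uint32_t)data_pa;
		rdesc->desc3 = (uint32_t)(data_pa >> 32);
	}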

Netmap, however, does not fit this design. The current netmap interfaces
and the netmap implementation in iflib do not accommodate two freelists per
receive queue, so exercising the netmap capability with the in-tree tools
like bridge and pkt-gen does not work with the two-freelist driver design.

The driver is therefore changed to fit the current netmap interfaces and
the netmap implementation in iflib: when the netmap capability is to be
exercised, a single freelist per receive queue is used, while the existing
two-freelist approach is left undisturbed otherwise. Set the
dev.ax.sph_enable tunable to 0 to configure the single-freelist mode.
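
For example, to select the single-freelist mode at boot, the tunable can be
set from loader.conf (the per-unit sysctl path below is an assumption based
on the read-only sysctl added by this change):

	# /boot/loader.conf: disable split header so that the driver uses
	# a single freelist per receive queue (required for netmap)
	dev.ax.sph_enable="0"

After boot, the resulting state can be read back with something like
"sysctl dev.ax.0.sph_enable".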

Thanks to Stephan Dewt for his initial set of code changes for the stated
problem.

Submitted by:	rajesh1.kumar_amd.com
Approved by:	vmaffione
MFC after:	3 weeks
Differential Revision:	https://reviews.freebsd.org/D27797
commit 2968dde3de (parent ad1ebbe5ce)
Vincenzo Maffione, 2021-01-16 08:23:19 +0000
5 changed files with 199 additions and 66 deletions

sys/dev/axgbe/if_axgbe_pci.c

@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/rman.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
+#include <sys/systm.h>
 
 #include <net/if.h>
 #include <net/if_media.h>
@@ -62,6 +63,7 @@ __FBSDID("$FreeBSD$");
 MALLOC_DEFINE(M_AXGBE, "axgbe", "axgbe data");
 
 extern struct if_txrx axgbe_txrx;
+static int axgbe_sph_enable;
 
 /* Function prototypes */
 static void *axgbe_register(device_t);
@@ -252,16 +254,11 @@ static struct if_shared_ctx axgbe_sctx_init = {
 	.isc_vendor_info = axgbe_vendor_info_array,
 	.isc_driver_version = XGBE_DRV_VERSION,
 
-	.isc_nrxd_min = {XGBE_RX_DESC_CNT_MIN, XGBE_RX_DESC_CNT_MIN},
-	.isc_nrxd_default = {XGBE_RX_DESC_CNT_DEFAULT, XGBE_RX_DESC_CNT_DEFAULT},
-	.isc_nrxd_max = {XGBE_RX_DESC_CNT_MAX, XGBE_RX_DESC_CNT_MAX},
 	.isc_ntxd_min = {XGBE_TX_DESC_CNT_MIN},
 	.isc_ntxd_default = {XGBE_TX_DESC_CNT_DEFAULT},
 	.isc_ntxd_max = {XGBE_TX_DESC_CNT_MAX},
 
-	.isc_nfl = 2,
 	.isc_ntxqs = 1,
-	.isc_nrxqs = 2,
 	.isc_flags = IFLIB_TSO_INIT_IP | IFLIB_NEED_SCRATCH |
 	    IFLIB_NEED_ZERO_CSUM | IFLIB_NEED_ETHER_PAD,
 };
@@ -269,6 +266,44 @@ static struct if_shared_ctx axgbe_sctx_init = {
 static void *
 axgbe_register(device_t dev)
 {
+	int axgbe_nfl;
+	int axgbe_nrxqs;
+	int error, i;
+	char *value = NULL;
+
+	value = kern_getenv("dev.ax.sph_enable");
+	if (value) {
+		axgbe_sph_enable = strtol(value, NULL, 10);
+		freeenv(value);
+	} else {
+		/*
+		 * No tunable found, generate one with default values
+		 * Note: only a reboot will reveal the new kenv
+		 */
+		error = kern_setenv("dev.ax.sph_enable", "1");
+		if (error) {
+			printf("Error setting tunable, using default driver values\n");
+		}
+		axgbe_sph_enable = 1;
+	}
+
+	if (!axgbe_sph_enable) {
+		axgbe_nfl = 1;
+		axgbe_nrxqs = 1;
+	} else {
+		axgbe_nfl = 2;
+		axgbe_nrxqs = 2;
+	}
+
+	axgbe_sctx_init.isc_nfl = axgbe_nfl;
+	axgbe_sctx_init.isc_nrxqs = axgbe_nrxqs;
+
+	for (i = 0 ; i < axgbe_nrxqs ; i++) {
+		axgbe_sctx_init.isc_nrxd_min[i] = XGBE_RX_DESC_CNT_MIN;
+		axgbe_sctx_init.isc_nrxd_default[i] = XGBE_RX_DESC_CNT_DEFAULT;
+		axgbe_sctx_init.isc_nrxd_max[i] = XGBE_RX_DESC_CNT_MAX;
+	}
+
 	return (&axgbe_sctx_init);
 }
@@ -1292,6 +1327,9 @@ axgbe_if_attach_post(if_ctx_t ctx)
 	if_softc_ctx_t scctx = sc->scctx;
 	int i, ret;
 
+	/* set split header support based on tunable */
+	pdata->sph_enable = axgbe_sph_enable;
+
 	/* Initialize ECC timestamps */
 	pdata->tx_sec_period = ticks;
 	pdata->tx_ded_period = ticks;
@@ -1404,6 +1442,8 @@ axgbe_if_attach_post(if_ctx_t ctx)
 	axgbe_sysctl_init(pdata);
 
+	axgbe_pci_init(pdata);
+
 	return (0);
 } /* axgbe_if_attach_post */
@@ -1491,7 +1531,12 @@ axgbe_pci_init(struct xgbe_prv_data *pdata)
 	struct xgbe_phy_if *phy_if = &pdata->phy_if;
 	struct xgbe_hw_if *hw_if = &pdata->hw_if;
 	int ret = 0;
 
+	if (!__predict_false((test_bit(XGBE_DOWN, &pdata->dev_state)))) {
+		axgbe_printf(1, "%s: Starting when XGBE_UP\n", __func__);
+		return;
+	}
+
 	hw_if->init(pdata);
 
 	ret = phy_if->phy_start(pdata);
@@ -1656,7 +1701,11 @@ axgbe_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *va, uint64_t *pa, int nrxqs,
 	MPASS(scctx->isc_nrxqsets > 0);
 	MPASS(scctx->isc_nrxqsets == nrxqsets);
-	MPASS(nrxqs == 2);
+	if (!pdata->sph_enable) {
+		MPASS(nrxqs == 1);
+	} else {
+		MPASS(nrxqs == 2);
+	}
 
 	axgbe_printf(1, "%s: rxqsets %d/%d rxqs %d\n", __func__,
 	    scctx->isc_nrxqsets, nrxqsets, nrxqs);
@@ -2258,15 +2307,40 @@ axgbe_if_media_change(if_ctx_t ctx)
 static int
 axgbe_if_promisc_set(if_ctx_t ctx, int flags)
 {
 	struct axgbe_if_softc *sc = iflib_get_softc(ctx);
+	struct xgbe_prv_data *pdata = &sc->pdata;
+	struct ifnet *ifp = pdata->netdev;
 
-	if (XGMAC_IOREAD_BITS(&sc->pdata, MAC_PFR, PR) == 1)
-		return (0);
+	axgbe_printf(1, "%s: MAC_PFR 0x%x drv_flags 0x%x if_flags 0x%x\n",
+	    __func__, XGMAC_IOREAD(pdata, MAC_PFR), ifp->if_drv_flags, ifp->if_flags);
 
-	XGMAC_IOWRITE_BITS(&sc->pdata, MAC_PFR, PR, 1);
-	XGMAC_IOWRITE_BITS(&sc->pdata, MAC_PFR, VTFE, 0);
-
-	return (0);
+	if (ifp->if_flags & IFF_PPROMISC) {
+
+		axgbe_printf(1, "User requested to enter promisc mode\n");
+
+		if (XGMAC_IOREAD_BITS(pdata, MAC_PFR, PR) == 1) {
+			axgbe_printf(1, "Already in promisc mode\n");
+			return (0);
+		}
+
+		axgbe_printf(1, "Entering promisc mode\n");
+		XGMAC_IOWRITE_BITS(pdata, MAC_PFR, PR, 1);
+		XGMAC_IOWRITE_BITS(pdata, MAC_PFR, VTFE, 0);
+	} else {
+
+		axgbe_printf(1, "User requested to leave promisc mode\n");
+
+		if (XGMAC_IOREAD_BITS(pdata, MAC_PFR, PR) == 0) {
+			axgbe_printf(1, "Already not in promisc mode\n");
+			return (0);
+		}
+
+		axgbe_printf(1, "Leaving promisc mode\n");
+		XGMAC_IOWRITE_BITS(pdata, MAC_PFR, PR, 0);
+		XGMAC_IOWRITE_BITS(pdata, MAC_PFR, VTFE, 1);
+	}
+
+	return (0);
 }
 
 static uint64_t

sys/dev/axgbe/xgbe-dev.c

@@ -293,12 +293,14 @@ xgbe_config_tso_mode(struct xgbe_prv_data *pdata)
 {
 	unsigned int i;
+	int tso_enabled = (if_getcapenable(pdata->netdev) & IFCAP_TSO);
 
 	for (i = 0; i < pdata->channel_count; i++) {
 		if (!pdata->channel[i]->tx_ring)
 			break;
 
-		axgbe_printf(0, "Enabling TSO in channel %d\n", i);
-		XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, TSE, 1);
+		axgbe_printf(1, "TSO in channel %d %s\n", i, tso_enabled ? "enabled" : "disabled");
+		XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, TSE, tso_enabled ? 1 : 0);
 	}
 }
@@ -306,15 +308,33 @@ static void
 xgbe_config_sph_mode(struct xgbe_prv_data *pdata)
 {
 	unsigned int i;
+	int sph_enable_flag = XGMAC_IOREAD_BITS(pdata, MAC_HWF1R, SPHEN);
+
+	axgbe_printf(1, "sph_enable %d sph feature enabled?: %d\n",
+	    pdata->sph_enable, sph_enable_flag);
+
+	if (pdata->sph_enable && sph_enable_flag)
+		axgbe_printf(0, "SPH Enabled\n");
 
 	for (i = 0; i < pdata->channel_count; i++) {
 		if (!pdata->channel[i]->rx_ring)
 			break;
+		if (pdata->sph_enable && sph_enable_flag) {
+			/* Enable split header feature */
+			XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_CR, SPH, 1);
+		} else {
+			/* Disable split header feature */
+			XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_CR, SPH, 0);
+		}
 
-		XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_CR, SPH, 1);
+		/* per-channel confirmation of SPH being disabled/enabled */
+		int val = XGMAC_DMA_IOREAD_BITS(pdata->channel[i], DMA_CH_CR, SPH);
+		axgbe_printf(0, "%s: SPH %s in channel %d\n", __func__,
+		    (val ? "enabled" : "disabled"), i);
 	}
 
-	XGMAC_IOWRITE_BITS(pdata, MAC_RCR, HDSMS, XGBE_SPH_HDSMS_SIZE);
+	if (pdata->sph_enable && sph_enable_flag)
+		XGMAC_IOWRITE_BITS(pdata, MAC_RCR, HDSMS, XGBE_SPH_HDSMS_SIZE);
 }
 
 static int
@@ -953,8 +973,8 @@ xgbe_config_rx_mode(struct xgbe_prv_data *pdata)
 {
 	unsigned int pr_mode, am_mode;
 
-	pr_mode = ((pdata->netdev->if_drv_flags & IFF_PPROMISC) != 0);
-	am_mode = ((pdata->netdev->if_drv_flags & IFF_ALLMULTI) != 0);
+	pr_mode = ((pdata->netdev->if_flags & IFF_PPROMISC) != 0);
+	am_mode = ((pdata->netdev->if_flags & IFF_ALLMULTI) != 0);
 
 	xgbe_set_promiscuous_mode(pdata, pr_mode);
 	xgbe_set_all_multicast_mode(pdata, am_mode);
@@ -1352,7 +1372,6 @@ xgbe_dev_read(struct xgbe_channel *channel)
 	if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, CTXT)) {
 		/* TODO - Timestamp Context Descriptor */
 		XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
 		    CONTEXT, 1);
 		XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,

sys/dev/axgbe/xgbe-sysctl.c

@@ -1614,6 +1614,10 @@ axgbe_sysctl_init(struct xgbe_prv_data *pdata)
 	SYSCTL_ADD_UINT(clist, top, OID_AUTO, "axgbe_debug_level", CTLFLAG_RWTUN,
 	    &pdata->debug_level, 0, "axgbe log level -- higher is verbose");
 
+	SYSCTL_ADD_UINT(clist, top, OID_AUTO, "sph_enable",
+	    CTLFLAG_RDTUN, &pdata->sph_enable, 1,
+	    "shows the split header feature state (1 - enable, 0 - disable)");
+
 	SYSCTL_ADD_PROC(clist, top, OID_AUTO, "xgmac_register",
 	    CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
 	    pdata, 0, sysctl_xgmac_reg_addr_handler, "IU",

sys/dev/axgbe/xgbe-txrx.c

@@ -379,20 +379,9 @@ axgbe_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx)
 	axgbe_printf(1, "--> %s: flush txq %d pidx %d cur %d dirty %d\n",
 	    __func__, txqid, pidx, ring->cur, ring->dirty);
 
-	if (__predict_false(ring->cur != pidx)) {
-		axgbe_error("--> %s: cur(%d) ne pidx(%d)\n", __func__,
-		    ring->cur, pidx);
-	}
+	MPASS(ring->cur == pidx);
 
 	wmb();
 
 	/* Ring Doorbell */
-	if (XGMAC_DMA_IOREAD(channel, DMA_CH_TDTR_LO) !=
-	    lower_32_bits(rdata->rdata_paddr)) {
-		XGMAC_DMA_IOWRITE(channel, DMA_CH_TDTR_LO,
-		    lower_32_bits(rdata->rdata_paddr));
-	}
+	XGMAC_DMA_IOWRITE(channel, DMA_CH_TDTR_LO,
+	    lower_32_bits(rdata->rdata_paddr));
 }
 
 static int
@@ -466,6 +455,7 @@ axgbe_isc_rxd_refill(void *arg, if_rxd_update_t iru)
 	unsigned int inte;
 	uint8_t count = iru->iru_count;
 	int i, j;
+	bool config_intr = false;
 
 	axgbe_printf(1, "--> %s: rxq %d fl %d pidx %d count %d ring cur %d "
 	    "dirty %d\n", __func__, iru->iru_qsidx, iru->iru_flidx,
@@ -473,7 +463,7 @@ axgbe_isc_rxd_refill(void *arg, if_rxd_update_t iru)
 	for (i = iru->iru_pidx, j = 0 ; j < count ; i++, j++) {
 
-		if (i == XGBE_RX_DESC_CNT_DEFAULT)
+		if (i == sc->scctx->isc_nrxd[0])
 			i = 0;
 
 		rdata = XGBE_GET_DESC_DATA(ring, i);
@@ -485,29 +475,42 @@ axgbe_isc_rxd_refill(void *arg, if_rxd_update_t iru)
 			    "pidx %d\n", __func__, ring->cur, ring->dirty, j, i);
 		}
 
-		/* Assuming split header is enabled */
-		if (iru->iru_flidx == 0) {
-
-			/* Fill header/buffer1 address */
-			rdesc->desc0 =
-			    cpu_to_le32(lower_32_bits(iru->iru_paddrs[j]));
-			rdesc->desc1 =
-			    cpu_to_le32(upper_32_bits(iru->iru_paddrs[j]));
-		} else {
-
-			/* Fill data/buffer2 address */
-			rdesc->desc2 =
-			    cpu_to_le32(lower_32_bits(iru->iru_paddrs[j]));
-			rdesc->desc3 =
-			    cpu_to_le32(upper_32_bits(iru->iru_paddrs[j]));
+		if (pdata->sph_enable) {
+			if (iru->iru_flidx == 0) {
+
+				/* Fill header/buffer1 address */
+				rdesc->desc0 =
+				    cpu_to_le32(lower_32_bits(iru->iru_paddrs[j]));
+				rdesc->desc1 =
+				    cpu_to_le32(upper_32_bits(iru->iru_paddrs[j]));
+			} else {
+
+				/* Fill data/buffer2 address */
+				rdesc->desc2 =
+				    cpu_to_le32(lower_32_bits(iru->iru_paddrs[j]));
+				rdesc->desc3 =
+				    cpu_to_le32(upper_32_bits(iru->iru_paddrs[j]));
+
+				config_intr = true;
+			}
+		} else {
+			/* Fill header/buffer1 address */
+			rdesc->desc0 = rdesc->desc2 =
+			    cpu_to_le32(lower_32_bits(iru->iru_paddrs[j]));
+			rdesc->desc1 = rdesc->desc3 =
+			    cpu_to_le32(upper_32_bits(iru->iru_paddrs[j]));
+
+			config_intr = true;
+		}
+
+		if (config_intr) {
 
 			if (!rx_usecs && !rx_frames) {
 				/* No coalescing, interrupt for every descriptor */
 				inte = 1;
 			} else {
 				/* Set interrupt based on Rx frame coalescing setting */
-				if (rx_frames &&
-				    !(((ring->dirty + 1) &(ring->rdesc_count - 1)) % rx_frames))
+				if (rx_frames && !((ring->dirty + 1) % rx_frames))
 					inte = 1;
 				else
 					inte = 0;
@@ -520,6 +523,8 @@ axgbe_isc_rxd_refill(void *arg, if_rxd_update_t iru)
 		wmb();
 
 		ring->dirty = ((ring->dirty + 1) & (ring->rdesc_count - 1));
+
+		config_intr = false;
 	}
 }
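
Note that the ring->dirty update above relies on the descriptor count being
a power of two, so that masking with (rdesc_count - 1) is equivalent to a
modulo by rdesc_count. A standalone sketch of the idiom (illustrative, not
driver code):

	#include <assert.h>
	#include <stdio.h>

	/* Wrap-around increment for a power-of-two ring size:
	 * (idx + 1) & (size - 1) equals (idx + 1) % size, without
	 * the division. */
	static unsigned int
	ring_next(unsigned int idx, unsigned int size)
	{
		assert((size & (size - 1)) == 0);	/* power of two */
		return ((idx + 1) & (size - 1));
	}

	int
	main(void)
	{
		unsigned int idx = 0;
		int i;

		/* With size 4 the sequence printed is: 0 1 2 3 0 */
		for (i = 0; i < 5; i++) {
			printf("%u ", idx);
			idx = ring_next(idx, 4);
		}
		printf("\n");
		return (0);
	}
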
@@ -539,15 +544,16 @@ axgbe_isc_rxd_flush(void *arg, uint16_t qsidx, uint8_t flidx, qidx_t pidx)
 	axgbe_printf(1, "--> %s: rxq %d fl %d pidx %d cur %d dirty %d\n",
 	    __func__, qsidx, flidx, pidx, ring->cur, ring->dirty);
 
-	if (flidx == 1) {
-
-		rdata = XGBE_GET_DESC_DATA(ring, pidx);
+	rdata = XGBE_GET_DESC_DATA(ring, pidx);
 
-		/*
-		 * update RX descriptor tail pointer in hardware to indicate
-		 * that new buffers are present in the allocated memory region
-		 */
+	/*
+	 * update RX descriptor tail pointer in hardware to indicate
+	 * that new buffers are present in the allocated memory region
+	 */
+	if (!pdata->sph_enable || flidx == 1) {
 		XGMAC_DMA_IOWRITE(channel, DMA_CH_RDTR_LO,
 		    lower_32_bits(rdata->rdata_paddr));
 	}
 
 	wmb();
 }
 
 static int
@@ -560,12 +566,17 @@ axgbe_isc_rxd_available(void *arg, uint16_t qsidx, qidx_t idx, qidx_t budget)
 	struct xgbe_ring_data *rdata;
 	struct xgbe_ring_desc *rdesc;
 	unsigned int cur;
-	int count;
+	int count = 0;
 	uint8_t incomplete = 1, context_next = 0, running = 0;
 
 	axgbe_printf(1, "--> %s: rxq %d idx %d budget %d cur %d dirty %d\n",
 	    __func__, qsidx, idx, budget, ring->cur, ring->dirty);
 
+	if (__predict_false(test_bit(XGBE_DOWN, &pdata->dev_state))) {
+		axgbe_printf(0, "%s: Polling when XGBE_DOWN\n", __func__);
+		return (count);
+	}
+
 	cur = ring->cur;
 	for (count = 0; count <= budget; ) {
@@ -609,11 +620,14 @@ static unsigned int
 xgbe_rx_buf1_len(struct xgbe_prv_data *pdata, struct xgbe_ring_data *rdata,
     struct xgbe_packet_data *packet)
 {
+	unsigned int ret = 0;
 
-	/* Always zero if not the first descriptor */
-	if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, FIRST)) {
-		axgbe_printf(1, "%s: Not First\n", __func__);
-		return (0);
-	}
+	if (pdata->sph_enable) {
+		/* Always zero if not the first descriptor */
+		if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, FIRST)) {
+			axgbe_printf(1, "%s: Not First\n", __func__);
+			return (0);
+		}
+	}
 
 	/* First descriptor with split header, return header length */
@@ -623,21 +637,33 @@ xgbe_rx_buf1_len(struct xgbe_prv_data *pdata, struct xgbe_ring_data *rdata,
 	}
 
 	/* First descriptor but not the last descriptor and no split header,
-	 * so the full buffer was used
+	 * so the full buffer was used, 256 represents the hardcoded value of
+	 * a max header split defined in the hardware
 	 */
 	if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, LAST)) {
 		axgbe_printf(1, "%s: Not last %d\n", __func__,
 		    pdata->rx_buf_size);
-		return (256);
+		if (pdata->sph_enable) {
+			return (256);
+		} else {
+			return (pdata->rx_buf_size);
+		}
 	}
 
 	/* First descriptor and last descriptor and no split header, so
-	 * calculate how much of the buffer was used
+	 * calculate how much of the buffer was used, we can return the
+	 * segment length or the remaining bytes of the packet
 	 */
 	axgbe_printf(1, "%s: pkt_len %d buf_size %d\n", __func__, rdata->rx.len,
 	    pdata->rx_buf_size);
-	return (min_t(unsigned int, 256, rdata->rx.len));
+
+	if (pdata->sph_enable) {
+		ret = min_t(unsigned int, 256, rdata->rx.len);
+	} else {
+		ret = rdata->rx.len;
+	}
+
+	return (ret);
 }
 
 static unsigned int
@@ -712,8 +738,10 @@ axgbe_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
 		/* Get the data length in the descriptor buffers */
 		buf1_len = xgbe_rx_buf1_len(pdata, rdata, packet);
 		len += buf1_len;
-		buf2_len = xgbe_rx_buf2_len(pdata, rdata, packet, len);
-		len += buf2_len;
+		if (pdata->sph_enable) {
+			buf2_len = xgbe_rx_buf2_len(pdata, rdata, packet, len);
+			len += buf2_len;
+		}
 	} else
 		buf1_len = buf2_len = 0;
@@ -724,8 +752,10 @@ axgbe_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
 		axgbe_add_frag(pdata, ri, prev_cur, buf1_len, i, 0);
 		i++;
 
-		axgbe_add_frag(pdata, ri, prev_cur, buf2_len, i, 1);
-		i++;
+		if (pdata->sph_enable) {
+			axgbe_add_frag(pdata, ri, prev_cur, buf2_len, i, 1);
+			i++;
+		}
 
 		if (!last || context_next)
 			goto read_again;
@@ -758,7 +788,7 @@ axgbe_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
 	}
 
 	if (__predict_false(len == 0))
-		axgbe_error("%s: Zero len packet\n", __func__);
+		axgbe_printf(1, "%s: Discarding Zero len packet\n", __func__);
 
 	if (__predict_false(len > max_len))
 		axgbe_error("%s: Big packet %d/%d\n", __func__, len, max_len);

sys/dev/axgbe/xgbe.h

@@ -1296,6 +1296,12 @@ struct xgbe_prv_data {
 	uint64_t rx_coalesce_usecs;
 
 	unsigned int debug_level;
 
+	/*
+	 * Toggles the split header feature.
+	 * This requires a complete restart.
+	 */
+	unsigned int sph_enable;
 };
 
 struct axgbe_if_softc {