Add optional support to mxge for MSI-X interrupts and multiple receive
queues (which we call slices).  The NIC will steer traffic into up to
hw.mxge.max_slices different receive rings based on a configurable
hash type (hw.mxge.rss_hash_type).

The driver currently defaults to using a single slice, so the default
behavior is unchanged.  Transmit from non-zero slices is also disabled
for now.
This commit is contained in:
Andrew Gallatin 2008-01-15 20:34:49 +00:00
parent 707dd47849
commit 1e413cf932
12 changed files with 31687 additions and 536 deletions

View File

@ -829,6 +829,8 @@ dev/mxge/if_mxge.c optional mxge pci
dev/mxge/mxge_lro.c optional mxge pci
dev/mxge/mxge_eth_z8e.c optional mxge pci
dev/mxge/mxge_ethp_z8e.c optional mxge pci
dev/mxge/mxge_rss_eth_z8e.c optional mxge pci
dev/mxge/mxge_rss_ethp_z8e.c optional mxge pci
dev/my/if_my.c optional my
dev/ncv/ncr53c500.c optional ncv
dev/ncv/ncr53c500_pccard.c optional ncv pccard

File diff suppressed because it is too large Load Diff

View File

@ -85,10 +85,11 @@ typedef struct
int cl_size;
int alloc_fail;
int mask; /* number of rx slots -1 */
} mxge_rx_buf_t;
} mxge_rx_ring_t;
typedef struct
{
struct mtx mtx;
volatile mcp_kreq_ether_send_t *lanai; /* lanai ptr for sendq */
mcp_kreq_ether_send_t *req_list; /* host shadow of sendq */
char *req_bytes;
@ -99,14 +100,15 @@ typedef struct
int mask; /* number of transmit slots -1 */
int done; /* transmits completed */
int pkt_done; /* packets completed */
int boundary; /* boundary transmits cannot cross*/
int max_desc; /* max descriptors per xmit */
int stall; /* #times hw queue exhausted */
int wake; /* #times irq re-enabled xmit */
int watchdog_req; /* cache of req */
int watchdog_done; /* cache of done */
int watchdog_rx_pause; /* cache of pause rq recvd */
} mxge_tx_buf_t;
int defrag;
char mtx_name[16];
} mxge_tx_ring_t;
struct lro_entry;
struct lro_entry
@ -133,30 +135,42 @@ struct lro_entry
};
SLIST_HEAD(lro_head, lro_entry);
typedef struct {
struct ifnet* ifp;
struct mtx tx_mtx;
int csum_flag; /* rx_csums? */
mxge_tx_buf_t tx; /* transmit ring */
mxge_rx_buf_t rx_small;
mxge_rx_buf_t rx_big;
struct mxge_softc;
typedef struct mxge_softc mxge_softc_t;
struct mxge_slice_state {
mxge_softc_t *sc;
mxge_tx_ring_t tx; /* transmit ring */
mxge_rx_ring_t rx_small;
mxge_rx_ring_t rx_big;
mxge_rx_done_t rx_done;
mcp_irq_data_t *fw_stats;
bus_dma_tag_t parent_dmat;
volatile uint8_t *sram;
volatile uint32_t *irq_claim;
u_long ipackets;
struct lro_head lro_active;
struct lro_head lro_free;
int lro_queued;
int lro_flushed;
int lro_bad_csum;
mxge_dma_t fw_stats_dma;
struct sysctl_oid *sysctl_tree;
struct sysctl_ctx_list sysctl_ctx;
char scratch[256];
};
struct mxge_softc {
struct ifnet* ifp;
struct mxge_slice_state *ss;
int csum_flag; /* rx_csums? */
int tx_boundary; /* boundary transmits cannot cross*/
int lro_cnt;
bus_dma_tag_t parent_dmat;
volatile uint8_t *sram;
int sram_size;
volatile uint32_t *irq_deassert;
volatile uint32_t *irq_claim;
mcp_cmd_response_t *cmd;
mxge_dma_t cmd_dma;
mxge_dma_t zeropad_dma;
mxge_dma_t fw_stats_dma;
struct pci_dev *pdev;
int msi_enabled;
int link_state;
@ -170,11 +184,15 @@ typedef struct {
int stop_queue;
int down_cnt;
int watchdog_resets;
int tx_defragged;
int watchdog_countdown;
int pause;
struct resource *mem_res;
struct resource *irq_res;
struct resource **msix_irq_res;
struct resource *msix_table_res;
struct resource *msix_pba_res;
void *ih;
void **msix_ih;
char *fw_name;
char eeprom_strings[MXGE_EEPROM_STRINGS_SIZE];
char fw_version[128];
@ -193,17 +211,19 @@ typedef struct {
int tx_defrag;
int media_flags;
int need_media_probe;
int num_slices;
int rx_ring_size;
mxge_dma_t dmabench_dma;
struct callout co_hdl;
struct sysctl_oid *slice_sysctl_tree;
struct sysctl_ctx_list slice_sysctl_ctx;
char *mac_addr_string;
uint8_t mac_addr[6]; /* eeprom mac address */
char product_code_string[64];
char serial_number_string[64];
char scratch[256];
char tx_mtx_name[16];
char cmd_mtx_name[16];
char driver_mtx_name[16];
} mxge_softc_t;
};
#define MXGE_PCI_VENDOR_MYRICOM 0x14c1
#define MXGE_PCI_DEVICE_Z8E 0x0008
@ -255,8 +275,9 @@ mxge_pio_copy(volatile void *to_v, void *from_v, size_t size)
}
void mxge_lro_flush(mxge_softc_t *mgp, struct lro_entry *lro);
int mxge_lro_rx(mxge_softc_t *mgp, struct mbuf *m_head, uint32_t csum);
void mxge_lro_flush(struct mxge_slice_state *ss, struct lro_entry *lro);
int mxge_lro_rx(struct mxge_slice_state *ss, struct mbuf *m_head,
uint32_t csum);

View File

@ -1,6 +1,6 @@
/******************************************************************************
Copyright (c) 2007, Myricom Inc.
Copyright (c) 2007-2008, Myricom Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/ethernet.h>
@ -73,8 +74,9 @@ mxge_csum_generic(uint16_t *raw, int len)
void
mxge_lro_flush(mxge_softc_t *mgp, struct lro_entry *lro)
mxge_lro_flush(struct mxge_slice_state *ss, struct lro_entry *lro)
{
mxge_softc_t *mgp = ss->sc;
struct ifnet *ifp;
struct ip *ip;
struct tcphdr *tcp;
@ -132,16 +134,16 @@ mxge_lro_flush(mxge_softc_t *mgp, struct lro_entry *lro)
}
ifp = mgp->ifp;
(*ifp->if_input)(mgp->ifp, lro->m_head);
mgp->lro_queued += lro->append_cnt + 1;
mgp->lro_flushed++;
ss->lro_queued += lro->append_cnt + 1;
ss->lro_flushed++;
lro->m_head = NULL;
lro->timestamp = 0;
lro->append_cnt = 0;
SLIST_INSERT_HEAD(&mgp->lro_free, lro, next);
SLIST_INSERT_HEAD(&ss->lro_free, lro, next);
}
int
mxge_lro_rx(mxge_softc_t *mgp, struct mbuf *m_head, uint32_t csum)
mxge_lro_rx(struct mxge_slice_state *ss, struct mbuf *m_head, uint32_t csum)
{
struct ether_header *eh;
struct ip *ip;
@ -171,7 +173,7 @@ mxge_lro_rx(mxge_softc_t *mgp, struct mbuf *m_head, uint32_t csum)
/* verify that the IP header checksum is correct */
tmp_csum = mxge_csum_generic((uint16_t *)ip, sizeof (*ip));
if (__predict_false((tmp_csum ^ 0xffff) != 0)) {
mgp->lro_bad_csum++;
ss->lro_bad_csum++;
return -1;
}
@ -224,7 +226,7 @@ mxge_lro_rx(mxge_softc_t *mgp, struct mbuf *m_head, uint32_t csum)
hlen = ip_len + ETHER_HDR_LEN - tcp_data_len;
seq = ntohl(tcp->th_seq);
SLIST_FOREACH(lro, &mgp->lro_active, next) {
SLIST_FOREACH(lro, &ss->lro_active, next) {
if (lro->source_port == tcp->th_sport &&
lro->dest_port == tcp->th_dport &&
lro->source_ip == ip->ip_src.s_addr &&
@ -233,9 +235,9 @@ mxge_lro_rx(mxge_softc_t *mgp, struct mbuf *m_head, uint32_t csum)
if (__predict_false(seq != lro->next_seq)) {
/* out of order packet */
SLIST_REMOVE(&mgp->lro_active, lro,
SLIST_REMOVE(&ss->lro_active, lro,
lro_entry, next);
mxge_lro_flush(mgp, lro);
mxge_lro_flush(ss, lro);
return -1;
}
@ -287,23 +289,23 @@ mxge_lro_rx(mxge_softc_t *mgp, struct mbuf *m_head, uint32_t csum)
/* advance the last pointer */
lro->m_tail = m_tail;
/* flush packet if required */
device_mtu = mgp->ifp->if_mtu;
device_mtu = ss->sc->ifp->if_mtu;
if (lro->len > (65535 - device_mtu)) {
SLIST_REMOVE(&mgp->lro_active, lro,
SLIST_REMOVE(&ss->lro_active, lro,
lro_entry, next);
mxge_lro_flush(mgp, lro);
mxge_lro_flush(ss, lro);
}
return 0;
}
}
if (SLIST_EMPTY(&mgp->lro_free))
if (SLIST_EMPTY(&ss->lro_free))
return -1;
/* start a new chain */
lro = SLIST_FIRST(&mgp->lro_free);
SLIST_REMOVE_HEAD(&mgp->lro_free, next);
SLIST_INSERT_HEAD(&mgp->lro_active, lro, next);
lro = SLIST_FIRST(&ss->lro_free);
SLIST_REMOVE_HEAD(&ss->lro_free, next);
SLIST_INSERT_HEAD(&ss->lro_active, lro, next);
lro->source_port = tcp->th_sport;
lro->dest_port = tcp->th_dport;
lro->source_ip = ip->ip_src.s_addr;

View File

@ -1,6 +1,6 @@
/*******************************************************************************
Copyright (c) 2006-2007, Myricom Inc.
Copyright (c) 2006-2008, Myricom Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
@ -52,20 +52,25 @@ struct mcp_dma_addr {
};
typedef struct mcp_dma_addr mcp_dma_addr_t;
/* 4 Bytes. 8 Bytes for NDIS drivers. */
/* 4 Bytes */
struct mcp_slot {
#ifdef MXGEFW_NDIS
/* Place at the top so it gets written before length.
* The driver polls length.
*/
uint32_t hash;
#endif
uint16_t checksum;
uint16_t length;
};
typedef struct mcp_slot mcp_slot_t;
#ifdef MXGEFW_NDIS
/* 8-byte descriptor, exclusively used by NDIS drivers. */
struct mcp_slot_8 {
/* Place hash value at the top so it gets written before length.
* The driver polls length.
*/
uint32_t hash;
uint16_t checksum;
uint16_t length;
};
typedef struct mcp_slot_8 mcp_slot_8_t;
/* Two bits of length in mcp_slot are used to indicate hash type. */
#define MXGEFW_RSS_HASH_NULL (0 << 14) /* bit 15:14 = 00 */
#define MXGEFW_RSS_HASH_IPV4 (1 << 14) /* bit 15:14 = 01 */
@ -289,10 +294,14 @@ enum myri10ge_mcp_cmd_type {
MXGEFW_CMD_GET_MAX_RSS_QUEUES,
MXGEFW_CMD_ENABLE_RSS_QUEUES,
/* data0 = number of slices n (0, 1, ..., n-1) to enable
* data1 = interrupt mode. 0=share one INTx/MSI, 1=use one MSI-X per queue.
* data1 = interrupt mode.
* 0=share one INTx/MSI, 1=use one MSI-X per queue.
* If all queues share one interrupt, the driver must have set
* RSS_SHARED_INTERRUPT_DMA before enabling queues.
*/
#define MXGEFW_SLICE_INTR_MODE_SHARED 0
#define MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE 1
MXGEFW_CMD_GET_RSS_SHARED_INTERRUPT_MASK_OFFSET,
MXGEFW_CMD_SET_RSS_SHARED_INTERRUPT_DMA,
/* data0, data1 = bus address lsw, msw */
@ -309,11 +318,15 @@ enum myri10ge_mcp_cmd_type {
* 0: disable rss. nic does not distribute receive packets.
* 1: enable rss. nic distributes receive packets among queues.
* data1 = hash type
* 1: IPV4
* 2: TCP_IPV4
* 3: IPV4 | TCP_IPV4
* 1: IPV4 (required by RSS)
* 2: TCP_IPV4 (required by RSS)
* 3: IPV4 | TCP_IPV4 (required by RSS)
* 4: source port
*/
#define MXGEFW_RSS_HASH_TYPE_IPV4 0x1
#define MXGEFW_RSS_HASH_TYPE_TCP_IPV4 0x2
#define MXGEFW_RSS_HASH_TYPE_SRC_PORT 0x4
MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE,
/* Return data = the max. size of the entire headers of a IPv6 TSO packet.
* If the header size of a IPv6 TSO packet is larger than the specified
@ -328,6 +341,8 @@ enum myri10ge_mcp_cmd_type {
* 0: Linux/FreeBSD style (NIC default)
* 1: NDIS/NetBSD style
*/
#define MXGEFW_TSO_MODE_LINUX 0
#define MXGEFW_TSO_MODE_NDIS 1
MXGEFW_CMD_MDIO_READ,
/* data0 = dev_addr (PMA/PMD or PCS ...), data1 = register/addr */
@ -343,12 +358,45 @@ enum myri10ge_mcp_cmd_type {
* During the i2c operation, MXGEFW_CMD_XFP_I2C_READ or MXGEFW_CMD_XFP_BYTE attempts
* will return MXGEFW_CMD_ERROR_BUSY
*/
MXGEFW_CMD_XFP_BYTE
MXGEFW_CMD_XFP_BYTE,
/* Return the last obtained copy of a given byte in the xfp i2c table
* (copy cached during the last relevant MXGEFW_CMD_XFP_I2C_READ)
* data0 : index of the desired table entry
* Return data = the byte stored at the requested index in the table
*/
MXGEFW_CMD_GET_VPUMP_OFFSET,
/* Return data = NIC memory offset of mcp_vpump_public_global */
MXGEFW_CMD_RESET_VPUMP,
/* Resets the VPUMP state */
MXGEFW_CMD_SET_RSS_MCP_SLOT_TYPE,
/* data0 = mcp_slot type to use.
* 0 = the default 4B mcp_slot
* 1 = 8B mcp_slot_8
*/
#define MXGEFW_RSS_MCP_SLOT_TYPE_MIN 0
#define MXGEFW_RSS_MCP_SLOT_TYPE_WITH_HASH 1
MXGEFW_CMD_SET_THROTTLE_FACTOR,
/* set the throttle factor for ethp_z8e
data0 = throttle_factor
throttle_factor = 256 * pcie-raw-speed / tx_speed
tx_speed = 256 * pcie-raw-speed / throttle_factor
For PCI-E x8: pcie-raw-speed == 16Gb/s
For PCI-E x4: pcie-raw-speed == 8Gb/s
ex1: throttle_factor == 0x1a0 (416), tx_speed == 1.23GB/s == 9.846 Gb/s
ex2: throttle_factor == 0x200 (512), tx_speed == 1.0GB/s == 8 Gb/s
with tx_boundary == 2048, max-throttle-factor == 8191 => min-speed == 500Mb/s
with tx_boundary == 4096, max-throttle-factor == 4095 => min-speed == 1Gb/s
*/
MXGEFW_CMD_VPUMP_UP
/* Allocates VPump Connection, Send Request and Zero copy buffer address tables */
};
typedef enum myri10ge_mcp_cmd_type myri10ge_mcp_cmd_type_t;
@ -406,6 +454,7 @@ struct mcp_irq_data {
typedef struct mcp_irq_data mcp_irq_data_t;
#ifdef MXGEFW_NDIS
/* Exclusively used by NDIS drivers */
struct mcp_rss_shared_interrupt {
uint8_t pad[2];
uint8_t queue;

View File

@ -0,0 +1,47 @@
/*
* from: FreeBSD: src/sys/tools/fw_stub.awk,v 1.6 2007/03/02 11:42:53 flz
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/systm.h>
#include <dev/mxge/rss_eth_z8e.h>
/*
 * Module event handler for the "mxge_rss_eth_z8e" firmware image.
 *
 * mod and unused are not referenced; only the event type is examined.
 *
 * MOD_LOAD:   registers the rss_eth_z8e image via firmware_register() and
 *             returns 0, or ENXIO if registration returns NULL.
 * MOD_UNLOAD: returns the result of firmware_unregister() for the image.
 * otherwise:  returns EINVAL.
 */
static int
mxge_rss_eth_z8e_fw_modevent(module_t mod, int type, void *unused)
{
	switch (type) {
	case MOD_LOAD:
		/*
		 * rss_eth_z8e_uncompressed_length is handed to
		 * firmware_register() as its fourth argument and NULL as
		 * its last; this image is registered on its own, so the
		 * returned handle only needs a NULL check.
		 */
		if (firmware_register("mxge_rss_eth_z8e", rss_eth_z8e,
		    (size_t)rss_eth_z8e_length,
		    rss_eth_z8e_uncompressed_length, NULL) == NULL)
			return (ENXIO);
		return (0);
	case MOD_UNLOAD:
		return (firmware_unregister("mxge_rss_eth_z8e"));
	default:
		break;
	}
	return (EINVAL);
}
/*
 * Module descriptor pairing the module name "mxge_rss_eth_z8e_fw" with its
 * event handler.  The third initializer is 0 -- presumably "no private
 * argument"; confirm against the moduledata_t definition.
 */
static moduledata_t mxge_rss_eth_z8e_fw_mod = {
"mxge_rss_eth_z8e_fw",
mxge_rss_eth_z8e_fw_modevent,
0
};
/* Declare the module with subsystem SI_SUB_DRIVERS and order SI_ORDER_FIRST. */
DECLARE_MODULE(mxge_rss_eth_z8e_fw, mxge_rss_eth_z8e_fw_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST);
/* Advertise this module as version 1. */
MODULE_VERSION(mxge_rss_eth_z8e_fw, 1);
/*
 * Declare a dependency on the "firmware" module; the three 1s are presumably
 * the minimum, preferred and maximum acceptable versions -- TODO confirm.
 */
MODULE_DEPEND(mxge_rss_eth_z8e_fw, firmware, 1, 1, 1);

View File

@ -0,0 +1,47 @@
/*
* from: FreeBSD: src/sys/tools/fw_stub.awk,v 1.6 2007/03/02 11:42:53 flz
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/systm.h>
#include <dev/mxge/rss_ethp_z8e.h>
/*
 * Module event handler for the "mxge_rss_ethp_z8e" firmware image.
 *
 * mod and unused are not referenced; only the event type is examined.
 *
 * MOD_LOAD:   registers the rss_ethp_z8e image via firmware_register() and
 *             returns 0, or ENXIO if registration returns NULL.
 * MOD_UNLOAD: returns the result of firmware_unregister() for the image.
 * otherwise:  returns EINVAL.
 */
static int
mxge_rss_ethp_z8e_fw_modevent(module_t mod, int type, void *unused)
{
	switch (type) {
	case MOD_LOAD:
		/*
		 * rss_ethp_z8e_uncompressed_length is handed to
		 * firmware_register() as its fourth argument and NULL as
		 * its last; this image is registered on its own, so the
		 * returned handle only needs a NULL check.
		 */
		if (firmware_register("mxge_rss_ethp_z8e", rss_ethp_z8e,
		    (size_t)rss_ethp_z8e_length,
		    rss_ethp_z8e_uncompressed_length, NULL) == NULL)
			return (ENXIO);
		return (0);
	case MOD_UNLOAD:
		return (firmware_unregister("mxge_rss_ethp_z8e"));
	default:
		break;
	}
	return (EINVAL);
}
/*
 * Module descriptor pairing the module name "mxge_rss_ethp_z8e_fw" with its
 * event handler.  The third initializer is 0 -- presumably "no private
 * argument"; confirm against the moduledata_t definition.
 */
static moduledata_t mxge_rss_ethp_z8e_fw_mod = {
"mxge_rss_ethp_z8e_fw",
mxge_rss_ethp_z8e_fw_modevent,
0
};
/* Declare the module with subsystem SI_SUB_DRIVERS and order SI_ORDER_FIRST. */
DECLARE_MODULE(mxge_rss_ethp_z8e_fw, mxge_rss_ethp_z8e_fw_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST);
/* Advertise this module as version 1. */
MODULE_VERSION(mxge_rss_ethp_z8e_fw, 1);
/*
 * Declare a dependency on the "firmware" module; the three 1s are presumably
 * the minimum, preferred and maximum acceptable versions -- TODO confirm.
 */
MODULE_DEPEND(mxge_rss_ethp_z8e_fw, firmware, 1, 1, 1);

15126
sys/dev/mxge/rss_eth_z8e.h Normal file

File diff suppressed because it is too large Load Diff

15207
sys/dev/mxge/rss_ethp_z8e.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -3,5 +3,7 @@
SUBDIR= mxge
SUBDIR+=mxge_eth_z8e
SUBDIR+=mxge_ethp_z8e
SUBDIR+=mxge_rss_eth_z8e
SUBDIR+=mxge_rss_ethp_z8e
.include <bsd.subdir.mk>

View File

@ -0,0 +1,7 @@
# $FreeBSD$
.PATH: ${.CURDIR}/../../../dev/mxge
KMOD= mxge_rss_eth_z8e
SRCS= mxge_rss_eth_z8e.c
.include <bsd.kmod.mk>

View File

@ -0,0 +1,7 @@
# $FreeBSD$
.PATH: ${.CURDIR}/../../../dev/mxge
KMOD= mxge_rss_ethp_z8e
SRCS= mxge_rss_ethp_z8e.c
.include <bsd.kmod.mk>