- Updated TOE support in the kernel.

- Stateful TCP offload drivers for Terminator 3 and 4 (T3 and T4) ASICs.
  These are available as t3_tom and t4_tom modules that augment cxgb(4)
  and cxgbe(4) respectively.  The cxgb/cxgbe drivers continue to work as
  usual with or without these extra features.

- iWARP driver for the Terminator 3 ASIC (kernel verbs).  T4 iWARP support
  is in the works and will follow soon.

Build-tested with make universe.

30s overview
============
Which interfaces support TCP offload?  Look for TOE4 and/or TOE6 in the
capabilities of an interface:
# ifconfig -m | grep TOE
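A matching interface shows a line like this (illustrative; the mask and
the full capability list will vary):
	capabilities=...<RXCSUM,TXCSUM,TSO4,TSO6,LRO,TOE4,TOE6,...>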

Enable/disable TCP offload on an interface (just like any other ifnet
capability):
# ifconfig cxgbe0 toe
# ifconfig cxgbe0 -toe
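
Note: the toe capability can only be enabled if the kernel was built with
options TCP_OFFLOAD and the TOM module for the card (t3_tom or t4_tom) is
loaded; otherwise the driver logs a warning such as "You must kldload
t3_tom.ko before trying to enable TOE on a cxgb interface."  An
illustrative sequence for a T3 card:
# kldload t3_tom
# ifconfig cxgb0 toe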

Which connections are offloaded?  Look for toe4 and/or toe6 in the
output of netstat and sockstat:
# netstat -np tcp | grep toe
# sockstat -46c | grep toe
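An offloaded IPv4 connection might show up as (illustrative output;
addresses and exact column layout will differ):
toe4       0      0  10.0.0.1.5001         10.0.0.2.33012        ESTABLISHED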

Reviewed by:	bz, gnn
Sponsored by:	Chelsio Communications.
MFC after:	~3 months (after 9.1, and after ensuring MFC is feasible)
Navdeep Parhar 2012-06-19 07:34:13 +00:00
parent 41b8cbda7d
commit 09fe63205c
104 changed files with 11036 additions and 13009 deletions

View File

@ -916,7 +916,7 @@ unsetifdescr(const char *val, int value, int s, const struct afswtch *afp)
#define IFCAPBITS \
"\020\1RXCSUM\2TXCSUM\3NETCONS\4VLAN_MTU\5VLAN_HWTAGGING\6JUMBO_MTU\7POLLING" \
"\10VLAN_HWCSUM\11TSO4\12TSO6\13LRO\14WOL_UCAST\15WOL_MCAST\16WOL_MAGIC" \
"\21VLAN_HWFILTER\23VLAN_HWTSO\24LINKSTATE\25NETMAP" \
"\17TOE4\20TOE6\21VLAN_HWFILTER\23VLAN_HWTSO\24LINKSTATE\25NETMAP" \
"\26RXCSUM_IPV6\27TXCSUM_IPV6"
/*
@ -1212,6 +1212,8 @@ static struct cmd basic_cmds[] = {
DEF_CMD("-tso4", -IFCAP_TSO4, setifcap),
DEF_CMD("tso", IFCAP_TSO, setifcap),
DEF_CMD("-tso", -IFCAP_TSO, setifcap),
DEF_CMD("toe", IFCAP_TOE, setifcap),
DEF_CMD("-toe", -IFCAP_TOE, setifcap),
DEF_CMD("lro", IFCAP_LRO, setifcap),
DEF_CMD("-lro", -IFCAP_LRO, setifcap),
DEF_CMD("wol", IFCAP_WOL, setifcap),
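
For context on the ifconfig(8) hunk above: IFCAPBITS is a %b-style bit
string where the first byte gives the numeric base (\020 = hex) and each
entry is a 1-indexed bit number followed by that bit's name, so \17TOE4
and \20TOE6 label bits 15 and 16 (IFCAP_TOE4 = 0x04000, IFCAP_TOE6 =
0x08000 in net/if.h).  A minimal decoder sketch -- not the code ifconfig
actually uses:

#include <stdio.h>

#define IFCAP_TOE4	0x04000		/* per net/if.h */
#define IFCAP_TOE6	0x08000

static void
print_caps(unsigned int v, const char *bits)
{
	int bit, comma = 0;

	printf(*bits++ == 8 ? "%o<" : "%x<", v);	/* first byte: base (8 or 16) */
	while ((bit = *bits++) != '\0') {		/* bit number ... */
		if (v & (1u << (bit - 1))) {
			if (comma++)
				putchar(',');
			while (*bits > ' ')		/* ... then its name */
				putchar(*bits++);
		} else {
			while (*bits > ' ')
				bits++;
		}
	}
	printf(">\n");
}

int
main(void)
{
	/* Prints "c000<TOE4,TOE6>". */
	print_caps(IFCAP_TOE4 | IFCAP_TOE6, "\020\17TOE4\20TOE6");
	return (0);
}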

View File

@ -28,6 +28,7 @@ options SCHED_ULE # ULE scheduler
options PREEMPTION # Enable kernel thread preemption
options INET # InterNETworking
options INET6 # IPv6 communications protocols
options TCP_OFFLOAD # TCP offload
options SCTP # Stream Control Transmission Protocol
options FFS # Berkeley Fast Filesystem
options SOFTUPDATES # Enable FFS soft updates support

View File

@ -545,6 +545,8 @@ options INET6 #IPv6 communications protocols
options ROUTETABLES=2 # max 16. 1 is back compatible.
options TCP_OFFLOAD # TCP offload support.
# In order to enable IPSEC you MUST also add device crypto to
# your kernel configuration
options IPSEC #IP security (requires device crypto)

View File

@ -1038,8 +1038,6 @@ dev/cs/if_cs_isa.c optional cs isa
dev/cs/if_cs_pccard.c optional cs pccard
dev/cxgb/cxgb_main.c optional cxgb pci \
compile-with "${NORMAL_C} -I$S/dev/cxgb"
dev/cxgb/cxgb_offload.c optional cxgb pci \
compile-with "${NORMAL_C} -I$S/dev/cxgb"
dev/cxgb/cxgb_sge.c optional cxgb pci \
compile-with "${NORMAL_C} -I$S/dev/cxgb"
dev/cxgb/common/cxgb_mc5.c optional cxgb pci \
@ -3037,7 +3035,7 @@ netinet/tcp_hostcache.c optional inet | inet6
netinet/tcp_input.c optional inet | inet6
netinet/tcp_lro.c optional inet | inet6
netinet/tcp_output.c optional inet | inet6
netinet/tcp_offload.c optional inet | inet6
netinet/tcp_offload.c optional tcp_offload inet | tcp_offload inet6
netinet/tcp_reass.c optional inet | inet6
netinet/tcp_sack.c optional inet | inet6
netinet/tcp_subr.c optional inet | inet6

View File

@ -434,7 +434,7 @@ RADIX_MPATH opt_mpath.h
ROUTETABLES opt_route.h
SLIP_IFF_OPTS opt_slip.h
TCPDEBUG
TCP_OFFLOAD_DISABLE opt_inet.h #Disable code to dispatch tcp offloading
TCP_OFFLOAD opt_inet.h # Enable code to dispatch TCP offloading
TCP_SIGNATURE opt_inet.h
VLAN_ARRAY opt_vlan.h
XBONEHACK

View File

@ -41,7 +41,6 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/module.h>
#include <sys/endian.h>
#include <sys/limits.h>
#include <sys/proc.h>
@ -53,11 +52,13 @@ __FBSDID("$FreeBSD$");
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/syslog.h>
#include <netinet/in.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <contrib/rdma/rdma_cm.h>
#include <linux/types.h>
#include <rdma/rdma_cm.h>
#include "getopt.h"
#include "krping.h"
@ -83,6 +84,7 @@ static const struct krping_option krping_opts[] = {
{"bw", OPT_NOPARAM, 'B'},
{"tx-depth", OPT_INT, 't'},
{"poll", OPT_NOPARAM, 'P'},
{"memlimit", OPT_INT, 'm'},
{NULL, 0, 0}
};
@ -254,10 +256,14 @@ static void krping_cq_event_handler(struct ib_cq *cq, void *ctx)
ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP);
while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 1) {
if (wc.status) {
if (wc.status != IB_WC_WR_FLUSH_ERR)
log(LOG_ERR, "cq completion failed status %d\n",
if (wc.status == IB_WC_WR_FLUSH_ERR) {
DEBUG_LOG("cq flushed\n");
continue;
} else {
log(LOG_CRIT, "cq completion failed status %d\n",
wc.status);
goto error;
goto error;
}
}
switch (wc.opcode) {
@ -432,8 +438,17 @@ static int krping_setup_buffers(struct krping_cb *cb)
}
}
cb->rdma_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0, -1UL,
PAGE_SIZE, 0);
/*
 * RNIC adapters have a limit up to which they can register physical
 * memory.  If DMA-MR memory mode is set, the driver normally registers
 * the maximum supported memory; if contigmalloc then allocates memory
 * beyond that RNIC limit, krping may not work.
 */
if (cb->use_dmamr && cb->memlimit)
cb->rdma_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0, cb->memlimit,
PAGE_SIZE, 0);
else
cb->rdma_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0, -1UL,
PAGE_SIZE, 0);
if (!cb->rdma_buf) {
log(LOG_ERR, "rdma_buf malloc failed\n");
@ -458,8 +473,12 @@ static int krping_setup_buffers(struct krping_cb *cb)
}
if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
cb->start_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK,
0, -1UL, PAGE_SIZE, 0);
if (cb->use_dmamr && cb->memlimit)
cb->start_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK,
0, cb->memlimit, PAGE_SIZE, 0);
else
cb->start_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK,
0, -1UL, PAGE_SIZE, 0);
if (!cb->start_buf) {
log(LOG_ERR, "start_buf malloc failed\n");
ret = ENOMEM;
@ -1636,6 +1655,8 @@ int krping_doit(char *cmd)
cb->state = IDLE;
cb->size = 64;
cb->txdepth = RPING_SQ_DEPTH;
cb->use_dmamr = 1;
cb->memlimit = 0;
mtx_init(&cb->lock, "krping mtx", NULL, MTX_DUPOK|MTX_DEF);
while ((op = krping_getopt("krping", &cmd, krping_opts, NULL, &optarg,
@ -1713,6 +1734,15 @@ int krping_doit(char *cmd)
case 'd':
debug++;
break;
case 'm':
cb->memlimit = optint;
if (cb->memlimit < 1) {
log(LOG_ERR, "Invalid memory limit %ju\n",
(uintmax_t)cb->memlimit);
ret = EINVAL;
} else
DEBUG_LOG(PFX "memory limit %d\n", (int)optint);
break;
default:
log(LOG_ERR, "unknown opt %s\n", optarg);
ret = EINVAL;

View File

@ -1,7 +1,7 @@
/*
* $FreeBSD$
*/
#include <contrib/rdma/ib_verbs.h>
#include <rdma/ib_verbs.h>
#include <netinet/in.h>
/*
@ -92,6 +92,8 @@ struct krping_cb {
int count; /* ping count */
int size; /* ping data size */
int validate; /* validate ping data */
uint64_t memlimit; /* limit of the physical memory that
can be registered with dma_mr mode */
/* CM stuff */
struct rdma_cm_id *cm_id; /* connection on client side,*/

View File

@ -14,7 +14,6 @@
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/module.h>
#include <sys/systm.h> /* uprintf */
#include <sys/errno.h>
#include <sys/param.h> /* defines used in kernel.h */
@ -51,6 +50,9 @@ typedef struct s_krping {
/* vars */
static struct cdev *krping_dev;
#undef MODULE_VERSION
#include <sys/module.h>
static int
krping_loader(struct module *m, int what, void *arg)
{
@ -175,6 +177,4 @@ krping_write(struct cdev *dev, struct uio *uio, int ioflag)
return(err);
}
MODULE_DEPEND(krping, rdma_core, 1, 1, 1);
MODULE_DEPEND(krping, rdma_cma, 1, 1, 1);
DEV_MODULE(krping,krping_loader,NULL);

View File

@ -117,7 +117,8 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev,
const unsigned char *dst_dev_addr)
{
dev_addr->dev_type = RDMA_NODE_RNIC;
memcpy(dev_addr->src_dev_addr, IF_LLADDR(dev), MAX_ADDR_LEN);
memset(dev_addr->src_dev_addr, 0, MAX_ADDR_LEN);
memcpy(dev_addr->src_dev_addr, IF_LLADDR(dev), dev->if_addrlen);
memcpy(dev_addr->broadcast, dev->if_broadcastaddr, MAX_ADDR_LEN);
if (dst_dev_addr)
memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
@ -207,7 +208,7 @@ static int addr_resolve_remote(struct sockaddr_in *src_in,
goto put;
}
ret = arpresolve(iproute.ro_rt->rt_ifp, iproute.ro_rt, NULL,
rt_key(iproute.ro_rt), dmac, &lle);
(struct sockaddr *)dst_in, dmac, &lle);
if (ret) {
goto put;
}

View File

@ -132,7 +132,7 @@ int ib_find_cached_gid(struct ib_device *device,
for (p = 0; p <= end_port(device) - start_port(device); ++p) {
cache = device->cache.gid_cache[p];
for (i = 0; i < cache->table_len; ++i) {
if (!memcmp(gid, &cache->table[i], 6)) { /* XXX */
if (!memcmp(gid, &cache->table[i], sizeof *gid)) {
*port_num = p + start_port(device);
if (index)
*index = i;

View File

@ -60,14 +60,12 @@ struct mtutab {
const unsigned short *mtus; /* the MTU table values */
};
struct net_device;
/*
* Structure used to request the adapter net_device owning a given MAC address.
* Structure used to request the ifnet that owns a given MAC address.
*/
struct iff_mac {
struct net_device *dev; /* the net_device */
const unsigned char *mac_addr; /* MAC address to lookup */
struct ifnet *dev;
const unsigned char *mac_addr;
u16 vlan_tag;
};
@ -85,7 +83,7 @@ struct ddp_params {
struct adap_ports {
unsigned int nports; /* number of ports on this adapter */
struct net_device *lldevs[MAX_NPORTS];
struct ifnet *lldevs[MAX_NPORTS];
};
/*

View File

@ -57,7 +57,6 @@ $FreeBSD$
#include <dev/pci/pcivar.h>
#include <cxgb_osdep.h>
#include <t3cdev.h>
#include <sys/mbufq.h>
struct adapter;
@ -130,6 +129,7 @@ enum {
CXGB_OFLD_INIT = (1 << 7),
TP_PARITY_INIT = (1 << 8),
CXGB_BUSY = (1 << 9),
TOM_INIT_DONE = (1 << 10),
/* port flags */
DOOMED = (1 << 0),
@ -179,7 +179,6 @@ struct sge_rspq {
uint32_t async_notif;
uint32_t cntxt_id;
uint32_t offload_pkts;
uint32_t offload_bundles;
uint32_t pure_rsps;
uint32_t unhandled_irqs;
uint32_t starved;
@ -291,6 +290,7 @@ struct sge_qset {
uint32_t txq_stopped; /* which Tx queues are stopped */
uint64_t port_stats[SGE_PSTAT_MAX];
struct port_info *port;
struct adapter *adap;
int idx; /* qset # */
int qs_flags;
int coalescing;
@ -307,10 +307,13 @@ struct sge {
struct filter_info;
typedef int (*cpl_handler_t)(struct sge_qset *, struct rsp_desc *,
struct mbuf *);
struct adapter {
SLIST_ENTRY(adapter) link;
device_t dev;
int flags;
TAILQ_ENTRY(adapter) adapter_entry;
/* PCI register resources */
int regs_rid;
@ -376,11 +379,16 @@ struct adapter {
struct port_info port[MAX_NPORTS];
device_t portdev[MAX_NPORTS];
struct t3cdev tdev;
#ifdef TCP_OFFLOAD
void *tom_softc;
void *iwarp_softc;
#endif
char fw_version[64];
char port_types[MAX_NPORTS + 1];
uint32_t open_device_map;
uint32_t registered_device_map;
#ifdef TCP_OFFLOAD
int offload_map;
#endif
struct mtx lock;
driver_intr_t *cxgb_intr;
int msi_count;
@ -392,6 +400,11 @@ struct adapter {
char elmerlockbuf[ADAPTER_LOCK_NAME_LEN];
int timestamp;
#ifdef TCP_OFFLOAD
#define NUM_CPL_HANDLERS 0xa7
cpl_handler_t cpl_handler[NUM_CPL_HANDLERS] __aligned(CACHE_LINE_SIZE);
#endif
};
struct t3_rx_mode {
@ -502,10 +515,12 @@ void t3_os_link_changed(adapter_t *adapter, int port_id, int link_status,
int speed, int duplex, int fc, int mac_was_reset);
void t3_os_phymod_changed(struct adapter *adap, int port_id);
void t3_sge_err_intr_handler(adapter_t *adapter);
int t3_offload_tx(struct t3cdev *, struct mbuf *);
#ifdef TCP_OFFLOAD
int t3_offload_tx(struct adapter *, struct mbuf *);
#endif
void t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[]);
int t3_mgmt_tx(adapter_t *adap, struct mbuf *m);
int t3_register_cpl_handler(struct adapter *, int, cpl_handler_t);
int t3_sge_alloc(struct adapter *);
int t3_sge_free(struct adapter *);
@ -556,15 +571,9 @@ txq_to_qset(struct sge_txq *q, int qidx)
return container_of(q, struct sge_qset, txq[qidx]);
}
static __inline struct adapter *
tdev2adap(struct t3cdev *d)
{
return container_of(d, struct adapter, tdev);
}
#undef container_of
#define OFFLOAD_DEVMAP_BIT 15
#define OFFLOAD_DEVMAP_BIT (1 << MAX_NPORTS)
static inline int offload_running(adapter_t *adapter)
{
return isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
@ -573,4 +582,5 @@ static inline int offload_running(adapter_t *adapter)
void cxgb_tx_watchdog(void *arg);
int cxgb_transmit(struct ifnet *ifp, struct mbuf *m);
void cxgb_qflush(struct ifnet *ifp);
void t3_iterate(void (*)(struct adapter *, void *), void *);
#endif

View File

@ -30,6 +30,8 @@ POSSIBILITY OF SUCH DAMAGE.
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
@ -107,6 +109,9 @@ static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
unsigned int, u64, u64);
static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
unsigned int, u64, u64);
#ifdef TCP_OFFLOAD
static int cpl_not_handled(struct sge_qset *, struct rsp_desc *, struct mbuf *);
#endif
/* Attachment glue for the PCI controller end of the device. Each port of
* the device is attached separately, as defined later.
@ -119,10 +124,11 @@ static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned i
unsigned int end);
static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
static int cxgb_get_regs_len(void);
static int offload_open(struct port_info *pi);
static void touch_bars(device_t dev);
static int offload_close(struct t3cdev *tdev);
static void cxgb_update_mac_settings(struct port_info *p);
#ifdef TCP_OFFLOAD
static int toe_capability(struct port_info *, int);
#endif
static device_method_t cxgb_controller_methods[] = {
DEVMETHOD(device_probe, cxgb_controller_probe),
@ -138,8 +144,11 @@ static driver_t cxgb_controller_driver = {
sizeof(struct adapter)
};
static int cxgbc_mod_event(module_t, int, void *);
static devclass_t cxgb_controller_devclass;
DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass,
cxgbc_mod_event, 0);
MODULE_VERSION(cxgbc, 1);
/*
* Attachment glue for the ports. Attachment is done directly to the
@ -177,6 +186,14 @@ static struct cdevsw cxgb_cdevsw = {
static devclass_t cxgb_port_devclass;
DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
MODULE_VERSION(cxgb, 1);
static struct mtx t3_list_lock;
static SLIST_HEAD(, adapter) t3_list;
#ifdef TCP_OFFLOAD
static struct mtx t3_uld_list_lock;
static SLIST_HEAD(, uld_info) t3_uld_list;
#endif
/*
* The driver uses the best interrupt scheme available on a platform in the
@ -194,15 +211,6 @@ SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
SYSCTL_INT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
"MSI-X, MSI, INTx selector");
/*
* The driver enables offload as a default.
* To disable it, use ofld_disable = 1.
*/
static int ofld_disable = 0;
TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
SYSCTL_INT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
"disable ULP offload");
/*
* The driver uses an auto-queue algorithm by default.
* To disable it and force a single queue-set per port, use multiq = 0
@ -445,6 +453,25 @@ cxgb_controller_attach(device_t dev)
sc->msi_count = 0;
ai = cxgb_get_adapter_info(dev);
snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
device_get_unit(dev));
ADAPTER_LOCK_INIT(sc, sc->lockbuf);
snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
device_get_unit(dev));
snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
device_get_unit(dev));
snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
device_get_unit(dev));
MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
mtx_lock(&t3_list_lock);
SLIST_INSERT_HEAD(&t3_list, sc, link);
mtx_unlock(&t3_list_lock);
/* find the PCIe link width and set max read request to 4KB*/
if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
uint16_t lnk;
@ -471,24 +498,10 @@ cxgb_controller_attach(device_t dev)
if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
&sc->regs_rid, RF_ACTIVE)) == NULL) {
device_printf(dev, "Cannot allocate BAR region 0\n");
return (ENXIO);
error = ENXIO;
goto out;
}
snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
device_get_unit(dev));
ADAPTER_LOCK_INIT(sc, sc->lockbuf);
snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
device_get_unit(dev));
snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
device_get_unit(dev));
snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
device_get_unit(dev));
MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
sc->bt = rman_get_bustag(sc->regs_res);
sc->bh = rman_get_bushandle(sc->regs_res);
sc->mmio_len = rman_get_size(sc->regs_res);
@ -604,7 +617,7 @@ cxgb_controller_attach(device_t dev)
} else {
sc->flags |= TPS_UPTODATE;
}
/*
* Create a child device for each MAC. The ethernet attachment
* will be done in these children.
@ -636,12 +649,7 @@ cxgb_controller_attach(device_t dev)
t3_sge_init_adapter(sc);
t3_led_ready(sc);
cxgb_offload_init();
if (is_offload(sc)) {
setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
cxgb_adapter_ofld(sc);
}
error = t3_get_fw_version(sc, &vers);
if (error)
goto out;
@ -662,6 +670,11 @@ cxgb_controller_attach(device_t dev)
device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
t3_add_attach_sysctls(sc);
#ifdef TCP_OFFLOAD
for (i = 0; i < NUM_CPL_HANDLERS; i++)
sc->cpl_handler[i] = cpl_not_handled;
#endif
out:
if (error)
cxgb_free(sc);
@ -775,20 +788,9 @@ cxgb_free(struct adapter *sc)
sc->tq = NULL;
}
if (is_offload(sc)) {
clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
cxgb_adapter_unofld(sc);
}
#ifdef notyet
if (sc->flags & CXGB_OFLD_INIT)
cxgb_offload_deactivate(sc);
#endif
free(sc->filters, M_DEVBUF);
t3_sge_free(sc);
cxgb_offload_exit();
if (sc->udbs_res != NULL)
bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
sc->udbs_res);
@ -800,6 +802,9 @@ cxgb_free(struct adapter *sc)
MTX_DESTROY(&sc->mdio_lock);
MTX_DESTROY(&sc->sge.reg_lock);
MTX_DESTROY(&sc->elmer_lock);
mtx_lock(&t3_list_lock);
SLIST_REMOVE(&t3_list, sc, adapter, link);
mtx_unlock(&t3_list_lock);
ADAPTER_LOCK_DEINIT(sc);
}
@ -1017,6 +1022,10 @@ cxgb_port_attach(device_t dev)
ifp->if_qflush = cxgb_qflush;
ifp->if_capabilities = CXGB_CAP;
#ifdef TCP_OFFLOAD
if (is_offload(sc))
ifp->if_capabilities |= IFCAP_TOE4;
#endif
ifp->if_capenable = CXGB_CAP_ENABLE;
ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO;
@ -1420,65 +1429,6 @@ setup_rss(adapter_t *adap)
cpus, rspq_map);
}
/*
* Sends an mbuf to an offload queue driver
* after dealing with any active network taps.
*/
static inline int
offload_tx(struct t3cdev *tdev, struct mbuf *m)
{
int ret;
ret = t3_offload_tx(tdev, m);
return (ret);
}
static int
write_smt_entry(struct adapter *adapter, int idx)
{
struct port_info *pi = &adapter->port[idx];
struct cpl_smt_write_req *req;
struct mbuf *m;
if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
return (ENOMEM);
req = mtod(m, struct cpl_smt_write_req *);
m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
req->mtu_idx = NMTUS - 1; /* should be 0 but there's a T3 bug */
req->iff = idx;
memset(req->src_mac1, 0, sizeof(req->src_mac1));
memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
m_set_priority(m, 1);
offload_tx(&adapter->tdev, m);
return (0);
}
static int
init_smt(struct adapter *adapter)
{
int i;
for_each_port(adapter, i)
write_smt_entry(adapter, i);
return 0;
}
static void
init_port_mtus(adapter_t *adapter)
{
unsigned int mtus = ETHERMTU | (ETHERMTU << 16);
t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
}
static void
send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
int hi, int port)
@ -1705,45 +1655,6 @@ cxgb_down(struct adapter *sc)
t3_intr_disable(sc);
}
static int
offload_open(struct port_info *pi)
{
struct adapter *sc = pi->adapter;
struct t3cdev *tdev = &sc->tdev;
setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
t3_tp_set_offload_mode(sc, 1);
tdev->lldev = pi->ifp;
init_port_mtus(sc);
t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
sc->params.rev == 0 ? sc->port[0].ifp->if_mtu : 0xffff);
init_smt(sc);
cxgb_add_clients(tdev);
return (0);
}
static int
offload_close(struct t3cdev *tdev)
{
struct adapter *adapter = tdev2adap(tdev);
if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
return (0);
/* Call back all registered clients */
cxgb_remove_clients(tdev);
tdev->lldev = NULL;
cxgb_set_dummy_ops(tdev);
t3_tp_set_offload_mode(adapter, 0);
clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
return (0);
}
/*
* if_init for cxgb ports.
*/
@ -1793,15 +1704,9 @@ cxgb_init_locked(struct port_info *p)
ADAPTER_UNLOCK(sc);
}
if (sc->open_device_map == 0) {
if ((rc = cxgb_up(sc)) != 0)
if (sc->open_device_map == 0 && ((rc = cxgb_up(sc)) != 0))
goto done;
if (is_offload(sc) && !ofld_disable && offload_open(p))
log(LOG_WARNING,
"Could not initialize offload capabilities\n");
}
PORT_LOCK(p);
if (isset(&sc->open_device_map, p->port_id) &&
(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
@ -1929,7 +1834,6 @@ cxgb_uninit_synchronized(struct port_info *pi)
DELAY(100 * 1000);
t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
pi->phy.ops->power_down(&pi->phy, 1);
PORT_UNLOCK(pi);
@ -1937,9 +1841,6 @@ cxgb_uninit_synchronized(struct port_info *pi)
pi->link_config.link_ok = 0;
t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
if ((sc->open_device_map & PORT_MASK) == 0)
offload_close(&sc->tdev);
if (sc->open_device_map == 0)
cxgb_down(pi->adapter);
@ -2081,6 +1982,15 @@ fail:
/* Safe to do this even if cxgb_up not called yet */
cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
}
#ifdef TCP_OFFLOAD
if (mask & IFCAP_TOE4) {
int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE4;
error = toe_capability(p, enable);
if (error == 0)
ifp->if_capenable ^= mask;
}
#endif
if (mask & IFCAP_VLAN_HWTAGGING) {
ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
@ -3362,3 +3272,235 @@ set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
mk_set_tcb_field(req, tid, word, mask, val);
}
void
t3_iterate(void (*func)(struct adapter *, void *), void *arg)
{
struct adapter *sc;
mtx_lock(&t3_list_lock);
SLIST_FOREACH(sc, &t3_list, link) {
/*
* func should not make any assumptions about what state sc is
* in - the only guarantee is that sc->sc_lock is a valid lock.
*/
func(sc, arg);
}
mtx_unlock(&t3_list_lock);
}
#ifdef TCP_OFFLOAD
static int
toe_capability(struct port_info *pi, int enable)
{
int rc;
struct adapter *sc = pi->adapter;
ADAPTER_LOCK_ASSERT_OWNED(sc);
if (!is_offload(sc))
return (ENODEV);
if (enable) {
if (!(sc->flags & FULL_INIT_DONE)) {
log(LOG_WARNING,
"You must enable a cxgb interface first\n");
return (EAGAIN);
}
if (isset(&sc->offload_map, pi->port_id))
return (0);
if (!(sc->flags & TOM_INIT_DONE)) {
rc = t3_activate_uld(sc, ULD_TOM);
if (rc == EAGAIN) {
log(LOG_WARNING,
"You must kldload t3_tom.ko before trying "
"to enable TOE on a cxgb interface.\n");
}
if (rc != 0)
return (rc);
KASSERT(sc->tom_softc != NULL,
("%s: TOM activated but softc NULL", __func__));
KASSERT(sc->flags & TOM_INIT_DONE,
("%s: TOM activated but flag not set", __func__));
}
setbit(&sc->offload_map, pi->port_id);
/*
* XXX: Temporary code to allow iWARP to be enabled when TOE is
* enabled on any port. Need to figure out how to enable,
* disable, load, and unload iWARP cleanly.
*/
if (!isset(&sc->offload_map, MAX_NPORTS) &&
t3_activate_uld(sc, ULD_IWARP) == 0)
setbit(&sc->offload_map, MAX_NPORTS);
} else {
if (!isset(&sc->offload_map, pi->port_id))
return (0);
KASSERT(sc->flags & TOM_INIT_DONE,
("%s: TOM never initialized?", __func__));
clrbit(&sc->offload_map, pi->port_id);
}
return (0);
}
/*
* Add an upper layer driver to the global list.
*/
int
t3_register_uld(struct uld_info *ui)
{
int rc = 0;
struct uld_info *u;
mtx_lock(&t3_uld_list_lock);
SLIST_FOREACH(u, &t3_uld_list, link) {
if (u->uld_id == ui->uld_id) {
rc = EEXIST;
goto done;
}
}
SLIST_INSERT_HEAD(&t3_uld_list, ui, link);
ui->refcount = 0;
done:
mtx_unlock(&t3_uld_list_lock);
return (rc);
}
int
t3_unregister_uld(struct uld_info *ui)
{
int rc = EINVAL;
struct uld_info *u;
mtx_lock(&t3_uld_list_lock);
SLIST_FOREACH(u, &t3_uld_list, link) {
if (u == ui) {
if (ui->refcount > 0) {
rc = EBUSY;
goto done;
}
SLIST_REMOVE(&t3_uld_list, ui, uld_info, link);
rc = 0;
goto done;
}
}
done:
mtx_unlock(&t3_uld_list_lock);
return (rc);
}
int
t3_activate_uld(struct adapter *sc, int id)
{
int rc = EAGAIN;
struct uld_info *ui;
mtx_lock(&t3_uld_list_lock);
SLIST_FOREACH(ui, &t3_uld_list, link) {
if (ui->uld_id == id) {
rc = ui->activate(sc);
if (rc == 0)
ui->refcount++;
goto done;
}
}
done:
mtx_unlock(&t3_uld_list_lock);
return (rc);
}
int
t3_deactivate_uld(struct adapter *sc, int id)
{
int rc = EINVAL;
struct uld_info *ui;
mtx_lock(&t3_uld_list_lock);
SLIST_FOREACH(ui, &t3_uld_list, link) {
if (ui->uld_id == id) {
rc = ui->deactivate(sc);
if (rc == 0)
ui->refcount--;
goto done;
}
}
done:
mtx_unlock(&t3_uld_list_lock);
return (rc);
}
static int
cpl_not_handled(struct sge_qset *qs __unused, struct rsp_desc *r __unused,
struct mbuf *m)
{
m_freem(m);
return (EDOOFUS);
}
int
t3_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h)
{
uintptr_t *loc, new;
if (opcode >= NUM_CPL_HANDLERS)
return (EINVAL);
new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled;
loc = (uintptr_t *) &sc->cpl_handler[opcode];
atomic_store_rel_ptr(loc, new);
return (0);
}
#endif
static int
cxgbc_mod_event(module_t mod, int cmd, void *arg)
{
int rc = 0;
switch (cmd) {
case MOD_LOAD:
mtx_init(&t3_list_lock, "T3 adapters", 0, MTX_DEF);
SLIST_INIT(&t3_list);
#ifdef TCP_OFFLOAD
mtx_init(&t3_uld_list_lock, "T3 ULDs", 0, MTX_DEF);
SLIST_INIT(&t3_uld_list);
#endif
break;
case MOD_UNLOAD:
#ifdef TCP_OFFLOAD
mtx_lock(&t3_uld_list_lock);
if (!SLIST_EMPTY(&t3_uld_list)) {
rc = EBUSY;
mtx_unlock(&t3_uld_list_lock);
break;
}
mtx_unlock(&t3_uld_list_lock);
mtx_destroy(&t3_uld_list_lock);
#endif
mtx_lock(&t3_list_lock);
if (!SLIST_EMPTY(&t3_list)) {
rc = EBUSY;
mtx_unlock(&t3_list_lock);
break;
}
mtx_unlock(&t3_list_lock);
mtx_destroy(&t3_list_lock);
break;
}
return (rc);
}
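
For orientation, a hypothetical sketch (not part of this commit) of how an
upper layer driver such as t3_tom could plug into the ULD and per-adapter
CPL dispatch interfaces added here.  struct uld_info, ULD_TOM, the
t3_*_uld() functions, and t3_register_cpl_handler() come from this commit
and CPL_SMT_WRITE_RPL is an existing CPL opcode; the handler and
activate/deactivate bodies and all other names are illustrative:

static int
do_smt_write_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
	/* ... act on the CPL carried in m ... */
	m_freem(m);
	return (0);
}

static int
tom_activate(struct adapter *sc)
{
	/* Allocate sc->tom_softc, set TOM_INIT_DONE, hook CPL handlers. */
	t3_register_cpl_handler(sc, CPL_SMT_WRITE_RPL, do_smt_write_rpl);
	return (0);
}

static int
tom_deactivate(struct adapter *sc)
{
	return (EBUSY);		/* e.g. refuse while connections remain */
}

static struct uld_info tom_uld_info = {
	.uld_id = ULD_TOM,
	.activate = tom_activate,
	.deactivate = tom_deactivate,
};

/* From the module's MOD_LOAD / MOD_UNLOAD handlers: */
t3_register_uld(&tom_uld_info);
t3_unregister_uld(&tom_uld_info);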

View File

@ -1,465 +0,0 @@
/**************************************************************************
Copyright (c) 2007-2008, Chelsio Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Neither the name of the Chelsio Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/rman.h>
#include <sys/ioccom.h>
#include <sys/mbuf.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/proc.h>
#include <cxgb_include.h>
#include <net/route.h>
#define VALIDATE_TID 0
MALLOC_DEFINE(M_CXGB, "cxgb", "Chelsio 10 Gigabit Ethernet and services");
TAILQ_HEAD(, cxgb_client) client_list;
TAILQ_HEAD(, t3cdev) ofld_dev_list;
static struct mtx cxgb_db_lock;
static int inited = 0;
static inline int
offload_activated(struct t3cdev *tdev)
{
struct adapter *adapter = tdev2adap(tdev);
return (isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT));
}
static inline void
register_tdev(struct t3cdev *tdev)
{
static int unit;
mtx_lock(&cxgb_db_lock);
snprintf(tdev->name, sizeof(tdev->name), "ofld_dev%d", unit++);
TAILQ_INSERT_TAIL(&ofld_dev_list, tdev, entry);
mtx_unlock(&cxgb_db_lock);
}
static inline void
unregister_tdev(struct t3cdev *tdev)
{
if (!inited)
return;
mtx_lock(&cxgb_db_lock);
TAILQ_REMOVE(&ofld_dev_list, tdev, entry);
mtx_unlock(&cxgb_db_lock);
}
#ifndef TCP_OFFLOAD_DISABLE
/**
* cxgb_register_client - register an offload client
* @client: the client
*
* Add the client to the client list,
* and call backs the client for each activated offload device
*/
void
cxgb_register_client(struct cxgb_client *client)
{
struct t3cdev *tdev;
mtx_lock(&cxgb_db_lock);
TAILQ_INSERT_TAIL(&client_list, client, client_entry);
if (client->add) {
TAILQ_FOREACH(tdev, &ofld_dev_list, entry) {
if (offload_activated(tdev)) {
client->add(tdev);
} else
CTR1(KTR_CXGB,
"cxgb_register_client: %p not activated", tdev);
}
}
mtx_unlock(&cxgb_db_lock);
}
/**
* cxgb_unregister_client - unregister an offload client
* @client: the client
*
* Remove the client to the client list,
* and call backs the client for each activated offload device.
*/
void
cxgb_unregister_client(struct cxgb_client *client)
{
struct t3cdev *tdev;
mtx_lock(&cxgb_db_lock);
TAILQ_REMOVE(&client_list, client, client_entry);
if (client->remove) {
TAILQ_FOREACH(tdev, &ofld_dev_list, entry) {
if (offload_activated(tdev))
client->remove(tdev);
}
}
mtx_unlock(&cxgb_db_lock);
}
/**
* cxgb_add_clients - activate register clients for an offload device
* @tdev: the offload device
*
* Call backs all registered clients once a offload device is activated
*/
void
cxgb_add_clients(struct t3cdev *tdev)
{
struct cxgb_client *client;
mtx_lock(&cxgb_db_lock);
TAILQ_FOREACH(client, &client_list, client_entry) {
if (client->add)
client->add(tdev);
}
mtx_unlock(&cxgb_db_lock);
}
/**
* cxgb_remove_clients - activate register clients for an offload device
* @tdev: the offload device
*
* Call backs all registered clients once a offload device is deactivated
*/
void
cxgb_remove_clients(struct t3cdev *tdev)
{
struct cxgb_client *client;
mtx_lock(&cxgb_db_lock);
TAILQ_FOREACH(client, &client_list, client_entry) {
if (client->remove)
client->remove(tdev);
}
mtx_unlock(&cxgb_db_lock);
}
#endif
/**
* cxgb_ofld_recv - process n received offload packets
* @dev: the offload device
* @m: an array of offload packets
* @n: the number of offload packets
*
* Process an array of ingress offload packets. Each packet is forwarded
* to any active network taps and then passed to the offload device's receive
* method. We optimize passing packets to the receive method by passing
* it the whole array at once except when there are active taps.
*/
int
cxgb_ofld_recv(struct t3cdev *dev, struct mbuf **m, int n)
{
return dev->recv(dev, m, n);
}
/*
* Dummy handler for Rx offload packets in case we get an offload packet before
* proper processing is setup. This complains and drops the packet as it isn't
* normal to get offload packets at this stage.
*/
static int
rx_offload_blackhole(struct t3cdev *dev, struct mbuf **m, int n)
{
while (n--)
m_freem(m[n]);
return 0;
}
static void
dummy_neigh_update(struct t3cdev *dev, struct rtentry *neigh, uint8_t *enaddr,
struct sockaddr *sa)
{
}
void
cxgb_set_dummy_ops(struct t3cdev *dev)
{
dev->recv = rx_offload_blackhole;
dev->arp_update = dummy_neigh_update;
}
static int
do_smt_write_rpl(struct t3cdev *dev, struct mbuf *m)
{
struct cpl_smt_write_rpl *rpl = cplhdr(m);
if (rpl->status != CPL_ERR_NONE)
log(LOG_ERR,
"Unexpected SMT_WRITE_RPL status %u for entry %u\n",
rpl->status, GET_TID(rpl));
return CPL_RET_BUF_DONE;
}
static int
do_l2t_write_rpl(struct t3cdev *dev, struct mbuf *m)
{
struct cpl_l2t_write_rpl *rpl = cplhdr(m);
if (rpl->status != CPL_ERR_NONE)
log(LOG_ERR,
"Unexpected L2T_WRITE_RPL status %u for entry %u\n",
rpl->status, GET_TID(rpl));
return CPL_RET_BUF_DONE;
}
static int
do_rte_write_rpl(struct t3cdev *dev, struct mbuf *m)
{
struct cpl_rte_write_rpl *rpl = cplhdr(m);
if (rpl->status != CPL_ERR_NONE)
log(LOG_ERR,
"Unexpected L2T_WRITE_RPL status %u for entry %u\n",
rpl->status, GET_TID(rpl));
return CPL_RET_BUF_DONE;
}
static int
do_set_tcb_rpl(struct t3cdev *dev, struct mbuf *m)
{
struct cpl_set_tcb_rpl *rpl = cplhdr(m);
if (rpl->status != CPL_ERR_NONE)
log(LOG_ERR,
"Unexpected SET_TCB_RPL status %u for tid %u\n",
rpl->status, GET_TID(rpl));
return CPL_RET_BUF_DONE;
}
static int
do_trace(struct t3cdev *dev, struct mbuf *m)
{
#if 0
struct cpl_trace_pkt *p = cplhdr(m);
skb->protocol = 0xffff;
skb->dev = dev->lldev;
skb_pull(skb, sizeof(*p));
skb->mac.raw = mtod(m, (char *));
netif_receive_skb(skb);
#endif
return 0;
}
/*
* Process a received packet with an unknown/unexpected CPL opcode.
*/
static int
do_bad_cpl(struct t3cdev *dev, struct mbuf *m)
{
log(LOG_ERR, "%s: received bad CPL command 0x%x\n", dev->name,
0xFF & *mtod(m, uint32_t *));
return (CPL_RET_BUF_DONE | CPL_RET_BAD_MSG);
}
/*
* Handlers for each CPL opcode
*/
static cpl_handler_func cpl_handlers[256];
/*
* T3CDEV's receive method.
*/
int
process_rx(struct t3cdev *dev, struct mbuf **m, int n)
{
while (n--) {
struct mbuf *m0 = *m++;
unsigned int opcode = G_OPCODE(ntohl(m0->m_pkthdr.csum_data));
int ret;
DPRINTF("processing op=0x%x m=%p data=%p\n", opcode, m0, m0->m_data);
ret = cpl_handlers[opcode] (dev, m0);
#if VALIDATE_TID
if (ret & CPL_RET_UNKNOWN_TID) {
union opcode_tid *p = cplhdr(m0);
log(LOG_ERR, "%s: CPL message (opcode %u) had "
"unknown TID %u\n", dev->name, opcode,
G_TID(ntohl(p->opcode_tid)));
}
#endif
if (ret & CPL_RET_BUF_DONE)
m_freem(m0);
}
return 0;
}
/*
* Add a new handler to the CPL dispatch table. A NULL handler may be supplied
* to unregister an existing handler.
*/
void
t3_register_cpl_handler(unsigned int opcode, cpl_handler_func h)
{
if (opcode < NUM_CPL_CMDS)
cpl_handlers[opcode] = h ? h : do_bad_cpl;
else
log(LOG_ERR, "T3C: handler registration for "
"opcode %x failed\n", opcode);
}
/*
* Allocate a chunk of memory using kmalloc or, if that fails, vmalloc.
* The allocated memory is cleared.
*/
void *
cxgb_alloc_mem(unsigned long size)
{
return malloc(size, M_CXGB, M_ZERO|M_NOWAIT);
}
/*
* Free memory allocated through t3_alloc_mem().
*/
void
cxgb_free_mem(void *addr)
{
free(addr, M_CXGB);
}
static __inline int
adap2type(struct adapter *adapter)
{
int type = 0;
switch (adapter->params.rev) {
case T3_REV_A:
type = T3A;
break;
case T3_REV_B:
case T3_REV_B2:
type = T3B;
break;
case T3_REV_C:
type = T3C;
break;
}
return type;
}
void
cxgb_adapter_ofld(struct adapter *adapter)
{
struct t3cdev *tdev = &adapter->tdev;
cxgb_set_dummy_ops(tdev);
tdev->type = adap2type(adapter);
tdev->adapter = adapter;
register_tdev(tdev);
}
void
cxgb_adapter_unofld(struct adapter *adapter)
{
struct t3cdev *tdev = &adapter->tdev;
tdev->recv = NULL;
tdev->arp_update = NULL;
unregister_tdev(tdev);
}
void
cxgb_offload_init(void)
{
int i;
if (inited++)
return;
mtx_init(&cxgb_db_lock, "ofld db", NULL, MTX_DEF);
TAILQ_INIT(&client_list);
TAILQ_INIT(&ofld_dev_list);
for (i = 0; i < 0x100; ++i)
cpl_handlers[i] = do_bad_cpl;
t3_register_cpl_handler(CPL_SMT_WRITE_RPL, do_smt_write_rpl);
t3_register_cpl_handler(CPL_RTE_WRITE_RPL, do_rte_write_rpl);
t3_register_cpl_handler(CPL_L2T_WRITE_RPL, do_l2t_write_rpl);
t3_register_cpl_handler(CPL_SET_TCB_RPL, do_set_tcb_rpl);
t3_register_cpl_handler(CPL_TRACE_PKT, do_trace);
}
void
cxgb_offload_exit(void)
{
if (--inited)
return;
mtx_destroy(&cxgb_db_lock);
}
MODULE_VERSION(if_cxgb, 1);

View File

@ -1,4 +1,3 @@
/**************************************************************************
Copyright (c) 2007-2008, Chelsio Inc.
@ -33,221 +32,93 @@ $FreeBSD$
#ifndef _CXGB_OFFLOAD_H
#define _CXGB_OFFLOAD_H
#include <common/cxgb_tcb.h>
#include <t3cdev.h>
MALLOC_DECLARE(M_CXGB);
struct adapter;
struct cxgb_client;
void cxgb_offload_init(void);
void cxgb_offload_exit(void);
void cxgb_adapter_ofld(struct adapter *adapter);
void cxgb_adapter_unofld(struct adapter *adapter);
int cxgb_offload_activate(struct adapter *adapter);
void cxgb_offload_deactivate(struct adapter *adapter);
int cxgb_ofld_recv(struct t3cdev *dev, struct mbuf **m, int n);
void cxgb_set_dummy_ops(struct t3cdev *dev);
/*
* Client registration. Users of T3 driver must register themselves.
* The T3 driver will call the add function of every client for each T3
* adapter activated, passing up the t3cdev ptr. Each client fills out an
* array of callback functions to process CPL messages.
*/
void cxgb_register_client(struct cxgb_client *client);
void cxgb_unregister_client(struct cxgb_client *client);
void cxgb_add_clients(struct t3cdev *tdev);
void cxgb_remove_clients(struct t3cdev *tdev);
typedef int (*cxgb_cpl_handler_func)(struct t3cdev *dev,
struct mbuf *m, void *ctx);
struct l2t_entry;
struct cxgb_client {
char *name;
void (*add) (struct t3cdev *);
void (*remove) (struct t3cdev *);
cxgb_cpl_handler_func *handlers;
int (*redirect)(void *ctx, struct rtentry *old,
struct rtentry *new,
struct l2t_entry *l2t);
TAILQ_ENTRY(cxgb_client) client_entry;
#ifdef TCP_OFFLOAD
enum {
ULD_TOM = 1,
ULD_IWARP = 2,
};
/*
* TID allocation services.
*/
int cxgb_alloc_atid(struct t3cdev *dev, struct cxgb_client *client,
void *ctx);
int cxgb_alloc_stid(struct t3cdev *dev, struct cxgb_client *client,
void *ctx);
void *cxgb_free_atid(struct t3cdev *dev, int atid);
void cxgb_free_stid(struct t3cdev *dev, int stid);
void *cxgb_get_lctx(struct t3cdev *tdev, int stid);
void cxgb_insert_tid(struct t3cdev *dev, struct cxgb_client *client,
void *ctx,
unsigned int tid);
void cxgb_queue_tid_release(struct t3cdev *dev, unsigned int tid);
void cxgb_remove_tid(struct t3cdev *dev, void *ctx, unsigned int tid);
struct adapter;
struct uld_info {
SLIST_ENTRY(uld_info) link;
int refcount;
int uld_id;
int (*activate)(struct adapter *);
int (*deactivate)(struct adapter *);
};
struct toe_tid_entry {
struct cxgb_client *client;
void *ctx;
struct tom_tunables {
int sndbuf;
int ddp;
int indsz;
int ddp_thres;
};
/* CPL message priority levels */
enum {
CPL_PRIORITY_DATA = 0, /* data messages */
CPL_PRIORITY_SETUP = 1, /* connection setup messages */
CPL_PRIORITY_TEARDOWN = 0, /* connection teardown messages */
CPL_PRIORITY_LISTEN = 1, /* listen start/stop messages */
CPL_PRIORITY_ACK = 1, /* RX ACK messages */
CPL_PRIORITY_CONTROL = 1 /* offload control messages */
};
/* Flags for return value of CPL message handlers */
enum {
CPL_RET_BUF_DONE = 1, // buffer processing done, buffer may be freed
CPL_RET_BAD_MSG = 2, // bad CPL message (e.g., unknown opcode)
CPL_RET_UNKNOWN_TID = 4 // unexpected unknown TID
};
#define S_HDR_NDESC 0
#define M_HDR_NDESC 0xf
#define V_HDR_NDESC(x) ((x) << S_HDR_NDESC)
#define G_HDR_NDESC(x) (((x) >> S_HDR_NDESC) & M_HDR_NDESC)
typedef int (*cpl_handler_func)(struct t3cdev *dev, struct mbuf *m);
#define S_HDR_QSET 4
#define M_HDR_QSET 0xf
#define V_HDR_QSET(x) ((x) << S_HDR_QSET)
#define G_HDR_QSET(x) (((x) >> S_HDR_QSET) & M_HDR_QSET)
/*
* Returns a pointer to the first byte of the CPL header in an sk_buff that
* contains a CPL message.
*/
static inline void *cplhdr(struct mbuf *m)
#define S_HDR_CTRL 8
#define V_HDR_CTRL(x) ((x) << S_HDR_CTRL)
#define F_HDR_CTRL V_HDR_CTRL(1U)
#define S_HDR_DF 9
#define V_HDR_DF(x) ((x) << S_HDR_DF)
#define F_HDR_DF V_HDR_DF(1U)
#define S_HDR_SGL 10
#define V_HDR_SGL(x) ((x) << S_HDR_SGL)
#define F_HDR_SGL V_HDR_SGL(1U)
struct ofld_hdr
{
return mtod(m, uint8_t *);
}
void t3_register_cpl_handler(unsigned int opcode, cpl_handler_func h);
union listen_entry {
struct toe_tid_entry toe_tid;
union listen_entry *next;
};
union active_open_entry {
struct toe_tid_entry toe_tid;
union active_open_entry *next;
void *sgl; /* SGL, if F_HDR_SGL set in flags */
int plen; /* amount of payload (in bytes) */
int flags;
};
/*
* Holds the size, base address, free list start, etc of the TID, server TID,
* and active-open TID tables for a offload device.
* The tables themselves are allocated dynamically.
* Convenience function for fixed size CPLs that fit in 1 desc.
*/
struct tid_info {
struct toe_tid_entry *tid_tab;
unsigned int ntids;
volatile unsigned int tids_in_use;
union listen_entry *stid_tab;
unsigned int nstids;
unsigned int stid_base;
union active_open_entry *atid_tab;
unsigned int natids;
unsigned int atid_base;
/*
* The following members are accessed R/W so we put them in their own
* cache lines.
*
* XXX We could combine the atid fields above with the lock here since
* atids are use once (unlike other tids). OTOH the above fields are
* usually in cache due to tid_tab.
*/
struct mtx atid_lock /* ____cacheline_aligned_in_smp */;
union active_open_entry *afree;
unsigned int atids_in_use;
struct mtx stid_lock /*____cacheline_aligned */;
union listen_entry *sfree;
unsigned int stids_in_use;
};
struct t3c_data {
struct t3cdev *dev;
unsigned int tx_max_chunk; /* max payload for TX_DATA */
unsigned int max_wrs; /* max in-flight WRs per connection */
unsigned int nmtus;
const unsigned short *mtus;
struct tid_info tid_maps;
struct toe_tid_entry *tid_release_list;
struct mtx tid_release_lock;
struct task tid_release_task;
};
/*
* t3cdev -> toe_data accessor
*/
#define T3C_DATA(dev) (*(struct t3c_data **)&(dev)->l4opt)
/*
* Map an ATID or STID to their entries in the corresponding TID tables.
*/
static inline union active_open_entry *atid2entry(const struct tid_info *t,
unsigned int atid)
#define M_GETHDR_OFLD(qset, ctrl, cpl) \
m_gethdr_ofld(qset, ctrl, sizeof(*cpl), (void **)&cpl)
static inline struct mbuf *
m_gethdr_ofld(int qset, int ctrl, int cpllen, void **cpl)
{
return &t->atid_tab[atid - t->atid_base];
struct mbuf *m;
struct ofld_hdr *oh;
m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return (NULL);
oh = mtod(m, struct ofld_hdr *);
oh->flags = V_HDR_NDESC(1) | V_HDR_QSET(qset) | V_HDR_CTRL(ctrl);
*cpl = (void *)(oh + 1);
m->m_pkthdr.len = m->m_len = sizeof(*oh) + cpllen;
return (m);
}
int t3_register_uld(struct uld_info *);
int t3_unregister_uld(struct uld_info *);
int t3_activate_uld(struct adapter *, int);
int t3_deactivate_uld(struct adapter *, int);
#endif /* TCP_OFFLOAD */
static inline union listen_entry *stid2entry(const struct tid_info *t,
unsigned int stid)
{
return &t->stid_tab[stid - t->stid_base];
}
#define CXGB_UNIMPLEMENTED() \
panic("IMPLEMENT: %s:%s:%d", __FUNCTION__, __FILE__, __LINE__)
/*
* Find the connection corresponding to a TID.
*/
static inline struct toe_tid_entry *lookup_tid(const struct tid_info *t,
unsigned int tid)
{
return tid < t->ntids ? &(t->tid_tab[tid]) : NULL;
}
/*
* Find the connection corresponding to a server TID.
*/
static inline struct toe_tid_entry *lookup_stid(const struct tid_info *t,
unsigned int tid)
{
if (tid < t->stid_base || tid >= t->stid_base + t->nstids)
return NULL;
return &(stid2entry(t, tid)->toe_tid);
}
/*
* Find the connection corresponding to an active-open TID.
*/
static inline struct toe_tid_entry *lookup_atid(const struct tid_info *t,
unsigned int tid)
{
if (tid < t->atid_base || tid >= t->atid_base + t->natids)
return NULL;
return &(atid2entry(t, tid)->toe_tid);
}
void *cxgb_alloc_mem(unsigned long size);
void cxgb_free_mem(void *addr);
void cxgb_neigh_update(struct rtentry *rt, uint8_t *enaddr, struct sockaddr *sa);
void cxgb_redirect(struct rtentry *old, struct rtentry *new, struct sockaddr *sa);
int process_rx(struct t3cdev *dev, struct mbuf **m, int n);
int attach_t3cdev(struct t3cdev *dev);
void detach_t3cdev(struct t3cdev *dev);
#define CXGB_UNIMPLEMENTED() panic("IMPLEMENT: %s:%s:%d", __FUNCTION__, __FILE__, __LINE__)
#endif
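
For orientation, a hypothetical caller of the new ofld_hdr scheme (not part
of this commit): allocate a one-descriptor, fixed-size CPL bound for qset 0,
fill it in, and hand it to the driver.  Only M_GETHDR_OFLD(), struct
cpl_set_tcb_field, and t3_offload_tx() are real; the surrounding fragment is
illustrative:

	struct cpl_set_tcb_field *req;
	struct mbuf *m;

	m = M_GETHDR_OFLD(0, 0, req);	/* qset 0, not a control-queue WR */
	if (m == NULL)
		return (ENOMEM);
	/* ... fill in *req: opcode/tid, word, mask, value ... */
	t3_offload_tx(sc, m);		/* sc: the port's struct adapter */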

View File

@ -67,27 +67,6 @@ struct t3_mbuf_hdr {
} while (0)
#endif
#define m_get_priority(m) ((uintptr_t)(m)->m_pkthdr.rcvif)
#define m_set_priority(m, pri) ((m)->m_pkthdr.rcvif = (struct ifnet *)((uintptr_t)pri))
#define m_set_sgl(m, sgl) ((m)->m_pkthdr.header = (sgl))
#define m_get_sgl(m) ((bus_dma_segment_t *)(m)->m_pkthdr.header)
#define m_set_sgllen(m, len) ((m)->m_pkthdr.ether_vtag = len)
#define m_get_sgllen(m) ((m)->m_pkthdr.ether_vtag)
/*
* XXX FIXME
*/
#define m_set_toep(m, a) ((m)->m_pkthdr.header = (a))
#define m_get_toep(m) ((m)->m_pkthdr.header)
#define m_set_handler(m, handler) ((m)->m_pkthdr.header = (handler))
#define m_set_socket(m, a) ((m)->m_pkthdr.header = (a))
#define m_get_socket(m) ((m)->m_pkthdr.header)
#define KTR_CXGB KTR_SPARE2
#define MT_DONTFREE 128
#if __FreeBSD_version < 800054
#if defined (__GNUC__)
#if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__
@ -123,13 +102,6 @@ struct t3_mbuf_hdr {
#define CXGB_TX_CLEANUP_THRESHOLD 32
#ifdef DEBUG_PRINT
#define DPRINTF printf
#else
#define DPRINTF(...)
#endif
#define TX_MAX_SIZE (1 << 16) /* 64KB */
#define TX_MAX_SEGS 36 /* maximum supported by card */
@ -199,7 +171,6 @@ static const int debug_flags = DBG_RX;
#define test_and_clear_bit(bit, p) atomic_cmpset_int((p), ((*(p)) | (1<<bit)), ((*(p)) & ~(1<<bit)))
#define max_t(type, a, b) (type)max((a), (b))
#define net_device ifnet
#define cpu_to_be32 htobe32
/* Standard PHY definitions */

View File

@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/syslog.h>
#include <sys/socket.h>
#include <sys/sglist.h>
#include <net/bpf.h>
#include <net/ethernet.h>
@ -78,6 +79,10 @@ __FBSDID("$FreeBSD$");
int txq_fills = 0;
int multiq_tx_enable = 1;
#ifdef TCP_OFFLOAD
CTASSERT(NUM_CPL_HANDLERS >= NUM_CPL_CMDS);
#endif
extern struct sysctl_oid_list sysctl__hw_cxgb_children;
int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size);
@ -471,10 +476,17 @@ static int
get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
{
m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
if (resp->rss_hdr.opcode == CPL_RX_DATA) {
const struct cpl_rx_data *cpl = (const void *)&resp->imm_data[0];
m->m_len = sizeof(*cpl) + ntohs(cpl->len);
} else if (resp->rss_hdr.opcode == CPL_RX_PKT) {
const struct cpl_rx_pkt *cpl = (const void *)&resp->imm_data[0];
m->m_len = sizeof(*cpl) + ntohs(cpl->len);
} else
m->m_len = IMMED_PKT_SIZE;
m->m_ext.ext_buf = NULL;
m->m_ext.ext_type = 0;
memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
memcpy(mtod(m, uint8_t *), resp->imm_data, m->m_len);
return (0);
}
@ -703,7 +715,8 @@ refill_fl(adapter_t *sc, struct sge_fl *q, int n)
cb_arg.error = 0;
while (n--) {
/*
* We only allocate a cluster, mbuf allocation happens after rx
* We allocate an uninitialized mbuf + cluster, mbuf is
* initialized after rx.
*/
if (q->zone == zone_pack) {
if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
@ -1170,57 +1183,6 @@ calc_tx_descs(const struct mbuf *m, int nsegs)
return flits_to_desc(flits);
}
static unsigned int
busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
{
struct mbuf *m0;
int err, pktlen, pass = 0;
bus_dma_tag_t tag = txq->entry_tag;
retry:
err = 0;
m0 = *m;
pktlen = m0->m_pkthdr.len;
#if defined(__i386__) || defined(__amd64__)
if (busdma_map_sg_collapse(tag, txsd->map, m, segs, nsegs) == 0) {
goto done;
} else
#endif
err = bus_dmamap_load_mbuf_sg(tag, txsd->map, m0, segs, nsegs, 0);
if (err == 0) {
goto done;
}
if (err == EFBIG && pass == 0) {
pass = 1;
/* Too many segments, try to defrag */
m0 = m_defrag(m0, M_DONTWAIT);
if (m0 == NULL) {
m_freem(*m);
*m = NULL;
return (ENOBUFS);
}
*m = m0;
goto retry;
} else if (err == ENOMEM) {
return (err);
} if (err) {
if (cxgb_debug)
printf("map failure err=%d pktlen=%d\n", err, pktlen);
m_freem(m0);
*m = NULL;
return (err);
}
done:
#if !defined(__i386__) && !defined(__amd64__)
bus_dmamap_sync(tag, txsd->map, BUS_DMASYNC_PREWRITE);
#endif
txsd->flags |= TX_SW_DESC_MAPPED;
return (0);
}
/**
* make_sgl - populate a scatter/gather list for a packet
* @sgp: the SGL to populate
@ -1328,10 +1290,10 @@ write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs
if (__predict_true(ndesc == 1)) {
set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
V_WR_SGLSFLT(flits)) | wr_hi,
htonl(V_WR_LEN(flits + sgl_flits) |
V_WR_GEN(txqs->gen)) | wr_lo);
/* XXX gen? */
V_WR_SGLSFLT(flits)) | wr_hi,
htonl(V_WR_LEN(flits + sgl_flits) | V_WR_GEN(txqs->gen)) |
wr_lo);
wr_gen2(txd, txqs->gen);
} else {
@ -1813,34 +1775,23 @@ cxgb_qflush(struct ifnet *ifp)
* its entirety.
*/
static __inline void
write_imm(struct tx_desc *d, struct mbuf *m,
write_imm(struct tx_desc *d, caddr_t src,
unsigned int len, unsigned int gen)
{
struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
struct work_request_hdr *from = (struct work_request_hdr *)src;
struct work_request_hdr *to = (struct work_request_hdr *)d;
uint32_t wr_hi, wr_lo;
if (len > WR_LEN)
panic("len too big %d\n", len);
if (len < sizeof(*from))
panic("len too small %d", len);
KASSERT(len <= WR_LEN && len >= sizeof(*from),
("%s: invalid len %d", __func__, len));
memcpy(&to[1], &from[1], len - sizeof(*from));
wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
V_WR_BCNTLFLT(len & 7));
wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) |
V_WR_LEN((len + 7) / 8));
V_WR_BCNTLFLT(len & 7));
wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8));
set_wr_hdr(to, wr_hi, wr_lo);
wmb();
wr_gen2(d, gen);
/*
* This check is a hack we should really fix the logic so
* that this can't happen
*/
if (m->m_type != MT_DONTFREE)
m_freem(m);
}
/**
@ -1908,12 +1859,6 @@ reclaim_completed_tx_imm(struct sge_txq *q)
q->cleaned += reclaim;
}
static __inline int
immediate(const struct mbuf *m)
{
return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ;
}
/**
* ctrl_xmit - send a packet through an SGE control Tx queue
* @adap: the adapter
@ -1931,11 +1876,8 @@ ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
struct sge_txq *q = &qs->txq[TXQ_CTRL];
if (__predict_false(!immediate(m))) {
m_freem(m);
return 0;
}
KASSERT(m->m_len <= WR_LEN, ("%s: bad tx data", __func__));
wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
wrp->wrh_lo = htonl(V_WR_TID(q->token));
@ -1950,7 +1892,7 @@ again: reclaim_completed_tx_imm(q);
}
goto again;
}
write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
q->in_use++;
if (++q->pidx >= q->size) {
@ -1960,7 +1902,9 @@ again: reclaim_completed_tx_imm(q);
TXQ_UNLOCK(qs);
wmb();
t3_write_reg(adap, A_SG_KDOORBELL,
F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
m_free(m);
return (0);
}
@ -1985,7 +1929,8 @@ again: reclaim_completed_tx_imm(q);
while (q->in_use < q->size &&
(m = mbufq_dequeue(&q->sendq)) != NULL) {
write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
m_free(m);
if (++q->pidx >= q->size) {
q->pidx = 0;
@ -2239,6 +2184,7 @@ is_new_response(const struct rsp_desc *r,
/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
#define NOMEM_INTR_DELAY 2500
#ifdef TCP_OFFLOAD
/**
* write_ofld_wr - write an offload work request
* @adap: the adapter
@ -2252,70 +2198,65 @@ is_new_response(const struct rsp_desc *r,
* data already carry the work request with most fields populated.
*/
static void
write_ofld_wr(adapter_t *adap, struct mbuf *m,
struct sge_txq *q, unsigned int pidx,
unsigned int gen, unsigned int ndesc,
bus_dma_segment_t *segs, unsigned int nsegs)
write_ofld_wr(adapter_t *adap, struct mbuf *m, struct sge_txq *q,
unsigned int pidx, unsigned int gen, unsigned int ndesc)
{
unsigned int sgl_flits, flits;
int i, idx, nsegs, wrlen;
struct work_request_hdr *from;
struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
struct sg_ent *sgp, t3sgl[TX_MAX_SEGS / 2 + 1];
struct tx_desc *d = &q->desc[pidx];
struct txq_state txqs;
if (immediate(m) && nsegs == 0) {
write_imm(d, m, m->m_len, gen);
struct sglist_seg *segs;
struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
struct sglist *sgl;
from = (void *)(oh + 1); /* Start of WR within mbuf */
wrlen = m->m_len - sizeof(*oh);
if (!(oh->flags & F_HDR_SGL)) {
write_imm(d, (caddr_t)from, wrlen, gen);
/*
* mbuf with "real" immediate tx data will be enqueue_wr'd by
* t3_push_frames and freed in wr_ack. Others, like those sent
* down by close_conn, t3_send_reset, etc. should be freed here.
*/
if (!(oh->flags & F_HDR_DF))
m_free(m);
return;
}
/* Only TX_DATA builds SGLs */
from = mtod(m, struct work_request_hdr *);
memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));
memcpy(&d->flit[1], &from[1], wrlen - sizeof(*from));
flits = m->m_len / 8;
sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
sgl = oh->sgl;
flits = wrlen / 8;
sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : t3sgl;
nsegs = sgl->sg_nseg;
segs = sgl->sg_segs;
for (idx = 0, i = 0; i < nsegs; i++) {
KASSERT(segs[i].ss_len, ("%s: 0 len in sgl", __func__));
if (i && idx == 0)
++sgp;
sgp->len[idx] = htobe32(segs[i].ss_len);
sgp->addr[idx] = htobe64(segs[i].ss_paddr);
idx ^= 1;
}
if (idx) {
sgp->len[idx] = 0;
sgp->addr[idx] = 0;
}
make_sgl(sgp, segs, nsegs);
sgl_flits = sgl_len(nsegs);
txqs.gen = gen;
txqs.pidx = pidx;
txqs.compl = 0;
write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
write_wr_hdr_sgl(ndesc, d, &txqs, q, t3sgl, flits, sgl_flits,
from->wrh_hi, from->wrh_lo);
}
/**
* calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
* @m: the packet
*
* Returns the number of Tx descriptors needed for the given offload
* packet. These packets are already fully constructed.
*/
static __inline unsigned int
calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
{
unsigned int flits, cnt = 0;
int ndescs;
if (m->m_len <= WR_LEN && nsegs == 0)
return (1); /* packet fits as immediate data */
/*
* This needs to be re-visited for TOE
*/
cnt = nsegs;
/* headers */
flits = m->m_len / 8;
ndescs = flits_to_desc(flits + sgl_len(cnt));
return (ndescs);
}
/**
* ofld_xmit - send a packet through an offload queue
* @adap: the adapter
@ -2327,28 +2268,19 @@ calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
static int
ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
{
int ret, nsegs;
int ret;
unsigned int ndesc;
unsigned int pidx, gen;
struct sge_txq *q = &qs->txq[TXQ_OFLD];
bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
struct tx_sw_desc *stx;
struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
nsegs = m_get_sgllen(m);
vsegs = m_get_sgl(m);
ndesc = calc_tx_descs_ofld(m, nsegs);
busdma_map_sgl(vsegs, segs, nsegs);
ndesc = G_HDR_NDESC(oh->flags);
stx = &q->sdesc[q->pidx];
TXQ_LOCK(qs);
again: reclaim_completed_tx(qs, 16, TXQ_OFLD);
ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
if (__predict_false(ret)) {
if (ret == 1) {
printf("no ofld desc avail\n");
m_set_priority(m, ndesc); /* save for restart */
TXQ_UNLOCK(qs);
return (EINTR);
}
@ -2363,16 +2295,11 @@ again: reclaim_completed_tx(qs, 16, TXQ_OFLD);
q->pidx -= q->size;
q->gen ^= 1;
}
#ifdef T3_TRACE
T3_TRACE5(adap->tb[q->cntxt_id & 7],
"ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
ndesc, pidx, skb->len, skb->len - skb->data_len,
skb_shinfo(skb)->nr_frags);
#endif
write_ofld_wr(adap, m, q, pidx, gen, ndesc);
check_ring_tx_db(adap, q, 1);
TXQ_UNLOCK(qs);
write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
check_ring_tx_db(adap, q, 1);
return (0);
}
@ -2389,16 +2316,15 @@ restart_offloadq(void *data, int npending)
struct sge_qset *qs = data;
struct sge_txq *q = &qs->txq[TXQ_OFLD];
adapter_t *adap = qs->port->adapter;
bus_dma_segment_t segs[TX_MAX_SEGS];
struct tx_sw_desc *stx = &q->sdesc[q->pidx];
int nsegs, cleaned;
int cleaned;
TXQ_LOCK(qs);
again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
while ((m = mbufq_peek(&q->sendq)) != NULL) {
unsigned int gen, pidx;
unsigned int ndesc = m_get_priority(m);
struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
unsigned int ndesc = G_HDR_NDESC(oh->flags);
if (__predict_false(q->size - q->in_use < ndesc)) {
setbit(&qs->txq_stopped, TXQ_OFLD);
@ -2419,9 +2345,8 @@ again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
}
(void)mbufq_dequeue(&q->sendq);
busdma_map_mbufs(&m, q, stx, segs, &nsegs);
TXQ_UNLOCK(qs);
write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
write_ofld_wr(adap, m, q, pidx, gen, ndesc);
TXQ_LOCK(qs);
}
#if USE_GTS
@ -2434,35 +2359,8 @@ again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}
/**
* queue_set - return the queue set a packet should use
* @m: the packet
*
* Maps a packet to the SGE queue set it should use. The desired queue
* set is carried in bits 1-3 in the packet's priority.
*/
static __inline int
queue_set(const struct mbuf *m)
{
return m_get_priority(m) >> 1;
}
/**
* is_ctrl_pkt - return whether an offload packet is a control packet
* @m: the packet
*
* Determines whether an offload packet should use an OFLD or a CTRL
* Tx queue. This is indicated by bit 0 in the packet's priority.
*/
static __inline int
is_ctrl_pkt(const struct mbuf *m)
{
return m_get_priority(m) & 1;
}
/**
* t3_offload_tx - send an offload packet
* @tdev: the offload device to send to
* @m: the packet
*
* Sends an offload packet. We use the packet priority to select the
@ -2470,77 +2368,35 @@ is_ctrl_pkt(const struct mbuf *m)
* should be sent as regular or control, bits 1-3 select the queue set.
*/
int
t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
t3_offload_tx(struct adapter *sc, struct mbuf *m)
{
adapter_t *adap = tdev2adap(tdev);
struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
struct sge_qset *qs = &sc->sge.qs[G_HDR_QSET(oh->flags)];
if (__predict_false(is_ctrl_pkt(m)))
return ctrl_xmit(adap, qs, m);
return ofld_xmit(adap, qs, m);
}
/**
* deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
* @tdev: the offload device that will be receiving the packets
* @q: the SGE response queue that assembled the bundle
* @m: the partial bundle
* @n: the number of packets in the bundle
*
* Delivers a (partial) bundle of Rx offload packets to an offload device.
*/
static __inline void
deliver_partial_bundle(struct t3cdev *tdev,
struct sge_rspq *q,
struct mbuf *mbufs[], int n)
{
if (n) {
q->offload_bundles++;
cxgb_ofld_recv(tdev, mbufs, n);
}
}
static __inline int
rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
struct mbuf *m, struct mbuf *rx_gather[],
unsigned int gather_idx)
{
rq->offload_pkts++;
m->m_pkthdr.header = mtod(m, void *);
rx_gather[gather_idx++] = m;
if (gather_idx == RX_BUNDLE_SIZE) {
cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
gather_idx = 0;
rq->offload_bundles++;
}
return (gather_idx);
if (oh->flags & F_HDR_CTRL) {
m_adj(m, sizeof (*oh)); /* trim ofld_hdr off */
return (ctrl_xmit(sc, qs, m));
} else
return (ofld_xmit(sc, qs, m));
}
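
t3_offload_tx() now takes the adapter directly and routes on oh->flags instead of the old mbuf-priority bits. The following is a hypothetical sender-side sketch of how a TOM path might wrap a CPL message before handing it down; V_HDR_NDESC() and V_HDR_QSET() are assumed setters matching the G_HDR_* accessors used above and are not shown in this change, so the real t3_tom code may build the header differently.

static int
example_send_ofld(struct adapter *sc, struct mbuf *m, int qset, int ndesc,
    int is_ctrl)
{
	struct ofld_hdr *oh;

	/* Reserve room for the driver-private header in front of the CPL. */
	M_PREPEND(m, sizeof(*oh), M_DONTWAIT);
	if (m == NULL)
		return (ENOMEM);
	oh = mtod(m, struct ofld_hdr *);
	bzero(oh, sizeof(*oh));

	/* V_HDR_NDESC/V_HDR_QSET are assumed; F_HDR_CTRL selects ctrl_xmit. */
	oh->flags = V_HDR_NDESC(ndesc) | V_HDR_QSET(qset) |
	    (is_ctrl ? F_HDR_CTRL : 0);

	return (t3_offload_tx(sc, m));
}
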
#endif
static void
restart_tx(struct sge_qset *qs)
{
struct adapter *sc = qs->port->adapter;
if (isset(&qs->txq_stopped, TXQ_OFLD) &&
should_restart_tx(&qs->txq[TXQ_OFLD]) &&
test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
qs->txq[TXQ_OFLD].restarts++;
DPRINTF("restarting TXQ_OFLD\n");
taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
}
DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
qs->txq[TXQ_CTRL].in_use);
if (isset(&qs->txq_stopped, TXQ_CTRL) &&
should_restart_tx(&qs->txq[TXQ_CTRL]) &&
test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
qs->txq[TXQ_CTRL].restarts++;
DPRINTF("restarting TXQ_CTRL\n");
taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
}
}
@ -2569,6 +2425,7 @@ t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
q->port = pi;
q->adap = sc;
if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
@ -2630,8 +2487,10 @@ t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
q->txq[i].gen = 1;
q->txq[i].size = p->txq_size[i];
}
#ifdef TCP_OFFLOAD
TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
#endif
TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
@ -2736,8 +2595,7 @@ t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
mtx_unlock_spin(&sc->sge.reg_lock);
t3_update_qset_coalesce(q, p);
q->port = pi;
refill_fl(sc, &q->fl[0], q->fl[0].size);
refill_fl(sc, &q->fl[1], q->fl[1].size);
refill_rspq(sc, &q->rspq, q->rspq.size - 1);
@ -2768,8 +2626,6 @@ t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
struct ifnet *ifp = pi->ifp;
DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
cpl->csum_valid && cpl->csum == 0xffff) {
m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);
@ -2967,8 +2823,6 @@ process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
int skip_lro;
struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
#endif
struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
int ngathered = 0;
struct t3_mbuf_hdr *mh = &rspq->rspq_mh;
#ifdef DEBUG
static int last_holdoff = 0;
@ -2982,10 +2836,10 @@ process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
while (__predict_true(budget_left && is_new_response(r, rspq))) {
int eth, eop = 0, ethpad = 0;
uint32_t flags = ntohl(r->flags);
uint32_t rss_csum = *(const uint32_t *)r;
uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
uint8_t opcode = r->rss_hdr.opcode;
eth = (r->rss_hdr.opcode == CPL_RX_PKT);
eth = (opcode == CPL_RX_PKT);
if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
struct mbuf *m;
@ -3005,27 +2859,27 @@ process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
memcpy(mtod(m, char *), r, AN_PKT_SIZE);
m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
*mtod(m, char *) = CPL_ASYNC_NOTIF;
rss_csum = htonl(CPL_ASYNC_NOTIF << 24);
opcode = CPL_ASYNC_NOTIF;
eop = 1;
rspq->async_notif++;
goto skip;
} else if (flags & F_RSPD_IMM_DATA_VALID) {
struct mbuf *m = NULL;
struct mbuf *m = m_gethdr(M_DONTWAIT, MT_DATA);
DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
r->rss_hdr.opcode, rspq->cidx);
if (mh->mh_head == NULL)
mh->mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
else
m = m_gethdr(M_DONTWAIT, MT_DATA);
if (mh->mh_head == NULL && m == NULL) {
if (m == NULL) {
no_mem:
rspq->next_holdoff = NOMEM_INTR_DELAY;
budget_left--;
break;
}
get_imm_packet(adap, r, mh->mh_head);
if (mh->mh_head == NULL)
mh->mh_head = m;
else
mh->mh_tail->m_next = m;
mh->mh_tail = m;
get_imm_packet(adap, r, m);
mh->mh_head->m_pkthdr.len += m->m_len;
eop = 1;
rspq->imm_data++;
} else if (r->len_cq) {
@ -3048,30 +2902,14 @@ process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
handle_rsp_cntrl_info(qs, flags);
}
r++;
if (__predict_false(++rspq->cidx == rspq->size)) {
rspq->cidx = 0;
rspq->gen ^= 1;
r = rspq->desc;
}
if (++rspq->credits >= 64) {
refill_rspq(adap, rspq, rspq->credits);
rspq->credits = 0;
}
if (!eth && eop) {
mh->mh_head->m_pkthdr.csum_data = rss_csum;
/*
* XXX size mismatch
*/
m_set_priority(mh->mh_head, rss_hash);
ngathered = rx_offload(&adap->tdev, rspq,
mh->mh_head, offload_mbufs, ngathered);
rspq->offload_pkts++;
#ifdef TCP_OFFLOAD
adap->cpl_handler[opcode](qs, r, mh->mh_head);
#else
m_freem(mh->mh_head);
#endif
mh->mh_head = NULL;
DPRINTF("received offload packet\n");
} else if (eth && eop) {
struct mbuf *m = mh->mh_head;
@ -3106,13 +2944,23 @@ process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
mh->mh_head = NULL;
}
r++;
if (__predict_false(++rspq->cidx == rspq->size)) {
rspq->cidx = 0;
rspq->gen ^= 1;
r = rspq->desc;
}
if (++rspq->credits >= 64) {
refill_rspq(adap, rspq, rspq->credits);
rspq->credits = 0;
}
__refill_fl_lt(adap, &qs->fl[0], 32);
__refill_fl_lt(adap, &qs->fl[1], 32);
--budget_left;
}
deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
#if defined(INET6) || defined(INET)
/* Flush LRO */
while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {

View File

@ -31,15 +31,6 @@
#define _MVEC_H_
#include <machine/bus.h>
#define M_DDP 0x200000 /* direct data placement mbuf */
#define EXT_PHYS 10 /* physical/bus address */
#define m_cur_offset m_ext.ext_size /* override to provide ddp offset */
#define m_seq m_pkthdr.csum_data /* stored sequence */
#define m_ddp_gl m_ext.ext_buf /* ddp list */
#define m_ddp_flags m_pkthdr.csum_flags /* ddp flags */
#define m_ulp_mode m_pkthdr.tso_segsz /* upper level protocol */
static __inline void
busdma_map_mbuf_fast(bus_dma_tag_t tag, bus_dmamap_t map,
struct mbuf *m, bus_dma_segment_t *seg)
@ -58,17 +49,6 @@ int busdma_map_sg_collapse(bus_dma_tag_t tag, bus_dmamap_t map,
struct mbuf **m, bus_dma_segment_t *segs, int *nsegs);
void busdma_map_sg_vec(bus_dma_tag_t tag, bus_dmamap_t map,
struct mbuf *m, bus_dma_segment_t *segs, int *nsegs);
static __inline int
busdma_map_sgl(bus_dma_segment_t *vsegs, bus_dma_segment_t *segs, int count)
{
while (count--) {
segs->ds_addr = pmap_kextract((vm_offset_t)vsegs->ds_addr);
segs->ds_len = vsegs->ds_len;
segs++;
vsegs++;
}
return (0);
}
static __inline void
m_freem_list(struct mbuf *m)
@ -84,5 +64,4 @@ m_freem_list(struct mbuf *m)
}
}
#endif /* _MVEC_H_ */

View File

@ -1,62 +0,0 @@
/*-
* Copyright (c) 2007-2008, Chelsio Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Neither the name of the Chelsio Corporation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _T3CDEV_H_
#define _T3CDEV_H_
#define T3CNAMSIZ 16
/* Get the t3cdev associated with an ifnet */
#define T3CDEV(ifp) (&(((struct port_info *)(ifp)->if_softc))->adapter->tdev)
struct cxgb3_client;
enum t3ctype {
T3A = 0,
T3B,
T3C
};
struct t3cdev {
char name[T3CNAMSIZ]; /* T3C device name */
enum t3ctype type;
TAILQ_ENTRY(t3cdev) entry; /* for list linking */
struct ifnet *lldev; /* LL dev associated with T3C messages */
struct adapter *adapter;
int (*send)(struct t3cdev *dev, struct mbuf *m);
int (*recv)(struct t3cdev *dev, struct mbuf **m, int n);
int (*ctl)(struct t3cdev *dev, unsigned int req, void *data);
void (*arp_update)(struct t3cdev *dev, struct rtentry *neigh, uint8_t *enaddr, struct sockaddr *sa);
void *priv; /* driver private data */
void *l2opt; /* optional layer 2 data */
void *l3opt; /* optional layer 3 data */
void *l4opt; /* optional layer 4 data */
void *ulp; /* ulp stuff */
};
#endif /* _T3CDEV_H_ */

View File

@ -29,11 +29,12 @@ POSSIBILITY OF SUCH DAMAGE.
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
@ -54,20 +55,14 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h>
#include <sys/eventhandler.h>
#if __FreeBSD_version < 800044
#define V_ifnet ifnet
#endif
#include <net/if.h>
#include <net/if_var.h>
#if __FreeBSD_version >= 800056
#include <net/vnet.h>
#endif
#include <netinet/in.h>
#include <netinet/toecore.h>
#include <contrib/rdma/ib_verbs.h>
#include <rdma/ib_verbs.h>
#include <linux/idr.h>
#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
#ifdef TCP_OFFLOAD
#include <cxgb_include.h>
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
#include <ulp/iw_cxgb/iw_cxgb_hal.h>
@ -75,26 +70,21 @@ __FBSDID("$FreeBSD$");
#include <ulp/iw_cxgb/iw_cxgb_cm.h>
#include <ulp/iw_cxgb/iw_cxgb.h>
/*
* XXX :-/
*
*/
static int iwch_mod_load(void);
static int iwch_mod_unload(void);
static int iwch_activate(struct adapter *);
static int iwch_deactivate(struct adapter *);
#define idr_init(x)
cxgb_cpl_handler_func t3c_handlers[NUM_CPL_CMDS];
static void open_rnic_dev(struct t3cdev *);
static void close_rnic_dev(struct t3cdev *);
static TAILQ_HEAD( ,iwch_dev) dev_list;
static struct mtx dev_mutex;
static eventhandler_tag event_tag;
static struct uld_info iwch_uld_info = {
.uld_id = ULD_IWARP,
.activate = iwch_activate,
.deactivate = iwch_deactivate,
};
static void
rnic_init(struct iwch_dev *rnicp)
{
CTR2(KTR_IW_CXGB, "%s iwch_dev %p", __FUNCTION__, rnicp);
idr_init(&rnicp->cqidr);
idr_init(&rnicp->qpidr);
idr_init(&rnicp->mmidr);
@ -103,15 +93,16 @@ rnic_init(struct iwch_dev *rnicp)
rnicp->attr.vendor_id = 0x168;
rnicp->attr.vendor_part_id = 7;
rnicp->attr.max_qps = T3_MAX_NUM_QP - 32;
rnicp->attr.max_wrs = (1UL << 24) - 1;
rnicp->attr.max_wrs = T3_MAX_QP_DEPTH;
rnicp->attr.max_sge_per_wr = T3_MAX_SGE;
rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE;
rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1;
rnicp->attr.max_cqes_per_cq = (1UL << 24) - 1;
rnicp->attr.max_cqes_per_cq = T3_MAX_CQ_DEPTH;
rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev);
rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE;
rnicp->attr.max_pds = T3_MAX_NUM_PD - 1;
rnicp->attr.mem_pgsizes_bitmask = 0x7FFF; /* 4KB-128MB */
rnicp->attr.mem_pgsizes_bitmask = T3_PAGESIZE_MASK;
rnicp->attr.max_mr_size = T3_MAX_MR_SIZE;
rnicp->attr.can_resize_wq = 0;
rnicp->attr.max_rdma_reads_per_qp = 8;
rnicp->attr.max_rdma_read_resources =
@ -127,170 +118,183 @@ rnic_init(struct iwch_dev *rnicp)
rnicp->attr.zbva_support = 1;
rnicp->attr.local_invalidate_fence = 1;
rnicp->attr.cq_overflow_detection = 1;
return;
}
static void
open_rnic_dev(struct t3cdev *tdev)
rnic_uninit(struct iwch_dev *rnicp)
{
idr_destroy(&rnicp->cqidr);
idr_destroy(&rnicp->qpidr);
idr_destroy(&rnicp->mmidr);
mtx_destroy(&rnicp->lock);
}
static int
iwch_activate(struct adapter *sc)
{
struct iwch_dev *rnicp;
static int vers_printed;
int rc;
KASSERT(!isset(&sc->offload_map, MAX_NPORTS),
("%s: iWARP already activated on %s", __func__,
device_get_nameunit(sc->dev)));
CTR2(KTR_IW_CXGB, "%s t3cdev %p", __FUNCTION__, tdev);
if (!vers_printed++)
printf("Chelsio T3 RDMA Driver - version x.xx\n");
rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp));
if (!rnicp) {
printf("Cannot allocate ib device\n");
return;
}
rnicp->rdev.ulp = rnicp;
rnicp->rdev.t3cdev_p = tdev;
if (rnicp == NULL)
return (ENOMEM);
mtx_lock(&dev_mutex);
sc->iwarp_softc = rnicp;
rnicp->rdev.adap = sc;
if (cxio_rdev_open(&rnicp->rdev)) {
mtx_unlock(&dev_mutex);
cxio_hal_init(sc);
iwch_cm_init_cpl(sc);
rc = cxio_rdev_open(&rnicp->rdev);
if (rc != 0) {
printf("Unable to open CXIO rdev\n");
ib_dealloc_device(&rnicp->ibdev);
return;
goto err1;
}
rnic_init(rnicp);
TAILQ_INSERT_TAIL(&dev_list, rnicp, entry);
mtx_unlock(&dev_mutex);
if (iwch_register_device(rnicp)) {
rc = iwch_register_device(rnicp);
if (rc != 0) {
printf("Unable to register device\n");
close_rnic_dev(tdev);
goto err2;
}
#ifdef notyet
printf("Initialized device %s\n",
pci_name(rnicp->rdev.rnic_info.pdev));
#endif
return;
return (0);
err2:
rnic_uninit(rnicp);
cxio_rdev_close(&rnicp->rdev);
err1:
cxio_hal_uninit(sc);
iwch_cm_term_cpl(sc);
sc->iwarp_softc = NULL;
return (rc);
}
static void
close_rnic_dev(struct t3cdev *tdev)
{
struct iwch_dev *dev, *tmp;
CTR2(KTR_IW_CXGB, "%s t3cdev %p", __FUNCTION__, tdev);
mtx_lock(&dev_mutex);
TAILQ_FOREACH_SAFE(dev, &dev_list, entry, tmp) {
if (dev->rdev.t3cdev_p == tdev) {
#ifdef notyet
list_del(&dev->entry);
iwch_unregister_device(dev);
cxio_rdev_close(&dev->rdev);
idr_destroy(&dev->cqidr);
idr_destroy(&dev->qpidr);
idr_destroy(&dev->mmidr);
ib_dealloc_device(&dev->ibdev);
#endif
break;
}
}
mtx_unlock(&dev_mutex);
}
static ifaddr_event_handler_t
ifaddr_event_handler(void *arg, struct ifnet *ifp)
{
printf("%s if name %s \n", __FUNCTION__, ifp->if_xname);
if (ifp->if_capabilities & IFCAP_TOE4) {
KASSERT(T3CDEV(ifp) != NULL, ("null t3cdev ptr!"));
if (cxio_hal_find_rdev_by_t3cdev(T3CDEV(ifp)) == NULL)
open_rnic_dev(T3CDEV(ifp));
}
return 0;
}
static int
iwch_init_module(void)
iwch_deactivate(struct adapter *sc)
{
VNET_ITERATOR_DECL(vnet_iter);
int err;
struct ifnet *ifp;
struct iwch_dev *rnicp;
printf("%s enter\n", __FUNCTION__);
TAILQ_INIT(&dev_list);
mtx_init(&dev_mutex, "iwch dev_list lock", NULL, MTX_DEF);
err = cxio_hal_init();
if (err)
return err;
err = iwch_cm_init();
if (err)
return err;
cxio_register_ev_cb(iwch_ev_dispatch);
rnicp = sc->iwarp_softc;
/* Register for ifaddr events to dynamically add TOE devs */
event_tag = EVENTHANDLER_REGISTER(ifaddr_event, ifaddr_event_handler,
NULL, EVENTHANDLER_PRI_ANY);
iwch_unregister_device(rnicp);
rnic_uninit(rnicp);
cxio_rdev_close(&rnicp->rdev);
cxio_hal_uninit(sc);
iwch_cm_term_cpl(sc);
ib_dealloc_device(&rnicp->ibdev);
/* Register existing TOE interfaces by walking the ifnet chain */
IFNET_RLOCK();
VNET_LIST_RLOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter); /* XXX CURVNET_SET_QUIET() ? */
TAILQ_FOREACH(ifp, &V_ifnet, if_link)
(void)ifaddr_event_handler(NULL, ifp);
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK();
IFNET_RUNLOCK();
return 0;
sc->iwarp_softc = NULL;
return (0);
}
static void
iwch_exit_module(void)
iwch_activate_all(struct adapter *sc, void *arg __unused)
{
EVENTHANDLER_DEREGISTER(ifaddr_event, event_tag);
cxio_unregister_ev_cb(iwch_ev_dispatch);
ADAPTER_LOCK(sc);
if ((sc->open_device_map & sc->offload_map) != 0 &&
t3_activate_uld(sc, ULD_IWARP) == 0)
setbit(&sc->offload_map, MAX_NPORTS);
ADAPTER_UNLOCK(sc);
}
static void
iwch_deactivate_all(struct adapter *sc, void *arg __unused)
{
ADAPTER_LOCK(sc);
if (isset(&sc->offload_map, MAX_NPORTS) &&
t3_deactivate_uld(sc, ULD_IWARP) == 0)
clrbit(&sc->offload_map, MAX_NPORTS);
ADAPTER_UNLOCK(sc);
}
static int
iwch_mod_load(void)
{
int rc;
rc = iwch_cm_init();
if (rc != 0)
return (rc);
rc = t3_register_uld(&iwch_uld_info);
if (rc != 0) {
iwch_cm_term();
return (rc);
}
t3_iterate(iwch_activate_all, NULL);
return (rc);
}
static int
iwch_mod_unload(void)
{
t3_iterate(iwch_deactivate_all, NULL);
iwch_cm_term();
cxio_hal_exit();
}
static int
iwch_load(module_t mod, int cmd, void *arg)
if (t3_unregister_uld(&iwch_uld_info) == EBUSY)
return (EBUSY);
return (0);
}
#endif /* TCP_OFFLOAD */
#undef MODULE_VERSION
#include <sys/module.h>
static int
iwch_modevent(module_t mod, int cmd, void *arg)
{
int err = 0;
int rc = 0;
switch (cmd) {
case MOD_LOAD:
printf("Loading iw_cxgb.\n");
#ifdef TCP_OFFLOAD
switch (cmd) {
case MOD_LOAD:
rc = iwch_mod_load();
if(rc)
printf("iw_cxgb: Chelsio T3 RDMA Driver failed to load\n");
else
printf("iw_cxgb: Chelsio T3 RDMA Driver loaded\n");
break;
iwch_init_module();
break;
case MOD_QUIESCE:
break;
case MOD_UNLOAD:
printf("Unloading iw_cxgb.\n");
iwch_exit_module();
break;
case MOD_SHUTDOWN:
break;
default:
err = EOPNOTSUPP;
break;
}
case MOD_UNLOAD:
rc = iwch_mod_unload();
if(rc)
printf("iw_cxgb: Chelsio T3 RDMA Driver failed to unload\n");
else
printf("iw_cxgb: Chelsio T3 RDMA Driver unloaded\n");
break;
return (err);
default:
rc = EINVAL;
}
#else
printf("iw_cxgb: compiled without TCP_OFFLOAD support.\n");
rc = EOPNOTSUPP;
#endif
return (rc);
}
static moduledata_t mod_data = {
static moduledata_t iwch_mod_data = {
"iw_cxgb",
iwch_load,
iwch_modevent,
0
};
MODULE_VERSION(iw_cxgb, 1);
DECLARE_MODULE(iw_cxgb, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);
MODULE_DEPEND(iw_cxgb, rdma_core, 1, 1, 1);
MODULE_DEPEND(iw_cxgb, if_cxgb, 1, 1, 1);
DECLARE_MODULE(iw_cxgb, iwch_mod_data, SI_SUB_EXEC, SI_ORDER_ANY);
MODULE_DEPEND(t3_tom, cxgbc, 1, 1, 1);
MODULE_DEPEND(iw_cxgb, toecore, 1, 1, 1);
MODULE_DEPEND(iw_cxgb, t3_tom, 1, 1, 1);
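
With this change iw_cxgb stops scanning the ifnet list for TOE-capable interfaces and instead registers with the base driver as an upper-layer driver, receiving activate/deactivate callbacks per adapter. The skeleton below is a minimal sketch of that contract for a hypothetical ULD; it uses only the hooks visible in this file (struct uld_info, t3_register_uld()/t3_unregister_uld(), and t3_iterate() to catch adapters that are already up), and the iwch_* functions above remain the reference for locking and ordering.

static int
example_uld_activate(struct adapter *sc)
{
	/* Allocate per-adapter softc, hang it off sc, register handlers. */
	return (0);
}

static int
example_uld_deactivate(struct adapter *sc)
{
	/* Quiesce and free per-adapter state. */
	return (0);
}

static struct uld_info example_uld_info = {
	.uld_id = ULD_IWARP,	/* a real ULD would use its own id */
	.activate = example_uld_activate,
	.deactivate = example_uld_deactivate,
};

/*
 * MOD_LOAD: t3_register_uld(&example_uld_info), then t3_iterate() to
 * activate on adapters that already have ports up, as iwch_mod_load()
 * does above.  MOD_UNLOAD: deactivate everywhere, then
 * t3_unregister_uld(), which returns EBUSY while the ULD is still in use.
 */
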

View File

@ -37,6 +37,13 @@ struct iwch_cq;
struct iwch_qp;
struct iwch_mr;
enum t3ctype {
T3A = 0,
T3B,
T3C
};
#define PAGE_MASK_IWARP (~(PAGE_SIZE-1))
struct iwch_rnic_attributes {
u32 vendor_id;
@ -57,6 +64,7 @@ struct iwch_rnic_attributes {
* size (4k)^i. Phys block list mode unsupported.
*/
u32 mem_pgsizes_bitmask;
u64 max_mr_size;
u8 can_resize_wq;
/*
@ -97,9 +105,9 @@ struct iwch_dev {
struct cxio_rdev rdev;
u32 device_cap_flags;
struct iwch_rnic_attributes attr;
struct kvl cqidr;
struct kvl qpidr;
struct kvl mmidr;
struct idr cqidr;
struct idr qpidr;
struct idr mmidr;
struct mtx lock;
TAILQ_ENTRY(iwch_dev) entry;
};
@ -113,40 +121,43 @@ static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev)
return container_of(ibdev, struct iwch_dev, ibdev);
}
static inline int t3b_device(const struct iwch_dev *rhp)
static inline int t3b_device(const struct iwch_dev *rhp __unused)
{
return rhp->rdev.t3cdev_p->type == T3B;
return (0);
}
static inline int t3a_device(const struct iwch_dev *rhp)
static inline int t3a_device(const struct iwch_dev *rhp __unused)
{
return rhp->rdev.t3cdev_p->type == T3A;
return (0);
}
static inline struct iwch_cq *get_chp(struct iwch_dev *rhp, u32 cqid)
{
return kvl_lookup(&rhp->cqidr, cqid);
return idr_find(&rhp->cqidr, cqid);
}
static inline struct iwch_qp *get_qhp(struct iwch_dev *rhp, u32 qpid)
{
return kvl_lookup(&rhp->qpidr, qpid);
return idr_find(&rhp->qpidr, qpid);
}
static inline struct iwch_mr *get_mhp(struct iwch_dev *rhp, u32 mmid)
{
return kvl_lookup(&rhp->mmidr, mmid);
return idr_find(&rhp->mmidr, mmid);
}
static inline int insert_handle(struct iwch_dev *rhp, struct kvl *kvlp,
static inline int insert_handle(struct iwch_dev *rhp, struct idr *idr,
void *handle, u32 id)
{
int ret;
u32 newid;
do {
if (!idr_pre_get(idr, GFP_KERNEL)) {
return -ENOMEM;
}
mtx_lock(&rhp->lock);
ret = kvl_alloc_above(kvlp, handle, id, &newid);
ret = idr_get_new_above(idr, handle, id, &newid);
WARN_ON(ret != 0);
WARN_ON(!ret && newid != id);
mtx_unlock(&rhp->lock);
@ -155,14 +166,12 @@ static inline int insert_handle(struct iwch_dev *rhp, struct kvl *kvlp,
return ret;
}
static inline void remove_handle(struct iwch_dev *rhp, struct kvl *kvlp, u32 id)
static inline void remove_handle(struct iwch_dev *rhp, struct idr *idr, u32 id)
{
mtx_lock(&rhp->lock);
kvl_delete(kvlp, id);
idr_remove(idr, id);
mtx_unlock(&rhp->lock);
}
extern struct cxgb_client t3c_client;
extern cxgb_cpl_handler_func t3c_handlers[NUM_CPL_CMDS];
extern void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct mbuf *m);
void iwch_ev_dispatch(struct iwch_dev *, struct mbuf *);
#endif
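
The kvl wrappers are gone; handle management now goes through the Linux-compat idr API, with insert_handle()/remove_handle() above capturing the locking pattern (idr_pre_get() outside the lock, the table update under rhp->lock). A short usage sketch for a hypothetical QP registration path follows; the qhp->wq.qpid spelling is taken from its use elsewhere in this change.

static int
example_register_qp(struct iwch_dev *rhp, struct iwch_qp *qhp)
{
	/* insert_handle() may sleep in idr_pre_get(..., GFP_KERNEL). */
	return (insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid));
}

static void
example_unregister_qp(struct iwch_dev *rhp, struct iwch_qp *qhp)
{
	remove_handle(rhp, &rhp->qpidr, qhp->wq.qpid);
	KASSERT(get_qhp(rhp, qhp->wq.qpid) == NULL,
	    ("%s: qpid %u still in qpidr", __func__, qhp->wq.qpid));
}
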

View File

@ -29,11 +29,13 @@ POSSIBILITY OF SUCH DAMAGE.
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
@ -66,13 +68,17 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcp.h>
#include <netinet/tcpip.h>
#include <contrib/rdma/ib_verbs.h>
#include <rdma/ib_verbs.h>
#include <linux/idr.h>
#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
#include <cxgb_include.h>
#include <ulp/tom/cxgb_tom.h>
#include <ulp/tom/cxgb_t3_ddp.h>
#include <ulp/tom/cxgb_defs.h>
#include <ulp/tom/cxgb_toepcb.h>
#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
#include <rdma/ib_verbs.h>
#include <linux/idr.h>
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
#include <ulp/iw_cxgb/iw_cxgb_hal.h>
#include <ulp/iw_cxgb/iw_cxgb_provider.h>
@ -97,46 +103,46 @@ static char *states[] = {
};
#endif
SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "iw_cxgb driver parameters");
SYSCTL_NODE(_hw, OID_AUTO, iw_cxgb, CTLFLAG_RD, 0, "iw_cxgb driver parameters");
static int ep_timeout_secs = 10;
static int ep_timeout_secs = 60;
TUNABLE_INT("hw.iw_cxgb.ep_timeout_secs", &ep_timeout_secs);
SYSCTL_INT(_hw_cxgb, OID_AUTO, ep_timeout_secs, CTLFLAG_RDTUN, &ep_timeout_secs, 0,
"CM Endpoint operation timeout in seconds (default=10)");
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, ep_timeout_secs, CTLFLAG_RW, &ep_timeout_secs, 0,
"CM Endpoint operation timeout in seconds (default=60)");
static int mpa_rev = 1;
TUNABLE_INT("hw.iw_cxgb.mpa_rev", &mpa_rev);
SYSCTL_INT(_hw_cxgb, OID_AUTO, mpa_rev, CTLFLAG_RDTUN, &mpa_rev, 0,
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, mpa_rev, CTLFLAG_RW, &mpa_rev, 0,
"MPA Revision, 0 supports amso1100, 1 is spec compliant. (default=1)");
static int markers_enabled = 0;
TUNABLE_INT("hw.iw_cxgb.markers_enabled", &markers_enabled);
SYSCTL_INT(_hw_cxgb, OID_AUTO, markers_enabled, CTLFLAG_RDTUN, &markers_enabled, 0,
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, markers_enabled, CTLFLAG_RW, &markers_enabled, 0,
"Enable MPA MARKERS (default(0)=disabled)");
static int crc_enabled = 1;
TUNABLE_INT("hw.iw_cxgb.crc_enabled", &crc_enabled);
SYSCTL_INT(_hw_cxgb, OID_AUTO, crc_enabled, CTLFLAG_RDTUN, &crc_enabled, 0,
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, crc_enabled, CTLFLAG_RW, &crc_enabled, 0,
"Enable MPA CRC (default(1)=enabled)");
static int rcv_win = 256 * 1024;
TUNABLE_INT("hw.iw_cxgb.rcv_win", &rcv_win);
SYSCTL_INT(_hw_cxgb, OID_AUTO, rcv_win, CTLFLAG_RDTUN, &rcv_win, 0,
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, rcv_win, CTLFLAG_RW, &rcv_win, 0,
"TCP receive window in bytes (default=256KB)");
static int snd_win = 32 * 1024;
TUNABLE_INT("hw.iw_cxgb.snd_win", &snd_win);
SYSCTL_INT(_hw_cxgb, OID_AUTO, snd_win, CTLFLAG_RDTUN, &snd_win, 0,
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, snd_win, CTLFLAG_RW, &snd_win, 0,
"TCP send window in bytes (default=32KB)");
static unsigned int nocong = 0;
TUNABLE_INT("hw.iw_cxgb.nocong", &nocong);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, nocong, CTLFLAG_RDTUN, &nocong, 0,
SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, nocong, CTLFLAG_RW, &nocong, 0,
"Turn off congestion control (default=0)");
static unsigned int cong_flavor = 1;
TUNABLE_INT("hw.iw_cxgb.cong_flavor", &cong_flavor);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, cong_flavor, CTLFLAG_RDTUN, &cong_flavor, 0,
SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, cong_flavor, CTLFLAG_RW, &cong_flavor, 0,
"TCP Congestion control flavor (default=1)");
static void ep_timeout(void *arg);
@ -174,42 +180,44 @@ static void
stop_ep_timer(struct iwch_ep *ep)
{
CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
if (!callout_pending(&ep->timer)) {
CTR3(KTR_IW_CXGB, "%s timer stopped when its not running! ep %p state %u\n",
__func__, ep, ep->com.state);
return;
}
callout_drain(&ep->timer);
put_ep(&ep->com);
}
static int set_tcpinfo(struct iwch_ep *ep)
static int
set_tcpinfo(struct iwch_ep *ep)
{
struct tcp_info ti;
struct sockopt sopt;
int err;
struct socket *so = ep->com.so;
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp;
struct toepcb *toep;
int rc = 0;
sopt.sopt_dir = SOPT_GET;
sopt.sopt_level = IPPROTO_TCP;
sopt.sopt_name = TCP_INFO;
sopt.sopt_val = (caddr_t)&ti;
sopt.sopt_valsize = sizeof ti;
sopt.sopt_td = NULL;
err = sogetopt(ep->com.so, &sopt);
if (err) {
printf("%s can't get tcpinfo\n", __FUNCTION__);
return -err;
}
if (!(ti.tcpi_options & TCPI_OPT_TOE)) {
printf("%s connection NOT OFFLOADED!\n", __FUNCTION__);
return -EINVAL;
}
INP_WLOCK(inp);
tp = intotcpcb(inp);
ep->snd_seq = ti.tcpi_snd_nxt;
ep->rcv_seq = ti.tcpi_rcv_nxt;
ep->emss = ti.tcpi_snd_mss - sizeof(struct tcpiphdr);
ep->hwtid = TOEPCB(ep->com.so)->tp_tid; /* XXX */
if (ti.tcpi_options & TCPI_OPT_TIMESTAMPS)
ep->emss -= 12;
if ((tp->t_flags & TF_TOE) == 0) {
rc = EINVAL;
printf("%s: connection NOT OFFLOADED!\n", __func__);
goto done;
}
toep = tp->t_toe;
ep->hwtid = toep->tp_tid;
ep->snd_seq = tp->snd_nxt;
ep->rcv_seq = tp->rcv_nxt;
ep->emss = tp->t_maxseg;
if (ep->emss < 128)
ep->emss = 128;
return 0;
done:
INP_WUNLOCK(inp);
return (rc);
}
static enum iwch_ep_state
@ -264,56 +272,6 @@ void __free_ep(struct iwch_ep_common *epc)
free(epc, M_DEVBUF);
}
int
iwch_quiesce_tid(struct iwch_ep *ep)
{
#ifdef notyet
struct cpl_set_tcb_field *req;
struct mbuf *m = get_mbuf(NULL, sizeof(*req), M_NOWAIT);
if (m == NULL)
return (-ENOMEM);
req = (struct cpl_set_tcb_field *) mbuf_put(m, sizeof(*req));
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
req->reply = 0;
req->cpu_idx = 0;
req->word = htons(W_TCB_RX_QUIESCE);
req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE);
m_set_priority(m, CPL_PRIORITY_DATA);
cxgb_ofld_send(ep->com.tdev, m);
#endif
return 0;
}
int
iwch_resume_tid(struct iwch_ep *ep)
{
#ifdef notyet
struct cpl_set_tcb_field *req;
struct mbuf *m = get_mbuf(NULL, sizeof(*req), M_NOWAIT);
if (m == NULL)
return (-ENOMEM);
req = (struct cpl_set_tcb_field *) mbuf_put(m, sizeof(*req));
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
req->reply = 0;
req->cpu_idx = 0;
req->word = htons(W_TCB_RX_QUIESCE);
req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
req->val = 0;
m_set_priority(m, CPL_PRIORITY_DATA);
cxgb_ofld_send(ep->com.tdev, m);
#endif
return 0;
}
static struct rtentry *
find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
__be16 peer_port, u8 tos)
@ -331,13 +289,16 @@ find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
}
static void
close_socket(struct iwch_ep_common *epc)
close_socket(struct iwch_ep_common *epc, int close)
{
CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]);
SOCK_LOCK(epc->so);
soupcall_clear(epc->so, SO_RCV);
SOCK_UNLOCK(epc->so);
soshutdown(epc->so, SHUT_WR|SHUT_RD);
if (close)
soclose(epc->so);
else
soshutdown(epc->so, SHUT_WR|SHUT_RD);
epc->so = NULL;
}
@ -500,7 +461,7 @@ abort_connection(struct iwch_ep *ep)
CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
state_set(&ep->com, ABORTING);
abort_socket(ep);
close_socket(&ep->com);
close_socket(&ep->com, 0);
close_complete_upcall(ep);
state_set(&ep->com, DEAD);
put_ep(&ep->com);
@ -582,12 +543,13 @@ connect_request_upcall(struct iwch_ep *ep)
event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
event.provider_data = ep;
event.so = ep->com.so;
if (state_read(&ep->parent_ep->com) != DEAD)
if (state_read(&ep->parent_ep->com) != DEAD) {
get_ep(&ep->com);
ep->parent_ep->com.cm_id->event_handler(
ep->parent_ep->com.cm_id,
&event);
}
put_ep(&ep->parent_ep->com);
ep->parent_ep = NULL;
}
static void
@ -729,6 +691,7 @@ process_mpa_reply(struct iwch_ep *ep)
*/
CTR1(KTR_IW_CXGB, "%s mpa rpl looks good!", __FUNCTION__);
state_set(&ep->com, FPDU_MODE);
ep->mpa_attr.initiator = 1;
ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
ep->mpa_attr.recv_marker_enabled = markers_enabled;
ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
@ -885,6 +848,7 @@ process_mpa_request(struct iwch_ep *ep)
* If we get here we have accumulated the entire mpa
* start reply message including private data.
*/
ep->mpa_attr.initiator = 0;
ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
ep->mpa_attr.recv_marker_enabled = markers_enabled;
ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
@ -934,7 +898,6 @@ process_peer_close(struct iwch_ep *ep)
* rejects the CR.
*/
__state_set(&ep->com, CLOSING);
get_ep(&ep->com);
break;
case MPA_REP_SENT:
__state_set(&ep->com, CLOSING);
@ -961,7 +924,7 @@ process_peer_close(struct iwch_ep *ep)
iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
}
close_socket(&ep->com);
close_socket(&ep->com, 0);
close_complete_upcall(ep);
__state_set(&ep->com, DEAD);
release = 1;
@ -986,11 +949,10 @@ process_conn_error(struct iwch_ep *ep)
{
struct iwch_qp_attributes attrs;
int ret;
int state;
state = state_read(&ep->com);
CTR5(KTR_IW_CXGB, "%s ep %p so %p so->so_error %u state %s", __FUNCTION__, ep, ep->com.so, ep->com.so->so_error, states[ep->com.state]);
switch (state) {
mtx_lock(&ep->com.lock);
CTR3(KTR_IW_CXGB, "%s ep %p state %u", __func__, ep, ep->com.state);
switch (ep->com.state) {
case MPA_REQ_WAIT:
stop_ep_timer(ep);
break;
@ -1009,7 +971,6 @@ process_conn_error(struct iwch_ep *ep)
* the reference on it until the ULP accepts or
* rejects the CR.
*/
get_ep(&ep->com);
break;
case MORIBUND:
case CLOSING:
@ -1031,6 +992,7 @@ process_conn_error(struct iwch_ep *ep)
case ABORTING:
break;
case DEAD:
mtx_unlock(&ep->com.lock);
CTR2(KTR_IW_CXGB, "%s so_error %d IN DEAD STATE!!!!", __FUNCTION__,
ep->com.so->so_error);
return;
@ -1039,11 +1001,12 @@ process_conn_error(struct iwch_ep *ep)
break;
}
if (state != ABORTING) {
close_socket(&ep->com);
state_set(&ep->com, DEAD);
if (ep->com.state != ABORTING) {
close_socket(&ep->com, 0);
__state_set(&ep->com, DEAD);
put_ep(&ep->com);
}
mtx_unlock(&ep->com.lock);
return;
}
@ -1071,7 +1034,10 @@ process_close_complete(struct iwch_ep *ep)
IWCH_QP_ATTR_NEXT_STATE,
&attrs, 1);
}
close_socket(&ep->com);
if (ep->parent_ep)
close_socket(&ep->com, 1);
else
close_socket(&ep->com, 0);
close_complete_upcall(ep);
__state_set(&ep->com, DEAD);
release = 1;
@ -1102,77 +1068,59 @@ process_close_complete(struct iwch_ep *ep)
* terminate() handles case (1)...
*/
static int
terminate(struct t3cdev *tdev, struct mbuf *m, void *ctx)
terminate(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
struct toepcb *toep = (struct toepcb *)ctx;
struct socket *so = toeptoso(toep);
struct adapter *sc = qs->adap;
struct tom_data *td = sc->tom_softc;
uint32_t hash = *((uint32_t *)r + 1);
unsigned int tid = ntohl(hash) >> 8 & 0xfffff;
struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
struct socket *so = toep->tp_inp->inp_socket;
struct iwch_ep *ep = so->so_rcv.sb_upcallarg;
CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
if (state_read(&ep->com) != FPDU_MODE)
goto done;
m_adj(m, sizeof(struct cpl_rdma_terminate));
CTR2(KTR_IW_CXGB, "%s saving %d bytes of term msg", __FUNCTION__, m->m_len);
CTR4(KTR_IW_CXGB, "%s: tid %u, ep %p, saved %d bytes",
__func__, tid, ep, m->m_len);
m_copydata(m, 0, m->m_len, ep->com.qp->attr.terminate_buffer);
ep->com.qp->attr.terminate_msg_len = m->m_len;
ep->com.qp->attr.is_terminate_local = 0;
return CPL_RET_BUF_DONE;
done:
m_freem(m);
return (0);
}
static int
ec_status(struct t3cdev *tdev, struct mbuf *m, void *ctx)
ec_status(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
struct toepcb *toep = (struct toepcb *)ctx;
struct socket *so = toeptoso(toep);
struct cpl_rdma_ec_status *rep = cplhdr(m);
struct iwch_ep *ep;
struct iwch_qp_attributes attrs;
int release = 0;
struct adapter *sc = qs->adap;
struct tom_data *td = sc->tom_softc;
struct cpl_rdma_ec_status *rep = mtod(m, void *);
unsigned int tid = GET_TID(rep);
struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
struct socket *so = toep->tp_inp->inp_socket;
struct iwch_ep *ep = so->so_rcv.sb_upcallarg;
ep = so->so_rcv.sb_upcallarg;
CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s ec_status %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], rep->status);
if (!so || !ep) {
panic("bogosity ep %p state %d, so %p state %x\n", ep, ep ? ep->com.state : -1, so, so ? so->so_state : -1);
}
mtx_lock(&ep->com.lock);
switch (ep->com.state) {
case CLOSING:
if (!rep->status)
__state_set(&ep->com, MORIBUND);
else
__state_set(&ep->com, ABORTING);
break;
case MORIBUND:
stop_ep_timer(ep);
if (!rep->status) {
if ((ep->com.cm_id) && (ep->com.qp)) {
attrs.next_state = IWCH_QP_STATE_IDLE;
iwch_modify_qp(ep->com.qp->rhp,
ep->com.qp,
IWCH_QP_ATTR_NEXT_STATE,
&attrs, 1);
}
close_socket(&ep->com);
close_complete_upcall(ep);
__state_set(&ep->com, DEAD);
release = 1;
}
break;
case DEAD:
break;
default:
panic("unknown state: %d\n", ep->com.state);
}
mtx_unlock(&ep->com.lock);
if (rep->status) {
log(LOG_ERR, "%s BAD CLOSE - Aborting tid %u\n",
__FUNCTION__, ep->hwtid);
struct iwch_qp_attributes attrs;
CTR1(KTR_IW_CXGB, "%s BAD CLOSE - Aborting", __FUNCTION__);
stop_ep_timer(ep);
attrs.next_state = IWCH_QP_STATE_ERROR;
iwch_modify_qp(ep->com.qp->rhp,
ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
&attrs, 1);
ep->com.qp,
IWCH_QP_ATTR_NEXT_STATE,
&attrs, 1);
abort_connection(ep);
}
if (release)
put_ep(&ep->com);
return CPL_RET_BUF_DONE;
m_freem(m);
return (0);
}
static void
@ -1181,24 +1129,29 @@ ep_timeout(void *arg)
struct iwch_ep *ep = (struct iwch_ep *)arg;
struct iwch_qp_attributes attrs;
int err = 0;
int abort = 1;
mtx_lock(&ep->com.lock);
CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
switch (ep->com.state) {
case MPA_REQ_SENT:
__state_set(&ep->com, ABORTING);
connect_reply_upcall(ep, -ETIMEDOUT);
break;
case MPA_REQ_WAIT:
__state_set(&ep->com, ABORTING);
break;
case CLOSING:
case MORIBUND:
if (ep->com.cm_id && ep->com.qp)
err = 1;
__state_set(&ep->com, ABORTING);
break;
default:
panic("unknown state: %d\n", ep->com.state);
CTR3(KTR_IW_CXGB, "%s unexpected state ep %p state %u\n",
__func__, ep, ep->com.state);
abort = 0;
}
__state_set(&ep->com, ABORTING);
mtx_unlock(&ep->com.lock);
if (err){
attrs.next_state = IWCH_QP_STATE_ERROR;
@ -1206,7 +1159,8 @@ ep_timeout(void *arg)
ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
&attrs, 1);
}
abort_connection(ep);
if (abort)
abort_connection(ep);
put_ep(&ep->com);
}
@ -1228,6 +1182,7 @@ iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
err = send_mpa_reject(ep, pdata, pdata_len);
err = soshutdown(ep->com.so, 3);
}
put_ep(&ep->com);
return 0;
}
@ -1242,8 +1197,10 @@ iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
if (state_read(&ep->com) == DEAD)
return (-ECONNRESET);
if (state_read(&ep->com) == DEAD) {
err = -ECONNRESET;
goto err;
}
PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD);
PANIC_IF(!qp);
@ -1251,7 +1208,8 @@ iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
(conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
abort_connection(ep);
return (-EINVAL);
err = -EINVAL;
goto err;
}
cm_id->add_ref(cm_id);
@ -1263,11 +1221,10 @@ iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep->ird = conn_param->ird;
ep->ord = conn_param->ord;
CTR3(KTR_IW_CXGB, "%s ird %d ord %d", __FUNCTION__, ep->ird, ep->ord);
get_ep(&ep->com);
/* bind QP to EP and move to RTS */
attrs.mpa_attr = ep->mpa_attr;
attrs.max_ird = ep->ord;
attrs.max_ird = ep->ird;
attrs.max_ord = ep->ord;
attrs.llp_stream_handle = ep;
attrs.next_state = IWCH_QP_STATE_RTS;
@ -1283,20 +1240,21 @@ iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep->com.qp, mask, &attrs, 1);
if (err)
goto err;
goto err1;
err = send_mpa_reply(ep, conn_param->private_data,
conn_param->private_data_len);
if (err)
goto err;
goto err1;
state_set(&ep->com, FPDU_MODE);
established_upcall(ep);
put_ep(&ep->com);
return 0;
err:
err1:
ep->com.cm_id = NULL;
ep->com.qp = NULL;
cm_id->rem_ref(cm_id);
err:
put_ep(&ep->com);
return err;
}
@ -1312,15 +1270,6 @@ static int init_sock(struct iwch_ep_common *epc)
epc->so->so_state |= SS_NBIO;
SOCK_UNLOCK(epc->so);
sopt.sopt_dir = SOPT_SET;
sopt.sopt_level = SOL_SOCKET;
sopt.sopt_name = SO_NO_DDP;
sopt.sopt_val = (caddr_t)&on;
sopt.sopt_valsize = sizeof on;
sopt.sopt_td = NULL;
err = sosetopt(epc->so, &sopt);
if (err)
printf("%s can't set SO_NO_DDP err %d\n", __FUNCTION__, err);
sopt.sopt_dir = SOPT_SET;
sopt.sopt_level = IPPROTO_TCP;
sopt.sopt_name = TCP_NODELAY;
sopt.sopt_val = (caddr_t)&on;
@ -1400,16 +1349,14 @@ iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (!(rt->rt_ifp->if_flags & IFCAP_TOE)) {
printf("%s - interface not TOE capable.\n", __FUNCTION__);
goto fail3;
RTFREE(rt);
goto fail2;
}
tdev = TOEDEV(rt->rt_ifp);
if (tdev == NULL) {
printf("%s - No toedev for interface.\n", __FUNCTION__);
goto fail3;
}
if (!tdev->tod_can_offload(tdev, ep->com.so)) {
printf("%s - interface cannot offload!.\n", __FUNCTION__);
goto fail3;
RTFREE(rt);
goto fail2;
}
RTFREE(rt);
@ -1420,8 +1367,6 @@ iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep->com.thread);
if (!err)
goto out;
fail3:
RTFREE(ep->dst);
fail2:
put_ep(&ep->com);
out:
@ -1458,7 +1403,7 @@ iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
cm_id->provider_data = ep;
goto out;
}
close_socket(&ep->com);
close_socket(&ep->com, 0);
fail:
cm_id->rem_ref(cm_id);
put_ep(&ep->com);
@ -1474,7 +1419,7 @@ iwch_destroy_listen(struct iw_cm_id *cm_id)
CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
state_set(&ep->com, DEAD);
close_socket(&ep->com);
close_socket(&ep->com, 0);
cm_id->rem_ref(cm_id);
put_ep(&ep->com);
return 0;
@ -1493,47 +1438,48 @@ iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags)
CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s, abrupt %d", __FUNCTION__, ep,
ep->com.so, states[ep->com.state], abrupt);
if (ep->com.state == DEAD) {
CTR2(KTR_IW_CXGB, "%s already dead ep %p", __FUNCTION__, ep);
goto out;
}
if (abrupt) {
if (ep->com.state != ABORTING) {
ep->com.state = ABORTING;
close = 1;
}
goto out;
}
switch (ep->com.state) {
case MPA_REQ_WAIT:
case MPA_REQ_SENT:
case MPA_REQ_RCVD:
case MPA_REP_SENT:
case FPDU_MODE:
start_ep_timer(ep);
ep->com.state = CLOSING;
close = 1;
if (abrupt)
ep->com.state = ABORTING;
else {
ep->com.state = CLOSING;
start_ep_timer(ep);
}
break;
case CLOSING:
ep->com.state = MORIBUND;
close = 1;
if (abrupt) {
stop_ep_timer(ep);
ep->com.state = ABORTING;
} else
ep->com.state = MORIBUND;
break;
case MORIBUND:
case ABORTING:
case DEAD:
CTR3(KTR_IW_CXGB, "%s ignoring disconnect ep %p state %u\n",
__func__, ep, ep->com.state);
break;
default:
panic("unknown state: %d\n", ep->com.state);
break;
}
out:
mtx_unlock(&ep->com.lock);
if (close) {
if (abrupt)
abort_connection(ep);
else
else {
if (!ep->parent_ep)
__state_set(&ep->com, MORIBUND);
shutdown_socket(&ep->com);
}
}
return 0;
}
@ -1587,7 +1533,7 @@ process_connected(struct iwch_ep *ep)
send_mpa_req(ep);
} else {
connect_reply_upcall(ep, -ep->com.so->so_error);
close_socket(&ep->com);
close_socket(&ep->com, 0);
state_set(&ep->com, DEAD);
put_ep(&ep->com);
}
@ -1643,10 +1589,20 @@ process_newconn(struct iwch_ep *parent_ep)
}
CTR3(KTR_IW_CXGB, "%s remote addr %s port %d", __FUNCTION__,
inet_ntoa(remote->sin_addr), ntohs(remote->sin_port));
child_ep->com.tdev = parent_ep->com.tdev;
child_ep->com.local_addr.sin_family = parent_ep->com.local_addr.sin_family;
child_ep->com.local_addr.sin_port = parent_ep->com.local_addr.sin_port;
child_ep->com.local_addr.sin_addr.s_addr = parent_ep->com.local_addr.sin_addr.s_addr;
child_ep->com.local_addr.sin_len = parent_ep->com.local_addr.sin_len;
child_ep->com.remote_addr.sin_family = remote->sin_family;
child_ep->com.remote_addr.sin_port = remote->sin_port;
child_ep->com.remote_addr.sin_addr.s_addr = remote->sin_addr.s_addr;
child_ep->com.remote_addr.sin_len = remote->sin_len;
child_ep->com.so = child_so;
child_ep->com.cm_id = NULL;
child_ep->com.thread = parent_ep->com.thread;
child_ep->parent_ep = parent_ep;
free(remote, M_SONAME);
get_ep(&parent_ep->com);
child_ep->parent_ep = parent_ep;
@ -1747,17 +1703,30 @@ iwch_cm_init(void)
}
taskqueue_start_threads(&iw_cxgb_taskq, 1, PI_NET, "iw_cxgb taskq");
TASK_INIT(&iw_cxgb_task, 0, process_req, NULL);
t3tom_register_cpl_handler(CPL_RDMA_TERMINATE, terminate);
t3tom_register_cpl_handler(CPL_RDMA_EC_STATUS, ec_status);
return 0;
return (0);
}
void
iwch_cm_term(void)
{
t3tom_register_cpl_handler(CPL_RDMA_TERMINATE, NULL);
t3tom_register_cpl_handler(CPL_RDMA_EC_STATUS, NULL);
taskqueue_drain(iw_cxgb_taskq, &iw_cxgb_task);
taskqueue_free(iw_cxgb_taskq);
}
void
iwch_cm_init_cpl(struct adapter *sc)
{
t3_register_cpl_handler(sc, CPL_RDMA_TERMINATE, terminate);
t3_register_cpl_handler(sc, CPL_RDMA_EC_STATUS, ec_status);
}
void
iwch_cm_term_cpl(struct adapter *sc)
{
t3_register_cpl_handler(sc, CPL_RDMA_TERMINATE, NULL);
t3_register_cpl_handler(sc, CPL_RDMA_EC_STATUS, NULL);
}
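
RDMA CPL messages are no longer hooked through the old t3tom_register_cpl_handler() interface; handlers are registered per adapter with t3_register_cpl_handler() and are invoked straight from the SGE response loop with the queue set, response descriptor, and mbuf (see the adap->cpl_handler[opcode] dispatch in the cxgb_sge.c hunk earlier in this diff). A minimal handler skeleton in that shape follows; terminate() and ec_status() above are the real examples, and like them a handler is expected to consume or free the mbuf.

static int
example_cpl_handler(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
	/*
	 * qs->adap points at the adapter; per-tid state can be looked up
	 * via its tom_softc, as terminate() does above.
	 */
	m_freem(m);
	return (0);
}

/*
 * Hooked up (and later unhooked with NULL) per adapter, e.g.:
 *	t3_register_cpl_handler(sc, CPL_RDMA_TERMINATE, example_cpl_handler);
 */
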
#endif

View File

@ -31,8 +31,8 @@ $FreeBSD$
#ifndef _IWCH_CM_H_
#define _IWCH_CM_H_
#include <contrib/rdma/ib_verbs.h>
#include <contrib/rdma/iw_cm.h>
#include <rdma/ib_verbs.h>
#include <rdma/iw_cm.h>
#include <sys/refcount.h>
#include <sys/condvar.h>
#include <sys/proc.h>
@ -42,21 +42,21 @@ $FreeBSD$
#define MPA_KEY_REP "MPA ID Rep Frame"
#define MPA_MAX_PRIVATE_DATA 256
#define MPA_REV o0 /* XXX - amso1100 uses rev 0 ! */
#define MPA_REV 0 /* XXX - amso1100 uses rev 0 ! */
#define MPA_REJECT 0x20
#define MPA_CRC 0x40
#define MPA_MARKERS 0x80
#define MPA_FLAGS_MASK 0xE0
#define put_ep(ep) { \
CTR4(KTR_IW_CXGB, "put_ep (via %s:%u) ep %p refcnt %d\n", __FUNCTION__, __LINE__, \
CTR4(KTR_IW_CXGB, "put_ep (via %s:%u) ep %p refcnt %d", __FUNCTION__, __LINE__, \
ep, atomic_load_acq_int(&((ep)->refcount))); \
if (refcount_release(&((ep)->refcount))) \
__free_ep(ep); \
}
#define get_ep(ep) { \
CTR4(KTR_IW_CXGB, "get_ep (via %s:%u) ep %p, refcnt %d\n", __FUNCTION__, __LINE__, \
CTR4(KTR_IW_CXGB, "get_ep (via %s:%u) ep %p, refcnt %d", __FUNCTION__, __LINE__, \
ep, atomic_load_acq_int(&((ep)->refcount))); \
refcount_acquire(&((ep)->refcount)); \
}
@ -148,7 +148,7 @@ struct iwch_ep_common {
TAILQ_ENTRY(iwch_ep_common) entry;
struct iw_cm_id *cm_id;
struct iwch_qp *qp;
struct t3cdev *tdev;
struct toedev *tdev;
enum iwch_ep_state state;
u_int refcount;
struct cv waitq;
@ -176,7 +176,6 @@ struct iwch_ep {
u32 snd_seq;
u32 rcv_seq;
struct l2t_entry *l2t;
struct rtentry *dst;
struct mbuf *mpa_mbuf;
struct iwch_mpa_attributes mpa_attr;
unsigned int mpa_pkt_len;
@ -237,13 +236,13 @@ int iwch_destroy_listen(struct iw_cm_id *cm_id);
int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags);
int iwch_quiesce_tid(struct iwch_ep *ep);
int iwch_resume_tid(struct iwch_ep *ep);
void __free_ep(struct iwch_ep_common *ep);
void iwch_rearp(struct iwch_ep *ep);
int iwch_ep_redirect(void *ctx, struct rtentry *old, struct rtentry *new, struct l2t_entry *l2t);
int iwch_cm_init(void);
void iwch_cm_term(void);
void iwch_cm_init_cpl(struct adapter *);
void iwch_cm_term_cpl(struct adapter *);
#endif /* _IWCH_CM_H_ */

View File

@ -30,11 +30,13 @@ POSSIBILITY OF SUCH DAMAGE.
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
@ -59,9 +61,11 @@ __FBSDID("$FreeBSD$");
#include <sys/libkern.h>
#include <netinet/in.h>
#include <contrib/rdma/ib_verbs.h>
#include <contrib/rdma/ib_umem.h>
#include <contrib/rdma/ib_user_verbs.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <linux/idr.h>
#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
#include <cxgb_include.h>
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
@ -261,4 +265,4 @@ int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
return npolled;
}
}
#endif

View File

@ -30,11 +30,12 @@ POSSIBILITY OF SUCH DAMAGE.
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
@ -60,11 +61,13 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
#include <contrib/rdma/ib_verbs.h>
#include <contrib/rdma/ib_umem.h>
#include <contrib/rdma/ib_user_verbs.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <linux/idr.h>
#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
#ifdef DEBUG
#if defined(INVARIANTS) && defined(TCP_OFFLOAD)
#include <cxgb_include.h>
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
#include <ulp/iw_cxgb/iw_cxgb_hal.h>
@ -74,75 +77,100 @@ __FBSDID("$FreeBSD$");
#include <ulp/iw_cxgb/iw_cxgb_resource.h>
#include <ulp/iw_cxgb/iw_cxgb_user.h>
static int
cxio_rdma_get_mem(struct cxio_rdev *rdev, struct ch_mem_range *m)
{
struct adapter *sc = rdev->adap;
struct mc7 *mem;
if ((m->addr & 7) || (m->len & 7))
return (EINVAL);
if (m->mem_id == MEM_CM)
mem = &sc->cm;
else if (m->mem_id == MEM_PMRX)
mem = &sc->pmrx;
else if (m->mem_id == MEM_PMTX)
mem = &sc->pmtx;
else
return (EINVAL);
return (t3_mc7_bd_read(mem, m->addr/8, m->len/8, (u64 *)m->buf));
}
void cxio_dump_tpt(struct cxio_rdev *rdev, uint32_t stag)
{
struct ch_mem_range *m;
struct ch_mem_range m;
u64 *data;
u32 addr;
int rc;
int size = 32;
m = kmalloc(sizeof(*m) + size, M_NOWAIT);
if (!m) {
m.buf = malloc(size, M_DEVBUF, M_NOWAIT);
if (m.buf == NULL) {
CTR1(KTR_IW_CXGB, "%s couldn't allocate memory.", __FUNCTION__);
return;
}
m->mem_id = MEM_PMRX;
m->addr = (stag>>8) * 32 + rdev->rnic_info.tpt_base;
m->len = size;
CTR3(KTR_IW_CXGB, "%s TPT addr 0x%x len %d", __FUNCTION__, m->addr, m->len);
rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
m.mem_id = MEM_PMRX;
m.addr = (stag >> 8) * 32 + rdev->rnic_info.tpt_base;
m.len = size;
CTR3(KTR_IW_CXGB, "%s TPT addr 0x%x len %d", __FUNCTION__, m.addr, m.len);
rc = cxio_rdma_get_mem(rdev, &m);
if (rc) {
CTR2(KTR_IW_CXGB, "%s toectl returned error %d", __FUNCTION__, rc);
free(m, M_DEVBUF);
free(m.buf, M_DEVBUF);
return;
}
data = (u64 *)m->buf;
data = (u64 *)m.buf;
addr = m.addr;
while (size > 0) {
CTR2(KTR_IW_CXGB, "TPT %08x: %016llx", m->addr, (unsigned long long) *data);
CTR2(KTR_IW_CXGB, "TPT %08x: %016llx", addr, (unsigned long long) *data);
size -= 8;
data++;
m->addr += 8;
addr += 8;
}
free(m, M_DEVBUF);
free(m.buf, M_DEVBUF);
}
void cxio_dump_pbl(struct cxio_rdev *rdev, uint32_t pbl_addr, uint32_t len, u8 shift)
{
struct ch_mem_range *m;
struct ch_mem_range m;
u64 *data;
u32 addr;
int rc;
int size, npages;
shift += 12;
npages = (len + (1ULL << shift) - 1) >> shift;
size = npages * sizeof(u64);
m = kmalloc(sizeof(*m) + size, M_NOWAIT);
if (!m) {
m.buf = malloc(size, M_DEVBUF, M_NOWAIT);
if (m.buf == NULL) {
CTR1(KTR_IW_CXGB, "%s couldn't allocate memory.", __FUNCTION__);
return;
}
m->mem_id = MEM_PMRX;
m->addr = pbl_addr;
m->len = size;
m.mem_id = MEM_PMRX;
m.addr = pbl_addr;
m.len = size;
CTR4(KTR_IW_CXGB, "%s PBL addr 0x%x len %d depth %d",
__FUNCTION__, m->addr, m->len, npages);
rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
__FUNCTION__, m.addr, m.len, npages);
rc = cxio_rdma_get_mem(rdev, &m);
if (rc) {
CTR2(KTR_IW_CXGB, "%s toectl returned error %d", __FUNCTION__, rc);
free(m, M_DEVBUF);
free(m.buf, M_DEVBUF);
return;
}
data = (u64 *)m->buf;
data = (u64 *)m.buf;
addr = m.addr;
while (size > 0) {
CTR2(KTR_IW_CXGB, "PBL %08x: %016llx", m->addr, (unsigned long long) *data);
CTR2(KTR_IW_CXGB, "PBL %08x: %016llx", addr, (unsigned long long) *data);
size -= 8;
data++;
m->addr += 8;
addr += 8;
}
free(m, M_DEVBUF);
free(m.buf, M_DEVBUF);
}
void cxio_dump_wqe(union t3_wr *wqe)
@ -175,70 +203,76 @@ void cxio_dump_wce(struct t3_cqe *wce)
void cxio_dump_rqt(struct cxio_rdev *rdev, uint32_t hwtid, int nents)
{
struct ch_mem_range *m;
struct ch_mem_range m;
int size = nents * 64;
u64 *data;
u32 addr;
int rc;
m = kmalloc(sizeof(*m) + size, M_NOWAIT);
if (!m) {
m.buf = malloc(size, M_DEVBUF, M_NOWAIT);
if (m.buf == NULL) {
CTR1(KTR_IW_CXGB, "%s couldn't allocate memory.", __FUNCTION__);
return;
}
m->mem_id = MEM_PMRX;
m->addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base;
m->len = size;
CTR3(KTR_IW_CXGB, "%s RQT addr 0x%x len %d", __FUNCTION__, m->addr, m->len);
rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
m.mem_id = MEM_PMRX;
m.addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base;
m.len = size;
CTR3(KTR_IW_CXGB, "%s RQT addr 0x%x len %d", __FUNCTION__, m.addr, m.len);
rc = cxio_rdma_get_mem(rdev, &m);
if (rc) {
CTR2(KTR_IW_CXGB, "%s toectl returned error %d", __FUNCTION__, rc);
free(m, M_DEVBUF);
free(m.buf, M_DEVBUF);
return;
}
data = (u64 *)m->buf;
data = (u64 *)m.buf;
addr = m.addr;
while (size > 0) {
CTR2(KTR_IW_CXGB, "RQT %08x: %016llx", m->addr, (unsigned long long) *data);
CTR2(KTR_IW_CXGB, "RQT %08x: %016llx", addr, (unsigned long long) *data);
size -= 8;
data++;
m->addr += 8;
addr += 8;
}
free(m, M_DEVBUF);
free(m.buf, M_DEVBUF);
}
void cxio_dump_tcb(struct cxio_rdev *rdev, uint32_t hwtid)
{
struct ch_mem_range *m;
struct ch_mem_range m;
int size = TCB_SIZE;
uint32_t *data;
uint32_t addr;
int rc;
m = kmalloc(sizeof(*m) + size, M_NOWAIT);
if (!m) {
m.buf = malloc(size, M_DEVBUF, M_NOWAIT);
if (m.buf == NULL) {
CTR1(KTR_IW_CXGB, "%s couldn't allocate memory.", __FUNCTION__);
return;
}
m->mem_id = MEM_CM;
m->addr = hwtid * size;
m->len = size;
CTR3(KTR_IW_CXGB, "%s TCB %d len %d", __FUNCTION__, m->addr, m->len);
rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
m.mem_id = MEM_CM;
m.addr = hwtid * size;
m.len = size;
CTR3(KTR_IW_CXGB, "%s TCB %d len %d", __FUNCTION__, m.addr, m.len);
rc = cxio_rdma_get_mem(rdev, &m);
if (rc) {
CTR2(KTR_IW_CXGB, "%s toectl returned error %d", __FUNCTION__, rc);
free(m, M_DEVBUF);
free(m.buf, M_DEVBUF);
return;
}
data = (uint32_t *)m->buf;
data = (uint32_t *)m.buf;
addr = m.addr;
while (size > 0) {
printf("%2u: %08x %08x %08x %08x %08x %08x %08x %08x\n",
m->addr,
addr,
*(data+2), *(data+3), *(data),*(data+1),
*(data+6), *(data+7), *(data+4), *(data+5));
size -= 32;
data += 8;
m->addr += 32;
addr += 32;
}
free(m, M_DEVBUF);
free(m.buf, M_DEVBUF);
}
#endif

View File

@ -29,11 +29,13 @@ POSSIBILITY OF SUCH DAMAGE.
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
@ -59,9 +61,11 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
#include <contrib/rdma/ib_verbs.h>
#include <contrib/rdma/ib_umem.h>
#include <contrib/rdma/ib_user_verbs.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <linux/idr.h>
#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
#include <cxgb_include.h>
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
@ -81,11 +85,22 @@ post_qp_event(struct iwch_dev *rnicp, struct iwch_qp *qhp, struct iwch_cq *chp,
struct ib_event event;
struct iwch_qp_attributes attrs;
mtx_lock(&rnicp->lock);
if (!qhp) {
CTR3(KTR_IW_CXGB, "%s unaffiliated error 0x%x qpid 0x%x\n",
__func__, CQE_STATUS(rsp_msg->cqe),
CQE_QPID(rsp_msg->cqe));
mtx_unlock(&rnicp->lock);
return;
}
if ((qhp->attr.state == IWCH_QP_STATE_ERROR) ||
(qhp->attr.state == IWCH_QP_STATE_TERMINATE)) {
CTR4(KTR_IW_CXGB, "%s AE received after RTS - "
"qp state %d qpid 0x%x status 0x%x", __FUNCTION__,
qhp->attr.state, qhp->wq.qpid, CQE_STATUS(rsp_msg->cqe));
mtx_unlock(&rnicp->lock);
return;
}
@ -95,6 +110,15 @@ post_qp_event(struct iwch_dev *rnicp, struct iwch_qp *qhp, struct iwch_cq *chp,
CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
mtx_unlock(&rnicp->lock);
if (qhp->attr.state == IWCH_QP_STATE_RTS) {
attrs.next_state = IWCH_QP_STATE_TERMINATE;
iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE,
&attrs, 1);
if (send_term)
iwch_post_terminate(qhp, rsp_msg);
}
event.event = ib_event;
event.device = chp->ibcq.device;
@ -106,25 +130,17 @@ post_qp_event(struct iwch_dev *rnicp, struct iwch_qp *qhp, struct iwch_cq *chp,
if (qhp->ibqp.event_handler)
(*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context);
if (qhp->attr.state == IWCH_QP_STATE_RTS) {
attrs.next_state = IWCH_QP_STATE_TERMINATE;
iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE,
&attrs, 1);
if (send_term)
iwch_post_terminate(qhp, rsp_msg);
}
(*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
}
void
iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct mbuf *m)
iwch_ev_dispatch(struct iwch_dev *rnicp, struct mbuf *m)
{
struct iwch_dev *rnicp;
struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) m->m_data;
struct iwch_cq *chp;
struct iwch_qp *qhp;
u32 cqid = RSPQ_CQID(rsp_msg);
rnicp = (struct iwch_dev *) rdev_p->ulp;
mtx_lock(&rnicp->lock);
chp = get_chp(rnicp, cqid);
qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
@ -136,7 +152,7 @@ iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct mbuf *m)
CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe),
CQE_WRID_LOW(rsp_msg->cqe));
mtx_unlock(&rnicp->lock);
goto out;
return;
}
iwch_qp_add_ref(&qhp->ibqp);
mtx_lock(&chp->lock);
@ -200,12 +216,6 @@ iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct mbuf *m)
case TPT_ERR_BOUND:
case TPT_ERR_INVALIDATE_SHARED_MR:
case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
log(LOG_ERR, "%s - CQE Err qpid 0x%x opcode %d status 0x%x "
"type %d wrid.hi 0x%x wrid.lo 0x%x \n", __FUNCTION__,
CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
(*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
post_qp_event(rnicp, qhp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1);
break;
@ -248,6 +258,5 @@ done:
wakeup(chp);
mtx_unlock(&chp->lock);
iwch_qp_rem_ref(&qhp->ibqp);
out:
m_free(m);
}
#endif

View File

@ -1,4 +1,3 @@
/**************************************************************************
Copyright (c) 2007, Chelsio Inc.
@ -30,11 +29,13 @@ POSSIBILITY OF SUCH DAMAGE.
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
@ -47,6 +48,8 @@ __FBSDID("$FreeBSD$");
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockopt.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
@ -59,12 +62,25 @@ __FBSDID("$FreeBSD$");
#include <vm/vm.h>
#include <vm/pmap.h>
#include <net/route.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp_var.h>
#include <netinet/toecore.h>
#include <netinet/tcp.h>
#include <netinet/tcpip.h>
#include <contrib/rdma/ib_verbs.h>
#include <rdma/ib_verbs.h>
#include <linux/idr.h>
#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
#include <cxgb_include.h>
#include <ulp/tom/cxgb_l2t.h>
#include <ulp/tom/cxgb_tom.h>
#include <ulp/tom/cxgb_toepcb.h>
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
#include <ulp/iw_cxgb/iw_cxgb_hal.h>
#include <ulp/iw_cxgb/iw_cxgb_provider.h>
@ -72,29 +88,21 @@ __FBSDID("$FreeBSD$");
#include <ulp/iw_cxgb/iw_cxgb.h>
#include <ulp/iw_cxgb/iw_cxgb_resource.h>
static TAILQ_HEAD( ,cxio_rdev) rdev_list;
static cxio_hal_ev_callback_func_t cxio_ev_cb = NULL;
static struct cxio_rdev *
cxio_hal_find_rdev_by_name(char *dev_name)
/* Response queue used for RDMA events. */
#define ASYNC_NOTIF_RSPQ 0
static inline int
cxio_rdma_cq_setup(struct cxio_rdev *rdev_p, unsigned id, uint64_t base_addr,
unsigned size, unsigned ovfl_mode, unsigned credits, unsigned credit_thres)
{
struct cxio_rdev *rdev;
struct adapter *sc = rdev_p->adap;
int rc;
TAILQ_FOREACH(rdev, &rdev_list, entry)
if (!strcmp(rdev->dev_name, dev_name))
return rdev;
return NULL;
}
mtx_lock_spin(&sc->sge.reg_lock);
rc = -t3_sge_init_cqcntxt(sc, id, base_addr, size, ASYNC_NOTIF_RSPQ,
ovfl_mode, credits, credit_thres);
mtx_unlock_spin(&sc->sge.reg_lock);
struct cxio_rdev *
cxio_hal_find_rdev_by_t3cdev(struct t3cdev *tdev)
{
struct cxio_rdev *rdev;
TAILQ_FOREACH(rdev, &rdev_list, entry)
if (rdev->t3cdev_p == tdev)
return rdev;
return NULL;
return (rc);
}
int
@ -104,12 +112,14 @@ cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
int ret;
struct t3_cqe *cqe;
u32 rptr;
struct adapter *sc = rdev_p->adap;
struct rdma_cq_op setup;
setup.id = cq->cqid;
setup.credits = (op == CQ_CREDIT_UPDATE) ? credit : 0;
setup.op = op;
ret = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_OP, &setup);
if (op != CQ_CREDIT_UPDATE)
credit = 0;
mtx_lock_spin(&sc->sge.reg_lock);
ret = t3_sge_cqcntxt_op(sc, cq->cqid, op, credit);
mtx_unlock_spin(&sc->sge.reg_lock);
if ((ret < 0) || (op == CQ_CREDIT_UPDATE))
return (ret);
@ -140,30 +150,26 @@ cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) {
DELAY(1);
if (i++ > 1000000) {
PANIC_IF(1);
struct adapter *sc = rdev_p->adap;
log(LOG_ERR, "%s: stalled rnic\n",
rdev_p->dev_name);
device_get_nameunit(sc->dev));
PANIC_IF(1);
return (-EIO);
}
}
return 1;
return (1);
}
return 0;
return (0);
}
static int
cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid)
{
struct rdma_cq_setup setup;
setup.id = cqid;
setup.base_addr = 0; /* NULL address */
setup.size = 0; /* disable the CQ */
setup.credits = 0;
setup.credit_thres = 0;
setup.ovfl_mode = 0;
return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
return (cxio_rdma_cq_setup(rdev_p, cqid, 0, 0, 0, 0, 0));
}
static int
@ -171,43 +177,38 @@ cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
{
u64 sge_cmd;
struct t3_modify_qp_wr *wqe;
struct mbuf *m = m_gethdr(MT_DATA, M_NOWAIT);
struct mbuf *m;
m = M_GETHDR_OFLD(0, CPL_PRIORITY_CONTROL, wqe);
if (m == NULL) {
CTR1(KTR_IW_CXGB, "%s m_gethdr failed", __FUNCTION__);
return (-ENOMEM);
}
wqe = mtod(m, struct t3_modify_qp_wr *);
m->m_len = m->m_pkthdr.len = sizeof(*wqe);
memset(wqe, 0, sizeof(*wqe));
build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 0, qpid, 7);
wqe->flags = htobe32(MODQP_WRITE_EC);
sge_cmd = qpid << 8 | 3;
wqe->sge_cmd = htobe64(sge_cmd);
m_set_priority(m, CPL_PRIORITY_CONTROL);
m_set_sgl(m, NULL);
m_set_sgllen(m, 0);
return (cxgb_ofld_send(rdev_p->t3cdev_p, m));
return t3_offload_tx(rdev_p->adap, m);
}
int
cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel)
{
struct rdma_cq_setup setup;
int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe);
size += 1; /* one extra page for storing cq-in-err state */
cq->cqid = cxio_hal_get_cqid(rdev_p->rscp);
if (!cq->cqid)
return (-ENOMEM);
cq->sw_queue = malloc(size, M_DEVBUF, M_NOWAIT|M_ZERO);
if (!cq->sw_queue)
return (-ENOMEM);
#if 0
cq->queue = dma_alloc_coherent(rdev_p->rnic_info.pdev,
(1UL << (cq->size_log2)) *
sizeof(struct t3_cqe),
&(cq->dma_addr), M_NOWAIT);
#else
cq->queue = contigmalloc((1UL << (cq->size_log2))*sizeof(struct t3_cqe),
if (kernel) {
cq->sw_queue = malloc(size, M_DEVBUF, M_NOWAIT|M_ZERO);
if (!cq->sw_queue)
return (-ENOMEM);
}
cq->queue = contigmalloc(size,
M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0);
if (cq->queue)
cq->dma_addr = vtophys(cq->queue);
@ -215,35 +216,10 @@ cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
free(cq->sw_queue, M_DEVBUF);
return (-ENOMEM);
}
#endif
#ifdef notyet
pci_unmap_addr_set(cq, mapping, cq->dma_addr);
#endif
memset(cq->queue, 0, size);
setup.id = cq->cqid;
setup.base_addr = (u64) (cq->dma_addr);
setup.size = 1UL << cq->size_log2;
setup.credits = 65535;
setup.credit_thres = 1;
if (rdev_p->t3cdev_p->type != T3A)
setup.ovfl_mode = 0;
else
setup.ovfl_mode = 1;
return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
}
int
cxio_resize_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
{
struct rdma_cq_setup setup;
setup.id = cq->cqid;
setup.base_addr = (u64) (cq->dma_addr);
setup.size = 1UL << cq->size_log2;
setup.credits = setup.size;
setup.credit_thres = setup.size; /* TBD: overflow recovery */
setup.ovfl_mode = 1;
return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
return (cxio_rdma_cq_setup(rdev_p, cq->cqid, cq->dma_addr,
1UL << cq->size_log2, 0, 65535, 1));
}
static u32
@ -325,7 +301,7 @@ cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
if (!wq->qpid)
return (-ENOMEM);
wq->rq = malloc(depth * sizeof(u64), M_DEVBUF, M_NOWAIT|M_ZERO);
wq->rq = malloc(depth * sizeof(struct t3_swrq), M_DEVBUF, M_NOWAIT|M_ZERO);
if (!wq->rq)
goto err1;
@ -336,28 +312,19 @@ cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
wq->sq = malloc(depth * sizeof(struct t3_swsq), M_DEVBUF, M_NOWAIT|M_ZERO);
if (!wq->sq)
goto err3;
#if 0
wq->queue = dma_alloc_coherent(rdev_p->rnic_info.pdev,
depth * sizeof(union t3_wr),
&(wq->dma_addr), M_NOWAIT);
#else
wq->queue = contigmalloc(depth *sizeof(union t3_wr),
M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0);
if (wq->queue)
wq->dma_addr = vtophys(wq->queue);
#endif
if (!wq->queue)
else
goto err4;
memset(wq->queue, 0, depth * sizeof(union t3_wr));
#ifdef notyet
pci_unmap_addr_set(wq, mapping, wq->dma_addr);
#endif
wq->doorbell = rdev_p->rnic_info.kdb_addr;
if (!kernel_domain)
wq->udb = (u64)rdev_p->rnic_info.udbell_physbase +
(wq->qpid << rdev_p->qpshift);
wq->rdev = rdev_p;
CTR4(KTR_IW_CXGB, "%s qpid 0x%x doorbell 0x%p udb 0x%llx", __FUNCTION__,
wq->qpid, wq->doorbell, (unsigned long long) wq->udb);
return 0;
@ -431,10 +398,11 @@ insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq)
cq->sw_wptr++;
}
void
int
cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
{
u32 ptr;
int flushed = 0;
CTR3(KTR_IW_CXGB, "%s wq %p cq %p", __FUNCTION__, wq, cq);
@ -442,8 +410,11 @@ cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
CTR4(KTR_IW_CXGB, "%s rq_rptr %u rq_wptr %u skip count %u", __FUNCTION__,
wq->rq_rptr, wq->rq_wptr, count);
ptr = wq->rq_rptr + count;
while (ptr++ != wq->rq_wptr)
while (ptr++ != wq->rq_wptr) {
insert_recv_cqe(wq, cq);
flushed++;
}
return flushed;
}
static void
@ -468,19 +439,22 @@ insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq,
cq->sw_wptr++;
}
void
int
cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
{
__u32 ptr;
int flushed = 0;
struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2);
ptr = wq->sq_rptr + count;
sqp += count;
sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
while (ptr != wq->sq_wptr) {
insert_sq_cqe(wq, cq, sqp);
sqp++;
ptr++;
sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
flushed++;
}
return flushed;
}
/*
@ -516,7 +490,7 @@ static int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq)
if ((CQE_OPCODE(*cqe) == T3_READ_RESP) && SQ_TYPE(*cqe))
return 0;
if ((CQE_OPCODE(*cqe) == T3_SEND) && RQ_TYPE(*cqe) &&
if (CQE_OPCODE(*cqe) && RQ_TYPE(*cqe) &&
Q_EMPTY(wq->rq_rptr, wq->rq_wptr))
return 0;
@ -563,16 +537,8 @@ cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
static int
cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p)
{
struct rdma_cq_setup setup;
setup.id = 0;
setup.base_addr = 0; /* NULL address */
setup.size = 1; /* enable the CQ */
setup.credits = 0;
/* force SGE to redirect to RspQ and interrupt */
setup.credit_thres = 0;
setup.ovfl_mode = 1;
return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
return (cxio_rdma_cq_setup(rdev_p, 0, 0, 1, 1, 0, 0));
}
static int
@ -584,41 +550,28 @@ cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
struct t3_modify_qp_wr *wqe;
struct mbuf *m;
m = m_gethdr(MT_DATA, M_NOWAIT);
m = M_GETHDR_OFLD(0, CPL_PRIORITY_CONTROL, wqe);
if (m == NULL) {
CTR1(KTR_IW_CXGB, "%s m_gethdr failed", __FUNCTION__);
return (-ENOMEM);
return (ENOMEM);
}
err = cxio_hal_init_ctrl_cq(rdev_p);
if (err) {
CTR2(KTR_IW_CXGB, "%s err %d initializing ctrl_cq", __FUNCTION__, err);
goto err;
}
#if 0
rdev_p->ctrl_qp.workq = dma_alloc_coherent(
rdev_p->rnic_info.pdev,
(1 << T3_CTRL_QP_SIZE_LOG2) *
sizeof(union t3_wr),
&(rdev_p->ctrl_qp.dma_addr),
M_NOWAIT);
#else
rdev_p->ctrl_qp.workq = contigmalloc((1 << T3_CTRL_QP_SIZE_LOG2)
*sizeof(union t3_wr), M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0);
if (rdev_p->ctrl_qp.workq)
rdev_p->ctrl_qp.dma_addr = vtophys(rdev_p->ctrl_qp.workq);
#endif
if (!rdev_p->ctrl_qp.workq) {
else {
CTR1(KTR_IW_CXGB, "%s dma_alloc_coherent failed", __FUNCTION__);
err = -ENOMEM;
err = ENOMEM;
goto err;
}
#if 0
pci_unmap_addr_set(&rdev_p->ctrl_qp, mapping,
rdev_p->ctrl_qp.dma_addr);
#endif
rdev_p->ctrl_qp.doorbell = (void /*__iomem */ *)rdev_p->rnic_info.kdb_addr;
rdev_p->ctrl_qp.doorbell = rdev_p->rnic_info.kdb_addr;
memset(rdev_p->ctrl_qp.workq, 0,
(1 << T3_CTRL_QP_SIZE_LOG2) * sizeof(union t3_wr));
@ -637,10 +590,8 @@ cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
ctx1 |= ((u64) (V_EC_BASE_HI((u32) base_addr & 0xf) | V_EC_RESPQ(0) |
V_EC_TYPE(0) | V_EC_GEN(1) |
V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32;
wqe = mtod(m, struct t3_modify_qp_wr *);
m->m_len = m->m_pkthdr.len = sizeof(*wqe);
memset(wqe, 0, sizeof(*wqe));
build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 0,
build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0,
T3_CTL_QP_TID, 7);
wqe->flags = htobe32(MODQP_WRITE_EC);
sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3;
@ -650,12 +601,9 @@ cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
CTR3(KTR_IW_CXGB, "CtrlQP dma_addr 0x%llx workq %p size %d",
(unsigned long long) rdev_p->ctrl_qp.dma_addr,
rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2);
m_set_priority(m, CPL_PRIORITY_CONTROL);
m_set_sgl(m, NULL);
m_set_sgllen(m, 0);
return (cxgb_ofld_send(rdev_p->t3cdev_p, m));
return t3_offload_tx(rdev_p->adap, m);
err:
m_free(m);
m_freem(m);
return err;
}
@ -681,7 +629,7 @@ cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p)
*/
static int
cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
u32 len, void *data, int completion)
u32 len, void *data)
{
u32 i, nr_wqe, copy_len;
u8 *copy_data;
@ -718,7 +666,7 @@ cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
flag = 0;
if (i == (nr_wqe - 1)) {
/* last WQE */
flag = completion ? T3_COMPLETION_FLAG : 0;
flag = T3_COMPLETION_FLAG;
if (len % 32)
utx_len = len / 32 + 1;
else
@ -786,14 +734,13 @@ static int
__cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
u32 *stag, u8 stag_state, u32 pdid,
enum tpt_mem_type type, enum tpt_mem_perm perm,
u32 zbva, u64 to, u32 len, u8 page_size, __be64 *pbl,
u32 *pbl_size, u32 *pbl_addr)
u32 zbva, u64 to, u32 len, u8 page_size,
u32 pbl_size, u32 pbl_addr)
{
int err;
struct tpt_entry tpt;
u32 stag_idx;
u32 wptr;
int rereg = (*stag != T3_STAG_UNSET);
stag_state = stag_state > 0;
stag_idx = (*stag) >> 8;
@ -807,30 +754,8 @@ __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
CTR5(KTR_IW_CXGB, "%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x",
__FUNCTION__, stag_state, type, pdid, stag_idx);
if (reset_tpt_entry)
cxio_hal_pblpool_free(rdev_p, *pbl_addr, *pbl_size << 3);
else if (!rereg) {
*pbl_addr = cxio_hal_pblpool_alloc(rdev_p, *pbl_size << 3);
if (!*pbl_addr) {
return (-ENOMEM);
}
}
mtx_lock(&rdev_p->ctrl_qp.lock);
/* write PBL first if any - update pbl only if pbl list exist */
if (pbl) {
CTR4(KTR_IW_CXGB, "%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d",
__FUNCTION__, *pbl_addr, rdev_p->rnic_info.pbl_base,
*pbl_size);
err = cxio_hal_ctrl_qp_write_mem(rdev_p,
(*pbl_addr >> 5),
(*pbl_size << 3), pbl, 0);
if (err)
goto ret;
}
/* write TPT entry */
if (reset_tpt_entry)
memset(&tpt, 0, sizeof(tpt));
@ -845,23 +770,23 @@ __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
V_TPT_PAGE_SIZE(page_size));
tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 :
htobe32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, *pbl_addr)>>3));
htobe32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
tpt.len = htobe32(len);
tpt.va_hi = htobe32((u32) (to >> 32));
tpt.va_low_or_fbo = htobe32((u32) (to & 0xFFFFFFFFULL));
tpt.rsvd_bind_cnt_or_pstag = 0;
tpt.rsvd_pbl_size = reset_tpt_entry ? 0 :
htobe32(V_TPT_PBL_SIZE((*pbl_size) >> 2));
htobe32(V_TPT_PBL_SIZE((pbl_size) >> 2));
}
err = cxio_hal_ctrl_qp_write_mem(rdev_p,
stag_idx +
(rdev_p->rnic_info.tpt_base >> 5),
sizeof(tpt), &tpt, 1);
sizeof(tpt), &tpt);
/* release the stag index to free pool */
if (reset_tpt_entry)
cxio_hal_put_stag(rdev_p->rscp, stag_idx);
ret:
wptr = rdev_p->ctrl_qp.wptr;
mtx_unlock(&rdev_p->ctrl_qp.lock);
if (!err)
@ -872,61 +797,90 @@ ret:
return err;
}
int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
u32 pbl_addr, u32 pbl_size)
{
u32 wptr;
int err;
CTR4(KTR_IW_CXGB, "%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d",
__func__, pbl_addr, rdev_p->rnic_info.pbl_base,
pbl_size);
mtx_lock(&rdev_p->ctrl_qp.lock);
err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3,
pbl);
wptr = rdev_p->ctrl_qp.wptr;
mtx_unlock(&rdev_p->ctrl_qp.lock);
if (err)
return err;
if (cxio_wait(&rdev_p->ctrl_qp,
&rdev_p->ctrl_qp.lock,
SEQ32_GE(rdev_p->ctrl_qp.rptr, wptr)))
return ERESTART;
return 0;
}
int
cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
u8 page_size, __be64 *pbl, u32 *pbl_size,
u32 *pbl_addr)
u8 page_size, u32 pbl_size, u32 pbl_addr)
{
*stag = T3_STAG_UNSET;
return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
zbva, to, len, page_size, pbl, pbl_size, pbl_addr);
zbva, to, len, page_size, pbl_size, pbl_addr);
}
int
cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
u8 page_size, __be64 *pbl, u32 *pbl_size,
u32 *pbl_addr)
u8 page_size, u32 pbl_size, u32 pbl_addr)
{
return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
zbva, to, len, page_size, pbl, pbl_size, pbl_addr);
zbva, to, len, page_size, pbl_size, pbl_addr);
}
int
cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size,
u32 pbl_addr)
{
return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL,
&pbl_size, &pbl_addr);
return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
pbl_size, pbl_addr);
}
int
cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid)
{
u32 pbl_size = 0;
*stag = T3_STAG_UNSET;
return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0,
NULL, &pbl_size, NULL);
0, 0);
}
int
cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag)
{
return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL,
NULL, NULL);
return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
0, 0);
}
int
cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr,
struct socket *so)
{
struct t3_rdma_init_wr *wqe;
struct mbuf *m = m_gethdr(MT_DATA, M_NOWAIT);
struct mbuf *m;
struct ofld_hdr *oh;
int rc;
struct tcpcb *tp;
struct inpcb *inp;
struct toepcb *toep;
m = M_GETHDR_OFLD(0, CPL_PRIORITY_DATA, wqe);
if (m == NULL)
return (-ENOMEM);
CTR2(KTR_IW_CXGB, "%s rdev_p %p", __FUNCTION__, rdev_p);
wqe = mtod(m, struct t3_rdma_init_wr *);
m->m_len = m->m_pkthdr.len = sizeof(*wqe);
wqe->wrh.op_seop_flags = htobe32(V_FW_RIWR_OP(T3_WR_INIT));
wqe->wrh.gen_tid_len = htobe32(V_FW_RIWR_TID(attr->tid) |
V_FW_RIWR_LEN(sizeof(*wqe) >> 3));
@ -940,36 +894,41 @@ cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
wqe->mpaattrs = attr->mpaattrs;
wqe->qpcaps = attr->qpcaps;
wqe->ulpdu_size = htobe16(attr->tcp_emss);
wqe->flags = htobe32(attr->flags);
wqe->rqe_count = htobe16(attr->rqe_count);
wqe->flags_rtr_type = htobe16(attr->flags |
V_RTR_TYPE(attr->rtr_type) |
V_CHAN(attr->chan));
wqe->ord = htobe32(attr->ord);
wqe->ird = htobe32(attr->ird);
wqe->qp_dma_addr = htobe64(attr->qp_dma_addr);
wqe->qp_dma_size = htobe32(attr->qp_dma_size);
wqe->irs = htobe32(attr->irs);
m_set_priority(m, 0); /* 0=>ToeQ; 1=>CtrlQ */
m_set_sgl(m, NULL);
m_set_sgllen(m, 0);
return (cxgb_ofld_send(rdev_p->t3cdev_p, m));
}
void
cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
{
cxio_ev_cb = ev_cb;
}
/* XXX: bad form, fix later */
inp = sotoinpcb(so);
INP_WLOCK(inp);
tp = intotcpcb(inp);
toep = tp->t_toe;
oh = mtod(m, struct ofld_hdr *);
oh->plen = 0;
oh->flags |= F_HDR_DF;
enqueue_wr(toep, m);
toep->tp_wr_avail--;
toep->tp_wr_unacked++;
rc = t3_offload_tx(rdev_p->adap, m);
INP_WUNLOCK(inp);
void
cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
{
cxio_ev_cb = NULL;
return (rc);
}
static int
cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct mbuf *m)
cxio_hal_ev_handler(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
static int cnt;
struct cxio_rdev *rdev_p = NULL;
struct adapter *sc = qs->adap;
struct iwch_dev *rnicp = sc->iwarp_softc;
struct cxio_rdev *rdev_p = &rnicp->rdev;
struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) m->m_data;
int qpid = CQE_QPID(rsp_msg->cqe);
CTR6(KTR_IW_CXGB, "%s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x",
__FUNCTION__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg),
@ -978,80 +937,50 @@ cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct mbuf *m)
RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg),
RSPQ_CREDIT_THRESH(rsp_msg));
CTR4(KTR_IW_CXGB, "CQE: QPID 0x%0x type 0x%0x status 0x%0x opcode %d",
CQE_QPID(rsp_msg->cqe),
CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
CQE_OPCODE(rsp_msg->cqe));
qpid, CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
CQE_OPCODE(rsp_msg->cqe));
CTR3(KTR_IW_CXGB, "len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x",
CQE_LEN(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
rdev_p = (struct cxio_rdev *)t3cdev_p->ulp;
if (!rdev_p) {
CTR2(KTR_IW_CXGB, "%s called by t3cdev %p with null ulp", __FUNCTION__,
t3cdev_p);
return 0;
}
if (CQE_QPID(rsp_msg->cqe) == T3_CTRL_QP_ID) {
switch(qpid) {
case T3_CTRL_QP_ID:
mtx_lock(&rdev_p->ctrl_qp.lock);
rdev_p->ctrl_qp.rptr = CQE_WRID_LOW(rsp_msg->cqe) + 1;
wakeup(&rdev_p->ctrl_qp);
mtx_unlock(&rdev_p->ctrl_qp.lock);
m_free(m);
} else if (CQE_QPID(rsp_msg->cqe) == 0xfff8)
m_free(m);
else if (cxio_ev_cb)
(*cxio_ev_cb) (rdev_p, m);
else
m_free(m);
cnt++;
return 0;
break;
case 0xfff8:
break;
default:
iwch_ev_dispatch(rnicp, m);
}
m_freem(m);
return (0);
}
/* Caller takes care of locking if needed */
int
cxio_rdev_open(struct cxio_rdev *rdev_p)
{
struct ifnet *ifp;
int err = 0;
struct rdma_info *ri = &rdev_p->rnic_info;
struct adapter *sc = rdev_p->adap;
if (strlen(rdev_p->dev_name)) {
if (cxio_hal_find_rdev_by_name(rdev_p->dev_name)) {
return (-EBUSY);
}
ifp = rdev_p->ifp;
if (ifp == NULL)
return (-EINVAL);
if_free(ifp);
} else if (rdev_p->t3cdev_p) {
if (cxio_hal_find_rdev_by_t3cdev(rdev_p->t3cdev_p))
return (-EBUSY);
ifp = rdev_p->t3cdev_p->lldev;
strncpy(rdev_p->dev_name, rdev_p->t3cdev_p->name,
T3_MAX_DEV_NAME_LEN);
} else {
CTR1(KTR_IW_CXGB, "%s t3cdev_p or dev_name must be set", __FUNCTION__);
return (-EINVAL);
}
KASSERT(rdev_p->adap, ("%s: adap is NULL", __func__));
TAILQ_INSERT_TAIL(&rdev_list, rdev_p, entry);
CTR2(KTR_IW_CXGB, "%s opening rnic dev %s", __FUNCTION__, rdev_p->dev_name);
memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp));
if (!rdev_p->t3cdev_p)
rdev_p->t3cdev_p = T3CDEV(ifp);
rdev_p->t3cdev_p->ulp = (void *) rdev_p;
err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS,
&(rdev_p->rnic_info));
if (err) {
log(LOG_ERR, "%s t3cdev_p(%p)->ctl returned error %d.\n",
__FUNCTION__, rdev_p->t3cdev_p, err);
goto err1;
}
err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_PORTS,
&(rdev_p->port_info));
if (err) {
log(LOG_ERR, "%s t3cdev_p(%p)->ctl returned error %d.\n",
__FUNCTION__, rdev_p->t3cdev_p, err);
goto err1;
}
ri->udbell_physbase = rman_get_start(sc->udbs_res);
ri->udbell_len = rman_get_size(sc->udbs_res);
ri->tpt_base = t3_read_reg(sc, A_ULPTX_TPT_LLIMIT);
ri->tpt_top = t3_read_reg(sc, A_ULPTX_TPT_ULIMIT);
ri->pbl_base = t3_read_reg(sc, A_ULPTX_PBL_LLIMIT);
ri->pbl_top = t3_read_reg(sc, A_ULPTX_PBL_ULIMIT);
ri->rqt_base = t3_read_reg(sc, A_ULPRX_RQ_LLIMIT);
ri->rqt_top = t3_read_reg(sc, A_ULPRX_RQ_ULIMIT);
ri->kdb_addr = (void *)((unsigned long)
rman_get_virtual(sc->regs_res) + A_SG_KDOORBELL);
/*
* qpshift is the number of bits to shift the qpid left in order
@ -1064,8 +993,8 @@ cxio_rdev_open(struct cxio_rdev *rdev_p)
PAGE_SHIFT));
rdev_p->qpnr = rdev_p->rnic_info.udbell_len >> PAGE_SHIFT;
rdev_p->qpmask = (65536 >> ilog2(rdev_p->qpnr)) - 1;
CTR4(KTR_IW_CXGB, "cxio_rdev_open rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d",
rdev_p->dev_name, rdev_p->rnic_info.tpt_base,
CTR4(KTR_IW_CXGB, "cxio_rdev_open rnic %p info: tpt_base 0x%0x tpt_top 0x%0x num stags %d",
rdev_p->adap, rdev_p->rnic_info.tpt_base,
rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p));
CTR4(KTR_IW_CXGB, "pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x",
rdev_p->rnic_info.pbl_base,
@ -1111,43 +1040,34 @@ err3:
err2:
cxio_hal_destroy_ctrl_qp(rdev_p);
err1:
TAILQ_REMOVE(&rdev_list, rdev_p, entry);
return err;
}
void
cxio_rdev_close(struct cxio_rdev *rdev_p)
{
if (rdev_p) {
cxio_hal_pblpool_destroy(rdev_p);
cxio_hal_rqtpool_destroy(rdev_p);
TAILQ_REMOVE(&rdev_list, rdev_p, entry);
rdev_p->t3cdev_p->ulp = NULL;
cxio_hal_destroy_ctrl_qp(rdev_p);
cxio_hal_destroy_resource(rdev_p->rscp);
}
cxio_hal_pblpool_destroy(rdev_p);
cxio_hal_rqtpool_destroy(rdev_p);
cxio_hal_destroy_ctrl_qp(rdev_p);
cxio_hal_destroy_resource(rdev_p->rscp);
}
int
cxio_hal_init(void)
cxio_hal_init(struct adapter *sc)
{
TAILQ_INIT(&rdev_list);
#ifdef needed
if (cxio_hal_init_rhdl_resource(T3_MAX_NUM_RI))
return (-ENOMEM);
return (ENOMEM);
#endif
t3_register_cpl_handler(CPL_ASYNC_NOTIF, cxio_hal_ev_handler);
return 0;
t3_register_cpl_handler(sc, CPL_ASYNC_NOTIF, cxio_hal_ev_handler);
return (0);
}
void
cxio_hal_exit(void)
cxio_hal_uninit(struct adapter *sc)
{
struct cxio_rdev *rdev, *tmp;
t3_register_cpl_handler(CPL_ASYNC_NOTIF, NULL);
TAILQ_FOREACH_SAFE(rdev, &rdev_list, entry, tmp)
cxio_rdev_close(rdev);
t3_register_cpl_handler(sc, CPL_ASYNC_NOTIF, NULL);
#ifdef needed
cxio_hal_destroy_rhdl_resource();
#endif
@ -1304,11 +1224,12 @@ cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
}
/* incoming SEND with no receive posted failures */
if ((CQE_OPCODE(*hw_cqe) == T3_SEND) && RQ_TYPE(*hw_cqe) &&
if (CQE_OPCODE(*hw_cqe) && RQ_TYPE(*hw_cqe) &&
Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) {
ret = -1;
goto skip_cqe;
}
PANIC_IF((*cqe_flushed == 0) && !SW_CQE(*hw_cqe));
goto proc_cqe;
}
@ -1323,6 +1244,13 @@ cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
* then we complete this with TPT_ERR_MSN and mark the wq in
* error.
*/
if (Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) {
wq->error = 1;
ret = -1;
goto skip_cqe;
}
if (__predict_false((CQE_WRID_MSN(*hw_cqe) != (wq->rq_rptr + 1)))) {
wq->error = 1;
hw_cqe->header |= htonl(V_CQE_STATUS(TPT_ERR_MSN));
@ -1367,13 +1295,17 @@ proc_cqe:
wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe);
CTR2(KTR_IW_CXGB, "%s completing sq idx %ld", __FUNCTION__,
Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2));
*cookie = (wq->sq +
Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2))->wr_id;
*cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id;
wq->sq_rptr++;
} else {
CTR2(KTR_IW_CXGB, "%s completing rq idx %ld", __FUNCTION__,
Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
*cookie = *(wq->rq + Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
*cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id;
if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr)
cxio_hal_pblpool_free(wq->rdev,
wq->rq[Q_PTR2IDX(wq->rq_rptr,
wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE);
PANIC_IF(Q_EMPTY(wq->rq_rptr, wq->rq_wptr));
wq->rq_rptr++;
}
@ -1404,5 +1336,4 @@ skip_cqe:
}
return ret;
}
#endif
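
For reference, the new cxio_rdma_cq_setup() helper is called in three distinct ways in the file above (clear, create, and control CQ). The wrapper function below is a hypothetical summary, not part of the commit; the argument values are taken from the call sites visible in this diff, in the order (rdev, id, base_addr, size, ovfl_mode, credits, credit_thres).

static void
cq_setup_callsites_sketch(struct cxio_rdev *rdev_p, struct t3_cq *cq)
{
	/* cxio_hal_clear_cq_ctx(): disable the CQ context entirely. */
	cxio_rdma_cq_setup(rdev_p, cq->cqid, 0, 0, 0, 0, 0);

	/* cxio_create_cq(): real CQ, no overflow mode, 64K credits, threshold 1. */
	cxio_rdma_cq_setup(rdev_p, cq->cqid, cq->dma_addr,
	    1UL << cq->size_log2, 0, 65535, 1);

	/*
	 * cxio_hal_init_ctrl_cq(): size 1 with ovfl_mode 1, so completions are
	 * redirected to the async-notification response queue (ASYNC_NOTIF_RSPQ).
	 */
	cxio_rdma_cq_setup(rdev_p, 0, 0, 1, 1, 0, 0);
}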

View File

@ -45,7 +45,11 @@ $FreeBSD$
#define T3_MAX_NUM_PD (1<<15)
#define T3_MAX_PBL_SIZE 256
#define T3_MAX_RQ_SIZE 1024
#define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1)
#define T3_MAX_CQ_DEPTH 65536
#define T3_MAX_NUM_STAG (1<<15)
#define T3_MAX_MR_SIZE 0x100000000ULL
#define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */
#define T3_STAG_UNSET 0xffffffff
@ -55,12 +59,9 @@ struct cxio_hal_ctrl_qp {
u32 wptr;
u32 rptr;
struct mtx lock; /* for the wtpr, can sleep */
#ifdef notyet
DECLARE_PCI_UNMAP_ADDR(mapping)
#endif
union t3_wr *workq; /* the work request queue */
bus_addr_t dma_addr; /* pci bus address of the workq */
void /* __iomem */ *doorbell;
void *doorbell;
};
struct cxio_hal_resource {
@ -85,13 +86,10 @@ struct cxio_ucontext {
};
struct cxio_rdev {
char dev_name[T3_MAX_DEV_NAME_LEN];
struct t3cdev *t3cdev_p;
struct adapter *adap;
struct rdma_info rnic_info;
struct adap_ports port_info;
struct cxio_hal_resource *rscp;
struct cxio_hal_ctrl_qp ctrl_qp;
void *ulp;
unsigned long qpshift;
u32 qpnr;
u32 qpmask;
@ -139,9 +137,8 @@ int cxio_rdev_open(struct cxio_rdev *rdev);
void cxio_rdev_close(struct cxio_rdev *rdev);
int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq,
enum t3_cq_opcode op, u32 credit);
int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq, int kernel);
int cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
int cxio_resize_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
void cxio_init_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq,
@ -149,27 +146,27 @@ int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq,
int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq,
struct cxio_ucontext *uctx);
int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode);
int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
u32 pbl_addr, u32 pbl_size);
int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
u8 page_size, __be64 *pbl, u32 *pbl_size,
u32 *pbl_addr);
u8 page_size, u32 pbl_size, u32 pbl_addr);
int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
u8 page_size, __be64 *pbl, u32 *pbl_size,
u32 *pbl_addr);
u8 page_size, u32 pbl_size, u32 pbl_addr);
int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size,
u32 pbl_addr);
int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid);
int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag);
int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr);
void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr,
struct socket *so);
u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp);
void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid);
int cxio_hal_init(void);
int cxio_hal_init(struct adapter *);
void cxio_hal_uninit(struct adapter *);
void cxio_hal_exit(void);
void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count);
void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count);
int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count);
int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count);
void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
void cxio_flush_hw_cq(struct t3_cq *cq);
@ -178,7 +175,7 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
#define MOD "iw_cxgb: "
#ifdef DEBUG
#ifdef INVARIANTS
void cxio_dump_tpt(struct cxio_rdev *rev, u32 stag);
void cxio_dump_pbl(struct cxio_rdev *rev, u32 pbl_addr, uint32_t len, u8 shift);
void cxio_dump_wqe(union t3_wr *wqe);
@ -187,60 +184,7 @@ void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents);
void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid);
#endif
static unsigned char hiBitSetTab[] = {
0, 1, 2, 2, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6,
7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7
};
static __inline
int ilog2(unsigned long val)
{
unsigned long tmp;
tmp = val >> 24;
if (tmp) {
return hiBitSetTab[tmp] + 23;
}
tmp = (val >> 16) & 0xff;
if (tmp) {
return hiBitSetTab[tmp] + 15;
}
tmp = (val >> 8) & 0xff;
if (tmp) {
return hiBitSetTab[tmp] + 7;
}
return hiBitSetTab[val & 0xff] - 1;
}
#define cxfree(a) free((a), M_DEVBUF);
#define kmalloc(a, b) malloc((a), M_DEVBUF, (b))
#define kzalloc(a, b) malloc((a), M_DEVBUF, (b)|M_ZERO)
static __inline __attribute__((const))
unsigned long roundup_pow_of_two(unsigned long n)
{
return 1UL << flsl(n - 1);
}
#define PAGE_ALIGN(x) roundup2((x), PAGE_SIZE)
#include <sys/blist.h>
struct gen_pool {
@ -259,6 +203,7 @@ gen_pool_create(daddr_t base, u_int chunk_shift, u_int len)
if (gp == NULL)
return (NULL);
memset(gp, 0, sizeof(struct gen_pool));
gp->gen_list = blist_create(len >> chunk_shift, M_NOWAIT);
if (gp->gen_list == NULL) {
free(gp, M_DEVBUF);
@ -323,8 +268,7 @@ gen_pool_destroy(struct gen_pool *gp)
mtx_unlock(lockp); \
__ret; \
})
extern struct cxio_rdev *cxio_hal_find_rdev_by_t3cdev(struct t3cdev *tdev);
#define KTR_IW_CXGB KTR_SPARE4
#define KTR_IW_CXGB KTR_SPARE3
#endif
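
The flsl()-based roundup_pow_of_two() shown in the hunk above is what the verbs code relies on when sizing queues (entries = roundup_pow_of_two(entries); size_log2 = ilog2(entries) in iwch_create_cq further down). A standalone demonstration of that rounding, assuming FreeBSD's flsl() from <strings.h>; this small program is illustrative only and not part of the driver.

#include <assert.h>
#include <strings.h>		/* flsl() on FreeBSD */

static unsigned long
roundup_pow_of_two(unsigned long n)
{
	return (1UL << flsl(n - 1));
}

int
main(void)
{
	assert(roundup_pow_of_two(1) == 1);	/* already a power of two */
	assert(roundup_pow_of_two(33) == 64);	/* rounds up to the next power */
	assert(roundup_pow_of_two(64) == 64);	/* exact powers are unchanged */
	return (0);
}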

View File

@ -0,0 +1,22 @@
#ifndef __IB_INTFC_H__
#define __IB_INTFC_H__
/* $FreeBSD$ */
#undef prefetch
#undef WARN_ON
#undef max_t
#undef udelay
#undef le32_to_cpu
#undef le16_to_cpu
#undef cpu_to_le32
#undef swab32
#undef container_of
#undef LIST_HEAD
#define LIST_HEAD(name, type) \
struct name { \
struct type *lh_first; /* first element */ \
}
#endif /* __IB_INTFC_H__ */
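
The new iw_cxgb_ib_intfc.h shim above undoes macros defined by the Linux-compat RDMA headers so that the rest of the driver compiles against the native FreeBSD versions; in particular, LIST_HEAD is put back to the two-argument <sys/queue.h> form. A hypothetical illustration of what the restored macro expands to (the type names here are made up):

LIST_HEAD(ep_list_head, example_ep);
/* expands to:
 * struct ep_list_head {
 *	struct example_ep *lh_first;	/- first element -/
 * };
 */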

View File

@ -29,11 +29,13 @@ POSSIBILITY OF SUCH DAMAGE.
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
@ -59,9 +61,11 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
#include <contrib/rdma/ib_verbs.h>
#include <contrib/rdma/ib_umem.h>
#include <contrib/rdma/ib_user_verbs.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <linux/idr.h>
#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
#include <cxgb_include.h>
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
@ -72,14 +76,24 @@ __FBSDID("$FreeBSD$");
#include <ulp/iw_cxgb/iw_cxgb_resource.h>
#include <ulp/iw_cxgb/iw_cxgb_user.h>
int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
struct iwch_mr *mhp,
int shift,
__be64 *page_list)
static int iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
{
u32 stag;
u32 mmid;
mhp->attr.state = 1;
mhp->attr.stag = stag;
mmid = stag >> 8;
mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
CTR3(KTR_IW_CXGB, "%s mmid 0x%x mhp %p", __func__, mmid, mhp);
return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
}
int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
struct iwch_mr *mhp,
int shift)
{
u32 stag;
int ret;
if (cxio_register_phys_mem(&rhp->rdev,
&stag, mhp->attr.pdid,
@ -87,28 +101,24 @@ int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
mhp->attr.zbva,
mhp->attr.va_fbo,
mhp->attr.len,
shift-12,
page_list,
&mhp->attr.pbl_size, &mhp->attr.pbl_addr))
shift - 12,
mhp->attr.pbl_size, mhp->attr.pbl_addr))
return (-ENOMEM);
mhp->attr.state = 1;
mhp->attr.stag = stag;
mmid = stag >> 8;
mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
insert_handle(rhp, &rhp->mmidr, mhp, mmid);
CTR3(KTR_IW_CXGB, "%s mmid 0x%x mhp %p", __FUNCTION__, mmid, mhp);
return 0;
ret = iwch_finish_mem_reg(mhp, stag);
if (ret)
cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
mhp->attr.pbl_addr);
return ret;
}
int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
struct iwch_mr *mhp,
int shift,
__be64 *page_list,
int npages)
{
u32 stag;
u32 mmid;
int ret;
/* We could support this... */
if (npages > mhp->attr.pbl_size)
@ -121,17 +131,40 @@ int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
mhp->attr.zbva,
mhp->attr.va_fbo,
mhp->attr.len,
shift-12,
page_list,
&mhp->attr.pbl_size, &mhp->attr.pbl_addr))
shift - 12,
mhp->attr.pbl_size, mhp->attr.pbl_addr))
return (-ENOMEM);
mhp->attr.state = 1;
mhp->attr.stag = stag;
mmid = stag >> 8;
mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
insert_handle(rhp, &rhp->mmidr, mhp, mmid);
CTR3(KTR_IW_CXGB, "%s mmid 0x%x mhp %p", __FUNCTION__, mmid, mhp);
ret = iwch_finish_mem_reg(mhp, stag);
if (ret)
cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
mhp->attr.pbl_addr);
return ret;
}
int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
{
mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev,
npages << 3);
if (!mhp->attr.pbl_addr)
return -ENOMEM;
mhp->attr.pbl_size = npages;
return 0;
}
void iwch_free_pbl(struct iwch_mr *mhp)
{
cxio_hal_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
mhp->attr.pbl_size << 3);
}
int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset)
{
return cxio_write_pbl(&mhp->rhp->rdev, pages,
mhp->attr.pbl_addr + (offset << 3), npages);
}
int build_phys_page_list(struct ib_phys_buf *buffer_list,
@ -204,3 +237,4 @@ int build_phys_page_list(struct ib_phys_buf *buffer_list,
return 0;
}
#endif
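
The memory-registration path in this file is now split into explicit PBL steps: allocate PBL space, write the page list, then write the TPT entry, freeing the PBL on any failure. A condensed sketch of that ordering as it appears in iwch_register_phys_mem(); the wrapper name is hypothetical and the attribute setup (pdid, perms, va_fbo, len, and so on) is elided.

static int
register_mr_sketch(struct iwch_dev *rhp, struct iwch_pd *php,
    struct iwch_mr *mhp, __be64 *pages, int npages, int shift)
{
	int err;

	err = iwch_alloc_pbl(mhp, npages);		/* reserves PBL space, sets pbl_addr/pbl_size */
	if (err)
		return (err);

	err = iwch_write_pbl(mhp, pages, npages, 0);	/* pushes DMA addresses via the ctrl QP */
	if (err)
		goto free_pbl;

	/* ... fill in mhp->attr (pdid, perms, va_fbo, len, page_size) here ... */

	err = iwch_register_mem(rhp, php, mhp, shift);	/* writes the TPT entry */
	if (err)
		goto free_pbl;
	return (0);

free_pbl:
	iwch_free_pbl(mhp);
	return (err);
}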

View File

@ -29,11 +29,13 @@ POSSIBILITY OF SUCH DAMAGE.
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
@ -62,9 +64,12 @@ __FBSDID("$FreeBSD$");
#include <vm/vm.h>
#include <vm/pmap.h>
#include <contrib/rdma/ib_verbs.h>
#include <contrib/rdma/ib_umem.h>
#include <contrib/rdma/ib_user_verbs.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <linux/idr.h>
#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
#include <cxgb_include.h>
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
@ -180,6 +185,8 @@ iwch_create_cq(struct ib_device *ibdev, int entries, int vector,
struct iwch_create_cq_resp uresp;
struct iwch_create_cq_req ureq;
struct iwch_ucontext *ucontext = NULL;
static int warned;
size_t resplen;
CTR3(KTR_IW_CXGB, "%s ib_dev %p entries %d", __FUNCTION__, ibdev, entries);
rhp = to_iwch_dev(ibdev);
@ -214,7 +221,7 @@ iwch_create_cq(struct ib_device *ibdev, int entries, int vector,
entries = roundup_pow_of_two(entries);
chp->cq.size_log2 = ilog2(entries);
if (cxio_create_cq(&rhp->rdev, &chp->cq)) {
if (cxio_create_cq(&rhp->rdev, &chp->cq, !ucontext)) {
cxfree(chp);
return ERR_PTR(-ENOMEM);
}
@ -222,7 +229,11 @@ iwch_create_cq(struct ib_device *ibdev, int entries, int vector,
chp->ibcq.cqe = 1 << chp->cq.size_log2;
mtx_init(&chp->lock, "cxgb cq", NULL, MTX_DEF|MTX_DUPOK);
chp->refcnt = 1;
insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
if (insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid)) {
cxio_destroy_cq(&chp->rhp->rdev, &chp->cq);
cxfree(chp);
return ERR_PTR(-ENOMEM);
}
if (ucontext) {
struct iwch_mm_entry *mm;
@ -238,15 +249,27 @@ iwch_create_cq(struct ib_device *ibdev, int entries, int vector,
uresp.key = ucontext->key;
ucontext->key += PAGE_SIZE;
mtx_unlock(&ucontext->mmap_lock);
if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
mm->key = uresp.key;
mm->addr = vtophys(chp->cq.queue);
if (udata->outlen < sizeof uresp) {
if (!warned++)
CTR1(KTR_IW_CXGB, "%s Warning - "
"downlevel libcxgb3 (non-fatal).\n",
__func__);
mm->len = PAGE_ALIGN((1UL << uresp.size_log2) *
sizeof(struct t3_cqe));
resplen = sizeof(struct iwch_create_cq_resp_v0);
} else {
mm->len = PAGE_ALIGN(((1UL << uresp.size_log2) + 1) *
sizeof(struct t3_cqe));
uresp.memsize = mm->len;
resplen = sizeof uresp;
}
if (ib_copy_to_udata(udata, &uresp, resplen)) {
cxfree(mm);
iwch_destroy_cq(&chp->ibcq);
return ERR_PTR(-EFAULT);
}
mm->key = uresp.key;
mm->addr = vtophys(chp->cq.queue);
mm->len = PAGE_ALIGN((1UL << uresp.size_log2) *
sizeof (struct t3_cqe));
insert_mmap(ucontext, mm);
}
CTR4(KTR_IW_CXGB, "created cqid 0x%0x chp %p size 0x%0x, dma_addr 0x%0llx",
@ -256,72 +279,11 @@ iwch_create_cq(struct ib_device *ibdev, int entries, int vector,
}
static int
iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
iwch_resize_cq(struct ib_cq *cq __unused, int cqe __unused,
struct ib_udata *udata __unused)
{
#ifdef notyet
struct iwch_cq *chp = to_iwch_cq(cq);
struct t3_cq oldcq, newcq;
int ret;
CTR3(KTR_IW_CXGB, "%s ib_cq %p cqe %d", __FUNCTION__, cq, cqe);
/* We don't downsize... */
if (cqe <= cq->cqe)
return 0;
/* create new t3_cq with new size */
cqe = roundup_pow_of_two(cqe+1);
newcq.size_log2 = ilog2(cqe);
/* Dont allow resize to less than the current wce count */
if (cqe < Q_COUNT(chp->cq.rptr, chp->cq.wptr)) {
return (-ENOMEM);
}
/* Quiesce all QPs using this CQ */
ret = iwch_quiesce_qps(chp);
if (ret) {
return (ret);
}
ret = cxio_create_cq(&chp->rhp->rdev, &newcq);
if (ret) {
return (ret);
}
/* copy CQEs */
memcpy(newcq.queue, chp->cq.queue, (1 << chp->cq.size_log2) *
sizeof(struct t3_cqe));
/* old iwch_qp gets new t3_cq but keeps old cqid */
oldcq = chp->cq;
chp->cq = newcq;
chp->cq.cqid = oldcq.cqid;
/* resize new t3_cq to update the HW context */
ret = cxio_resize_cq(&chp->rhp->rdev, &chp->cq);
if (ret) {
chp->cq = oldcq;
return ret;
}
chp->ibcq.cqe = (1<<chp->cq.size_log2) - 1;
/* destroy old t3_cq */
oldcq.cqid = newcq.cqid;
ret = cxio_destroy_cq(&chp->rhp->rdev, &oldcq);
if (ret) {
log(LOG_ERR, "%s - cxio_destroy_cq failed %d\n",
__FUNCTION__, ret);
}
/* add user hooks here */
/* resume qps */
ret = iwch_resume_qps(chp);
return ret;
#else
return (-ENOSYS);
#endif
}
static int
@ -357,67 +319,12 @@ iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
return err;
}
#ifdef notyet
static int
iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
iwch_mmap(struct ib_ucontext *context __unused, struct vm_area_struct *vma __unused)
{
#ifdef notyet
int len = vma->vm_end - vma->vm_start;
u32 key = vma->vm_pgoff << PAGE_SHIFT;
struct cxio_rdev *rdev_p;
int ret = 0;
struct iwch_mm_entry *mm;
struct iwch_ucontext *ucontext;
u64 addr;
CTR4(KTR_IW_CXGB, "%s pgoff 0x%lx key 0x%x len %d", __FUNCTION__, vma->vm_pgoff,
key, len);
if (vma->vm_start & (PAGE_SIZE-1)) {
return (-EINVAL);
}
rdev_p = &(to_iwch_dev(context->device)->rdev);
ucontext = to_iwch_ucontext(context);
mm = remove_mmap(ucontext, key, len);
if (!mm)
return (-EINVAL);
addr = mm->addr;
cxfree(mm);
if ((addr >= rdev_p->rnic_info.udbell_physbase) &&
(addr < (rdev_p->rnic_info.udbell_physbase +
rdev_p->rnic_info.udbell_len))) {
/*
* Map T3 DB register.
*/
if (vma->vm_flags & VM_READ) {
return (-EPERM);
}
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
vma->vm_flags &= ~VM_MAYREAD;
ret = io_remap_pfn_range(vma, vma->vm_start,
addr >> PAGE_SHIFT,
len, vma->vm_page_prot);
} else {
/*
* Map WQ or CQ contig dma memory...
*/
ret = remap_pfn_range(vma, vma->vm_start,
addr >> PAGE_SHIFT,
len, vma->vm_page_prot);
}
return ret;
#endif
return (0);
return (-ENOSYS);
}
#endif
static int iwch_deallocate_pd(struct ib_pd *pd)
{
@ -470,7 +377,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
CTR2(KTR_IW_CXGB, "%s ib_mr %p", __FUNCTION__, ib_mr);
/* There can be no memory windows */
if (atomic_load_acq_int(&ib_mr->usecnt))
if (atomic_load_acq_int(&ib_mr->usecnt.counter))
return (-EINVAL);
mhp = to_iwch_mr(ib_mr);
@ -478,6 +385,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
mmid = mhp->attr.stag >> 8;
cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
mhp->attr.pbl_addr);
iwch_free_pbl(mhp);
remove_handle(rhp, &rhp->mmidr, mmid);
if (mhp->kva)
cxfree((void *) (unsigned long) mhp->kva);
@ -511,6 +419,8 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
if (!mhp)
return ERR_PTR(-ENOMEM);
mhp->rhp = rhp;
/* First check that we have enough alignment */
if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
ret = -EINVAL;
@ -528,7 +438,17 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
if (ret)
goto err;
mhp->rhp = rhp;
ret = iwch_alloc_pbl(mhp, npages);
if (ret) {
cxfree(page_list);
goto err_pbl;
}
ret = iwch_write_pbl(mhp, page_list, npages, 0);
cxfree(page_list);
if (ret)
goto err;
mhp->attr.pdid = php->pdid;
mhp->attr.zbva = 0;
@ -538,15 +458,18 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
mhp->attr.len = (u32) total_size;
mhp->attr.pbl_size = npages;
ret = iwch_register_mem(rhp, php, mhp, shift, page_list);
cxfree(page_list);
if (ret) {
goto err;
}
ret = iwch_register_mem(rhp, php, mhp, shift);
if (ret)
goto err_pbl;
return &mhp->ibmr;
err_pbl:
iwch_free_pbl(mhp);
err:
cxfree(mhp);
return ERR_PTR(-ret);
return ERR_PTR(ret);
}
@ -570,7 +493,7 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
CTR3(KTR_IW_CXGB, "%s ib_mr %p ib_pd %p", __FUNCTION__, mr, pd);
/* There can be no memory windows */
if (atomic_load_acq_int(&mr->usecnt))
if (atomic_load_acq_int(&mr->usecnt.counter))
return (-EINVAL);
mhp = to_iwch_mr(mr);
@ -596,7 +519,7 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
return ret;
}
ret = iwch_reregister_mem(rhp, php, &mh, shift, page_list, npages);
ret = iwch_reregister_mem(rhp, php, &mh, shift, npages);
cxfree(page_list);
if (ret) {
return ret;
@ -640,7 +563,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (!mhp)
return ERR_PTR(-ENOMEM);
mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc);
mhp->rhp = rhp;
mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
if (IS_ERR(mhp->umem)) {
err = PTR_ERR(mhp->umem);
cxfree(mhp);
@ -650,18 +575,22 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
shift = ffs(mhp->umem->page_size) - 1;
n = 0;
TAILQ_FOREACH(chunk, &mhp->umem->chunk_list, entry)
list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
n += chunk->nents;
pages = kmalloc(n * sizeof(u64), M_NOWAIT);
err = iwch_alloc_pbl(mhp, n);
if (err)
goto err;
pages = (__be64 *) kmalloc(n * sizeof(u64), M_NOWAIT);
if (!pages) {
err = -ENOMEM;
goto err;
goto err_pbl;
}
i = n = 0;
#if 0
#ifdef notyet
TAILQ_FOREACH(chunk, &mhp->umem->chunk_list, entry)
for (j = 0; j < chunk->nmap; ++j) {
len = sg_dma_len(&chunk->page_list[j]) >> shift;
@ -669,21 +598,36 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
pages[i++] = htobe64(sg_dma_address(
&chunk->page_list[j]) +
mhp->umem->page_size * k);
if (i == PAGE_SIZE / sizeof *pages) {
err = iwch_write_pbl(mhp, pages, i, n);
if (err)
goto pbl_done;
n += i;
i = 0;
}
}
}
#endif
mhp->rhp = rhp;
if (i)
err = iwch_write_pbl(mhp, pages, i, n);
#ifdef notyet
pbl_done:
#endif
cxfree(pages);
if (err)
goto err_pbl;
mhp->attr.pdid = php->pdid;
mhp->attr.zbva = 0;
mhp->attr.perms = iwch_ib_to_tpt_access(acc);
mhp->attr.va_fbo = virt;
mhp->attr.page_size = shift - 12;
mhp->attr.len = (u32) length;
mhp->attr.pbl_size = i;
err = iwch_register_mem(rhp, php, mhp, shift, pages);
cxfree(pages);
err = iwch_register_mem(rhp, php, mhp, shift);
if (err)
goto err;
goto err_pbl;
if (udata && !t3a_device(rhp)) {
uresp.pbl_addr = (mhp->attr.pbl_addr -
@ -700,6 +644,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return &mhp->ibmr;
err_pbl:
iwch_free_pbl(mhp);
err:
ib_umem_release(mhp->umem);
cxfree(mhp);
@ -748,7 +695,12 @@ static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd)
mhp->attr.type = TPT_MW;
mhp->attr.stag = stag;
mmid = (stag) >> 8;
insert_handle(rhp, &rhp->mmidr, mhp, mmid);
mhp->ibmw.rkey = stag;
if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) {
cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
cxfree(mhp);
return ERR_PTR(-ENOMEM);
}
CTR4(KTR_IW_CXGB, "%s mmid 0x%x mhp %p stag 0x%x", __FUNCTION__, mmid, mhp, stag);
return &(mhp->ibmw);
}
@ -893,7 +845,13 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
mtx_init(&qhp->lock, "cxgb qp", NULL, MTX_DEF|MTX_DUPOK);
qhp->refcnt = 1;
insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid);
if (insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid)) {
cxio_destroy_qp(&rhp->rdev, &qhp->wq,
ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
cxfree(qhp);
return ERR_PTR(-ENOMEM);
}
if (udata) {
@ -1023,12 +981,14 @@ static int iwch_query_gid(struct ib_device *ibdev, u8 port,
{
struct iwch_dev *dev;
struct port_info *pi;
struct adapter *sc;
CTR5(KTR_IW_CXGB, "%s ibdev %p, port %d, index %d, gid %p",
__FUNCTION__, ibdev, port, index, gid);
dev = to_iwch_dev(ibdev);
sc = dev->rdev.adap;
PANIC_IF(port == 0 || port > 2);
pi = ((struct port_info *)dev->rdev.port_info.lldevs[port-1]->if_softc);
pi = &sc->port[port - 1];
memset(&(gid->raw[0]), 0, sizeof(gid->raw));
memcpy(&(gid->raw[0]), pi->hw_addr, 6);
return 0;
@ -1037,21 +997,20 @@ static int iwch_query_gid(struct ib_device *ibdev, u8 port,
static int iwch_query_device(struct ib_device *ibdev,
struct ib_device_attr *props)
{
struct iwch_dev *dev;
struct adapter *sc;
CTR2(KTR_IW_CXGB, "%s ibdev %p", __FUNCTION__, ibdev);
dev = to_iwch_dev(ibdev);
sc = dev->rdev.adap;
memset(props, 0, sizeof *props);
#ifdef notyet
memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->if_addr.ifa_addr, 6);
#endif
memcpy(&props->sys_image_guid, sc->port[0].hw_addr, 6);
props->device_cap_flags = dev->device_cap_flags;
#ifdef notyet
props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor;
props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device;
#endif
props->max_mr_size = ~0ull;
props->page_size_cap = dev->attr.mem_pgsizes_bitmask;
props->vendor_id = pci_get_vendor(sc->dev);
props->vendor_part_id = pci_get_device(sc->dev);
props->max_mr_size = dev->attr.max_mr_size;
props->max_qp = dev->attr.max_qps;
props->max_qp_wr = dev->attr.max_wrs;
props->max_sge = dev->attr.max_sge_per_wr;
@ -1071,13 +1030,10 @@ static int iwch_query_port(struct ib_device *ibdev,
u8 port, struct ib_port_attr *props)
{
CTR2(KTR_IW_CXGB, "%s ibdev %p", __FUNCTION__, ibdev);
memset(props, 0, sizeof(struct ib_port_attr));
props->max_mtu = IB_MTU_4096;
props->lid = 0;
props->lmc = 0;
props->sm_lid = 0;
props->sm_sl = 0;
props->active_mtu = IB_MTU_2048;
props->state = IB_PORT_ACTIVE;
props->phys_state = 0;
props->port_cap_flags =
IB_PORT_CM_SUP |
IB_PORT_SNMP_TUNNEL_SUP |
@ -1086,7 +1042,6 @@ static int iwch_query_port(struct ib_device *ibdev,
IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
props->gid_tbl_len = 1;
props->pkey_tbl_len = 1;
props->qkey_viol_cntr = 0;
props->active_width = 2;
props->active_speed = 2;
props->max_msg_sz = -1;
@ -1094,80 +1049,18 @@ static int iwch_query_port(struct ib_device *ibdev,
return 0;
}
#ifdef notyet
static ssize_t show_rev(struct class_device *cdev, char *buf)
{
struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
ibdev.class_dev);
CTR2(KTR_IW_CXGB, "%s class dev 0x%p", __FUNCTION__, cdev);
return sprintf(buf, "%d\n", dev->rdev.t3cdev_p->type);
}
static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
{
struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
ibdev.class_dev);
struct ethtool_drvinfo info;
struct net_device *lldev = dev->rdev.t3cdev_p->lldev;
CTR2(KTR_IW_CXGB, "%s class dev 0x%p", __FUNCTION__, cdev);
lldev->ethtool_ops->get_drvinfo(lldev, &info);
return sprintf(buf, "%s\n", info.fw_version);
}
static ssize_t show_hca(struct class_device *cdev, char *buf)
{
struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
ibdev.class_dev);
struct ethtool_drvinfo info;
struct net_device *lldev = dev->rdev.t3cdev_p->lldev;
CTR2(KTR_IW_CXGB, "%s class dev 0x%p", __FUNCTION__, cdev);
lldev->ethtool_ops->get_drvinfo(lldev, &info);
return sprintf(buf, "%s\n", info.driver);
}
static ssize_t show_board(struct class_device *cdev, char *buf)
{
struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
ibdev.class_dev);
CTR2(KTR_IW_CXGB, "%s class dev 0x%p", __FUNCTION__, dev);
#ifdef notyet
return sprintf(buf, "%x.%x\n", dev->rdev.rnic_info.pdev->vendor,
dev->rdev.rnic_info.pdev->device);
#else
return sprintf(buf, "%x.%x\n", 0xdead, 0xbeef); /* XXX */
#endif
}
static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static struct class_device_attribute *iwch_class_attributes[] = {
&class_device_attr_hw_rev,
&class_device_attr_fw_ver,
&class_device_attr_hca_type,
&class_device_attr_board_id
};
#endif
int iwch_register_device(struct iwch_dev *dev)
{
int ret;
#ifdef notyet
int i;
#endif
struct adapter *sc = dev->rdev.adap;
CTR2(KTR_IW_CXGB, "%s iwch_dev %p", __FUNCTION__, dev);
strlcpy(dev->ibdev.name, "cxgb3_%d", IB_DEVICE_NAME_MAX);
memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
#ifdef notyet
memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
#endif
memcpy(&dev->ibdev.node_guid, sc->port[0].hw_addr, 6);
dev->device_cap_flags =
(IB_DEVICE_ZERO_STAG |
IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW);
(IB_DEVICE_LOCAL_DMA_LKEY |
IB_DEVICE_MEM_WINDOW);
dev->ibdev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
@ -1189,9 +1082,9 @@ int iwch_register_device(struct iwch_dev *dev)
(1ull << IB_USER_VERBS_CMD_POST_RECV);
dev->ibdev.node_type = RDMA_NODE_RNIC;
memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC));
dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
dev->ibdev.phys_port_cnt = sc->params.nports;
dev->ibdev.num_comp_vectors = 1;
dev->ibdev.dma_device = dev->rdev.rnic_info.pdev;
dev->ibdev.dma_device = dev->rdev.adap->dev;
dev->ibdev.query_device = iwch_query_device;
dev->ibdev.query_port = iwch_query_port;
dev->ibdev.modify_port = iwch_modify_port;
@ -1199,9 +1092,7 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.query_gid = iwch_query_gid;
dev->ibdev.alloc_ucontext = iwch_alloc_ucontext;
dev->ibdev.dealloc_ucontext = iwch_dealloc_ucontext;
#ifdef notyet
dev->ibdev.mmap = iwch_mmap;
#endif
dev->ibdev.alloc_pd = iwch_allocate_pd;
dev->ibdev.dealloc_pd = iwch_deallocate_pd;
dev->ibdev.create_ah = iwch_ah_create;
@ -1229,11 +1120,13 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.req_notify_cq = iwch_arm_cq;
dev->ibdev.post_send = iwch_post_send;
dev->ibdev.post_recv = iwch_post_receive;
dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION;
dev->ibdev.iwcm =
(struct iw_cm_verbs *) kmalloc(sizeof(struct iw_cm_verbs),
M_NOWAIT);
kmalloc(sizeof(struct iw_cm_verbs), M_NOWAIT);
if (!dev->ibdev.iwcm)
return (ENOMEM);
dev->ibdev.iwcm->connect = iwch_connect;
dev->ibdev.iwcm->accept = iwch_accept_cr;
dev->ibdev.iwcm->reject = iwch_reject_cr;
@ -1246,35 +1139,19 @@ int iwch_register_device(struct iwch_dev *dev)
ret = ib_register_device(&dev->ibdev);
if (ret)
goto bail1;
#ifdef notyet
for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i) {
ret = class_device_create_file(&dev->ibdev.class_dev,
iwch_class_attributes[i]);
if (ret) {
goto bail2;
}
}
#endif
return 0;
#ifdef notyet
bail2:
#endif
ib_unregister_device(&dev->ibdev);
return (0);
bail1:
return ret;
cxfree(dev->ibdev.iwcm);
return (ret);
}
void iwch_unregister_device(struct iwch_dev *dev)
{
#ifdef notyet
int i;
CTR2(KTR_IW_CXGB, "%s iwch_dev %p", __FUNCTION__, dev);
for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i)
class_device_remove_file(&dev->ibdev.class_dev,
iwch_class_attributes[i]);
#endif
ib_unregister_device(&dev->ibdev);
cxfree(dev->ibdev.iwcm);
return;
}
#endif

View File

@ -31,7 +31,7 @@ $FreeBSD$
#ifndef __IWCH_PROVIDER_H__
#define __IWCH_PROVIDER_H__
#include <contrib/rdma/ib_verbs.h>
#include <rdma/ib_verbs.h>
struct iwch_pd {
struct ib_pd ibpd;
@ -116,6 +116,7 @@ enum IWCH_QP_FLAGS {
};
struct iwch_mpa_attributes {
u8 initiator;
u8 recv_marker_enabled;
u8 xmit_marker_enabled; /* iWARP: enable inbound Read Resp. */
u8 crc_enabled;
@ -336,18 +337,17 @@ int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
int iwch_register_device(struct iwch_dev *dev);
void iwch_unregister_device(struct iwch_dev *dev);
int iwch_quiesce_qps(struct iwch_cq *chp);
int iwch_resume_qps(struct iwch_cq *chp);
void stop_read_rep_timer(struct iwch_qp *qhp);
int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
struct iwch_mr *mhp,
int shift,
__be64 *page_list);
int shift);
int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
struct iwch_mr *mhp,
int shift,
__be64 *page_list,
int npages);
int iwch_alloc_pbl(struct iwch_mr *mhp, int npages);
void iwch_free_pbl(struct iwch_mr *mhp);
int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset);
int build_phys_page_list(struct ib_phys_buf *buffer_list,
int num_phys_buf,
u64 *iova_start,

View File

@ -1,4 +1,3 @@
/**************************************************************************
Copyright (c) 2007, Chelsio Inc.
@ -30,11 +29,13 @@ POSSIBILITY OF SUCH DAMAGE.
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
@ -48,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
@ -57,14 +59,26 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h>
#include <sys/queue.h>
#include <net/route.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/toecore.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp.h>
#include <netinet/tcpip.h>
#include <contrib/rdma/ib_verbs.h>
#include <contrib/rdma/ib_umem.h>
#include <contrib/rdma/ib_user_verbs.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <linux/idr.h>
#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
#include <cxgb_include.h>
#include <ulp/tom/cxgb_l2t.h>
#include <ulp/tom/cxgb_toepcb.h>
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
#include <ulp/iw_cxgb/iw_cxgb_hal.h>
#include <ulp/iw_cxgb/iw_cxgb_provider.h>
@ -75,7 +89,7 @@ __FBSDID("$FreeBSD$");
#define NO_SUPPORT -1
static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
u8 * flit_cnt)
{
int i;
@ -83,59 +97,46 @@ static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
switch (wr->opcode) {
case IB_WR_SEND:
case IB_WR_SEND_WITH_IMM:
if (wr->send_flags & IB_SEND_SOLICITED)
wqe->send.rdmaop = T3_SEND_WITH_SE;
else
wqe->send.rdmaop = T3_SEND;
wqe->send.rem_stag = 0;
break;
#if 0 /* Not currently supported */
case TYPE_SEND_INVALIDATE:
case TYPE_SEND_INVALIDATE_IMMEDIATE:
wqe->send.rdmaop = T3_SEND_WITH_INV;
wqe->send.rem_stag = htobe32(wr->wr.rdma.rkey);
case IB_WR_SEND_WITH_IMM:
if (wr->send_flags & IB_SEND_SOLICITED)
wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
else
wqe->send.rdmaop = T3_SEND_WITH_INV;
wqe->send.rem_stag = 0;
break;
case TYPE_SEND_SE_INVALIDATE:
wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
wqe->send.rem_stag = htobe32(wr->wr.rdma.rkey);
break;
#endif
default:
break;
return -EINVAL;
}
if (wr->num_sge > T3_MAX_SGE)
return (-EINVAL);
wqe->send.reserved[0] = 0;
wqe->send.reserved[1] = 0;
wqe->send.reserved[2] = 0;
if (wr->opcode == IB_WR_SEND_WITH_IMM) {
plen = 4;
wqe->send.sgl[0].stag = wr->imm_data;
wqe->send.sgl[0].len = 0;
wqe->send.num_sgle = 0;
*flit_cnt = 5;
} else {
plen = 0;
for (i = 0; i < wr->num_sge; i++) {
if ((plen + wr->sg_list[i].length) < plen) {
return (-EMSGSIZE);
}
plen += wr->sg_list[i].length;
wqe->send.sgl[i].stag =
htobe32(wr->sg_list[i].lkey);
wqe->send.sgl[i].len =
htobe32(wr->sg_list[i].length);
wqe->send.sgl[i].to = htobe64(wr->sg_list[i].addr);
plen = 0;
for (i = 0; i < wr->num_sge; i++) {
if ((plen + wr->sg_list[i].length) < plen) {
return (-EMSGSIZE);
}
wqe->send.num_sgle = htobe32(wr->num_sge);
*flit_cnt = 4 + ((wr->num_sge) << 1);
plen += wr->sg_list[i].length;
wqe->send.sgl[i].stag =
htobe32(wr->sg_list[i].lkey);
wqe->send.sgl[i].len =
htobe32(wr->sg_list[i].length);
wqe->send.sgl[i].to = htobe64(wr->sg_list[i].addr);
}
wqe->send.num_sgle = htobe32(wr->num_sge);
*flit_cnt = 4 + ((wr->num_sge) << 1);
wqe->send.plen = htobe32(plen);
return 0;
}
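/*
 * Each SGE in the send WR above occupies two 8-byte flits (a stag/length
 * pair plus a 64-bit address), which is where the
 * *flit_cnt = 4 + (num_sge << 1) computation comes from; the
 * immediate-data path uses a fixed 5 flits instead.
 */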
static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
u8 *flit_cnt)
{
int i;
@ -152,7 +153,7 @@ static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
plen = 4;
wqe->write.sgl[0].stag = wr->imm_data;
wqe->write.sgl[0].stag = wr->ex.imm_data;
wqe->write.sgl[0].len = 0;
wqe->write.num_sgle = 0;
*flit_cnt = 6;
@ -177,7 +178,7 @@ static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
return 0;
}
static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
u8 *flit_cnt)
{
if (wr->num_sge > 1)
@ -195,15 +196,12 @@ static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
return 0;
}
/*
* TBD: this is going to be moved to firmware. Missing pdid/qpid check for now.
*/
static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
u32 num_sgle, u32 * pbl_addr, u8 * page_size)
{
int i;
struct iwch_mr *mhp;
u32 offset;
u64 offset;
for (i = 0; i < num_sgle; i++) {
mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8);
@ -235,8 +233,8 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
return (-EINVAL);
}
offset = sg_list[i].addr - mhp->attr.va_fbo;
offset += ((u32) mhp->attr.va_fbo) %
(1UL << (12 + mhp->attr.page_size));
offset += mhp->attr.va_fbo &
((1UL << (12 + mhp->attr.page_size)) - 1);
pbl_addr[i] = ((mhp->attr.pbl_addr -
rhp->rdev.rnic_info.pbl_base) >> 3) +
(offset >> (12 + mhp->attr.page_size));
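Not part of the commit, but a minimal sketch of the arithmetic behind the offset change above: for a power-of-two page size the old modulo form and the new mask form compute the same value, and keeping the math in 64 bits avoids the truncation the old (u32) cast of va_fbo introduced.
/*
 * Illustrative helper only; 'va_fbo' and 'page_shift' stand in for
 * mhp->attr.va_fbo and (12 + mhp->attr.page_size).
 */
static inline u64
fbo_in_page(u64 va_fbo, unsigned int page_shift)
{
	u64 page_size = 1UL << page_shift;

	/* For powers of two: (x % page_size) == (x & (page_size - 1)). */
	return (va_fbo & (page_size - 1));
}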
@ -245,26 +243,113 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
return 0;
}
static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe,
static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe,
struct ib_recv_wr *wr)
{
int i;
if (wr->num_sge > T3_MAX_SGE)
int i, err = 0;
u32 pbl_addr[T3_MAX_SGE];
u8 page_size[T3_MAX_SGE];
if (wr->num_sge > T3_MAX_SGE)
return (-EINVAL);
err = iwch_sgl2pbl_map(qhp->rhp, wr->sg_list, wr->num_sge, pbl_addr,
page_size);
if (err)
return err;
wqe->recv.pagesz[0] = page_size[0];
wqe->recv.pagesz[1] = page_size[1];
wqe->recv.pagesz[2] = page_size[2];
wqe->recv.pagesz[3] = page_size[3];
wqe->recv.num_sgle = htobe32(wr->num_sge);
for (i = 0; i < wr->num_sge; i++) {
wqe->recv.sgl[i].stag = htobe32(wr->sg_list[i].lkey);
wqe->recv.sgl[i].len = htobe32(wr->sg_list[i].length);
wqe->recv.sgl[i].to = htobe64(wr->sg_list[i].addr);
wqe->recv.sgl[i].to = htobe64(((u32)wr->sg_list[i].addr) &
((1UL << (12 + page_size[i])) - 1));
/* pbl_addr is the adapters address in the PBL */
wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]);
}
for (; i < T3_MAX_SGE; i++) {
wqe->recv.sgl[i].stag = 0;
wqe->recv.sgl[i].len = 0;
wqe->recv.sgl[i].to = 0;
wqe->recv.pbl_addr[i] = 0;
}
qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
qhp->wq.rq_size_log2)].pbl_addr = 0;
return 0;
}
static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe,
struct ib_recv_wr *wr)
{
int i;
u32 pbl_addr;
u32 pbl_offset;
/*
* The T3 HW requires the PBL in the HW recv descriptor to reference
* a PBL entry. So we allocate the max needed PBL memory here and pass
* it to the uP in the recv WR. The uP will build the PBL and setup
* the HW recv descriptor.
*/
pbl_addr = cxio_hal_pblpool_alloc(&qhp->rhp->rdev, T3_STAG0_PBL_SIZE);
if (!pbl_addr)
return -ENOMEM;
/*
* Compute the 8B aligned offset.
*/
pbl_offset = (pbl_addr - qhp->rhp->rdev.rnic_info.pbl_base) >> 3;
wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);
for (i = 0; i < wr->num_sge; i++) {
/*
* Use a 128MB page size. This and an imposed 128MB
* sge length limit allows us to require only a 2-entry HW
* PBL for each SGE. This restriction is acceptable since
* since it is not possible to allocate 128MB of contiguous
* DMA coherent memory!
*/
if (wr->sg_list[i].length > T3_STAG0_MAX_PBE_LEN)
return -EINVAL;
wqe->recv.pagesz[i] = T3_STAG0_PAGE_SHIFT;
/*
* T3 restricts a recv to all zero-stag or all non-zero-stag.
*/
if (wr->sg_list[i].lkey != 0)
return -EINVAL;
wqe->recv.sgl[i].stag = 0;
wqe->recv.sgl[i].len = htobe32(wr->sg_list[i].length);
wqe->recv.sgl[i].to = htobe64(wr->sg_list[i].addr);
wqe->recv.pbl_addr[i] = htobe32(pbl_offset);
pbl_offset += 2;
}
for (; i < T3_MAX_SGE; i++) {
wqe->recv.pagesz[i] = 0;
wqe->recv.sgl[i].stag = 0;
wqe->recv.sgl[i].len = 0;
wqe->recv.sgl[i].to = 0;
wqe->recv.pbl_addr[i] = 0;
}
qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
qhp->wq.rq_size_log2)].pbl_addr = pbl_addr;
return 0;
}
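A quick consistency check of the zero-stag constants relied on above (not part of the commit; CTASSERT is FreeBSD's compile-time assertion, and the constants are the ones defined in cxio_wr.h later in this diff): the effective DDP page size implied by T3_STAG0_PAGE_SHIFT matches T3_STAG0_MAX_PBE_LEN, and T3_STAG0_PBL_SIZE covers two 8-byte PBL entries per SGE.
CTASSERT((1UL << (12 + T3_STAG0_PAGE_SHIFT)) == T3_STAG0_MAX_PBE_LEN);
CTASSERT(T3_STAG0_PBL_SIZE == 2 * T3_MAX_SGE * 8);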
int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr)
{
@ -282,18 +367,19 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
mtx_lock(&qhp->lock);
if (qhp->attr.state > IWCH_QP_STATE_RTS) {
mtx_unlock(&qhp->lock);
return (-EINVAL);
err = -EINVAL;
goto out;
}
num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
qhp->wq.sq_size_log2);
if (num_wrs <= 0) {
if (num_wrs == 0) {
mtx_unlock(&qhp->lock);
return (-ENOMEM);
err = -EINVAL;
goto out;
}
while (wr) {
if (num_wrs == 0) {
err = -ENOMEM;
*bad_wr = wr;
break;
}
idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
@ -311,17 +397,17 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case IB_WR_SEND:
case IB_WR_SEND_WITH_IMM:
t3_wr_opcode = T3_WR_SEND;
err = iwch_build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
err = build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
break;
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
t3_wr_opcode = T3_WR_WRITE;
err = iwch_build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
err = build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
break;
case IB_WR_RDMA_READ:
t3_wr_opcode = T3_WR_READ;
t3_wr_flags = 0; /* T3 reads are always signaled */
err = iwch_build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
err = build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
if (err)
break;
sqp->read_len = wqe->read.local_len;
@ -333,10 +419,9 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
wr->opcode);
err = -EINVAL;
}
if (err) {
*bad_wr = wr;
if (err)
break;
}
wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
sqp->wr_id = wr->wr_id;
sqp->opcode = wr2opcode(t3_wr_opcode);
@ -358,6 +443,9 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
mtx_unlock(&qhp->lock);
ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
out:
if (err)
*bad_wr = wr;
return err;
}
@ -374,27 +462,35 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
mtx_lock(&qhp->lock);
if (qhp->attr.state > IWCH_QP_STATE_RTS) {
mtx_unlock(&qhp->lock);
return (-EINVAL);
err = -EINVAL;
goto out;
}
num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
qhp->wq.rq_size_log2) - 1;
if (!wr) {
mtx_unlock(&qhp->lock);
return (-EINVAL);
err = -EINVAL;
goto out;
}
while (wr) {
if (wr->num_sge > T3_MAX_SGE) {
err = -EINVAL;
break;
}
idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
wqe = (union t3_wr *) (qhp->wq.queue + idx);
if (num_wrs)
err = iwch_build_rdma_recv(qhp->rhp, wqe, wr);
else
if (num_wrs) {
if (wr->sg_list[0].lkey)
err = build_rdma_recv(qhp, wqe, wr);
else
err = build_zero_stag_recv(qhp, wqe, wr);
} else
err = -ENOMEM;
if (err) {
*bad_wr = wr;
if (err)
break;
}
qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, qhp->wq.rq_size_log2)] =
wr->wr_id;
build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
0, sizeof(struct t3_receive_wr) >> 3);
@ -408,6 +504,9 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
}
mtx_unlock(&qhp->lock);
ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
out:
if (err)
*bad_wr = wr;
return err;
}
@ -439,7 +538,7 @@ int iwch_bind_mw(struct ib_qp *qp,
}
num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
qhp->wq.sq_size_log2);
if ((num_wrs) <= 0) {
if ((num_wrs) == 0) {
mtx_unlock(&qhp->lock);
return (-ENOMEM);
}
@ -491,7 +590,7 @@ int iwch_bind_mw(struct ib_qp *qp,
return err;
}
static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
static void build_term_codes(struct respQ_msg_t *rsp_msg,
u8 *layer_type, u8 *ecode)
{
int status = TPT_ERR_INTERNAL_ERR;
@ -631,15 +730,18 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
union t3_wr *wqe;
struct terminate_message *term;
struct mbuf *m;
struct ofld_hdr *oh;
CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
m = m_gethdr(MT_DATA, M_NOWAIT);
if (!m) {
CTR3(KTR_IW_CXGB, "%s: tid %u, %p", __func__, qhp->ep->hwtid, rsp_msg);
m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
log(LOG_ERR, "%s cannot send TERMINATE!\n", __FUNCTION__);
return (-ENOMEM);
}
wqe = mtod(m, union t3_wr *);
m->m_len = m->m_pkthdr.len = 40;
oh = mtod(m, struct ofld_hdr *);
m->m_pkthdr.len = m->m_len = sizeof(*oh) + 40;
oh->flags = V_HDR_NDESC(1) | V_HDR_CTRL(CPL_PRIORITY_DATA) | V_HDR_QSET(0);
wqe = (void *)(oh + 1);
memset(wqe, 0, 40);
wqe->send.rdmaop = T3_TERMINATE;
@ -653,22 +755,17 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG));
wqe->send.wrh.gen_tid_len = htobe32(V_FW_RIWR_TID(qhp->ep->hwtid));
m_set_priority(m, CPL_PRIORITY_DATA);
m_set_sgl(m, NULL);
m_set_sgllen(m, 0);
return cxgb_ofld_send(qhp->rhp->rdev.t3cdev_p, m);
return t3_offload_tx(qhp->rhp->rdev.adap, m);
}
/*
* Assumes qhp lock is held.
*/
static void __flush_qp(struct iwch_qp *qhp)
static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp,
struct iwch_cq *schp)
{
struct iwch_cq *rchp, *schp;
int count;
rchp = get_chp(qhp->rhp, qhp->attr.rcq);
schp = get_chp(qhp->rhp, qhp->attr.scq);
int flushed;
CTR4(KTR_IW_CXGB, "%s qhp %p rchp %p schp %p", __FUNCTION__, qhp, rchp, schp);
/* take a ref on the qhp since we must release the lock */
@ -680,20 +777,22 @@ static void __flush_qp(struct iwch_qp *qhp)
mtx_lock(&qhp->lock);
cxio_flush_hw_cq(&rchp->cq);
cxio_count_rcqes(&rchp->cq, &qhp->wq, &count);
cxio_flush_rq(&qhp->wq, &rchp->cq, count);
flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count);
mtx_unlock(&qhp->lock);
mtx_unlock(&rchp->lock);
(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
if (flushed)
(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
/* locking hierarchy: cq lock first, then qp lock. */
mtx_lock(&schp->lock);
mtx_lock(&qhp->lock);
cxio_flush_hw_cq(&schp->cq);
cxio_count_scqes(&schp->cq, &qhp->wq, &count);
cxio_flush_sq(&qhp->wq, &schp->cq, count);
flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count);
mtx_unlock(&qhp->lock);
mtx_unlock(&schp->lock);
(*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
if (flushed)
(*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
/* deref */
mtx_lock(&qhp->lock);
@ -703,10 +802,23 @@ static void __flush_qp(struct iwch_qp *qhp)
static void flush_qp(struct iwch_qp *qhp)
{
if (qhp->ibqp.uobject)
struct iwch_cq *rchp, *schp;
rchp = get_chp(qhp->rhp, qhp->attr.rcq);
schp = get_chp(qhp->rhp, qhp->attr.scq);
if (qhp->ibqp.uobject) {
cxio_set_wq_in_error(&qhp->wq);
else
__flush_qp(qhp);
cxio_set_cq_in_error(&rchp->cq);
(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
if (schp != rchp) {
cxio_set_cq_in_error(&schp->cq);
(*schp->ibcq.comp_handler)(&schp->ibcq,
schp->ibcq.cq_context);
}
return;
}
__flush_qp(qhp, rchp, schp);
}
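/*
 * For a user-mode QP (qhp->ibqp.uobject != NULL) the kernel only marks the
 * WQ and both CQs in error and invokes the completion handlers; the actual
 * flush is presumably driven from the user-space verbs library.  Kernel QPs
 * are flushed in place via __flush_qp().
 */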
@ -715,7 +827,13 @@ static void flush_qp(struct iwch_qp *qhp)
*/
static int rqes_posted(struct iwch_qp *qhp)
{
return fw_riwrh_opcode((struct fw_riwrh *)qhp->wq.queue) == T3_WR_RCV;
union t3_wr *wqe = qhp->wq.queue;
u16 count = 0;
while ((count+1) != 0 && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) {
count++;
wqe++;
}
return count;
}
static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
@ -724,6 +842,10 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
{
struct t3_rdma_init_attr init_attr;
int ret;
struct socket *so = qhp->ep->com.so;
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp;
struct toepcb *toep;
init_attr.tid = qhp->ep->hwtid;
init_attr.qpid = qhp->wq.qpid;
@ -737,32 +859,28 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
(qhp->attr.mpa_attr.xmit_marker_enabled << 1) |
(qhp->attr.mpa_attr.crc_enabled << 2);
/*
* XXX - The IWCM doesn't quite handle getting these
* attrs set before going into RTS. For now, just turn
* them on always...
*/
#if 0
init_attr.qpcaps = qhp->attr.enableRdmaRead |
(qhp->attr.enableRdmaWrite << 1) |
(qhp->attr.enableBind << 2) |
(qhp->attr.enable_stag0_fastreg << 3) |
(qhp->attr.enable_stag0_fastreg << 4);
#else
init_attr.qpcaps = 0x1f;
#endif
init_attr.qpcaps = uP_RI_QP_RDMA_READ_ENABLE |
uP_RI_QP_RDMA_WRITE_ENABLE |
uP_RI_QP_BIND_ENABLE;
if (!qhp->ibqp.uobject)
init_attr.qpcaps |= uP_RI_QP_STAG0_ENABLE;
init_attr.tcp_emss = qhp->ep->emss;
init_attr.ord = qhp->attr.max_ord;
init_attr.ird = qhp->attr.max_ird;
init_attr.qp_dma_addr = qhp->wq.dma_addr;
init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0;
init_attr.rqe_count = rqes_posted(qhp);
init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
init_attr.rtr_type = 0;
tp = intotcpcb(inp);
toep = tp->t_toe;
init_attr.chan = toep->tp_l2t->smt_idx;
init_attr.irs = qhp->ep->rcv_seq;
CTR5(KTR_IW_CXGB, "%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
"flags 0x%x qpcaps 0x%x", __FUNCTION__,
init_attr.rq_addr, init_attr.rq_size,
init_attr.flags, init_attr.qpcaps);
ret = cxio_rdma_init(&rhp->rdev, &init_attr);
ret = cxio_rdma_init(&rhp->rdev, &init_attr, qhp->ep->com.so);
CTR2(KTR_IW_CXGB, "%s ret %d", __FUNCTION__, ret);
return ret;
}
@ -870,8 +988,8 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
abort=0;
disconnect = 1;
ep = qhp->ep;
get_ep(&ep->com);
}
flush_qp(qhp);
break;
case IWCH_QP_STATE_TERMINATE:
qhp->attr.state = IWCH_QP_STATE_TERMINATE;
@ -886,6 +1004,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
abort=1;
disconnect = 1;
ep = qhp->ep;
get_ep(&ep->com);
}
goto err;
break;
@ -901,6 +1020,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
}
switch (attrs->next_state) {
case IWCH_QP_STATE_IDLE:
flush_qp(qhp);
qhp->attr.state = IWCH_QP_STATE_IDLE;
qhp->attr.llp_stream_handle = NULL;
put_ep(&qhp->ep->com);
@ -908,7 +1028,6 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
wakeup(qhp);
break;
case IWCH_QP_STATE_ERROR:
disconnect=1;
goto err;
default:
ret = -EINVAL;
@ -960,81 +1079,29 @@ err:
out:
mtx_unlock(&qhp->lock);
if (terminate)
if (terminate)
iwch_post_terminate(qhp, NULL);
/*
* If disconnect is 1, then we need to initiate a disconnect
* on the EP. This can be a normal close (RTS->CLOSING) or
* an abnormal close (RTS/CLOSING->ERROR).
*/
if (disconnect)
if (disconnect) {
iwch_ep_disconnect(ep, abort, M_NOWAIT);
put_ep(&ep->com);
}
/*
* If free is 1, then we've disassociated the EP from the QP
* and we need to dereference the EP.
*/
if (free)
if (free)
put_ep(&ep->com);
CTR2(KTR_IW_CXGB, "%s exit state %d", __FUNCTION__, qhp->attr.state);
return ret;
}
static int quiesce_qp(struct iwch_qp *qhp)
{
mtx_lock(&qhp->lock);
iwch_quiesce_tid(qhp->ep);
qhp->flags |= QP_QUIESCED;
mtx_unlock(&qhp->lock);
return 0;
}
static int resume_qp(struct iwch_qp *qhp)
{
mtx_lock(&qhp->lock);
iwch_resume_tid(qhp->ep);
qhp->flags &= ~QP_QUIESCED;
mtx_lock(&qhp->lock);
return 0;
}
int iwch_quiesce_qps(struct iwch_cq *chp)
{
int i;
struct iwch_qp *qhp;
for (i=0; i < T3_MAX_NUM_QP; i++) {
qhp = get_qhp(chp->rhp, i);
if (!qhp)
continue;
if ((qhp->attr.rcq == chp->cq.cqid) && !qp_quiesced(qhp)) {
quiesce_qp(qhp);
continue;
}
if ((qhp->attr.scq == chp->cq.cqid) && !qp_quiesced(qhp))
quiesce_qp(qhp);
}
return 0;
}
int iwch_resume_qps(struct iwch_cq *chp)
{
int i;
struct iwch_qp *qhp;
for (i=0; i < T3_MAX_NUM_QP; i++) {
qhp = get_qhp(chp->rhp, i);
if (!qhp)
continue;
if ((qhp->attr.rcq == chp->cq.cqid) && qp_quiesced(qhp)) {
resume_qp(qhp);
continue;
}
if ((qhp->attr.scq == chp->cq.cqid) && qp_quiesced(qhp))
resume_qp(qhp);
}
return 0;
}
#endif

View File

@ -29,11 +29,13 @@ POSSIBILITY OF SUCH DAMAGE.
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
@ -59,9 +61,11 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
#include <contrib/rdma/ib_verbs.h>
#include <contrib/rdma/ib_umem.h>
#include <contrib/rdma/ib_user_verbs.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <linux/idr.h>
#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
#include <cxgb_include.h>
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
@ -369,3 +373,4 @@ void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p)
{
gen_pool_destroy(rdev_p->rqt_pool);
}
#endif

View File

@ -47,10 +47,18 @@ struct iwch_create_cq_req {
uint64_t user_rptr_addr;
};
struct iwch_create_cq_resp_v0 {
__u64 key;
__u32 cqid;
__u32 size_log2;
};
struct iwch_create_cq_resp {
uint64_t key;
uint32_t cqid;
uint32_t size_log2;
__u32 memsize;
__u32 reserved;
};
struct iwch_create_qp_resp {

View File

@ -32,6 +32,9 @@ $FreeBSD$
#define __CXIO_WR_H__
#define T3_MAX_SGE 4
#define T3_MAX_INLINE 64
#define T3_STAG0_PBL_SIZE (2 * T3_MAX_SGE << 3)
#define T3_STAG0_MAX_PBE_LEN (128 * 1024 * 1024)
#define T3_STAG0_PAGE_SHIFT 15
#define Q_EMPTY(rptr,wptr) ((rptr)==(wptr))
#define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \
@ -272,6 +275,22 @@ enum t3_qp_caps {
uP_RI_QP_STAG0_ENABLE = 0x10
} __attribute__ ((packed));
enum rdma_init_rtr_types {
RTR_READ = 1,
RTR_WRITE = 2,
RTR_SEND = 3,
};
#define S_RTR_TYPE 2
#define M_RTR_TYPE 0x3
#define V_RTR_TYPE(x) ((x) << S_RTR_TYPE)
#define G_RTR_TYPE(x) ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE)
#define S_CHAN 4
#define M_CHAN 0x3
#define V_CHAN(x) ((x) << S_CHAN)
#define G_CHAN(x) ((((x) >> S_CHAN)) & M_CHAN)
struct t3_rdma_init_attr {
u32 tid;
u32 qpid;
@ -287,8 +306,11 @@ struct t3_rdma_init_attr {
u32 ird;
u64 qp_dma_addr;
u32 qp_dma_size;
u32 flags;
enum rdma_init_rtr_types rtr_type;
u16 flags;
u16 rqe_count;
u32 irs;
u32 chan;
};
struct t3_rdma_init_wr {
@ -303,13 +325,13 @@ struct t3_rdma_init_wr {
u8 mpaattrs; /* 5 */
u8 qpcaps;
__be16 ulpdu_size;
__be32 flags; /* bits 31-1 - reservered */
/* bit 0 - set if RECV posted */
__be16 flags_rtr_type;
__be16 rqe_count;
__be32 ord; /* 6 */
__be32 ird;
__be64 qp_dma_addr; /* 7 */
__be32 qp_dma_size; /* 8 */
u32 irs;
__be32 irs;
};
struct t3_genbit {
@ -318,7 +340,8 @@ struct t3_genbit {
};
enum rdma_init_wr_flags {
RECVS_POSTED = 1,
MPA_INITIATOR = (1<<0),
PRIV_QP = (1<<1),
};
union t3_wr {
@ -531,6 +554,12 @@ struct t3_cqe {
#define CQE_STATUS(x) (G_CQE_STATUS(be32toh((x).header)))
#define CQE_OPCODE(x) (G_CQE_OPCODE(be32toh((x).header)))
#define CQE_SEND_OPCODE(x)( \
(G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND) || \
(G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE) || \
(G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_INV) || \
(G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE_INV))
#define CQE_LEN(x) (be32toh((x).len))
/* used for RQ completion processing */
@ -589,21 +618,23 @@ struct t3_swsq {
uint64_t wr_id;
struct t3_cqe cqe;
uint32_t sq_wptr;
uint32_t read_len;
__be32 read_len;
int opcode;
int complete;
int signaled;
};
struct t3_swrq {
__u64 wr_id;
__u32 pbl_addr;
};
/*
* A T3 WQ implements both the SQ and RQ.
*/
struct t3_wq {
union t3_wr *queue; /* DMA accessable memory */
bus_addr_t dma_addr; /* DMA address for HW */
#ifdef notyet
DECLARE_PCI_UNMAP_ADDR(mapping) /* unmap kruft */
#endif
u32 error; /* 1 once we go to ERROR */
u32 qpid;
u32 wptr; /* idx to next available WR slot */
@ -613,14 +644,15 @@ struct t3_wq {
u32 sq_wptr; /* sq_wptr - sq_rptr == count of */
u32 sq_rptr; /* pending wrs */
u32 sq_size_log2; /* sq size */
u64 *rq; /* SW RQ (holds consumer wr_ids */
struct t3_swrq *rq; /* SW RQ (holds consumer wr_ids */
u32 rq_wptr; /* rq_wptr - rq_rptr == count of */
u32 rq_rptr; /* pending wrs */
u64 *rq_oldest_wr; /* oldest wr on the SW RQ */
struct t3_swrq *rq_oldest_wr; /* oldest wr on the SW RQ */
u32 rq_size_log2; /* rq size */
u32 rq_addr; /* rq adapter address */
void /* __iomem */ *doorbell; /* kernel db */
void *doorbell; /* kernel db */
u64 udb; /* user db if any */
struct cxio_rdev *rdev;
};
struct t3_cq {
@ -629,9 +661,6 @@ struct t3_cq {
u32 wptr;
u32 size_log2;
bus_addr_t dma_addr;
#ifdef notyet
DECLARE_PCI_UNMAP_ADDR(mapping)
#endif
struct t3_cqe *queue;
struct t3_cqe *sw_queue;
u32 sw_rptr;
@ -641,6 +670,22 @@ struct t3_cq {
#define CQ_VLD_ENTRY(ptr,size_log2,cqe) (Q_GENBIT(ptr,size_log2) == \
CQE_GENBIT(*cqe))
struct t3_cq_status_page {
u32 cq_err;
};
static inline int cxio_cq_in_error(struct t3_cq *cq)
{
return ((struct t3_cq_status_page *)
&cq->queue[1 << cq->size_log2])->cq_err;
}
static inline void cxio_set_cq_in_error(struct t3_cq *cq)
{
((struct t3_cq_status_page *)
&cq->queue[1 << cq->size_log2])->cq_err = 1;
}
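/*
 * Both helpers above treat the slot just past the last CQE in the ring
 * (index 1 << size_log2) as a status page; cq_err is its first word,
 * written by cxio_set_cq_in_error() and read by cxio_cq_in_error().
 */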
static inline void cxio_set_wq_in_error(struct t3_wq *wq)
{
wq->queue->flit[13] = 1;

View File

@ -1,49 +0,0 @@
/**************************************************************************
Copyright (c) 2007, Chelsio Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Neither the name of the Chelsio Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
$FreeBSD$
***************************************************************************/
#ifndef _CXGB_TOEDEV_H_
#define _CXGB_TOEDEV_H_
#include <netinet/toedev.h>
/* offload type ids */
enum {
TOE_ID_CHELSIO_T1 = 1,
TOE_ID_CHELSIO_T1C,
TOE_ID_CHELSIO_T2,
TOE_ID_CHELSIO_T3,
TOE_ID_CHELSIO_T3B,
TOE_ID_CHELSIO_T3C,
}
;
#endif

View File

@ -1,420 +0,0 @@
/**************************************************************************
Copyright (c) 2007, Chelsio Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Neither the name of the Chelsio Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/queue.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/route.h>
/*
* XXX
*/
#include <cxgb_include.h>
#include <ulp/toecore/cxgb_toedev.h>
static struct mtx offload_db_lock;
static TAILQ_HEAD(, toedev) offload_dev_list;
static TAILQ_HEAD(, tom_info) offload_module_list;
/*
* Returns the entry in the given table with the given offload id, or NULL
* if the id is not found.
*/
static const struct offload_id *
id_find(unsigned int id, const struct offload_id *table)
{
for ( ; table->id; ++table)
if (table->id == id)
return table;
return NULL;
}
/*
* Returns true if an offload device is presently attached to an offload module.
*/
static inline int
is_attached(const struct toedev *dev)
{
return dev->tod_offload_mod != NULL;
}
/*
* Try to attach a new offload device to an existing TCP offload module that
* can handle the device's offload id. Returns 0 if it succeeds.
*
* Must be called with the offload_db_lock held.
*/
static int
offload_attach(struct toedev *dev)
{
struct tom_info *t;
TAILQ_FOREACH(t, &offload_module_list, entry) {
const struct offload_id *entry;
entry = id_find(dev->tod_ttid, t->ti_id_table);
if (entry && t->ti_attach(dev, entry) == 0) {
dev->tod_offload_mod = t;
return 0;
}
}
return (ENOPROTOOPT);
}
/**
* register_tom - register a TCP Offload Module (TOM)
* @t: the offload module to register
*
* Register a TCP Offload Module (TOM).
*/
int
register_tom(struct tom_info *t)
{
mtx_lock(&offload_db_lock);
toedev_registration_count++;
TAILQ_INSERT_HEAD(&offload_module_list, t, entry);
mtx_unlock(&offload_db_lock);
return 0;
}
/**
* unregister_tom - unregister a TCP Offload Module (TOM)
* @t: the offload module to register
*
* Unregister a TCP Offload Module (TOM). Note that this does not affect any
* TOE devices to which the TOM is already attached.
*/
int
unregister_tom(struct tom_info *t)
{
mtx_lock(&offload_db_lock);
TAILQ_REMOVE(&offload_module_list, t, entry);
mtx_unlock(&offload_db_lock);
return 0;
}
/*
* Find an offload device by name. Must be called with offload_db_lock held.
*/
static struct toedev *
__find_offload_dev_by_name(const char *name)
{
struct toedev *dev;
TAILQ_FOREACH(dev, &offload_dev_list, entry) {
if (!strncmp(dev->tod_name, name, TOENAMSIZ))
return dev;
}
return NULL;
}
/*
* Returns true if an offload device is already registered.
* Must be called with the offload_db_lock held.
*/
static int
is_registered(const struct toedev *dev)
{
struct toedev *d;
TAILQ_FOREACH(d, &offload_dev_list, entry) {
if (d == dev)
return 1;
}
return 0;
}
/*
* Finalize the name of an offload device by assigning values to any format
* strings in its name.
*/
static int
assign_name(struct toedev *dev, const char *name, int limit)
{
int i;
for (i = 0; i < limit; ++i) {
char s[TOENAMSIZ];
if (snprintf(s, sizeof(s), name, i) >= sizeof(s))
return -1; /* name too long */
if (!__find_offload_dev_by_name(s)) {
strcpy(dev->tod_name, s);
return 0;
}
}
return -1;
}
/**
* register_toedev - register a TOE device
* @dev: the device
* @name: a name template for the device
*
* Register a TOE device and try to attach an appropriate TCP offload module
* to it. @name is a template that may contain at most one %d format
* specifier.
*/
int
register_toedev(struct toedev *dev, const char *name)
{
int ret;
const char *p;
/*
* Validate the name template. Only one %d allowed and name must be
* a valid filename so it can appear in sysfs.
*/
if (!name || !*name || !strcmp(name, ".") || !strcmp(name, "..") ||
strchr(name, '/'))
return EINVAL;
p = strchr(name, '%');
if (p && (p[1] != 'd' || strchr(p + 2, '%')))
return EINVAL;
mtx_lock(&offload_db_lock);
if (is_registered(dev)) { /* device already registered */
ret = EEXIST;
goto out;
}
if ((ret = assign_name(dev, name, 32)) != 0)
goto out;
dev->tod_offload_mod = NULL;
TAILQ_INSERT_TAIL(&offload_dev_list, dev, entry);
out:
mtx_unlock(&offload_db_lock);
return ret;
}
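/*
 * Illustrative (hypothetical) use of this now-removed interface: a driver
 * would pass a driver-owned toedev and a "%d" name template, letting
 * assign_name() above pick the first free instance name:
 *
 *	error = register_toedev(&sc->tdev, "toe%d");
 *	if (error)
 *		return (error);
 */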
/**
* unregister_toedev - unregister a TOE device
* @dev: the device
*
* Unregister a TOE device. The device must not be attached to an offload
* module.
*/
int
unregister_toedev(struct toedev *dev)
{
int ret = 0;
mtx_lock(&offload_db_lock);
if (!is_registered(dev)) {
ret = ENODEV;
goto out;
}
if (is_attached(dev)) {
ret = EBUSY;
goto out;
}
TAILQ_REMOVE(&offload_dev_list, dev, entry);
out:
mtx_unlock(&offload_db_lock);
return ret;
}
/**
* activate_offload - activate an offload device
* @dev: the device
*
* Activate an offload device by locating an appropriate registered offload
* module. If no module is found the operation fails and may be retried at
* a later time.
*/
int
activate_offload(struct toedev *dev)
{
int ret = 0;
mtx_lock(&offload_db_lock);
if (!is_registered(dev))
ret = ENODEV;
else if (!is_attached(dev))
ret = offload_attach(dev);
mtx_unlock(&offload_db_lock);
return ret;
}
/**
* toe_send - send a packet to a TOE device
* @dev: the device
* @m: the packet
*
* Sends an mbuf to a TOE driver after dealing with any active network taps.
*/
int
toe_send(struct toedev *dev, struct mbuf *m)
{
int r;
critical_enter(); /* XXX neccessary? */
r = dev->tod_send(dev, m);
critical_exit();
if (r)
BPF_MTAP(dev->tod_lldev, m);
return r;
}
/**
* toe_receive_mbuf - process n received TOE packets
* @dev: the toe device
* @m: an array of offload packets
* @n: the number of offload packets
*
* Process an array of ingress offload packets. Each packet is forwarded
* to any active network taps and then passed to the toe device's receive
* method. We optimize passing packets to the receive method by passing
* it the whole array at once except when there are active taps.
*/
int
toe_receive_mbuf(struct toedev *dev, struct mbuf **m, int n)
{
if (__predict_true(!bpf_peers_present(dev->tod_lldev->if_bpf)))
return dev->tod_recv(dev, m, n);
for ( ; n; n--, m++) {
m[0]->m_pkthdr.rcvif = dev->tod_lldev;
BPF_MTAP(dev->tod_lldev, m[0]);
dev->tod_recv(dev, m, 1);
}
return 0;
}
static inline int
ifnet_is_offload(const struct ifnet *ifp)
{
return (ifp->if_flags & IFCAP_TOE);
}
void
toe_arp_update(struct rtentry *rt)
{
struct ifnet *ifp = rt->rt_ifp;
if (ifp && ifnet_is_offload(ifp)) {
struct toedev *tdev = TOEDEV(ifp);
if (tdev && tdev->tod_arp_update)
tdev->tod_arp_update(tdev, rt);
}
}
/**
* offload_get_phys_egress - find the physical egress device
* @root_dev: the root device anchoring the search
* @so: the socket used to determine egress port in bonding mode
* @context: in bonding mode, indicates a connection set up or failover
*
* Given a root network device it returns the physical egress device that is a
* descendant of the root device. The root device may be either a physical
* device, in which case it is the device returned, or a virtual device, such
* as a VLAN or bonding device. In case of a bonding device the search
* considers the decisions of the bonding device given its mode to locate the
* correct egress device.
*/
struct ifnet *
offload_get_phys_egress(struct ifnet *root_dev, struct socket *so, int context)
{
#if 0
while (root_dev && ifnet_is_offload(root_dev)) {
if (root_dev->tod_priv_flags & IFF_802_1Q_VLAN)
root_dev = VLAN_DEV_INFO(root_dev)->real_dev;
else if (root_dev->tod_flags & IFF_MASTER)
root_dev = toe_bond_get_slave(root_dev, sk, context);
else
break;
}
#endif
return root_dev;
}
static int
toecore_load(module_t mod, int cmd, void *arg)
{
int err = 0;
switch (cmd) {
case MOD_LOAD:
mtx_init(&offload_db_lock, "toedev lock", NULL, MTX_DEF);
TAILQ_INIT(&offload_dev_list);
TAILQ_INIT(&offload_module_list);
break;
case MOD_QUIESCE:
break;
case MOD_UNLOAD:
mtx_lock(&offload_db_lock);
if (!TAILQ_EMPTY(&offload_dev_list) ||
!TAILQ_EMPTY(&offload_module_list)) {
err = EBUSY;
mtx_unlock(&offload_db_lock);
break;
}
mtx_unlock(&offload_db_lock);
mtx_destroy(&offload_db_lock);
break;
case MOD_SHUTDOWN:
break;
default:
err = EOPNOTSUPP;
break;
}
return (err);
}
static moduledata_t mod_data= {
"toecore",
toecore_load,
0
};
MODULE_VERSION(toecore, 1);
DECLARE_MODULE(toecore, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);

File diff suppressed because it is too large


File diff suppressed because it is too large

View File

@ -1,738 +0,0 @@
/**************************************************************************
Copyright (c) 2007-2008, Chelsio Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Neither the name of the Chelsio Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/condvar.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sockstate.h>
#include <sys/sockopt.h>
#include <sys/socket.h>
#include <sys/sockbuf.h>
#include <sys/syslog.h>
#include <sys/uio.h>
#include <machine/bus.h>
#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <cxgb_osdep.h>
#include <sys/mbufq.h>
#include <ulp/tom/cxgb_tcp_offload.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_offload.h>
#include <net/route.h>
#include <t3cdev.h>
#include <common/cxgb_firmware_exports.h>
#include <common/cxgb_t3_cpl.h>
#include <common/cxgb_tcb.h>
#include <common/cxgb_ctl_defs.h>
#include <cxgb_offload.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <sys/mvec.h>
#include <ulp/toecore/cxgb_toedev.h>
#include <ulp/tom/cxgb_defs.h>
#include <ulp/tom/cxgb_tom.h>
#include <ulp/tom/cxgb_t3_ddp.h>
#include <ulp/tom/cxgb_toepcb.h>
#include <ulp/tom/cxgb_tcp.h>
#define MAX_SCHEDULE_TIMEOUT 300
/*
* Return the # of page pods needed to accommodate a # of pages.
*/
static inline unsigned int
pages2ppods(unsigned int pages)
{
return (pages + PPOD_PAGES - 1) / PPOD_PAGES + NUM_SENTINEL_PPODS;
}
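/*
 * Worked example (illustrative values only; PPOD_PAGES and
 * NUM_SENTINEL_PPODS are defined in the TOM headers): if PPOD_PAGES were 4
 * and NUM_SENTINEL_PPODS were 0, a 9-page buffer would need
 * (9 + 4 - 1) / 4 + 0 = 3 page pods, i.e. the page count rounded up to a
 * multiple of PPOD_PAGES plus the sentinel pods.
 */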
/**
* t3_pin_pages - pin a user memory range and prepare it for DDP
* @addr - the starting address
* @len - the length of the range
* @newgl - contains the pages and physical addresses of the pinned range
* @gl - an existing gather list, may be %NULL
*
* Pins the pages in the user-space memory range [addr, addr + len) and
* maps them for DMA. Returns a gather list with the pinned pages and
* their physical addresses. If @gl is non NULL the pages it describes
* are compared against the pages for [addr, addr + len), and if the
* existing gather list already covers the range a new list is not
* allocated. Returns 0 on success, or a negative errno. On success if
* a new gather list was allocated it is returned in @newgl.
*/
static int
t3_pin_pages(bus_dma_tag_t tag, bus_dmamap_t dmamap, vm_offset_t addr,
size_t len, struct ddp_gather_list **newgl,
const struct ddp_gather_list *gl)
{
int i = 0, err;
size_t pg_off;
unsigned int npages;
struct ddp_gather_list *p;
vm_map_t map;
pg_off = addr & PAGE_MASK;
npages = (pg_off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
p = malloc(sizeof(struct ddp_gather_list) + npages * sizeof(vm_page_t *),
M_DEVBUF, M_NOWAIT|M_ZERO);
if (p == NULL)
return (ENOMEM);
map = &curthread->td_proc->p_vmspace->vm_map;
if (vm_fault_quick_hold_pages(map, addr, len, VM_PROT_READ |
VM_PROT_WRITE, p->dgl_pages, npages) < 0) {
err = EFAULT;
goto free_gl;
}
if (gl && gl->dgl_offset == pg_off && gl->dgl_nelem >= npages &&
gl->dgl_length >= len) {
for (i = 0; i < npages; i++)
if (p->dgl_pages[i] != gl->dgl_pages[i])
goto different_gl;
err = 0;
goto unpin;
}
different_gl:
p->dgl_length = len;
p->dgl_offset = pg_off;
p->dgl_nelem = npages;
#ifdef NEED_BUSDMA
p->phys_addr[0] = pci_map_page(pdev, p->pages[0], pg_off,
PAGE_SIZE - pg_off,
PCI_DMA_FROMDEVICE) - pg_off;
for (i = 1; i < npages; ++i)
p->phys_addr[i] = pci_map_page(pdev, p->pages[i], 0, PAGE_SIZE,
PCI_DMA_FROMDEVICE);
#endif
*newgl = p;
return (0);
unpin:
vm_page_unhold_pages(p->dgl_pages, npages);
free_gl:
free(p, M_DEVBUF);
*newgl = NULL;
return (err);
}
static void
unmap_ddp_gl(const struct ddp_gather_list *gl)
{
#ifdef NEED_BUSDMA
int i;
if (!gl->nelem)
return;
pci_unmap_page(pdev, gl->phys_addr[0] + gl->offset,
PAGE_SIZE - gl->offset, PCI_DMA_FROMDEVICE);
for (i = 1; i < gl->nelem; ++i)
pci_unmap_page(pdev, gl->phys_addr[i], PAGE_SIZE,
PCI_DMA_FROMDEVICE);
#endif
}
static void
ddp_gl_free_pages(struct ddp_gather_list *gl, int dirty)
{
/*
* XXX mark pages as dirty before unholding
*/
vm_page_unhold_pages(gl->dgl_pages, gl->dgl_nelem);
}
void
t3_free_ddp_gl(struct ddp_gather_list *gl)
{
unmap_ddp_gl(gl);
ddp_gl_free_pages(gl, 0);
free(gl, M_DEVBUF);
}
/* Max # of page pods for a buffer, enough for 1MB buffer at 4KB page size */
#define MAX_PPODS 64U
/*
* Allocate page pods for DDP buffer 1 (the user buffer) and set up the tag in
* the TCB. We allocate page pods in multiples of PPOD_CLUSTER_SIZE. First we
* try to allocate enough page pods to accommodate the whole buffer, subject to
* the MAX_PPODS limit. If that fails we try to allocate PPOD_CLUSTER_SIZE page
* pods before failing entirely.
*/
static int
alloc_buf1_ppods(struct toepcb *toep, struct ddp_state *p,
unsigned long addr, unsigned int len)
{
int err, tag, npages, nppods;
struct tom_data *d = TOM_DATA(toep->tp_toedev);
#if 0
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
#endif
npages = ((addr & PAGE_MASK) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
nppods = min(pages2ppods(npages), MAX_PPODS);
nppods = roundup2(nppods, PPOD_CLUSTER_SIZE);
err = t3_alloc_ppods(d, nppods, &tag);
if (err && nppods > PPOD_CLUSTER_SIZE) {
nppods = PPOD_CLUSTER_SIZE;
err = t3_alloc_ppods(d, nppods, &tag);
}
if (err)
return (ENOMEM);
p->ubuf_nppods = nppods;
p->ubuf_tag = tag;
#if NUM_DDP_KBUF == 1
t3_set_ddp_tag(toep, 1, tag << 6);
#endif
return (0);
}
/*
* Starting offset for the user DDP buffer. A non-0 value ensures a DDP flush
* won't block indefinitely if there's nothing to place (which should be rare).
*/
#define UBUF_OFFSET 1
static __inline unsigned long
select_ddp_flags(const struct toepcb *toep, int buf_idx,
int nonblock, int rcv_flags)
{
if (buf_idx == 1) {
if (__predict_false(rcv_flags & MSG_WAITALL))
return V_TF_DDP_PSH_NO_INVALIDATE0(1) |
V_TF_DDP_PSH_NO_INVALIDATE1(1) |
V_TF_DDP_PUSH_DISABLE_1(1);
if (nonblock)
return V_TF_DDP_BUF1_FLUSH(1);
return V_TF_DDP_BUF1_FLUSH(!TOM_TUNABLE(toep->tp_toedev,
ddp_push_wait));
}
if (__predict_false(rcv_flags & MSG_WAITALL))
return V_TF_DDP_PSH_NO_INVALIDATE0(1) |
V_TF_DDP_PSH_NO_INVALIDATE1(1) |
V_TF_DDP_PUSH_DISABLE_0(1);
if (nonblock)
return V_TF_DDP_BUF0_FLUSH(1);
return V_TF_DDP_BUF0_FLUSH(!TOM_TUNABLE(toep->tp_toedev, ddp_push_wait));
}
/*
* Reposts the kernel DDP buffer after it has been previously become full and
* invalidated. We just need to reset the offset and adjust the DDP flags.
* Conveniently, we can set the flags and the offset with a single message.
* Note that this function does not set the buffer length. Again conveniently
* our kernel buffer is of fixed size. If the length needs to be changed it
* needs to be done separately.
*/
static void
t3_repost_kbuf(struct toepcb *toep, unsigned int bufidx, int modulate,
int activate, int nonblock)
{
struct ddp_state *p = &toep->tp_ddp_state;
unsigned long flags;
#if 0
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
#endif
p->buf_state[bufidx].cur_offset = p->kbuf[bufidx]->dgl_offset;
p->buf_state[bufidx].flags = p->kbuf_noinval ? DDP_BF_NOINVAL : 0;
p->buf_state[bufidx].gl = p->kbuf[bufidx];
p->cur_buf = bufidx;
p->kbuf_idx = bufidx;
flags = select_ddp_flags(toep, bufidx, nonblock, 0);
if (!bufidx)
t3_setup_ddpbufs(toep, 0, 0, 0, 0, flags |
V_TF_DDP_PSH_NO_INVALIDATE0(p->kbuf_noinval) |
V_TF_DDP_PSH_NO_INVALIDATE1(p->kbuf_noinval) |
V_TF_DDP_BUF0_VALID(1),
V_TF_DDP_BUF0_FLUSH(1) |
V_TF_DDP_PSH_NO_INVALIDATE0(1) |
V_TF_DDP_PSH_NO_INVALIDATE1(1) | V_TF_DDP_OFF(1) |
V_TF_DDP_BUF0_VALID(1) |
V_TF_DDP_ACTIVE_BUF(activate), modulate);
else
t3_setup_ddpbufs(toep, 0, 0, 0, 0, flags |
V_TF_DDP_PSH_NO_INVALIDATE0(p->kbuf_noinval) |
V_TF_DDP_PSH_NO_INVALIDATE1(p->kbuf_noinval) |
V_TF_DDP_BUF1_VALID(1) |
V_TF_DDP_ACTIVE_BUF(activate),
V_TF_DDP_BUF1_FLUSH(1) |
V_TF_DDP_PSH_NO_INVALIDATE0(1) |
V_TF_DDP_PSH_NO_INVALIDATE1(1) | V_TF_DDP_OFF(1) |
V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
modulate);
}
/**
* setup_uio_ppods - setup HW page pods for a user iovec
* @sk: the associated socket
* @uio: the uio
* @oft: additional bytes to map before the start of the buffer
*
* Pins a user iovec and sets up HW page pods for DDP into it. We allocate
* page pods for user buffers on the first call per socket. Afterwards we
* limit the buffer length to whatever the existing page pods can accommodate.
* Returns a negative error code or the length of the mapped buffer.
*
* The current implementation handles iovecs with only one entry.
*/
static int
setup_uio_ppods(struct toepcb *toep, const struct uio *uio, int oft, int *length)
{
int err;
unsigned int len;
struct ddp_gather_list *gl = NULL;
struct ddp_state *p = &toep->tp_ddp_state;
struct iovec *iov = uio->uio_iov;
vm_offset_t addr = (vm_offset_t)iov->iov_base - oft;
#ifdef notyet
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
#endif
if (__predict_false(p->ubuf_nppods == 0)) {
err = alloc_buf1_ppods(toep, p, addr, iov->iov_len + oft);
if (err)
return (err);
}
len = (p->ubuf_nppods - NUM_SENTINEL_PPODS) * PPOD_PAGES * PAGE_SIZE;
len -= addr & PAGE_MASK;
if (len > M_TCB_RX_DDP_BUF0_LEN)
len = M_TCB_RX_DDP_BUF0_LEN;
len = min(len, toep->tp_tp->rcv_wnd - 32768);
len = min(len, iov->iov_len + oft);
if (len <= p->kbuf[0]->dgl_length) {
printf("length too short\n");
return (EINVAL);
}
err = t3_pin_pages(toep->tp_rx_dmat, toep->tp_dmamap, addr, len, &gl, p->ubuf);
if (err)
return (err);
if (gl) {
if (p->ubuf)
t3_free_ddp_gl(p->ubuf);
p->ubuf = gl;
t3_setup_ppods(toep, gl, pages2ppods(gl->dgl_nelem), p->ubuf_tag, len,
gl->dgl_offset, 0);
}
*length = len;
return (0);
}
/*
*
*/
void
t3_cancel_ubuf(struct toepcb *toep, struct sockbuf *rcv)
{
struct ddp_state *p = &toep->tp_ddp_state;
int ubuf_pending = t3_ddp_ubuf_pending(toep);
int err = 0, count = 0;
if (p->ubuf == NULL)
return;
sockbuf_lock_assert(rcv);
p->cancel_ubuf = 1;
while (ubuf_pending && !(rcv->sb_state & SBS_CANTRCVMORE)) {
CTR3(KTR_TOM,
"t3_cancel_ubuf: flags0 0x%x flags1 0x%x get_tcb_count %d",
p->buf_state[0].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY),
p->buf_state[1].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY),
p->get_tcb_count);
if (p->get_tcb_count == 0)
t3_cancel_ddpbuf(toep, p->cur_buf);
else
CTR5(KTR_TOM, "waiting err=%d get_tcb_count=%d timeo=%d rcv=%p SBS_CANTRCVMORE=%d",
err, p->get_tcb_count, rcv->sb_timeo, rcv,
!!(rcv->sb_state & SBS_CANTRCVMORE));
while (p->get_tcb_count && !(rcv->sb_state & SBS_CANTRCVMORE)) {
if (count & 0xfffffff)
CTR5(KTR_TOM, "waiting err=%d get_tcb_count=%d timeo=%d rcv=%p count=%d",
err, p->get_tcb_count, rcv->sb_timeo, rcv, count);
count++;
err = sbwait(rcv);
}
ubuf_pending = t3_ddp_ubuf_pending(toep);
}
p->cancel_ubuf = 0;
p->user_ddp_pending = 0;
}
#define OVERLAY_MASK (V_TF_DDP_PSH_NO_INVALIDATE0(1) | \
V_TF_DDP_PSH_NO_INVALIDATE1(1) | \
V_TF_DDP_BUF1_FLUSH(1) | \
V_TF_DDP_BUF0_FLUSH(1) | \
V_TF_DDP_PUSH_DISABLE_1(1) | \
V_TF_DDP_PUSH_DISABLE_0(1) | \
V_TF_DDP_INDICATE_OUT(1))
/*
* Post a user buffer as an overlay on top of the current kernel buffer.
*/
int
t3_overlay_ubuf(struct toepcb *toep, struct sockbuf *rcv,
const struct uio *uio, int nonblock, int rcv_flags,
int modulate, int post_kbuf)
{
int err, len, ubuf_idx;
unsigned long flags;
struct ddp_state *p = &toep->tp_ddp_state;
if (p->kbuf[0] == NULL) {
return (EINVAL);
}
sockbuf_unlock(rcv);
err = setup_uio_ppods(toep, uio, 0, &len);
sockbuf_lock(rcv);
if (err)
return (err);
if ((rcv->sb_state & SBS_CANTRCVMORE) ||
(toep->tp_tp->t_flags & TF_TOE) == 0)
return (EINVAL);
ubuf_idx = p->kbuf_idx;
p->buf_state[ubuf_idx].flags = DDP_BF_NOFLIP;
/* Use existing offset */
/* Don't need to update .gl, user buffer isn't copied. */
p->cur_buf = ubuf_idx;
flags = select_ddp_flags(toep, ubuf_idx, nonblock, rcv_flags);
if (post_kbuf) {
struct ddp_buf_state *dbs = &p->buf_state[ubuf_idx ^ 1];
dbs->cur_offset = 0;
dbs->flags = 0;
dbs->gl = p->kbuf[ubuf_idx ^ 1];
p->kbuf_idx ^= 1;
flags |= p->kbuf_idx ?
V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_PUSH_DISABLE_1(0) :
V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_PUSH_DISABLE_0(0);
}
if (ubuf_idx == 0) {
t3_overlay_ddpbuf(toep, 0, p->ubuf_tag << 6, p->kbuf_tag[1] << 6,
len);
t3_setup_ddpbufs(toep, 0, 0, p->kbuf[1]->dgl_length, 0,
flags,
OVERLAY_MASK | flags, 1);
} else {
t3_overlay_ddpbuf(toep, 1, p->kbuf_tag[0] << 6, p->ubuf_tag << 6,
len);
t3_setup_ddpbufs(toep, p->kbuf[0]->dgl_length, 0, 0, 0,
flags,
OVERLAY_MASK | flags, 1);
}
#ifdef T3_TRACE
T3_TRACE5(TIDTB(so),
"t3_overlay_ubuf: tag %u flags 0x%x mask 0x%x ubuf_idx %d "
" kbuf_idx %d",
p->ubuf_tag, flags, OVERLAY_MASK, ubuf_idx, p->kbuf_idx);
#endif
CTR3(KTR_TOM,
"t3_overlay_ubuf: tag %u flags 0x%x mask 0x%x",
p->ubuf_tag, flags, OVERLAY_MASK);
CTR3(KTR_TOM,
"t3_overlay_ubuf: ubuf_idx %d kbuf_idx %d post_kbuf %d",
ubuf_idx, p->kbuf_idx, post_kbuf);
return (0);
}
/*
* Clean up DDP state that needs to survive until socket close time, such as the
* DDP buffers. The buffers are already unmapped at this point as unmapping
* needs the PCI device and a socket may close long after the device is removed.
*/
void
t3_cleanup_ddp(struct toepcb *toep)
{
struct ddp_state *p = &toep->tp_ddp_state;
int idx;
for (idx = 0; idx < NUM_DDP_KBUF; idx++)
if (p->kbuf[idx]) {
ddp_gl_free_pages(p->kbuf[idx], 0);
free(p->kbuf[idx], M_DEVBUF);
}
if (p->ubuf) {
ddp_gl_free_pages(p->ubuf, 0);
free(p->ubuf, M_DEVBUF);
p->ubuf = NULL;
}
toep->tp_ulp_mode = 0;
}
/*
* This is a companion to t3_cleanup_ddp() and releases the HW resources
* associated with a connection's DDP state, such as the page pods.
* It's called when HW is done with a connection. The rest of the state
* remains available until both HW and the app are done with the connection.
*/
void
t3_release_ddp_resources(struct toepcb *toep)
{
struct ddp_state *p = &toep->tp_ddp_state;
struct tom_data *d = TOM_DATA(toep->tp_toedev);
int idx;
for (idx = 0; idx < NUM_DDP_KBUF; idx++) {
t3_free_ppods(d, p->kbuf_tag[idx],
p->kbuf_nppods[idx]);
unmap_ddp_gl(p->kbuf[idx]);
}
if (p->ubuf_nppods) {
t3_free_ppods(d, p->ubuf_tag, p->ubuf_nppods);
p->ubuf_nppods = 0;
}
if (p->ubuf)
unmap_ddp_gl(p->ubuf);
}
void
t3_post_kbuf(struct toepcb *toep, int modulate, int nonblock)
{
struct ddp_state *p = &toep->tp_ddp_state;
t3_set_ddp_tag(toep, p->cur_buf, p->kbuf_tag[p->cur_buf] << 6);
t3_set_ddp_buf(toep, p->cur_buf, 0, p->kbuf[p->cur_buf]->dgl_length);
t3_repost_kbuf(toep, p->cur_buf, modulate, 1, nonblock);
#ifdef T3_TRACE
T3_TRACE1(TIDTB(so),
"t3_post_kbuf: cur_buf = kbuf_idx = %u ", p->cur_buf);
#endif
CTR1(KTR_TOM,
"t3_post_kbuf: cur_buf = kbuf_idx = %u ", p->cur_buf);
}
/*
* Prepare a socket for DDP. Must be called when the socket is known to be
* open.
*/
int
t3_enter_ddp(struct toepcb *toep, unsigned int kbuf_size, unsigned int waitall, int nonblock)
{
int i, err = ENOMEM;
static vm_pindex_t color;
unsigned int nppods, kbuf_pages, idx = 0;
struct ddp_state *p = &toep->tp_ddp_state;
struct tom_data *d = TOM_DATA(toep->tp_toedev);
if (kbuf_size > M_TCB_RX_DDP_BUF0_LEN)
return (EINVAL);
#ifdef notyet
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
#endif
kbuf_pages = (kbuf_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
nppods = pages2ppods(kbuf_pages);
p->kbuf_noinval = !!waitall;
p->kbuf_tag[NUM_DDP_KBUF - 1] = -1;
for (idx = 0; idx < NUM_DDP_KBUF; idx++) {
p->kbuf[idx] =
malloc(sizeof (struct ddp_gather_list) + kbuf_pages *
sizeof(vm_page_t *), M_DEVBUF, M_NOWAIT|M_ZERO);
if (p->kbuf[idx] == NULL)
goto err;
err = t3_alloc_ppods(d, nppods, &p->kbuf_tag[idx]);
if (err) {
printf("t3_alloc_ppods failed err=%d\n", err);
goto err;
}
p->kbuf_nppods[idx] = nppods;
p->kbuf[idx]->dgl_length = kbuf_size;
p->kbuf[idx]->dgl_offset = 0;
p->kbuf[idx]->dgl_nelem = kbuf_pages;
for (i = 0; i < kbuf_pages; ++i) {
p->kbuf[idx]->dgl_pages[i] = vm_page_alloc(NULL, color,
VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED |
VM_ALLOC_ZERO);
if (p->kbuf[idx]->dgl_pages[i] == NULL) {
p->kbuf[idx]->dgl_nelem = i;
printf("failed to allocate kbuf pages\n");
goto err;
}
}
#ifdef NEED_BUSDMA
/*
* XXX we'll need this for VT-d or any platform with an iommu :-/
*
*/
for (i = 0; i < kbuf_pages; ++i)
p->kbuf[idx]->phys_addr[i] =
pci_map_page(p->pdev, p->kbuf[idx]->pages[i],
0, PAGE_SIZE, PCI_DMA_FROMDEVICE);
#endif
t3_setup_ppods(toep, p->kbuf[idx], nppods, p->kbuf_tag[idx],
p->kbuf[idx]->dgl_length, 0, 0);
}
cxgb_log_tcb(TOEP_T3C_DEV(toep)->adapter, toep->tp_tid);
t3_set_ddp_tag(toep, 0, p->kbuf_tag[0] << 6);
t3_set_ddp_buf(toep, 0, 0, p->kbuf[0]->dgl_length);
t3_repost_kbuf(toep, 0, 0, 1, nonblock);
t3_set_rcv_coalesce_enable(toep,
TOM_TUNABLE(toep->tp_toedev, ddp_rcvcoalesce));
t3_set_dack_mss(toep, TOM_TUNABLE(toep->tp_toedev, delack)>>1);
#ifdef T3_TRACE
T3_TRACE4(TIDTB(so),
"t3_enter_ddp: kbuf_size %u waitall %u tag0 %d tag1 %d",
kbuf_size, waitall, p->kbuf_tag[0], p->kbuf_tag[1]);
#endif
CTR4(KTR_TOM,
"t3_enter_ddp: kbuf_size %u waitall %u tag0 %d tag1 %d",
kbuf_size, waitall, p->kbuf_tag[0], p->kbuf_tag[1]);
cxgb_log_tcb(TOEP_T3C_DEV(toep)->adapter, toep->tp_tid);
return (0);
err:
t3_release_ddp_resources(toep);
t3_cleanup_ddp(toep);
return (err);
}
int
t3_ddp_copy(const struct mbuf *m, int offset, struct uio *uio, int len)
{
int resid_init, err;
struct ddp_gather_list *gl = (struct ddp_gather_list *)m->m_ddp_gl;
resid_init = uio->uio_resid;
if (!gl->dgl_pages)
panic("pages not set\n");
CTR4(KTR_TOM, "t3_ddp_copy: offset=%d dgl_offset=%d cur_offset=%d len=%d",
offset, gl->dgl_offset, m->m_cur_offset, len);
offset += gl->dgl_offset + m->m_cur_offset;
KASSERT(len <= gl->dgl_length,
("len=%d > dgl_length=%d in ddp_copy\n", len, gl->dgl_length));
err = uiomove_fromphys(gl->dgl_pages, offset, len, uio);
return (err);
}
/*
* Allocate n page pods. Returns -1 on failure or the page pod tag.
*/
int
t3_alloc_ppods(struct tom_data *td, unsigned int n, int *ptag)
{
unsigned int i, j;
if (__predict_false(!td->ppod_map)) {
printf("ppod_map not set\n");
return (EINVAL);
}
mtx_lock(&td->ppod_map_lock);
for (i = 0; i < td->nppods; ) {
for (j = 0; j < n; ++j) /* scan ppod_map[i..i+n-1] */
if (td->ppod_map[i + j]) {
i = i + j + 1;
goto next;
}
memset(&td->ppod_map[i], 1, n); /* allocate range */
mtx_unlock(&td->ppod_map_lock);
CTR2(KTR_TOM,
"t3_alloc_ppods: n=%u tag=%u", n, i);
*ptag = i;
return (0);
next: ;
}
mtx_unlock(&td->ppod_map_lock);
return (0);
}
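/*
 * The allocator above is a first-fit scan over a byte-per-pod map: it looks
 * for n consecutive zero bytes, memset()s them to 1 to claim the range, and
 * hands the starting index back through *ptag.  Note that the exhaustion
 * path at the bottom also returns 0 without setting *ptag, so callers
 * cannot tell failure from success by the return value alone.
 */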
void
t3_free_ppods(struct tom_data *td, unsigned int tag, unsigned int n)
{
/* No need to take ppod_lock here */
memset(&td->ppod_map[tag], 0, n);
}

View File

@ -1,91 +0,0 @@
/**************************************************************************
Copyright (c) 2007, Chelsio Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Neither the name of the Chelsio Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
$FreeBSD$
***************************************************************************/
#ifndef CXGB_DEFS_H_
#define CXGB_DEFS_H_
#define VALIDATE_TID 0
#define TOEPCB(so) ((struct toepcb *)(sototcpcb((so))->t_toe))
#define TOE_DEV(so) (TOEPCB((so))->tp_toedev)
#define toeptoso(toep) ((toep)->tp_tp->t_inpcb->inp_socket)
#define sototoep(so) (sototcpcb((so))->t_toe)
#define TRACE_ENTER printf("%s:%s entered\n", __FUNCTION__, __FILE__)
#define TRACE_EXIT printf("%s:%s:%d exited\n", __FUNCTION__, __FILE__, __LINE__)
#define KTR_TOM KTR_SPARE2
#define KTR_TCB KTR_SPARE3
struct toepcb;
struct listen_ctx;
void cxgb_log_tcb(struct adapter *sc, unsigned int tid);
typedef void (*defer_handler_t)(struct toedev *dev, struct mbuf *m);
void t3tom_register_cpl_handler(unsigned int opcode, cxgb_cpl_handler_func h);
void t3_listen_start(struct toedev *dev, struct socket *so, struct t3cdev *cdev);
void t3_listen_stop(struct toedev *dev, struct socket *so, struct t3cdev *cdev);
int t3_push_frames(struct socket *so, int req_completion);
int t3_connect(struct toedev *tdev, struct socket *so, struct rtentry *rt,
struct sockaddr *nam);
void t3_init_listen_cpl_handlers(void);
int t3_init_cpl_io(void);
void t3_init_wr_tab(unsigned int wr_len);
uint32_t t3_send_rx_credits(struct tcpcb *tp, uint32_t credits, uint32_t dack, int nofail);
void t3_send_rx_modulate(struct toepcb *toep);
void t3_cleanup_rbuf(struct tcpcb *tp, int copied);
void t3_init_socket_ops(void);
void t3_install_socket_ops(struct socket *so);
void t3_disconnect_acceptq(struct socket *listen_so);
void t3_reset_synq(struct listen_ctx *ctx);
void t3_defer_reply(struct mbuf *m, struct toedev *dev, defer_handler_t handler);
struct toepcb *toepcb_alloc(void);
void toepcb_hold(struct toepcb *);
void toepcb_release(struct toepcb *);
void toepcb_init(struct toepcb *);
void t3_set_rcv_coalesce_enable(struct toepcb *toep, int on_off);
void t3_set_dack_mss(struct toepcb *toep, int on);
void t3_set_keepalive(struct toepcb *toep, int on_off);
void t3_set_ddp_tag(struct toepcb *toep, int buf_idx, unsigned int tag);
void t3_set_ddp_buf(struct toepcb *toep, int buf_idx, unsigned int offset,
unsigned int len);
int t3_get_tcb(struct toepcb *toep);
int t3_ctloutput(struct socket *so, struct sockopt *sopt);
#endif

View File

@ -1,76 +1,61 @@
/**************************************************************************
Copyright (c) 2007, Chelsio Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Neither the name of the Chelsio Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***************************************************************************/
/*-
* Copyright (c) 2012 Chelsio Communications, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#if __FreeBSD_version > 700000
#include <sys/rwlock.h>
#endif
#include <sys/socket.h>
#include <net/if.h>
#include <net/ethernet.h>
#include <net/if_vlan_var.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/toecore.h>
#include <cxgb_include.h>
#include <ulp/tom/cxgb_l2t.h>
#include "cxgb_include.h"
#include "ulp/tom/cxgb_tom.h"
#include "ulp/tom/cxgb_l2t.h"
#define VLAN_NONE 0xfff
#define SDL(s) ((struct sockaddr_dl *)s)
#define RT_ENADDR(sa) ((u_char *)LLADDR(SDL((sa))))
#define rt_expire rt_rmx.rmx_expire
struct llinfo_arp {
struct callout la_timer;
struct rtentry *la_rt;
struct mbuf *la_hold; /* last packet until resolved/timeout */
u_short la_preempt; /* countdown for pre-expiry arps */
u_short la_asked; /* # requests sent */
};
#define VLAN_NONE 0xfff
#define SA(x) ((struct sockaddr *)(x))
#define SIN(x) ((struct sockaddr_in *)(x))
#define SINADDR(x) (SIN(x)->sin_addr.s_addr)
/*
* Module locking notes: There is a RW lock protecting the L2 table as a
* whole plus a spinlock per L2T entry. Entry lookups and allocations happen
* whole plus a mutex per L2T entry. Entry lookups and allocations happen
* under the protection of the table lock, individual entry changes happen
* while holding that entry's spinlock. The table lock nests outside the
* while holding that entry's mutex. The table lock nests outside the
* entry locks. Allocations of new entries take the table lock as writers so
* no other lookups can happen while allocating new entries. Entry updates
* take the table lock as readers so multiple entries can be updated in
@ -78,72 +63,60 @@ struct llinfo_arp {
* and therefore can happen in parallel with entry allocation but no entry
* can change state or increment its ref count during allocation as both of
* these perform lookups.
*
* When acquiring multiple locks, the order is llentry -> L2 table -> L2 entry.
*/
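A rough sketch of that discipline (illustration only, not driver code; the helper name is made up): a read-side lookup takes the table lock shared and only then the per-entry mutex.

static struct l2t_entry *
example_lookup(struct l2t_data *d, uint32_t addr, struct ifnet *ifp)
{
	struct l2t_entry *e;
	int hash = arp_hash(addr, ifp->if_index, d);

	rw_rlock(&d->lock);			/* table lock, shared */
	for (e = d->l2tab[hash].first; e; e = e->next) {
		if (e->addr == addr && e->ifp == ifp) {
			mtx_lock(&e->lock);	/* entry mutex nests inside */
			l2t_hold(d, e);
			mtx_unlock(&e->lock);
			break;
		}
	}
	rw_runlock(&d->lock);
	return (e);
}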
static inline unsigned int
vlan_prio(const struct l2t_entry *e)
{
return e->vlan >> 13;
}
static inline unsigned int
arp_hash(u32 key, int ifindex, const struct l2t_data *d)
{
return jhash_2words(key, ifindex, 0) & (d->nentries - 1);
}
static inline void
neigh_replace(struct l2t_entry *e, struct llentry *neigh)
{
LLE_WLOCK(neigh);
LLE_ADDREF(neigh);
LLE_WUNLOCK(neigh);
if (e->neigh)
LLE_FREE(e->neigh);
e->neigh = neigh;
}
/*
* Set up an L2T entry and send any packets waiting in the arp queue. The
* supplied mbuf is used for the CPL_L2T_WRITE_REQ. Must be called with the
* entry locked.
* Set up an L2T entry and send any packets waiting in the arp queue. Must be
* called with the entry locked.
*/
static int
setup_l2e_send_pending(struct t3cdev *dev, struct mbuf *m,
struct l2t_entry *e)
setup_l2e_send_pending(struct adapter *sc, struct l2t_entry *e)
{
struct mbuf *m;
struct cpl_l2t_write_req *req;
struct port_info *pi = &sc->port[e->smt_idx]; /* smt_idx is port_id */
if (!m) {
if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
return (ENOMEM);
mtx_assert(&e->lock, MA_OWNED);
m = M_GETHDR_OFLD(pi->first_qset, CPL_PRIORITY_CONTROL, req);
if (m == NULL) {
log(LOG_ERR, "%s: no mbuf, can't setup L2 entry at index %d\n",
__func__, e->idx);
return (ENOMEM);
}
/*
* XXX MH_ALIGN
*/
req = mtod(m, struct cpl_l2t_write_req *);
m->m_pkthdr.len = m->m_len = sizeof(*req);
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx));
req->params = htonl(V_L2T_W_IDX(e->idx) | V_L2T_W_IFF(e->smt_idx) |
V_L2T_W_VLAN(e->vlan & EVL_VLID_MASK) |
V_L2T_W_PRIO(vlan_prio(e)));
V_L2T_W_VLAN(e->vlan & EVL_VLID_MASK) |
V_L2T_W_PRIO(EVL_PRIOFTAG(e->vlan)));
memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
m_set_priority(m, CPL_PRIORITY_CONTROL);
cxgb_ofld_send(dev, m);
t3_offload_tx(sc, m);
/*
* XXX: We used pi->first_qset to send the L2T_WRITE_REQ. If any mbuf
* on the arpq is going out via another queue set associated with the
* port then it has a bad race with the L2T_WRITE_REQ. Ideally we
* should wait till the reply to the write before draining the arpq.
*/
while (e->arpq_head) {
m = e->arpq_head;
e->arpq_head = m->m_next;
m->m_next = NULL;
cxgb_ofld_send(dev, m);
t3_offload_tx(sc, m);
}
e->arpq_tail = NULL;
e->state = L2T_STATE_VALID;
return 0;
return (0);
}
/*
@ -153,6 +126,8 @@ setup_l2e_send_pending(struct t3cdev *dev, struct mbuf *m,
static inline void
arpq_enqueue(struct l2t_entry *e, struct mbuf *m)
{
mtx_assert(&e->lock, MA_OWNED);
m->m_next = NULL;
if (e->arpq_head)
e->arpq_tail->m_next = m;
@ -161,113 +136,149 @@ arpq_enqueue(struct l2t_entry *e, struct mbuf *m)
e->arpq_tail = m;
}
int
t3_l2t_send_slow(struct t3cdev *dev, struct mbuf *m, struct l2t_entry *e)
static void
resolution_failed_mbuf(struct mbuf *m)
{
struct llentry *lle = e->neigh;
struct sockaddr_in sin;
log(LOG_ERR, "%s: leaked mbuf %p, CPL at %p",
__func__, m, mtod(m, void *));
}
static void
resolution_failed(struct l2t_entry *e)
{
struct mbuf *m;
mtx_assert(&e->lock, MA_OWNED);
while (e->arpq_head) {
m = e->arpq_head;
e->arpq_head = m->m_next;
m->m_next = NULL;
resolution_failed_mbuf(m);
}
e->arpq_tail = NULL;
}
static void
update_entry(struct adapter *sc, struct l2t_entry *e, uint8_t *lladdr,
uint16_t vtag)
{
mtx_assert(&e->lock, MA_OWNED);
/*
* The entry may be in active use (e->refcount > 0) or not. We update
* it even when it's not as this simplifies the case where we decide to
* reuse the entry later.
*/
if (lladdr == NULL &&
(e->state == L2T_STATE_RESOLVING || e->state == L2T_STATE_FAILED)) {
/*
* Never got a valid L2 address for this one. Just mark it as
* failed instead of removing it from the hash (for which we'd
* need to wlock the table).
*/
e->state = L2T_STATE_FAILED;
resolution_failed(e);
return;
} else if (lladdr == NULL) {
/* Valid or already-stale entry was deleted (or expired) */
KASSERT(e->state == L2T_STATE_VALID ||
e->state == L2T_STATE_STALE,
("%s: lladdr NULL, state %d", __func__, e->state));
e->state = L2T_STATE_STALE;
} else {
if (e->state == L2T_STATE_RESOLVING ||
e->state == L2T_STATE_FAILED ||
memcmp(e->dmac, lladdr, ETHER_ADDR_LEN)) {
/* unresolved -> resolved; or dmac changed */
memcpy(e->dmac, lladdr, ETHER_ADDR_LEN);
e->vlan = vtag;
setup_l2e_send_pending(sc, e);
}
e->state = L2T_STATE_VALID;
}
}
static int
resolve_entry(struct adapter *sc, struct l2t_entry *e)
{
struct tom_data *td = sc->tom_softc;
struct toedev *tod = &td->tod;
struct sockaddr_in sin = {0};
uint8_t dmac[ETHER_ADDR_LEN];
uint16_t vtag = EVL_VLID_MASK;
int rc;
bzero(&sin, sizeof(struct sockaddr_in));
sin.sin_family = AF_INET;
sin.sin_len = sizeof(struct sockaddr_in);
sin.sin_addr.s_addr = e->addr;
SINADDR(&sin) = e->addr;
rc = toe_l2_resolve(tod, e->ifp, SA(&sin), dmac, &vtag);
if (rc == EWOULDBLOCK)
return (rc);
mtx_lock(&e->lock);
update_entry(sc, e, rc == 0 ? dmac : NULL, vtag);
mtx_unlock(&e->lock);
return (rc);
}
int
t3_l2t_send_slow(struct adapter *sc, struct mbuf *m, struct l2t_entry *e)
{
CTR2(KTR_CXGB, "send slow on rt=%p eaddr=0x%08x\n", rt, e->addr);
again:
switch (e->state) {
case L2T_STATE_STALE: /* entry is stale, kick off revalidation */
arpresolve(rt->rt_ifp, rt, NULL,
(struct sockaddr *)&sin, e->dmac, &lle);
mtx_lock(&e->lock);
if (e->state == L2T_STATE_STALE)
e->state = L2T_STATE_VALID;
mtx_unlock(&e->lock);
if (resolve_entry(sc, e) != EWOULDBLOCK)
goto again; /* entry updated, re-examine state */
/* Fall through */
case L2T_STATE_VALID: /* fast-path, send the packet on */
return cxgb_ofld_send(dev, m);
return (t3_offload_tx(sc, m));
case L2T_STATE_RESOLVING:
mtx_lock(&e->lock);
if (e->state != L2T_STATE_RESOLVING) { // ARP already completed
if (e->state != L2T_STATE_RESOLVING) {
mtx_unlock(&e->lock);
goto again;
}
arpq_enqueue(e, m);
mtx_unlock(&e->lock);
/*
* Only the first packet added to the arpq should kick off
* resolution. However, because the m_gethdr below can fail,
* we allow each packet added to the arpq to retry resolution
* as a way of recovering from transient memory exhaustion.
* A better way would be to use a work request to retry L2T
* entries when there's no memory.
*/
if (arpresolve(rt->rt_ifp, rt, NULL,
(struct sockaddr *)&sin, e->dmac, &lle) == 0) {
CTR6(KTR_CXGB, "mac=%x:%x:%x:%x:%x:%x\n",
e->dmac[0], e->dmac[1], e->dmac[2], e->dmac[3], e->dmac[4], e->dmac[5]);
if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
return (ENOMEM);
mtx_lock(&e->lock);
if (e->arpq_head)
setup_l2e_send_pending(dev, m, e);
else
m_freem(m);
mtx_unlock(&e->lock);
}
if (resolve_entry(sc, e) == EWOULDBLOCK)
break;
mtx_lock(&e->lock);
if (e->state == L2T_STATE_VALID && e->arpq_head)
setup_l2e_send_pending(sc, e);
if (e->state == L2T_STATE_FAILED)
resolution_failed(e);
mtx_unlock(&e->lock);
break;
case L2T_STATE_FAILED:
resolution_failed_mbuf(m);
return (EHOSTUNREACH);
}
return 0;
return (0);
}
void
t3_l2t_send_event(struct t3cdev *dev, struct l2t_entry *e)
{
struct mbuf *m0;
struct sockaddr_in sin;
sin.sin_family = AF_INET;
sin.sin_len = sizeof(struct sockaddr_in);
sin.sin_addr.s_addr = e->addr;
struct llentry *lle;
if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
return;
rt = e->neigh;
again:
switch (e->state) {
case L2T_STATE_STALE: /* entry is stale, kick off revalidation */
arpresolve(rt->rt_ifp, rt, NULL,
(struct sockaddr *)&sin, e->dmac, &lle);
mtx_lock(&e->lock);
if (e->state == L2T_STATE_STALE) {
e->state = L2T_STATE_VALID;
}
mtx_unlock(&e->lock);
return;
case L2T_STATE_VALID: /* fast-path, send the packet on */
return;
case L2T_STATE_RESOLVING:
mtx_lock(&e->lock);
if (e->state != L2T_STATE_RESOLVING) { // ARP already completed
mtx_unlock(&e->lock);
goto again;
}
mtx_unlock(&e->lock);
/*
* Only the first packet added to the arpq should kick off
* resolution. However, because the alloc_skb below can fail,
* we allow each packet added to the arpq to retry resolution
* as a way of recovering from transient memory exhaustion.
* A better way would be to use a work request to retry L2T
* entries when there's no memory.
*/
arpresolve(rt->rt_ifp, rt, NULL,
(struct sockaddr *)&sin, e->dmac, &lle);
}
return;
}
/*
* Allocate a free L2T entry. Must be called with l2t_data.lock held.
*/
@ -276,15 +287,19 @@ alloc_l2e(struct l2t_data *d)
{
struct l2t_entry *end, *e, **p;
rw_assert(&d->lock, RA_WLOCKED);
if (!atomic_load_acq_int(&d->nfree))
return NULL;
return (NULL);
/* there's definitely a free entry */
for (e = d->rover, end = &d->l2tab[d->nentries]; e != end; ++e)
for (e = d->rover, end = &d->l2tab[d->nentries]; e != end; ++e) {
if (atomic_load_acq_int(&e->refcnt) == 0)
goto found;
}
for (e = &d->l2tab[1]; atomic_load_acq_int(&e->refcnt); ++e) ;
for (e = &d->l2tab[1]; atomic_load_acq_int(&e->refcnt); ++e)
continue;
found:
d->rover = e + 1;
atomic_add_int(&d->nfree, -1);
@ -294,90 +309,37 @@ found:
* presently in the hash table. We need to remove it.
*/
if (e->state != L2T_STATE_UNUSED) {
int hash = arp_hash(e->addr, e->ifindex, d);
int hash = arp_hash(e->addr, e->ifp->if_index, d);
for (p = &d->l2tab[hash].first; *p; p = &(*p)->next)
for (p = &d->l2tab[hash].first; *p; p = &(*p)->next) {
if (*p == e) {
*p = e->next;
break;
}
}
e->state = L2T_STATE_UNUSED;
}
return e;
}
/*
* Called when an L2T entry has no more users. The entry is left in the hash
* table since it is likely to be reused but we also bump nfree to indicate
* that the entry can be reallocated for a different neighbor. We also drop
* the existing neighbor reference in case the neighbor is going away and is
* waiting on our reference.
*
* Because entries can be reallocated to other neighbors once their ref count
* drops to 0 we need to take the entry's lock to avoid races with a new
* incarnation.
*/
void
t3_l2e_free(struct l2t_data *d, struct l2t_entry *e)
{
struct llentry *lle;
mtx_lock(&e->lock);
if (atomic_load_acq_int(&e->refcnt) == 0) { /* hasn't been recycled */
lle = e->neigh;
e->neigh = NULL;
}
mtx_unlock(&e->lock);
atomic_add_int(&d->nfree, 1);
if (lle)
LLE_FREE(lle);
}
/*
* Update an L2T entry that was previously used for the same next hop as neigh.
* Must be called with softirqs disabled.
*/
static inline void
reuse_entry(struct l2t_entry *e, struct llentry *neigh)
{
mtx_lock(&e->lock); /* avoid race with t3_l2t_free */
if (neigh != e->neigh)
neigh_replace(e, neigh);
if (memcmp(e->dmac, RT_ENADDR(neigh->rt_gateway), sizeof(e->dmac)) ||
(neigh->rt_expire > time_uptime))
e->state = L2T_STATE_RESOLVING;
else if (la->la_hold == NULL)
e->state = L2T_STATE_VALID;
else
e->state = L2T_STATE_STALE;
mtx_unlock(&e->lock);
return (e);
}
struct l2t_entry *
t3_l2t_get(struct t3cdev *dev, struct llentry *neigh, struct ifnet *ifp,
struct sockaddr *sa)
t3_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa)
{
struct tom_data *td = pi->adapter->tom_softc;
struct l2t_entry *e;
struct l2t_data *d = L2DATA(dev);
u32 addr = ((struct sockaddr_in *)sa)->sin_addr.s_addr;
int ifidx = ifp->if_index;
int hash = arp_hash(addr, ifidx, d);
unsigned int smt_idx = ((struct port_info *)ifp->if_softc)->port_id;
struct l2t_data *d = td->l2t;
uint32_t addr = SINADDR(sa);
int hash = arp_hash(addr, ifp->if_index, d);
unsigned int smt_idx = pi->port_id;
rw_wlock(&d->lock);
for (e = d->l2tab[hash].first; e; e = e->next)
if (e->addr == addr && e->ifindex == ifidx &&
e->smt_idx == smt_idx) {
for (e = d->l2tab[hash].first; e; e = e->next) {
if (e->addr == addr && e->ifp == ifp && e->smt_idx == smt_idx) {
l2t_hold(d, e);
if (atomic_load_acq_int(&e->refcnt) == 1)
reuse_entry(e, neigh);
goto done;
}
}
/* Need to allocate a new entry */
e = alloc_l2e(d);
@ -385,116 +347,59 @@ t3_l2t_get(struct t3cdev *dev, struct llentry *neigh, struct ifnet *ifp,
mtx_lock(&e->lock); /* avoid race with t3_l2t_free */
e->next = d->l2tab[hash].first;
d->l2tab[hash].first = e;
rw_wunlock(&d->lock);
e->state = L2T_STATE_RESOLVING;
e->addr = addr;
e->ifindex = ifidx;
e->ifp = ifp;
e->smt_idx = smt_idx;
atomic_store_rel_int(&e->refcnt, 1);
e->neigh = NULL;
neigh_replace(e, neigh);
#ifdef notyet
/*
* XXX need to add accessor function for vlan tag
*/
if (neigh->rt_ifp->if_vlantrunk)
e->vlan = VLAN_DEV_INFO(neigh->dev)->vlan_id;
else
#endif
e->vlan = VLAN_NONE;
mtx_unlock(&e->lock);
return (e);
KASSERT(ifp->if_vlantrunk == NULL, ("TOE+VLAN unimplemented."));
e->vlan = VLAN_NONE;
mtx_unlock(&e->lock);
}
done:
rw_wunlock(&d->lock);
return e;
}
/*
* Called when address resolution fails for an L2T entry to handle packets
* on the arpq head. If a packet specifies a failure handler it is invoked,
* otherwise the packet is sent to the TOE.
*
* XXX: maybe we should abandon the latter behavior and just require a failure
* handler.
*/
static void
handle_failed_resolution(struct t3cdev *dev, struct mbuf *arpq)
{
while (arpq) {
struct mbuf *m = arpq;
#ifdef notyet
struct l2t_mbuf_cb *cb = L2T_MBUF_CB(m);
#endif
arpq = m->m_next;
m->m_next = NULL;
#ifdef notyet
if (cb->arp_failure_handler)
cb->arp_failure_handler(dev, m);
else
#endif
cxgb_ofld_send(dev, m);
}
return (e);
}
void
t3_l2t_update(struct t3cdev *dev, struct llentry *neigh,
uint8_t *enaddr, struct sockaddr *sa)
t3_l2_update(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
uint8_t *lladdr, uint16_t vtag)
{
struct tom_data *td = t3_tomdata(tod);
struct adapter *sc = tod->tod_softc;
struct l2t_entry *e;
struct mbuf *arpq = NULL;
struct l2t_data *d = L2DATA(dev);
u32 addr = *(u32 *) &((struct sockaddr_in *)sa)->sin_addr;
int hash = arp_hash(addr, ifidx, d);
struct llinfo_arp *la;
struct l2t_data *d = td->l2t;
u32 addr = *(u32 *) &SIN(sa)->sin_addr;
int hash = arp_hash(addr, ifp->if_index, d);
rw_rlock(&d->lock);
for (e = d->l2tab[hash].first; e; e = e->next)
if (e->addr == addr) {
if (e->addr == addr && e->ifp == ifp) {
mtx_lock(&e->lock);
goto found;
}
rw_runlock(&d->lock);
CTR1(KTR_CXGB, "t3_l2t_update: addr=0x%08x not found", addr);
/*
* This is of no interest to us. We've never had an offloaded
* connection to this destination, and we aren't attempting one right
* now.
*/
return;
found:
printf("found 0x%08x\n", addr);
rw_runlock(&d->lock);
memcpy(e->dmac, enaddr, ETHER_ADDR_LEN);
printf("mac=%x:%x:%x:%x:%x:%x\n",
e->dmac[0], e->dmac[1], e->dmac[2], e->dmac[3], e->dmac[4], e->dmac[5]);
if (atomic_load_acq_int(&e->refcnt)) {
if (neigh != e->neigh)
neigh_replace(e, neigh);
la = (struct llinfo_arp *)neigh->rt_llinfo;
if (e->state == L2T_STATE_RESOLVING) {
if (la->la_asked >= 5 /* arp_maxtries */) {
arpq = e->arpq_head;
e->arpq_head = e->arpq_tail = NULL;
} else
setup_l2e_send_pending(dev, NULL, e);
} else {
e->state = L2T_STATE_VALID;
if (memcmp(e->dmac, RT_ENADDR(neigh->rt_gateway), 6))
setup_l2e_send_pending(dev, NULL, e);
}
}
mtx_unlock(&e->lock);
if (arpq)
handle_failed_resolution(dev, arpq);
KASSERT(e->state != L2T_STATE_UNUSED,
("%s: unused entry in the hash.", __func__));
update_entry(sc, e, lladdr, vtag);
mtx_unlock(&e->lock);
}
struct l2t_data *
@ -503,9 +408,9 @@ t3_init_l2t(unsigned int l2t_capacity)
struct l2t_data *d;
int i, size = sizeof(*d) + l2t_capacity * sizeof(struct l2t_entry);
d = cxgb_alloc_mem(size);
d = malloc(size, M_CXGB, M_NOWAIT | M_ZERO);
if (!d)
return NULL;
return (NULL);
d->nentries = l2t_capacity;
d->rover = &d->l2tab[1]; /* entry 0 is not used */
@ -515,10 +420,10 @@ t3_init_l2t(unsigned int l2t_capacity)
for (i = 0; i < l2t_capacity; ++i) {
d->l2tab[i].idx = i;
d->l2tab[i].state = L2T_STATE_UNUSED;
mtx_init(&d->l2tab[i].lock, "L2TAB", NULL, MTX_DEF);
mtx_init(&d->l2tab[i].lock, "L2T_E", NULL, MTX_DEF);
atomic_store_rel_int(&d->l2tab[i].refcnt, 0);
}
return d;
return (d);
}
void
@ -530,5 +435,26 @@ t3_free_l2t(struct l2t_data *d)
for (i = 0; i < d->nentries; ++i)
mtx_destroy(&d->l2tab[i].lock);
cxgb_free_mem(d);
free(d, M_CXGB);
}
static int
do_l2t_write_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
struct cpl_l2t_write_rpl *rpl = mtod(m, void *);
if (rpl->status != CPL_ERR_NONE)
log(LOG_ERR,
"Unexpected L2T_WRITE_RPL status %u for entry %u\n",
rpl->status, GET_TID(rpl));
m_freem(m);
return (0);
}
void
t3_init_l2t_cpl_handlers(struct adapter *sc)
{
t3_register_cpl_handler(sc, CPL_L2T_WRITE_RPL, do_l2t_write_rpl);
}
#endif

View File

@ -1,6 +1,6 @@
/**************************************************************************
Copyright (c) 2007-2008, Chelsio Inc.
Copyright (c) 2007-2009, Chelsio Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
@ -31,26 +31,19 @@ $FreeBSD$
#ifndef _CHELSIO_L2T_H
#define _CHELSIO_L2T_H
#include <ulp/toecore/cxgb_toedev.h>
#include <sys/lock.h>
#if __FreeBSD_version > 700000
#include <sys/rwlock.h>
#else
#define rwlock mtx
#define rw_wlock(x) mtx_lock((x))
#define rw_wunlock(x) mtx_unlock((x))
#define rw_rlock(x) mtx_lock((x))
#define rw_runlock(x) mtx_unlock((x))
#define rw_init(x, str) mtx_init((x), (str), NULL, MTX_DEF)
#define rw_destroy(x) mtx_destroy((x))
#endif
enum {
L2T_STATE_VALID, /* entry is up to date */
L2T_STATE_STALE, /* entry may be used but needs revalidation */
L2T_STATE_RESOLVING, /* entry needs address resolution */
L2T_STATE_UNUSED /* entry not in use */
L2T_SIZE = 2048
};
enum {
L2T_STATE_VALID, /* entry is up to date */
L2T_STATE_STALE, /* entry may be used but needs revalidation */
L2T_STATE_RESOLVING, /* entry needs address resolution */
L2T_STATE_FAILED, /* failed to resolve */
L2T_STATE_UNUSED /* entry not in use */
};
/*
@ -64,18 +57,17 @@ enum {
struct l2t_entry {
uint16_t state; /* entry state */
uint16_t idx; /* entry index */
uint32_t addr; /* dest IP address */
int ifindex; /* neighbor's net_device's ifindex */
uint32_t addr; /* nexthop IP address */
struct ifnet *ifp; /* outgoing interface */
uint16_t smt_idx; /* SMT index */
uint16_t vlan; /* VLAN TCI (id: bits 0-11, prio: bits 13-15) */
struct llentry *neigh; /* associated neighbour */
struct l2t_entry *first; /* start of hash chain */
struct l2t_entry *next; /* next l2t_entry on chain */
struct mbuf *arpq_head; /* queue of packets awaiting resolution */
struct mbuf *arpq_tail;
struct mtx lock;
volatile uint32_t refcnt; /* entry reference count */
uint8_t dmac[6]; /* neighbour's MAC address */
uint8_t dmac[ETHER_ADDR_LEN]; /* nexthop's MAC address */
};
struct l2t_data {
@ -86,76 +78,37 @@ struct l2t_data {
struct l2t_entry l2tab[0];
};
typedef void (*arp_failure_handler_func)(struct t3cdev *dev,
struct mbuf *m);
void t3_l2e_free(struct l2t_data *, struct l2t_entry *e);
void t3_l2_update(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
uint8_t *lladdr, uint16_t vtag);
struct l2t_entry *t3_l2t_get(struct port_info *, struct ifnet *,
struct sockaddr *);
int t3_l2t_send_slow(struct adapter *, struct mbuf *, struct l2t_entry *);
struct l2t_data *t3_init_l2t(unsigned int);
void t3_free_l2t(struct l2t_data *);
void t3_init_l2t_cpl_handlers(struct adapter *);
typedef void (*opaque_arp_failure_handler_func)(void *dev,
struct mbuf *m);
/*
* Callback stored in an skb to handle address resolution failure.
*/
struct l2t_mbuf_cb {
arp_failure_handler_func arp_failure_handler;
};
/*
* XXX
*/
#define L2T_MBUF_CB(skb) ((struct l2t_mbuf_cb *)(skb)->cb)
static __inline void set_arp_failure_handler(struct mbuf *m,
arp_failure_handler_func hnd)
static inline int
l2t_send(struct adapter *sc, struct mbuf *m, struct l2t_entry *e)
{
m->m_pkthdr.header = (opaque_arp_failure_handler_func)hnd;
if (__predict_true(e->state == L2T_STATE_VALID))
return t3_offload_tx(sc, m);
else
return t3_l2t_send_slow(sc, m, e);
}
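A hedged usage sketch (the function name and error handling are illustrative, not part of the driver): an offload transmit path claims an L2T entry for the next hop, funnels work requests through l2t_send() so only unresolved entries take the slow path, and drops its reference when done with the entry.

static inline int
example_offload_tx(struct adapter *sc, struct l2t_data *d, struct port_info *pi,
    struct ifnet *ifp, struct sockaddr *nam, struct mbuf *wr)
{
	struct l2t_entry *e;
	int rc;

	e = t3_l2t_get(pi, ifp, nam);	/* find or allocate an entry */
	if (e == NULL)
		return (ENOMEM);
	rc = l2t_send(sc, wr, e);	/* fast path once L2T_STATE_VALID */
	l2t_release(d, e);		/* drop the ref when no longer needed */
	return (rc);
}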
/*
* Getting to the L2 data from an offload device.
*/
#define L2DATA(dev) ((dev)->l2opt)
void t3_l2e_free(struct l2t_data *d, struct l2t_entry *e);
void t3_l2t_update(struct t3cdev *dev, struct rtentry *rt, uint8_t *enaddr, struct sockaddr *sa);
struct l2t_entry *t3_l2t_get(struct t3cdev *dev, struct rtentry *neigh,
struct ifnet *ifp, struct sockaddr *sa);
int t3_l2t_send_slow(struct t3cdev *dev, struct mbuf *m,
struct l2t_entry *e);
void t3_l2t_send_event(struct t3cdev *dev, struct l2t_entry *e);
struct l2t_data *t3_init_l2t(unsigned int l2t_capacity);
void t3_free_l2t(struct l2t_data *d);
#ifdef CONFIG_PROC_FS
int t3_l2t_proc_setup(struct proc_dir_entry *dir, struct l2t_data *d);
void t3_l2t_proc_free(struct proc_dir_entry *dir);
#else
#define l2t_proc_setup(dir, d) 0
#define l2t_proc_free(dir)
#endif
int cxgb_ofld_send(struct t3cdev *dev, struct mbuf *m);
static inline int l2t_send(struct t3cdev *dev, struct mbuf *m,
struct l2t_entry *e)
static inline void
l2t_release(struct l2t_data *d, struct l2t_entry *e)
{
if (__predict_true(e->state == L2T_STATE_VALID)) {
return cxgb_ofld_send(dev, (struct mbuf *)m);
}
return t3_l2t_send_slow(dev, (struct mbuf *)m, e);
}
static inline void l2t_release(struct l2t_data *d, struct l2t_entry *e)
{
if (atomic_fetchadd_int(&e->refcnt, -1) == 1)
t3_l2e_free(d, e);
}
static inline void l2t_hold(struct l2t_data *d, struct l2t_entry *e)
{
if (atomic_fetchadd_int(&e->refcnt, 1) == 1) /* 0 -> 1 transition */
if (atomic_fetchadd_int(&e->refcnt, -1) == 1) /* 1 -> 0 transition */
atomic_add_int(&d->nfree, 1);
}
static inline void
l2t_hold(struct l2t_data *d, struct l2t_entry *e)
{
if (atomic_fetchadd_int(&e->refcnt, 1) == 0) /* 0 -> 1 transition */
atomic_add_int(&d->nfree, -1);
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,181 +0,0 @@
/**************************************************************************
Copyright (c) 2007, Chelsio Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Neither the name of the Chelsio Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
$FreeBSD$
***************************************************************************/
#ifndef T3_DDP_H
#define T3_DDP_H
/* Should be 1 or 2 indicating single or double kernel buffers. */
#define NUM_DDP_KBUF 2
/* min receive window for a connection to be considered for DDP */
#define MIN_DDP_RCV_WIN (48 << 10)
/* amount of Rx window not available to DDP to avoid window exhaustion */
#define DDP_RSVD_WIN (16 << 10)
/* # of sentinel invalid page pods at the end of a group of valid page pods */
#define NUM_SENTINEL_PPODS 0
/* # of pages a pagepod can hold without needing another pagepod */
#define PPOD_PAGES 4
/* page pods are allocated in groups of this size (must be power of 2) */
#define PPOD_CLUSTER_SIZE 16
/* for each TID we reserve this many page pods up front */
#define RSVD_PPODS_PER_TID 1
struct pagepod {
uint32_t pp_vld_tid;
uint32_t pp_pgsz_tag_color;
uint32_t pp_max_offset;
uint32_t pp_page_offset;
uint64_t pp_rsvd;
uint64_t pp_addr[5];
};
#define PPOD_SIZE sizeof(struct pagepod)
#define S_PPOD_TID 0
#define M_PPOD_TID 0xFFFFFF
#define V_PPOD_TID(x) ((x) << S_PPOD_TID)
#define S_PPOD_VALID 24
#define V_PPOD_VALID(x) ((x) << S_PPOD_VALID)
#define F_PPOD_VALID V_PPOD_VALID(1U)
#define S_PPOD_COLOR 0
#define M_PPOD_COLOR 0x3F
#define V_PPOD_COLOR(x) ((x) << S_PPOD_COLOR)
#define S_PPOD_TAG 6
#define M_PPOD_TAG 0xFFFFFF
#define V_PPOD_TAG(x) ((x) << S_PPOD_TAG)
#define S_PPOD_PGSZ 30
#define M_PPOD_PGSZ 0x3
#define V_PPOD_PGSZ(x) ((x) << S_PPOD_PGSZ)
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <machine/bus.h>
/* DDP gather lists can specify an offset only for the first page. */
struct ddp_gather_list {
unsigned int dgl_length;
unsigned int dgl_offset;
unsigned int dgl_nelem;
vm_page_t dgl_pages[0];
};
struct ddp_buf_state {
unsigned int cur_offset; /* offset of latest DDP notification */
unsigned int flags;
struct ddp_gather_list *gl;
};
struct ddp_state {
struct ddp_buf_state buf_state[2]; /* per buffer state */
int cur_buf;
unsigned short kbuf_noinval;
unsigned short kbuf_idx; /* which HW buffer is used for kbuf */
struct ddp_gather_list *ubuf;
int user_ddp_pending;
unsigned int ubuf_nppods; /* # of page pods for buffer 1 */
unsigned int ubuf_tag;
unsigned int ubuf_ddp_ready;
int cancel_ubuf;
int get_tcb_count;
unsigned int kbuf_posted;
unsigned int kbuf_nppods[NUM_DDP_KBUF];
unsigned int kbuf_tag[NUM_DDP_KBUF];
struct ddp_gather_list *kbuf[NUM_DDP_KBUF]; /* kernel buffer for DDP prefetch */
};
/* buf_state flags */
enum {
DDP_BF_NOINVAL = 1 << 0, /* buffer is set to NO_INVALIDATE */
DDP_BF_NOCOPY = 1 << 1, /* DDP to final dest, no copy needed */
DDP_BF_NOFLIP = 1 << 2, /* buffer flips after GET_TCB_RPL */
DDP_BF_PSH = 1 << 3, /* set in skb->flags if a DDP was
completed with a segment having the
PSH flag set */
DDP_BF_NODATA = 1 << 4, /* buffer completed before filling */
};
#include <ulp/tom/cxgb_toepcb.h>
struct sockbuf;
/*
* Returns 1 if a UBUF DMA buffer might be active.
*/
static inline int
t3_ddp_ubuf_pending(struct toepcb *toep)
{
struct ddp_state *p = &toep->tp_ddp_state;
/* When the TOM_TUNABLE(ddp) is enabled, we're always in ULP_MODE DDP,
* but DDP_STATE() is only valid if the connection actually enabled
* DDP.
*/
if (p->kbuf[0] == NULL)
return (0);
return (p->buf_state[0].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY)) ||
(p->buf_state[1].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY));
}
int t3_setup_ppods(struct toepcb *toep, const struct ddp_gather_list *gl,
unsigned int nppods, unsigned int tag, unsigned int maxoff,
unsigned int pg_off, unsigned int color);
int t3_alloc_ppods(struct tom_data *td, unsigned int n, int *tag);
void t3_free_ppods(struct tom_data *td, unsigned int tag, unsigned int n);
void t3_free_ddp_gl(struct ddp_gather_list *gl);
int t3_ddp_copy(const struct mbuf *m, int offset, struct uio *uio, int len);
//void t3_repost_kbuf(struct socket *so, int modulate, int activate);
void t3_post_kbuf(struct toepcb *toep, int modulate, int nonblock);
int t3_post_ubuf(struct toepcb *toep, const struct uio *uio, int nonblock,
int rcv_flags, int modulate, int post_kbuf);
void t3_cancel_ubuf(struct toepcb *toep, struct sockbuf *rcv);
int t3_overlay_ubuf(struct toepcb *toep, struct sockbuf *rcv,
const struct uio *uio, int nonblock,
int rcv_flags, int modulate, int post_kbuf);
int t3_enter_ddp(struct toepcb *toep, unsigned int kbuf_size, unsigned int waitall, int nonblock);
void t3_cleanup_ddp(struct toepcb *toep);
void t3_release_ddp_resources(struct toepcb *toep);
void t3_cancel_ddpbuf(struct toepcb *, unsigned int bufidx);
void t3_overlay_ddpbuf(struct toepcb *, unsigned int bufidx, unsigned int tag0,
unsigned int tag1, unsigned int len);
void t3_setup_ddpbufs(struct toepcb *, unsigned int len0, unsigned int offset0,
unsigned int len1, unsigned int offset1,
uint64_t ddp_flags, uint64_t flag_mask, int modulate);
#endif /* T3_DDP_H */

View File

@ -1,47 +0,0 @@
/*-
* Copyright (c) 2007, Chelsio Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Neither the name of the Chelsio Corporation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef CXGB_TCP_H_
#define CXGB_TCP_H_
#ifdef TCP_USRREQS_OVERLOAD
struct tcpcb *cxgb_tcp_drop(struct tcpcb *tp, int errno);
#else
#define cxgb_tcp_drop tcp_drop
#endif
void cxgb_tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip);
struct tcpcb *cxgb_tcp_close(struct tcpcb *tp);
extern struct pr_usrreqs cxgb_tcp_usrreqs;
#ifdef INET6
extern struct pr_usrreqs cxgb_tcp6_usrreqs;
#endif
#include <sys/sysctl.h>
SYSCTL_DECL(_net_inet_tcp_cxgb);
#endif /* CXGB_TCP_H_ */

View File

@ -1,97 +0,0 @@
/*-
* Copyright (c) 2007, Chelsio Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Neither the name of the Chelsio Corporation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
/*
* grab bag of accessor routines that will either be moved to netinet
* or removed
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>
#include <sys/sockopt.h>
#include <sys/sockbuf.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_offload.h>
#include <netinet/tcp_syncache.h>
#include <netinet/toedev.h>
#include <ulp/tom/cxgb_tcp_offload.h>
/*
* This file contains code as a short-term staging area before it is moved in
* to sys/netinet/tcp_offload.c
*/
void
sockbuf_lock(struct sockbuf *sb)
{
SOCKBUF_LOCK(sb);
}
void
sockbuf_lock_assert(struct sockbuf *sb)
{
SOCKBUF_LOCK_ASSERT(sb);
}
void
sockbuf_unlock(struct sockbuf *sb)
{
SOCKBUF_UNLOCK(sb);
}
int
sockbuf_sbspace(struct sockbuf *sb)
{
return (sbspace(sb));
}

View File

@ -1,14 +0,0 @@
/* $FreeBSD$ */
#ifndef CXGB_TCP_OFFLOAD_H_
#define CXGB_TCP_OFFLOAD_H_
struct sockbuf;
void sockbuf_lock(struct sockbuf *);
void sockbuf_lock_assert(struct sockbuf *);
void sockbuf_unlock(struct sockbuf *);
int sockbuf_sbspace(struct sockbuf *);
#endif /* CXGB_TCP_OFFLOAD_H_ */

View File

@ -1,5 +1,5 @@
/*-
* Copyright (c) 2007-2008, Chelsio Inc.
* Copyright (c) 2007-2009, Chelsio Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -32,88 +32,63 @@
#include <sys/condvar.h>
#include <sys/mbufq.h>
#define TP_DATASENT (1 << 0)
#define TP_TX_WAIT_IDLE (1 << 1)
#define TP_FIN_SENT (1 << 2)
#define TP_ABORT_RPL_PENDING (1 << 3)
#define TP_ABORT_SHUTDOWN (1 << 4)
#define TP_ABORT_RPL_RCVD (1 << 5)
#define TP_ABORT_REQ_RCVD (1 << 6)
#define TP_ATTACHED (1 << 7)
#define TP_CPL_DONE (1 << 8)
#define TP_IS_A_SYNQ_ENTRY (1 << 9)
#define TP_ABORT_RPL_SENT (1 << 10)
#define TP_SEND_FIN (1 << 11)
struct toepcb {
struct toedev *tp_toedev;
TAILQ_ENTRY(toepcb) link; /* toep_list */
int tp_flags;
struct toedev *tp_tod;
struct l2t_entry *tp_l2t;
unsigned int tp_tid;
int tp_tid;
int tp_wr_max;
int tp_wr_avail;
int tp_wr_unacked;
int tp_delack_mode;
int tp_mtu_idx;
int tp_ulp_mode;
int tp_qset_idx;
int tp_mss_clamp;
int tp_qset;
int tp_flags;
int tp_enqueued_bytes;
int tp_page_count;
int tp_state;
int tp_enqueued;
int tp_rx_credits;
tcp_seq tp_iss;
tcp_seq tp_delack_seq;
tcp_seq tp_rcv_wup;
tcp_seq tp_copied_seq;
uint64_t tp_write_seq;
struct inpcb *tp_inp;
struct mbuf *tp_m_last;
volatile int tp_refcount;
vm_page_t *tp_pages;
struct tcpcb *tp_tp;
struct mbuf *tp_m_last;
bus_dma_tag_t tp_tx_dmat;
bus_dma_tag_t tp_rx_dmat;
bus_dmamap_t tp_dmamap;
LIST_ENTRY(toepcb) synq_entry;
struct mbuf_head wr_list;
struct mbuf_head out_of_order_queue;
struct ddp_state tp_ddp_state;
struct cv tp_cv;
};
static inline void
reset_wr_list(struct toepcb *toep)
{
mbufq_init(&toep->wr_list);
}
static inline void
purge_wr_queue(struct toepcb *toep)
{
struct mbuf *m;
while ((m = mbufq_dequeue(&toep->wr_list)) != NULL)
m_freem(m);
}
static inline void
enqueue_wr(struct toepcb *toep, struct mbuf *m)
{
mbufq_tail(&toep->wr_list, m);
}
static inline struct mbuf *
peek_wr(const struct toepcb *toep)
{
return (mbufq_peek(&toep->wr_list));
}
static inline struct mbuf *
dequeue_wr(struct toepcb *toep)
{
return (mbufq_dequeue(&toep->wr_list));
}
#define wr_queue_walk(toep, m) \
for (m = peek_wr(toep); m; m = m->m_nextpkt)
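For illustration only (the counting helper below is not in the driver), the walk macro and queue helpers above might be used like this to examine a connection's outstanding work requests:

static inline int
example_count_pending_wrs(struct toepcb *toep)
{
	struct mbuf *m;
	int n = 0;

	wr_queue_walk(toep, m)		/* iterate without dequeuing */
		n++;
	return (n);
}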
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,6 @@
/**************************************************************************
Copyright (c) 2007, Chelsio Inc.
Copyright (c) 2007, 2009 Chelsio Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
@ -33,128 +32,248 @@ $FreeBSD$
#ifndef CXGB_TOM_H_
#define CXGB_TOM_H_
#include <sys/protosw.h>
#include <netinet/toedev.h>
#include <netinet/toecore.h>
#define LISTEN_INFO_HASH_SIZE 32
MALLOC_DECLARE(M_CXGB);
struct listen_info {
struct listen_info *next; /* Link to next entry */
struct socket *so; /* The listening socket */
unsigned int stid; /* The server TID */
};
#define KTR_CXGB KTR_SPARE3
#define LISTEN_HASH_SIZE 32
/*
* TOM tunable parameters. They can be manipulated through sysctl(2) or /proc.
* Holds the size, base address, free list start, etc of the TID, server TID,
* and active-open TID tables for a offload device.
* The tables themselves are allocated dynamically.
*/
struct tom_tunables {
int max_host_sndbuf; // max host RAM consumed by a sndbuf
int tx_hold_thres; // push/pull threshold for non-full TX sk_buffs
int max_wrs; // max # of outstanding WRs per connection
int rx_credit_thres; // min # of RX credits needed for RX_DATA_ACK
int cong_alg; // Congestion control algorithm
int mss; // max TX_DATA WR payload size
int delack; // delayed ACK control
int max_conn; // maximum number of offloaded connections
int soft_backlog_limit; // whether the listen backlog limit is soft
int ddp; // whether to put new connections in DDP mode
int ddp_thres; // min recvmsg size before activating DDP
int ddp_copy_limit; // capacity of kernel DDP buffer
int ddp_push_wait; // whether blocking DDP waits for PSH flag
int ddp_rcvcoalesce; // whether receive coalescing is enabled
int zcopy_sosend_enabled; // < is never zcopied
int zcopy_sosend_partial_thres; // < is never zcopied
int zcopy_sosend_partial_copy; // bytes copied in partial zcopy
int zcopy_sosend_thres;// >= are mostly zcopied
int zcopy_sosend_copy; // bytes coped in zcopied
int zcopy_sosend_ret_pending_dma;// pot. return while pending DMA
int activated; // TOE engine activation state
struct tid_info {
void **tid_tab;
unsigned int ntids;
volatile unsigned int tids_in_use;
union listen_entry *stid_tab;
unsigned int nstids;
unsigned int stid_base;
union active_open_entry *atid_tab;
unsigned int natids;
unsigned int atid_base;
/*
* The following members are accessed R/W so we put them in their own
* cache lines. TOM_XXX: actually do what is said here.
*
* XXX We could combine the atid fields above with the lock here since
* atids are used once (unlike other tids). OTOH the above fields are
* usually in cache due to tid_tab.
*/
struct mtx atid_lock;
union active_open_entry *afree;
unsigned int atids_in_use;
struct mtx stid_lock;
union listen_entry *sfree;
unsigned int stids_in_use;
};
struct tom_data {
TAILQ_ENTRY(tom_data) entry;
struct t3cdev *cdev;
struct pci_dev *pdev;
struct toedev tdev;
struct toedev tod;
struct cxgb_client *client;
struct tom_tunables conf;
struct tom_sysctl_table *sysctl;
/*
* toepcb's associated with this TOE device are either on the
* toep list or in the synq of a listening socket in lctx hash.
*/
struct mtx toep_list_lock;
TAILQ_HEAD(, toepcb) toep_list;
struct l2t_data *l2t;
struct tid_info tid_maps;
/*
* The next three locks listen_lock, deferq.lock, and tid_release_lock
* are used rarely so we let them potentially share a cacheline.
* The next two locks listen_lock, and tid_release_lock are used rarely
* so we let them potentially share a cacheline.
*/
struct listen_info *listen_hash_tab[LISTEN_INFO_HASH_SIZE];
struct mtx listen_lock;
LIST_HEAD(, listen_ctx) *listen_hash;
u_long listen_mask;
int lctx_count; /* # of lctx in the hash table */
struct mtx lctx_hash_lock;
struct mbuf_head deferq;
struct task deferq_task;
struct socket **tid_release_list;
void **tid_release_list;
struct mtx tid_release_lock;
struct task tid_release_task;
volatile int tx_dma_pending;
unsigned int ddp_llimit;
unsigned int ddp_ulimit;
unsigned int rx_page_size;
u8 *ppod_map;
unsigned int nppods;
struct mtx ppod_map_lock;
struct adap_ports *ports;
struct taskqueue *tq;
};
struct synq_entry {
TAILQ_ENTRY(synq_entry) link; /* listen_ctx's synq link */
int flags; /* same as toepcb's tp_flags */
int tid;
struct mbuf *m; /* backpointer to containing mbuf */
struct listen_ctx *lctx; /* backpointer to listen ctx */
struct cpl_pass_establish *cpl;
struct toepcb *toep;
struct l2t_entry *e;
uint32_t iss;
uint32_t ts;
uint32_t opt0h;
uint32_t qset;
int rx_credits;
volatile u_int refcnt;
#define RPL_OK 0 /* ok to reply */
#define RPL_DONE 1 /* replied already */
#define RPL_DONT 2 /* don't reply */
volatile u_int reply; /* see above. */
};
#define LCTX_RPL_PENDING 1 /* waiting for CPL_PASS_OPEN_RPL */
struct listen_ctx {
struct socket *lso;
struct tom_data *tom_data;
int ulp_mode;
LIST_HEAD(, toepcb) synq_head;
LIST_ENTRY(listen_ctx) link; /* listen hash linkage */
volatile int refcnt;
int stid;
int flags;
struct inpcb *inp; /* listening socket's inp */
int qset;
TAILQ_HEAD(, synq_entry) synq;
};
#define TOM_DATA(dev) (*(struct tom_data **)&(dev)->tod_l4opt)
#define T3C_DEV(sk) ((TOM_DATA(TOE_DEV(sk)))->cdev)
#define TOEP_T3C_DEV(toep) (TOM_DATA(toep->tp_toedev)->cdev)
#define TOM_TUNABLE(dev, param) (TOM_DATA(dev)->conf.param)
void t3_process_tid_release_list(void *data, int pending);
#define TP_DATASENT (1 << 0)
#define TP_TX_WAIT_IDLE (1 << 1)
#define TP_FIN_SENT (1 << 2)
#define TP_ABORT_RPL_PENDING (1 << 3)
#define TP_ABORT_SHUTDOWN (1 << 4)
#define TP_ABORT_RPL_RCVD (1 << 5)
#define TP_ABORT_REQ_RCVD (1 << 6)
#define TP_CLOSE_CON_REQUESTED (1 << 7)
#define TP_SYN_RCVD (1 << 8)
#define TP_ESTABLISHED (1 << 9)
void t3_init_tunables(struct tom_data *t);
void t3_sysctl_register(struct adapter *sc, const struct tom_tunables *p);
static __inline struct mbuf *
m_gethdr_nofail(int len)
static inline struct tom_data *
t3_tomdata(struct toedev *tod)
{
struct mbuf *m;
m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
panic("implement lowmem cache\n");
}
KASSERT(len < MHLEN, ("requested header size too large for mbuf"));
m->m_pkthdr.len = m->m_len = len;
return (m);
return (member2struct(tom_data, tod, tod));
}
union listen_entry {
void *ctx;
union listen_entry *next;
};
union active_open_entry {
void *ctx;
union active_open_entry *next;
};
/*
* Map an ATID or STID to their entries in the corresponding TID tables.
*/
static inline union active_open_entry *atid2entry(const struct tid_info *t,
unsigned int atid)
{
return &t->atid_tab[atid - t->atid_base];
}
static inline union listen_entry *stid2entry(const struct tid_info *t,
unsigned int stid)
{
return &t->stid_tab[stid - t->stid_base];
}
/*
* Find the connection corresponding to a TID.
*/
static inline void *lookup_tid(const struct tid_info *t, unsigned int tid)
{
void *p;
if (tid >= t->ntids)
return (NULL);
p = t->tid_tab[tid];
if (p < (void *)t->tid_tab || p >= (void *)&t->atid_tab[t->natids])
return (p);
return (NULL);
}
/*
* Find the connection corresponding to a server TID.
*/
static inline void *lookup_stid(const struct tid_info *t, unsigned int tid)
{
void *p;
if (tid < t->stid_base || tid >= t->stid_base + t->nstids)
return (NULL);
p = stid2entry(t, tid)->ctx;
if (p < (void *)t->tid_tab || p >= (void *)&t->atid_tab[t->natids])
return (p);
return (NULL);
}
/*
* Find the connection corresponding to an active-open TID.
*/
static inline void *lookup_atid(const struct tid_info *t, unsigned int tid)
{
void *p;
if (tid < t->atid_base || tid >= t->atid_base + t->natids)
return (NULL);
p = atid2entry(t, tid)->ctx;
if (p < (void *)t->tid_tab || p >= (void *)&t->atid_tab[t->natids])
return (p);
return (NULL);
}
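A sketch of how these lookups are typically used (the helper name is hypothetical; it assumes the adapter's tom_softc points at this module's tom_data, as elsewhere in the driver): a CPL handler recovers its per-connection state from the hardware TID carried in the message.

static inline struct toepcb *
example_tid_to_toep(struct adapter *sc, unsigned int tid)
{
	struct tom_data *td = sc->tom_softc;

	return ((struct toepcb *)lookup_tid(&td->tid_maps, tid));
}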
static inline uint32_t
calc_opt2(int cpu_idx)
{
uint32_t opt2 = F_CPU_INDEX_VALID | V_CPU_INDEX(cpu_idx);
/* 3 = highspeed CC algorithm */
opt2 |= V_FLAVORS_VALID(1) | V_CONG_CONTROL_FLAVOR(3) |
V_PACING_FLAVOR(1);
/* coalesce and push bit semantics */
opt2 |= F_RX_COALESCE_VALID | V_RX_COALESCE(3);
return (htobe32(opt2));
}
/* cxgb_tom.c */
struct toepcb *toepcb_alloc(struct toedev *);
void toepcb_free(struct toepcb *);
/* cxgb_cpl_io.c */
void t3_init_cpl_io(struct adapter *);
int t3_push_frames(struct socket *, int);
int t3_connect(struct toedev *, struct socket *, struct rtentry *,
struct sockaddr *);
int t3_tod_output(struct toedev *, struct tcpcb *);
int t3_send_rst(struct toedev *, struct tcpcb *);
int t3_send_fin(struct toedev *, struct tcpcb *);
void insert_tid(struct tom_data *, void *, unsigned int);
void update_tid(struct tom_data *, void *, unsigned int);
void remove_tid(struct tom_data *, unsigned int);
uint32_t calc_opt0h(struct socket *, int, int, struct l2t_entry *);
uint32_t calc_opt0l(struct socket *, int);
void queue_tid_release(struct toedev *, unsigned int);
void offload_socket(struct socket *, struct toepcb *);
void undo_offload_socket(struct socket *);
int select_rcv_wscale(void);
unsigned long select_rcv_wnd(struct socket *);
int find_best_mtu_idx(struct adapter *, struct in_conninfo *, int);
void make_established(struct socket *, uint32_t, uint32_t, uint16_t);
void t3_rcvd(struct toedev *, struct tcpcb *);
void t3_pcb_detach(struct toedev *, struct tcpcb *);
void send_abort_rpl(struct toedev *, int, int);
void release_tid(struct toedev *, unsigned int, int);
/* cxgb_listen.c */
void t3_init_listen_cpl_handlers(struct adapter *);
int t3_listen_start(struct toedev *, struct tcpcb *);
int t3_listen_stop(struct toedev *, struct tcpcb *);
void t3_syncache_added(struct toedev *, void *);
void t3_syncache_removed(struct toedev *, void *);
int t3_syncache_respond(struct toedev *, void *, struct mbuf *);
int do_abort_req_synqe(struct sge_qset *, struct rsp_desc *, struct mbuf *);
int do_abort_rpl_synqe(struct sge_qset *, struct rsp_desc *, struct mbuf *);
void t3_offload_socket(struct toedev *, void *, struct socket *);
#endif

View File

@ -1,140 +0,0 @@
/**************************************************************************
Copyright (c) 2007, Chelsio Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Neither the name of the Chelsio Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/sockopt.h>
#include <sys/sockstate.h>
#include <sys/sockbuf.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <cxgb_osdep.h>
#include <sys/mbufq.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
#include <net/route.h>
#include <t3cdev.h>
#include <common/cxgb_firmware_exports.h>
#include <common/cxgb_tcb.h>
#include <common/cxgb_ctl_defs.h>
#include <common/cxgb_t3_cpl.h>
#include <cxgb_offload.h>
#include <cxgb_include.h>
#include <ulp/toecore/cxgb_toedev.h>
#include <ulp/tom/cxgb_tom.h>
#include <ulp/tom/cxgb_defs.h>
#include <ulp/tom/cxgb_t3_ddp.h>
/* Avoid clutter in the hw.* space, keep all toe tunables within hw.cxgb */
SYSCTL_DECL(_hw_cxgb);
static SYSCTL_NODE(_hw_cxgb, OID_AUTO, toe, CTLFLAG_RD, 0, "TOE parameters");
static struct tom_tunables default_tunable_vals = {
.max_host_sndbuf = 32 * 1024,
.tx_hold_thres = 0,
.max_wrs = 15,
.rx_credit_thres = 15 * 1024,
.cong_alg = -1,
.mss = 16384,
.delack = 1,
.max_conn = -1,
.soft_backlog_limit = 0,
.ddp = 1,
.ddp_thres = 14 * 4096,
.ddp_copy_limit = 13 * 4096,
.ddp_push_wait = 1,
.ddp_rcvcoalesce = 0,
.zcopy_sosend_enabled = 0,
.zcopy_sosend_partial_thres = 40960,
.zcopy_sosend_partial_copy = 4096 * 3,
.zcopy_sosend_thres = 128 * 1024,
.zcopy_sosend_copy = 4096 * 2,
.zcopy_sosend_ret_pending_dma = 1,
.activated = 1,
};
static int activated = 1;
TUNABLE_INT("hw.cxgb.toe.activated", &activated);
SYSCTL_UINT(_hw_cxgb_toe, OID_AUTO, activated, CTLFLAG_RDTUN, &activated, 0,
"enable TOE at init time");
static int ddp = 1;
TUNABLE_INT("hw.cxgb.toe.ddp", &ddp);
SYSCTL_UINT(_hw_cxgb_toe, OID_AUTO, ddp, CTLFLAG_RDTUN, &ddp, 0, "enable DDP");
void
t3_init_tunables(struct tom_data *t)
{
t->conf = default_tunable_vals;
/* Adjust tunables */
t->conf.activated = activated;
t->conf.ddp = ddp;
/* Now apply device specific fixups. */
t->conf.mss = T3C_DATA(t->cdev)->tx_max_chunk;
t->conf.max_wrs = T3C_DATA(t->cdev)->max_wrs;
}
void
t3_sysctl_register(struct adapter *sc, const struct tom_tunables *p)
{
struct sysctl_ctx_list *ctx;
struct sysctl_oid_list *children;
ctx = device_get_sysctl_ctx(sc->dev);
children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
}

View File

@ -157,6 +157,7 @@ enum {
INTR_DIRECT = (1 << 2), /* direct interrupts for everything */
MASTER_PF = (1 << 3),
ADAP_SYSCTL_CTX = (1 << 4),
TOM_INIT_DONE = (1 << 5),
CXGBE_BUSY = (1 << 9),
@ -199,7 +200,7 @@ struct port_info {
int first_txq; /* index of first tx queue */
int nrxq; /* # of rx queues */
int first_rxq; /* index of first rx queue */
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
int nofldtxq; /* # of offload tx queues */
int first_ofld_txq; /* index of first offload tx queue */
int nofldrxq; /* # of offload rx queues */
@ -213,6 +214,8 @@ struct port_info {
struct link_config link_cfg;
struct port_stats stats;
eventhandler_tag vlan_c;
struct callout tick;
struct sysctl_ctx_list ctx; /* from ifconfig up to driver detach */
@ -296,7 +299,7 @@ struct sge_iq {
enum {
EQ_CTRL = 1,
EQ_ETH = 2,
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
EQ_OFLD = 3,
#endif
@ -422,14 +425,36 @@ struct sge_rxq {
} __aligned(CACHE_LINE_SIZE);
#ifndef TCP_OFFLOAD_DISABLE
static inline struct sge_rxq *
iq_to_rxq(struct sge_iq *iq)
{
return (member2struct(sge_rxq, iq, iq));
}
#ifdef TCP_OFFLOAD
/* ofld_rxq: SGE ingress queue + SGE free list + miscellaneous items */
struct sge_ofld_rxq {
struct sge_iq iq; /* MUST be first */
struct sge_fl fl; /* MUST follow iq */
} __aligned(CACHE_LINE_SIZE);
static inline struct sge_ofld_rxq *
iq_to_ofld_rxq(struct sge_iq *iq)
{
return (member2struct(sge_ofld_rxq, iq, iq));
}
#endif
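These accessors rely on the embedded-member layout: given the iq the SGE hands back, member2struct() (assumed here to behave like the usual container-of macro) recovers the enclosing queue structure. A minimal illustration with a made-up outer structure:

struct example_outer {
	int cookie;
	struct sge_iq iq;	/* embedded member, like sge_rxq/sge_ofld_rxq */
};

static inline struct example_outer *
iq_to_example_outer(struct sge_iq *iq)
{
	return (member2struct(example_outer, iq, iq));
}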
struct wrqe {
STAILQ_ENTRY(wrqe) link;
struct sge_wrq *wrq;
int wr_len;
uint64_t wr[] __aligned(16);
};
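Hypothetical sketch (name and locking are assumptions, not driver code): when an egress queue is out of tx descriptors, a prebuilt work request can simply be parked on the queue's wr_list (declared in struct sge_wrq below) and drained later once descriptors free up.

static inline void
example_defer_wr(struct sge_wrq *wrq, struct wrqe *wr)
{
	/* Caller is assumed to hold the queue lock. */
	wr->wrq = wrq;				/* remember the owning queue */
	STAILQ_INSERT_TAIL(&wrq->wr_list, wr, link);
}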
/*
* wrq: SGE egress queue that is given prebuilt work requests. Both the control
* and offload tx queues are of this type.
@ -438,8 +463,9 @@ struct sge_wrq {
struct sge_eq eq; /* MUST be first */
struct adapter *adapter;
struct mbuf *head; /* held up due to lack of descriptors */
struct mbuf *tail; /* valid only if head is valid */
/* List of WRs held up due to lack of tx descriptors */
STAILQ_HEAD(, wrqe) wr_list;
/* stats for common events first */
@ -457,7 +483,7 @@ struct sge {
int nrxq; /* total # of Ethernet rx queues */
int ntxq; /* total # of Ethernet tx tx queues */
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
int nofldrxq; /* total # of TOE rx queues */
int nofldtxq; /* total # of TOE tx queues */
#endif
@ -469,7 +495,7 @@ struct sge {
struct sge_wrq *ctrlq; /* Control queues */
struct sge_txq *txq; /* NIC tx queues */
struct sge_rxq *rxq; /* NIC rx queues */
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
struct sge_wrq *ofld_txq; /* TOE tx queues */
struct sge_ofld_rxq *ofld_rxq; /* TOE rx queues */
#endif
@ -483,6 +509,7 @@ struct sge {
struct rss_header;
typedef int (*cpl_handler_t)(struct sge_iq *, const struct rss_header *,
struct mbuf *);
typedef int (*an_handler_t)(struct sge_iq *, const struct rsp_ctrl *);
struct adapter {
SLIST_ENTRY(adapter) link;
@ -519,15 +546,15 @@ struct adapter {
uint8_t chan_map[NCHAN];
uint32_t filter_mode;
#ifndef TCP_OFFLOAD_DISABLE
struct uld_softc tom;
#ifdef TCP_OFFLOAD
void *tom_softc; /* (struct tom_data *) */
struct tom_tunables tt;
#endif
struct l2t_data *l2t; /* L2 table */
struct tid_info tids;
int open_device_map;
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
int offload_map;
#endif
int flags;
@ -554,7 +581,8 @@ struct adapter {
TAILQ_HEAD(, sge_fl) sfl;
struct callout sfl_callout;
cpl_handler_t cpl_handler[256] __aligned(CACHE_LINE_SIZE);
an_handler_t an_handler __aligned(CACHE_LINE_SIZE);
cpl_handler_t cpl_handler[256];
};
#define ADAPTER_LOCK(sc) mtx_lock(&(sc)->sc_lock)
@ -609,82 +637,96 @@ struct adapter {
static inline uint32_t
t4_read_reg(struct adapter *sc, uint32_t reg)
{
return bus_space_read_4(sc->bt, sc->bh, reg);
}
static inline void
t4_write_reg(struct adapter *sc, uint32_t reg, uint32_t val)
{
bus_space_write_4(sc->bt, sc->bh, reg, val);
}
static inline uint64_t
t4_read_reg64(struct adapter *sc, uint32_t reg)
{
return t4_bus_space_read_8(sc->bt, sc->bh, reg);
}
static inline void
t4_write_reg64(struct adapter *sc, uint32_t reg, uint64_t val)
{
t4_bus_space_write_8(sc->bt, sc->bh, reg, val);
}
static inline void
t4_os_pci_read_cfg1(struct adapter *sc, int reg, uint8_t *val)
{
*val = pci_read_config(sc->dev, reg, 1);
}
static inline void
t4_os_pci_write_cfg1(struct adapter *sc, int reg, uint8_t val)
{
pci_write_config(sc->dev, reg, val, 1);
}
static inline void
t4_os_pci_read_cfg2(struct adapter *sc, int reg, uint16_t *val)
{
*val = pci_read_config(sc->dev, reg, 2);
}
static inline void
t4_os_pci_write_cfg2(struct adapter *sc, int reg, uint16_t val)
{
pci_write_config(sc->dev, reg, val, 2);
}
static inline void
t4_os_pci_read_cfg4(struct adapter *sc, int reg, uint32_t *val)
{
*val = pci_read_config(sc->dev, reg, 4);
}
static inline void
t4_os_pci_write_cfg4(struct adapter *sc, int reg, uint32_t val)
{
pci_write_config(sc->dev, reg, val, 4);
}
static inline struct port_info *
adap2pinfo(struct adapter *sc, int idx)
{
return (sc->port[idx]);
}
static inline void
t4_os_set_hw_addr(struct adapter *sc, int idx, uint8_t hw_addr[])
{
bcopy(hw_addr, sc->port[idx]->hw_addr, ETHER_ADDR_LEN);
}
static inline bool is_10G_port(const struct port_info *pi)
{
return ((pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G) != 0);
}
static inline int tx_resume_threshold(struct sge_eq *eq)
{
return (eq->qsize / 4);
}
@ -698,6 +740,7 @@ void t4_os_portmod_changed(const struct adapter *, int);
void t4_os_link_changed(struct adapter *, int, int);
void t4_iterate(void (*)(struct adapter *, void *), void *);
int t4_register_cpl_handler(struct adapter *, int, cpl_handler_t);
int t4_register_an_handler(struct adapter *, an_handler_t);
/* t4_sge.c */
void t4_sge_modload(void);
@ -714,21 +757,45 @@ void t4_intr_all(void *);
void t4_intr(void *);
void t4_intr_err(void *);
void t4_intr_evt(void *);
int t4_mgmt_tx(struct adapter *, struct mbuf *);
int t4_wrq_tx_locked(struct adapter *, struct sge_wrq *, struct mbuf *);
void t4_wrq_tx_locked(struct adapter *, struct sge_wrq *, struct wrqe *);
int t4_eth_tx(struct ifnet *, struct sge_txq *, struct mbuf *);
void t4_update_fl_bufsize(struct ifnet *);
int can_resume_tx(struct sge_eq *);
static inline int t4_wrq_tx(struct adapter *sc, struct sge_wrq *wrq, struct mbuf *m)
static inline struct wrqe *
alloc_wrqe(int wr_len, struct sge_wrq *wrq)
{
int rc;
int len = offsetof(struct wrqe, wr) + wr_len;
struct wrqe *wr;
TXQ_LOCK(wrq);
rc = t4_wrq_tx_locked(sc, wrq, m);
TXQ_UNLOCK(wrq);
return (rc);
wr = malloc(len, M_CXGBE, M_NOWAIT);
if (__predict_false(wr == NULL))
return (NULL);
wr->wr_len = wr_len;
wr->wrq = wrq;
return (wr);
}
static inline void *
wrtod(struct wrqe *wr)
{
return (&wr->wr[0]);
}
static inline void
free_wrqe(struct wrqe *wr)
{
free(wr, M_CXGBE);
}
static inline void
t4_wrq_tx(struct adapter *sc, struct wrqe *wr)
{
struct sge_wrq *wrq = wr->wrq;
TXQ_LOCK(wrq);
t4_wrq_tx_locked(sc, wrq, wr);
TXQ_UNLOCK(wrq);
}
#endif
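
For orientation, here is a minimal sketch of how the new wrqe API is meant to be used by a producer of work requests. It is illustrative only; send_example_wr() and struct my_cpl are made-up names, but the allocate/fill/send pattern mirrors write_l2e() and set_filter_wr() elsewhere in this commit.

/*
 * Illustrative sketch, not part of the commit: typical wrqe usage.
 * "struct my_cpl" stands in for whatever CPL/WR the caller builds.
 */
static int
send_example_wr(struct adapter *sc, struct sge_wrq *wrq)
{
	struct wrqe *wr;
	struct my_cpl *req;			/* hypothetical payload type */

	wr = alloc_wrqe(sizeof(*req), wrq);	/* M_NOWAIT, may fail */
	if (wr == NULL)
		return (ENOMEM);
	req = wrtod(wr);			/* payload area of the WR */

	/* ... fill in *req ... */

	t4_wrq_tx(sc, wr);	/* doesn't fail; held on the wrq if no descriptors */
	return (0);
}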

View File

@ -27,6 +27,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "common.h"
#include "t4_regs.h"
#include "t4_regs_values.h"

View File

@ -31,12 +31,6 @@
#ifndef __T4_OFFLOAD_H__
#define __T4_OFFLOAD_H__
/* XXX: flagrant misuse of mbuf fields (during tx by TOM) */
#define MBUF_EQ(m) (*((void **)(&(m)->m_pkthdr.rcvif)))
/* These have to work for !M_PKTHDR so we use a field from m_hdr. */
#define MBUF_TX_CREDITS(m) ((m)->m_hdr.pad[0])
#define MBUF_DMA_MAPPED(m) ((m)->m_hdr.pad[1])
#define INIT_ULPTX_WR(w, wrlen, atomic, tid) do { \
(w)->wr.wr_hi = htonl(V_FW_WR_OP(FW_ULPTX_WR) | V_FW_WR_ATOMIC(atomic)); \
(w)->wr.wr_mid = htonl(V_FW_WR_LEN16(DIV_ROUND_UP(wrlen, 16)) | \
@ -119,7 +113,7 @@ struct t4_virt_res { /* virtualized HW resources */
struct t4_range ocq;
};
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
enum {
ULD_TOM = 1,
};
@ -130,13 +124,8 @@ struct uld_info {
SLIST_ENTRY(uld_info) link;
int refcount;
int uld_id;
int (*attach)(struct adapter *, void **);
int (*detach)(void *);
};
struct uld_softc {
struct uld_info *uld;
void *softc;
int (*activate)(struct adapter *);
int (*deactivate)(struct adapter *);
};
struct tom_tunables {
@ -148,6 +137,8 @@ struct tom_tunables {
int t4_register_uld(struct uld_info *);
int t4_unregister_uld(struct uld_info *);
int t4_activate_uld(struct adapter *, int);
int t4_deactivate_uld(struct adapter *, int);
#endif
#endif
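
To make the reworked ULD interface concrete, a minimal sketch of how an upper-layer driver such as t4_tom is expected to plug in follows. The example_* names are placeholders; the base driver invokes the activate/deactivate callbacks from t4_activate_uld()/t4_deactivate_uld() when TOE is enabled or disabled on one of its ports.

/*
 * Illustrative sketch, not part of the commit.  A ULD registers a
 * uld_info at module load time; activate/deactivate are called with
 * the adapter whose offload capability is being toggled.
 */
static int example_activate(struct adapter *);
static int example_deactivate(struct adapter *);

static struct uld_info example_uld_info = {
	.uld_id = ULD_TOM,
	.activate = example_activate,
	.deactivate = example_deactivate,
};

static int
example_mod_event(module_t mod, int cmd, void *arg)
{
	switch (cmd) {
	case MOD_LOAD:
		return (t4_register_uld(&example_uld_info));
	case MOD_UNLOAD:
		return (t4_unregister_uld(&example_uld_info));
	default:
		return (EOPNOTSUPP);
	}
}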

View File

@ -1,5 +1,5 @@
/*-
* Copyright (c) 2011 Chelsio Communications, Inc.
* Copyright (c) 2012 Chelsio Communications, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -38,16 +38,7 @@ __FBSDID("$FreeBSD$");
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/sbuf.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/ethernet.h>
#include <net/if_vlan_var.h>
#include <net/if_dl.h>
#include <net/if_llatbl.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/if_ether.h>
#include "common/common.h"
#include "common/jhash.h"
@ -72,42 +63,11 @@ __FBSDID("$FreeBSD$");
* lifetime of an L2T entry is fully contained in the lifetime of the TOE.
*/
/* identifies sync vs async L2T_WRITE_REQs */
#define S_SYNC_WR 12
#define V_SYNC_WR(x) ((x) << S_SYNC_WR)
#define F_SYNC_WR V_SYNC_WR(1)
enum {
L2T_STATE_VALID, /* entry is up to date */
L2T_STATE_STALE, /* entry may be used but needs revalidation */
L2T_STATE_RESOLVING, /* entry needs address resolution */
L2T_STATE_SYNC_WRITE, /* synchronous write of entry underway */
/* when state is one of the below the entry is not hashed */
L2T_STATE_SWITCHING, /* entry is being used by a switching filter */
L2T_STATE_UNUSED /* entry not in use */
};
struct l2t_data {
struct rwlock lock;
volatile int nfree; /* number of free entries */
struct l2t_entry *rover;/* starting point for next allocation */
struct l2t_entry l2tab[L2T_SIZE];
};
static int do_l2t_write_rpl(struct sge_iq *, const struct rss_header *,
struct mbuf *);
#define VLAN_NONE 0xfff
#define SA(x) ((struct sockaddr *)(x))
#define SIN(x) ((struct sockaddr_in *)(x))
#define SINADDR(x) (SIN(x)->sin_addr.s_addr)
/*
* Allocate a free L2T entry. Must be called with l2t_data.lock held.
*/
static struct l2t_entry *
alloc_l2e(struct l2t_data *d)
struct l2t_entry *
t4_alloc_l2e(struct l2t_data *d)
{
struct l2t_entry *end, *e, **p;
@ -121,7 +81,8 @@ alloc_l2e(struct l2t_data *d)
if (atomic_load_acq_int(&e->refcnt) == 0)
goto found;
for (e = d->l2tab; atomic_load_acq_int(&e->refcnt); ++e) ;
for (e = d->l2tab; atomic_load_acq_int(&e->refcnt); ++e)
continue;
found:
d->rover = e + 1;
atomic_subtract_int(&d->nfree, 1);
@ -148,19 +109,18 @@ found:
* Write an L2T entry. Must be called with the entry locked.
* The write may be synchronous or asynchronous.
*/
static int
write_l2e(struct adapter *sc, struct l2t_entry *e, int sync)
int
t4_write_l2e(struct adapter *sc, struct l2t_entry *e, int sync)
{
struct mbuf *m;
struct wrqe *wr;
struct cpl_l2t_write_req *req;
mtx_assert(&e->lock, MA_OWNED);
if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
wr = alloc_wrqe(sizeof(*req), &sc->sge.mgmtq);
if (wr == NULL)
return (ENOMEM);
req = mtod(m, struct cpl_l2t_write_req *);
m->m_pkthdr.len = m->m_len = sizeof(*req);
req = wrtod(wr);
INIT_TP_WR(req, 0);
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx |
@ -170,7 +130,7 @@ write_l2e(struct adapter *sc, struct l2t_entry *e, int sync)
req->vlan = htons(e->vlan);
memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
t4_mgmt_tx(sc, m);
t4_wrq_tx(sc, wr);
if (sync && e->state != L2T_STATE_SWITCHING)
e->state = L2T_STATE_SYNC_WRITE;
@ -189,7 +149,7 @@ t4_l2t_alloc_switching(struct l2t_data *d)
struct l2t_entry *e;
rw_rlock(&d->lock);
e = alloc_l2e(d);
e = t4_alloc_l2e(d);
if (e) {
mtx_lock(&e->lock); /* avoid race with t4_l2t_free */
e->state = L2T_STATE_SWITCHING;
@ -214,7 +174,7 @@ t4_l2t_set_switching(struct adapter *sc, struct l2t_entry *e, uint16_t vlan,
e->lport = port;
memcpy(e->dmac, eth_addr, ETHER_ADDR_LEN);
mtx_lock(&e->lock);
rc = write_l2e(sc, e, 0);
rc = t4_write_l2e(sc, e, 0);
mtx_unlock(&e->lock);
return (rc);
}
@ -234,10 +194,13 @@ t4_init_l2t(struct adapter *sc, int flags)
rw_init(&d->lock, "L2T");
for (i = 0; i < L2T_SIZE; i++) {
d->l2tab[i].idx = i;
d->l2tab[i].state = L2T_STATE_UNUSED;
mtx_init(&d->l2tab[i].lock, "L2T_E", NULL, MTX_DEF);
atomic_store_rel_int(&d->l2tab[i].refcnt, 0);
struct l2t_entry *e = &d->l2tab[i];
e->idx = i;
e->state = L2T_STATE_UNUSED;
mtx_init(&e->lock, "L2T_E", NULL, MTX_DEF);
STAILQ_INIT(&e->wr_list);
atomic_store_rel_int(&e->refcnt, 0);
}
sc->l2t = d;
@ -259,6 +222,24 @@ t4_free_l2t(struct l2t_data *d)
return (0);
}
int
do_l2t_write_rpl(struct sge_iq *iq, const struct rss_header *rss,
struct mbuf *m)
{
const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1);
unsigned int tid = GET_TID(rpl);
unsigned int idx = tid & (L2T_SIZE - 1);
if (__predict_false(rpl->status != CPL_ERR_NONE)) {
log(LOG_ERR,
"Unexpected L2T_WRITE_RPL status %u for entry %u\n",
rpl->status, idx);
return (EINVAL);
}
return (0);
}
#ifdef SBUF_DRAIN
static inline unsigned int
vlan_prio(const struct l2t_entry *e)
@ -273,7 +254,7 @@ l2e_state(const struct l2t_entry *e)
case L2T_STATE_VALID: return 'V'; /* valid, fast-path entry */
case L2T_STATE_STALE: return 'S'; /* needs revalidation, but usable */
case L2T_STATE_SYNC_WRITE: return 'W';
case L2T_STATE_RESOLVING: return e->arpq_head ? 'A' : 'R';
case L2T_STATE_RESOLVING: return STAILQ_EMPTY(&e->wr_list) ? 'R' : 'A';
case L2T_STATE_SWITCHING: return 'X';
default: return 'U';
}
@ -311,20 +292,20 @@ sysctl_l2t(SYSCTL_HANDLER_ARGS)
"Ethernet address VLAN/P LP State Users Port");
header = 1;
}
if (e->state == L2T_STATE_SWITCHING || e->v6)
if (e->state == L2T_STATE_SWITCHING)
ip[0] = 0;
else
snprintf(ip, sizeof(ip), "%s",
inet_ntoa(*(struct in_addr *)&e->addr[0]));
inet_ntoa(*(struct in_addr *)&e->addr));
/* XXX: accessing lle probably not safe? */
/* XXX: e->ifp may not be around */
sbuf_printf(sb, "\n%4u %-15s %02x:%02x:%02x:%02x:%02x:%02x %4d"
" %u %2u %c %5u %s",
e->idx, ip, e->dmac[0], e->dmac[1], e->dmac[2],
e->dmac[3], e->dmac[4], e->dmac[5],
e->vlan & 0xfff, vlan_prio(e), e->lport,
l2e_state(e), atomic_load_acq_int(&e->refcnt),
e->lle ? e->lle->lle_tbl->llt_ifp->if_xname : "");
e->ifp->if_xname);
skip:
mtx_unlock(&e->lock);
}
@ -335,459 +316,3 @@ skip:
return (rc);
}
#endif
#ifndef TCP_OFFLOAD_DISABLE
static inline void
l2t_hold(struct l2t_data *d, struct l2t_entry *e)
{
if (atomic_fetchadd_int(&e->refcnt, 1) == 0) /* 0 -> 1 transition */
atomic_subtract_int(&d->nfree, 1);
}
/*
* To avoid having to check address families we do not allow v4 and v6
* neighbors to be on the same hash chain. We keep v4 entries in the first
* half of available hash buckets and v6 in the second.
*/
enum {
L2T_SZ_HALF = L2T_SIZE / 2,
L2T_HASH_MASK = L2T_SZ_HALF - 1
};
static inline unsigned int
arp_hash(const uint32_t *key, int ifindex)
{
return jhash_2words(*key, ifindex, 0) & L2T_HASH_MASK;
}
static inline unsigned int
ipv6_hash(const uint32_t *key, int ifindex)
{
uint32_t xor = key[0] ^ key[1] ^ key[2] ^ key[3];
return L2T_SZ_HALF + (jhash_2words(xor, ifindex, 0) & L2T_HASH_MASK);
}
static inline unsigned int
addr_hash(const uint32_t *addr, int addr_len, int ifindex)
{
return addr_len == 4 ? arp_hash(addr, ifindex) :
ipv6_hash(addr, ifindex);
}
/*
* Checks if an L2T entry is for the given IP/IPv6 address. It does not check
* whether the L2T entry and the address are of the same address family.
* Callers ensure an address is only checked against L2T entries of the same
* family, something made trivial by the separation of IP and IPv6 hash chains
* mentioned above.  Returns 0 if there's a match.
*/
static inline int
addreq(const struct l2t_entry *e, const uint32_t *addr)
{
if (e->v6)
return (e->addr[0] ^ addr[0]) | (e->addr[1] ^ addr[1]) |
(e->addr[2] ^ addr[2]) | (e->addr[3] ^ addr[3]);
return e->addr[0] ^ addr[0];
}
/*
* Add a packet to an L2T entry's queue of packets awaiting resolution.
* Must be called with the entry's lock held.
*/
static inline void
arpq_enqueue(struct l2t_entry *e, struct mbuf *m)
{
mtx_assert(&e->lock, MA_OWNED);
KASSERT(m->m_nextpkt == NULL, ("%s: m_nextpkt not NULL", __func__));
if (e->arpq_head)
e->arpq_tail->m_nextpkt = m;
else
e->arpq_head = m;
e->arpq_tail = m;
}
static inline void
send_pending(struct adapter *sc, struct l2t_entry *e)
{
struct mbuf *m, *next;
mtx_assert(&e->lock, MA_OWNED);
for (m = e->arpq_head; m; m = next) {
next = m->m_nextpkt;
m->m_nextpkt = NULL;
t4_wrq_tx(sc, MBUF_EQ(m), m);
}
e->arpq_head = e->arpq_tail = NULL;
}
#ifdef INET
/*
* Looks up and fills up an l2t_entry's lle. We grab all the locks that we need
* ourself, and update e->state at the end if e->lle was successfully filled.
*
* The lle passed in comes from arpresolve and is ignored as it does not appear
* to be of much use.
*/
static int
l2t_fill_lle(struct adapter *sc, struct l2t_entry *e, struct llentry *unused)
{
int rc = 0;
struct sockaddr_in sin;
struct ifnet *ifp = e->ifp;
struct llentry *lle;
bzero(&sin, sizeof(struct sockaddr_in));
if (e->v6)
panic("%s: IPv6 L2 resolution not supported yet.", __func__);
sin.sin_family = AF_INET;
sin.sin_len = sizeof(struct sockaddr_in);
memcpy(&sin.sin_addr, e->addr, sizeof(struct sockaddr_in));
mtx_assert(&e->lock, MA_NOTOWNED);
KASSERT(e->addr && ifp, ("%s: bad prep before call", __func__));
IF_AFDATA_LOCK(ifp);
lle = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, SA(&sin));
IF_AFDATA_UNLOCK(ifp);
if (!LLE_IS_VALID(lle))
return (ENOMEM);
if (!(lle->la_flags & LLE_VALID)) {
rc = EINVAL;
goto done;
}
LLE_ADDREF(lle);
mtx_lock(&e->lock);
if (e->state == L2T_STATE_RESOLVING) {
KASSERT(e->lle == NULL, ("%s: lle already valid", __func__));
e->lle = lle;
memcpy(e->dmac, &lle->ll_addr, ETHER_ADDR_LEN);
write_l2e(sc, e, 1);
} else {
KASSERT(e->lle == lle, ("%s: lle changed", __func__));
LLE_REMREF(lle);
}
mtx_unlock(&e->lock);
done:
LLE_WUNLOCK(lle);
return (rc);
}
#endif
int
t4_l2t_send(struct adapter *sc, struct mbuf *m, struct l2t_entry *e)
{
#ifndef INET
return (EINVAL);
#else
struct llentry *lle = NULL;
struct sockaddr_in sin;
struct ifnet *ifp = e->ifp;
if (e->v6)
panic("%s: IPv6 L2 resolution not supported yet.", __func__);
bzero(&sin, sizeof(struct sockaddr_in));
sin.sin_family = AF_INET;
sin.sin_len = sizeof(struct sockaddr_in);
memcpy(&sin.sin_addr, e->addr, sizeof(struct sockaddr_in));
again:
switch (e->state) {
case L2T_STATE_STALE: /* entry is stale, kick off revalidation */
if (arpresolve(ifp, NULL, NULL, SA(&sin), e->dmac, &lle) == 0)
l2t_fill_lle(sc, e, lle);
/* Fall through */
case L2T_STATE_VALID: /* fast-path, send the packet on */
return t4_wrq_tx(sc, MBUF_EQ(m), m);
case L2T_STATE_RESOLVING:
case L2T_STATE_SYNC_WRITE:
mtx_lock(&e->lock);
if (e->state != L2T_STATE_SYNC_WRITE &&
e->state != L2T_STATE_RESOLVING) {
/* state changed by the time we got here */
mtx_unlock(&e->lock);
goto again;
}
arpq_enqueue(e, m);
mtx_unlock(&e->lock);
if (e->state == L2T_STATE_RESOLVING &&
arpresolve(ifp, NULL, NULL, SA(&sin), e->dmac, &lle) == 0)
l2t_fill_lle(sc, e, lle);
}
return (0);
#endif
}
/*
* Called when an L2T entry has no more users. The entry is left in the hash
* table since it is likely to be reused but we also bump nfree to indicate
* that the entry can be reallocated for a different neighbor. We also drop
* the existing neighbor reference in case the neighbor is going away and is
* waiting on our reference.
*
* Because entries can be reallocated to other neighbors once their ref count
* drops to 0 we need to take the entry's lock to avoid races with a new
* incarnation.
*/
static void
t4_l2e_free(struct l2t_entry *e)
{
struct llentry *lle = NULL;
struct l2t_data *d;
mtx_lock(&e->lock);
if (atomic_load_acq_int(&e->refcnt) == 0) { /* hasn't been recycled */
lle = e->lle;
e->lle = NULL;
/*
* Don't need to worry about the arpq, an L2T entry can't be
* released if any packets are waiting for resolution as we
* need to be able to communicate with the device to close a
* connection.
*/
}
mtx_unlock(&e->lock);
d = container_of(e, struct l2t_data, l2tab[e->idx]);
atomic_add_int(&d->nfree, 1);
if (lle)
LLE_FREE(lle);
}
void
t4_l2t_release(struct l2t_entry *e)
{
if (atomic_fetchadd_int(&e->refcnt, -1) == 1)
t4_l2e_free(e);
}
static int
do_l2t_write_rpl(struct sge_iq *iq, const struct rss_header *rss,
struct mbuf *m)
{
struct adapter *sc = iq->adapter;
const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1);
unsigned int tid = GET_TID(rpl);
unsigned int idx = tid & (L2T_SIZE - 1);
if (__predict_false(rpl->status != CPL_ERR_NONE)) {
log(LOG_ERR,
"Unexpected L2T_WRITE_RPL status %u for entry %u\n",
rpl->status, idx);
return (EINVAL);
}
if (tid & F_SYNC_WR) {
struct l2t_entry *e = &sc->l2t->l2tab[idx];
mtx_lock(&e->lock);
if (e->state != L2T_STATE_SWITCHING) {
send_pending(sc, e);
e->state = L2T_STATE_VALID;
}
mtx_unlock(&e->lock);
}
return (0);
}
/*
* Reuse an L2T entry that was previously used for the same next hop.
*/
static void
reuse_entry(struct l2t_entry *e)
{
struct llentry *lle;
mtx_lock(&e->lock); /* avoid race with t4_l2t_free */
lle = e->lle;
if (lle) {
KASSERT(lle->la_flags & LLE_VALID,
("%s: invalid lle stored in l2t_entry", __func__));
if (lle->la_expire >= time_uptime)
e->state = L2T_STATE_STALE;
else
e->state = L2T_STATE_VALID;
} else
e->state = L2T_STATE_RESOLVING;
mtx_unlock(&e->lock);
}
/*
* The TOE wants an L2 table entry that it can use to reach the next hop over
* the specified port. Produce such an entry - create one if needed.
*
* Note that the ifnet could be a pseudo-device like if_vlan, if_lagg, etc. on
* top of the real cxgbe interface.
*/
struct l2t_entry *
t4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa)
{
struct l2t_entry *e;
struct l2t_data *d = pi->adapter->l2t;
int addr_len;
uint32_t *addr;
int hash;
struct sockaddr_in6 *sin6;
unsigned int smt_idx = pi->port_id;
if (sa->sa_family == AF_INET) {
addr = (uint32_t *)&SINADDR(sa);
addr_len = sizeof(SINADDR(sa));
} else if (sa->sa_family == AF_INET6) {
sin6 = (struct sockaddr_in6 *)sa;
addr = (uint32_t *)&sin6->sin6_addr.s6_addr;
addr_len = sizeof(sin6->sin6_addr.s6_addr);
} else
return (NULL);
#ifndef VLAN_TAG
if (ifp->if_type == IFT_L2VLAN)
return (NULL);
#endif
hash = addr_hash(addr, addr_len, ifp->if_index);
rw_wlock(&d->lock);
for (e = d->l2tab[hash].first; e; e = e->next) {
if (!addreq(e, addr) && e->ifp == ifp && e->smt_idx == smt_idx){
l2t_hold(d, e);
if (atomic_load_acq_int(&e->refcnt) == 1)
reuse_entry(e);
goto done;
}
}
/* Need to allocate a new entry */
e = alloc_l2e(d);
if (e) {
mtx_lock(&e->lock); /* avoid race with t4_l2t_free */
e->state = L2T_STATE_RESOLVING;
memcpy(e->addr, addr, addr_len);
e->ifindex = ifp->if_index;
e->smt_idx = smt_idx;
e->ifp = ifp;
e->hash = hash;
e->lport = pi->lport;
e->v6 = (addr_len == 16);
e->lle = NULL;
atomic_store_rel_int(&e->refcnt, 1);
#ifdef VLAN_TAG
if (ifp->if_type == IFT_L2VLAN)
VLAN_TAG(ifp, &e->vlan);
else
e->vlan = VLAN_NONE;
#endif
e->next = d->l2tab[hash].first;
d->l2tab[hash].first = e;
mtx_unlock(&e->lock);
}
done:
rw_wunlock(&d->lock);
return e;
}
/*
* Called when the host's neighbor layer makes a change to some entry that is
* loaded into the HW L2 table.
*/
void
t4_l2t_update(struct adapter *sc, struct llentry *lle)
{
struct l2t_entry *e;
struct l2t_data *d = sc->l2t;
struct sockaddr *sa = L3_ADDR(lle);
struct llentry *old_lle = NULL;
uint32_t *addr = (uint32_t *)&SINADDR(sa);
struct ifnet *ifp = lle->lle_tbl->llt_ifp;
int hash = addr_hash(addr, sizeof(*addr), ifp->if_index);
KASSERT(d != NULL, ("%s: no L2 table", __func__));
LLE_WLOCK_ASSERT(lle);
KASSERT(lle->la_flags & LLE_VALID || lle->la_flags & LLE_DELETED,
("%s: entry neither valid nor deleted.", __func__));
rw_rlock(&d->lock);
for (e = d->l2tab[hash].first; e; e = e->next) {
if (!addreq(e, addr) && e->ifp == ifp) {
mtx_lock(&e->lock);
if (atomic_load_acq_int(&e->refcnt))
goto found;
e->state = L2T_STATE_STALE;
mtx_unlock(&e->lock);
break;
}
}
rw_runlock(&d->lock);
/* The TOE has no interest in this LLE */
return;
found:
rw_runlock(&d->lock);
if (atomic_load_acq_int(&e->refcnt)) {
/* Entry is referenced by at least 1 offloaded connection. */
/* Handle deletes first */
if (lle->la_flags & LLE_DELETED) {
if (lle == e->lle) {
e->lle = NULL;
e->state = L2T_STATE_RESOLVING;
LLE_REMREF(lle);
}
goto done;
}
if (lle != e->lle) {
old_lle = e->lle;
LLE_ADDREF(lle);
e->lle = lle;
}
if (e->state == L2T_STATE_RESOLVING ||
memcmp(e->dmac, &lle->ll_addr, ETHER_ADDR_LEN)) {
/* unresolved -> resolved; or dmac changed */
memcpy(e->dmac, &lle->ll_addr, ETHER_ADDR_LEN);
write_l2e(sc, e, 1);
} else {
/* +ve reinforcement of a valid or stale entry */
}
e->state = L2T_STATE_VALID;
} else {
/*
* Entry was used previously but is unreferenced right now.
* e->lle has been released and NULL'd out by t4_l2t_free, or
* l2t_release is about to call t4_l2t_free and do that.
*
* Either way this is of no interest to us.
*/
}
done:
mtx_unlock(&e->lock);
if (old_lle)
LLE_FREE(old_lle);
}
#endif

View File

@ -30,8 +30,25 @@
#ifndef __T4_L2T_H
#define __T4_L2T_H
/* identifies sync vs async L2T_WRITE_REQs */
#define S_SYNC_WR 12
#define V_SYNC_WR(x) ((x) << S_SYNC_WR)
#define F_SYNC_WR V_SYNC_WR(1)
enum { L2T_SIZE = 4096 }; /* # of L2T entries */
enum {
L2T_STATE_VALID, /* entry is up to date */
L2T_STATE_STALE, /* entry may be used but needs revalidation */
L2T_STATE_RESOLVING, /* entry needs address resolution */
L2T_STATE_FAILED, /* failed to resolve */
L2T_STATE_SYNC_WRITE, /* synchronous write of entry underway */
/* when state is one of the below the entry is not hashed */
L2T_STATE_SWITCHING, /* entry is being used by a switching filter */
L2T_STATE_UNUSED /* entry not in use */
};
/*
* Each L2T entry plays multiple roles. First of all, it keeps state for the
* corresponding entry of the HW L2 table and maintains a queue of offload
@ -43,39 +60,49 @@ enum { L2T_SIZE = 4096 }; /* # of L2T entries */
struct l2t_entry {
uint16_t state; /* entry state */
uint16_t idx; /* entry index */
uint32_t addr[4]; /* next hop IP or IPv6 address */
uint32_t addr; /* next hop IP address */
struct ifnet *ifp; /* outgoing interface */
uint16_t smt_idx; /* SMT index */
uint16_t vlan; /* VLAN TCI (id: 0-11, prio: 13-15) */
int ifindex; /* interface index */
struct llentry *lle; /* llentry for next hop */
struct l2t_entry *first; /* start of hash chain */
struct l2t_entry *next; /* next l2t_entry on chain */
struct mbuf *arpq_head; /* list of mbufs awaiting resolution */
struct mbuf *arpq_tail;
STAILQ_HEAD(, wrqe) wr_list; /* list of WRs awaiting resolution */
struct mtx lock;
volatile int refcnt; /* entry reference count */
uint16_t hash; /* hash bucket the entry is on */
uint8_t v6; /* whether entry is for IPv6 */
uint8_t lport; /* associated offload logical port */
uint8_t dmac[ETHER_ADDR_LEN]; /* next hop's MAC address */
};
struct l2t_data {
struct rwlock lock;
volatile int nfree; /* number of free entries */
struct l2t_entry *rover;/* starting point for next allocation */
struct l2t_entry l2tab[L2T_SIZE];
};
int t4_init_l2t(struct adapter *, int);
int t4_free_l2t(struct l2t_data *);
struct l2t_entry *t4_alloc_l2e(struct l2t_data *);
struct l2t_entry *t4_l2t_alloc_switching(struct l2t_data *);
int t4_l2t_set_switching(struct adapter *, struct l2t_entry *, uint16_t,
uint8_t, uint8_t *);
void t4_l2t_release(struct l2t_entry *);
int t4_write_l2e(struct adapter *, struct l2t_entry *, int);
int do_l2t_write_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *);
static inline void
t4_l2t_release(struct l2t_entry *e)
{
struct l2t_data *d = container_of(e, struct l2t_data, l2tab[e->idx]);
if (atomic_fetchadd_int(&e->refcnt, -1) == 1)
atomic_add_int(&d->nfree, 1);
}
#ifdef SBUF_DRAIN
int sysctl_l2t(SYSCTL_HANDLER_ARGS);
#endif
#ifndef TCP_OFFLOAD_DISABLE
struct l2t_entry *t4_l2t_get(struct port_info *, struct ifnet *,
struct sockaddr *);
int t4_l2t_send(struct adapter *, struct mbuf *, struct l2t_entry *);
void t4_l2t_update(struct adapter *, struct llentry *);
#endif
#endif /* __T4_L2T_H */
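
For orientation, a minimal sketch of how the TOE side consumes the L2T interface; it mirrors t4_connect() later in this commit (t4_l2t_get() and t4_l2t_send() appear to move to the TOM-side L2T code, while t4_l2t_release() stays here). example_offload_open() is a made-up name and error handling is abbreviated.

/*
 * Illustrative sketch, not part of the commit: a work request that
 * depends on L2 resolution is handed to t4_l2t_send(), which sends it
 * right away if the entry is resolved or queues it on the entry's
 * wr_list until it resolves.
 */
static int
example_offload_open(struct adapter *sc, struct port_info *pi,
    struct ifnet *ifp, struct sockaddr *nexthop, struct sge_wrq *ctrlq)
{
	struct l2t_entry *e;
	struct wrqe *wr;

	e = t4_l2t_get(pi, ifp, nexthop);	/* referenced, possibly unresolved */
	if (e == NULL)
		return (ENOMEM);

	wr = alloc_wrqe(sizeof(struct cpl_act_open_req), ctrlq);
	if (wr == NULL) {
		t4_l2t_release(e);
		return (ENOMEM);
	}

	/* ... build the CPL_ACT_OPEN_REQ in wrtod(wr) ... */

	return (t4_l2t_send(sc, wr, e));
}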

View File

@ -119,9 +119,13 @@ static void cxgbe_media_status(struct ifnet *, struct ifmediareq *);
MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4 Ethernet driver and services");
/*
* Correct lock order when you need to acquire multiple locks is t4_list_lock,
* then ADAPTER_LOCK, then t4_uld_list_lock.
*/
static struct mtx t4_list_lock;
static SLIST_HEAD(, adapter) t4_list;
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
static struct mtx t4_uld_list_lock;
static SLIST_HEAD(, uld_info) t4_uld_list;
#endif
@ -149,7 +153,7 @@ TUNABLE_INT("hw.cxgbe.ntxq1g", &t4_ntxq1g);
static int t4_nrxq1g = -1;
TUNABLE_INT("hw.cxgbe.nrxq1g", &t4_nrxq1g);
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
#define NOFLDTXQ_10G 8
static int t4_nofldtxq10g = -1;
TUNABLE_INT("hw.cxgbe.nofldtxq10g", &t4_nofldtxq10g);
@ -237,7 +241,7 @@ struct intrs_and_queues {
int nrxq10g; /* # of NIC rxq's for each 10G port */
int ntxq1g; /* # of NIC txq's for each 1G port */
int nrxq1g; /* # of NIC rxq's for each 1G port */
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
int nofldtxq10g; /* # of TOE txq's for each 10G port */
int nofldrxq10g; /* # of TOE rxq's for each 10G port */
int nofldtxq1g; /* # of TOE txq's for each 1G port */
@ -297,8 +301,10 @@ static void reg_block_dump(struct adapter *, uint8_t *, unsigned int,
unsigned int);
static void t4_get_regs(struct adapter *, struct t4_regdump *, uint8_t *);
static void cxgbe_tick(void *);
static void cxgbe_vlan_config(void *, struct ifnet *, uint16_t);
static int cpl_not_handled(struct sge_iq *, const struct rss_header *,
struct mbuf *);
static int an_not_handled(struct sge_iq *, const struct rsp_ctrl *);
static int t4_sysctls(struct adapter *);
static int cxgbe_sysctls(struct port_info *);
static int sysctl_int_array(SYSCTL_HANDLER_ARGS);
@ -342,10 +348,8 @@ static int filter_rpl(struct sge_iq *, const struct rss_header *,
struct mbuf *);
static int get_sge_context(struct adapter *, struct t4_sge_context *);
static int read_card_mem(struct adapter *, struct t4_mem_range *);
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
static int toe_capability(struct port_info *, int);
static int activate_uld(struct adapter *, int, struct uld_softc *);
static int deactivate_uld(struct uld_softc *);
#endif
static int t4_mod_event(module_t, int, void *);
@ -368,8 +372,12 @@ struct t4_pciids {
{0x440a, 4, "Chelsio T404-BT"},
};
#ifndef TCP_OFFLOAD_DISABLE
/* This is used in service_iq() to get to the fl associated with an iq. */
#ifdef TCP_OFFLOAD
/*
* service_iq() has an iq and needs the fl. Offset of fl from the iq should be
* exactly the same for both rxq and ofld_rxq.
*/
CTASSERT(offsetof(struct sge_ofld_rxq, iq) == offsetof(struct sge_rxq, iq));
CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl));
#endif
@ -401,7 +409,7 @@ t4_attach(device_t dev)
int rc = 0, i, n10g, n1g, rqidx, tqidx;
struct intrs_and_queues iaq;
struct sge *s;
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
int ofld_rqidx, ofld_tqidx;
#endif
@ -436,6 +444,7 @@ t4_attach(device_t dev)
goto done; /* error message displayed already */
memset(sc->chan_map, 0xff, sizeof(sc->chan_map));
sc->an_handler = an_not_handled;
for (i = 0; i < ARRAY_SIZE(sc->cpl_handler); i++)
sc->cpl_handler[i] = cpl_not_handled;
t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, filter_rpl);
@ -595,7 +604,7 @@ t4_attach(device_t dev)
s->neq += sc->params.nports + 1;/* ctrl queues: 1 per port + 1 mgmt */
s->niq = s->nrxq + 1; /* 1 extra for firmware event queue */
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
if (is_offload(sc)) {
s->nofldrxq = n10g * iaq.nofldrxq10g + n1g * iaq.nofldrxq1g;
@ -631,7 +640,7 @@ t4_attach(device_t dev)
* tx queues that each port should get.
*/
rqidx = tqidx = 0;
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
ofld_rqidx = ofld_tqidx = 0;
#endif
for_each_port(sc, i) {
@ -653,7 +662,7 @@ t4_attach(device_t dev)
rqidx += pi->nrxq;
tqidx += pi->ntxq;
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
if (is_offload(sc)) {
pi->first_ofld_rxq = ofld_rqidx;
pi->first_ofld_txq = ofld_tqidx;
@ -761,7 +770,7 @@ t4_detach(device_t dev)
if (sc->l2t)
t4_free_l2t(sc->l2t);
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
free(sc->sge.ofld_rxq, M_CXGBE);
free(sc->sge.ofld_txq, M_CXGBE);
#endif
@ -832,7 +841,7 @@ cxgbe_attach(device_t dev)
ifp->if_qflush = cxgbe_qflush;
ifp->if_capabilities = T4_CAP;
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
if (is_offload(pi->adapter))
ifp->if_capabilities |= IFCAP_TOE4;
#endif
@ -844,9 +853,12 @@ cxgbe_attach(device_t dev)
cxgbe_media_status);
build_medialist(pi);
pi->vlan_c = EVENTHANDLER_REGISTER(vlan_config, cxgbe_vlan_config, ifp,
EVENTHANDLER_PRI_ANY);
ether_ifattach(ifp, pi->hw_addr);
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
if (is_offload(pi->adapter)) {
device_printf(dev,
"%d txq, %d rxq (NIC); %d txq, %d rxq (TOE)\n",
@ -876,6 +888,9 @@ cxgbe_detach(device_t dev)
SET_BUSY(sc);
ADAPTER_UNLOCK(sc);
if (pi->vlan_c)
EVENTHANDLER_DEREGISTER(vlan_config, pi->vlan_c);
PORT_LOCK(pi);
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
callout_stop(&pi->tick);
@ -1042,7 +1057,7 @@ fail:
}
#endif
}
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
if (mask & IFCAP_TOE) {
int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE;
@ -1292,7 +1307,7 @@ cfg_itype_and_nqueues(struct adapter *sc, int n10g, int n1g,
iaq->ntxq1g = t4_ntxq1g;
iaq->nrxq10g = nrxq10g = t4_nrxq10g;
iaq->nrxq1g = nrxq1g = t4_nrxq1g;
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
iaq->nofldtxq10g = t4_nofldtxq10g;
iaq->nofldtxq1g = t4_nofldtxq1g;
iaq->nofldrxq10g = nofldrxq10g = t4_nofldrxq10g;
@ -1364,7 +1379,7 @@ restart:
n++;
}
iaq->nrxq10g = min(n, nrxq10g);
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
iaq->nofldrxq10g = min(n, nofldrxq10g);
#endif
}
@ -1379,7 +1394,7 @@ restart:
n++;
}
iaq->nrxq1g = min(n, nrxq1g);
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
iaq->nofldrxq1g = min(n, nofldrxq1g);
#endif
}
@ -1392,7 +1407,7 @@ restart:
* Least desirable option: one interrupt vector for everything.
*/
iaq->nirq = iaq->nrxq10g = iaq->nrxq1g = 1;
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
iaq->nofldrxq10g = iaq->nofldrxq1g = 1;
#endif
@ -2305,7 +2320,7 @@ adapter_full_init(struct adapter *sc)
struct irq *irq;
struct port_info *pi;
struct sge_rxq *rxq;
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
struct sge_ofld_rxq *ofld_rxq;
#endif
@ -2369,7 +2384,7 @@ adapter_full_init(struct adapter *sc)
for_each_port(sc, p) {
pi = sc->port[p];
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
/*
* Skip over the NIC queues if they aren't taking direct
* interrupts.
@ -2386,7 +2401,7 @@ adapter_full_init(struct adapter *sc)
rid++;
}
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
/*
* Skip over the offload queues if they aren't taking
* direct interrupts.
@ -2494,7 +2509,7 @@ port_full_uninit(struct port_info *pi)
int i;
struct sge_rxq *rxq;
struct sge_txq *txq;
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
struct sge_ofld_rxq *ofld_rxq;
struct sge_wrq *ofld_txq;
#endif
@ -2507,7 +2522,7 @@ port_full_uninit(struct port_info *pi)
quiesce_eq(sc, &txq->eq);
}
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
for_each_ofld_txq(pi, i, ofld_txq) {
quiesce_eq(sc, &ofld_txq->eq);
}
@ -2518,7 +2533,7 @@ port_full_uninit(struct port_info *pi)
quiesce_fl(sc, &rxq->fl);
}
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
for_each_ofld_rxq(pi, i, ofld_rxq) {
quiesce_iq(sc, &ofld_rxq->iq);
quiesce_fl(sc, &ofld_rxq->fl);
@ -2892,14 +2907,27 @@ cxgbe_tick(void *arg)
PORT_UNLOCK(pi);
}
static void
cxgbe_vlan_config(void *arg, struct ifnet *ifp, uint16_t vid)
{
struct ifnet *vlan;
if (arg != ifp)
return;
vlan = VLAN_DEVAT(ifp, vid);
VLAN_SETCOOKIE(vlan, ifp);
}
static int
cpl_not_handled(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
#ifdef INVARIANTS
panic("%s: opcode %02x on iq %p with payload %p",
panic("%s: opcode 0x%02x on iq %p with payload %p",
__func__, rss->opcode, iq, m);
#else
log(LOG_ERR, "%s: opcode %02x on iq %p with payload %p",
log(LOG_ERR, "%s: opcode 0x%02x on iq %p with payload %p",
__func__, rss->opcode, iq, m);
m_freem(m);
#endif
@ -2921,6 +2949,31 @@ t4_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h)
return (0);
}
static int
an_not_handled(struct sge_iq *iq, const struct rsp_ctrl *ctrl)
{
#ifdef INVARIANTS
panic("%s: async notification on iq %p (ctrl %p)", __func__, iq, ctrl);
#else
log(LOG_ERR, "%s: async notification on iq %p (ctrl %p)",
__func__, iq, ctrl);
#endif
return (EDOOFUS);
}
int
t4_register_an_handler(struct adapter *sc, an_handler_t h)
{
uintptr_t *loc, new;
new = h ? (uintptr_t)h : (uintptr_t)an_not_handled;
loc = (uintptr_t *) &sc->an_handler;
atomic_store_rel_ptr(loc, new);
return (0);
}
static int
t4_sysctls(struct adapter *sc)
{
@ -3072,7 +3125,7 @@ t4_sysctls(struct adapter *sc)
sysctl_tx_rate, "A", "Tx rate");
#endif
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
if (is_offload(sc)) {
/*
* dev.t4nex.X.toe.
@ -3125,7 +3178,7 @@ cxgbe_sysctls(struct port_info *pi)
SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD,
&pi->first_txq, 0, "index of first tx queue");
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
if (is_offload(pi->adapter)) {
SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD,
&pi->nofldrxq, 0,
@ -4543,7 +4596,7 @@ set_filter_mode(struct adapter *sc, uint32_t mode)
goto done;
}
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
if (sc->offload_map) {
rc = EBUSY;
goto done;
@ -4734,7 +4787,7 @@ static int
set_filter_wr(struct adapter *sc, int fidx)
{
struct filter_entry *f = &sc->tids.ftid_tab[fidx];
struct mbuf *m;
struct wrqe *wr;
struct fw_filter_wr *fwr;
unsigned int ftid;
@ -4755,12 +4808,11 @@ set_filter_wr(struct adapter *sc, int fidx)
ftid = sc->tids.ftid_base + fidx;
m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
wr = alloc_wrqe(sizeof(*fwr), &sc->sge.mgmtq);
if (wr == NULL)
return (ENOMEM);
fwr = mtod(m, struct fw_filter_wr *);
m->m_len = m->m_pkthdr.len = sizeof(*fwr);
fwr = wrtod(wr);
bzero(fwr, sizeof (*fwr));
fwr->op_pkd = htobe32(V_FW_WR_OP(FW_FILTER_WR));
@ -4830,7 +4882,7 @@ set_filter_wr(struct adapter *sc, int fidx)
f->pending = 1;
sc->tids.ftids_in_use++;
t4_mgmt_tx(sc, m);
t4_wrq_tx(sc, wr);
return (0);
}
@ -4838,7 +4890,7 @@ static int
del_filter_wr(struct adapter *sc, int fidx)
{
struct filter_entry *f = &sc->tids.ftid_tab[fidx];
struct mbuf *m;
struct wrqe *wr;
struct fw_filter_wr *fwr;
unsigned int ftid;
@ -4846,18 +4898,16 @@ del_filter_wr(struct adapter *sc, int fidx)
ftid = sc->tids.ftid_base + fidx;
m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
wr = alloc_wrqe(sizeof(*fwr), &sc->sge.mgmtq);
if (wr == NULL)
return (ENOMEM);
fwr = mtod(m, struct fw_filter_wr *);
m->m_len = m->m_pkthdr.len = sizeof(*fwr);
fwr = wrtod(wr);
bzero(fwr, sizeof (*fwr));
t4_mk_filtdelwr(ftid, fwr, sc->sge.fwq.abs_id);
f->pending = 1;
t4_mgmt_tx(sc, m);
t4_wrq_tx(sc, wr);
return (0);
}
@ -5215,7 +5265,7 @@ t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
return (rc);
}
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
static int
toe_capability(struct port_info *pi, int enable)
{
@ -5228,13 +5278,28 @@ toe_capability(struct port_info *pi, int enable)
return (ENODEV);
if (enable) {
if (!(sc->flags & FULL_INIT_DONE)) {
log(LOG_WARNING,
"You must enable a cxgbe interface first\n");
return (EAGAIN);
}
if (isset(&sc->offload_map, pi->port_id))
return (0);
if (sc->offload_map == 0) {
rc = activate_uld(sc, ULD_TOM, &sc->tom);
if (!(sc->flags & TOM_INIT_DONE)) {
rc = t4_activate_uld(sc, ULD_TOM);
if (rc == EAGAIN) {
log(LOG_WARNING,
"You must kldload t4_tom.ko before trying "
"to enable TOE on a cxgbe interface.\n");
}
if (rc != 0)
return (rc);
KASSERT(sc->tom_softc != NULL,
("%s: TOM activated but softc NULL", __func__));
KASSERT(sc->flags & TOM_INIT_DONE,
("%s: TOM activated but flag not set", __func__));
}
setbit(&sc->offload_map, pi->port_id);
@ -5242,15 +5307,9 @@ toe_capability(struct port_info *pi, int enable)
if (!isset(&sc->offload_map, pi->port_id))
return (0);
KASSERT(sc->flags & TOM_INIT_DONE,
("%s: TOM never initialized?", __func__));
clrbit(&sc->offload_map, pi->port_id);
if (sc->offload_map == 0) {
rc = deactivate_uld(&sc->tom);
if (rc != 0) {
setbit(&sc->offload_map, pi->port_id);
return (rc);
}
}
}
return (0);
@ -5305,8 +5364,8 @@ done:
return (rc);
}
static int
activate_uld(struct adapter *sc, int id, struct uld_softc *usc)
int
t4_activate_uld(struct adapter *sc, int id)
{
int rc = EAGAIN;
struct uld_info *ui;
@ -5315,13 +5374,9 @@ activate_uld(struct adapter *sc, int id, struct uld_softc *usc)
SLIST_FOREACH(ui, &t4_uld_list, link) {
if (ui->uld_id == id) {
rc = ui->attach(sc, &usc->softc);
if (rc == 0) {
KASSERT(usc->softc != NULL,
("%s: ULD %d has no state", __func__, id));
rc = ui->activate(sc);
if (rc == 0)
ui->refcount++;
usc->uld = ui;
}
goto done;
}
}
@ -5331,25 +5386,21 @@ done:
return (rc);
}
static int
deactivate_uld(struct uld_softc *usc)
int
t4_deactivate_uld(struct adapter *sc, int id)
{
int rc;
int rc = EINVAL;
struct uld_info *ui;
mtx_lock(&t4_uld_list_lock);
if (usc->uld == NULL || usc->softc == NULL) {
rc = EINVAL;
goto done;
}
rc = usc->uld->detach(usc->softc);
if (rc == 0) {
KASSERT(usc->uld->refcount > 0,
("%s: ULD has bad refcount", __func__));
usc->uld->refcount--;
usc->uld = NULL;
usc->softc = NULL;
SLIST_FOREACH(ui, &t4_uld_list, link) {
if (ui->uld_id == id) {
rc = ui->deactivate(sc);
if (rc == 0)
ui->refcount--;
goto done;
}
}
done:
mtx_unlock(&t4_uld_list_lock);
@ -5379,7 +5430,7 @@ tweak_tunables(void)
if (t4_nrxq1g < 1)
t4_nrxq1g = min(nc, NRXQ_1G);
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
if (t4_nofldtxq10g < 1)
t4_nofldtxq10g = min(nc, NOFLDTXQ_10G);
@ -5426,7 +5477,7 @@ t4_mod_event(module_t mod, int cmd, void *arg)
t4_sge_modload();
mtx_init(&t4_list_lock, "T4 adapters", 0, MTX_DEF);
SLIST_INIT(&t4_list);
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
mtx_init(&t4_uld_list_lock, "T4 ULDs", 0, MTX_DEF);
SLIST_INIT(&t4_uld_list);
#endif
@ -5434,7 +5485,7 @@ t4_mod_event(module_t mod, int cmd, void *arg)
break;
case MOD_UNLOAD:
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
mtx_lock(&t4_uld_list_lock);
if (!SLIST_EMPTY(&t4_uld_list)) {
rc = EBUSY;

View File

@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/kdb.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
@ -51,7 +52,6 @@ __FBSDID("$FreeBSD$");
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "common/t4_msg.h"
#include "t4_l2t.h"
struct fl_buf_info {
int size;
@ -115,14 +115,14 @@ static int free_mgmtq(struct adapter *);
static int alloc_rxq(struct port_info *, struct sge_rxq *, int, int,
struct sysctl_oid *);
static int free_rxq(struct port_info *, struct sge_rxq *);
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
static int alloc_ofld_rxq(struct port_info *, struct sge_ofld_rxq *, int, int,
struct sysctl_oid *);
static int free_ofld_rxq(struct port_info *, struct sge_ofld_rxq *);
#endif
static int ctrl_eq_alloc(struct adapter *, struct sge_eq *);
static int eth_eq_alloc(struct adapter *, struct port_info *, struct sge_eq *);
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
static int ofld_eq_alloc(struct adapter *, struct port_info *, struct sge_eq *);
#endif
static int alloc_eq(struct adapter *, struct port_info *, struct sge_eq *);
@ -397,7 +397,7 @@ first_vector(struct port_info *pi)
if (i == pi->port_id)
break;
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
if (sc->flags & INTR_DIRECT)
rc += pi->nrxq + pi->nofldrxq;
else
@ -434,7 +434,7 @@ port_intr_iq(struct port_info *pi, int idx)
if (sc->intr_count == 1)
return (&sc->sge.fwq);
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
if (sc->flags & INTR_DIRECT) {
idx %= pi->nrxq + pi->nofldrxq;
@ -475,19 +475,20 @@ t4_setup_port_queues(struct port_info *pi)
struct sge_rxq *rxq;
struct sge_txq *txq;
struct sge_wrq *ctrlq;
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
struct sge_ofld_rxq *ofld_rxq;
struct sge_wrq *ofld_txq;
struct sysctl_oid *oid2 = NULL;
#endif
char name[16];
struct adapter *sc = pi->adapter;
struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev), *oid2 = NULL;
struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev);
struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "rxq", CTLFLAG_RD,
NULL, "rx queues");
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
if (is_offload(sc)) {
oid2 = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_rxq",
CTLFLAG_RD, NULL,
@ -515,7 +516,7 @@ t4_setup_port_queues(struct port_info *pi)
init_fl(&rxq->fl, pi->qsize_rxq / 8, pi->ifp->if_mtu, name);
if (sc->flags & INTR_DIRECT
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
|| (sc->intr_count > 1 && pi->nrxq >= pi->nofldrxq)
#endif
) {
@ -527,7 +528,7 @@ t4_setup_port_queues(struct port_info *pi)
}
}
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
for_each_ofld_rxq(pi, i, ofld_rxq) {
snprintf(name, sizeof(name), "%s ofld_rxq%d-iq",
@ -567,7 +568,7 @@ t4_setup_port_queues(struct port_info *pi)
j++;
}
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
for_each_ofld_rxq(pi, i, ofld_rxq) {
if (ofld_rxq->iq.flags & IQ_INTR)
continue;
@ -603,7 +604,7 @@ t4_setup_port_queues(struct port_info *pi)
j++;
}
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_txq",
CTLFLAG_RD, NULL, "tx queues for offloaded TCP connections");
for_each_ofld_txq(pi, i, ofld_txq) {
@ -655,7 +656,7 @@ t4_teardown_port_queues(struct port_info *pi)
struct adapter *sc = pi->adapter;
struct sge_rxq *rxq;
struct sge_txq *txq;
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
struct sge_ofld_rxq *ofld_rxq;
struct sge_wrq *ofld_txq;
#endif
@ -677,7 +678,7 @@ t4_teardown_port_queues(struct port_info *pi)
free_txq(pi, txq);
}
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
for_each_ofld_txq(pi, i, ofld_txq) {
free_wrq(sc, ofld_txq);
}
@ -693,7 +694,7 @@ t4_teardown_port_queues(struct port_info *pi)
free_rxq(pi, rxq);
}
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
for_each_ofld_rxq(pi, i, ofld_rxq) {
if ((ofld_rxq->iq.flags & IQ_INTR) == 0)
free_ofld_rxq(pi, ofld_rxq);
@ -709,7 +710,7 @@ t4_teardown_port_queues(struct port_info *pi)
free_rxq(pi, rxq);
}
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
for_each_ofld_rxq(pi, i, ofld_rxq) {
if (ofld_rxq->iq.flags & IQ_INTR)
free_ofld_rxq(pi, ofld_rxq);
@ -775,7 +776,7 @@ static int
service_iq(struct sge_iq *iq, int budget)
{
struct sge_iq *q;
struct sge_rxq *rxq = (void *)iq; /* Use iff iq is part of rxq */
struct sge_rxq *rxq = iq_to_rxq(iq); /* Use iff iq is part of rxq */
struct sge_fl *fl = &rxq->fl; /* Use iff IQ_HAS_FL */
struct adapter *sc = iq->adapter;
struct rsp_ctrl *ctrl;
@ -862,7 +863,8 @@ service_iq(struct sge_iq *iq, int budget)
break;
default:
panic("%s: rsp_type %u", __func__, rsp_type);
sc->an_handler(iq, ctrl);
break;
}
iq_next(iq);
@ -1076,42 +1078,33 @@ t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0)
return (0);
}
int
t4_mgmt_tx(struct adapter *sc, struct mbuf *m)
{
return t4_wrq_tx(sc, &sc->sge.mgmtq, m);
}
/*
* Doesn't fail. Holds on to work requests it can't send right away.
*/
int
t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct mbuf *m0)
void
t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct wrqe *wr)
{
struct sge_eq *eq = &wrq->eq;
int can_reclaim;
caddr_t dst;
struct mbuf *wr, *next;
TXQ_LOCK_ASSERT_OWNED(wrq);
#ifdef TCP_OFFLOAD
KASSERT((eq->flags & EQ_TYPEMASK) == EQ_OFLD ||
(eq->flags & EQ_TYPEMASK) == EQ_CTRL,
("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
#else
KASSERT((eq->flags & EQ_TYPEMASK) == EQ_CTRL,
("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
#endif
if (__predict_true(m0 != NULL)) {
if (wrq->head)
wrq->tail->m_nextpkt = m0;
else
wrq->head = m0;
while (m0->m_nextpkt)
m0 = m0->m_nextpkt;
wrq->tail = m0;
}
if (__predict_true(wr != NULL))
STAILQ_INSERT_TAIL(&wrq->wr_list, wr, link);
can_reclaim = reclaimable(eq);
if (__predict_false(eq->flags & EQ_STALLED)) {
if (can_reclaim < tx_resume_threshold(eq))
return (0);
return;
eq->flags &= ~EQ_STALLED;
eq->unstalled++;
}
@ -1120,39 +1113,34 @@ t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct mbuf *m0)
if (__predict_false(eq->cidx >= eq->cap))
eq->cidx -= eq->cap;
for (wr = wrq->head; wr; wr = next) {
while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL) {
int ndesc;
struct mbuf *m;
next = wr->m_nextpkt;
wr->m_nextpkt = NULL;
if (__predict_false(wr->wr_len < 0 ||
wr->wr_len > SGE_MAX_WR_LEN || (wr->wr_len & 0x7))) {
M_ASSERTPKTHDR(wr);
KASSERT(wr->m_pkthdr.len > 0 && (wr->m_pkthdr.len & 0x7) == 0,
("%s: work request len %d.", __func__, wr->m_pkthdr.len));
if (wr->m_pkthdr.len > SGE_MAX_WR_LEN) {
#ifdef INVARIANTS
panic("%s: oversized work request", __func__);
#else
log(LOG_ERR, "%s: %s work request too long (%d)",
device_get_nameunit(sc->dev), __func__,
wr->m_pkthdr.len);
m_freem(wr);
continue;
panic("%s: work request with length %d", __func__,
wr->wr_len);
#endif
#ifdef KDB
kdb_backtrace();
#endif
log(LOG_ERR, "%s: %s work request with length %d",
device_get_nameunit(sc->dev), __func__, wr->wr_len);
STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
free_wrqe(wr);
continue;
}
ndesc = howmany(wr->m_pkthdr.len, EQ_ESIZE);
ndesc = howmany(wr->wr_len, EQ_ESIZE);
if (eq->avail < ndesc) {
wr->m_nextpkt = next;
wrq->no_desc++;
break;
}
dst = (void *)&eq->desc[eq->pidx];
for (m = wr; m; m = m->m_next)
copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len);
copy_to_txd(eq, wrtod(wr), &dst, wr->wr_len);
eq->pidx += ndesc;
eq->avail -= ndesc;
@ -1164,7 +1152,8 @@ t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct mbuf *m0)
ring_eq_db(sc, eq);
wrq->tx_wrs++;
m_freem(wr);
STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
free_wrqe(wr);
if (eq->avail < 8) {
can_reclaim = reclaimable(eq);
@ -1178,20 +1167,11 @@ t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct mbuf *m0)
if (eq->pending)
ring_eq_db(sc, eq);
if (wr == NULL)
wrq->head = wrq->tail = NULL;
else {
wrq->head = wr;
KASSERT(wrq->tail->m_nextpkt == NULL,
("%s: wrq->tail grew a tail of its own", __func__));
if (wr != NULL) {
eq->flags |= EQ_STALLED;
if (callout_pending(&eq->tx_callout) == 0)
callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq);
}
return (0);
}
/* Per-packet header in a coalesced tx WR, before the SGL starts (in flits) */
@ -1792,6 +1772,7 @@ alloc_mgmtq(struct adapter *sc)
static int
free_mgmtq(struct adapter *sc)
{
return free_wrq(sc, &sc->sge.mgmtq);
}
@ -1885,7 +1866,7 @@ free_rxq(struct port_info *pi, struct sge_rxq *rxq)
return (rc);
}
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
static int
alloc_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq,
int intr_idx, int idx, struct sysctl_oid *oid)
@ -2031,7 +2012,7 @@ eth_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
return (rc);
}
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
static int
ofld_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
{
@ -2103,7 +2084,7 @@ alloc_eq(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
rc = eth_eq_alloc(sc, pi, eq);
break;
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
case EQ_OFLD:
rc = ofld_eq_alloc(sc, pi, eq);
break;
@ -2141,7 +2122,7 @@ free_eq(struct adapter *sc, struct sge_eq *eq)
eq->cntxt_id);
break;
#ifndef TCP_OFFLOAD_DISABLE
#ifdef TCP_OFFLOAD
case EQ_OFLD:
rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0,
eq->cntxt_id);
@ -2183,6 +2164,7 @@ alloc_wrq(struct adapter *sc, struct port_info *pi, struct sge_wrq *wrq,
return (rc);
wrq->adapter = sc;
STAILQ_INIT(&wrq->wr_list);
SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
&wrq->eq.cntxt_id, 0, "SGE context id of the queue");
@ -3179,7 +3161,7 @@ write_sgl_to_txd(struct sge_eq *eq, struct sgl *sgl, caddr_t *to)
static inline void
copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len)
{
if ((uintptr_t)(*to) + len <= (uintptr_t)eq->spg) {
if (__predict_true((uintptr_t)(*to) + len <= (uintptr_t)eq->spg)) {
bcopy(from, *to, len);
(*to) += len;
} else {

View File

@ -0,0 +1,377 @@
/*-
* Copyright (c) 2012 Chelsio Communications, Inc.
* All rights reserved.
* Written by: Navdeep Parhar <np@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/module.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/tcp_var.h>
#define TCPSTATES
#include <netinet/tcp_fsm.h>
#include <netinet/toecore.h>
#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
#include "tom/t4_tom_l2t.h"
#include "tom/t4_tom.h"
/* atid services */
static int alloc_atid(struct adapter *, void *);
static void *lookup_atid(struct adapter *, int);
static void free_atid(struct adapter *, int);
static int
alloc_atid(struct adapter *sc, void *ctx)
{
struct tid_info *t = &sc->tids;
int atid = -1;
mtx_lock(&t->atid_lock);
if (t->afree) {
union aopen_entry *p = t->afree;
atid = p - t->atid_tab;
t->afree = p->next;
p->data = ctx;
t->atids_in_use++;
}
mtx_unlock(&t->atid_lock);
return (atid);
}
static void *
lookup_atid(struct adapter *sc, int atid)
{
struct tid_info *t = &sc->tids;
return (t->atid_tab[atid].data);
}
static void
free_atid(struct adapter *sc, int atid)
{
struct tid_info *t = &sc->tids;
union aopen_entry *p = &t->atid_tab[atid];
mtx_lock(&t->atid_lock);
p->next = t->afree;
t->afree = p;
t->atids_in_use--;
mtx_unlock(&t->atid_lock);
}
/*
* Active open succeeded.
*/
static int
do_act_establish(struct sge_iq *iq, const struct rss_header *rss,
struct mbuf *m)
{
struct adapter *sc = iq->adapter;
const struct cpl_act_establish *cpl = (const void *)(rss + 1);
unsigned int tid = GET_TID(cpl);
unsigned int atid = G_TID_TID(ntohl(cpl->tos_atid));
struct toepcb *toep = lookup_atid(sc, atid);
struct inpcb *inp = toep->inp;
KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
KASSERT(toep->tid == atid, ("%s: toep tid/atid mismatch", __func__));
CTR3(KTR_CXGBE, "%s: atid %u, tid %u", __func__, atid, tid);
free_atid(sc, atid);
INP_WLOCK(inp);
toep->tid = tid;
insert_tid(sc, tid, toep);
if (inp->inp_flags & INP_DROPPED) {
/* socket closed by the kernel before hw told us it connected */
send_flowc_wr(toep, NULL);
send_reset(sc, toep, be32toh(cpl->snd_isn));
goto done;
}
make_established(toep, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt);
done:
INP_WUNLOCK(inp);
return (0);
}
static inline int
act_open_has_tid(unsigned int status)
{
return (status != CPL_ERR_TCAM_FULL &&
status != CPL_ERR_TCAM_PARITY &&
status != CPL_ERR_CONN_EXIST &&
status != CPL_ERR_ARP_MISS);
}
/*
* Convert an ACT_OPEN_RPL status to an errno.
*/
static inline int
act_open_rpl_status_to_errno(int status)
{
switch (status) {
case CPL_ERR_CONN_RESET:
return (ECONNREFUSED);
case CPL_ERR_ARP_MISS:
return (EHOSTUNREACH);
case CPL_ERR_CONN_TIMEDOUT:
return (ETIMEDOUT);
case CPL_ERR_TCAM_FULL:
return (ENOMEM);
case CPL_ERR_CONN_EXIST:
log(LOG_ERR, "ACTIVE_OPEN_RPL: 4-tuple in use\n");
return (EADDRINUSE);
default:
return (EIO);
}
}
static int
do_act_open_rpl(struct sge_iq *iq, const struct rss_header *rss,
struct mbuf *m)
{
struct adapter *sc = iq->adapter;
const struct cpl_act_open_rpl *cpl = (const void *)(rss + 1);
unsigned int atid = G_TID_TID(G_AOPEN_ATID(be32toh(cpl->atid_status)));
unsigned int status = G_AOPEN_STATUS(be32toh(cpl->atid_status));
struct toepcb *toep = lookup_atid(sc, atid);
struct inpcb *inp = toep->inp;
struct tcpcb *tp = intotcpcb(inp);
struct toedev *tod = &toep->td->tod;
KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
KASSERT(toep->tid == atid, ("%s: toep tid/atid mismatch", __func__));
CTR3(KTR_CXGBE, "%s: atid %u, status %u ", __func__, atid, status);
/* Ignore negative advice */
if (status == CPL_ERR_RTX_NEG_ADVICE)
return (0);
free_atid(sc, atid);
toep->tid = -1;
if (status && act_open_has_tid(status))
release_tid(sc, GET_TID(cpl), toep->ctrlq);
if (status == CPL_ERR_TCAM_FULL) {
INP_WLOCK(inp);
toe_connect_failed(tod, tp, EAGAIN);
final_cpl_received(toep); /* unlocks inp */
} else {
INP_INFO_WLOCK(&V_tcbinfo);
INP_WLOCK(inp);
toe_connect_failed(tod, tp, act_open_rpl_status_to_errno(status));
final_cpl_received(toep); /* unlocks inp */
INP_INFO_WUNLOCK(&V_tcbinfo);
}
return (0);
}
/*
* Options2 for active open.
*/
static uint32_t
calc_opt2a(struct socket *so)
{
struct tcpcb *tp = so_sototcpcb(so);
struct toepcb *toep = tp->t_toe;
struct port_info *pi = toep->port;
struct adapter *sc = pi->adapter;
uint32_t opt2 = 0;
if (tp->t_flags & TF_SACK_PERMIT)
opt2 |= F_SACK_EN;
if (tp->t_flags & TF_REQ_TSTMP)
opt2 |= F_TSTAMPS_EN;
if (tp->t_flags & TF_REQ_SCALE)
opt2 |= F_WND_SCALE_EN;
if (V_tcp_do_ecn)
opt2 |= F_CCTRL_ECN;
opt2 |= V_TX_QUEUE(sc->params.tp.tx_modq[pi->tx_chan]);
opt2 |= F_RX_COALESCE_VALID | V_RX_COALESCE(M_RX_COALESCE);
opt2 |= F_RSS_QUEUE_VALID | V_RSS_QUEUE(toep->ofld_rxq->iq.abs_id);
return (htobe32(opt2));
}
void
t4_init_connect_cpl_handlers(struct adapter *sc)
{
t4_register_cpl_handler(sc, CPL_ACT_ESTABLISH, do_act_establish);
t4_register_cpl_handler(sc, CPL_ACT_OPEN_RPL, do_act_open_rpl);
}
/*
* active open (soconnect).
*
* State of affairs on entry:
* soisconnecting (so_state |= SS_ISCONNECTING)
* tcbinfo not locked (This has changed - used to be WLOCKed)
* inp WLOCKed
* tp->t_state = TCPS_SYN_SENT
* rtalloc1, RT_UNLOCK on rt.
*/
int
t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt,
struct sockaddr *nam)
{
struct adapter *sc = tod->tod_softc;
struct toepcb *toep = NULL;
struct wrqe *wr = NULL;
struct cpl_act_open_req *cpl;
struct l2t_entry *e = NULL;
struct ifnet *rt_ifp = rt->rt_ifp;
struct port_info *pi;
int atid = -1, mtu_idx, rscale, qid_atid, rc = ENOMEM;
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp = intotcpcb(inp);
INP_WLOCK_ASSERT(inp);
if (nam->sa_family != AF_INET)
CXGBE_UNIMPLEMENTED("IPv6 connect");
if (rt_ifp->if_type == IFT_ETHER)
pi = rt_ifp->if_softc;
else if (rt_ifp->if_type == IFT_L2VLAN) {
struct ifnet *ifp = VLAN_COOKIE(rt_ifp);
pi = ifp->if_softc;
} else if (rt_ifp->if_type == IFT_IEEE8023ADLAG)
return (ENOSYS); /* XXX: implement lagg support */
else
return (ENOTSUP);
toep = alloc_toepcb(pi, -1, -1, M_NOWAIT);
if (toep == NULL)
goto failed;
atid = alloc_atid(sc, toep);
if (atid < 0)
goto failed;
e = t4_l2t_get(pi, rt_ifp,
rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway : nam);
if (e == NULL)
goto failed;
wr = alloc_wrqe(sizeof(*cpl), toep->ctrlq);
if (wr == NULL)
goto failed;
cpl = wrtod(wr);
toep->tid = atid;
toep->l2te = e;
toep->ulp_mode = ULP_MODE_NONE;
SOCKBUF_LOCK(&so->so_rcv);
/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
toep->rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
SOCKBUF_UNLOCK(&so->so_rcv);
offload_socket(so, toep);
/*
* The kernel sets request_r_scale based on sb_max whereas we need to
* take hardware's MAX_RCV_WND into account too. This is normally a
* no-op as MAX_RCV_WND is much larger than the default sb_max.
*/
if (tp->t_flags & TF_REQ_SCALE)
rscale = tp->request_r_scale = select_rcv_wscale();
else
rscale = 0;
mtu_idx = find_best_mtu_idx(sc, &inp->inp_inc, 0);
qid_atid = (toep->ofld_rxq->iq.abs_id << 14) | atid;
INIT_TP_WR(cpl, 0);
OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, qid_atid));
inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port, &cpl->peer_ip,
&cpl->peer_port);
cpl->opt0 = calc_opt0(so, pi, e, mtu_idx, rscale, toep->rx_credits,
toep->ulp_mode);
cpl->params = select_ntuple(pi, e, sc->filter_mode);
cpl->opt2 = calc_opt2a(so);
CTR5(KTR_CXGBE, "%s: atid %u (%s), toep %p, inp %p", __func__,
toep->tid, tcpstates[tp->t_state], toep, inp);
rc = t4_l2t_send(sc, wr, e);
if (rc == 0) {
toepcb_set_flag(toep, TPF_CPL_PENDING);
return (0);
}
undo_offload_socket(so);
failed:
CTR5(KTR_CXGBE, "%s: FAILED, atid %d, toep %p, l2te %p, wr %p",
__func__, atid, toep, e, wr);
if (e)
t4_l2t_release(e);
if (wr)
free_wrqe(wr);
if (atid >= 0)
free_atid(sc, atid);
if (toep)
free_toepcb(toep);
return (rc);
}
#endif

File diff suppressed because it is too large

File diff suppressed because it is too large

755
sys/dev/cxgbe/tom/t4_tom.c Normal file
View File

@ -0,0 +1,755 @@
/*-
* Copyright (c) 2012 Chelsio Communications, Inc.
* All rights reserved.
* Written by: Navdeep Parhar <np@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/module.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/tcp_var.h>
#define TCPSTATES
#include <netinet/tcp_fsm.h>
#include <netinet/toecore.h>
#ifdef TCP_OFFLOAD
#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
#include "tom/t4_tom_l2t.h"
#include "tom/t4_tom.h"
/* Module ops */
static int t4_tom_mod_load(void);
static int t4_tom_mod_unload(void);
static int t4_tom_modevent(module_t, int, void *);
/* ULD ops and helpers */
static int t4_tom_activate(struct adapter *);
static int t4_tom_deactivate(struct adapter *);
static struct uld_info tom_uld_info = {
.uld_id = ULD_TOM,
.activate = t4_tom_activate,
.deactivate = t4_tom_deactivate,
};
static void queue_tid_release(struct adapter *, int);
static void release_offload_resources(struct toepcb *);
static int alloc_tid_tabs(struct tid_info *);
static void free_tid_tabs(struct tid_info *);
static void free_tom_data(struct adapter *, struct tom_data *);
struct toepcb *
alloc_toepcb(struct port_info *pi, int txqid, int rxqid, int flags)
{
struct adapter *sc = pi->adapter;
struct toepcb *toep;
int tx_credits, txsd_total, len;
/*
* The firmware counts tx work request credits in units of 16 bytes
* each. Reserve room for an ABORT_REQ so the driver never has to worry
* about tx credits if it wants to abort a connection.
*/
tx_credits = sc->params.ofldq_wr_cred;
tx_credits -= howmany(sizeof(struct cpl_abort_req), 16);
/*
* Shortest possible tx work request is a fw_ofld_tx_data_wr + 1 byte
* immediate payload, and firmware counts tx work request credits in
* units of 16 bytes.  Calculate the maximum number of work requests possible.
*/
txsd_total = tx_credits /
howmany((sizeof(struct fw_ofld_tx_data_wr) + 1), 16);
if (txqid < 0)
txqid = (arc4random() % pi->nofldtxq) + pi->first_ofld_txq;
KASSERT(txqid >= pi->first_ofld_txq &&
txqid < pi->first_ofld_txq + pi->nofldtxq,
("%s: txqid %d for port %p (first %d, n %d)", __func__, txqid, pi,
pi->first_ofld_txq, pi->nofldtxq));
if (rxqid < 0)
rxqid = (arc4random() % pi->nofldrxq) + pi->first_ofld_rxq;
KASSERT(rxqid >= pi->first_ofld_rxq &&
rxqid < pi->first_ofld_rxq + pi->nofldrxq,
("%s: rxqid %d for port %p (first %d, n %d)", __func__, rxqid, pi,
pi->first_ofld_rxq, pi->nofldrxq));
len = offsetof(struct toepcb, txsd) +
txsd_total * sizeof(struct ofld_tx_sdesc);
toep = malloc(len, M_CXGBE, M_ZERO | flags);
if (toep == NULL)
return (NULL);
toep->td = sc->tom_softc;
toep->port = pi;
toep->tx_credits = tx_credits;
toep->ofld_txq = &sc->sge.ofld_txq[txqid];
toep->ofld_rxq = &sc->sge.ofld_rxq[rxqid];
toep->ctrlq = &sc->sge.ctrlq[pi->port_id];
toep->txsd_total = txsd_total;
toep->txsd_avail = txsd_total;
toep->txsd_pidx = 0;
toep->txsd_cidx = 0;
return (toep);
}
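/*
 * Worked example (illustrative numbers only, not taken from this commit):
 * if the firmware advertised ofldq_wr_cred = 512 credits and
 * sizeof(struct cpl_abort_req) were 24 bytes, the reservation above would
 * cost howmany(24, 16) = 2 credits, leaving tx_credits = 510.  If the
 * smallest work request, sizeof(struct fw_ofld_tx_data_wr) + 1 bytes, came
 * to 33 bytes (3 credits), then txsd_total = 510 / 3 = 170 and the toepcb
 * would be sized to track up to 170 outstanding tx work requests.
 */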
void
free_toepcb(struct toepcb *toep)
{
KASSERT(toepcb_flag(toep, TPF_ATTACHED) == 0,
("%s: attached to an inpcb", __func__));
KASSERT(toepcb_flag(toep, TPF_CPL_PENDING) == 0,
("%s: CPL pending", __func__));
free(toep, M_CXGBE);
}
/*
* Set up the socket for TCP offload.
*/
void
offload_socket(struct socket *so, struct toepcb *toep)
{
struct tom_data *td = toep->td;
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp = intotcpcb(inp);
struct sockbuf *sb;
INP_WLOCK_ASSERT(inp);
/* Update socket */
sb = &so->so_snd;
SOCKBUF_LOCK(sb);
sb->sb_flags |= SB_NOCOALESCE;
SOCKBUF_UNLOCK(sb);
sb = &so->so_rcv;
SOCKBUF_LOCK(sb);
sb->sb_flags |= SB_NOCOALESCE;
SOCKBUF_UNLOCK(sb);
/* Update TCP PCB */
tp->tod = &td->tod;
tp->t_toe = toep;
tp->t_flags |= TF_TOE;
/* Install an extra hold on inp */
toep->inp = inp;
toepcb_set_flag(toep, TPF_ATTACHED);
in_pcbref(inp);
/* Add the TOE PCB to the active list */
mtx_lock(&td->toep_list_lock);
TAILQ_INSERT_HEAD(&td->toep_list, toep, link);
mtx_unlock(&td->toep_list_lock);
}
/* This is _not_ the normal way to "unoffload" a socket. */
void
undo_offload_socket(struct socket *so)
{
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp = intotcpcb(inp);
struct toepcb *toep = tp->t_toe;
struct tom_data *td = toep->td;
struct sockbuf *sb;
INP_WLOCK_ASSERT(inp);
sb = &so->so_snd;
SOCKBUF_LOCK(sb);
sb->sb_flags &= ~SB_NOCOALESCE;
SOCKBUF_UNLOCK(sb);
sb = &so->so_rcv;
SOCKBUF_LOCK(sb);
sb->sb_flags &= ~SB_NOCOALESCE;
SOCKBUF_UNLOCK(sb);
tp->tod = NULL;
tp->t_toe = NULL;
tp->t_flags &= ~TF_TOE;
toep->inp = NULL;
toepcb_clr_flag(toep, TPF_ATTACHED);
if (in_pcbrele_wlocked(inp))
panic("%s: inp freed.", __func__);
mtx_lock(&td->toep_list_lock);
TAILQ_REMOVE(&td->toep_list, toep, link);
mtx_unlock(&td->toep_list_lock);
}
static void
release_offload_resources(struct toepcb *toep)
{
struct tom_data *td = toep->td;
struct adapter *sc = td_adapter(td);
int tid = toep->tid;
KASSERT(toepcb_flag(toep, TPF_CPL_PENDING) == 0,
("%s: %p has CPL pending.", __func__, toep));
KASSERT(toepcb_flag(toep, TPF_ATTACHED) == 0,
("%s: %p is still attached.", __func__, toep));
CTR4(KTR_CXGBE, "%s: toep %p (tid %d, l2te %p)",
__func__, toep, tid, toep->l2te);
if (toep->l2te)
t4_l2t_release(toep->l2te);
if (tid >= 0) {
remove_tid(sc, tid);
release_tid(sc, tid, toep->ctrlq);
}
mtx_lock(&td->toep_list_lock);
TAILQ_REMOVE(&td->toep_list, toep, link);
mtx_unlock(&td->toep_list_lock);
free_toepcb(toep);
}
/*
* The kernel is done with the TCP PCB and this is our opportunity to unhook the
* toepcb hanging off of it. If the TOE driver is also done with the toepcb (no
* pending CPL) then it is time to release all resources tied to the toepcb.
*
* Also gets called when an offloaded active open fails and the TOM wants the
* kernel to take the TCP PCB back.
*/
static void
t4_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp)
{
#if defined(KTR) || defined(INVARIANTS)
struct inpcb *inp = tp->t_inpcb;
#endif
struct toepcb *toep = tp->t_toe;
INP_WLOCK_ASSERT(inp);
KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
KASSERT(toepcb_flag(toep, TPF_ATTACHED),
("%s: not attached", __func__));
#ifdef KTR
if (tp->t_state == TCPS_SYN_SENT) {
CTR6(KTR_CXGBE, "%s: atid %d, toep %p (0x%x), inp %p (0x%x)",
__func__, toep->tid, toep, toep->flags, inp,
inp->inp_flags);
} else {
CTR6(KTR_CXGBE,
"t4_pcb_detach: tid %d (%s), toep %p (0x%x), inp %p (0x%x)",
toep->tid, tcpstates[tp->t_state], toep, toep->flags, inp,
inp->inp_flags);
}
#endif
tp->t_toe = NULL;
tp->t_flags &= ~TF_TOE;
toepcb_clr_flag(toep, TPF_ATTACHED);
if (toepcb_flag(toep, TPF_CPL_PENDING) == 0)
release_offload_resources(toep);
}
/*
* The TOE driver will not receive any more CPLs for the tid associated with the
* toepcb; release the hold on the inpcb.
*/
void
final_cpl_received(struct toepcb *toep)
{
struct inpcb *inp = toep->inp;
KASSERT(inp != NULL, ("%s: inp is NULL", __func__));
INP_WLOCK_ASSERT(inp);
KASSERT(toepcb_flag(toep, TPF_CPL_PENDING),
("%s: CPL not pending already?", __func__));
CTR6(KTR_CXGBE, "%s: tid %d, toep %p (0x%x), inp %p (0x%x)",
__func__, toep->tid, toep, toep->flags, inp, inp->inp_flags);
toep->inp = NULL;
toepcb_clr_flag(toep, TPF_CPL_PENDING);
if (toepcb_flag(toep, TPF_ATTACHED) == 0)
release_offload_resources(toep);
if (!in_pcbrele_wlocked(inp))
INP_WUNLOCK(inp);
}
void
insert_tid(struct adapter *sc, int tid, void *ctx)
{
struct tid_info *t = &sc->tids;
t->tid_tab[tid] = ctx;
atomic_add_int(&t->tids_in_use, 1);
}
void *
lookup_tid(struct adapter *sc, int tid)
{
struct tid_info *t = &sc->tids;
return (t->tid_tab[tid]);
}
void
update_tid(struct adapter *sc, int tid, void *ctx)
{
struct tid_info *t = &sc->tids;
t->tid_tab[tid] = ctx;
}
void
remove_tid(struct adapter *sc, int tid)
{
struct tid_info *t = &sc->tids;
t->tid_tab[tid] = NULL;
atomic_subtract_int(&t->tids_in_use, 1);
}
void
release_tid(struct adapter *sc, int tid, struct sge_wrq *ctrlq)
{
struct wrqe *wr;
struct cpl_tid_release *req;
wr = alloc_wrqe(sizeof(*req), ctrlq);
if (wr == NULL) {
queue_tid_release(sc, tid); /* defer */
return;
}
req = wrtod(wr);
INIT_TP_WR_MIT_CPL(req, CPL_TID_RELEASE, tid);
t4_wrq_tx(sc, wr);
}
static void
queue_tid_release(struct adapter *sc, int tid)
{
CXGBE_UNIMPLEMENTED("deferred tid release");
}
/*
* What mtu_idx to use, given a 4-tuple and/or an MSS cap
*/
int
find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc, int pmss)
{
unsigned short *mtus = &sc->params.mtus[0];
int i = 0, mss;
KASSERT(inc != NULL || pmss > 0,
("%s: at least one of inc/pmss must be specified", __func__));
mss = inc ? tcp_mssopt(inc) : pmss;
if (pmss > 0 && mss > pmss)
mss = pmss;
while (i < NMTUS - 1 && mtus[i + 1] <= mss + 40)
++i;
return (i);
}
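/*
 * Worked example (hypothetical MTU table): the loop returns the largest
 * index whose MTU still fits the MSS plus 40 bytes of IPv4 + TCP headers.
 * With mtus[] containing ... 1280, 1488, 1500, 2002 ... and a peer MSS of
 * 1460, mss + 40 = 1500 and the index of the 1500-byte entry is chosen;
 * a peer MSS of 1400 (mss + 40 = 1440) would stop at the 1280-byte entry.
 */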
/*
* Determine the receive window size for a socket.
*/
u_long
select_rcv_wnd(struct socket *so)
{
unsigned long wnd;
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
wnd = sbspace(&so->so_rcv);
if (wnd < MIN_RCV_WND)
wnd = MIN_RCV_WND;
return min(wnd, MAX_RCV_WND);
}
int
select_rcv_wscale(void)
{
int wscale = 0;
unsigned long space = sb_max;
if (space > MAX_RCV_WND)
space = MAX_RCV_WND;
while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < space)
wscale++;
return (wscale);
}
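/*
 * Example: the scale chosen is the smallest one that lets a 16-bit window
 * (TCP_MAXWIN = 65535) cover min(sb_max, MAX_RCV_WND).  Assuming the stock
 * sb_max of 2MB, 65535 << 5 = 2097120 still falls short of 2097152, so the
 * loop settles on a window scale of 6.
 */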
extern int always_keepalive;
#define VIID_SMACIDX(v) (((unsigned int)(v) & 0x7f) << 1)
/*
* socket so could be a listening socket too.
*/
uint64_t
calc_opt0(struct socket *so, struct port_info *pi, struct l2t_entry *e,
int mtu_idx, int rscale, int rx_credits, int ulp_mode)
{
uint64_t opt0;
KASSERT(rx_credits <= M_RCV_BUFSIZ,
("%s: rcv_bufsiz too high", __func__));
opt0 = F_TCAM_BYPASS | V_WND_SCALE(rscale) | V_MSS_IDX(mtu_idx) |
V_ULP_MODE(ulp_mode) | V_RCV_BUFSIZ(rx_credits);
if (so != NULL) {
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp = intotcpcb(inp);
int keepalive = always_keepalive ||
so_options_get(so) & SO_KEEPALIVE;
opt0 |= V_NAGLE((tp->t_flags & TF_NODELAY) == 0);
opt0 |= V_KEEP_ALIVE(keepalive != 0);
}
if (e != NULL)
opt0 |= V_L2T_IDX(e->idx);
if (pi != NULL) {
opt0 |= V_SMAC_SEL(VIID_SMACIDX(pi->viid));
opt0 |= V_TX_CHAN(pi->tx_chan);
}
return htobe64(opt0);
}
#define FILTER_SEL_WIDTH_P_FC (3 + 1)
#define FILTER_SEL_WIDTH_VIN_P_FC (6 + 7 + FILTER_SEL_WIDTH_P_FC)
#define FILTER_SEL_WIDTH_TAG_P_FC (3 + FILTER_SEL_WIDTH_VIN_P_FC)
#define FILTER_SEL_WIDTH_VLD_TAG_P_FC (1 + FILTER_SEL_WIDTH_TAG_P_FC)
#define VLAN_NONE 0xfff
#define FILTER_SEL_VLAN_NONE 0xffff
uint32_t
select_ntuple(struct port_info *pi, struct l2t_entry *e, uint32_t filter_mode)
{
uint16_t viid = pi->viid;
uint32_t ntuple = 0;
if (filter_mode == HW_TPL_FR_MT_PR_IV_P_FC) {
if (e->vlan == VLAN_NONE)
ntuple |= FILTER_SEL_VLAN_NONE << FILTER_SEL_WIDTH_P_FC;
else {
ntuple |= e->vlan << FILTER_SEL_WIDTH_P_FC;
ntuple |= 1 << FILTER_SEL_WIDTH_VLD_TAG_P_FC;
}
ntuple |= e->lport << S_PORT;
ntuple |= IPPROTO_TCP << FILTER_SEL_WIDTH_VLD_TAG_P_FC;
} else if (filter_mode == HW_TPL_FR_MT_PR_OV_P_FC) {
ntuple |= G_FW_VIID_VIN(viid) << FILTER_SEL_WIDTH_P_FC;
ntuple |= G_FW_VIID_PFN(viid) << FILTER_SEL_WIDTH_VIN_P_FC;
ntuple |= G_FW_VIID_VIVLD(viid) << FILTER_SEL_WIDTH_TAG_P_FC;
ntuple |= e->lport << S_PORT;
ntuple |= IPPROTO_TCP << FILTER_SEL_WIDTH_VLD_TAG_P_FC;
}
return (htobe32(ntuple));
}
static int
alloc_tid_tabs(struct tid_info *t)
{
size_t size;
unsigned int i;
size = t->ntids * sizeof(*t->tid_tab) +
t->natids * sizeof(*t->atid_tab) +
t->nstids * sizeof(*t->stid_tab);
t->tid_tab = malloc(size, M_CXGBE, M_ZERO | M_NOWAIT);
if (t->tid_tab == NULL)
return (ENOMEM);
mtx_init(&t->atid_lock, "atid lock", NULL, MTX_DEF);
t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids];
t->afree = t->atid_tab;
t->atids_in_use = 0;
for (i = 1; i < t->natids; i++)
t->atid_tab[i - 1].next = &t->atid_tab[i];
t->atid_tab[t->natids - 1].next = NULL;
mtx_init(&t->stid_lock, "stid lock", NULL, MTX_DEF);
t->stid_tab = (union serv_entry *)&t->atid_tab[t->natids];
t->sfree = t->stid_tab;
t->stids_in_use = 0;
for (i = 1; i < t->nstids; i++)
t->stid_tab[i - 1].next = &t->stid_tab[i];
t->stid_tab[t->nstids - 1].next = NULL;
atomic_store_rel_int(&t->tids_in_use, 0);
return (0);
}
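/*
 * Note on the layout (restating what the code above does): a single
 * allocation backs all three tables.  tid_tab occupies the first ntids
 * pointers, the atid free list is carved out of the memory immediately
 * after it, and the stid table follows the atids, so freeing t->tid_tab
 * in free_tid_tabs() releases everything at once.
 */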
static void
free_tid_tabs(struct tid_info *t)
{
KASSERT(t->tids_in_use == 0,
("%s: %d tids still in use.", __func__, t->tids_in_use));
KASSERT(t->atids_in_use == 0,
("%s: %d atids still in use.", __func__, t->atids_in_use));
KASSERT(t->stids_in_use == 0,
("%s: %d tids still in use.", __func__, t->stids_in_use));
free(t->tid_tab, M_CXGBE);
t->tid_tab = NULL;
if (mtx_initialized(&t->atid_lock))
mtx_destroy(&t->atid_lock);
if (mtx_initialized(&t->stid_lock))
mtx_destroy(&t->stid_lock);
}
static void
free_tom_data(struct adapter *sc, struct tom_data *td)
{
KASSERT(TAILQ_EMPTY(&td->toep_list),
("%s: TOE PCB list is not empty.", __func__));
KASSERT(td->lctx_count == 0,
("%s: lctx hash table is not empty.", __func__));
t4_uninit_l2t_cpl_handlers(sc);
if (td->listen_mask != 0)
hashdestroy(td->listen_hash, M_CXGBE, td->listen_mask);
if (mtx_initialized(&td->lctx_hash_lock))
mtx_destroy(&td->lctx_hash_lock);
if (mtx_initialized(&td->toep_list_lock))
mtx_destroy(&td->toep_list_lock);
free_tid_tabs(&sc->tids);
free(td, M_CXGBE);
}
/*
* Ground control to Major TOM
* Commencing countdown, engines on
*/
static int
t4_tom_activate(struct adapter *sc)
{
struct tom_data *td;
struct toedev *tod;
int i, rc;
ADAPTER_LOCK_ASSERT_OWNED(sc); /* for sc->flags */
/* per-adapter softc for TOM */
td = malloc(sizeof(*td), M_CXGBE, M_ZERO | M_NOWAIT);
if (td == NULL)
return (ENOMEM);
/* List of TOE PCBs and associated lock */
mtx_init(&td->toep_list_lock, "PCB list lock", NULL, MTX_DEF);
TAILQ_INIT(&td->toep_list);
/* Listen context */
mtx_init(&td->lctx_hash_lock, "lctx hash lock", NULL, MTX_DEF);
td->listen_hash = hashinit_flags(LISTEN_HASH_SIZE, M_CXGBE,
&td->listen_mask, HASH_NOWAIT);
/* TID tables */
rc = alloc_tid_tabs(&sc->tids);
if (rc != 0)
goto done;
/* CPL handlers */
t4_init_connect_cpl_handlers(sc);
t4_init_l2t_cpl_handlers(sc);
t4_init_listen_cpl_handlers(sc);
t4_init_cpl_io_handlers(sc);
/* toedev ops */
tod = &td->tod;
init_toedev(tod);
tod->tod_softc = sc;
tod->tod_connect = t4_connect;
tod->tod_listen_start = t4_listen_start;
tod->tod_listen_stop = t4_listen_stop;
tod->tod_rcvd = t4_rcvd;
tod->tod_output = t4_tod_output;
tod->tod_send_rst = t4_send_rst;
tod->tod_send_fin = t4_send_fin;
tod->tod_pcb_detach = t4_pcb_detach;
tod->tod_l2_update = t4_l2_update;
tod->tod_syncache_added = t4_syncache_added;
tod->tod_syncache_removed = t4_syncache_removed;
tod->tod_syncache_respond = t4_syncache_respond;
tod->tod_offload_socket = t4_offload_socket;
for_each_port(sc, i)
TOEDEV(sc->port[i]->ifp) = &td->tod;
sc->tom_softc = td;
sc->flags |= TOM_INIT_DONE;
register_toedev(sc->tom_softc);
done:
if (rc != 0)
free_tom_data(sc, td);
return (rc);
}
static int
t4_tom_deactivate(struct adapter *sc)
{
int rc = 0;
struct tom_data *td = sc->tom_softc;
ADAPTER_LOCK_ASSERT_OWNED(sc); /* for sc->flags */
if (td == NULL)
return (0); /* XXX. KASSERT? */
if (sc->offload_map != 0)
return (EBUSY); /* at least one port has IFCAP_TOE enabled */
mtx_lock(&td->toep_list_lock);
if (!TAILQ_EMPTY(&td->toep_list))
rc = EBUSY;
mtx_unlock(&td->toep_list_lock);
mtx_lock(&td->lctx_hash_lock);
if (td->lctx_count > 0)
rc = EBUSY;
mtx_unlock(&td->lctx_hash_lock);
if (rc == 0) {
unregister_toedev(sc->tom_softc);
free_tom_data(sc, td);
sc->tom_softc = NULL;
sc->flags &= ~TOM_INIT_DONE;
}
return (rc);
}
static int
t4_tom_mod_load(void)
{
int rc;
rc = t4_register_uld(&tom_uld_info);
if (rc != 0)
t4_tom_mod_unload();
return (rc);
}
static void
tom_uninit(struct adapter *sc, void *arg __unused)
{
/* Try to free resources (works only if no port has IFCAP_TOE) */
ADAPTER_LOCK(sc);
if (sc->flags & TOM_INIT_DONE)
t4_deactivate_uld(sc, ULD_TOM);
ADAPTER_UNLOCK(sc);
}
static int
t4_tom_mod_unload(void)
{
t4_iterate(tom_uninit, NULL);
if (t4_unregister_uld(&tom_uld_info) == EBUSY)
return (EBUSY);
return (0);
}
#endif /* TCP_OFFLOAD */
static int
t4_tom_modevent(module_t mod, int cmd, void *arg)
{
int rc = 0;
#ifdef TCP_OFFLOAD
switch (cmd) {
case MOD_LOAD:
rc = t4_tom_mod_load();
break;
case MOD_UNLOAD:
rc = t4_tom_mod_unload();
break;
default:
rc = EINVAL;
}
#else
printf("t4_tom: compiled without TCP_OFFLOAD support.\n");
rc = EOPNOTSUPP;
#endif
return (rc);
}
static moduledata_t t4_tom_moddata = {
"t4_tom",
t4_tom_modevent,
0
};
MODULE_VERSION(t4_tom, 1);
MODULE_DEPEND(t4_tom, toecore, 1, 1, 1);
MODULE_DEPEND(t4_tom, t4nex, 1, 1, 1);
DECLARE_MODULE(t4_tom, t4_tom_moddata, SI_SUB_EXEC, SI_ORDER_ANY);

248
sys/dev/cxgbe/tom/t4_tom.h Normal file
View File

@ -0,0 +1,248 @@
/*-
* Copyright (c) 2012 Chelsio Communications, Inc.
* All rights reserved.
* Written by: Navdeep Parhar <np@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*
*/
#ifndef __T4_TOM_H__
#define __T4_TOM_H__
#define KTR_CXGBE KTR_SPARE3
#define LISTEN_HASH_SIZE 32
/*
* Min receive window. We want it to be large enough to accommodate receive
* coalescing, handle jumbo frames, and not trigger sender SWS avoidance.
*/
#define MIN_RCV_WND (24 * 1024U)
/*
* Max receive window supported by HW in bytes. Only a small part of it can
* be set through option0, the rest needs to be set through RX_DATA_ACK.
*/
#define MAX_RCV_WND ((1U << 27) - 1)
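/*
 * These two limits bracket the receive window advertised to the hardware.
 * For example, t4_connect() programs the initial window as
 * min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ) because the opt0 RCV_BUFSIZ
 * field works in 1KB units (hence the >> 10); anything beyond that initial
 * chunk of MAX_RCV_WND has to be opened up later via RX_DATA_ACK credits.
 */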
/* TOE PCB flags */
enum {
TPF_ATTACHED, /* a tcpcb refers to this toepcb */
TPF_FLOWC_WR_SENT, /* firmware flow context WR sent */
TPF_TX_DATA_SENT, /* some data sent */
TPF_TX_SUSPENDED, /* tx suspended for lack of resources */
TPF_SEND_FIN, /* send FIN after sending all pending data */
TPF_FIN_SENT, /* FIN has been sent */
TPF_ABORT_SHUTDOWN, /* connection abort is in progress */
TPF_CPL_PENDING, /* haven't received the last CPL */
TPF_SYNQE, /* synq_entry, not really a toepcb */
TPF_SYNQE_NEEDFREE, /* synq_entry was allocated externally */
};
struct ofld_tx_sdesc {
uint32_t plen; /* payload length */
uint8_t tx_credits; /* firmware tx credits (unit is 16B) */
};
struct toepcb {
TAILQ_ENTRY(toepcb) link; /* toep_list */
unsigned int flags; /* miscellaneous flags */
struct tom_data *td;
struct inpcb *inp; /* backpointer to host stack's PCB */
struct port_info *port; /* physical port */
struct sge_wrq *ofld_txq;
struct sge_ofld_rxq *ofld_rxq;
struct sge_wrq *ctrlq;
struct l2t_entry *l2te; /* L2 table entry used by this connection */
int tid; /* Connection identifier */
unsigned int tx_credits;/* tx WR credits (in 16 byte units) remaining */
unsigned int enqueued; /* # of bytes added to so_rcv (not yet read) */
int rx_credits; /* rx credits (in bytes) to be returned to hw */
unsigned int ulp_mode; /* ULP mode */
/* Tx software descriptor */
uint8_t txsd_total;
uint8_t txsd_pidx;
uint8_t txsd_cidx;
uint8_t txsd_avail;
struct ofld_tx_sdesc txsd[];
};
struct flowc_tx_params {
uint32_t snd_nxt;
uint32_t rcv_nxt;
unsigned int snd_space;
unsigned int mss;
};
static inline int
toepcb_flag(struct toepcb *toep, int flag)
{
return isset(&toep->flags, flag);
}
static inline void
toepcb_set_flag(struct toepcb *toep, int flag)
{
setbit(&toep->flags, flag);
}
static inline void
toepcb_clr_flag(struct toepcb *toep, int flag)
{
clrbit(&toep->flags, flag);
}
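/*
 * Usage sketch (mirrors what t4_connect() and t4_pcb_detach() do):
 *
 *	toepcb_set_flag(toep, TPF_CPL_PENDING);	// WR handed to the hardware
 *	...
 *	if (toepcb_flag(toep, TPF_CPL_PENDING) == 0)
 *		release_offload_resources(toep);
 *
 * The flags are single bits manipulated with setbit/clrbit/isset; the
 * helpers themselves are not atomic, so callers serialize through the inp
 * lock (see the INP_WLOCK asserts in t4_tom.c).
 */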
/*
* Compressed state for embryonic connections for a listener. Barely fits in
* 64B, try not to grow it further.
*/
struct synq_entry {
TAILQ_ENTRY(synq_entry) link; /* listen_ctx's synq link */
int flags; /* same as toepcb's tp_flags */
int tid;
struct listen_ctx *lctx; /* backpointer to listen ctx */
struct mbuf *syn;
uint32_t iss;
uint32_t ts;
volatile uintptr_t wr;
volatile u_int refcnt;
uint16_t l2e_idx;
uint16_t rcv_bufsize;
};
static inline int
synqe_flag(struct synq_entry *synqe, int flag)
{
return isset(&synqe->flags, flag);
}
static inline void
synqe_set_flag(struct synq_entry *synqe, int flag)
{
setbit(&synqe->flags, flag);
}
static inline void
synqe_clr_flag(struct synq_entry *synqe, int flag)
{
clrbit(&synqe->flags, flag);
}
/* listen_ctx flags */
#define LCTX_RPL_PENDING 1 /* waiting for a CPL_PASS_OPEN_RPL */
struct listen_ctx {
LIST_ENTRY(listen_ctx) link; /* listen hash linkage */
volatile int refcount;
int stid;
int flags;
struct inpcb *inp; /* listening socket's inp */
struct sge_wrq *ctrlq;
struct sge_ofld_rxq *ofld_rxq;
TAILQ_HEAD(, synq_entry) synq;
};
struct tom_data {
struct toedev tod;
/* toepcb's associated with this TOE device */
struct mtx toep_list_lock;
TAILQ_HEAD(, toepcb) toep_list;
LIST_HEAD(, listen_ctx) *listen_hash;
u_long listen_mask;
int lctx_count; /* # of lctx in the hash table */
struct mtx lctx_hash_lock;
};
static inline struct tom_data *
tod_td(struct toedev *tod)
{
return (member2struct(tom_data, tod, tod));
}
static inline struct adapter *
td_adapter(struct tom_data *td)
{
return (td->tod.tod_softc);
}
/* t4_tom.c */
struct toepcb *alloc_toepcb(struct port_info *, int, int, int);
void free_toepcb(struct toepcb *);
void offload_socket(struct socket *, struct toepcb *);
void undo_offload_socket(struct socket *);
void final_cpl_received(struct toepcb *);
void insert_tid(struct adapter *, int, void *);
void *lookup_tid(struct adapter *, int);
void update_tid(struct adapter *, int, void *);
void remove_tid(struct adapter *, int);
void release_tid(struct adapter *, int, struct sge_wrq *);
int find_best_mtu_idx(struct adapter *, struct in_conninfo *, int);
u_long select_rcv_wnd(struct socket *);
int select_rcv_wscale(void);
uint64_t calc_opt0(struct socket *, struct port_info *, struct l2t_entry *,
int, int, int, int);
uint32_t select_ntuple(struct port_info *, struct l2t_entry *, uint32_t);
/* t4_connect.c */
void t4_init_connect_cpl_handlers(struct adapter *);
int t4_connect(struct toedev *, struct socket *, struct rtentry *,
struct sockaddr *);
/* t4_listen.c */
void t4_init_listen_cpl_handlers(struct adapter *);
int t4_listen_start(struct toedev *, struct tcpcb *);
int t4_listen_stop(struct toedev *, struct tcpcb *);
void t4_syncache_added(struct toedev *, void *);
void t4_syncache_removed(struct toedev *, void *);
int t4_syncache_respond(struct toedev *, void *, struct mbuf *);
int do_abort_req_synqe(struct sge_iq *, const struct rss_header *,
struct mbuf *);
int do_abort_rpl_synqe(struct sge_iq *, const struct rss_header *,
struct mbuf *);
void t4_offload_socket(struct toedev *, void *, struct socket *);
/* t4_cpl_io.c */
void t4_init_cpl_io_handlers(struct adapter *);
void send_abort_rpl(struct adapter *, struct sge_wrq *, int, int);
void send_flowc_wr(struct toepcb *, struct flowc_tx_params *);
void send_reset(struct adapter *, struct toepcb *, uint32_t);
void make_established(struct toepcb *, uint32_t, uint32_t, uint16_t);
void t4_rcvd(struct toedev *, struct tcpcb *);
int t4_tod_output(struct toedev *, struct tcpcb *);
int t4_send_fin(struct toedev *, struct tcpcb *);
int t4_send_rst(struct toedev *, struct tcpcb *);
#endif

View File

@ -0,0 +1,405 @@
/*-
* Copyright (c) 2012 Chelsio Communications, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/sbuf.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/ethernet.h>
#include <net/if_vlan_var.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/toecore.h>
#include "common/common.h"
#include "common/jhash.h"
#include "common/t4_msg.h"
#include "tom/t4_tom_l2t.h"
#include "tom/t4_tom.h"
#define VLAN_NONE 0xfff
#define SA(x) ((struct sockaddr *)(x))
#define SIN(x) ((struct sockaddr_in *)(x))
#define SINADDR(x) (SIN(x)->sin_addr.s_addr)
static inline void
l2t_hold(struct l2t_data *d, struct l2t_entry *e)
{
if (atomic_fetchadd_int(&e->refcnt, 1) == 0) /* 0 -> 1 transition */
atomic_subtract_int(&d->nfree, 1);
}
static inline unsigned int
arp_hash(const uint32_t key, int ifindex)
{
return jhash_2words(key, ifindex, 0) & (L2T_SIZE - 1);
}
/*
* Add a WR to an L2T entry's queue of work requests awaiting resolution.
* Must be called with the entry's lock held.
*/
static inline void
arpq_enqueue(struct l2t_entry *e, struct wrqe *wr)
{
mtx_assert(&e->lock, MA_OWNED);
STAILQ_INSERT_TAIL(&e->wr_list, wr, link);
}
static inline void
send_pending(struct adapter *sc, struct l2t_entry *e)
{
struct wrqe *wr;
mtx_assert(&e->lock, MA_OWNED);
while ((wr = STAILQ_FIRST(&e->wr_list)) != NULL) {
STAILQ_REMOVE_HEAD(&e->wr_list, link);
t4_wrq_tx(sc, wr);
}
}
static void
resolution_failed_for_wr(struct wrqe *wr)
{
log(LOG_ERR, "%s: leaked work request %p, wr_len %d", __func__, wr,
wr->wr_len);
/* free(wr, M_CXGBE); */
}
static void
resolution_failed(struct l2t_entry *e)
{
struct wrqe *wr;
mtx_assert(&e->lock, MA_OWNED);
while ((wr = STAILQ_FIRST(&e->wr_list)) != NULL) {
STAILQ_REMOVE_HEAD(&e->wr_list, link);
resolution_failed_for_wr(wr);
}
}
static void
update_entry(struct adapter *sc, struct l2t_entry *e, uint8_t *lladdr,
uint16_t vtag)
{
mtx_assert(&e->lock, MA_OWNED);
/*
* The entry may be in active use (e->refcount > 0) or not. We update
* it even when it's not as this simplifies the case where we decide to
* reuse the entry later.
*/
if (lladdr == NULL &&
(e->state == L2T_STATE_RESOLVING || e->state == L2T_STATE_FAILED)) {
/*
* Never got a valid L2 address for this one. Just mark it as
* failed instead of removing it from the hash (for which we'd
* need to wlock the table).
*/
e->state = L2T_STATE_FAILED;
resolution_failed(e);
return;
} else if (lladdr == NULL) {
/* Valid or already-stale entry was deleted (or expired) */
KASSERT(e->state == L2T_STATE_VALID ||
e->state == L2T_STATE_STALE,
("%s: lladdr NULL, state %d", __func__, e->state));
e->state = L2T_STATE_STALE;
} else {
if (e->state == L2T_STATE_RESOLVING ||
e->state == L2T_STATE_FAILED ||
memcmp(e->dmac, lladdr, ETHER_ADDR_LEN)) {
/* unresolved -> resolved; or dmac changed */
memcpy(e->dmac, lladdr, ETHER_ADDR_LEN);
e->vlan = vtag;
t4_write_l2e(sc, e, 1);
}
e->state = L2T_STATE_VALID;
}
}
static int
resolve_entry(struct adapter *sc, struct l2t_entry *e)
{
struct tom_data *td = sc->tom_softc;
struct toedev *tod = &td->tod;
struct sockaddr_in sin = {0};
uint8_t dmac[ETHER_ADDR_LEN];
uint16_t vtag = VLAN_NONE;
int rc;
sin.sin_family = AF_INET;
sin.sin_len = sizeof(struct sockaddr_in);
SINADDR(&sin) = e->addr;
rc = toe_l2_resolve(tod, e->ifp, SA(&sin), dmac, &vtag);
if (rc == EWOULDBLOCK)
return (rc);
mtx_lock(&e->lock);
update_entry(sc, e, rc == 0 ? dmac : NULL, vtag);
mtx_unlock(&e->lock);
return (rc);
}
int
t4_l2t_send_slow(struct adapter *sc, struct wrqe *wr, struct l2t_entry *e)
{
again:
switch (e->state) {
case L2T_STATE_STALE: /* entry is stale, kick off revalidation */
if (resolve_entry(sc, e) != EWOULDBLOCK)
goto again; /* entry updated, re-examine state */
/* Fall through */
case L2T_STATE_VALID: /* fast-path, send the packet on */
t4_wrq_tx(sc, wr);
return (0);
case L2T_STATE_RESOLVING:
case L2T_STATE_SYNC_WRITE:
mtx_lock(&e->lock);
if (e->state != L2T_STATE_SYNC_WRITE &&
e->state != L2T_STATE_RESOLVING) {
/* state changed by the time we got here */
mtx_unlock(&e->lock);
goto again;
}
arpq_enqueue(e, wr);
mtx_unlock(&e->lock);
if (resolve_entry(sc, e) == EWOULDBLOCK)
break;
mtx_lock(&e->lock);
if (e->state == L2T_STATE_VALID && !STAILQ_EMPTY(&e->wr_list))
send_pending(sc, e);
if (e->state == L2T_STATE_FAILED)
resolution_failed(e);
mtx_unlock(&e->lock);
break;
case L2T_STATE_FAILED:
resolution_failed_for_wr(wr);
return (EHOSTUNREACH);
}
return (0);
}
/*
* Called when an L2T entry has no more users. The entry is left in the hash
* table since it is likely to be reused but we also bump nfree to indicate
* that the entry can be reallocated for a different neighbor. We also drop
* the existing neighbor reference in case the neighbor is going away and is
* waiting on our reference.
*
* Because entries can be reallocated to other neighbors once their ref count
* drops to 0 we need to take the entry's lock to avoid races with a new
* incarnation.
*/
static int
do_l2t_write_rpl2(struct sge_iq *iq, const struct rss_header *rss,
struct mbuf *m)
{
struct adapter *sc = iq->adapter;
const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1);
unsigned int tid = GET_TID(rpl);
unsigned int idx = tid & (L2T_SIZE - 1);
int rc;
rc = do_l2t_write_rpl(iq, rss, m);
if (rc != 0)
return (rc);
if (tid & F_SYNC_WR) {
struct l2t_entry *e = &sc->l2t->l2tab[idx];
mtx_lock(&e->lock);
if (e->state != L2T_STATE_SWITCHING) {
send_pending(sc, e);
e->state = L2T_STATE_VALID;
}
mtx_unlock(&e->lock);
}
return (0);
}
void
t4_init_l2t_cpl_handlers(struct adapter *sc)
{
t4_register_cpl_handler(sc, CPL_L2T_WRITE_RPL, do_l2t_write_rpl2);
}
void
t4_uninit_l2t_cpl_handlers(struct adapter *sc)
{
t4_register_cpl_handler(sc, CPL_L2T_WRITE_RPL, do_l2t_write_rpl);
}
/*
* The TOE wants an L2 table entry that it can use to reach the next hop over
* the specified port. Produce such an entry - create one if needed.
*
* Note that the ifnet could be a pseudo-device like if_vlan, if_lagg, etc. on
* top of the real cxgbe interface.
*/
struct l2t_entry *
t4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa)
{
struct l2t_entry *e;
struct l2t_data *d = pi->adapter->l2t;
uint32_t addr = SINADDR(sa);
int hash = arp_hash(addr, ifp->if_index);
unsigned int smt_idx = pi->port_id;
if (sa->sa_family != AF_INET)
return (NULL); /* XXX: no IPv6 support right now */
#ifndef VLAN_TAG
if (ifp->if_type == IFT_L2VLAN)
return (NULL);
#endif
rw_wlock(&d->lock);
for (e = d->l2tab[hash].first; e; e = e->next) {
if (e->addr == addr && e->ifp == ifp && e->smt_idx == smt_idx) {
l2t_hold(d, e);
goto done;
}
}
/* Need to allocate a new entry */
e = t4_alloc_l2e(d);
if (e) {
mtx_lock(&e->lock); /* avoid race with t4_l2t_free */
e->next = d->l2tab[hash].first;
d->l2tab[hash].first = e;
e->state = L2T_STATE_RESOLVING;
e->addr = addr;
e->ifp = ifp;
e->smt_idx = smt_idx;
e->hash = hash;
e->lport = pi->lport;
atomic_store_rel_int(&e->refcnt, 1);
#ifdef VLAN_TAG
if (ifp->if_type == IFT_L2VLAN)
VLAN_TAG(ifp, &e->vlan);
else
e->vlan = VLAN_NONE;
#endif
mtx_unlock(&e->lock);
}
done:
rw_wunlock(&d->lock);
return e;
}
/*
* Called when the host's ARP layer makes a change to some entry that is loaded
* into the HW L2 table.
*/
void
t4_l2_update(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
uint8_t *lladdr, uint16_t vtag)
{
struct adapter *sc = tod->tod_softc;
struct l2t_entry *e;
struct l2t_data *d = sc->l2t;
uint32_t addr = SINADDR(sa);
int hash = arp_hash(addr, ifp->if_index);
KASSERT(d != NULL, ("%s: no L2 table", __func__));
rw_rlock(&d->lock);
for (e = d->l2tab[hash].first; e; e = e->next) {
if (e->addr == addr && e->ifp == ifp) {
mtx_lock(&e->lock);
if (atomic_load_acq_int(&e->refcnt))
goto found;
e->state = L2T_STATE_STALE;
mtx_unlock(&e->lock);
break;
}
}
rw_runlock(&d->lock);
/*
* This is of no interest to us. We've never had an offloaded
* connection to this destination, and we aren't attempting one right
* now.
*/
return;
found:
rw_runlock(&d->lock);
KASSERT(e->state != L2T_STATE_UNUSED,
("%s: unused entry in the hash.", __func__));
update_entry(sc, e, lladdr, vtag);
mtx_unlock(&e->lock);
}
#endif

View File

@ -0,0 +1,53 @@
/*-
* Copyright (c) 2012 Chelsio Communications, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*
*/
#ifndef __T4_TOM_L2T_H
#define __T4_TOM_L2T_H
#include "t4_l2t.h"
int t4_l2t_send_slow(struct adapter *, struct wrqe *, struct l2t_entry *);
struct l2t_entry *t4_l2t_get(struct port_info *, struct ifnet *,
struct sockaddr *);
void t4_l2_update(struct toedev *, struct ifnet *, struct sockaddr *,
uint8_t *, uint16_t);
void t4_init_l2t_cpl_handlers(struct adapter *);
void t4_uninit_l2t_cpl_handlers(struct adapter *);
static inline int
t4_l2t_send(struct adapter *sc, struct wrqe *wr, struct l2t_entry *e)
{
if (__predict_true(e->state == L2T_STATE_VALID)) {
t4_wrq_tx(sc, wr);
return (0);
} else
return (t4_l2t_send_slow(sc, wr, e));
}
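/*
 * Typical call pattern (as in t4_connect()): look up or create the L2 entry
 * first, then hand the work request to this fast path, which falls back to
 * t4_l2t_send_slow() only while the entry still needs resolution:
 *
 *	e = t4_l2t_get(pi, ifp, sa);	// sa: gateway or destination sockaddr
 *	...
 *	rc = t4_l2t_send(sc, wr, e);
 */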
#endif /* __T4_TOM_L2T_H */

View File

@ -30,6 +30,7 @@ options SCHED_ULE # ULE scheduler
options PREEMPTION # Enable kernel thread preemption
options INET # InterNETworking
options INET6 # IPv6 communications protocols
options TCP_OFFLOAD # TCP offload
options SCTP # Stream Control Transmission Protocol
options FFS # Berkeley Fast Filesystem
options SOFTUPDATES # Enable FFS soft updates support

View File

@ -7,7 +7,7 @@ cpu I686_CPU
ident XEN
makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols
makeoptions WITHOUT_MODULES="aha ahb amd cxgb dpt drm drm2 hptmv ida malo mps mwl nve sound sym trm xfs"
makeoptions WITHOUT_MODULES="aha ahb amd cxgb dpt drm drm2 hptmv ida malo mps mwl nve rdma sound sym trm xfs"
options SCHED_ULE # ULE scheduler
options PREEMPTION # Enable kernel thread preemption

View File

@ -314,6 +314,7 @@ SUBDIR= \
${_ti} \
tl \
tmpfs \
${_toecore} \
${_tpm} \
trm \
${_twa} \
@ -392,6 +393,7 @@ _random= random
.if (${MK_INET_SUPPORT} != "no" || ${MK_INET6_SUPPORT} != "no") || \
defined(ALL_MODULES)
_carp= carp
_toecore= toecore
.endif
.if ${MK_INET_SUPPORT} != "no" || defined(ALL_MODULES)

View File

@ -1,39 +1,12 @@
# $FreeBSD$
SUBDIR= cxgb
SUBDIR+= ${_toecore}
SUBDIR+= cxgb_t3fw
SUBDIR+= ${_tom}
SUBDIR+= ${_iw_cxgb}
SUBDIR+= cxgb_t3fw
.if defined(SYSDIR)
_sysdir = ${SYSDIR}
.endif
# Based on bsd.kmod.mk but we don't modify SYSDIR in this one.
.for _dir in ${.CURDIR}/../.. ${.CURDIR}/../../.. ${.CURDIR}/../../../.. \
/sys /usr/src/sys
.if !defined(_sysdir) && exists(${_dir}/kern/) && exists(${_dir}/conf/kmod.mk)
_sysdir = ${_dir}
.endif
.endfor
.if !defined(_sysdir) || !exists(${_sysdir}/kern/) || \
!exists(${_sysdir}/conf/kmod.mk)
.error "can't find kernel source tree"
.endif
_toe_header = ${_sysdir}/netinet/toedev.h
.if exists(${_toe_header})
_toecore = toecore
#_tom = tom
.endif
.if ${MACHINE_CPUARCH} == "i386" && exists(${_toe_header})
_iw_cxgb = iw_cxgb
.endif
.if ${MACHINE_CPUARCH} == "amd64" && exists(${_toe_header})
_iw_cxgb = iw_cxgb
.if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386"
_tom= tom
_iw_cxgb= iw_cxgb
.endif
.include <bsd.subdir.mk>

View File

@ -8,7 +8,7 @@ CXGB = ${.CURDIR}/../../../dev/cxgb
KMOD= if_cxgb
SRCS= cxgb_mc5.c cxgb_vsc8211.c cxgb_ael1002.c cxgb_mv88e1xxx.c
SRCS+= cxgb_xgmac.c cxgb_vsc7323.c cxgb_t3_hw.c cxgb_main.c cxgb_aq100x.c
SRCS+= cxgb_sge.c cxgb_offload.c cxgb_tn1010.c
SRCS+= cxgb_sge.c cxgb_tn1010.c
SRCS+= device_if.h bus_if.h pci_if.h
SRCS+= opt_inet.h opt_inet6.h opt_zero.h opt_sched.h
SRCS+= uipc_mvec.c
@ -19,6 +19,7 @@ CFLAGS+= -g -DDEFAULT_JUMBO -I${CXGB}
.if ${MK_INET_SUPPORT} != "no"
opt_inet.h:
@echo "#define INET 1" > ${.TARGET}
@echo "#define TCP_OFFLOAD 1" >> ${.TARGET}
.endif
.if ${MK_INET6_SUPPORT} != "no"

View File

@ -1,5 +1,7 @@
# $FreeBSD$
.include <bsd.own.mk>
CXGB = ${.CURDIR}/../../../dev/cxgb
.PATH: ${CXGB}/ulp/iw_cxgb
@ -8,8 +10,15 @@ SRCS= iw_cxgb.c iw_cxgb_cm.c iw_cxgb_hal.c
SRCS+= iw_cxgb_provider.c iw_cxgb_qp.c iw_cxgb_resource.c
SRCS+= iw_cxgb_ev.c iw_cxgb_mem.c iw_cxgb_dbg.c iw_cxgb_cq.c
SRCS+= bus_if.h device_if.h opt_sched.h pci_if.h pcib_if.h opt_ktr.h
SRCS+= opt_inet.h
CFLAGS+= -g -I${CXGB}
#CFLAGS+= -DDEBUG
SRCS+= opt_inet.h opt_ofed.h vnode_if.h
CFLAGS+= -I${CXGB} -I${.CURDIR}/../../../ofed/include -DLINUX_TYPES_DEFINED
.if !defined(KERNBUILDDIR)
.if ${MK_INET_SUPPORT} != "no"
opt_inet.h:
echo "#define INET 1" > ${.TARGET}
echo "#define TCP_OFFLOAD 1" >> ${.TARGET}
.endif
.endif
.include <bsd.kmod.mk>

View File

@ -1,11 +0,0 @@
# $FreeBSD$
CXGB = ${.CURDIR}/../../../dev/cxgb
.PATH: ${CXGB}/ulp/toecore
KMOD= toecore
SRCS= toedev.c
SRCS+= device_if.h bus_if.h pci_if.h opt_sched.h opt_inet.h
CFLAGS+= -g -I${CXGB}
.include <bsd.kmod.mk>

View File

@ -1,15 +1,25 @@
# $FreeBSD$
.include <bsd.own.mk>
CXGB = ${.CURDIR}/../../../dev/cxgb
.PATH: ${CXGB}/ulp/tom
KMOD= tom
SRCS= cxgb_tom.c cxgb_cpl_io.c cxgb_listen.c cxgb_tom_sysctl.c cxgb_cpl_socket.c
SRCS+= cxgb_ddp.c cxgb_vm.c cxgb_l2t.c cxgb_tcp_offload.c
KMOD= t3_tom
SRCS= cxgb_tom.c cxgb_cpl_io.c cxgb_listen.c cxgb_l2t.c
SRCS+= opt_compat.h opt_inet.h opt_inet6.h opt_ipsec.h
SRCS+= opt_tcpdebug.h opt_ddb.h opt_sched.h opt_global.h opt_ktr.h
SRCS+= device_if.h bus_if.h pci_if.h
CFLAGS+= -g -I${CXGB}
#CFLAGS+= -DDEBUG_PRINT -DDEBUG
.if !defined(KERNBUILDDIR)
.if ${MK_INET_SUPPORT} != "no"
opt_inet.h:
echo "#define INET 1" > ${.TARGET}
echo "#define TCP_OFFLOAD 1" >> ${.TARGET}
.endif
.endif
.include <bsd.kmod.mk>

View File

@ -4,5 +4,10 @@
SUBDIR = if_cxgbe
SUBDIR+= firmware
SUBDIR+= ${_tom}
.if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386"
_tom= tom
.endif
.include <bsd.subdir.mk>

View File

@ -0,0 +1,15 @@
#
# $FreeBSD$
#
CXGBE = ${.CURDIR}/../../../dev/cxgbe
.PATH: ${CXGBE}/tom
KMOD = t4_tom
SRCS = t4_tom.c t4_connect.c t4_listen.c t4_cpl_io.c t4_tom_l2t.c
SRCS+= device_if.h bus_if.h pci_if.h
SRCS+= opt_inet.h
CFLAGS+= -I${CXGBE}
.include <bsd.kmod.mk>

View File

@ -6,5 +6,7 @@ RDMA= ${.CURDIR}/../../../contrib/rdma/krping
KMOD= krping
SRCS= krping.c krping_dev.c getopt.c
SRCS+= bus_if.h device_if.h opt_sched.h pci_if.h pcib_if.h
SRCS+= vnode_if.h
CFLAGS+= -I${.CURDIR}/../../../ofed/include
.include <bsd.kmod.mk>

View File

@ -0,0 +1,9 @@
# $FreeBSD$
.PATH: ${.CURDIR}/../../netinet
KMOD= toecore
SRCS= toecore.c
SRCS+= opt_ofed.h
.include <bsd.kmod.mk>

View File

@ -712,6 +712,8 @@ drbr_inuse(struct ifnet *ifp, struct buf_ring *br)
#define IF_MINMTU 72
#define IF_MAXMTU 65535
#define TOEDEV(ifp) ((ifp)->if_llsoftc)
#endif /* _KERNEL */
/*

View File

@ -746,8 +746,8 @@ vlan_modevent(module_t mod, int type, void *data)
vlan_trunk_cap_p = NULL;
vlan_trunkdev_p = NULL;
vlan_tag_p = NULL;
vlan_cookie_p = vlan_cookie;
vlan_setcookie_p = vlan_setcookie;
vlan_cookie_p = NULL;
vlan_setcookie_p = NULL;
vlan_devat_p = NULL;
VLAN_LOCK_DESTROY();
if (bootverbose)
@ -1503,6 +1503,22 @@ vlan_capabilities(struct ifvlan *ifv)
ifp->if_capenable &= ~(p->if_capenable & IFCAP_TSO);
ifp->if_hwassist &= ~(p->if_hwassist & CSUM_TSO);
}
/*
* If the parent interface can offload TCP connections over VLANs then
* propagate its TOE capability to the VLAN interface.
*
* All TOE drivers in the tree today can deal with VLANs. If this
* changes then IFCAP_VLAN_TOE should be promoted to a full capability
* with its own bit.
*/
#define IFCAP_VLAN_TOE IFCAP_TOE
if (p->if_capabilities & IFCAP_VLAN_TOE)
ifp->if_capabilities |= p->if_capabilities & IFCAP_TOE;
if (p->if_capenable & IFCAP_VLAN_TOE) {
TOEDEV(ifp) = TOEDEV(p);
ifp->if_capenable |= p->if_capenable & IFCAP_TOE;
}
}
static void

View File

@ -180,6 +180,17 @@ arptimer(void *arg)
callout_active(&lle->la_timer)) {
callout_stop(&lle->la_timer);
LLE_REMREF(lle);
if (lle->la_flags != LLE_DELETED) {
int evt;
if (lle->la_flags & LLE_VALID)
evt = LLENTRY_EXPIRED;
else
evt = LLENTRY_TIMEDOUT;
EVENTHANDLER_INVOKE(lle_event, lle, evt);
}
pkts_dropped = llentry_free(lle);
ARPSTAT_ADD(dropped, pkts_dropped);
ARPSTAT_INC(timeouts);
@ -726,7 +737,7 @@ match:
(void)memcpy(&la->ll_addr, ar_sha(ah), ifp->if_addrlen);
la->la_flags |= LLE_VALID;
EVENTHANDLER_INVOKE(arp_update_event, la);
EVENTHANDLER_INVOKE(lle_event, la, LLENTRY_RESOLVED);
if (!(la->la_flags & LLE_STATIC)) {
int canceled;

View File

@ -122,8 +122,14 @@ void arp_ifinit2(struct ifnet *, struct ifaddr *, u_char *);
void arp_ifscrub(struct ifnet *, uint32_t);
#include <sys/eventhandler.h>
typedef void (*llevent_arp_update_fn)(void *, struct llentry *);
EVENTHANDLER_DECLARE(arp_update_event, llevent_arp_update_fn);
enum {
LLENTRY_RESOLVED,
LLENTRY_TIMEDOUT,
LLENTRY_DELETED,
LLENTRY_EXPIRED,
};
typedef void (*lle_event_fn)(void *, struct llentry *, int);
EVENTHANDLER_DECLARE(lle_event, lle_event_fn);
#endif

View File

@ -1469,7 +1469,7 @@ in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3add
if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) {
LLE_WLOCK(lle);
lle->la_flags = LLE_DELETED;
EVENTHANDLER_INVOKE(arp_update_event, lle);
EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED);
LLE_WUNLOCK(lle);
#ifdef DIAGNOSTIC
log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);

View File

@ -105,6 +105,9 @@ __FBSDID("$FreeBSD$");
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif /* TCPDEBUG */
#ifdef TCP_OFFLOAD
#include <netinet/tcp_offload.h>
#endif
#ifdef IPSEC
#include <netipsec/ipsec.h>
@ -958,6 +961,14 @@ relocked:
goto dropwithreset;
}
#ifdef TCP_OFFLOAD
if (tp->t_flags & TF_TOE) {
tcp_offload_input(tp, m);
m = NULL; /* consumed by the TOE driver */
goto dropunlock;
}
#endif
/*
* We've identified a valid inpcb, but it could be that we need an
* inpcbinfo write lock but don't hold it. In this case, attempt to
@ -1320,7 +1331,7 @@ relocked:
(void *)tcp_saveipgen, &tcp_savetcp, 0);
#endif
tcp_dooptions(&to, optp, optlen, TO_SYN);
syncache_add(&inc, &to, th, inp, &so, m);
syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL);
/*
* Entry added to syncache and mbuf consumed.
* Everything already unlocked by syncache_add().

View File

@ -1,145 +1,176 @@
/*-
* Copyright (c) 2007, Chelsio Inc.
* Copyright (c) 2012 Chelsio Communications, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Neither the name of the Chelsio Corporation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockopt.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/route.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_offload.h>
#include <netinet/toedev.h>
#define TCPOUTFLAGS
#include <netinet/tcp_fsm.h>
#include <netinet/toecore.h>
uint32_t toedev_registration_count;
int registered_toedevs;
/*
* Provide an opportunity for a TOE driver to offload.
*/
int
tcp_offload_connect(struct socket *so, struct sockaddr *nam)
{
struct ifnet *ifp;
struct toedev *tdev;
struct toedev *tod;
struct rtentry *rt;
int error;
int error = EOPNOTSUPP;
if (toedev_registration_count == 0)
return (EINVAL);
/*
* Look up the route used for the connection to
* determine if it uses an interface capable of
* offloading the connection.
*/
rt = rtalloc1(nam, 0 /*report*/, 0 /*ignflags*/);
if (rt)
INP_WLOCK_ASSERT(sotoinpcb(so));
KASSERT(nam->sa_family == AF_INET || nam->sa_family == AF_INET6,
("%s: called with sa_family %d", __func__, nam->sa_family));
if (registered_toedevs == 0)
return (error);
rt = rtalloc1(nam, 0, 0);
if (rt)
RT_UNLOCK(rt);
else
else
return (EHOSTUNREACH);
ifp = rt->rt_ifp;
if ((ifp->if_capenable & IFCAP_TOE) == 0) {
error = EINVAL;
goto fail;
}
tdev = TOEDEV(ifp);
if (tdev == NULL) {
error = EPERM;
goto fail;
}
if (tdev->tod_can_offload(tdev, so) == 0) {
error = EPERM;
goto fail;
}
return (tdev->tod_connect(tdev, so, rt, nam));
fail:
if (nam->sa_family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4))
goto done;
if (nam->sa_family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6))
goto done;
tod = TOEDEV(ifp);
if (tod != NULL)
error = tod->tod_connect(tod, so, rt, nam);
done:
RTFREE(rt);
return (error);
}
void
tcp_offload_listen_start(struct tcpcb *tp)
{
/*
* This file contains code as a short-term staging area before it is moved in
* to sys/netinet/tcp_offload.c
*/
INP_WLOCK_ASSERT(tp->t_inpcb);
EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp);
}
void
tcp_offload_twstart(struct tcpcb *tp)
tcp_offload_listen_stop(struct tcpcb *tp)
{
INP_INFO_WLOCK(&V_tcbinfo);
INP_WLOCK(tp->t_inpcb);
tcp_twstart(tp);
INP_INFO_WUNLOCK(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb);
EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp);
}
struct tcpcb *
tcp_offload_close(struct tcpcb *tp)
void
tcp_offload_input(struct tcpcb *tp, struct mbuf *m)
{
struct toedev *tod = tp->tod;
INP_INFO_WLOCK(&V_tcbinfo);
INP_WLOCK(tp->t_inpcb);
tp = tcp_close(tp);
INP_INFO_WUNLOCK(&V_tcbinfo);
if (tp)
INP_WUNLOCK(tp->t_inpcb);
KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
INP_WLOCK_ASSERT(tp->t_inpcb);
return (tp);
tod->tod_input(tod, tp, m);
}
struct tcpcb *
tcp_offload_drop(struct tcpcb *tp, int error)
int
tcp_offload_output(struct tcpcb *tp)
{
struct toedev *tod = tp->tod;
int error, flags;
INP_INFO_WLOCK(&V_tcbinfo);
INP_WLOCK(tp->t_inpcb);
tp = tcp_drop(tp, error);
INP_INFO_WUNLOCK(&V_tcbinfo);
if (tp)
INP_WUNLOCK(tp->t_inpcb);
KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
INP_WLOCK_ASSERT(tp->t_inpcb);
return (tp);
flags = tcp_outflags[tp->t_state];
if (flags & TH_RST) {
/* XXX: avoid repeated calls like we do for FIN */
error = tod->tod_send_rst(tod, tp);
} else if ((flags & TH_FIN || tp->t_flags & TF_NEEDFIN) &&
(tp->t_flags & TF_SENTFIN) == 0) {
error = tod->tod_send_fin(tod, tp);
if (error == 0)
tp->t_flags |= TF_SENTFIN;
} else
error = tod->tod_output(tod, tp);
return (error);
}
void
tcp_offload_rcvd(struct tcpcb *tp)
{
struct toedev *tod = tp->tod;
KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
INP_WLOCK_ASSERT(tp->t_inpcb);
tod->tod_rcvd(tod, tp);
}
void
tcp_offload_ctloutput(struct tcpcb *tp, int sopt_dir, int sopt_name)
{
struct toedev *tod = tp->tod;
KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
INP_WLOCK_ASSERT(tp->t_inpcb);
tod->tod_ctloutput(tod, tp, sopt_dir, sopt_name);
}
void
tcp_offload_detach(struct tcpcb *tp)
{
struct toedev *tod = tp->tod;
KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
INP_WLOCK_ASSERT(tp->t_inpcb);
tod->tod_pcb_detach(tod, tp);
}

View File

@ -1,30 +1,30 @@
/*-
* Copyright (c) 2007, Chelsio Inc.
* Copyright (c) 2012 Chelsio Communications, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Neither the name of the Chelsio Corporation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*
*/
#ifndef _NETINET_TCP_OFFLOAD_H_
@ -34,321 +34,15 @@
#error "no user-serviceable parts inside"
#endif
/*
* A driver publishes that it provides offload services
* by setting IFCAP_TOE in the ifnet. The offload connect
* will bypass any further work if the interface that a
* connection would use does not support TCP offload.
*
* The TOE API assumes that the tcp offload engine can offload the
 * entire connection from setup to teardown, with some provision
 * being made to allow the software stack to handle time wait. If
* the device does not meet these criteria, it is the driver's responsibility
* to overload the functions that it needs to in tcp_usrreqs and make
* its own calls to tcp_output if it needs to do so.
*
* There is currently no provision for the device advertising the congestion
* control algorithms it supports as there is currently no API for querying
* an operating system for the protocols that it has loaded. This is a desirable
* future extension.
*
*
*
* It is assumed that individuals deploying TOE will want connections
* to be offloaded without software changes so all connections on an
* interface providing TOE are offloaded unless the SO_NO_OFFLOAD
* flag is set on the socket.
*
*
* The toe_usrreqs structure constitutes the TOE driver's
* interface to the TCP stack for functionality that doesn't
* interact directly with userspace. If one wants to provide
* (optional) functionality to do zero-copy to/from
* userspace one still needs to override soreceive/sosend
* with functions that fault in and pin the user buffers.
*
* + tu_send
* - tells the driver that new data may have been added to the
* socket's send buffer - the driver should not fail if the
* buffer is in fact unchanged
* - the driver is responsible for providing credits (bytes in the send window)
* back to the socket by calling sbdrop() as segments are acknowledged.
* - The driver expects the inpcb lock to be held - the driver is expected
* not to drop the lock. Hence the driver is not allowed to acquire the
* pcbinfo lock during this call.
*
* + tu_rcvd
* - returns credits to the driver and triggers window updates
* to the peer (a credit as used here is a byte in the peer's receive window)
* - the driver is expected to determine how many bytes have been
* consumed and credit that back to the card so that it can grow
* the window again by maintaining its own state between invocations.
* - In principle this could be used to shrink the window as well as
* grow the window, although it is not used for that now.
* - this function needs to correctly handle being called any number of
* times without any bytes being consumed from the receive buffer.
* - The driver expects the inpcb lock to be held - the driver is expected
* not to drop the lock. Hence the driver is not allowed to acquire the
* pcbinfo lock during this call.
*
* + tu_disconnect
* - tells the driver to send FIN to peer
* - driver is expected to send the remaining data and then do a clean half close
* - disconnect implies at least half-close so only send, reset, and detach
* are legal
* - the driver is expected to handle transition through the shutdown
* state machine and allow the stack to support SO_LINGER.
* - The driver expects the inpcb lock to be held - the driver is expected
* not to drop the lock. Hence the driver is not allowed to acquire the
* pcbinfo lock during this call.
*
* + tu_reset
* - closes the connection and sends a RST to peer
 * - driver is expected to trigger an RST and detach the toepcb
* - no further calls are legal after reset
* - The driver expects the inpcb lock to be held - the driver is expected
* not to drop the lock. Hence the driver is not allowed to acquire the
* pcbinfo lock during this call.
*
* The following fields in the tcpcb are expected to be referenced by the driver:
* + iss
* + rcv_nxt
* + rcv_wnd
* + snd_isn
* + snd_max
* + snd_nxt
* + snd_una
* + t_flags
* + t_inpcb
* + t_maxseg
* + t_toe
*
* The following fields in the inpcb are expected to be referenced by the driver:
* + inp_lport
* + inp_fport
* + inp_laddr
* + inp_fport
* + inp_socket
* + inp_ip_tos
*
* The following fields in the socket are expected to be referenced by the
* driver:
* + so_comp
* + so_error
* + so_linger
* + so_options
* + so_rcv
* + so_snd
* + so_state
* + so_timeo
*
* These functions all return 0 on success and can return the following errors
* as appropriate:
* + EPERM:
* + ENOBUFS: memory allocation failed
* + EMSGSIZE: MTU changed during the call
* + EHOSTDOWN:
* + EHOSTUNREACH:
* + ENETDOWN:
* * ENETUNREACH: the peer is no longer reachable
*
* + tu_detach
* - tells driver that the socket is going away so disconnect
* the toepcb and free appropriate resources
* - allows the driver to cleanly handle the case of connection state
* outliving the socket
* - no further calls are legal after detach
* - the driver is expected to provide its own synchronization between
* detach and receiving new data.
*
* + tu_syncache_event
* - even if it is not actually needed, the driver is expected to
* call syncache_add for the initial SYN and then syncache_expand
* for the SYN,ACK
* - tells driver that a connection either has not been added or has
* been dropped from the syncache
* - the driver is expected to maintain state that lives outside the
* software stack so the syncache needs to be able to notify the
* toe driver that the software stack is not going to create a connection
* for a received SYN
* - The driver is responsible for any synchronization required between
* the syncache dropping an entry and the driver processing the SYN,ACK.
*
*/
struct toe_usrreqs {
int (*tu_send)(struct tcpcb *tp);
int (*tu_rcvd)(struct tcpcb *tp);
int (*tu_disconnect)(struct tcpcb *tp);
int (*tu_reset)(struct tcpcb *tp);
void (*tu_detach)(struct tcpcb *tp);
void (*tu_syncache_event)(int event, void *toep);
};
extern int registered_toedevs;

int  tcp_offload_connect(struct socket *, struct sockaddr *);
void tcp_offload_listen_start(struct tcpcb *);
void tcp_offload_listen_stop(struct tcpcb *);
void tcp_offload_input(struct tcpcb *, struct mbuf *);
int  tcp_offload_output(struct tcpcb *);
void tcp_offload_rcvd(struct tcpcb *);
void tcp_offload_ctloutput(struct tcpcb *, int, int);
void tcp_offload_detach(struct tcpcb *);

/*
 * Proxy for struct tcpopt between TOE drivers and TCP functions.
 */
struct toeopt {
	u_int64_t	to_flags;	/* see tcpopt in tcp_var.h */
	u_int16_t	to_mss;		/* maximum segment size */
	u_int8_t	to_wscale;	/* window scaling */
	u_int8_t	_pad1;		/* explicit pad for 64bit alignment */
	u_int32_t	_pad2;		/* explicit pad for 64bit alignment */
	u_int64_t	_pad3[4];	/* TBD */
};
#define TOE_SC_ENTRY_PRESENT 1 /* 4-tuple already present */
#define TOE_SC_DROP 2 /* connection was timed out */
/*
* Because listen is a one-to-many relationship (a socket can be listening
* on all interfaces on a machine some of which may be using different TCP
* offload devices), listen uses a publish/subscribe mechanism. The TCP
* offload driver registers a listen notification function with the stack.
* When a listen socket is created all TCP offload devices are notified
* so that they can do the appropriate set up to offload connections on the
* port to which the socket is bound. When the listen socket is closed,
* the offload devices are notified so that they will stop listening on that
* port and free any associated resources as well as sending RSTs on any
* connections in the SYN_RCVD state.
*
*/
typedef void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *);
typedef void (*tcp_offload_listen_stop_fn)(void *, struct tcpcb *);
EVENTHANDLER_DECLARE(tcp_offload_listen_start, tcp_offload_listen_start_fn);
EVENTHANDLER_DECLARE(tcp_offload_listen_stop, tcp_offload_listen_stop_fn);
/*
* Check if the socket can be offloaded by the following steps:
* - determine the egress interface
* - check the interface for TOE capability and TOE is enabled
* - check if the device has resources to offload the connection
*/
int tcp_offload_connect(struct socket *so, struct sockaddr *nam);
/*
* The tcp_output_* routines are wrappers around the toe_usrreqs calls
* which trigger packet transmission. In the non-offloaded case they
* translate to tcp_output. The tcp_offload_* routines notify TOE
 * of specific events. In the non-offloaded case they are no-ops.
*
* Listen is a special case because it is a 1 to many relationship
* and there can be more than one offload driver in the system.
*/
/*
* Connection is offloaded
*/
#define tp_offload(tp) ((tp)->t_flags & TF_TOE)
/*
* hackish way of allowing this file to also be included by TOE
* which needs to be kept ignorant of socket implementation details
*/
#ifdef _SYS_SOCKETVAR_H_
/*
* The socket has not been marked as "do not offload"
*/
#define SO_OFFLOADABLE(so) ((so->so_options & SO_NO_OFFLOAD) == 0)
static __inline int
tcp_output_connect(struct socket *so, struct sockaddr *nam)
{
struct tcpcb *tp = sototcpcb(so);
int error;
/*
* If offload has been disabled for this socket or the
* connection cannot be offloaded just call tcp_output
* to start the TCP state machine.
*/
#ifndef TCP_OFFLOAD_DISABLE
if (!SO_OFFLOADABLE(so) || (error = tcp_offload_connect(so, nam)) != 0)
#endif
error = tcp_output(tp);
return (error);
}
static __inline int
tcp_output_send(struct tcpcb *tp)
{
#ifndef TCP_OFFLOAD_DISABLE
if (tp_offload(tp))
return (tp->t_tu->tu_send(tp));
#endif
return (tcp_output(tp));
}
static __inline int
tcp_output_rcvd(struct tcpcb *tp)
{
#ifndef TCP_OFFLOAD_DISABLE
if (tp_offload(tp))
return (tp->t_tu->tu_rcvd(tp));
#endif
return (tcp_output(tp));
}
static __inline int
tcp_output_disconnect(struct tcpcb *tp)
{
#ifndef TCP_OFFLOAD_DISABLE
if (tp_offload(tp))
return (tp->t_tu->tu_disconnect(tp));
#endif
return (tcp_output(tp));
}
static __inline int
tcp_output_reset(struct tcpcb *tp)
{
#ifndef TCP_OFFLOAD_DISABLE
if (tp_offload(tp))
return (tp->t_tu->tu_reset(tp));
#endif
return (tcp_output(tp));
}
static __inline void
tcp_offload_detach(struct tcpcb *tp)
{
#ifndef TCP_OFFLOAD_DISABLE
if (tp_offload(tp))
tp->t_tu->tu_detach(tp);
#endif
}
static __inline void
tcp_offload_listen_open(struct tcpcb *tp)
{
#ifndef TCP_OFFLOAD_DISABLE
if (SO_OFFLOADABLE(tp->t_inpcb->inp_socket))
EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp);
#endif
}
static __inline void
tcp_offload_listen_close(struct tcpcb *tp)
{
#ifndef TCP_OFFLOAD_DISABLE
EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp);
#endif
}
#undef SO_OFFLOADABLE
#endif /* _SYS_SOCKETVAR_H_ */
#undef tp_offload
void tcp_offload_twstart(struct tcpcb *tp);
struct tcpcb *tcp_offload_close(struct tcpcb *tp);
struct tcpcb *tcp_offload_drop(struct tcpcb *tp, int error);
#endif /* _NETINET_TCP_OFFLOAD_H_ */


@ -75,6 +75,9 @@ __FBSDID("$FreeBSD$");
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif
#ifdef TCP_OFFLOAD
#include <netinet/tcp_offload.h>
#endif
#ifdef IPSEC
#include <netipsec/ipsec.h>
@ -191,6 +194,11 @@ tcp_output(struct tcpcb *tp)
INP_WLOCK_ASSERT(tp->t_inpcb);
#ifdef TCP_OFFLOAD
if (tp->t_flags & TF_TOE)
return (tcp_offload_output(tp));
#endif
/*
* Determine length of data that should be transmitted,
* and flags that will be used.


@ -85,7 +85,6 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_syncache.h>
#include <netinet/tcp_offload.h>
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
@ -96,6 +95,9 @@ __FBSDID("$FreeBSD$");
#ifdef INET6
#include <netinet6/ip6protosw.h>
#endif
#ifdef TCP_OFFLOAD
#include <netinet/tcp_offload.h>
#endif
#ifdef IPSEC
#include <netipsec/ipsec.h>
@ -824,7 +826,7 @@ tcp_drop(struct tcpcb *tp, int errno)
if (TCPS_HAVERCVDSYN(tp->t_state)) {
tp->t_state = TCPS_CLOSED;
(void) tcp_output_reset(tp);
(void) tcp_output(tp);
TCPSTAT_INC(tcps_drops);
} else
TCPSTAT_INC(tcps_conndrops);
@ -924,8 +926,12 @@ tcp_discardcb(struct tcpcb *tp)
/* free the reassembly queue, if any */
tcp_reass_flush(tp);
#ifdef TCP_OFFLOAD
/* Disconnect offload device, if any. */
tcp_offload_detach(tp);
if (tp->t_flags & TF_TOE)
tcp_offload_detach(tp);
#endif
tcp_free_sackholes(tp);
@ -954,9 +960,10 @@ tcp_close(struct tcpcb *tp)
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
/* Notify any offload devices of listener close */
#ifdef TCP_OFFLOAD
if (tp->t_state == TCPS_LISTEN)
tcp_offload_listen_close(tp);
tcp_offload_listen_stop(tp);
#endif
in_pcbdrop(inp);
TCPSTAT_INC(tcps_closed);
KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL"));
@ -1695,7 +1702,7 @@ tcp_mtudisc(struct inpcb *inp, int mtuoffer)
tp->snd_recover = tp->snd_max;
if (tp->t_flags & TF_SACK_PERMIT)
EXIT_FASTRECOVERY(tp->t_flags);
tcp_output_send(tp);
tcp_output(tp);
return (inp);
}


@ -81,10 +81,12 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_syncache.h>
#include <netinet/tcp_offload.h>
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
#ifdef TCP_OFFLOAD
#include <netinet/toecore.h>
#endif
#ifdef IPSEC
#include <netipsec/ipsec.h>
@ -110,10 +112,8 @@ SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_RW,
&VNET_NAME(tcp_syncookiesonly), 0,
"Use only TCP SYN cookies");
#ifdef TCP_OFFLOAD_DISABLE
#define TOEPCB_ISSET(sc) (0)
#else
#define TOEPCB_ISSET(sc) ((sc)->sc_toepcb != NULL)
#ifdef TCP_OFFLOAD
#define ADDED_BY_TOE(sc) ((sc)->sc_tod != NULL)
#endif
static void syncache_drop(struct syncache *, struct syncache_head *);
@ -332,6 +332,14 @@ syncache_insert(struct syncache *sc, struct syncache_head *sch)
TAILQ_INSERT_HEAD(&sch->sch_bucket, sc, sc_hash);
sch->sch_length++;
#ifdef TCP_OFFLOAD
if (ADDED_BY_TOE(sc)) {
struct toedev *tod = sc->sc_tod;
tod->tod_syncache_added(tod, sc->sc_todctx);
}
#endif
/* Reinitialize the bucket row's timer. */
if (sch->sch_length == 1)
sch->sch_nextc = ticks + INT_MAX;
@ -356,10 +364,14 @@ syncache_drop(struct syncache *sc, struct syncache_head *sch)
TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
sch->sch_length--;
#ifndef TCP_OFFLOAD_DISABLE
if (sc->sc_tu)
sc->sc_tu->tu_syncache_event(TOE_SC_DROP, sc->sc_toepcb);
#endif
#ifdef TCP_OFFLOAD
if (ADDED_BY_TOE(sc)) {
struct toedev *tod = sc->sc_tod;
tod->tod_syncache_removed(tod, sc->sc_todctx);
}
#endif
syncache_free(sc);
V_tcp_syncache.cache_count--;
}
@ -846,6 +858,18 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
if (sc->sc_rxmits > 1)
tp->snd_cwnd = tp->t_maxseg;
#ifdef TCP_OFFLOAD
/*
* Allow a TOE driver to install its hooks. Note that we hold the
* pcbinfo lock too and that prevents tcp_usr_accept from accepting a
* new connection before the TOE driver has done its thing.
*/
if (ADDED_BY_TOE(sc)) {
struct toedev *tod = sc->sc_tod;
tod->tod_offload_socket(tod, sc->sc_todctx, so);
}
#endif
/*
* Copy and activate timers.
*/
@ -926,6 +950,13 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
/* Pull out the entry to unlock the bucket row. */
TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
sch->sch_length--;
#ifdef TCP_OFFLOAD
if (ADDED_BY_TOE(sc)) {
struct toedev *tod = sc->sc_tod;
tod->tod_syncache_removed(tod, sc->sc_todctx);
}
#endif
V_tcp_syncache.cache_count--;
SCH_UNLOCK(sch);
}
@ -934,7 +965,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
* Segment validation:
* ACK must match our initial sequence number + 1 (the SYN|ACK).
*/
if (th->th_ack != sc->sc_iss + 1 && !TOEPCB_ISSET(sc)) {
if (th->th_ack != sc->sc_iss + 1) {
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
log(LOG_DEBUG, "%s; %s: ACK %u != ISS+1 %u, segment "
"rejected\n", s, __func__, th->th_ack, sc->sc_iss);
@ -945,9 +976,8 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
* The SEQ must fall in the window starting at the received
* initial receive sequence number + 1 (the SYN).
*/
if ((SEQ_LEQ(th->th_seq, sc->sc_irs) ||
SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) &&
!TOEPCB_ISSET(sc)) {
if (SEQ_LEQ(th->th_seq, sc->sc_irs) ||
SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) {
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
log(LOG_DEBUG, "%s; %s: SEQ %u != IRS+1 %u, segment "
"rejected\n", s, __func__, th->th_seq, sc->sc_irs);
@ -964,8 +994,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
* If timestamps were negotiated the reflected timestamp
* must be equal to what we actually sent in the SYN|ACK.
*/
if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts &&
!TOEPCB_ISSET(sc)) {
if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts) {
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
log(LOG_DEBUG, "%s; %s: TSECR %u != TS %u, "
"segment rejected\n",
@ -993,25 +1022,6 @@ failed:
return (0);
}
int
tcp_offload_syncache_expand(struct in_conninfo *inc, struct toeopt *toeo,
struct tcphdr *th, struct socket **lsop, struct mbuf *m)
{
struct tcpopt to;
int rc;
bzero(&to, sizeof(struct tcpopt));
to.to_mss = toeo->to_mss;
to.to_wscale = toeo->to_wscale;
to.to_flags = toeo->to_flags;
INP_INFO_WLOCK(&V_tcbinfo);
rc = syncache_expand(inc, &to, th, lsop, m);
INP_INFO_WUNLOCK(&V_tcbinfo);
return (rc);
}
/*
* Given a LISTEN socket and an inbound SYN request, add
* this to the syn cache, and send back a segment:
@ -1025,10 +1035,10 @@ tcp_offload_syncache_expand(struct in_conninfo *inc, struct toeopt *toeo,
* consume all available buffer space if it were ACKed. By not ACKing
* the data, we avoid this DoS scenario.
*/
static void
_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
struct inpcb *inp, struct socket **lsop, struct mbuf *m,
struct toe_usrreqs *tu, void *toepcb)
void
syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
struct inpcb *inp, struct socket **lsop, struct mbuf *m, void *tod,
void *todctx)
{
struct tcpcb *tp;
struct socket *so;
@ -1114,11 +1124,6 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
sc = syncache_lookup(inc, &sch); /* returns locked entry */
SCH_LOCK_ASSERT(sch);
if (sc != NULL) {
#ifndef TCP_OFFLOAD_DISABLE
if (sc->sc_tu)
sc->sc_tu->tu_syncache_event(TOE_SC_ENTRY_PRESENT,
sc->sc_toepcb);
#endif
TCPSTAT_INC(tcps_sc_dupsyn);
if (ipopts) {
/*
@ -1151,7 +1156,7 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
s, __func__);
free(s, M_TCPLOG);
}
if (!TOEPCB_ISSET(sc) && syncache_respond(sc) == 0) {
if (syncache_respond(sc) == 0) {
sc->sc_rxmits = 0;
syncache_timeout(sc, sch, 1);
TCPSTAT_INC(tcps_sndacks);
@ -1202,9 +1207,9 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
sc->sc_ip_tos = ip_tos;
sc->sc_ip_ttl = ip_ttl;
}
#ifndef TCP_OFFLOAD_DISABLE
sc->sc_tu = tu;
sc->sc_toepcb = toepcb;
#ifdef TCP_OFFLOAD
sc->sc_tod = tod;
sc->sc_todctx = todctx;
#endif
sc->sc_irs = th->th_seq;
sc->sc_iss = arc4random();
@ -1299,7 +1304,7 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
/*
* Do a standard 3-way handshake.
*/
if (TOEPCB_ISSET(sc) || syncache_respond(sc) == 0) {
if (syncache_respond(sc) == 0) {
if (V_tcp_syncookies && V_tcp_syncookiesonly && sc != &scs)
syncache_free(sc);
else if (sc != &scs)
@ -1491,37 +1496,21 @@ syncache_respond(struct syncache *sc)
m->m_pkthdr.csum_flags = CSUM_TCP;
th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htons(tlen + optlen - hlen + IPPROTO_TCP));
#ifdef TCP_OFFLOAD
if (ADDED_BY_TOE(sc)) {
struct toedev *tod = sc->sc_tod;
error = tod->tod_syncache_respond(tod, sc->sc_todctx, m);
return (error);
}
#endif
error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL);
}
#endif
return (error);
}
void
syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
struct inpcb *inp, struct socket **lsop, struct mbuf *m)
{
_syncache_add(inc, to, th, inp, lsop, m, NULL, NULL);
}
void
tcp_offload_syncache_add(struct in_conninfo *inc, struct toeopt *toeo,
struct tcphdr *th, struct inpcb *inp, struct socket **lsop,
struct toe_usrreqs *tu, void *toepcb)
{
struct tcpopt to;
bzero(&to, sizeof(struct tcpopt));
to.to_mss = toeo->to_mss;
to.to_wscale = toeo->to_wscale;
to.to_flags = toeo->to_flags;
INP_INFO_WLOCK(&V_tcbinfo);
INP_WLOCK(inp);
_syncache_add(inc, &to, th, inp, lsop, NULL, tu, toepcb);
}
/*
* The purpose of SYN cookies is to avoid keeping track of all SYN's we
* receive and to be able to handle SYN floods from bogus source addresses


@ -34,8 +34,6 @@
#define _NETINET_TCP_SYNCACHE_H_
#ifdef _KERNEL
struct toeopt;
void syncache_init(void);
#ifdef VIMAGE
void syncache_destroy(void);
@ -43,14 +41,9 @@ void syncache_destroy(void);
void syncache_unreach(struct in_conninfo *, struct tcphdr *);
int syncache_expand(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct socket **, struct mbuf *);
int tcp_offload_syncache_expand(struct in_conninfo *inc, struct toeopt *toeo,
struct tcphdr *th, struct socket **lsop, struct mbuf *m);
void syncache_add(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *);
void tcp_offload_syncache_add(struct in_conninfo *, struct toeopt *,
struct tcphdr *, struct inpcb *, struct socket **,
struct toe_usrreqs *tu, void *toepcb);
struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *,
void *, void *);
void syncache_chkrst(struct in_conninfo *, struct tcphdr *);
void syncache_badack(struct in_conninfo *);
int syncache_pcbcount(void);
@ -75,10 +68,10 @@ struct syncache {
u_int8_t sc_requested_s_scale:4,
sc_requested_r_scale:4;
u_int16_t sc_flags;
#ifndef TCP_OFFLOAD_DISABLE
struct toe_usrreqs *sc_tu; /* TOE operations */
void *sc_toepcb; /* TOE protocol block */
#endif
#if defined(TCP_OFFLOAD) || !defined(TCP_OFFLOAD_DISABLE)
struct toedev *sc_tod; /* entry added by this TOE */
void *sc_todctx; /* TOE driver context */
#endif
struct label *sc_label; /* MAC label reference */
struct ucred *sc_cred; /* cred cache for jail checks */


@ -602,6 +602,11 @@ tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
struct inpcb *inp = tp->t_inpcb;
int cpu = INP_CPU(inp);
#ifdef TCP_OFFLOAD
if (tp->t_flags & TF_TOE)
return;
#endif
switch (timer_type) {
case TT_DELACK:
t_callout = &tp->t_timers->tt_delack;


@ -87,7 +87,9 @@ __FBSDID("$FreeBSD$");
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif
#ifdef TCP_OFFLOAD
#include <netinet/tcp_offload.h>
#endif
/*
* TCP protocol interface to socket abstraction.
@ -367,7 +369,9 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
if (error == 0) {
tp->t_state = TCPS_LISTEN;
solisten_proto(so, backlog);
tcp_offload_listen_open(tp);
#ifdef TCP_OFFLOAD
tcp_offload_listen_start(tp);
#endif
}
SOCK_UNLOCK(so);
@ -409,6 +413,9 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
if (error == 0) {
tp->t_state = TCPS_LISTEN;
solisten_proto(so, backlog);
#ifdef TCP_OFFLOAD
tcp_offload_listen_start(tp);
#endif
}
SOCK_UNLOCK(so);
@ -459,7 +466,13 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
TCPDEBUG1();
if ((error = tcp_connect(tp, nam, td)) != 0)
goto out;
error = tcp_output_connect(so, nam);
#ifdef TCP_OFFLOAD
if (registered_toedevs > 0 &&
(error = tcp_offload_connect(so, nam)) == 0)
goto out;
#endif
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
error = tcp_output(tp);
out:
TCPDEBUG2(PRU_CONNECT);
INP_WUNLOCK(inp);
@ -519,7 +532,12 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
goto out;
if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0)
goto out;
error = tcp_output_connect(so, nam);
#ifdef TCP_OFFLOAD
if (registered_toedevs > 0 &&
(error = tcp_offload_connect(so, nam)) == 0)
goto out;
#endif
error = tcp_output(tp);
goto out;
}
#endif
@ -530,7 +548,13 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
goto out;
if ((error = tcp6_connect(tp, nam, td)) != 0)
goto out;
error = tcp_output_connect(so, nam);
#ifdef TCP_OFFLOAD
if (registered_toedevs > 0 &&
(error = tcp_offload_connect(so, nam)) == 0)
goto out;
#endif
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
error = tcp_output(tp);
out:
TCPDEBUG2(PRU_CONNECT);
@ -709,7 +733,7 @@ tcp_usr_shutdown(struct socket *so)
socantsendmore(so);
tcp_usrclosed(tp);
if (!(inp->inp_flags & INP_DROPPED))
error = tcp_output_disconnect(tp);
error = tcp_output(tp);
out:
TCPDEBUG2(PRU_SHUTDOWN);
@ -739,7 +763,11 @@ tcp_usr_rcvd(struct socket *so, int flags)
}
tp = intotcpcb(inp);
TCPDEBUG1();
tcp_output_rcvd(tp);
#ifdef TCP_OFFLOAD
if (tp->t_flags & TF_TOE)
tcp_offload_rcvd(tp);
#endif
tcp_output(tp);
out:
TCPDEBUG2(PRU_RCVD);
@ -835,7 +863,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
if (!(inp->inp_flags & INP_DROPPED)) {
if (flags & PRUS_MORETOCOME)
tp->t_flags |= TF_MORETOCOME;
error = tcp_output_send(tp);
error = tcp_output(tp);
if (flags & PRUS_MORETOCOME)
tp->t_flags &= ~TF_MORETOCOME;
}
@ -884,7 +912,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
}
tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
tp->t_flags |= TF_FORCEDATA;
error = tcp_output_send(tp);
error = tcp_output(tp);
tp->t_flags &= ~TF_FORCEDATA;
}
out:
@ -1119,7 +1147,6 @@ tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
soisconnecting(so);
TCPSTAT_INC(tcps_connattempt);
tp->t_state = TCPS_SYN_SENT;
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
tp->iss = tcp_new_isn(tp);
tcp_sendseqinit(tp);
@ -1192,7 +1219,6 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
soisconnecting(so);
TCPSTAT_INC(tcps_connattempt);
tp->t_state = TCPS_SYN_SENT;
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
tp->iss = tcp_new_isn(tp);
tcp_sendseqinit(tp);
@ -1323,9 +1349,9 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
tp->t_flags |= TF_SIGNATURE;
else
tp->t_flags &= ~TF_SIGNATURE;
INP_WUNLOCK(inp);
break;
goto unlock_and_done;
#endif /* TCP_SIGNATURE */
case TCP_NODELAY:
case TCP_NOOPT:
INP_WUNLOCK(inp);
@ -1351,6 +1377,13 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
tp->t_flags |= opt;
else
tp->t_flags &= ~opt;
unlock_and_done:
#ifdef TCP_OFFLOAD
if (tp->t_flags & TF_TOE) {
tcp_offload_ctloutput(tp, sopt->sopt_dir,
sopt->sopt_name);
}
#endif
INP_WUNLOCK(inp);
break;
@ -1369,8 +1402,7 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
if (TCPS_HAVEESTABLISHED(tp->t_state))
error = tcp_output(tp);
}
INP_WUNLOCK(inp);
break;
goto unlock_and_done;
case TCP_MAXSEG:
INP_WUNLOCK(inp);
@ -1385,8 +1417,7 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
tp->t_maxseg = optval;
else
error = EINVAL;
INP_WUNLOCK(inp);
break;
goto unlock_and_done;
case TCP_INFO:
INP_WUNLOCK(inp);
@ -1438,8 +1469,7 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
}
}
CC_LIST_RUNLOCK();
INP_WUNLOCK(inp);
break;
goto unlock_and_done;
case TCP_KEEPIDLE:
case TCP_KEEPINTVL:
@ -1491,8 +1521,7 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
TP_KEEPINIT(tp));
break;
}
INP_WUNLOCK(inp);
break;
goto unlock_and_done;
default:
INP_WUNLOCK(inp);
@ -1635,7 +1664,7 @@ tcp_disconnect(struct tcpcb *tp)
sbflush(&so->so_rcv);
tcp_usrclosed(tp);
if (!(inp->inp_flags & INP_DROPPED))
tcp_output_disconnect(tp);
tcp_output(tp);
}
}
@ -1658,7 +1687,9 @@ tcp_usrclosed(struct tcpcb *tp)
switch (tp->t_state) {
case TCPS_LISTEN:
tcp_offload_listen_close(tp);
#ifdef TCP_OFFLOAD
tcp_offload_listen_stop(tp);
#endif
/* FALLTHROUGH */
case TCPS_CLOSED:
tp->t_state = TCPS_CLOSED;


@ -194,7 +194,7 @@ struct tcpcb {
	int	t_rttlow;		/* smallest observed RTT */
u_int32_t rfbuf_ts; /* recv buffer autoscaling timestamp */
int rfbuf_cnt; /* recv buffer autoscaling byte count */
struct toe_usrreqs *t_tu; /* offload operations vector */
struct toedev *tod; /* toedev handling this connection */
int t_sndrexmitpack; /* retransmit packets sent */
int t_rcvoopack; /* out-of-order packets received */
void *t_toe; /* TOE pcb pointer */

sys/netinet/toecore.c (new file, 575 lines)

@ -0,0 +1,575 @@
/*-
* Copyright (c) 2012 Chelsio Communications, Inc.
* All rights reserved.
* Written by: Navdeep Parhar <np@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/types.h>
#include <sys/sockopt.h>
#include <sys/sysctl.h>
#include <sys/socket.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <net/if_llatbl.h>
#include <net/route.h>
#include <netinet/if_ether.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet6/nd6.h>
#define TCPSTATES
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_syncache.h>
#include <netinet/tcp_offload.h>
#include <netinet/toecore.h>
static struct mtx toedev_lock;
static TAILQ_HEAD(, toedev) toedev_list;
static eventhandler_tag listen_start_eh;
static eventhandler_tag listen_stop_eh;
static eventhandler_tag lle_event_eh;
static eventhandler_tag route_redirect_eh;
static int
toedev_connect(struct toedev *tod __unused, struct socket *so __unused,
struct rtentry *rt __unused, struct sockaddr *nam __unused)
{
return (ENOTSUP);
}
static int
toedev_listen_start(struct toedev *tod __unused, struct tcpcb *tp __unused)
{
return (ENOTSUP);
}
static int
toedev_listen_stop(struct toedev *tod __unused, struct tcpcb *tp __unused)
{
return (ENOTSUP);
}
static void
toedev_input(struct toedev *tod __unused, struct tcpcb *tp __unused,
struct mbuf *m)
{
m_freem(m);
return;
}
static void
toedev_rcvd(struct toedev *tod __unused, struct tcpcb *tp __unused)
{
return;
}
static int
toedev_output(struct toedev *tod __unused, struct tcpcb *tp __unused)
{
return (ENOTSUP);
}
static void
toedev_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp __unused)
{
return;
}
static void
toedev_l2_update(struct toedev *tod __unused, struct ifnet *ifp __unused,
struct sockaddr *sa __unused, uint8_t *lladdr __unused,
uint16_t vtag __unused)
{
return;
}
static void
toedev_route_redirect(struct toedev *tod __unused, struct ifnet *ifp __unused,
struct rtentry *rt0 __unused, struct rtentry *rt1 __unused)
{
return;
}
static void
toedev_syncache_added(struct toedev *tod __unused, void *ctx __unused)
{
return;
}
static void
toedev_syncache_removed(struct toedev *tod __unused, void *ctx __unused)
{
return;
}
static int
toedev_syncache_respond(struct toedev *tod __unused, void *ctx __unused,
struct mbuf *m)
{
m_freem(m);
return (0);
}
static void
toedev_offload_socket(struct toedev *tod __unused, void *ctx __unused,
struct socket *so __unused)
{
return;
}
static void
toedev_ctloutput(struct toedev *tod __unused, struct tcpcb *tp __unused,
int sopt_dir __unused, int sopt_name __unused)
{
return;
}
/*
* Inform one or more TOE devices about a listening socket.
*/
static void
toe_listen_start(struct inpcb *inp, void *arg)
{
struct toedev *t, *tod;
struct tcpcb *tp;
INP_WLOCK_ASSERT(inp);
KASSERT(inp->inp_pcbinfo == &V_tcbinfo,
("%s: inp is not a TCP inp", __func__));
if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))
return;
tp = intotcpcb(inp);
if (tp->t_state != TCPS_LISTEN)
return;
t = arg;
mtx_lock(&toedev_lock);
TAILQ_FOREACH(tod, &toedev_list, link) {
if (t == NULL || t == tod)
tod->tod_listen_start(tod, tp);
}
mtx_unlock(&toedev_lock);
}
static void
toe_listen_start_event(void *arg __unused, struct tcpcb *tp)
{
struct inpcb *inp = tp->t_inpcb;
INP_WLOCK_ASSERT(inp);
KASSERT(tp->t_state == TCPS_LISTEN,
("%s: t_state %s", __func__, tcpstates[tp->t_state]));
toe_listen_start(inp, NULL);
}
static void
toe_listen_stop_event(void *arg __unused, struct tcpcb *tp)
{
struct toedev *tod;
#ifdef INVARIANTS
struct inpcb *inp = tp->t_inpcb;
#endif
INP_WLOCK_ASSERT(inp);
KASSERT(tp->t_state == TCPS_LISTEN,
("%s: t_state %s", __func__, tcpstates[tp->t_state]));
mtx_lock(&toedev_lock);
TAILQ_FOREACH(tod, &toedev_list, link)
tod->tod_listen_stop(tod, tp);
mtx_unlock(&toedev_lock);
}
/*
* Fill up a freshly allocated toedev struct with reasonable defaults.
*/
void
init_toedev(struct toedev *tod)
{
tod->tod_softc = NULL;
/*
* Provide no-op defaults so that the kernel can call any toedev
* function without having to check whether the TOE driver supplied one
* or not.
*/
tod->tod_connect = toedev_connect;
tod->tod_listen_start = toedev_listen_start;
tod->tod_listen_stop = toedev_listen_stop;
tod->tod_input = toedev_input;
tod->tod_rcvd = toedev_rcvd;
tod->tod_output = toedev_output;
tod->tod_send_rst = toedev_output;
tod->tod_send_fin = toedev_output;
tod->tod_pcb_detach = toedev_pcb_detach;
tod->tod_l2_update = toedev_l2_update;
tod->tod_route_redirect = toedev_route_redirect;
tod->tod_syncache_added = toedev_syncache_added;
tod->tod_syncache_removed = toedev_syncache_removed;
tod->tod_syncache_respond = toedev_syncache_respond;
tod->tod_offload_socket = toedev_offload_socket;
tod->tod_ctloutput = toedev_ctloutput;
}
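
As a usage sketch (simplified, with invented example_* names that do not appear in this changeset): a TOE driver typically embeds a struct toedev in its softc, lets init_toedev() install the no-op defaults above, overrides only the hooks it implements, and then registers itself.

/* Hypothetical driver attach path; illustrative only. */
static int
example_tom_attach(struct example_softc *sc)
{
	struct toedev *tod = &sc->tod;
	int rc;

	init_toedev(tod);		/* start from the no-op defaults */
	tod->tod_softc = sc;

	/* Override only what this driver actually implements. */
	tod->tod_connect = example_tod_connect;
	tod->tod_listen_start = example_tod_listen_start;
	tod->tod_listen_stop = example_tod_listen_stop;
	tod->tod_rcvd = example_tod_rcvd;
	tod->tod_output = example_tod_output;

	rc = register_toedev(tod);
	if (rc != 0)
		return (rc);

	/*
	 * The driver must also arrange for TOEDEV(ifp) to resolve to this
	 * toedev on its TOE-capable interfaces, and advertise IFCAP_TOE4/
	 * IFCAP_TOE6 in their capabilities.
	 */
	return (0);
}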
/*
* Register an active TOE device with the system. This allows it to receive
* notifications from the kernel.
*/
int
register_toedev(struct toedev *tod)
{
struct toedev *t;
mtx_lock(&toedev_lock);
TAILQ_FOREACH(t, &toedev_list, link) {
if (t == tod) {
mtx_unlock(&toedev_lock);
return (EEXIST);
}
}
TAILQ_INSERT_TAIL(&toedev_list, tod, link);
registered_toedevs++;
mtx_unlock(&toedev_lock);
inp_apply_all(toe_listen_start, tod);
return (0);
}
/*
* Remove the TOE device from the global list of active TOE devices. It is the
* caller's responsibility to ensure that the TOE device is quiesced prior to
* this call.
*/
int
unregister_toedev(struct toedev *tod)
{
struct toedev *t, *t2;
int rc = ENODEV;
mtx_lock(&toedev_lock);
TAILQ_FOREACH_SAFE(t, &toedev_list, link, t2) {
if (t == tod) {
TAILQ_REMOVE(&toedev_list, tod, link);
registered_toedevs--;
rc = 0;
break;
}
}
KASSERT(registered_toedevs >= 0,
("%s: registered_toedevs (%d) < 0", __func__, registered_toedevs));
mtx_unlock(&toedev_lock);
return (rc);
}
void
toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
struct inpcb *inp, void *tod, void *todctx)
{
struct socket *lso = inp->inp_socket;
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
syncache_add(inc, to, th, inp, &lso, NULL, tod, todctx);
}
int
toe_syncache_expand(struct in_conninfo *inc, struct tcpopt *to,
struct tcphdr *th, struct socket **lsop)
{
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
return (syncache_expand(inc, to, th, lsop, NULL));
}
/*
* General purpose check to see if a 4-tuple is in use by the kernel. If a TCP
* header (presumably for an incoming SYN) is also provided, an existing 4-tuple
 * in TIME_WAIT may be assassinated, freeing it up for re-use.
*
* Note that the TCP header must have been run through tcp_fields_to_host() or
* equivalent.
*/
int
toe_4tuple_check(struct in_conninfo *inc, struct tcphdr *th, struct ifnet *ifp)
{
struct inpcb *inp;
if (inc->inc_flags & INC_ISIPV6)
return (ENOSYS); /* XXX: implement */
inp = in_pcblookup(&V_tcbinfo, inc->inc_faddr, inc->inc_fport,
inc->inc_laddr, inc->inc_lport, INPLOOKUP_WLOCKPCB, ifp);
if (inp != NULL) {
INP_WLOCK_ASSERT(inp);
if ((inp->inp_flags & INP_TIMEWAIT) && th != NULL) {
INP_INFO_WLOCK_ASSERT(&V_tcbinfo); /* for twcheck */
if (!tcp_twcheck(inp, NULL, th, NULL, 0))
return (EADDRINUSE);
} else {
INP_WUNLOCK(inp);
return (EADDRINUSE);
}
}
return (0);
}
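
A hedged sketch of how a TOE driver's passive-open path might use this check before parking an embryonic connection in the syncache with toe_syncache_add(); the helper below and its name are illustrative assumptions, not part of this commit, and VNET selection is elided.

/* Hypothetical helper in a TOE driver's SYN-processing path. */
static int
example_tuple_available(struct in_conninfo *inc, struct tcphdr *th,
    struct ifnet *ifp)
{
	int rc;

	/* The pcbinfo lock is needed in case a TIME_WAIT entry is recycled. */
	INP_INFO_WLOCK(&V_tcbinfo);
	rc = toe_4tuple_check(inc, th, ifp);
	INP_INFO_WUNLOCK(&V_tcbinfo);

	return (rc == 0);	/* 0 means the 4-tuple is free to use */
}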
static void
toe_lle_event(void *arg __unused, struct llentry *lle, int evt)
{
struct toedev *tod;
struct ifnet *ifp;
struct sockaddr *sa;
uint8_t *lladdr;
uint16_t vtag;
LLE_WLOCK_ASSERT(lle);
ifp = lle->lle_tbl->llt_ifp;
sa = L3_ADDR(lle);
KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
("%s: lle_event %d for lle %p but sa %p !INET && !INET6",
__func__, evt, lle, sa));
/*
* Not interested if the interface's TOE capability is not enabled.
*/
if ((sa->sa_family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4)) ||
(sa->sa_family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6)))
return;
tod = TOEDEV(ifp);
if (tod == NULL)
return;
vtag = 0xfff;
if (evt != LLENTRY_RESOLVED) {
/*
* LLENTRY_TIMEDOUT, LLENTRY_DELETED, LLENTRY_EXPIRED all mean
* this entry is going to be deleted.
*/
lladdr = NULL;
} else {
KASSERT(lle->la_flags & LLE_VALID,
("%s: %p resolved but not valid?", __func__, lle));
lladdr = (uint8_t *)&lle->ll_addr;
#ifdef VLAN_TAG
VLAN_TAG(ifp, &vtag);
#endif
}
tod->tod_l2_update(tod, ifp, sa, lladdr, vtag);
}
/*
* XXX: implement.
*/
static void
toe_route_redirect_event(void *arg __unused, struct rtentry *rt0,
struct rtentry *rt1, struct sockaddr *sa)
{
return;
}
/*
* Returns 0 or EWOULDBLOCK on success (any other value is an error). 0 means
* lladdr and vtag are valid on return, EWOULDBLOCK means the TOE driver's
* tod_l2_update will be called later, when the entry is resolved or times out.
*/
int
toe_l2_resolve(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
uint8_t *lladdr, uint16_t *vtag)
{
struct llentry *lle;
int rc;
switch (sa->sa_family) {
#ifdef INET
case AF_INET:
rc = arpresolve(ifp, NULL, NULL, sa, lladdr, &lle);
break;
#endif
#ifdef INET6
case AF_INET6:
rc = nd6_storelladdr(ifp, NULL, sa, lladdr, &lle);
break;
#endif
default:
return (EPROTONOSUPPORT);
}
if (rc == 0) {
#ifdef VLAN_TAG
if (VLAN_TAG(ifp, vtag) != 0)
#endif
*vtag = 0xfff;
}
return (rc);
}
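
A sketch of the intended calling pattern for an Ethernet interface (assumed and simplified; example_program_l2_entry() is an invented driver routine): resolve up front, and on EWOULDBLOCK rely on tod_l2_update() to finish the job once ARP/ND resolution completes or times out.

/* Hypothetical L2 setup step in a driver's active-open path. */
static int
example_setup_l2_entry(struct toedev *tod, struct ifnet *ifp,
    struct sockaddr *nam, void *hw_l2_entry)
{
	uint8_t lladdr[ETHER_ADDR_LEN];
	uint16_t vtag;
	int rc;

	rc = toe_l2_resolve(tod, ifp, nam, lladdr, &vtag);
	if (rc == 0) {
		/* Answer available now; program the hardware immediately. */
		example_program_l2_entry(hw_l2_entry, lladdr, vtag);
	} else if (rc == EWOULDBLOCK) {
		/* In progress; tod_l2_update() will be called back later. */
		rc = 0;
	}

	return (rc);	/* any other value is a hard failure */
}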
void
toe_connect_failed(struct toedev *tod, struct tcpcb *tp, int err)
{
struct inpcb *inp = tp->t_inpcb;
INP_WLOCK_ASSERT(inp);
KASSERT(tp->t_flags & TF_TOE,
("%s: tp %p not offloaded.", __func__, tp));
if (!(inp->inp_flags & INP_DROPPED)) {
if (err == EAGAIN) {
/*
* Temporary failure during offload, take this PCB back.
* Detach from the TOE driver and do the rest of what
* TCP's pru_connect would have done if the connection
* wasn't offloaded.
*/
tod->tod_pcb_detach(tod, tp);
KASSERT(!(tp->t_flags & TF_TOE),
("%s: tp %p still offloaded.", __func__, tp));
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
(void) tcp_output(tp);
} else {
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
tp = tcp_drop(tp, err);
if (tp == NULL)
INP_WLOCK(inp); /* re-acquire */
}
}
INP_WLOCK_ASSERT(inp);
}
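
A minimal sketch of the driver side (hypothetical names, simplified locking, VNET handling elided): when the hardware reports that an offloaded active open failed, the driver maps the hardware status to an errno and hands the PCB back via toe_connect_failed().

/* Hypothetical completion handler for a failed hardware active open. */
static void
example_act_open_failure(struct toedev *tod, struct tcpcb *tp, int hw_status)
{
	struct inpcb *inp = tp->t_inpcb;
	int err;

	/* EAGAIN asks the kernel to retry the connection in software. */
	err = (hw_status == EXAMPLE_STAT_NO_RESOURCES) ? EAGAIN : ECONNREFUSED;

	INP_INFO_WLOCK(&V_tcbinfo);	/* tcp_drop() may be called */
	INP_WLOCK(inp);
	toe_connect_failed(tod, tp, err);
	INP_WUNLOCK(inp);		/* toe_connect_failed() leaves inp locked */
	INP_INFO_WUNLOCK(&V_tcbinfo);
}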
static int
toecore_load(void)
{
mtx_init(&toedev_lock, "toedev lock", NULL, MTX_DEF);
TAILQ_INIT(&toedev_list);
listen_start_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_start,
toe_listen_start_event, NULL, EVENTHANDLER_PRI_ANY);
listen_stop_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_stop,
toe_listen_stop_event, NULL, EVENTHANDLER_PRI_ANY);
lle_event_eh = EVENTHANDLER_REGISTER(lle_event, toe_lle_event, NULL,
EVENTHANDLER_PRI_ANY);
route_redirect_eh = EVENTHANDLER_REGISTER(route_redirect_event,
toe_route_redirect_event, NULL, EVENTHANDLER_PRI_ANY);
return (0);
}
static int
toecore_unload(void)
{
mtx_lock(&toedev_lock);
if (!TAILQ_EMPTY(&toedev_list)) {
mtx_unlock(&toedev_lock);
return (EBUSY);
}
EVENTHANDLER_DEREGISTER(tcp_offload_listen_start, listen_start_eh);
EVENTHANDLER_DEREGISTER(tcp_offload_listen_stop, listen_stop_eh);
EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh);
EVENTHANDLER_DEREGISTER(route_redirect_event, route_redirect_eh);
mtx_unlock(&toedev_lock);
mtx_destroy(&toedev_lock);
return (0);
}
static int
toecore_mod_handler(module_t mod, int cmd, void *arg)
{
if (cmd == MOD_LOAD)
return (toecore_load());
if (cmd == MOD_UNLOAD)
return (toecore_unload());
return (EOPNOTSUPP);
}
static moduledata_t mod_data= {
"toecore",
toecore_mod_handler,
0
};
MODULE_VERSION(toecore, 1);
DECLARE_MODULE(toecore, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);
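
A TOE driver module layered on toecore would normally declare a module dependency so that toecore is loaded first; a minimal sketch with an invented module name and handler:

/* In a hypothetical example_tom driver module: */
static moduledata_t example_tom_moddata = {
	"example_tom",
	example_tom_mod_handler,	/* the driver's MOD_LOAD/MOD_UNLOAD handler */
	NULL
};

DECLARE_MODULE(example_tom, example_tom_moddata, SI_SUB_EXEC, SI_ORDER_ANY);
MODULE_VERSION(example_tom, 1);
MODULE_DEPEND(example_tom, toecore, 1, 1, 1);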

sys/netinet/toecore.h (new file, 130 lines)

@ -0,0 +1,130 @@
/*-
* Copyright (c) 2012 Chelsio Communications, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _NETINET_TOE_H_
#define _NETINET_TOE_H_
#ifndef _KERNEL
#error "no user-serviceable parts inside"
#endif
struct tcpopt;
struct tcphdr;
struct in_conninfo;
struct toedev {
TAILQ_ENTRY(toedev) link; /* glue for toedev_list */
void *tod_softc; /* TOE driver private data */
/*
* Active open. If a failure occurs, it is reported back by the driver
* via toe_connect_failed.
*/
int (*tod_connect)(struct toedev *, struct socket *, struct rtentry *,
struct sockaddr *);
/* Passive open. */
int (*tod_listen_start)(struct toedev *, struct tcpcb *);
int (*tod_listen_stop)(struct toedev *, struct tcpcb *);
/*
* The kernel uses this routine to pass on any frame it receives for an
* offloaded connection to the TOE driver. This is an unusual event.
*/
void (*tod_input)(struct toedev *, struct tcpcb *, struct mbuf *);
/*
* This is called by the kernel during pru_rcvd for an offloaded TCP
* connection and provides an opportunity for the TOE driver to manage
* its rx window and credits.
*/
void (*tod_rcvd)(struct toedev *, struct tcpcb *);
/*
* Transmit routine. The kernel calls this to have the TOE driver
* evaluate whether there is data to be transmitted, and transmit it.
*/
int (*tod_output)(struct toedev *, struct tcpcb *);
/* Immediate teardown: send RST to peer. */
int (*tod_send_rst)(struct toedev *, struct tcpcb *);
/* Initiate orderly disconnect by sending FIN to the peer. */
int (*tod_send_fin)(struct toedev *, struct tcpcb *);
/* Called to indicate that the kernel is done with this TCP PCB. */
void (*tod_pcb_detach)(struct toedev *, struct tcpcb *);
/*
* The kernel calls this once it has information about an L2 entry that
* the TOE driver enquired about previously (via toe_l2_resolve).
*/
void (*tod_l2_update)(struct toedev *, struct ifnet *,
struct sockaddr *, uint8_t *, uint16_t);
/* XXX. Route has been redirected. */
void (*tod_route_redirect)(struct toedev *, struct ifnet *,
struct rtentry *, struct rtentry *);
/* Syncache interaction. */
void (*tod_syncache_added)(struct toedev *, void *);
void (*tod_syncache_removed)(struct toedev *, void *);
int (*tod_syncache_respond)(struct toedev *, void *, struct mbuf *);
void (*tod_offload_socket)(struct toedev *, void *, struct socket *);
/* TCP socket option */
void (*tod_ctloutput)(struct toedev *, struct tcpcb *, int, int);
};
#include <sys/eventhandler.h>
typedef void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *);
typedef void (*tcp_offload_listen_stop_fn)(void *, struct tcpcb *);
EVENTHANDLER_DECLARE(tcp_offload_listen_start, tcp_offload_listen_start_fn);
EVENTHANDLER_DECLARE(tcp_offload_listen_stop, tcp_offload_listen_stop_fn);
void init_toedev(struct toedev *);
int register_toedev(struct toedev *);
int unregister_toedev(struct toedev *);
/*
* General interface for looking up L2 information for an IP address. If an
* answer is not available right away then the TOE driver's tod_l2_update will
* be called later.
*/
int toe_l2_resolve(struct toedev *, struct ifnet *, struct sockaddr *,
uint8_t *, uint16_t *);
void toe_connect_failed(struct toedev *, struct tcpcb *, int);
void toe_syncache_add(struct in_conninfo *, struct tcpopt *, struct tcphdr *,
struct inpcb *, void *, void *);
int toe_syncache_expand(struct in_conninfo *, struct tcpopt *, struct tcphdr *,
struct socket **);
int toe_4tuple_check(struct in_conninfo *, struct tcphdr *, struct ifnet *);
#endif


@ -1,162 +0,0 @@
/*-
* Copyright (c) 2007, Chelsio Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Neither the name of the Chelsio Corporation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _NETINET_TOEDEV_H_
#define _NETINET_TOEDEV_H_
#ifndef _KERNEL
#error "no user-serviceable parts inside"
#endif
extern uint32_t toedev_registration_count;
/* Parameter values for offload_get_phys_egress(). */
enum {
TOE_OPEN,
TOE_FAILOVER,
};
/* Parameter values for toe_failover(). */
enum {
TOE_ACTIVE_SLAVE,
TOE_LINK_DOWN,
TOE_LINK_UP,
TOE_RELEASE,
TOE_RELEASE_ALL,
};
#define TOENAMSIZ 16
/* Get the toedev associated with a ifnet. */
#define TOEDEV(ifp) ((ifp)->if_llsoftc)
struct offload_id {
unsigned int id;
unsigned long data;
};
struct ifnet;
struct rt_entry;
struct tom_info;
struct sysctl_oid;
struct socket;
struct mbuf;
struct toedev {
TAILQ_ENTRY(toedev) entry;
char tod_name[TOENAMSIZ]; /* TOE device name */
unsigned int tod_ttid; /* TOE type id */
unsigned long tod_flags; /* device flags */
unsigned int tod_mtu; /* max TX offloaded data */
unsigned int tod_nconn; /* max # of offloaded
* connections
*/
struct ifnet *tod_lldev; /* first interface */
const struct tom_info *tod_offload_mod; /* TCP offload module */
/*
* This TOE device is capable of offloading the connection for socket so
*/
int (*tod_can_offload)(struct toedev *dev, struct socket *so);
/*
* Establish a connection to nam using the TOE device dev
*/
int (*tod_connect)(struct toedev *dev, struct socket *so,
struct rtentry *rt, struct sockaddr *nam);
/*
* Send an mbuf down to the toe device
*/
int (*tod_send)(struct toedev *dev, struct mbuf *m);
/*
* Receive an array of mbufs from the TOE device dev
*/
int (*tod_recv)(struct toedev *dev, struct mbuf **m, int n);
/*
* Device specific ioctl interface
*/
int (*tod_ctl)(struct toedev *dev, unsigned int req, void *data);
/*
* Update L2 entry in toedev
*/
void (*tod_arp_update)(struct toedev *dev, struct rtentry *neigh);
/*
* Failover from one toe device to another
*/
void (*tod_failover)(struct toedev *dev, struct ifnet *bond_ifp,
struct ifnet *ndev, int event);
void *tod_priv; /* driver private data */
void *tod_l2opt; /* optional layer 2 data */
void *tod_l3opt; /* optional layer 3 data */
void *tod_l4opt; /* optional layer 4 data */
void *tod_ulp; /* upper lever protocol */
};
struct tom_info {
TAILQ_ENTRY(tom_info) entry;
int (*ti_attach)(struct toedev *dev,
const struct offload_id *entry);
int (*ti_detach)(struct toedev *dev);
const char *ti_name;
const struct offload_id *ti_id_table;
};
static __inline void
init_offload_dev(struct toedev *dev)
{
}
int register_tom(struct tom_info *t);
int unregister_tom(struct tom_info *t);
int register_toedev(struct toedev *dev, const char *name);
int unregister_toedev(struct toedev *dev);
int activate_offload(struct toedev *dev);
int toe_send(struct toedev *dev, struct mbuf *m);
void toe_arp_update(struct rtentry *rt);
struct ifnet *offload_get_phys_egress(struct ifnet *ifp,
struct socket *so, int context);
int toe_receive_mbuf(struct toedev *dev, struct mbuf **m, int n);
static __inline void
toe_neigh_update(struct ifnet *ifp)
{
}
static __inline void
toe_failover(struct ifnet *bond_ifp, struct ifnet *fail_ifp, int event)
{
}
static __inline int
toe_enslave(struct ifnet *bond_ifp, struct ifnet *slave_ifp)
{
return (0);
}
#endif /* _NETINET_TOEDEV_H_ */


@ -59,10 +59,10 @@ static int tavor_quirk = 0;
module_param_named(tavor_quirk, tavor_quirk, int, 0644);
MODULE_PARM_DESC(tavor_quirk, "Tavor performance quirk: limit MTU to 1K if > 0");
int unify_tcp_port_space = 0;
int unify_tcp_port_space = 1;
module_param(unify_tcp_port_space, int, 0644);
MODULE_PARM_DESC(unify_tcp_port_space, "Unify the host TCP and RDMA port "
"space allocation (default=0)");
"space allocation (default=1)");
#define CMA_CM_RESPONSE_TIMEOUT 20
#define CMA_MAX_CM_RETRIES 15
@ -1478,6 +1478,7 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
struct sockaddr_in *sin;
id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device,
id_priv->sock,
iw_conn_req_handler,
id_priv);
if (IS_ERR(id_priv->cm_id.iw))
@ -2055,7 +2056,16 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
((struct sockaddr_in6 *) dst_addr)->sin6_scope_id;
}
}
return rdma_bind_addr(id, src_addr);
if (!cma_any_addr(src_addr))
return rdma_bind_addr(id, src_addr);
else {
struct sockaddr_in addr_in;
memset(&addr_in, 0, sizeof addr_in);
addr_in.sin_family = dst_addr->sa_family;
addr_in.sin_len = sizeof addr_in;
return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
}
}
int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
@ -2247,6 +2257,7 @@ static int cma_get_tcp_port(struct rdma_id_private *id_priv)
sock_release(sock);
return ret;
}
size = ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr);
ret = sock_getname(sock,
(struct sockaddr *) &id_priv->id.route.addr.src_addr,
@ -2255,6 +2266,7 @@ static int cma_get_tcp_port(struct rdma_id_private *id_priv)
sock_release(sock);
return ret;
}
id_priv->sock = sock;
return 0;
}
@ -2604,7 +2616,8 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
int ret;
struct iw_cm_conn_param iw_param;
cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv);
cm_id = iw_create_cm_id(id_priv->id.device, id_priv->sock,
cma_iw_handler, id_priv);
if (IS_ERR(cm_id)) {
ret = PTR_ERR(cm_id);
goto out;


@ -189,6 +189,7 @@ static void rem_ref(struct iw_cm_id *cm_id)
static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);
struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
struct socket *so,
iw_cm_handler cm_handler,
void *context)
{
@ -205,6 +206,7 @@ struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
cm_id_priv->id.event_handler = cm_event_handler;
cm_id_priv->id.add_ref = add_ref;
cm_id_priv->id.rem_ref = rem_ref;
cm_id_priv->id.so = so;
spin_lock_init(&cm_id_priv->lock);
atomic_set(&cm_id_priv->refcount, 1);
init_waitqueue_head(&cm_id_priv->connect_wait);
@ -629,6 +631,7 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
spin_unlock_irqrestore(&listen_id_priv->lock, flags);
cm_id = iw_create_cm_id(listen_id_priv->id.device,
iw_event->so,
listen_id_priv->id.cm_handler,
listen_id_priv->id.context);
/* If the cm_id could not be created, ignore the request */


@ -48,12 +48,12 @@ sock_getname(struct socket *so, struct sockaddr *addr, int *sockaddr_len,
int error;
nam = NULL;
if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0)
return (-ENOTCONN);
if (peer) {
if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0)
return (-ENOTCONN);
if (peer)
error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, nam);
else
} else
error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, nam);
if (error)
return (-error);

Some files were not shown because too many files have changed in this diff.