Make TCP offload work on HEAD (modulo negative interaction between sbcompress

and t3_push_frames).
 - Import latest changes to cxgb_main.c and cxgb_sge.c from toestack p4 branch
 - make a driver-local copy of tcp_subr.c and tcp_usrreq.c and override tcp_usrreqs so
   TOE can also function on versions with unmodified TCP

- add cxgb back to the build
This commit is contained in:
Kip Macy 2007-12-17 08:17:51 +00:00
parent 7fab871d8c
commit 8090c9f504
24 changed files with 3314 additions and 605 deletions

View File

@ -1878,7 +1878,7 @@ device xe
device bce # Broadcom BCM5706/BCM5708 Gigabit Ethernet
device bfe # Broadcom BCM440x 10/100 Ethernet
device bge # Broadcom BCM570xx Gigabit Ethernet
#device cxgb # Chelsio T3 10 Gigabit Ethernet
device cxgb # Chelsio T3 10 Gigabit Ethernet
device dc # DEC/Intel 21143 and various workalikes
device fxp # Intel EtherExpress PRO/100B (82557, 82558)
hint.fxp.0.prefer_iomap="0"

View File

@ -1131,6 +1131,18 @@ struct cpl_tx_pkt_lso {
__be32 lso_info;
};
/*
 * One packet slot of a batched transmit request.  All three fields use
 * the __be* types (presumably big-endian on the wire, like the other
 * CPL structures in this file -- confirm against the type definitions).
 */
struct cpl_tx_pkt_batch_entry {
/* control word; uses the cpl_tx_pkt*.cntrl field macros defined below */
__be32 cntrl;
/* NOTE(review): presumably the length of this packet -- confirm units */
__be32 len;
/* NOTE(review): presumably the bus/DMA address of the packet data -- confirm */
__be64 addr;
};
/*
 * A work request carrying several packets at once: the common WR_HDR
 * followed by a fixed array of per-packet entries.
 */
struct cpl_tx_pkt_batch {
WR_HDR;
/* 7 slots; NOTE(review): looks like it must match TX_WR_COUNT_MAX (7) in cxgb_osdep.h -- confirm */
struct cpl_tx_pkt_batch_entry pkt_entry[7];
};
/* cpl_tx_pkt*.cntrl fields */
#define S_TXPKT_VLAN 0
#define M_TXPKT_VLAN 0xFFFF

View File

@ -31,7 +31,6 @@ $FreeBSD$
***************************************************************************/
#ifndef _CXGB_ADAPTER_H_
#define _CXGB_ADAPTER_H_
@ -42,6 +41,7 @@ $FreeBSD$
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/condvar.h>
#include <net/ethernet.h>
#include <net/if.h>
@ -49,6 +49,7 @@ $FreeBSD$
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
@ -56,8 +57,8 @@ $FreeBSD$
#ifdef CONFIG_DEFINED
#include <cxgb_osdep.h>
#include <t3cdev.h>
#include <sys/mbufq.h>
#include <ulp/toecore/cxgb_toedev.h>
#include <sys/mbufq.h>
#else
#include <dev/cxgb/cxgb_osdep.h>
#include <dev/cxgb/t3cdev.h>
@ -128,10 +129,12 @@ struct port_info {
struct task timer_reclaim_task;
struct cdev *port_cdev;
#define PORT_NAME_LEN 32
#define PORT_LOCK_NAME_LEN 32
#define TASKQ_NAME_LEN 32
char lockbuf[PORT_NAME_LEN];
#define PORT_NAME_LEN 32
char lockbuf[PORT_LOCK_NAME_LEN];
char taskqbuf[TASKQ_NAME_LEN];
char namebuf[PORT_NAME_LEN];
};
enum { /* adapter flags */
@ -143,19 +146,14 @@ enum { /* adapter flags */
TPS_UPTODATE = (1 << 5),
};
#define FL_Q_SIZE 4096
#define JUMBO_Q_SIZE 512
#define JUMBO_Q_SIZE 1024
#define RSPQ_Q_SIZE 1024
#define TX_ETH_Q_SIZE 1024
/*
* Types of Tx queues in each queue set. Order here matters, do not change.
* XXX TOE is not implemented yet, so the extra queues are just placeholders.
*/
enum { TXQ_ETH, TXQ_OFLD, TXQ_CTRL };
enum { TXQ_ETH = 0,
TXQ_OFLD = 1,
TXQ_CTRL = 2, };
/* careful, the following are set on priv_flags and must not collide with
@ -275,7 +273,22 @@ struct sge_txq {
bus_dmamap_t desc_map;
bus_dma_tag_t entry_tag;
struct mbuf_head sendq;
/*
* cleanq should really be a buf_ring to avoid extra
* mbuf touches
*/
struct mbuf_head cleanq;
struct buf_ring txq_mr;
struct mbuf *immpkt;
uint32_t txq_drops;
uint32_t txq_skipped;
uint32_t txq_coalesced;
uint32_t txq_enqueued;
unsigned long txq_frees;
struct mtx lock;
struct sg_ent txq_sgl[TX_MAX_SEGS / 2 + 1];
bus_dma_segment_t txq_segs[TX_MAX_SEGS];
struct mbuf *txq_m_vec[TX_WR_COUNT_MAX];
#define TXQ_NAME_LEN 32
char lockbuf[TXQ_NAME_LEN];
};
@ -294,6 +307,10 @@ enum {
#define SGE_PSTAT_MAX (SGE_PSTATS_LRO_X_STREAMS+1)
#define QS_EXITING 0x1
#define QS_RUNNING 0x2
#define QS_BOUND 0x4
struct sge_qset {
struct sge_rspq rspq;
struct sge_fl fl[SGE_RXQ_PER_SET];
@ -303,6 +320,12 @@ struct sge_qset {
uint64_t port_stats[SGE_PSTAT_MAX];
struct port_info *port;
int idx; /* qset # */
int qs_cpuid;
int qs_flags;
struct cv qs_cv;
struct mtx qs_mtx;
#define QS_NAME_LEN 32
char namebuf[QS_NAME_LEN];
};
struct sge {
@ -344,7 +367,15 @@ struct adapter {
void *msix_intr_tag[SGE_QSETS];
uint8_t rxpkt_map[8]; /* maps RX_PKT interface values to port ids */
uint8_t rrss_map[SGE_QSETS]; /* revers RSS map table */
uint16_t rspq_map[RSS_TABLE_SIZE]; /* maps 7-bit cookie to qidx */
union {
uint8_t fill[SGE_QSETS];
uint64_t coalesce;
} u;
#define tunq_fill u.fill
#define tunq_coalesce u.coalesce
struct filter_info *filters;
/* Tasks */
@ -474,7 +505,7 @@ t3_get_next_mcaddr(struct t3_rx_mode *rm)
uint8_t *macaddr = NULL;
if (rm->idx == 0)
macaddr = rm->port->hw_addr;
macaddr = (uint8_t *)rm->port->hw_addr;
rm->idx++;
return (macaddr);
@ -515,18 +546,21 @@ void t3_sge_stop(adapter_t *);
void t3b_intr(void *data);
void t3_intr_msi(void *data);
void t3_intr_msix(void *data);
int t3_encap(struct port_info *, struct mbuf **, int *free);
int t3_encap(struct sge_qset *, struct mbuf **, int);
int t3_sge_init_adapter(adapter_t *);
int t3_sge_init_port(struct port_info *);
void t3_sge_deinit_sw(adapter_t *);
void t3_free_tx_desc(struct sge_txq *q, int n);
void t3_free_tx_desc_all(struct sge_txq *q);
void t3_rx_eth_lro(adapter_t *adap, struct sge_rspq *rq, struct mbuf *m,
int ethpad, uint32_t rss_hash, uint32_t rss_csum, int lro);
void t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad);
void t3_lro_flush(adapter_t *adap, struct sge_qset *qs, struct lro_state *state);
void t3_add_sysctls(adapter_t *sc);
void t3_add_attach_sysctls(adapter_t *sc);
void t3_add_configured_sysctls(adapter_t *sc);
int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
unsigned char *data);
void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p);
@ -535,7 +569,7 @@ void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p);
*/
#define desc_reclaimable(q) ((int)((q)->processed - (q)->cleaned - TX_MAX_DESC))
#define container_of(p, stype, field) ((stype *)(((uint8_t *)(p)) - offsetof(stype, field)))
#define container_of(p, stype, field) ((stype *)(((uint8_t *)(p)) - offsetof(stype, field)))
static __inline struct sge_qset *
fl_to_qset(struct sge_fl *q, int qidx)
@ -569,5 +603,20 @@ static inline int offload_running(adapter_t *adapter)
return isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
}
#ifdef IFNET_MULTIQUEUE
int cxgb_pcpu_enqueue_packet(struct ifnet *ifp, struct mbuf *m);
int cxgb_pcpu_start(struct ifnet *ifp, struct mbuf *m);
int32_t cxgb_pcpu_get_cookie(struct ifnet *ifp, struct in6_addr *lip, uint16_t lport,
struct in6_addr *rip, uint16_t rport, int ipv6);
void cxgb_pcpu_shutdown_threads(struct adapter *sc);
void cxgb_pcpu_startup_threads(struct adapter *sc);
#endif
int process_responses(adapter_t *adap, struct sge_qset *qs, int budget);
int cxgb_tx_common(struct ifnet *ifp, struct sge_qset *qs, uint32_t txmax);
void t3_free_qset(adapter_t *sc, struct sge_qset *q);
int cxgb_dequeue_packet(struct ifnet *, struct sge_txq *, struct mbuf **);
void cxgb_start(struct ifnet *ifp);
void refill_fl_service(adapter_t *adap, struct sge_fl *fl);
#endif

View File

@ -34,7 +34,6 @@ $FreeBSD$
#ifndef CONFIG_DEFINED
#define CONFIG_CHELSIO_T3_CORE
#define DISABLE_MBUF_IOVEC
#endif
#endif

View File

@ -115,7 +115,7 @@ neigh_replace(struct l2t_entry *e, struct rtentry *rt)
*/
static int
setup_l2e_send_pending(struct t3cdev *dev, struct mbuf *m,
struct l2t_entry *e)
struct l2t_entry *e)
{
struct cpl_l2t_write_req *req;
@ -183,7 +183,7 @@ t3_l2t_send_slow(struct t3cdev *dev, struct mbuf *m, struct l2t_entry *e)
again:
switch (e->state) {
case L2T_STATE_STALE: /* entry is stale, kick off revalidation */
arpresolve(rt->rt_ifp, rt, NULL, (struct sockaddr *)&sin, e->dmac);
arpresolve2(rt->rt_ifp, rt, (struct sockaddr *)&sin, e->dmac);
mtx_lock(&e->lock);
if (e->state == L2T_STATE_STALE)
e->state = L2T_STATE_VALID;
@ -208,8 +208,8 @@ again:
* A better way would be to use a work request to retry L2T
* entries when there's no memory.
*/
printf("doing arpresolve on 0x%x \n", e->addr);
if (arpresolve(rt->rt_ifp, rt, NULL, (struct sockaddr *)&sin, e->dmac) == 0) {
printf("doing arpresolve2 on 0x%x \n", e->addr);
if (arpresolve2(rt->rt_ifp, rt, (struct sockaddr *)&sin, e->dmac) == 0) {
printf("mac=%x:%x:%x:%x:%x:%x\n",
e->dmac[0], e->dmac[1], e->dmac[2], e->dmac[3], e->dmac[4], e->dmac[5]);
@ -223,7 +223,7 @@ again:
m_freem(m);
mtx_unlock(&e->lock);
} else
printf("arpresolve returned non-zero\n");
printf("arpresolve2 returned non-zero\n");
}
return 0;
}
@ -245,7 +245,7 @@ t3_l2t_send_event(struct t3cdev *dev, struct l2t_entry *e)
again:
switch (e->state) {
case L2T_STATE_STALE: /* entry is stale, kick off revalidation */
arpresolve(rt->rt_ifp, rt, m0, (struct sockaddr *)&sin, e->dmac);
arpresolve2(rt->rt_ifp, rt, (struct sockaddr *)&sin, e->dmac);
mtx_lock(&e->lock);
if (e->state == L2T_STATE_STALE) {
e->state = L2T_STATE_VALID;
@ -262,8 +262,6 @@ again:
}
mtx_unlock(&e->lock);
if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
return;
/*
* Only the first packet added to the arpq should kick off
* resolution. However, because the alloc_skb below can fail,
@ -272,7 +270,7 @@ again:
* A better way would be to use a work request to retry L2T
* entries when there's no memory.
*/
arpresolve(rt->rt_ifp, rt, m0, (struct sockaddr *)&sin, e->dmac);
arpresolve2(rt->rt_ifp, rt, (struct sockaddr *)&sin, e->dmac);
}
return;
@ -459,7 +457,8 @@ handle_failed_resolution(struct t3cdev *dev, struct mbuf *arpq)
}
void
t3_l2t_update(struct t3cdev *dev, struct rtentry *neigh, struct sockaddr *sa)
t3_l2t_update(struct t3cdev *dev, struct rtentry *neigh,
uint8_t *enaddr, struct sockaddr *sa)
{
struct l2t_entry *e;
struct mbuf *arpq = NULL;
@ -468,8 +467,6 @@ t3_l2t_update(struct t3cdev *dev, struct rtentry *neigh, struct sockaddr *sa)
int ifidx = neigh->rt_ifp->if_index;
int hash = arp_hash(addr, ifidx, d);
struct llinfo_arp *la;
u_char edst[ETHER_ADDR_LEN];
printf("t3_l2t_update called with arp info\n");
@ -485,10 +482,11 @@ t3_l2t_update(struct t3cdev *dev, struct rtentry *neigh, struct sockaddr *sa)
found:
printf("found 0x%08x\n", addr);
arpresolve(neigh->rt_ifp, neigh, NULL, sa, edst);
rw_runlock(&d->lock);
memcpy(e->dmac, edst, ETHER_ADDR_LEN);
memcpy(e->dmac, enaddr, ETHER_ADDR_LEN);
printf("mac=%x:%x:%x:%x:%x:%x\n",
e->dmac[0], e->dmac[1], e->dmac[2], e->dmac[3], e->dmac[4], e->dmac[5]);
if (atomic_load_acq_int(&e->refcnt)) {
if (neigh != e->neigh)

View File

@ -118,7 +118,7 @@ static __inline void set_arp_failure_handler(struct mbuf *m,
#define L2DATA(dev) ((dev)->l2opt)
void t3_l2e_free(struct l2t_data *d, struct l2t_entry *e);
void t3_l2t_update(struct t3cdev *dev, struct rtentry *rt, struct sockaddr *sa);
void t3_l2t_update(struct t3cdev *dev, struct rtentry *rt, uint8_t *enaddr, struct sockaddr *sa);
struct l2t_entry *t3_l2t_get(struct t3cdev *dev, struct rtentry *neigh,
struct ifnet *ifp, struct sockaddr *sa);
int t3_l2t_send_slow(struct t3cdev *dev, struct mbuf *m,

View File

@ -44,14 +44,15 @@ __FBSDID("$FreeBSD$");
#include <sys/ioccom.h>
#include <sys/mbuf.h>
#include <sys/linker.h>
#include <sys/syslog.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/proc.h>
#include <net/bpf.h>
#include <net/ethernet.h>
@ -73,23 +74,18 @@ __FBSDID("$FreeBSD$");
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#ifdef CONFIG_DEFINED
#include <cxgb_include.h>
#include <sys/mvec.h>
#else
#include <dev/cxgb/cxgb_include.h>
#include <dev/cxgb/sys/mvec.h>
#endif
#ifdef PRIV_SUPPORTED
#include <sys/priv.h>
#endif
#include <machine/intr_machdep.h>
static int cxgb_setup_msix(adapter_t *, int);
static void cxgb_teardown_msix(adapter_t *);
static void cxgb_init(void *);
@ -97,8 +93,6 @@ static void cxgb_init_locked(struct port_info *);
static void cxgb_stop_locked(struct port_info *);
static void cxgb_set_rxmode(struct port_info *);
static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
static void cxgb_start(struct ifnet *);
static void cxgb_start_proc(void *, int ncount);
static int cxgb_media_change(struct ifnet *);
static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
static int setup_sge_qsets(adapter_t *);
@ -109,6 +103,10 @@ static void cxgb_down_locked(struct adapter *sc);
static void cxgb_tick(void *);
static void setup_rss(adapter_t *sc);
#ifndef IFNET_MULTIQUEUE
static void cxgb_start_proc(void *, int ncount);
#endif
/* Attachment glue for the PCI controller end of the device. Each port of
* the device is attached separately, as defined later.
*/
@ -122,11 +120,7 @@ static void cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf)
static int cxgb_get_regs_len(void);
static int offload_open(struct port_info *pi);
static void touch_bars(device_t dev);
#ifdef notyet
static int offload_close(struct t3cdev *tdev);
#endif
static device_method_t cxgb_controller_methods[] = {
DEVMETHOD(device_probe, cxgb_controller_probe),
@ -188,7 +182,6 @@ DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
#define SGE_MSIX_COUNT (SGE_QSETS + 1)
extern int collapse_mbufs;
/*
* The driver uses the best interrupt scheme available on a platform in the
* order MSI-X, MSI, legacy pin interrupts. This parameter determines which
@ -218,11 +211,15 @@ SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
* The driver uses an auto-queue algorithm by default.
* To disable it and force a single queue-set per port, use singleq = 1.
*/
static int singleq = 1;
static int singleq = 0;
TUNABLE_INT("hw.cxgb.singleq", &singleq);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, singleq, CTLFLAG_RDTUN, &singleq, 0,
"use a single queue-set per port");
#ifndef IFNET_MULTIQUEUE
int cxgb_txq_buf_ring_size = 0;
#endif
enum {
MAX_TXQ_ENTRIES = 16384,
MAX_CTRL_TXQ_ENTRIES = 1024,
@ -281,10 +278,24 @@ struct cxgb_ident {
{0, 0, 0, NULL}
};
static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
static inline char
/*
 * Refresh the per-queue-set "Ethernet tx queue is filling" flag stored in
 * sc->tunq_fill[qs->idx].
 *
 * The flag is compared against a single threshold, size - (size >> 2):
 *  - flag set and in_use below the threshold   -> flag cleared
 *  - flag clear and in_use above the threshold -> flag set
 * In every other case (including in_use exactly at the threshold) the
 * flag is left as it is.
 */
static __inline void
check_pkt_coalesce(struct sge_qset *qs)
{
	struct sge_txq *txq = &qs->txq[TXQ_ETH];
	struct adapter *sc = qs->port->adapter;
	uint8_t *fill = &sc->tunq_fill[qs->idx];

	if (*fill) {
		/* Currently flagged: drop the flag once usage falls back. */
		if (txq->in_use < (txq->size - (txq->size >> 2)))
			*fill = 0;
	} else if (txq->in_use > (txq->size - (txq->size >> 2))) {
		/* Not flagged yet and the ring has climbed past the mark. */
		*fill = 1;
	}
}
static __inline char
t3rev2char(struct adapter *adapter)
{
char rev = 'z';
@ -582,6 +593,7 @@ cxgb_controller_attach(device_t dev)
pi->tx_chan = i >= ai->nports0;
pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
sc->rxpkt_map[pi->txpkt_intf] = i;
sc->port[i].tx_chan = i >= ai->nports0;
sc->portdev[i] = child;
device_set_softc(child, pi);
}
@ -611,7 +623,7 @@ cxgb_controller_attach(device_t dev)
G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
G_FW_VERSION_MICRO(vers));
t3_add_sysctls(sc);
t3_add_attach_sysctls(sc);
out:
if (error)
cxgb_free(sc);
@ -636,10 +648,14 @@ cxgb_free(struct adapter *sc)
{
int i;
#ifdef IFNET_MULTIQUEUE
cxgb_pcpu_shutdown_threads(sc);
#endif
ADAPTER_LOCK(sc);
/*
* drops the lock
*/
/*
* drops the lock
*/
cxgb_down_locked(sc);
#ifdef MSI_SUPPORTED
@ -664,7 +680,7 @@ cxgb_free(struct adapter *sc)
* Wait for last callout
*/
tsleep(&sc, 0, "cxgb unload", 3*hz);
DELAY(hz*100);
for (i = 0; i < (sc)->params.nports; ++i) {
if (sc->portdev[i] != NULL)
@ -674,15 +690,17 @@ cxgb_free(struct adapter *sc)
bus_generic_detach(sc->dev);
if (sc->tq != NULL)
taskqueue_free(sc->tq);
#ifdef notyet
if (is_offload(sc)) {
cxgb_adapter_unofld(sc);
if (isset(&sc->open_device_map, OFFLOAD_DEVMAP_BIT))
offload_close(&sc->tdev);
}
#endif
else
printf("cxgb_free: DEVMAP_BIT not set\n");
} else
printf("not offloading set\n");
#ifndef IFNET_MULTIQUEUE
t3_free_sge_resources(sc);
#endif
free(sc->filters, M_DEVBUF);
t3_sge_free(sc);
@ -696,8 +714,6 @@ cxgb_free(struct adapter *sc)
MTX_DESTROY(&sc->sge.reg_lock);
MTX_DESTROY(&sc->elmer_lock);
ADAPTER_LOCK_DEINIT(sc);
return;
}
/**
@ -803,7 +819,7 @@ cxgb_setup_msix(adapter_t *sc, int msix_count)
printf("setting up interrupt for port=%d\n",
qs->port->port_id);
if (bus_setup_intr(sc->dev, sc->msix_irq_res[k],
INTR_MPSAFE|INTR_TYPE_NET,
INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
NULL,
#endif
@ -812,10 +828,17 @@ cxgb_setup_msix(adapter_t *sc, int msix_count)
"interrupt for message %d\n", rid);
return (EINVAL);
}
#ifdef IFNET_MULTIQUEUE
if (singleq == 0) {
int vector = rman_get_start(sc->msix_irq_res[k]);
if (bootverbose)
device_printf(sc->dev, "binding vector=%d to cpu=%d\n", vector, k % mp_ncpus);
intr_bind(vector, k % mp_ncpus);
}
#endif
}
}
return (0);
}
@ -892,6 +915,12 @@ cxgb_port_attach(device_t dev)
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_ioctl = cxgb_ioctl;
ifp->if_start = cxgb_start;
#ifdef IFNET_MULTIQUEUE
ifp->if_flags |= IFF_MULTIQ;
ifp->if_mq_start = cxgb_pcpu_start;
#endif
ifp->if_timer = 0; /* Disable ifnet watchdog */
ifp->if_watchdog = NULL;
@ -965,7 +994,7 @@ cxgb_port_attach(device_t dev)
p->tq = taskqueue_create_fast(p->taskqbuf, M_NOWAIT,
taskqueue_thread_enqueue, &p->tq);
#endif
#ifndef IFNET_MULTIQUEUE
if (p->tq == NULL) {
device_printf(dev, "failed to allocate port task queue\n");
return (ENOMEM);
@ -974,7 +1003,7 @@ cxgb_port_attach(device_t dev)
device_get_nameunit(dev));
TASK_INIT(&p->start_task, 0, cxgb_start_proc, ifp);
#endif
t3_sge_init_port(p);
return (0);
@ -999,6 +1028,9 @@ cxgb_port_detach(device_t dev)
}
ether_ifdetach(p->ifp);
printf("waiting for callout to stop ...");
DELAY(1000000);
printf("done\n");
/*
* the lock may be acquired in ifdetach
*/
@ -1247,9 +1279,7 @@ offload_tx(struct t3cdev *tdev, struct mbuf *m)
{
int ret;
critical_enter();
ret = t3_offload_tx(tdev, m);
critical_exit();
return (ret);
}
@ -1264,6 +1294,8 @@ write_smt_entry(struct adapter *adapter, int idx)
return (ENOMEM);
req = mtod(m, struct cpl_smt_write_req *);
m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
req->mtu_idx = NMTUS - 1; /* should be 0 but there's a T3 bug */
@ -1325,6 +1357,10 @@ bind_qsets(adapter_t *sc)
{
int i, j;
#ifdef IFNET_MULTIQUEUE
cxgb_pcpu_startup_threads(sc);
#endif
for (i = 0; i < (sc)->params.nports; ++i) {
const struct port_info *pi = adap2pinfo(sc, i);
@ -1473,6 +1509,7 @@ cxgb_up(struct adapter *sc)
goto out;
setup_rss(sc);
t3_add_configured_sysctls(sc);
sc->flags |= FULL_INIT_DONE;
}
@ -1545,6 +1582,8 @@ cxgb_down_locked(struct adapter *sc)
cxgb_teardown_msix(sc);
ADAPTER_UNLOCK(sc);
callout_stop(&sc->cxgb_tick_ch);
callout_stop(&sc->sge_timer_ch);
callout_drain(&sc->cxgb_tick_ch);
callout_drain(&sc->sge_timer_ch);
@ -1553,26 +1592,28 @@ cxgb_down_locked(struct adapter *sc)
for (i = 0; i < sc->params.nports; i++)
taskqueue_drain(sc->tq, &sc->port[i].timer_reclaim_task);
}
#ifdef notyet
if (sc->port[i].tq != NULL)
#endif
}
static int
offload_open(struct port_info *pi)
{
struct adapter *adapter = pi->adapter;
struct t3cdev *tdev = TOEDEV(pi->ifp);
struct t3cdev *tdev = &adapter->tdev;
#ifdef notyet
T3CDEV(pi->ifp);
#endif
int adap_up = adapter->open_device_map & PORT_MASK;
int err = 0;
printf("device_map=0x%x\n", adapter->open_device_map);
if (atomic_cmpset_int(&adapter->open_device_map,
(adapter->open_device_map & ~OFFLOAD_DEVMAP_BIT),
(adapter->open_device_map | OFFLOAD_DEVMAP_BIT)) == 0)
(adapter->open_device_map & ~(1<<OFFLOAD_DEVMAP_BIT)),
(adapter->open_device_map | (1<<OFFLOAD_DEVMAP_BIT))) == 0)
return (0);
if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
printf("offload_open: DEVMAP_BIT did not get set 0x%x\n", adapter->open_device_map);
ADAPTER_LOCK(pi->adapter);
if (!adap_up)
err = cxgb_up(adapter);
@ -1581,7 +1622,7 @@ offload_open(struct port_info *pi)
return (err);
t3_tp_set_offload_mode(adapter, 1);
tdev->lldev = adapter->port[0].ifp;
tdev->lldev = pi->ifp;
err = cxgb_offload_activate(adapter);
if (err)
goto out;
@ -1605,15 +1646,18 @@ out:
}
return (err);
}
#ifdef notyet
static int
offload_close(struct t3cev *tdev)
offload_close(struct t3cdev *tdev)
{
struct adapter *adapter = tdev2adap(tdev);
if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT)) {
printf("offload_close: DEVMAP_BIT not set\n");
return (0);
}
/* Call back all registered clients */
cxgb_remove_clients(tdev);
tdev->lldev = NULL;
@ -1621,13 +1665,15 @@ offload_close(struct t3cev *tdev)
t3_tp_set_offload_mode(adapter, 0);
clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
ADAPTER_LOCK(adapter);
if (!adapter->open_device_map)
cxgb_down(adapter);
cxgb_down_locked(adapter);
else
ADAPTER_UNLOCK(adapter);
cxgb_offload_deactivate(adapter);
return (0);
}
#endif
static void
cxgb_init(void *arg)
@ -1667,6 +1713,8 @@ cxgb_init_locked(struct port_info *p)
if (err)
log(LOG_WARNING,
"Could not initialize offload capabilities\n");
else
printf("offload opened\n");
}
cxgb_link_start(p);
t3_link_changed(sc, p->port_id);
@ -1675,8 +1723,7 @@ cxgb_init_locked(struct port_info *p)
device_printf(sc->dev, "enabling interrupts on port=%d\n", p->port_id);
t3_port_intr_enable(sc, p->port_id);
callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
cxgb_tick, sc);
callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
ifp->if_drv_flags |= IFF_DRV_RUNNING;
ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
@ -1703,7 +1750,6 @@ cxgb_stop_locked(struct port_info *p)
ADAPTER_LOCK_ASSERT_NOTOWNED(p->adapter);
ifp = p->ifp;
t3_port_intr_disable(p->adapter, p->port_id);
ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
p->phy.ops->power_down(&p->phy, 1);
@ -1712,7 +1758,6 @@ cxgb_stop_locked(struct port_info *p)
ADAPTER_LOCK(p->adapter);
clrbit(&p->adapter->open_device_map, p->port_id);
if (p->adapter->open_device_map == 0) {
cxgb_down_locked(p->adapter);
} else
@ -1786,8 +1831,7 @@ cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
adapter_t *sc = p->adapter;
callout_reset(&sc->cxgb_tick_ch,
sc->params.stats_update_period * hz,
callout_reset(&sc->cxgb_tick_ch, hz,
cxgb_tick, sc);
}
PORT_UNLOCK(p);
@ -1838,77 +1882,92 @@ cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
return (error);
}
/*
 * Move packets from the interface send path onto the Ethernet tx queue
 * of queue set `qs`.
 *
 * Packets are pulled with cxgb_dequeue_packet() into txq->txq_m_vec and
 * handed to t3_encap() until one of the following happens:
 *  - this call has consumed `txmax` descriptors,
 *  - fewer than TX_MAX_DESC descriptors would remain free,
 *  - nothing more can be dequeued, or
 *  - t3_encap() fails.
 *
 * Every dequeued packet is shown to BPF listeners before encapsulation.
 * cxgb_dequeue_packet() may hand back more than one packet per call, so
 * all `count` entries are tapped, not just the first one.
 *
 * No locking is done here; cxgb_start_tx() calls this with txq->lock
 * held.
 *
 * Returns 0 on success, the error from t3_encap(), ENOSPC when the
 * descriptor ring is (nearly) full, or -- without IFNET_MULTIQUEUE --
 * ENOBUFS when m_vec[0] is NULL after the loop.
 */
int
cxgb_tx_common(struct ifnet *ifp, struct sge_qset *qs, uint32_t txmax)
{
	struct sge_txq *txq;
	int err, in_use_init, count, i;
	struct mbuf **m_vec;

	txq = &qs->txq[TXQ_ETH];
	m_vec = txq->txq_m_vec;
	in_use_init = txq->in_use;
	err = 0;
	/* Defined value even if the loop below never runs. */
	count = 0;
	while ((txq->in_use - in_use_init < txmax) &&
	    (txq->size > txq->in_use + TX_MAX_DESC)) {
		check_pkt_coalesce(qs);
		count = cxgb_dequeue_packet(ifp, txq, m_vec);
		if (count == 0)
			break;
		/* Tap each packet of the batch, not only m_vec[0]. */
		for (i = 0; i < count; i++)
			ETHER_BPF_MTAP(ifp, m_vec[i]);
		if ((err = t3_encap(qs, m_vec, count)) != 0)
			break;
		txq->txq_enqueued += count;
	}
#ifndef IFNET_MULTIQUEUE
	if (__predict_false(err)) {
		if (err == ENOMEM) {
			/*
			 * Mark the interface busy and put the packet back
			 * at the head of the send queue for a later retry.
			 */
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_LOCK(&ifp->if_snd);
			IFQ_DRV_PREPEND(&ifp->if_snd, m_vec[0]);
			IFQ_UNLOCK(&ifp->if_snd);
		}
	}
	if (err == 0 && m_vec[0] == NULL) {
		/* The last dequeue came back empty. */
		err = ENOBUFS;
	}
	else if ((err == 0) && (txq->size <= txq->in_use + TX_MAX_DESC) &&
	    (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
		err = ENOSPC;
	}
#else
	if ((err == 0) && (txq->size <= txq->in_use + TX_MAX_DESC)) {
		err = ENOSPC;
		setbit(&qs->txq_stopped, TXQ_ETH);
	}
	if (err == ENOMEM) {
		/*
		 * Sub-optimal :-/
		 * err is only ever set by t3_encap(), so count holds the
		 * size of the batch that failed; drop that batch.
		 */
		for (i = 0; i < count; i++)
			m_freem(m_vec[i]);
	}
#endif
	return (err);
}
#ifndef IFNET_MULTIQUEUE
/*
 * Transmit up to `txmax` descriptors worth of packets from ifp's send
 * queue on the port's first queue set.
 *
 * Returns ENXIO when the link is down, ENOBUFS when the send queue is
 * empty, EINPROGRESS when the queue is already marked TXQ_TRANSMITTING,
 * and otherwise whatever cxgb_tx_common() reports.  The result of
 * cxgb_tx_common() has to be propagated: callers use a non-zero return
 * (ENOSPC, ENOBUFS, ...) to decide when to stop transmitting.
 */
static int
cxgb_start_tx(struct ifnet *ifp, uint32_t txmax)
{
	struct sge_qset *qs;
	struct sge_txq *txq;
	struct port_info *p = ifp->if_softc;
	int err;

	if (!p->link_config.link_ok)
		return (ENXIO);
	if (IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		return (ENOBUFS);
	}
	qs = &p->adapter->sge.qs[p->first_qset];
	txq = &qs->txq[TXQ_ETH];
	/* Checked without the lock: a cheap early-out only. */
	if (txq->flags & TXQ_TRANSMITTING)
		return (EINPROGRESS);
	mtx_lock(&txq->lock);
	txq->flags |= TXQ_TRANSMITTING;
	err = cxgb_tx_common(ifp, qs, txmax);
	txq->flags &= ~TXQ_TRANSMITTING;
	mtx_unlock(&txq->lock);
	return (err);
}
@ -1932,7 +1991,15 @@ cxgb_start_proc(void *arg, int ncount)
} while (error == 0);
}
static void
/*
 * Single-queue dequeue helper: take one packet off ifp's send queue and
 * store it in m_vec[0] (NULL when the queue is empty).  The txq argument
 * is not used by this variant.
 *
 * Returns the number of packets placed in m_vec: 1 or 0.
 */
int
cxgb_dequeue_packet(struct ifnet *ifp, struct sge_txq *unused, struct mbuf **m_vec)
{
	struct mbuf *m;

	IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
	m_vec[0] = m;

	return (m != NULL);
}
void
cxgb_start(struct ifnet *ifp)
{
struct port_info *pi = ifp->if_softc;
@ -1952,7 +2019,7 @@ cxgb_start(struct ifnet *ifp)
if (err == 0)
taskqueue_enqueue(pi->tq, &pi->start_task);
}
#endif
static int
cxgb_media_change(struct ifnet *ifp)
@ -2078,12 +2145,26 @@ static void
cxgb_tick(void *arg)
{
adapter_t *sc = (adapter_t *)arg;
int i, running = 0;
for_each_port(sc, i) {
struct port_info *p = &sc->port[i];
struct ifnet *ifp = p->ifp;
PORT_LOCK(p);
if ((ifp->if_drv_flags & IFF_DRV_RUNNING))
running = 1;
PORT_UNLOCK(p);
}
if (running == 0)
return;
taskqueue_enqueue(sc->tq, &sc->tick_task);
if (sc->open_device_map != 0)
callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
cxgb_tick, sc);
callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
}
static void
@ -2478,7 +2559,7 @@ cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
* Read 256 bytes at a time as len can be large and we don't
* want to use huge intermediate buffers.
*/
useraddr = (uint8_t *)(t + 1); /* advance to start of buffer */
useraddr = (uint8_t *)t->buf;
while (t->len) {
unsigned int chunk = min(t->len, sizeof(buf));

View File

@ -108,9 +108,12 @@ cxgb_register_client(struct cxgb_client *client)
printf("client->add set\n");
TAILQ_FOREACH(tdev, &ofld_dev_list, entry) {
if (offload_activated(tdev))
if (offload_activated(tdev)) {
printf("calling add=%p on %p\n",
client->add, tdev);
client->add(tdev);
else
} else
printf("%p not activated\n", tdev);
}
@ -477,7 +480,8 @@ rx_offload_blackhole(struct t3cdev *dev, struct mbuf **m, int n)
}
static void
dummy_neigh_update(struct t3cdev *dev, struct rtentry *neigh, struct sockaddr *sa)
dummy_neigh_update(struct t3cdev *dev, struct rtentry *neigh, uint8_t *enaddr,
struct sockaddr *sa)
{
}
@ -895,17 +899,32 @@ do_term(struct t3cdev *dev, struct mbuf *m)
}
static void
cxgb_route_event(void *unused, int event, struct rtentry *rt0,
cxgb_arp_update_event(void *unused, struct rtentry *rt0,
uint8_t *enaddr, struct sockaddr *sa)
{
if (TOEDEV(rt0->rt_ifp) == NULL)
return;
RT_ADDREF(rt0);
RT_UNLOCK(rt0);
cxgb_neigh_update(rt0, enaddr, sa);
RT_LOCK(rt0);
RT_REMREF(rt0);
}
static void
cxgb_redirect_event(void *unused, int event, struct rtentry *rt0,
struct rtentry *rt1, struct sockaddr *sa)
{
struct toedev *tdev0, *tdev1 = NULL;
struct toedev *tdev0, *tdev1;
/*
* ignore events on non-offloaded interfaces
*/
tdev0 = TOEDEV(rt0->rt_ifp);
if (rt1)
tdev1 = TOEDEV(rt1->rt_ifp);
tdev1 = TOEDEV(rt1->rt_ifp);
if (tdev0 == NULL && tdev1 == NULL)
return;
/*
@ -914,34 +933,16 @@ cxgb_route_event(void *unused, int event, struct rtentry *rt0,
*/
RT_ADDREF(rt0);
RT_UNLOCK(rt0);
if (rt1) {
RT_ADDREF(rt1);
RT_UNLOCK(rt1);
}
switch (event) {
case RTEVENT_ARP_UPDATE: {
cxgb_neigh_update(rt0, sa);
break;
}
case RTEVENT_REDIRECT_UPDATE: {
cxgb_redirect(rt0, rt1, sa);
cxgb_neigh_update(rt1, sa);
RT_ADDREF(rt1);
RT_UNLOCK(rt1);
break;
}
case RTEVENT_PMTU_UPDATE:
default:
break;
}
cxgb_redirect(rt0, rt1, sa);
cxgb_neigh_update(rt1, NULL, sa);
RT_LOCK(rt0);
RT_REMREF(rt0);
if (rt1) {
RT_LOCK(rt1);
RT_REMREF(rt1);
}
RT_LOCK(rt1);
RT_REMREF(rt1);
}
/*
@ -1048,14 +1049,14 @@ cxgb_ofld_recv(struct t3cdev *dev, struct mbuf **m, int n)
}
void
cxgb_neigh_update(struct rtentry *rt, struct sockaddr *sa)
cxgb_neigh_update(struct rtentry *rt, uint8_t *enaddr, struct sockaddr *sa)
{
if (is_offloading(rt->rt_ifp)) {
struct t3cdev *tdev = T3CDEV(rt->rt_ifp);
PANIC_IF(!tdev);
t3_l2t_update(tdev, rt, sa);
t3_l2t_update(tdev, rt, enaddr, sa);
}
}
@ -1425,7 +1426,10 @@ cxgb_offload_init(void)
t3_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_hwtid_rpl);
t3_register_cpl_handler(CPL_ISCSI_HDR, do_hwtid_rpl);
EVENTHANDLER_REGISTER(route_event, cxgb_route_event, NULL, EVENTHANDLER_PRI_ANY);
EVENTHANDLER_REGISTER(route_arp_update_event, cxgb_arp_update_event,
NULL, EVENTHANDLER_PRI_ANY);
EVENTHANDLER_REGISTER(route_redirect_event, cxgb_redirect_event,
NULL, EVENTHANDLER_PRI_ANY);
#if 0
if (offload_proc_init())

View File

@ -253,7 +253,7 @@ static inline struct toe_tid_entry *lookup_atid(const struct tid_info *t,
void *cxgb_alloc_mem(unsigned long size);
void cxgb_free_mem(void *addr);
void cxgb_neigh_update(struct rtentry *rt, struct sockaddr *sa);
void cxgb_neigh_update(struct rtentry *rt, uint8_t *enaddr, struct sockaddr *sa);
void cxgb_redirect(struct rtentry *old, struct rtentry *new, struct sockaddr *sa);
int process_rx(struct t3cdev *dev, struct mbuf **m, int n);
int attach_t3cdev(struct t3cdev *dev);

View File

@ -36,6 +36,9 @@ $FreeBSD$
#include <sys/endian.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <dev/mii/mii.h>
#ifdef CONFIG_DEFINED
@ -52,18 +55,17 @@ $FreeBSD$
typedef struct adapter adapter_t;
struct sge_rspq;
struct t3_mbuf_hdr {
struct mbuf *mh_head;
struct mbuf *mh_tail;
};
#define PANIC_IF(exp) do { \
if (exp) \
panic("BUG: %s", #exp); \
} while (0)
#define m_get_priority(m) ((uintptr_t)(m)->m_pkthdr.rcvif)
#define m_set_priority(m, pri) ((m)->m_pkthdr.rcvif = (struct ifnet *)((uintptr_t)pri))
#define m_set_sgl(m, sgl) ((m)->m_pkthdr.header = (sgl))
@ -113,6 +115,7 @@ struct t3_mbuf_hdr {
#define CXGB_TX_CLEANUP_THRESHOLD 32
#ifdef DEBUG_PRINT
#define DPRINTF printf
#else
@ -121,19 +124,25 @@ struct t3_mbuf_hdr {
#define TX_MAX_SIZE (1 << 16) /* 64KB */
#define TX_MAX_SEGS 36 /* maximum supported by card */
#define TX_MAX_DESC 4 /* max descriptors per packet */
#define TX_START_MIN_DESC (TX_MAX_DESC << 2)
#if 0
#define TX_START_MAX_DESC (TX_ETH_Q_SIZE >> 2) /* maximum number of descriptors */
#endif
#define TX_START_MAX_DESC (TX_MAX_DESC << 3) /* maximum number of descriptors
* used per call to start */
#define TX_CLEAN_MAX_DESC (TX_MAX_DESC << 4) /* maximum tx descriptors
* to clean per iteration */
#define TX_WR_SIZE_MAX 11*1024 /* the maximum total size of packets aggregated into a single
* TX WR
*/
#define TX_WR_COUNT_MAX 7 /* the maximum total number of packets that can be
* aggregated into a single TX WR
*/
#if defined(__i386__) || defined(__amd64__)
@ -142,7 +151,7 @@ struct t3_mbuf_hdr {
#define wmb() __asm volatile("sfence" ::: "memory")
#define smp_mb() mb()
#define L1_CACHE_BYTES 64
#define L1_CACHE_BYTES 128
static __inline
void prefetch(void *x)
{
@ -167,6 +176,107 @@ extern void kdb_backtrace(void);
#define prefetch(x)
#define L1_CACHE_BYTES 32
#endif
/*
 * Fixed-size ring of pointers with separate producer and consumer
 * indices.  The inline helpers in this header wrap indices with
 * (br_size - 1) as a mask, so br_size is presumably required to be a
 * power of two -- confirm against buf_ring_alloc().
 */
struct buf_ring {
/* slot array, indexed by br_cons / br_prod */
caddr_t *br_ring;
/* index of the next slot to dequeue; written by buf_ring_dequeue() */
volatile uint32_t br_cons;
/* index of the next slot to fill; written by __buf_ring_enqueue() */
volatile uint32_t br_prod;
/* number of slots in br_ring */
int br_size;
/* taken by buf_ring_enqueue() around __buf_ring_enqueue() */
struct mtx br_lock;
};
/* Allocation and release of a ring; defined outside this header. */
struct buf_ring *buf_ring_alloc(int count, int flags);
void buf_ring_free(struct buf_ring *);
/*
 * Number of entries currently queued in the ring: the distance from the
 * consumer index to the producer index, wrapped with (br_size - 1) as a
 * mask.  Takes no lock, so the result is only a snapshot.
 */
static __inline int
buf_ring_count(struct buf_ring *mr)
{
	const int nslots = mr->br_size;

	return ((nslots + mr->br_prod - mr->br_cons) & (nslots - 1));
}
/*
 * Return non-zero when the ring holds no entries, i.e. the consumer and
 * producer indices coincide.
 */
static __inline int
buf_ring_empty(struct buf_ring *mr)
{
	uint32_t head = mr->br_cons;
	uint32_t tail = mr->br_prod;

	return (head == tail);
}
/*
 * Remove and return the entry at the consumer index, or NULL when the ring
 * is empty (consumer index equals producer index).
 *
 * The producer and consumer sides are locked independently: no lock is
 * taken here, unlike buf_ring_enqueue() which holds br_lock, so this
 * routine relies on the caller to serialize consumers.
 */
static __inline void *
buf_ring_dequeue(struct buf_ring *mr)
{
int prod, cons, mask;
caddr_t *ring, m;
ring = (caddr_t *)mr->br_ring;
mask = mr->br_size - 1;
cons = mr->br_cons;
prod = mr->br_prod;
m = NULL;
if (cons != prod) {
m = ring[cons];
/* Advance the consumer index, wrapping with the size mask. */
mr->br_cons = (cons + 1) & mask;
/*
 * NOTE(review): the barrier is issued after the index store; confirm
 * this is the intended ordering relative to the slot read above.
 */
mb();
}
return (m);
}
/*
 * Producer-side insert without locking; buf_ring_enqueue() is the locked
 * wrapper.
 *
 * Returns 0 on success, or ENOBUFS when advancing the producer index would
 * collide with the consumer index (ring full).  The slot is written first,
 * then a memory barrier is issued, and only then is the new producer index
 * stored.
 */
static __inline int
__buf_ring_enqueue(struct buf_ring *mr, void *m)
{
	int head, tail, next;

	head = mr->br_cons;
	tail = mr->br_prod;
	next = (tail + 1) & (mr->br_size - 1);
	if (next == head)
		return (ENOBUFS);

	mr->br_ring[tail] = m;
	mb();
	mr->br_prod = next;
	return (0);
}
/*
 * Insert `m` into the ring while holding br_lock, serializing producers.
 * Returns whatever __buf_ring_enqueue() returns: 0 on success, ENOBUFS
 * when the ring is full.
 */
static __inline int
buf_ring_enqueue(struct buf_ring *mr, void *m)
{
	int rc;

	mtx_lock(&mr->br_lock);
	rc = __buf_ring_enqueue(mr, m);
	mtx_unlock(&mr->br_lock);

	return (rc);
}
/*
 * Return the entry at the consumer index without removing it, or NULL when
 * the ring is empty.  Neither index is modified and no lock is taken.
 */
static __inline void *
buf_ring_peek(struct buf_ring *mr)
{
	int prod, cons;

	cons = mr->br_cons;
	prod = mr->br_prod;
	if (cons == prod)
		return (NULL);

	return (mr->br_ring[cons]);
}
#define DBG_RX (1 << 0)
static const int debug_flags = DBG_RX;
@ -189,15 +299,12 @@ static const int debug_flags = DBG_RX;
#define t3_os_sleep(x) DELAY((x) * 1000)
#define test_and_clear_bit(bit, p) atomic_cmpset_int((p), ((*(p)) | bit), ((*(p)) & ~bit))
#define test_and_clear_bit(bit, p) atomic_cmpset_int((p), ((*(p)) | (1<<bit)), ((*(p)) & ~(1<<bit)))
#define max_t(type, a, b) (type)max((a), (b))
#define net_device ifnet
#define cpu_to_be32 htobe32
/* Standard PHY definitions */
#define BMCR_LOOPBACK BMCR_LOOP
#define BMCR_ISOLATE BMCR_ISO
@ -247,13 +354,13 @@ static const int debug_flags = DBG_RX;
#define swab32(x) bswap32(x)
#define simple_strtoul strtoul
/* More types and endian definitions */
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
typedef uint8_t __u8;
typedef uint8_t __u8;
typedef uint16_t __u16;
typedef uint32_t __u32;
typedef uint8_t __be8;
@ -261,6 +368,7 @@ typedef uint16_t __be16;
typedef uint32_t __be32;
typedef uint64_t __be64;
#if BYTE_ORDER == BIG_ENDIAN
#define __BIG_ENDIAN_BITFIELD
#elif BYTE_ORDER == LITTLE_ENDIAN

File diff suppressed because it is too large Load Diff

View File

@ -126,11 +126,11 @@ cxgb_cache_pcpu_init(struct cxgb_cache_pcpu *ccp)
if ((err = buf_stack_init(&ccp->ccp_cluster_free, (FL_Q_SIZE >> 1))))
return (err);
if (jumbo_phys_contig)
#if __FreeBSD_version > 800000
ccp->ccp_jumbo_zone = zone_jumbo16;
else
#else
ccp->ccp_jumbo_zone = zone_jumbop;
#endif
return (0);
}

View File

@ -63,6 +63,9 @@ struct m_ext_ {
int ext_type; /* type of external storage */
};
#define MT_IOVEC 9
#define MT_CLIOVEC 10
#define EXT_IOVEC 8
#define EXT_CLIOVEC 9
#define EXT_JMPIOVEC 10

View File

@ -50,7 +50,7 @@ struct t3cdev {
int (*send)(struct t3cdev *dev, struct mbuf *m);
int (*recv)(struct t3cdev *dev, struct mbuf **m, int n);
int (*ctl)(struct t3cdev *dev, unsigned int req, void *data);
void (*arp_update)(struct t3cdev *dev, struct rtentry *neigh, struct sockaddr *sa);
void (*arp_update)(struct t3cdev *dev, struct rtentry *neigh, uint8_t *enaddr, struct sockaddr *sa);
void *priv; /* driver private data */
void *l2opt; /* optional layer 2 data */
void *l3opt; /* optional layer 3 data */

View File

@ -60,7 +60,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_ofld.h>
#include <netinet/tcp_offload.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_syncache.h>
#include <net/route.h>
@ -82,6 +82,7 @@ __FBSDID("$FreeBSD$");
#include <dev/cxgb/ulp/tom/cxgb_tom.h>
#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h>
#include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
#include <dev/cxgb/ulp/tom/cxgb_tcp.h>
@ -559,7 +560,7 @@ cxgb_toe_disconnect(struct tcpcb *tp)
}
static int
cxgb_toe_abort(struct tcpcb *tp)
cxgb_toe_reset(struct tcpcb *tp)
{
struct toepcb *toep = tp->t_toe;
@ -620,7 +621,7 @@ cxgb_toe_detach(struct tcpcb *tp)
static struct toe_usrreqs cxgb_toe_usrreqs = {
.tu_disconnect = cxgb_toe_disconnect,
.tu_abort = cxgb_toe_abort,
.tu_reset = cxgb_toe_reset,
.tu_send = cxgb_toe_send,
.tu_rcvd = cxgb_toe_rcvd,
.tu_detach = cxgb_toe_detach,
@ -1145,7 +1146,7 @@ fail_act_open(struct toepcb *toep, int errno)
t3_release_offload_resources(toep);
if (tp) {
INP_LOCK_ASSERT(tp->t_inpcb);
tcp_drop(tp, errno);
cxgb_tcp_drop(tp, errno);
}
#ifdef notyet
@ -1957,7 +1958,7 @@ process_close_con_rpl(struct socket *so, struct mbuf *m)
wakeup(&so->so_timeo);
} else if ((so->so_options & SO_LINGER) && so->so_linger == 0 &&
(toep->tp_flags & TP_ABORT_SHUTDOWN) == 0) {
tp = tcp_drop(tp, 0);
tp = cxgb_tcp_drop(tp, 0);
}
break;
@ -2483,7 +2484,7 @@ handle_syncache_event(int event, void *arg)
struct toepcb *toep = arg;
switch (event) {
case SC_ENTRY_PRESENT:
case TOE_SC_ENTRY_PRESENT:
/*
* entry already exists - free toepcb
* and l2t
@ -2491,7 +2492,7 @@ handle_syncache_event(int event, void *arg)
printf("syncache entry present\n");
toepcb_release(toep);
break;
case SC_DROP:
case TOE_SC_DROP:
/*
* The syncache has given up on this entry
* either it timed out, or it was evicted

View File

@ -62,7 +62,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_ofld.h>
#include <netinet/tcp_offload.h>
#include <net/route.h>
#include <dev/cxgb/t3cdev.h>
@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$");
#include <dev/cxgb/ulp/tom/cxgb_tom.h>
#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h>
#include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
#include <dev/cxgb/ulp/tom/cxgb_tcp.h>
static int (*pru_sosend)(struct socket *so, struct sockaddr *addr,
struct uio *uio, struct mbuf *top, struct mbuf *control,
@ -99,9 +100,6 @@ static int vm_fault_hold_user_pages(vm_offset_t addr, int len, vm_page_t *mp,
int *count, int flags);
#endif
static void vm_fault_unhold_pages(vm_page_t *m, int count);
#define TMP_IOV_MAX 16
void
@ -112,6 +110,15 @@ t3_init_socket_ops(void)
prp = pffindtype(AF_INET, SOCK_STREAM);
pru_sosend = prp->pr_usrreqs->pru_sosend;
pru_soreceive = prp->pr_usrreqs->pru_soreceive;
tcp_usrreqs.pru_connect = cxgb_tcp_usrreqs.pru_connect;
tcp_usrreqs.pru_abort = cxgb_tcp_usrreqs.pru_abort;
tcp_usrreqs.pru_listen = cxgb_tcp_usrreqs.pru_listen;
tcp_usrreqs.pru_send = cxgb_tcp_usrreqs.pru_send;
tcp_usrreqs.pru_abort = cxgb_tcp_usrreqs.pru_abort;
tcp_usrreqs.pru_disconnect = cxgb_tcp_usrreqs.pru_disconnect;
tcp_usrreqs.pru_close = cxgb_tcp_usrreqs.pru_close;
tcp_usrreqs.pru_shutdown = cxgb_tcp_usrreqs.pru_shutdown;
tcp_usrreqs.pru_rcvd = cxgb_tcp_usrreqs.pru_rcvd;
}

View File

@ -57,7 +57,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_ofld.h>
#include <netinet/tcp_offload.h>
#include <net/route.h>
#include <dev/cxgb/t3cdev.h>

View File

@ -0,0 +1,44 @@
/*-
* Copyright (c) 2007, Chelsio Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Neither the name of the Chelsio Corporation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef CXGB_TCP_H_
#define CXGB_TCP_H_
/*
 * Entry points of the driver-local copy of the TCP support routines.
 * cxgb_tcp_drop() records an error on the socket and closes the connection
 * via cxgb_tcp_close(); both return the tcpcb, or NULL once the socket has
 * been released.
 */
struct tcpcb *cxgb_tcp_drop(struct tcpcb *tp, int errno);
/* Protocol control-input handler for IPv4 (ICMP-derived notifications). */
void cxgb_tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip);
struct tcpcb *cxgb_tcp_close(struct tcpcb *tp);
/*
 * Driver-local user-request table; t3_init_socket_ops() copies selected
 * entries from it over the stock tcp_usrreqs.
 */
extern struct pr_usrreqs cxgb_tcp_usrreqs;
#ifdef INET6
extern struct pr_usrreqs cxgb_tcp6_usrreqs;
#endif
#include <sys/sysctl.h>
/* Declares the net.inet.tcp.cxgb sysctl node for use by other files. */
SYSCTL_DECL(_net_inet_tcp_cxgb);
#endif /* CXGB_TCP_H_ */

View File

@ -0,0 +1,694 @@
/*-
* Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_compat.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
#include "opt_mac.h"
#include "opt_tcpdebug.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#ifdef INET6
#include <sys/domain.h>
#endif
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <sys/random.h>
#include <vm/uma.h>
#include <net/route.h>
#include <net/if.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif
#include <netinet/in_pcb.h>
#ifdef INET6
#include <netinet6/in6_pcb.h>
#endif
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#ifdef INET6
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
#endif
#include <netinet/ip_icmp.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_syncache.h>
#include <netinet/tcp_offload.h>
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
#include <netinet/tcpip.h>
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif
#include <netinet6/ip6protosw.h>
#ifdef IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/xform.h>
#ifdef INET6
#include <netipsec/ipsec6.h>
#endif
#include <netipsec/key.h>
#endif /*IPSEC*/
#include <machine/in_cksum.h>
#include <sys/md5.h>
#include <security/mac/mac_framework.h>
#include <dev/cxgb/ulp/tom/cxgb_tcp.h>
/*
 * Sysctl knobs for the driver-local TCP copy, rooted at net.inet.tcp.cxgb.
 *
 * NOTE(review): of the static variables below, only icmp_may_rst is read by
 * code visible in this file; the others appear to be carried over from the
 * stock tcp_subr.c -- confirm before relying on them having any effect.
 */
SYSCTL_NODE(_net_inet_tcp, 0, cxgb, CTLFLAG_RW, 0, "chelsio TOE");
static int tcp_log_debug = 0;
SYSCTL_INT(_net_inet_tcp_cxgb, OID_AUTO, log_debug, CTLFLAG_RW,
&tcp_log_debug, 0, "Log errors caused by incoming TCP segments");
static int tcp_tcbhashsize = 0;
SYSCTL_INT(_net_inet_tcp_cxgb, OID_AUTO, tcbhashsize, CTLFLAG_RDTUN,
&tcp_tcbhashsize, 0, "Size of TCP control-block hashtable");
static int do_tcpdrain = 1;
SYSCTL_INT(_net_inet_tcp_cxgb, OID_AUTO, do_tcpdrain, CTLFLAG_RW,
&do_tcpdrain, 0,
"Enable tcp_drain routine for extra help when low on mbufs");
/* Read-only view of the PCB count kept in the global tcbinfo. */
SYSCTL_INT(_net_inet_tcp_cxgb, OID_AUTO, pcbcount, CTLFLAG_RD,
&tcbinfo.ipi_count, 0, "Number of active PCBs");
/* Consulted by cxgb_tcp_ctlinput() when choosing the notify callback. */
static int icmp_may_rst = 1;
SYSCTL_INT(_net_inet_tcp_cxgb, OID_AUTO, icmp_may_rst, CTLFLAG_RW,
&icmp_may_rst, 0,
"Certain ICMP unreachable messages may abort connections in SYN_SENT");
static int tcp_isn_reseed_interval = 0;
SYSCTL_INT(_net_inet_tcp_cxgb, OID_AUTO, isn_reseed_interval, CTLFLAG_RW,
&tcp_isn_reseed_interval, 0, "Seconds between reseeding of ISN secret");
/*
 * TCP bandwidth limiting sysctls. Note that the default lower bound of
 * 1024 exists only for debugging. A good production default would be
 * something like 6100.
 *
 * NOTE(review): tcp_inflight_min below is initialized to 6144, not 1024,
 * so the "default lower bound of 1024" remark looks stale.
 * NOTE(review): this node is created directly under net.inet.tcp rather
 * than under the cxgb node -- confirm it does not clash with the stock
 * TCP stack's own "inflight" node.
 */
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, inflight, CTLFLAG_RW, 0,
"TCP inflight data limiting");
static int tcp_inflight_enable = 1;
SYSCTL_INT(_net_inet_tcp_inflight, OID_AUTO, enable, CTLFLAG_RW,
&tcp_inflight_enable, 0, "Enable automatic TCP inflight data limiting");
static int tcp_inflight_debug = 0;
SYSCTL_INT(_net_inet_tcp_inflight, OID_AUTO, debug, CTLFLAG_RW,
&tcp_inflight_debug, 0, "Debug TCP inflight calculations");
/* No static initializer; exported through the sysctl_msec_to_ticks handler. */
static int tcp_inflight_rttthresh;
SYSCTL_PROC(_net_inet_tcp_inflight, OID_AUTO, rttthresh, CTLTYPE_INT|CTLFLAG_RW,
&tcp_inflight_rttthresh, 0, sysctl_msec_to_ticks, "I",
"RTT threshold below which inflight will deactivate itself");
static int tcp_inflight_min = 6144;
SYSCTL_INT(_net_inet_tcp_inflight, OID_AUTO, min, CTLFLAG_RW,
&tcp_inflight_min, 0, "Lower-bound for TCP inflight window");
static int tcp_inflight_max = TCP_MAXWIN << TCP_MAX_WINSHIFT;
SYSCTL_INT(_net_inet_tcp_inflight, OID_AUTO, max, CTLFLAG_RW,
&tcp_inflight_max, 0, "Upper-bound for TCP inflight window");
static int tcp_inflight_stab = 20;
SYSCTL_INT(_net_inet_tcp_inflight, OID_AUTO, stab, CTLFLAG_RW,
&tcp_inflight_stab, 0, "Inflight Algorithm Stabilization 20 = 2 packets");
uma_zone_t sack_hole_zone;
static struct inpcb *tcp_notify(struct inpcb *, int);
static struct inpcb *cxgb_tcp_drop_syn_sent(struct inpcb *inp, int errno);
/*
* Target size of TCP PCB hash tables. Must be a power of two.
*
* Note that this can be overridden by the kernel environment
* variable net.inet.tcp.tcbhashsize
*/
#ifndef TCBHASHSIZE
#define TCBHASHSIZE 512
#endif
/*
* XXX
* Callouts should be moved into struct tcp directly. They are currently
* separate because the tcpcb structure is exported to userland for sysctl
* parsing purposes, which do not know about callouts.
*/
struct tcpcb_mem {
struct tcpcb tcb;
struct tcp_timer tt;
};
MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers");
/*
* Drop a TCP connection, reporting
* the specified error. If connection is synchronized,
* then send a RST to peer.
*/
struct tcpcb *
cxgb_tcp_drop(struct tcpcb *tp, int errno)
{
struct socket *so = tp->t_inpcb->inp_socket;
INP_INFO_WLOCK_ASSERT(&tcbinfo);
INP_LOCK_ASSERT(tp->t_inpcb);
if (TCPS_HAVERCVDSYN(tp->t_state)) {
tp->t_state = TCPS_CLOSED;
(void) tcp_gen_reset(tp);
tcpstat.tcps_drops++;
} else
tcpstat.tcps_conndrops++;
if (errno == ETIMEDOUT && tp->t_softerror)
errno = tp->t_softerror;
so->so_error = errno;
return (cxgb_tcp_close(tp));
}
/*
 * Close a TCP control block: mark the inpcb dropped, disconnect the socket
 * and, when the inpcb holds the socket reference (INP_SOCKREF), release
 * that reference and free the socket.
 *
 * Both the tcbinfo write lock and the inpcb lock must be held on entry.
 * Returns `tp` when the caller still owns the (locked) inpcb, or NULL when
 * the socket reference was released; in that case the inpcb lock has
 * already been dropped here.
 */
struct tcpcb *
cxgb_tcp_close(struct tcpcb *tp)
{
	struct inpcb *inp = tp->t_inpcb;
	struct socket *so;

	INP_INFO_WLOCK_ASSERT(&tcbinfo);
	INP_LOCK_ASSERT(inp);

	if (tp->t_state == TCPS_LISTEN)
		tcp_gen_listen_close(tp);
	in_pcbdrop(inp);
	tcpstat.tcps_closed++;

	KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL"));
	so = inp->inp_socket;
	soisdisconnected(so);

	/* Common case: someone else still references the socket. */
	if ((inp->inp_vflag & INP_SOCKREF) == 0)
		return (tp);

	KASSERT(so->so_state & SS_PROTOREF,
	    ("tcp_close: !SS_PROTOREF"));
	inp->inp_vflag &= ~INP_SOCKREF;
	INP_UNLOCK(inp);
	ACCEPT_LOCK();
	SOCK_LOCK(so);
	so->so_state &= ~SS_PROTOREF;
	sofree(so);
	return (NULL);
}
/*
* Notify a tcp user of an asynchronous error;
* store error as soft error, but wake up user
* (for now, won't do anything until can select for soft error).
*
* Do not wake up user since there currently is no mechanism for
* reporting soft errors (yet - a kqueue filter may be added).
*/
static struct inpcb *
tcp_notify(struct inpcb *inp, int error)
{
struct tcpcb *tp;
INP_INFO_WLOCK_ASSERT(&tcbinfo);
INP_LOCK_ASSERT(inp);
if ((inp->inp_vflag & INP_TIMEWAIT) ||
(inp->inp_vflag & INP_DROPPED))
return (inp);
tp = intotcpcb(inp);
KASSERT(tp != NULL, ("tcp_notify: tp == NULL"));
/*
* Ignore some errors if we are hooked up.
* If connection hasn't completed, has retransmitted several times,
* and receives a second error, give up now. This is better
* than waiting a long time to establish a connection that
* can never complete.
*/
if (tp->t_state == TCPS_ESTABLISHED &&
(error == EHOSTUNREACH || error == ENETUNREACH ||
error == EHOSTDOWN)) {
return (inp);
} else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
tp->t_softerror) {
tp = cxgb_tcp_drop(tp, error);
if (tp != NULL)
return (inp);
else
return (NULL);
} else {
tp->t_softerror = error;
return (inp);
}
#if 0
wakeup( &so->so_timeo);
sorwakeup(so);
sowwakeup(so);
#endif
}
/*
 * IPv4 control-input handler: react to notification `cmd` concerning the
 * destination address `sa`.
 *
 * `vip`, when non-NULL, points at the IP header of the offending packet as
 * embedded in an ICMP message (the ICMP header is recovered from it with
 * offsetof(struct icmp, icmp_ip)).  A callback is chosen from `cmd`
 * (tcp_notify by default, tcp_mtudisc for PRC_MSGSIZE,
 * cxgb_tcp_drop_syn_sent for selected unreachable codes when icmp_may_rst is
 * set).  With a packet header the matching PCB is looked up and notified,
 * or the syncache is told about the unreachable; without one every PCB
 * talking to `faddr` is notified.
 */
void
cxgb_tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
{
struct ip *ip = vip;
struct tcphdr *th;
struct in_addr faddr;
struct inpcb *inp;
struct tcpcb *tp;
struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify;
struct icmp *icp;
struct in_conninfo inc;
tcp_seq icmp_tcp_seq;
int mtu;
/*
 * NOTE(review): sin_addr is read before sa_family is verified on the
 * next line; the value is only used once the check has passed.
 */
faddr = ((struct sockaddr_in *)sa)->sin_addr;
if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
return;
if (cmd == PRC_MSGSIZE)
notify = tcp_mtudisc;
else if (icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB ||
cmd == PRC_UNREACH_PORT || cmd == PRC_TIMXCEED_INTRANS) && ip)
notify = cxgb_tcp_drop_syn_sent;
/*
 * Redirects don't need to be handled up here.
 */
else if (PRC_IS_REDIRECT(cmd))
return;
/*
 * Source quench is deprecated.
 */
else if (cmd == PRC_QUENCH)
return;
/*
 * Hostdead is ugly because it goes linearly through all PCBs.
 * XXX: We never get this from ICMP, otherwise it makes an
 * excellent DoS attack on machines with many connections.
 */
else if (cmd == PRC_HOSTDEAD)
ip = NULL;
else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
return;
if (ip != NULL) {
/* Step back from the embedded IP header to the enclosing ICMP header. */
icp = (struct icmp *)((caddr_t)ip
- offsetof(struct icmp, icmp_ip));
/* The TCP header follows the IP header (ip_hl counts 32-bit words). */
th = (struct tcphdr *)((caddr_t)ip
+ (ip->ip_hl << 2));
INP_INFO_WLOCK(&tcbinfo);
inp = in_pcblookup_hash(&tcbinfo, faddr, th->th_dport,
ip->ip_src, th->th_sport, 0, NULL);
if (inp != NULL) {
INP_LOCK(inp);
if (!(inp->inp_vflag & INP_TIMEWAIT) &&
!(inp->inp_vflag & INP_DROPPED) &&
!(inp->inp_socket == NULL)) {
icmp_tcp_seq = htonl(th->th_seq);
tp = intotcpcb(inp);
/*
 * Only act when the quoted sequence number lies inside the
 * window of sent-but-unacknowledged data.
 */
if (SEQ_GEQ(icmp_tcp_seq, tp->snd_una) &&
SEQ_LT(icmp_tcp_seq, tp->snd_max)) {
if (cmd == PRC_MSGSIZE) {
/*
 * MTU discovery:
 * If we got a needfrag set the MTU
 * in the route to the suggested new
 * value (if given) and then notify.
 */
bzero(&inc, sizeof(inc));
inc.inc_flags = 0; /* IPv4 */
inc.inc_faddr = faddr;
mtu = ntohs(icp->icmp_nextmtu);
/*
 * If no alternative MTU was
 * proposed, try the next smaller
 * one. ip->ip_len has already
 * been swapped in icmp_input().
 */
if (!mtu)
mtu = ip_next_mtu(ip->ip_len,
1);
/* Reject implausibly small values and fall back to the default. */
if (mtu < max(296, (tcp_minmss)
+ sizeof(struct tcpiphdr)))
mtu = 0;
if (!mtu)
mtu = tcp_mssdflt
+ sizeof(struct tcpiphdr);
/*
 * Only cache the MTU if it
 * is smaller than the interface
 * or route MTU. tcp_mtudisc()
 * will do right thing by itself.
 */
if (mtu <= tcp_maxmtu(&inc, NULL))
tcp_hc_updatemtu(&inc, mtu);
}
/* The callback returns NULL when it has released the inpcb. */
inp = (*notify)(inp, inetctlerrmap[cmd]);
}
}
if (inp != NULL)
INP_UNLOCK(inp);
} else {
/*
 * No PCB matched: hand the connection tuple to the syncache.
 * NOTE(review): `inc` is not zeroed on this path; only the
 * fields assigned below are initialized.
 */
inc.inc_fport = th->th_dport;
inc.inc_lport = th->th_sport;
inc.inc_faddr = faddr;
inc.inc_laddr = ip->ip_src;
#ifdef INET6
inc.inc_isipv6 = 0;
#endif
syncache_unreach(&inc, th);
}
INP_INFO_WUNLOCK(&tcbinfo);
} else
in_pcbnotifyall(&tcbinfo, faddr, inetctlerrmap[cmd], notify);
}
#ifdef INET6
/*
 * IPv6 control-input handler: react to notification `cmd` concerning the
 * destination address `sa`.
 *
 * `d`, when non-NULL, is a struct ip6ctlparam carrying the offending
 * packet (mbuf, IPv6 header, offset of the transport header, and source
 * address).  With a packet, the PCB matching the quoted ports is notified
 * and the syncache is told about the unreachable; without one, all PCBs
 * for `sa` are notified using the wildcard source sa6_any.
 *
 * NOTE(review): this non-static function keeps the stock name
 * tcp6_ctlinput and is not declared in cxgb_tcp.h -- confirm it does not
 * clash with the kernel's own symbol when INET6 is enabled.
 */
void
tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
{
struct tcphdr th;
struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify;
struct ip6_hdr *ip6;
struct mbuf *m;
struct ip6ctlparam *ip6cp = NULL;
const struct sockaddr_in6 *sa6_src = NULL;
int off;
/* Just the leading port fields of a TCP header; used only for its size. */
struct tcp_portonly {
u_int16_t th_sport;
u_int16_t th_dport;
} *thp;
if (sa->sa_family != AF_INET6 ||
sa->sa_len != sizeof(struct sockaddr_in6))
return;
if (cmd == PRC_MSGSIZE)
notify = tcp_mtudisc;
else if (!PRC_IS_REDIRECT(cmd) &&
((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0))
return;
/* Source quench is deprecated. */
else if (cmd == PRC_QUENCH)
return;
/* if the parameter is from icmp6, decode it. */
if (d != NULL) {
ip6cp = (struct ip6ctlparam *)d;
m = ip6cp->ip6c_m;
ip6 = ip6cp->ip6c_ip6;
off = ip6cp->ip6c_off;
sa6_src = ip6cp->ip6c_src;
} else {
m = NULL;
ip6 = NULL;
off = 0; /* fool gcc */
sa6_src = &sa6_any;
}
if (ip6 != NULL) {
struct in_conninfo inc;
/*
 * XXX: We assume that when IPV6 is non NULL,
 * M and OFF are valid.
 */
/* check if we can safely examine src and dst ports */
if (m->m_pkthdr.len < off + sizeof(*thp))
return;
/* Copy only the port fields; the rest of `th` stays zeroed. */
bzero(&th, sizeof(th));
m_copydata(m, off, sizeof(*thp), (caddr_t)&th);
in6_pcbnotify(&tcbinfo, sa, th.th_dport,
(struct sockaddr *)ip6cp->ip6c_src,
th.th_sport, cmd, NULL, notify);
/*
 * NOTE(review): `inc` is not zeroed; only the fields assigned
 * below are initialized before it is handed to the syncache.
 */
inc.inc_fport = th.th_dport;
inc.inc_lport = th.th_sport;
inc.inc6_faddr = ((struct sockaddr_in6 *)sa)->sin6_addr;
inc.inc6_laddr = ip6cp->ip6c_src->sin6_addr;
inc.inc_isipv6 = 1;
INP_INFO_WLOCK(&tcbinfo);
syncache_unreach(&inc, &th);
INP_INFO_WUNLOCK(&tcbinfo);
} else
in6_pcbnotify(&tcbinfo, sa, 0, (const struct sockaddr *)sa6_src,
0, cmd, NULL, notify);
}
#endif /* INET6 */
/*
* Following is where TCP initial sequence number generation occurs.
*
* There are two places where we must use initial sequence numbers:
* 1. In SYN-ACK packets.
* 2. In SYN packets.
*
* All ISNs for SYN-ACK packets are generated by the syncache. See
* tcp_syncache.c for details.
*
* The ISNs in SYN packets must be monotonic; TIME_WAIT recycling
* depends on this property. In addition, these ISNs should be
* unguessable so as to prevent connection hijacking. To satisfy
* the requirements of this situation, the algorithm outlined in
* RFC 1948 is used, with only small modifications.
*
* Implementation details:
*
* Time is based off the system timer, and is corrected so that it
* increases by one megabyte per second. This allows for proper
* recycling on high speed LANs while still leaving over an hour
* before rollover.
*
* As reading the *exact* system time is too expensive to be done
* whenever setting up a TCP connection, we increment the time
* offset in two ways. First, a small random positive increment
* is added to isn_offset for each connection that is set up.
* Second, the function tcp_isn_tick fires once per clock tick
* and increments isn_offset as necessary so that sequence numbers
* are incremented at approximately ISN_BYTES_PER_SECOND. The
* random positive increments serve only to ensure that the same
* exact sequence number is never sent out twice (as could otherwise
* happen when a port is recycled in less than the system tick
* interval.)
*
* net.inet.tcp.isn_reseed_interval controls the number of seconds
* between seeding of isn_secret. This is normally set to zero,
* as reseeding should not be necessary.
*
* Locking of the global variables isn_secret, isn_last_reseed, isn_offset,
* isn_offset_old, and isn_ctx is performed using the TCP pcbinfo lock. In
* general, this means holding an exclusive (write) lock.
*/
#define ISN_BYTES_PER_SECOND 1048576
#define ISN_STATIC_INCREMENT 4096
#define ISN_RANDOM_INCREMENT (4096 - 1)
/*
 * Notify callback used for selected ICMP unreachable messages (chosen in
 * cxgb_tcp_ctlinput() when the icmp_may_rst sysctl is set): drop the
 * connection with `errno`, but only while it is still in SYN-SENT.
 *
 * PCBs in TIMEWAIT, already dropped, or in any other state are returned
 * untouched.  Returns `inp`, or NULL when the drop released the socket.
 */
static struct inpcb *
cxgb_tcp_drop_syn_sent(struct inpcb *inp, int errno)
{
	struct tcpcb *tp;

	INP_INFO_WLOCK_ASSERT(&tcbinfo);
	INP_LOCK_ASSERT(inp);

	if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED))
		return (inp);

	tp = intotcpcb(inp);
	if (tp->t_state != TCPS_SYN_SENT)
		return (inp);

	return (cxgb_tcp_drop(tp, errno) != NULL ? inp : NULL);
}
/*
 * Sysctl handler that drops the TCP connection identified by a pair of
 * socket addresses written by the caller.
 *
 * The new value must be two struct sockaddr_storage entries: addrs[0] is
 * the foreign address, addrs[1] the local one.  Both must be of the same
 * kind (AF_INET, AF_INET6, or both v4-mapped AF_INET6, which are converted
 * to AF_INET in place).
 *
 * Returns EINVAL for a read attempt or malformed addresses, EPERM when no
 * new value is supplied, ENOMEM when it is too short, ESRCH when no
 * matching PCB exists, or the error from copy-in / scope embedding.
 * Returns 0 once a PCB was found, whether or not it was eligible to be
 * dropped.
 */
static int
cxgb_sysctl_drop(SYSCTL_HANDLER_ARGS)
{
	/* addrs[0] is a foreign socket, addrs[1] is a local one. */
	struct sockaddr_storage addrs[2];
	struct inpcb *inp;
	struct tcpcb *tp;
	struct tcptw *tw;
	struct sockaddr_in *fin, *lin;
#ifdef INET6
	struct sockaddr_in6 *fin6, *lin6;
#endif
	int error;

	inp = NULL;
	fin = lin = NULL;
#ifdef INET6
	fin6 = lin6 = NULL;
#endif
	error = 0;

	/* Write-only node: reject reads and empty or short writes. */
	if (req->oldptr != NULL || req->oldlen != 0)
		return (EINVAL);
	if (req->newptr == NULL)
		return (EPERM);
	if (req->newlen < sizeof(addrs))
		return (ENOMEM);
	error = SYSCTL_IN(req, &addrs, sizeof(addrs));
	if (error)
		return (error);

	switch (addrs[0].ss_family) {
#ifdef INET6
	case AF_INET6:
		fin6 = (struct sockaddr_in6 *)&addrs[0];
		lin6 = (struct sockaddr_in6 *)&addrs[1];
		if (fin6->sin6_len != sizeof(struct sockaddr_in6) ||
		    lin6->sin6_len != sizeof(struct sockaddr_in6))
			return (EINVAL);
		if (IN6_IS_ADDR_V4MAPPED(&fin6->sin6_addr)) {
			if (!IN6_IS_ADDR_V4MAPPED(&lin6->sin6_addr))
				return (EINVAL);
			/*
			 * Convert both entries to sockaddr_in in place;
			 * presumably this also rewrites ss_family so the
			 * lookup below takes the AF_INET branch -- verify.
			 */
			in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[0]);
			in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[1]);
			fin = (struct sockaddr_in *)&addrs[0];
			lin = (struct sockaddr_in *)&addrs[1];
			break;
		}
		error = sa6_embedscope(fin6, ip6_use_defzone);
		if (error)
			return (error);
		error = sa6_embedscope(lin6, ip6_use_defzone);
		if (error)
			return (error);
		break;
#endif
	case AF_INET:
		fin = (struct sockaddr_in *)&addrs[0];
		lin = (struct sockaddr_in *)&addrs[1];
		if (fin->sin_len != sizeof(struct sockaddr_in) ||
		    lin->sin_len != sizeof(struct sockaddr_in))
			return (EINVAL);
		break;
	default:
		return (EINVAL);
	}

	INP_INFO_WLOCK(&tcbinfo);
	switch (addrs[0].ss_family) {
#ifdef INET6
	case AF_INET6:
		/*
		 * Look up with the addresses held in the sockaddrs that were
		 * validated and scope-processed above.  (Previously this
		 * passed two local in6_addr variables that were never
		 * written.)
		 */
		inp = in6_pcblookup_hash(&tcbinfo, &fin6->sin6_addr,
		    fin6->sin6_port, &lin6->sin6_addr, lin6->sin6_port,
		    0, NULL);
		break;
#endif
	case AF_INET:
		inp = in_pcblookup_hash(&tcbinfo, fin->sin_addr, fin->sin_port,
		    lin->sin_addr, lin->sin_port, 0, NULL);
		break;
	}
	if (inp != NULL) {
		INP_LOCK(inp);
		if (inp->inp_vflag & INP_TIMEWAIT) {
			/*
			 * XXXRW: There currently exists a state where an
			 * inpcb is present, but its timewait state has been
			 * discarded.  For now, don't allow dropping of this
			 * type of inpcb.
			 */
			tw = intotw(inp);
			if (tw != NULL)
				tcp_twclose(tw, 0);
			else
				INP_UNLOCK(inp);
		} else if (!(inp->inp_vflag & INP_DROPPED) &&
		    !(inp->inp_socket->so_options & SO_ACCEPTCONN)) {
			tp = intotcpcb(inp);
			tp = cxgb_tcp_drop(tp, ECONNABORTED);
			/* A NULL return means the drop already unlocked. */
			if (tp != NULL)
				INP_UNLOCK(inp);
		} else
			INP_UNLOCK(inp);
	} else
		error = ESRCH;
	INP_INFO_WUNLOCK(&tcbinfo);
	return (error);
}
SYSCTL_PROC(_net_inet_tcp_cxgb, TCPCTL_DROP, drop,
CTLTYPE_STRUCT|CTLFLAG_WR|CTLFLAG_SKIP, NULL,
0, cxgb_sysctl_drop, "", "Drop TCP connection");

File diff suppressed because it is too large Load Diff

View File

@ -60,7 +60,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_ofld.h>
#include <netinet/tcp_offload.h>
#include <netinet/tcp_fsm.h>
#include <net/route.h>
@ -77,6 +77,8 @@ __FBSDID("$FreeBSD$");
#include <dev/cxgb/ulp/tom/cxgb_defs.h>
#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h>
#include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
#include <dev/cxgb/ulp/tom/cxgb_tcp.h>
static int activated = 1;
TUNABLE_INT("hw.t3toe.activated", &activated);
@ -177,6 +179,8 @@ toepcb_release(struct toepcb *toep)
static void
t3cdev_add(struct tom_data *t)
{
printf("t3cdev_add\n");
mtx_lock(&cxgb_list_lock);
TAILQ_INSERT_TAIL(&cxgb_list, t, entry);
mtx_unlock(&cxgb_list_lock);
@ -187,7 +191,8 @@ t3cdev_add(struct tom_data *t)
* initialize its cpl_handlers
* and register it as a T3C client
*/
static void t3c_tom_add(struct t3cdev *cdev)
static void
t3c_tom_add(struct t3cdev *cdev)
{
int i;
unsigned int wr_len;
@ -195,9 +200,12 @@ static void t3c_tom_add(struct t3cdev *cdev)
struct toedev *tdev;
struct adap_ports *port_info;
printf("%s called\n", __FUNCTION__);
t = malloc(sizeof(*t), M_CXGB, M_NOWAIT|M_ZERO);
if (!t)
if (t == NULL)
return;
if (cdev->ctl(cdev, GET_WR_LEN, &wr_len) < 0)
@ -226,11 +234,15 @@ static void t3c_tom_add(struct t3cdev *cdev)
}
TOM_DATA(tdev) = t;
printf("nports=%d\n", port_info->nports);
for (i = 0; i < port_info->nports; i++) {
struct ifnet *ifp = port_info->lldevs[i];
TOEDEV(ifp) = tdev;
printf("enabling toe on %p\n", ifp);
ifp->if_capabilities |= IFCAP_TOE;
ifp->if_capabilities |= IFCAP_TOE4;
ifp->if_capenable |= IFCAP_TOE4;
}
t->ports = port_info;
@ -242,8 +254,10 @@ static void t3c_tom_add(struct t3cdev *cdev)
return;
out_free_all:
printf("out_free_all fail\n");
free(port_info, M_CXGB);
out_free_tom:
printf("out_free_tom fail\n");
free(t, M_CXGB);
return;
}
@ -293,8 +307,8 @@ can_offload(struct toedev *dev, struct socket *so)
atomic_load_acq_int(&t->tids_in_use) + t->atids_in_use < tomd->conf.max_conn);
}
static int tom_ctl(struct toedev *dev, unsigned int req, void *data)
static int
tom_ctl(struct toedev *dev, unsigned int req, void *data)
{
struct tom_data *t = TOM_DATA(dev);
struct t3cdev *cdev = t->cdev;
@ -377,32 +391,33 @@ t3_toe_attach(struct toedev *dev, const struct offload_id *entry)
}
static void
cxgb_toe_listen(void *unused, int event, struct tcpcb *tp)
cxgb_toe_listen_start(void *unused, struct tcpcb *tp)
{
struct socket *so = tp->t_inpcb->inp_socket;
struct tom_data *p;
switch (event) {
case OFLD_LISTEN_OPEN:
case OFLD_LISTEN_CLOSE:
mtx_lock(&cxgb_list_lock);
TAILQ_FOREACH(p, &cxgb_list, entry) {
if (event == OFLD_LISTEN_OPEN)
t3_listen_start(&p->tdev, so, p->cdev);
else if (tp->t_state == TCPS_LISTEN) {
printf("stopping listen on port=%d\n",
ntohs(tp->t_inpcb->inp_lport));
t3_listen_stop(&p->tdev, so, p->cdev);
}
}
mtx_unlock(&cxgb_list_lock);
break;
default:
log(LOG_ERR, "unrecognized listen event %d\n", event);
break;
mtx_lock(&cxgb_list_lock);
TAILQ_FOREACH(p, &cxgb_list, entry) {
t3_listen_start(&p->tdev, so, p->cdev);
}
mtx_unlock(&cxgb_list_lock);
}
/*
 * Handler registered for the tcp_offload_listen_stop event: for a tcpcb
 * that is in the LISTEN state, call t3_listen_stop() on every registered
 * TOM instance.  The instance list is walked under cxgb_list_lock.
 */
static void
cxgb_toe_listen_stop(void *unused, struct tcpcb *tp)
{
	struct socket *so = tp->t_inpcb->inp_socket;
	struct tom_data *td;

	mtx_lock(&cxgb_list_lock);
	TAILQ_FOREACH(td, &cxgb_list, entry) {
		if (tp->t_state != TCPS_LISTEN)
			continue;
		printf("stopping listen on port=%d\n",
		    ntohs(tp->t_inpcb->inp_lport));
		t3_listen_stop(&td->tdev, so, td->cdev);
	}
	mtx_unlock(&cxgb_list_lock);
}
static void
@ -416,7 +431,7 @@ cxgb_register_listeners(void)
tp = intotcpcb(inp);
if (tp->t_state == TCPS_LISTEN)
cxgb_toe_listen(NULL, OFLD_LISTEN_OPEN, tp);
cxgb_toe_listen_start(NULL, tp);
}
INP_INFO_RUNLOCK(&tcbinfo);
}
@ -450,12 +465,19 @@ t3_tom_init(void)
"Unable to register Chelsio T3 TCP offload module.\n");
return -1;
}
INP_INFO_WLOCK(&tcbinfo);
INP_INFO_WUNLOCK(&tcbinfo);
mtx_init(&cxgb_list_lock, "cxgb tom list", NULL, MTX_DEF);
listen_tag = EVENTHANDLER_REGISTER(ofld_listen, cxgb_toe_listen, NULL, EVENTHANDLER_PRI_ANY);
listen_tag = EVENTHANDLER_REGISTER(tcp_offload_listen_start,
cxgb_toe_listen_start, NULL, EVENTHANDLER_PRI_ANY);
listen_tag = EVENTHANDLER_REGISTER(tcp_offload_listen_stop,
cxgb_toe_listen_stop, NULL, EVENTHANDLER_PRI_ANY);
TAILQ_INIT(&cxgb_list);
/* Register to offloading devices */
printf("setting add to %p\n", t3c_tom_add);
t3c_tom_client.add = t3c_tom_add;
cxgb_register_client(&t3c_tom_client);
cxgb_register_listeners();

View File

@ -1,7 +1,7 @@
# $FreeBSD$
SUBDIR= cxgb
SUBDIR+= toecore
#SUBDIR+= tom
SUBDIR+= tom
#SUBDIR+= iw_cxgb
.include <bsd.subdir.mk>

View File

@ -8,11 +8,11 @@ SRCS= cxgb_mc5.c cxgb_vsc8211.c cxgb_ael1002.c cxgb_mv88e1xxx.c
SRCS+= cxgb_xgmac.c cxgb_vsc7323.c cxgb_t3_hw.c cxgb_main.c
SRCS+= cxgb_sge.c cxgb_lro.c cxgb_offload.c cxgb_l2t.c
SRCS+= device_if.h bus_if.h pci_if.h opt_zero.h opt_sched.h
SRCS+= uipc_mvec.c
#SRCS+= cxgb_multiq.c cxgb_support.c
SRCS+= uipc_mvec.c cxgb_support.c
#SRCS+= cxgb_multiq.c
CFLAGS+= -DCONFIG_CHELSIO_T3_CORE -g -DCONFIG_DEFINED -DDEFAULT_JUMBO -I${CXGB} -DSMP
CFLAGS+= -DDISABLE_MBUF_IOVEC
#CFLAGS+= -DDISABLE_MBUF_IOVEC
#CFLAGS+= -DIFNET_MULTIQUEUE
#CFLAGS+= -DINVARIANT_SUPPORT -DINVARIANTS
#CFLAGS+= -DWITNESS

View File

@ -4,5 +4,9 @@ TOM = ${.CURDIR}/../../../dev/cxgb/ulp/tom
KMOD= tom
SRCS= cxgb_tom.c cxgb_cpl_io.c cxgb_listen.c cxgb_tom_sysctl.c cxgb_cpl_socket.c
SRCS+= device_if.h bus_if.h pci_if.h
.include <bsd.kmod.mk>
SRCS+= cxgb_tcp_subr.c cxgb_tcp_usrreq.c
SRCS+= opt_compat.h opt_inet.h opt_inet6.h opt_ipsec.h opt_mac.h opt_tcpdebug.h opt_ddb.h
SRCS+= device_if.h bus_if.h pci_if.h
#CFLAGS+= -DDEBUG_PRINT -DDEBUG
.include <bsd.kmod.mk>