Add driver for TCP offload

Sponsored by: Chelsio Inc.
Kip Macy 2007-12-16 05:27:26 +00:00
parent 501e15907b
commit 280b95e8b5
9 changed files with 5422 additions and 0 deletions

sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c (file diff suppressed because it is too large)

sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c

@@ -0,0 +1,560 @@
/**************************************************************************
Copyright (c) 2007, Chelsio Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Neither the name of the Chelsio Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <sys/socketvar.h>
#include <sys/uio.h>
#include <machine/bus.h>
#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <dev/cxgb/cxgb_osdep.h>
#include <dev/cxgb/sys/mbufq.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_ofld.h>
#include <net/route.h>
#include <dev/cxgb/t3cdev.h>
#include <dev/cxgb/common/cxgb_firmware_exports.h>
#include <dev/cxgb/common/cxgb_t3_cpl.h>
#include <dev/cxgb/common/cxgb_tcb.h>
#include <dev/cxgb/common/cxgb_ctl_defs.h>
#include <dev/cxgb/cxgb_l2t.h>
#include <dev/cxgb/cxgb_offload.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <dev/cxgb/sys/mvec.h>
#include <dev/cxgb/ulp/toecore/cxgb_toedev.h>
#include <dev/cxgb/ulp/tom/cxgb_defs.h>
#include <dev/cxgb/ulp/tom/cxgb_tom.h>
#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h>
#include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
static int (*pru_sosend)(struct socket *so, struct sockaddr *addr,
struct uio *uio, struct mbuf *top, struct mbuf *control,
int flags, struct thread *td);
static int (*pru_soreceive)(struct socket *so, struct sockaddr **paddr,
struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
int *flagsp);
#ifdef notyet
#define VM_HOLD_WRITEABLE 0x1
static int vm_fault_hold_user_pages(vm_offset_t addr, int len, vm_page_t *mp,
int *count, int flags);
#endif
static void vm_fault_unhold_pages(vm_page_t *m, int count);
#define TMP_IOV_MAX 16
void
t3_init_socket_ops(void)
{
struct protosw *prp;
prp = pffindtype(AF_INET, SOCK_STREAM);
pru_sosend = prp->pr_usrreqs->pru_sosend;
pru_soreceive = prp->pr_usrreqs->pru_soreceive;
}
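/*
 * The default handlers cached above are the fall-back path: cxgb_sosend()
 * and cxgb_soreceive() below try the zero-copy/DDP route first and chain to
 * pru_sosend()/pru_soreceive() whenever the offload path does not apply.
 */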
struct cxgb_dma_info {
size_t cdi_mapped;
int cdi_nsegs;
bus_dma_segment_t *cdi_segs;
};
static void
cxgb_dma_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
bus_size_t mapsize, int error)
{
struct cxgb_dma_info *cdi = arg;
cdi->cdi_mapped = mapsize;
cdi->cdi_nsegs = nsegs;
cdi->cdi_segs = segs;
}
static void
iov_adj(struct iovec **iov, int *iovcnt, ssize_t count)
{
struct iovec *iovtmp;
int iovcnttmp;
caddr_t ptmp;
if (count > 0) {
iovtmp = *iov;
iovcnttmp = *iovcnt;
while (count > 0) {
if (count < iovtmp->iov_len) {
ptmp = iovtmp->iov_base;
ptmp += count;
iovtmp->iov_base = ptmp;
iovtmp->iov_len -= count;
break;
} else
count -= iovtmp->iov_len;
iovtmp++;
iovcnttmp--;
}
*iov = iovtmp;
*iovcnt = iovcnttmp;
} else if (count < 0) {
iovtmp = &(*iov)[*iovcnt - 1];
iovcnttmp = *iovcnt;
while (count < 0) {
if (-count < iovtmp->iov_len) {
iovtmp->iov_len += count;
break;
} else
count += iovtmp->iov_len;
iovtmp--;
iovcnttmp--;
}
*iovcnt = iovcnttmp;
}
}
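/*
 * Worked example (illustrative only): with iov = { {p, 100}, {q, 50} } and
 * iovcnt = 2, iov_adj(&iov, &iovcnt, 120) consumes the whole first entry
 * (120 > 100), then advances 20 bytes into the second, leaving the single
 * iovec {q + 20, 30}.  A negative count trims from the tail instead:
 * iov_adj(&iov, &iovcnt, -30) shortens the last entry to {q, 20}.
 */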
static void
cxgb_zero_copy_free(void *cl, void *arg)
{
}
static int
cxgb_hold_iovec_pages(struct uio *uio, vm_page_t *m, int *held, int flags)
{
return (EINVAL);
}
static void
cxgb_wait_dma_completion(struct toepcb *tp)
{
}
static int
cxgb_vm_page_to_miov(struct toepcb *toep, struct uio *uio, struct mbuf **m)
{
int i, seg_count, err, type;
struct mbuf *m0;
struct cxgb_dma_info cdi;
struct mbuf_vec *mv;
struct mbuf_iovec *mi;
bus_dma_segment_t *segs;
err = bus_dmamap_load_uio(toep->tp_tx_dmat, toep->tp_dmamap, uio,
cxgb_dma_callback, &cdi, 0);
if (err)
return (err);
seg_count = cdi.cdi_nsegs;
if ((m0 = mcl_alloc(seg_count, &type)) == NULL) {
bus_dmamap_unload(toep->tp_tx_dmat, toep->tp_dmamap);
return (ENOMEM);
}
segs = cdi.cdi_segs;
m0->m_type = type;
m0->m_flags = (M_EXT|M_NOFREE);
m0->m_ext.ext_type = EXT_EXTREF;
m0->m_ext.ext_free = cxgb_zero_copy_free;
m0->m_ext.ext_args = NULL;
mv = mtomv(m0);
mv->mv_count = seg_count;
mv->mv_first = 0;
for (i = 0, mi = mv->mv_vec; i < seg_count; mi++, segs++, i++)
mi_collapse_sge(mi, segs);
*m = m0;
if (cdi.cdi_mapped < uio->uio_resid) {
uio->uio_resid -= cdi.cdi_mapped;
} else
uio->uio_resid = 0;
return (0);
}
static int
t3_sosend(struct socket *so, struct uio *uio)
{
int rv, count, hold_resid, sent, iovcnt;
struct iovec iovtmp[TMP_IOV_MAX], *iovtmpp, *iov;
struct tcpcb *tp = sototcpcb(so);
struct toepcb *toep = tp->t_toe;
struct mbuf *m;
struct uio uiotmp;
/*
* Events requiring iteration:
* - number of pages exceeds max hold pages for process or system
* - number of pages exceeds maximum sg entries for a single WR
*
* We're limited to holding 128 pages at once - and we're limited to
* 34 SG entries per work request, but each SG entry can be any number
* of contiguous pages
*
*/
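/*
 * Concretely (illustrative, assuming 4KB pages): 128 held pages cap one
 * pass at 512KB of user data, and 34 SG entries cap a single work request,
 * so the sendmore loop below re-runs the hold/push cycle until the uio is
 * drained.
 */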
uiotmp = *uio;
iovcnt = uio->uio_iovcnt;
iov = uio->uio_iov;
sent = 0;
sendmore:
/*
* Make sure we don't exceed the socket buffer
*/
count = min(toep->tp_page_count, (sbspace(&so->so_snd) >> PAGE_SHIFT) + 2*PAGE_SIZE);
rv = cxgb_hold_iovec_pages(&uiotmp, toep->tp_pages, &count, 0);
hold_resid = uiotmp.uio_resid;
if (rv)
return (rv);
/*
* Bump past sent and shave off the unheld amount
*/
if (hold_resid > 0) {
iovtmpp = iovtmp;
memcpy(iovtmp, iov, iovcnt*sizeof(*iov));
if (sent)
iov_adj(&iovtmpp, &iovcnt, sent);
iov_adj(&iovtmpp, &iovcnt, -hold_resid);
uiotmp.uio_iov = iovtmpp;
uiotmp.uio_iovcnt = iovcnt;
}
uiotmp.uio_resid = uio->uio_resid - hold_resid;
/*
* Push off all held pages
*
*/
while (uiotmp.uio_resid > 0) {
rv = cxgb_vm_page_to_miov(toep, &uiotmp, &m);
if (rv) {
vm_fault_unhold_pages(toep->tp_pages, count);
return (rv);
}
uio->uio_resid -= m->m_pkthdr.len;
sent += m->m_pkthdr.len;
sbappend_locked(&so->so_snd, m);
t3_push_frames(so, TRUE);
iov_adj(&uiotmp.uio_iov, &iovcnt, uiotmp.uio_resid);
}
/*
* Wait for pending I/O to be DMA'd to the card
*
*/
cxgb_wait_dma_completion(toep);
vm_fault_unhold_pages(toep->tp_pages, count);
/*
* If there is more data to send, adjust the local copy of iov
* to point to the start
*/
if (hold_resid) {
iovtmpp = iovtmp;
memcpy(iovtmp, iov, iovcnt*sizeof(*iov));
iov_adj(&iovtmpp, &iovcnt, sent);
uiotmp = *uio;
uiotmp.uio_iov = iovtmpp;
uiotmp.uio_iovcnt = iovcnt;
goto sendmore;
}
return (0);
}
static int
cxgb_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{
struct tcpcb *tp = sototcpcb(so);
struct toedev *tdev;
int zcopy_thres, zcopy_enabled, rv;
/*
* In order to use DMA direct from userspace the following
* conditions must be met:
* - the connection is currently offloaded
* - ddp is enabled
* - the number of bytes to be transferred exceeds the threshold
* - the number of bytes currently in flight won't exceed the in-flight
* threshold XXX TODO
* - vm_fault_hold_user_pages succeeds
* - blocking socket XXX for now
*
*/
if (tp->t_flags & TF_TOE) {
tdev = TOE_DEV(so);
zcopy_thres = TOM_TUNABLE(tdev, zcopy_sosend_partial_thres);
zcopy_enabled = TOM_TUNABLE(tdev, zcopy_sosend_enabled);
if ((uio->uio_resid > zcopy_thres) &&
(uio->uio_iovcnt < TMP_IOV_MAX) && ((so->so_state & SS_NBIO) == 0)
&& zcopy_enabled) {
rv = t3_sosend(so, uio);
if (rv != EAGAIN)
return (rv);
}
}
return (pru_sosend(so, addr, uio, top, control, flags, td));
}
static int
t3_soreceive(struct socket *so, struct uio *uio)
{
#ifdef notyet
int i, rv, count, hold_resid, sent, iovcnt;
struct iovec iovtmp[TMP_IOV_MAX], *iovtmpp, *iov;
struct tcpcb *tp = sototcpcb(so);
struct toepcb *toep = tp->t_toe;
struct mbuf *m;
struct uio uiotmp;
/*
* Events requiring iteration:
* - number of pages exceeds max hold pages for process or system
* - number of pages exceeds maximum sg entries for a single WR
*
* We're limited to holding 128 pages at once - and we're limited to
* 34 SG entries per work request, but each SG entry can be any number
* of contiguous pages
*
*/
uiotmp = *uio;
iovcnt = uio->uio_iovcnt;
iov = uio->uio_iov;
sent = 0;
/* remainder of the zero-copy receive path is not yet implemented */
#endif
return (0);
}
static int
cxgb_soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
struct toedev *tdev;
int rv, zcopy_thres, zcopy_enabled;
struct tcpcb *tp = sototcpcb(so);
/*
* In order to use DMA direct from userspace the following
* conditions must be met:
* - the connection is currently offloaded
* - ddp is enabled
* - the number of bytes to be transferred exceeds the threshold
* - the number of bytes currently in flight won't exceed the in-flight
* threshold XXX TODO
* - vm_fault_hold_user_pages succeeds
* - blocking socket XXX for now
* - iovcnt is 1
*
*/
if (tp->t_flags & TF_TOE) {
tdev = TOE_DEV(so);
zcopy_thres = TOM_TUNABLE(tdev, ddp_thres);
zcopy_enabled = TOM_TUNABLE(tdev, ddp);
if ((uio->uio_resid > zcopy_thres) &&
(uio->uio_iovcnt == 1) && ((so->so_state & SS_NBIO) == 0)
&& zcopy_enabled) {
rv = t3_soreceive(so, uio);
if (rv != EAGAIN)
return (rv);
}
}
return (pru_soreceive(so, psa, uio, mp0, controlp, flagsp));
}
void
t3_install_socket_ops(struct socket *so)
{
so->so_proto->pr_usrreqs->pru_sosend = cxgb_sosend;
so->so_proto->pr_usrreqs->pru_soreceive = cxgb_soreceive;
}
/*
* This routine takes a user address range and does the following:
* - validate that the user has access to those pages (flags indicates read or write) - if not fail
* - validate that count is enough to hold range number of pages - if not fail
* - fault in any non-resident pages
* - if the user is doing a read force a write fault for any COWed pages
* - if the user is doing a read mark all pages as dirty
* - hold all pages
* - return number of pages in count
*/
#ifdef notyet
static int
vm_fault_hold_user_pages(vm_offset_t addr, int len, vm_page_t *mp, int *count, int flags)
{
vm_offset_t start, va;
vm_paddr_t pa;
int pageslen, faults, rv;
struct thread *td;
vm_map_t map;
pmap_t pmap;
vm_page_t m, *pages;
vm_prot_t prot;
start = addr & ~PAGE_MASK;
pageslen = roundup2(addr + len, PAGE_SIZE);
if (*count < ((pageslen - start) >> PAGE_SHIFT))
return (EFBIG);
*count = (pageslen - start) >> PAGE_SHIFT;
/*
* Check that virtual address range is legal
* This check is somewhat bogus as on some architectures kernel
* and user do not share VA - however, it appears that all FreeBSD
* architectures define it
*/
if (addr + len > VM_MAXUSER_ADDRESS)
return (EFAULT);
td = curthread;
map = &td->td_proc->p_vmspace->vm_map;
pmap = &td->td_proc->p_vmspace->vm_pmap;
pages = mp;
prot = (flags & VM_HOLD_WRITEABLE) ? VM_PROT_WRITE : VM_PROT_READ;
bzero(pages, sizeof(vm_page_t) * (*count));
retry:
/*
* First optimistically assume that all pages are resident (and R/W if for
* write); if so, just mark the pages as held (and dirty if for write) and
* return
*/
vm_page_lock_queues();
for (pages = mp, faults = 0, va = start; va < pageslen; va += PAGE_SIZE, pages++) {
/*
* Assure that we only hold the page once
*/
if (*pages == NULL) {
/*
* The page queue mutex is recursable, so this is OK.  It would be really
* nice if we had an unlocked version of this so we were only acquiring
* the pmap lock once, as opposed to potentially many dozens of times
*/
m = pmap_extract_and_hold(pmap, va, prot);
if (m == NULL) {
faults++;
continue;
}
*pages = m;
if (flags & VM_HOLD_WRITEABLE)
vm_page_dirty(m);
}
}
vm_page_unlock_queues();
if (faults == 0)
return (0);
/*
* Pages either have insufficient permissions or are not present;
* trigger a fault where necessary
*
*/
for (va = start; va < pageslen; va += PAGE_SIZE) {
m = NULL;
pa = pmap_extract(pmap, va);
rv = 0;
if (pa)
m = PHYS_TO_VM_PAGE(pa);
if (flags & VM_HOLD_WRITEABLE) {
if (m == NULL || (m->flags & PG_WRITEABLE) == 0)
rv = vm_fault(map, va, VM_PROT_WRITE, VM_FAULT_DIRTY);
} else if (m == NULL)
rv = vm_fault(map, va, VM_PROT_READ, VM_FAULT_NORMAL);
if (rv)
goto error;
}
goto retry;
error:
vm_page_lock_queues();
for (pages = mp, va = start; va < pageslen; va += PAGE_SIZE, pages++)
if (*pages)
vm_page_unhold(*pages);
vm_page_unlock_queues();
return (EFAULT);
}
#endif
static void
vm_fault_unhold_pages(vm_page_t *mp, int count)
{
KASSERT(count >= 0, ("negative count %d", count));
vm_page_lock_queues();
while (count--) {
vm_page_unhold(*mp);
mp++;
}
vm_page_unlock_queues();
}

sys/dev/cxgb/ulp/tom/cxgb_defs.h

@@ -0,0 +1,79 @@
/**************************************************************************
Copyright (c) 2007, Chelsio Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Neither the name of the Chelsio Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
$FreeBSD$
***************************************************************************/
#ifndef CXGB_DEFS_H_
#define CXGB_DEFS_H_
#define VALIDATE_TID 0
#define TOEPCB(so) ((struct toepcb *)(sototcpcb((so))->t_toe))
#define TOE_DEV(so) (TOEPCB((so))->tp_toedev)
#define toeptoso(toep) ((toep)->tp_tp->t_inpcb->inp_socket)
#define sototoep(so) (sototcpcb((so))->t_toe)
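/*
 * These macros chain through the control blocks: TOE_DEV(so) is
 * sototcpcb(so)->t_toe->tp_toedev, i.e. socket -> tcpcb -> toepcb -> toedev,
 * and toeptoso() walks back from a toepcb to its socket via the inpcb.
 */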
struct listen_ctx;
typedef void (*defer_handler_t)(struct toedev *dev, struct mbuf *m);
void t3tom_register_cpl_handler(unsigned int opcode, cxgb_cpl_handler_func h);
void t3_listen_start(struct toedev *dev, struct socket *so, struct t3cdev *cdev);
void t3_listen_stop(struct toedev *dev, struct socket *so, struct t3cdev *cdev);
int t3_push_frames(struct socket *so, int req_completion);
int t3_connect(struct toedev *tdev, struct socket *so, struct rtentry *rt,
struct sockaddr *nam);
void t3_init_listen_cpl_handlers(void);
int t3_init_cpl_io(void);
void t3_init_wr_tab(unsigned int wr_len);
uint32_t t3_send_rx_credits(struct tcpcb *tp, uint32_t credits, uint32_t dack, int nofail);
void t3_cleanup_rbuf(struct tcpcb *tp);
void t3_init_socket_ops(void);
void t3_install_socket_ops(struct socket *so);
void t3_disconnect_acceptq(struct socket *listen_so);
void t3_reset_synq(struct listen_ctx *ctx);
void t3_defer_reply(struct mbuf *m, struct toedev *dev, defer_handler_t handler);
struct toepcb *toepcb_alloc(void);
void toepcb_hold(struct toepcb *);
void toepcb_release(struct toepcb *);
void toepcb_init(struct toepcb *);
void t3_set_rcv_coalesce_enable(struct socket *so, int on_off);
void t3_set_keepalive(struct socket *so, int on_off);
void t3_set_ddp_tag(struct socket *so, int buf_idx, unsigned int tag);
void t3_set_ddp_buf(struct socket *so, int buf_idx, unsigned int offset,
unsigned int len);
int t3_get_tcb(struct socket *so);
#endif

sys/dev/cxgb/ulp/tom/cxgb_listen.c

@@ -0,0 +1,345 @@
/**************************************************************************
Copyright (c) 2007, Chelsio Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Neither the name of the Chelsio Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syslog.h>
#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <dev/cxgb/cxgb_osdep.h>
#include <dev/cxgb/sys/mbufq.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_ofld.h>
#include <net/route.h>
#include <dev/cxgb/t3cdev.h>
#include <dev/cxgb/common/cxgb_firmware_exports.h>
#include <dev/cxgb/common/cxgb_t3_cpl.h>
#include <dev/cxgb/common/cxgb_tcb.h>
#include <dev/cxgb/common/cxgb_ctl_defs.h>
#include <dev/cxgb/cxgb_l2t.h>
#include <dev/cxgb/cxgb_offload.h>
#include <dev/cxgb/ulp/toecore/cxgb_toedev.h>
#include <dev/cxgb/ulp/tom/cxgb_defs.h>
#include <dev/cxgb/ulp/tom/cxgb_tom.h>
#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h>
#include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
static struct listen_info *listen_hash_add(struct tom_data *d, struct socket *so, unsigned int stid);
static int listen_hash_del(struct tom_data *d, struct socket *so);
/*
* Process a CPL_CLOSE_LISTSRV_RPL message. If the status is good we release
* the STID.
*/
static int
do_close_server_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
struct cpl_close_listserv_rpl *rpl = cplhdr(m);
unsigned int stid = GET_TID(rpl);
if (rpl->status != CPL_ERR_NONE)
log(LOG_ERR, "Unexpected CLOSE_LISTSRV_RPL status %u for "
"STID %u\n", rpl->status, stid);
else {
struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx;
cxgb_free_stid(cdev, stid);
free(listen_ctx, M_CXGB);
}
return (CPL_RET_BUF_DONE);
}
/*
* Process a CPL_PASS_OPEN_RPL message. Remove the socket from the listen hash
* table and free the STID if there was any error, otherwise nothing to do.
*/
static int
do_pass_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
struct cpl_pass_open_rpl *rpl = cplhdr(m);
if (rpl->status != CPL_ERR_NONE) {
int stid = GET_TID(rpl);
struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx;
struct tom_data *d = listen_ctx->tom_data;
struct socket *lso = listen_ctx->lso;
#if VALIDATE_TID
if (!lso)
return (CPL_RET_UNKNOWN_TID | CPL_RET_BUF_DONE);
#endif
/*
* Note: It is safe to unconditionally call listen_hash_del()
* at this point without risking unhashing a reincarnation of
* an already closed socket (i.e., there is no listen, close,
* listen, free the sock for the second listen while processing
* a message for the first race) because we are still holding
* a reference on the socket. It is possible that the unhash
* will fail because the socket is already closed, but we can't
* unhash the wrong socket because it is impossible for the
* socket to which this message refers to have reincarnated.
*/
listen_hash_del(d, lso);
cxgb_free_stid(cdev, stid);
#ifdef notyet
/*
* XXX need to unreference the inpcb
* but we have no way of knowing that other TOMs aren't referencing it
*/
sock_put(lso);
#endif
free(listen_ctx, M_CXGB);
}
return (CPL_RET_BUF_DONE);
}
void
t3_init_listen_cpl_handlers(void)
{
t3tom_register_cpl_handler(CPL_PASS_OPEN_RPL, do_pass_open_rpl);
t3tom_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl);
}
static inline int
listen_hashfn(const struct socket *so)
{
return ((unsigned long)so >> 10) & (LISTEN_INFO_HASH_SIZE - 1);
}
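/*
 * Illustrative note: the shift by 10 presumably discards low-order address
 * bits that vary little across socket allocations before masking into the
 * 32-bucket table, e.g. a socket at 0xc4a1b400 lands in bucket
 * ((0xc4a1b400 >> 10) & 31) == 13.
 */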
/*
* Create and add a listen_info entry to the listen hash table. This and the
* listen hash table functions below cannot be called from softirqs.
*/
static struct listen_info *
listen_hash_add(struct tom_data *d, struct socket *so, unsigned int stid)
{
struct listen_info *p;
p = malloc(sizeof(*p), M_CXGB, M_NOWAIT|M_ZERO);
if (p) {
int bucket = listen_hashfn(so);
p->so = so; /* just a key, no need to take a reference */
p->stid = stid;
mtx_lock(&d->listen_lock);
p->next = d->listen_hash_tab[bucket];
d->listen_hash_tab[bucket] = p;
mtx_unlock(&d->listen_lock);
}
return (p);
}
#if 0
/*
* Given a pointer to a listening socket return its server TID by consulting
* the socket->stid map. Returns -1 if the socket is not in the map.
*/
static int
listen_hash_find(struct tom_data *d, struct socket *so)
{
int stid = -1, bucket = listen_hashfn(so);
struct listen_info *p;
spin_lock(&d->listen_lock);
for (p = d->listen_hash_tab[bucket]; p; p = p->next)
if (p->so == so) {
stid = p->stid;
break;
}
spin_unlock(&d->listen_lock);
return stid;
}
#endif
/*
* Delete the listen_info structure for a listening socket. Returns the server
* TID for the socket if it is present in the socket->stid map, or -1.
*/
static int
listen_hash_del(struct tom_data *d, struct socket *so)
{
int bucket, stid = -1;
struct listen_info *p, **prev;
bucket = listen_hashfn(so);
prev = &d->listen_hash_tab[bucket];
mtx_lock(&d->listen_lock);
for (p = *prev; p; prev = &p->next, p = p->next)
if (p->so == so) {
stid = p->stid;
*prev = p->next;
free(p, M_CXGB);
break;
}
mtx_unlock(&d->listen_lock);
return (stid);
}
/*
* Start a listening server by sending a passive open request to HW.
*/
void
t3_listen_start(struct toedev *dev, struct socket *so, struct t3cdev *cdev)
{
int stid;
struct mbuf *m;
struct cpl_pass_open_req *req;
struct tom_data *d = TOM_DATA(dev);
struct inpcb *inp = sotoinpcb(so);
struct listen_ctx *ctx;
if (!TOM_TUNABLE(dev, activated))
return;
printf("start listen\n");
ctx = malloc(sizeof(*ctx), M_CXGB, M_NOWAIT);
if (!ctx)
return;
ctx->tom_data = d;
ctx->lso = so;
ctx->ulp_mode = 0; /* DDP is the default */
LIST_INIT(&ctx->synq_head);
stid = cxgb_alloc_stid(d->cdev, d->client, ctx);
if (stid < 0)
goto free_ctx;
#ifdef notyet
/*
* XXX need to mark inpcb as referenced
*/
sock_hold(sk);
#endif
m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
goto free_stid;
m->m_pkthdr.len = m->m_len = sizeof(*req);
if (!listen_hash_add(d, so, stid))
goto free_all;
req = mtod(m, struct cpl_pass_open_req *);
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, stid));
req->local_port = inp->inp_lport;
memcpy(&req->local_ip, &inp->inp_laddr, 4);
req->peer_port = 0;
req->peer_ip = 0;
req->peer_netmask = 0;
req->opt0h = htonl(F_DELACK | F_TCAM_BYPASS);
req->opt0l = htonl(V_RCV_BUFSIZ(16));
req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK));
m_set_priority(m, CPL_PRIORITY_LISTEN);
cxgb_ofld_send(cdev, m);
return;
free_all:
m_free(m);
free_stid:
cxgb_free_stid(cdev, stid);
#if 0
sock_put(sk);
#endif
free_ctx:
free(ctx, M_CXGB);
}
/*
* Stop a listening server by sending a close_listsvr request to HW.
* The server TID is freed when we get the reply.
*/
void
t3_listen_stop(struct toedev *dev, struct socket *so, struct t3cdev *cdev)
{
struct mbuf *m;
struct cpl_close_listserv_req *req;
struct listen_ctx *lctx;
int stid = listen_hash_del(TOM_DATA(dev), so);
if (stid < 0)
return;
lctx = cxgb_get_lctx(cdev, stid);
/*
* Do this early so embryonic connections are marked as being aborted
* while the stid is still open. This ensures pass_establish messages
* that arrive while we are closing the server will be able to locate
* the listening socket.
*/
t3_reset_synq(lctx);
/* Send the close ASAP to stop further passive opens */
m = m_gethdr_nofail(sizeof(*req));
req = mtod(m, struct cpl_close_listserv_req *);
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, stid));
req->cpu_idx = 0;
m_set_priority(m, CPL_PRIORITY_LISTEN);
cxgb_ofld_send(cdev, m);
t3_disconnect_acceptq(so);
}

sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h

@@ -0,0 +1,185 @@
/**************************************************************************
Copyright (c) 2007, Chelsio Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Neither the name of the Chelsio Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
$FreeBSD$
***************************************************************************/
#ifndef T3_DDP_H
#define T3_DDP_H
/* Should be 1 or 2 indicating single or double kernel buffers. */
#define NUM_DDP_KBUF 2
/* min receive window for a connection to be considered for DDP */
#define MIN_DDP_RCV_WIN (48 << 10)
/* amount of Rx window not available to DDP to avoid window exhaustion */
#define DDP_RSVD_WIN (16 << 10)
/* # of sentinel invalid page pods at the end of a group of valid page pods */
#define NUM_SENTINEL_PPODS 0
/* # of pages a pagepod can hold without needing another pagepod */
#define PPOD_PAGES 4
/* page pods are allocated in groups of this size (must be power of 2) */
#define PPOD_CLUSTER_SIZE 16
/* for each TID we reserve this many page pods up front */
#define RSVD_PPODS_PER_TID 1
struct pagepod {
uint32_t pp_vld_tid;
uint32_t pp_pgsz_tag_color;
uint32_t pp_max_offset;
uint32_t pp_page_offset;
uint64_t pp_rsvd;
uint64_t pp_addr[5];
};
#define PPOD_SIZE sizeof(struct pagepod)
#define S_PPOD_TID 0
#define M_PPOD_TID 0xFFFFFF
#define V_PPOD_TID(x) ((x) << S_PPOD_TID)
#define S_PPOD_VALID 24
#define V_PPOD_VALID(x) ((x) << S_PPOD_VALID)
#define F_PPOD_VALID V_PPOD_VALID(1U)
#define S_PPOD_COLOR 0
#define M_PPOD_COLOR 0x3F
#define V_PPOD_COLOR(x) ((x) << S_PPOD_COLOR)
#define S_PPOD_TAG 6
#define M_PPOD_TAG 0xFFFFFF
#define V_PPOD_TAG(x) ((x) << S_PPOD_TAG)
#define S_PPOD_PGSZ 30
#define M_PPOD_PGSZ 0x3
#define V_PPOD_PGSZ(x) ((x) << S_PPOD_PGSZ)
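/*
 * Example (illustrative sketch, not code from this file): the first two
 * pagepod words would be assembled from the fields above roughly as
 *
 *	pp_vld_tid = htonl(F_PPOD_VALID | V_PPOD_TID(tid));
 *	pp_pgsz_tag_color = htonl(V_PPOD_PGSZ(pgsz) | V_PPOD_TAG(tag) |
 *	    V_PPOD_COLOR(color));
 *
 * where the byte-swap to big-endian is an assumption about the format the
 * hardware expects.
 */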
struct pci_dev;
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <machine/bus.h>
/* DDP gather lists can specify an offset only for the first page. */
struct ddp_gather_list {
unsigned int dgl_length;
unsigned int dgl_offset;
unsigned int dgl_nelem;
vm_page_t *dgl_pages;
bus_addr_t dgl_phys_addr[0];
};
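/*
 * dgl_phys_addr is a trailing variable-length array; a gather list covering
 * npages pages would presumably be allocated in one shot (illustrative
 * sketch only):
 *
 *	gl = malloc(sizeof(struct ddp_gather_list) +
 *	    npages * sizeof(bus_addr_t), M_DEVBUF, M_NOWAIT | M_ZERO);
 *
 * keeping the bus addresses inline behind the header fields.
 */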
struct ddp_buf_state {
unsigned int cur_offset; /* offset of latest DDP notification */
unsigned int flags;
struct ddp_gather_list *gl;
};
struct ddp_state {
struct pci_dev *pdev;
struct ddp_buf_state buf_state[2]; /* per buffer state */
int cur_buf;
unsigned short kbuf_noinval;
unsigned short kbuf_idx; /* which HW buffer is used for kbuf */
struct ddp_gather_list *ubuf;
unsigned int ubuf_nppods; /* # of page pods for buffer 1 */
unsigned int ubuf_tag;
unsigned int ubuf_ddp_ready;
int get_tcb_count;
unsigned int kbuf_posted;
int cancel_ubuf;
unsigned int kbuf_nppods[NUM_DDP_KBUF];
unsigned int kbuf_tag[NUM_DDP_KBUF];
struct ddp_gather_list *kbuf[NUM_DDP_KBUF]; /* kernel buffer for DDP prefetch */
};
/* buf_state flags */
enum {
DDP_BF_NOINVAL = 1 << 0, /* buffer is set to NO_INVALIDATE */
DDP_BF_NOCOPY = 1 << 1, /* DDP to final dest, no copy needed */
DDP_BF_NOFLIP = 1 << 2, /* buffer flips after GET_TCB_RPL */
DDP_BF_PSH = 1 << 3, /* set in the buffer flags if a DDP
completed with a segment having
the PSH flag set */
};
#ifdef notyet
/*
* Returns 1 if a UBUF DMA buffer might be active.
*/
static inline int t3_ddp_ubuf_pending(struct socket *so)
{
struct tcpcb *tp = sototcpcb(so);
struct ddp_state *p = DDP_STATE(tp);
/* When the TOM_TUNABLE(ddp) is enabled, we're always in ULP_MODE DDP,
* but DDP_STATE() is only valid if the connection actually enabled
* DDP.
*/
if (!p)
return 0;
return (p->buf_state[0].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY)) ||
(p->buf_state[1].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY));
}
#endif
int t3_setup_ppods(struct socket *so, const struct ddp_gather_list *gl,
unsigned int nppods, unsigned int tag, unsigned int maxoff,
unsigned int pg_off, unsigned int color);
int t3_alloc_ppods(struct tom_data *td, unsigned int n);
void t3_free_ppods(struct tom_data *td, unsigned int tag, unsigned int n);
void t3_free_ddp_gl(struct pci_dev *pdev, struct ddp_gather_list *gl);
int t3_pin_pages(struct pci_dev *pdev, unsigned long uaddr, size_t len,
struct ddp_gather_list **newgl,
const struct ddp_gather_list *gl);
int t3_ddp_copy(const struct mbuf *skb, int offset, struct iovec *to,
int len);
//void t3_repost_kbuf(struct socket *so, int modulate, int activate);
void t3_post_kbuf(struct socket *so, int modulate);
int t3_post_ubuf(struct socket *so, const struct iovec *iov, int nonblock,
int rcv_flags, int modulate, int post_kbuf);
void t3_cancel_ubuf(struct socket *so);
int t3_overlay_ubuf(struct socket *so, const struct iovec *iov, int nonblock,
int rcv_flags, int modulate, int post_kbuf);
int t3_enter_ddp(struct socket *so, unsigned int kbuf_size, unsigned int waitall);
void t3_cleanup_ddp(struct socket *so);
void t3_release_ddp_resources(struct toepcb *toep);
void t3_cancel_ddpbuf(struct socket *so, unsigned int bufidx);
void t3_overlay_ddpbuf(struct socket *so, unsigned int bufidx, unsigned int tag0,
unsigned int tag1, unsigned int len);
void t3_setup_ddpbufs(struct socket *so, unsigned int len0, unsigned int offset0,
unsigned int len1, unsigned int offset1,
uint64_t ddp_flags, uint64_t flag_mask, int modulate);
#endif /* T3_DDP_H */

sys/dev/cxgb/ulp/tom/cxgb_toepcb.h

@@ -0,0 +1,112 @@
/*-
* Copyright (c) 2007, Chelsio Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Neither the name of the Chelsio Corporation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef CXGB_TOEPCB_H_
#define CXGB_TOEPCB_H_
#include <sys/bus.h>
#include <dev/cxgb/sys/mbufq.h>
struct toepcb {
struct toedev *tp_toedev;
struct l2t_entry *tp_l2t;
pr_ctloutput_t *tp_ctloutput;
unsigned int tp_tid;
int tp_wr_max;
int tp_wr_avail;
int tp_wr_unacked;
int tp_delack_mode;
int tp_mtu_idx;
int tp_ulp_mode;
int tp_qset_idx;
int tp_mss_clamp;
int tp_qset;
int tp_flags;
int tp_enqueued_bytes;
int tp_page_count;
int tp_state;
tcp_seq tp_iss;
tcp_seq tp_delack_seq;
tcp_seq tp_rcv_wup;
tcp_seq tp_copied_seq;
uint64_t tp_write_seq;
volatile int tp_refcount;
vm_page_t *tp_pages;
struct tcpcb *tp_tp;
struct mbuf *tp_m_last;
bus_dma_tag_t tp_tx_dmat;
bus_dmamap_t tp_dmamap;
LIST_ENTRY(toepcb) synq_entry;
struct mbuf_head wr_list;
struct mbuf_head out_of_order_queue;
struct ddp_state tp_ddp_state;
};
static inline void
reset_wr_list(struct toepcb *toep)
{
mbufq_init(&toep->wr_list);
}
static inline void
purge_wr_queue(struct toepcb *toep)
{
struct mbuf *m;
while ((m = mbufq_dequeue(&toep->wr_list)) != NULL)
m_freem(m);
}
static inline void
enqueue_wr(struct toepcb *toep, struct mbuf *m)
{
mbufq_tail(&toep->wr_list, m);
}
static inline struct mbuf *
peek_wr(struct toepcb *toep)
{
return (mbufq_peek(&toep->wr_list));
}
static inline struct mbuf *
dequeue_wr(struct toepcb *toep)
{
return (mbufq_dequeue(&toep->wr_list));
}
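/*
 * Illustrative usage (not code from this file): a WR mbuf is enqueue_wr()'d
 * when posted to the card and retired as completions return credits, e.g.
 *
 *	enqueue_wr(toep, m);
 *	...
 *	while (credits-- > 0 && (m = dequeue_wr(toep)) != NULL)
 *		m_freem(m);
 *
 * so wr_list always reflects the work requests still outstanding.
 */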
#endif

sys/dev/cxgb/ulp/tom/cxgb_tom.c

@@ -0,0 +1,500 @@
/**************************************************************************
Copyright (c) 2007, Chelsio Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Neither the name of the Chelsio Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/eventhandler.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/socketvar.h>
#include <sys/taskqueue.h>
#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <dev/cxgb/cxgb_osdep.h>
#include <dev/cxgb/sys/mbufq.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_ofld.h>
#include <netinet/tcp_fsm.h>
#include <net/route.h>
#include <dev/cxgb/t3cdev.h>
#include <dev/cxgb/common/cxgb_firmware_exports.h>
#include <dev/cxgb/common/cxgb_tcb.h>
#include <dev/cxgb/cxgb_include.h>
#include <dev/cxgb/common/cxgb_ctl_defs.h>
#include <dev/cxgb/common/cxgb_t3_cpl.h>
#include <dev/cxgb/cxgb_offload.h>
#include <dev/cxgb/cxgb_l2t.h>
#include <dev/cxgb/ulp/toecore/cxgb_toedev.h>
#include <dev/cxgb/ulp/tom/cxgb_tom.h>
#include <dev/cxgb/ulp/tom/cxgb_defs.h>
#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h>
#include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
static int activated = 1;
TUNABLE_INT("hw.t3toe.activated", &activated);
SYSCTL_NODE(_hw, OID_AUTO, t3toe, CTLFLAG_RD, 0, "T3 toe driver parameters");
SYSCTL_INT(_hw_t3toe, OID_AUTO, activated, CTLFLAG_RDTUN, &activated, 0,
"enable TOE at init time");
static TAILQ_HEAD(, tom_data) cxgb_list;
static struct mtx cxgb_list_lock;
static int t3_toe_attach(struct toedev *dev, const struct offload_id *entry);
/*
* Handlers for each CPL opcode
*/
static cxgb_cpl_handler_func tom_cpl_handlers[NUM_CPL_CMDS];
static eventhandler_tag listen_tag;
static struct offload_id t3_toe_id_tab[] = {
{ TOE_ID_CHELSIO_T3, 0 },
{ TOE_ID_CHELSIO_T3B, 0 },
{ 0 }
};
static struct tom_info t3_tom_info = {
.ti_attach = t3_toe_attach,
.ti_id_table = t3_toe_id_tab,
.ti_name = "Chelsio-T3"
};
struct cxgb_client t3c_tom_client = {
.name = "tom_cxgb3",
.remove = NULL,
.handlers = tom_cpl_handlers,
.redirect = NULL
};
/*
* Add an mbuf to the deferred mbuf queue for processing from process context.
*/
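/*
 * Note that the task is kicked only on the queue's 0 -> 1 length
 * transition; the deferq task handler (not part of this diff) is expected
 * to drain the whole list, so later entries ride along without another
 * taskqueue_enqueue().
 */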
void
t3_defer_reply(struct mbuf *m, struct toedev *dev, defer_handler_t handler)
{
struct tom_data *td = TOM_DATA(dev);
m_set_handler(m, handler);
mtx_lock(&td->deferq.lock);
mbufq_tail(&td->deferq, m);
if (mbufq_len(&td->deferq) == 1)
taskqueue_enqueue(td->tq, &td->deferq_task);
mtx_unlock(&td->deferq.lock);
}
struct toepcb *
toepcb_alloc(void)
{
struct toepcb *toep;
toep = malloc(sizeof(struct toepcb), M_DEVBUF, M_NOWAIT);
if (toep == NULL)
return (NULL);
toepcb_init(toep);
return (toep);
}
void
toepcb_init(struct toepcb *toep)
{
bzero(toep, sizeof(*toep));
toep->tp_refcount = 1;
}
void
toepcb_hold(struct toepcb *toep)
{
atomic_add_acq_int(&toep->tp_refcount, 1);
}
void
toepcb_release(struct toepcb *toep)
{
if (atomic_fetchadd_int(&toep->tp_refcount, -1) == 1) {
printf("doing final toepcb free\n");
free(toep, M_DEVBUF);
}
}
/*
* Add a T3 offload device to the list of devices we are managing.
*/
static void
t3cdev_add(struct tom_data *t)
{
mtx_lock(&cxgb_list_lock);
TAILQ_INSERT_TAIL(&cxgb_list, t, entry);
mtx_unlock(&cxgb_list_lock);
}
/*
* Allocate a TOM data structure,
* initialize its cpl_handlers
* and register it as a T3C client
*/
static void
t3c_tom_add(struct t3cdev *cdev)
{
int i;
unsigned int wr_len;
struct tom_data *t;
struct toedev *tdev;
struct adap_ports *port_info;
t = malloc(sizeof(*t), M_CXGB, M_NOWAIT|M_ZERO);
if (!t)
return;
if (cdev->ctl(cdev, GET_WR_LEN, &wr_len) < 0)
goto out_free_tom;
port_info = malloc(sizeof(*port_info), M_CXGB, M_NOWAIT|M_ZERO);
if (!port_info)
goto out_free_tom;
if (cdev->ctl(cdev, GET_PORTS, port_info) < 0)
goto out_free_all;
t3_init_wr_tab(wr_len);
t->cdev = cdev;
t->client = &t3c_tom_client;
/* Register TCP offload device */
tdev = &t->tdev;
tdev->tod_ttid = (cdev->type == T3A ?
TOE_ID_CHELSIO_T3 : TOE_ID_CHELSIO_T3B);
tdev->tod_lldev = cdev->lldev;
if (register_toedev(tdev, "toe%d")) {
printf("unable to register offload device");
goto out_free_all;
}
TOM_DATA(tdev) = t;
for (i = 0; i < port_info->nports; i++) {
struct ifnet *ifp = port_info->lldevs[i];
TOEDEV(ifp) = tdev;
ifp->if_capabilities |= IFCAP_TOE;
}
t->ports = port_info;
/* Add device to the list of offload devices */
t3cdev_add(t);
/* Activate TCP offload device */
activate_offload(tdev);
return;
out_free_all:
free(port_info, M_CXGB);
out_free_tom:
free(t, M_CXGB);
return;
}
/*
* Process a received packet with an unknown/unexpected CPL opcode.
*/
static int
do_bad_cpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
log(LOG_ERR, "%s: received bad CPL command %u\n", cdev->name,
*mtod(m, unsigned int *));
return (CPL_RET_BUF_DONE | CPL_RET_BAD_MSG);
}
/*
* Add a new handler to the CPL dispatch table. A NULL handler may be supplied
* to unregister an existing handler.
*/
void
t3tom_register_cpl_handler(unsigned int opcode, cxgb_cpl_handler_func h)
{
if (opcode < NUM_CPL_CMDS)
tom_cpl_handlers[opcode] = h ? h : do_bad_cpl;
else
log(LOG_ERR, "Chelsio T3 TOM: handler registration for "
"opcode %u failed\n", opcode);
}
/*
* Make a preliminary determination if a connection can be offloaded. It's OK
* to fail the offload later if we say we can offload here. For now this
* always accepts the offload request unless there are IP options.
*/
static int
can_offload(struct toedev *dev, struct socket *so)
{
struct tom_data *tomd = TOM_DATA(dev);
struct t3cdev *cdev = T3CDEV(dev->tod_lldev);
struct tid_info *t = &(T3C_DATA(cdev))->tid_maps;
return sotoinpcb(so)->inp_depend4.inp4_options == NULL &&
tomd->conf.activated &&
(tomd->conf.max_conn < 0 ||
atomic_load_acq_int(&t->tids_in_use) + t->atids_in_use < tomd->conf.max_conn);
}
static int
tom_ctl(struct toedev *dev, unsigned int req, void *data)
{
struct tom_data *t = TOM_DATA(dev);
struct t3cdev *cdev = t->cdev;
if (cdev->ctl)
return (cdev->ctl(cdev, req, data));
return (EOPNOTSUPP);
}
/*
* Initialize the CPL dispatch table.
*/
static void
init_cpl_handlers(void)
{
int i;
for (i = 0; i < NUM_CPL_CMDS; ++i)
tom_cpl_handlers[i] = do_bad_cpl;
t3_init_listen_cpl_handlers();
}
static int
t3_toe_attach(struct toedev *dev, const struct offload_id *entry)
{
struct tom_data *t = TOM_DATA(dev);
struct t3cdev *cdev = t->cdev;
struct ddp_params ddp;
struct ofld_page_info rx_page_info;
int err;
#if 0
skb_queue_head_init(&t->deferq);
T3_INIT_WORK(&t->deferq_task, process_deferq, t);
spin_lock_init(&t->listen_lock);
#endif
t3_init_tunables(t);
mtx_init(&t->listen_lock, "tom data listeners", NULL, MTX_DEF);
/* Adjust TOE activation for this module */
t->conf.activated = activated;
dev->tod_can_offload = can_offload;
dev->tod_connect = t3_connect;
dev->tod_ctl = tom_ctl;
#if 0
#ifndef NETEVENT
dev->tod_neigh_update = tom_neigh_update;
#endif
dev->tod_failover = t3_failover;
#endif
err = cdev->ctl(cdev, GET_DDP_PARAMS, &ddp);
if (err)
return (err);
err = cdev->ctl(cdev, GET_RX_PAGE_INFO, &rx_page_info);
if (err)
return (err);
t->ddp_llimit = ddp.llimit;
t->ddp_ulimit = ddp.ulimit;
t->pdev = ddp.pdev;
t->rx_page_size = rx_page_info.page_size;
#ifdef notyet
/* OK if this fails, we just can't do DDP */
t->nppods = (ddp.ulimit + 1 - ddp.llimit) / PPOD_SIZE;
t->ppod_map = t3_alloc_mem(t->nppods);
#endif
#if 0
spin_lock_init(&t->ppod_map_lock);
tom_proc_init(dev);
#ifdef CONFIG_SYSCTL
t->sysctl = t3_sysctl_register(dev, &t->conf);
#endif
#endif
return (0);
}
static void
cxgb_toe_listen(void *unused, int event, struct tcpcb *tp)
{
struct socket *so = tp->t_inpcb->inp_socket;
struct tom_data *p;
switch (event) {
case OFLD_LISTEN_OPEN:
case OFLD_LISTEN_CLOSE:
mtx_lock(&cxgb_list_lock);
TAILQ_FOREACH(p, &cxgb_list, entry) {
if (event == OFLD_LISTEN_OPEN)
t3_listen_start(&p->tdev, so, p->cdev);
else if (tp->t_state == TCPS_LISTEN) {
printf("stopping listen on port=%d\n",
ntohs(tp->t_inpcb->inp_lport));
t3_listen_stop(&p->tdev, so, p->cdev);
}
}
mtx_unlock(&cxgb_list_lock);
break;
default:
log(LOG_ERR, "unrecognized listen event %d\n", event);
break;
}
}
static void
cxgb_register_listeners(void)
{
struct inpcb *inp;
struct tcpcb *tp;
INP_INFO_RLOCK(&tcbinfo);
LIST_FOREACH(inp, tcbinfo.ipi_listhead, inp_list) {
tp = intotcpcb(inp);
if (tp->t_state == TCPS_LISTEN)
cxgb_toe_listen(NULL, OFLD_LISTEN_OPEN, tp);
}
INP_INFO_RUNLOCK(&tcbinfo);
}
static int
t3_tom_init(void)
{
#if 0
struct socket *sock;
err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
if (err < 0) {
printk(KERN_ERR "Could not create TCP socket, error %d\n", err);
return err;
}
t3_def_state_change = sock->sk->sk_state_change;
t3_def_data_ready = sock->sk->sk_data_ready;
t3_def_error_report = sock->sk->sk_error_report;
sock_release(sock);
#endif
init_cpl_handlers();
if (t3_init_cpl_io() < 0)
return -1;
t3_init_socket_ops();
/* Register with the TOE device layer. */
if (register_tom(&t3_tom_info) != 0) {
log(LOG_ERR,
"Unable to register Chelsio T3 TCP offload module.\n");
return -1;
}
mtx_init(&cxgb_list_lock, "cxgb tom list", NULL, MTX_DEF);
listen_tag = EVENTHANDLER_REGISTER(ofld_listen, cxgb_toe_listen, NULL, EVENTHANDLER_PRI_ANY);
TAILQ_INIT(&cxgb_list);
/* Register to offloading devices */
t3c_tom_client.add = t3c_tom_add;
cxgb_register_client(&t3c_tom_client);
cxgb_register_listeners();
return (0);
}
static int
t3_tom_load(module_t mod, int cmd, void *arg)
{
int err = 0;
switch (cmd) {
case MOD_LOAD:
printf("wheeeeee ...\n");
t3_tom_init();
break;
case MOD_QUIESCE:
break;
case MOD_UNLOAD:
printf("uhm, ... unloading isn't really supported for toe\n");
break;
case MOD_SHUTDOWN:
break;
default:
err = EOPNOTSUPP;
break;
}
return (err);
}
static moduledata_t mod_data = {
"t3_tom",
t3_tom_load,
0
};
MODULE_VERSION(t3_tom, 1);
MODULE_DEPEND(t3_tom, toecore, 1, 1, 1);
MODULE_DEPEND(t3_tom, if_cxgb, 1, 1, 1);
DECLARE_MODULE(t3_tom, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);

sys/dev/cxgb/ulp/tom/cxgb_tom.h

@@ -0,0 +1,157 @@
/**************************************************************************
Copyright (c) 2007, Chelsio Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Neither the name of the Chelsio Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
$FreeBSD$
***************************************************************************/
#ifndef CXGB_TOM_H_
#define CXGB_TOM_H_
#include <sys/protosw.h>
#define LISTEN_INFO_HASH_SIZE 32
struct listen_info {
struct listen_info *next; /* Link to next entry */
struct socket *so; /* The listening socket */
unsigned int stid; /* The server TID */
};
/*
* TOM tunable parameters.  They can be manipulated through sysctl(2).
*/
struct tom_tunables {
int max_host_sndbuf; // max host RAM consumed by a sndbuf
int tx_hold_thres; // push/pull threshold for non-full TX sk_buffs
int max_wrs; // max # of outstanding WRs per connection
int rx_credit_thres; // min # of RX credits needed for RX_DATA_ACK
int cong_alg; // Congestion control algorithm
int mss; // max TX_DATA WR payload size
int delack; // delayed ACK control
int max_conn; // maximum number of offloaded connections
int soft_backlog_limit; // whether the listen backlog limit is soft
int ddp; // whether to put new connections in DDP mode
int ddp_thres; // min recvmsg size before activating DDP
int ddp_copy_limit; // capacity of kernel DDP buffer
int ddp_push_wait; // whether blocking DDP waits for PSH flag
int ddp_rcvcoalesce; // whether receive coalescing is enabled
int zcopy_sosend_enabled; // whether zero-copy sosend is enabled
int zcopy_sosend_partial_thres; // < is never zcopied
int zcopy_sosend_partial_copy; // bytes copied in partial zcopy
int zcopy_sosend_thres; // >= are mostly zcopied
int zcopy_sosend_copy; // bytes copied in zcopy
int zcopy_sosend_ret_pending_dma; // potentially return while DMA pending
int activated; // TOE engine activation state
};
struct tom_data {
TAILQ_ENTRY(tom_data) entry;
struct t3cdev *cdev;
struct pci_dev *pdev;
struct toedev tdev;
struct cxgb_client *client;
struct tom_tunables conf;
struct tom_sysctl_table *sysctl;
/*
* The next three locks listen_lock, deferq.lock, and tid_release_lock
* are used rarely so we let them potentially share a cacheline.
*/
struct listen_info *listen_hash_tab[LISTEN_INFO_HASH_SIZE];
struct mtx listen_lock;
struct mbuf_head deferq;
struct task deferq_task;
struct socket **tid_release_list;
struct mtx tid_release_lock;
struct task tid_release_task;
volatile int tx_dma_pending;
unsigned int ddp_llimit;
unsigned int ddp_ulimit;
unsigned int rx_page_size;
u8 *ppod_map;
unsigned int nppods;
struct mtx ppod_map_lock;
struct adap_ports *ports;
struct taskqueue *tq;
};
struct listen_ctx {
struct socket *lso;
struct tom_data *tom_data;
int ulp_mode;
LIST_HEAD(, toepcb) synq_head;
};
#define TOM_DATA(dev) (*(struct tom_data **)&(dev)->tod_l4opt)
#define T3C_DEV(so) ((TOM_DATA(TOE_DEV(so)))->cdev)
#define TOEP_T3C_DEV(toep) (TOM_DATA(toep->tp_toedev)->cdev)
#define TOM_TUNABLE(dev, param) (TOM_DATA(dev)->conf.param)
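/*
 * E.g. TOM_TUNABLE(dev, ddp_thres) expands to TOM_DATA(dev)->conf.ddp_thres;
 * this is how cxgb_sosend() and cxgb_soreceive() fetch their zero-copy
 * thresholds.
 */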
#define TP_DATASENT (1 << 0)
#define TP_TX_WAIT_IDLE (1 << 1)
#define TP_FIN_SENT (1 << 2)
#define TP_ABORT_RPL_PENDING (1 << 3)
#define TP_ABORT_SHUTDOWN (1 << 4)
#define TP_ABORT_RPL_RCVD (1 << 5)
#define TP_ABORT_REQ_RCVD (1 << 6)
#define TP_CLOSE_CON_REQUESTED (1 << 7)
#define TP_SYN_RCVD (1 << 8)
#define TP_ESTABLISHED (1 << 9)
void t3_init_tunables(struct tom_data *t);
static __inline struct mbuf *
m_gethdr_nofail(int len)
{
struct mbuf *m;
m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
panic("implement lowmem cache\n");
}
KASSERT(len < MHLEN, ("requested header size too large for mbuf"));
m->m_pkthdr.len = m->m_len = len;
return (m);
}
#endif

sys/dev/cxgb/ulp/tom/cxgb_tom_sysctl.c

@@ -0,0 +1,106 @@
/**************************************************************************
Copyright (c) 2007, Chelsio Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Neither the name of the Chelsio Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/socketvar.h>
#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <dev/cxgb/cxgb_osdep.h>
#include <dev/cxgb/sys/mbufq.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
#include <net/route.h>
#include <dev/cxgb/t3cdev.h>
#include <dev/cxgb/common/cxgb_firmware_exports.h>
#include <dev/cxgb/common/cxgb_tcb.h>
#include <dev/cxgb/common/cxgb_ctl_defs.h>
#include <dev/cxgb/common/cxgb_t3_cpl.h>
#include <dev/cxgb/cxgb_offload.h>
#include <dev/cxgb/cxgb_l2t.h>
#include <dev/cxgb/ulp/toecore/cxgb_toedev.h>
#include <dev/cxgb/ulp/tom/cxgb_tom.h>
#include <dev/cxgb/ulp/tom/cxgb_defs.h>
#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h>
static struct tom_tunables default_tunable_vals = {
.max_host_sndbuf = 32 * 1024,
.tx_hold_thres = 0,
.max_wrs = 15,
.rx_credit_thres = 15 * 1024,
.cong_alg = -1,
.mss = 16384,
.delack = 1,
.max_conn = -1,
.soft_backlog_limit = 0,
.ddp = 0,
.ddp_thres = 14 * 4096,
.ddp_copy_limit = 13 * 4096,
.ddp_push_wait = 1,
.ddp_rcvcoalesce = 0,
.zcopy_sosend_enabled = 0,
.zcopy_sosend_partial_thres = 40960,
.zcopy_sosend_partial_copy = 4096 * 3,
.zcopy_sosend_thres = 128 * 1024,
.zcopy_sosend_copy = 4096 * 2,
.zcopy_sosend_ret_pending_dma = 1,
.activated = 1,
};
void
t3_init_tunables(struct tom_data *t)
{
t->conf = default_tunable_vals;
/* Now apply device specific fixups. */
t->conf.mss = T3C_DATA(t->cdev)->tx_max_chunk;
t->conf.max_wrs = T3C_DATA(t->cdev)->max_wrs;
}