Add driver for TCP offload

Sponsored by: Chelsio Inc.

parent 501e15907b
commit 280b95e8b5

sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c (new file, 3378 lines)
File diff suppressed because it is too large.

sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c (new file, 560 lines)
@@ -0,0 +1,560 @@
/**************************************************************************
|
||||
|
||||
Copyright (c) 2007, Chelsio Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
2. Neither the name of the Chelsio Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
***************************************************************************/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/fcntl.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/limits.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/mbuf.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/syslog.h>
|
||||
#include <sys/socketvar.h>
|
||||
#include <sys/uio.h>
|
||||
|
||||
#include <machine/bus.h>
|
||||
|
||||
#include <net/if.h>
|
||||
#include <net/route.h>
|
||||
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/in_pcb.h>
|
||||
#include <netinet/in_systm.h>
|
||||
#include <netinet/in_var.h>
|
||||
|
||||
|
||||
#include <dev/cxgb/cxgb_osdep.h>
|
||||
#include <dev/cxgb/sys/mbufq.h>
|
||||
|
||||
#include <netinet/tcp.h>
|
||||
#include <netinet/tcp_var.h>
|
||||
#include <netinet/tcp_fsm.h>
|
||||
#include <netinet/tcp_ofld.h>
|
||||
#include <net/route.h>
|
||||
|
||||
#include <dev/cxgb/t3cdev.h>
|
||||
#include <dev/cxgb/common/cxgb_firmware_exports.h>
|
||||
#include <dev/cxgb/common/cxgb_t3_cpl.h>
|
||||
#include <dev/cxgb/common/cxgb_tcb.h>
|
||||
#include <dev/cxgb/common/cxgb_ctl_defs.h>
|
||||
#include <dev/cxgb/cxgb_l2t.h>
|
||||
#include <dev/cxgb/cxgb_offload.h>
|
||||
#include <vm/vm.h>
|
||||
#include <vm/vm_page.h>
|
||||
#include <vm/vm_map.h>
|
||||
#include <vm/vm_extern.h>
|
||||
#include <vm/pmap.h>
|
||||
|
||||
#include <dev/cxgb/sys/mvec.h>
|
||||
#include <dev/cxgb/ulp/toecore/cxgb_toedev.h>
|
||||
#include <dev/cxgb/ulp/tom/cxgb_defs.h>
|
||||
#include <dev/cxgb/ulp/tom/cxgb_tom.h>
|
||||
#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h>
|
||||
#include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
|
||||
|
||||
static int (*pru_sosend)(struct socket *so, struct sockaddr *addr,
|
||||
struct uio *uio, struct mbuf *top, struct mbuf *control,
|
||||
int flags, struct thread *td);
|
||||
|
||||
static int (*pru_soreceive)(struct socket *so, struct sockaddr **paddr,
|
||||
struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
|
||||
int *flagsp);
|
||||
|
||||
#ifdef notyet
|
||||
#define VM_HOLD_WRITEABLE 0x1
|
||||
static int vm_fault_hold_user_pages(vm_offset_t addr, int len, vm_page_t *mp,
|
||||
int *count, int flags);
|
||||
#endif
|
||||
static void vm_fault_unhold_pages(vm_page_t *m, int count);
|
||||
|
||||
|
||||
|
||||
#define TMP_IOV_MAX 16
|
||||
|
||||
void
|
||||
t3_init_socket_ops(void)
|
||||
{
|
||||
struct protosw *prp;
|
||||
|
||||
prp = pffindtype(AF_INET, SOCK_STREAM);
|
||||
pru_sosend = prp->pr_usrreqs->pru_sosend;
|
||||
pru_soreceive = prp->pr_usrreqs->pru_soreceive;
|
||||
}
|
||||
|
||||
|
||||
struct cxgb_dma_info {
|
||||
size_t cdi_mapped;
|
||||
int cdi_nsegs;
|
||||
bus_dma_segment_t *cdi_segs;
|
||||
|
||||
};
|
||||
|
||||
static void
|
||||
cxgb_dma_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
|
||||
bus_size_t mapsize, int error)
|
||||
{
|
||||
struct cxgb_dma_info *cdi = arg;
|
||||
|
||||
cdi->cdi_mapped = mapsize;
|
||||
cdi->cdi_nsegs = nsegs;
|
||||
cdi->cdi_segs = segs;
|
||||
}
|
||||
|
||||
static void
|
||||
iov_adj(struct iovec **iov, int *iovcnt, size_t count)
|
||||
{
|
||||
struct iovec *iovtmp;
|
||||
int iovcnttmp;
|
||||
caddr_t ptmp;
|
||||
|
||||
if (count > 0) {
|
||||
iovtmp = *iov;
|
||||
iovcnttmp = *iovcnt;
|
||||
while (count > 0) {
|
||||
if (count < iovtmp->iov_len) {
|
||||
ptmp = iovtmp->iov_base;
|
||||
ptmp += count;
|
||||
iovtmp->iov_base = ptmp;
|
||||
iovtmp->iov_len -= count;
|
||||
break;
|
||||
} else
|
||||
count -= iovtmp->iov_len;
|
||||
iovtmp++;
|
||||
iovcnttmp--;
|
||||
}
|
||||
*iov = iovtmp;
|
||||
*iovcnt = iovcnttmp;
|
||||
} else if (count < 0) {
|
||||
iovtmp = &(*iov)[*iovcnt - 1];
|
||||
iovcnttmp = *iovcnt;
|
||||
while (count < 0) {
|
||||
if (-count < iovtmp->iov_len) {
|
||||
iovtmp->iov_len += count;
|
||||
break;
|
||||
} else
|
||||
count += iovtmp->iov_len;
|
||||
iovtmp--;
|
||||
iovcnttmp--;
|
||||
}
|
||||
*iovcnt = iovcnttmp;
|
||||
}
|
||||
}
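/*
 * Illustrative sketch, not part of the original commit: iov_adj() above
 * advances the iovec array by `count` bytes when count is positive and
 * trims `-count` bytes off the tail when it is negative.  A minimal usage
 * example follows; the function name and buffers are hypothetical.
 */
static void
example_iov_adj_usage(void)
{
	static char buf0[100], buf1[200];
	struct iovec iov[2] = {
		{ .iov_base = buf0, .iov_len = sizeof(buf0) },
		{ .iov_base = buf1, .iov_len = sizeof(buf1) },
	};
	struct iovec *iovp = iov;
	int iovcnt = 2;

	/* Skip the first 150 bytes, e.g. data that has already been sent. */
	iov_adj(&iovp, &iovcnt, 150);
	/* iovp now points at iov[1]: iov_base advanced by 50, 150 bytes left. */
}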
|
||||
|
||||
|
||||
static void
|
||||
cxgb_zero_copy_free(void *cl, void *arg) {}
|
||||
|
||||
static int
|
||||
cxgb_hold_iovec_pages(struct uio *uio, vm_page_t *m, int *held, int flags)
|
||||
{
|
||||
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
static void
|
||||
cxgb_wait_dma_completion(struct toepcb *tp)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
static int
|
||||
cxgb_vm_page_to_miov(struct toepcb *toep, struct uio *uio, struct mbuf **m)
|
||||
{
|
||||
int i, seg_count, err, type;
|
||||
struct mbuf *m0;
|
||||
struct cxgb_dma_info cdi;
|
||||
struct mbuf_vec *mv;
|
||||
struct mbuf_iovec *mi;
|
||||
bus_dma_segment_t *segs;
|
||||
|
||||
err = bus_dmamap_load_uio(toep->tp_tx_dmat, toep->tp_dmamap, uio,
|
||||
cxgb_dma_callback, &cdi, 0);
|
||||
|
||||
if (err)
|
||||
return (err);
|
||||
seg_count = cdi.cdi_nsegs;
|
||||
if ((m0 = mcl_alloc(seg_count, &type)) == NULL) {
|
||||
bus_dmamap_unload(toep->tp_tx_dmat, toep->tp_dmamap);
|
||||
return (ENOMEM);
|
||||
}
|
||||
segs = cdi.cdi_segs;
|
||||
m0->m_type = type;
|
||||
m0->m_flags = (M_EXT|M_NOFREE);
|
||||
m0->m_ext.ext_type = EXT_EXTREF;
|
||||
m0->m_ext.ext_free = cxgb_zero_copy_free;
|
||||
m0->m_ext.ext_args = NULL;
|
||||
|
||||
mv = mtomv(m0);
|
||||
mv->mv_count = seg_count;
|
||||
mv->mv_first = 0;
|
||||
for (i = 0, mi = mv->mv_vec; i < seg_count; mi++, segs++, i++)
|
||||
mi_collapse_sge(mi, segs);
|
||||
|
||||
*m = m0;
|
||||
|
||||
if (cdi.cdi_mapped < uio->uio_resid) {
|
||||
uio->uio_resid -= cdi.cdi_mapped;
|
||||
} else
|
||||
uio->uio_resid = 0;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
t3_sosend(struct socket *so, struct uio *uio)
|
||||
{
|
||||
int rv, count, hold_resid, sent, iovcnt;
|
||||
struct iovec iovtmp[TMP_IOV_MAX], *iovtmpp, *iov;
|
||||
struct tcpcb *tp = sototcpcb(so);
|
||||
struct toepcb *toep = tp->t_toe;
|
||||
struct mbuf *m;
|
||||
struct uio uiotmp;
|
||||
|
||||
/*
|
||||
* Events requiring iteration:
|
||||
* - number of pages exceeds max hold pages for process or system
|
||||
* - number of pages exceeds maximum sg entries for a single WR
|
||||
*
|
||||
* We're limited to holding 128 pages at once - and we're limited to
|
||||
* 34 SG entries per work request, but each SG entry can be any number
|
||||
* of contiguous pages
|
||||
*
|
||||
*/
|
||||
|
||||
uiotmp = *uio;
|
||||
iovcnt = uio->uio_iovcnt;
|
||||
iov = uio->uio_iov;
|
||||
sent = 0;
|
||||
sendmore:
|
||||
/*
|
||||
* Make sure we don't exceed the socket buffer
|
||||
*/
|
||||
count = min(toep->tp_page_count, (sbspace(&so->so_snd) >> PAGE_SHIFT) + 2*PAGE_SIZE);
|
||||
rv = cxgb_hold_iovec_pages(&uiotmp, toep->tp_pages, &count, 0);
|
||||
hold_resid = uiotmp.uio_resid;
|
||||
if (rv)
|
||||
return (rv);
|
||||
|
||||
/*
|
||||
* Bump past sent and shave off the unheld amount
|
||||
*/
|
||||
if (hold_resid > 0) {
|
||||
iovtmpp = iovtmp;
|
||||
memcpy(iovtmp, iov, iovcnt*sizeof(*iov));
|
||||
if (sent)
|
||||
iov_adj(&iovtmpp, &iovcnt, sent);
|
||||
iov_adj(&iovtmpp, &iovcnt, -hold_resid);
|
||||
uiotmp.uio_iov = iovtmpp;
|
||||
uiotmp.uio_iovcnt = iovcnt;
|
||||
|
||||
}
|
||||
uiotmp.uio_resid = uio->uio_resid - hold_resid;
|
||||
|
||||
/*
|
||||
* Push off all held pages
|
||||
*
|
||||
*/
|
||||
while (uiotmp.uio_resid > 0) {
|
||||
rv = cxgb_vm_page_to_miov(toep, &uiotmp, &m);
|
||||
if (rv) {
|
||||
vm_fault_unhold_pages(toep->tp_pages, count);
|
||||
return (rv);
|
||||
}
|
||||
uio->uio_resid -= m->m_pkthdr.len;
|
||||
sent += m->m_pkthdr.len;
|
||||
sbappend_locked(&so->so_snd, m);
|
||||
t3_push_frames(so, TRUE);
|
||||
iov_adj(&uiotmp.uio_iov, &iovcnt, uiotmp.uio_resid);
|
||||
}
|
||||
/*
|
||||
* Wait for pending I/O to be DMA'd to the card
|
||||
*
|
||||
*/
|
||||
cxgb_wait_dma_completion(toep);
|
||||
vm_fault_unhold_pages(toep->tp_pages, count);
|
||||
/*
|
||||
* If there is more data to send adjust local copy of iov
|
||||
* to point to the start
|
||||
*/
|
||||
if (hold_resid) {
|
||||
iovtmpp = iovtmp;
|
||||
memcpy(iovtmp, iov, iovcnt*sizeof(*iov));
|
||||
iov_adj(&iovtmpp, &iovcnt, sent);
|
||||
uiotmp = *uio;
|
||||
uiotmp.uio_iov = iovtmpp;
|
||||
uiotmp.uio_iovcnt = iovcnt;
|
||||
goto sendmore;
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
cxgb_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
|
||||
struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
|
||||
{
|
||||
struct tcpcb *tp = sototcpcb(so);
|
||||
struct toedev *tdev;
|
||||
int zcopy_thres, zcopy_enabled, rv;
|
||||
|
||||
/*
|
||||
* In order to use DMA direct from userspace the following
|
||||
* conditions must be met:
|
||||
* - the connection is currently offloaded
|
||||
* - ddp is enabled
|
||||
* - the number of bytes to be transferred exceeds the threshold
|
||||
* - the number of bytes currently in flight won't exceed the in-flight
|
||||
* threshold XXX TODO
|
||||
* - vm_fault_hold_user_pages succeeds
|
||||
* - blocking socket XXX for now
|
||||
*
|
||||
*/
|
||||
if (tp->t_flags & TF_TOE) {
|
||||
tdev = TOE_DEV(so);
|
||||
zcopy_thres = TOM_TUNABLE(tdev, zcopy_sosend_partial_thres);
|
||||
zcopy_enabled = TOM_TUNABLE(tdev, zcopy_sosend_enabled);
|
||||
|
||||
if ((uio->uio_resid > zcopy_thres) &&
|
||||
(uio->uio_iovcnt < TMP_IOV_MAX) && ((so->so_state & SS_NBIO) == 0)
|
||||
&& zcopy_enabled) {
|
||||
rv = t3_sosend(so, uio);
|
||||
if (rv != EAGAIN)
|
||||
return (rv);
|
||||
}
|
||||
}
|
||||
return pru_sosend(so, addr, uio, top, control, flags, td);
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
t3_soreceive(struct socket *so, struct uio *uio)
|
||||
{
|
||||
#ifdef notyet
|
||||
int i, rv, count, hold_resid, sent, iovcnt;
|
||||
struct iovec iovtmp[TMP_IOV_MAX], *iovtmpp, *iov;
|
||||
struct tcpcb *tp = sototcpcb(so);
|
||||
struct toepcb *toep = tp->t_toe;
|
||||
struct mbuf *m;
|
||||
struct uio uiotmp;
|
||||
|
||||
/*
|
||||
* Events requiring iteration:
|
||||
* - number of pages exceeds max hold pages for process or system
|
||||
* - number of pages exceeds maximum sg entries for a single WR
|
||||
*
|
||||
* We're limited to holding 128 pages at once - and we're limited to
|
||||
* 34 SG entries per work request, but each SG entry can be any number
|
||||
* of contiguous pages
|
||||
*
|
||||
*/
|
||||
|
||||
uiotmp = *uio;
|
||||
iovcnt = uio->uio_iovcnt;
|
||||
iov = uio->uio_iov;
|
||||
sent = 0;
|
||||
re;
|
||||
#endif
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
cxgb_soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
|
||||
struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
|
||||
{
|
||||
struct toedev *tdev;
|
||||
int rv, zcopy_thres, zcopy_enabled;
|
||||
struct tcpcb *tp = sototcpcb(so);
|
||||
|
||||
/*
|
||||
* In order to use DMA direct from userspace the following
|
||||
* conditions must be met:
|
||||
* - the connection is currently offloaded
|
||||
* - ddp is enabled
|
||||
* - the number of bytes to be transferred exceeds the threshold
|
||||
* - the number of bytes currently in flight won't exceed the in-flight
|
||||
* threshold XXX TODO
|
||||
* - vm_fault_hold_user_pages succeeds
|
||||
* - blocking socket XXX for now
|
||||
* - iovcnt is 1
|
||||
*
|
||||
*/
|
||||
if (tp->t_flags & TF_TOE) {
|
||||
tdev = TOE_DEV(so);
|
||||
zcopy_thres = TOM_TUNABLE(tdev, ddp_thres);
|
||||
zcopy_enabled = TOM_TUNABLE(tdev, ddp);
|
||||
if ((uio->uio_resid > zcopy_thres) &&
|
||||
(uio->uio_iovcnt == 1) && ((so->so_state & SS_NBIO) == 0)
|
||||
&& zcopy_enabled) {
|
||||
rv = t3_soreceive(so, uio);
|
||||
if (rv != EAGAIN)
|
||||
return (rv);
|
||||
}
|
||||
}
|
||||
|
||||
return pru_soreceive(so, psa, uio, mp0, controlp, flagsp);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
t3_install_socket_ops(struct socket *so)
|
||||
{
|
||||
so->so_proto->pr_usrreqs->pru_sosend = cxgb_sosend;
|
||||
so->so_proto->pr_usrreqs->pru_soreceive = cxgb_soreceive;
|
||||
}
|
||||
|
||||
/*
|
||||
* This routine takes a user address range and does the following:
|
||||
* - validate that the user has access to those pages (flags indicates read or write) - if not fail
|
||||
* - validate that count is enough to hold range number of pages - if not fail
|
||||
* - fault in any non-resident pages
|
||||
* - if the user is doing a read force a write fault for any COWed pages
|
||||
* - if the user is doing a read mark all pages as dirty
|
||||
* - hold all pages
|
||||
* - return number of pages in count
|
||||
*/
|
||||
#ifdef notyet
|
||||
static int
|
||||
vm_fault_hold_user_pages(vm_offset_t addr, int len, vm_page_t *mp, int *count, int flags)
|
||||
{
|
||||
|
||||
vm_offset_t start, va;
|
||||
vm_paddr_t pa;
|
||||
int pageslen, faults, rv;
|
||||
|
||||
struct thread *td;
|
||||
vm_map_t map;
|
||||
pmap_t pmap;
|
||||
vm_page_t m, *pages;
|
||||
vm_prot_t prot;
|
||||
|
||||
start = addr & ~PAGE_MASK;
|
||||
pageslen = roundup2(addr + len, PAGE_SIZE);
|
||||
if (*count < (pageslen >> PAGE_SHIFT))
|
||||
return (EFBIG);
|
||||
|
||||
*count = pageslen >> PAGE_SHIFT;
|
||||
/*
|
||||
* Check that virtual address range is legal
|
||||
* This check is somewhat bogus as on some architectures kernel
|
||||
* and user do not share VA - however, it appears that all FreeBSD
|
||||
* architectures define it
|
||||
*/
|
||||
if (addr + len > VM_MAXUSER_ADDRESS)
|
||||
return (EFAULT);
|
||||
|
||||
td = curthread;
|
||||
map = &td->td_proc->p_vmspace->vm_map;
|
||||
pmap = &td->td_proc->p_vmspace->vm_pmap;
|
||||
pages = mp;
|
||||
|
||||
prot = (flags & VM_HOLD_WRITEABLE) ? VM_PROT_WRITE : VM_PROT_READ;
|
||||
bzero(pages, sizeof(vm_page_t *) * (*count));
|
||||
retry:
|
||||
|
||||
/*
|
||||
* First optimistically assume that all pages are resident (and R/W if for write)
|
||||
* if so just mark pages as held (and dirty if for write) and return
|
||||
*/
|
||||
vm_page_lock_queues();
|
||||
for (pages = mp, faults = 0, va = start; va < pageslen; va += PAGE_SIZE, pages++) {
|
||||
/*
|
||||
* Assure that we only hold the page once
|
||||
*/
|
||||
if (*pages == NULL) {
|
||||
/*
|
||||
* page queue mutex is recursable so this is OK
|
||||
* it would be really nice if we had an unlocked version of this so
|
||||
* we were only acquiring the pmap lock 1 time as opposed to potentially
|
||||
* many dozens of times
|
||||
*/
|
||||
m = pmap_extract_and_hold(pmap, va, prot);
|
||||
if (m == NULL) {
|
||||
faults++;
|
||||
continue;
|
||||
}
|
||||
*pages = m;
|
||||
if (flags & VM_HOLD_WRITEABLE)
|
||||
vm_page_dirty(m);
|
||||
}
|
||||
}
|
||||
vm_page_unlock_queues();
|
||||
|
||||
if (faults == 0)
|
||||
return (0);
|
||||
/*
|
||||
* Pages either have insufficient permissions or are not present
|
||||
* trigger a fault where necessary
|
||||
*
|
||||
*/
|
||||
for (va = start; va < pageslen; va += PAGE_SIZE) {
|
||||
m = NULL;
|
||||
pa = pmap_extract(pmap, va);
|
||||
rv = 0;
|
||||
if (pa)
|
||||
m = PHYS_TO_VM_PAGE(pa);
|
||||
if (flags & VM_HOLD_WRITEABLE) {
|
||||
if (m == NULL || (m->flags & PG_WRITEABLE) == 0)
|
||||
rv = vm_fault(map, va, VM_PROT_WRITE, VM_FAULT_DIRTY);
|
||||
} else if (m == NULL)
|
||||
rv = vm_fault(map, va, VM_PROT_READ, VM_FAULT_NORMAL);
|
||||
if (rv)
|
||||
goto error;
|
||||
}
|
||||
goto retry;
|
||||
|
||||
error:
|
||||
vm_page_lock_queues();
|
||||
for (pages = mp, va = start; va < pageslen; va += PAGE_SIZE, pages++)
|
||||
if (*pages)
|
||||
vm_page_unhold(*pages);
|
||||
vm_page_unlock_queues();
|
||||
return (EFAULT);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
vm_fault_unhold_pages(vm_page_t *mp, int count)
|
||||
{
|
||||
|
||||
KASSERT(count >= 0, ("negative count %d", count));
|
||||
vm_page_lock_queues();
|
||||
while (count--) {
|
||||
vm_page_unhold(*mp);
|
||||
mp++;
|
||||
}
|
||||
vm_page_unlock_queues();
|
||||
}
|
||||
|
sys/dev/cxgb/ulp/tom/cxgb_defs.h (new file, 79 lines)
@@ -0,0 +1,79 @@
|
||||
|
||||
/**************************************************************************
|
||||
|
||||
Copyright (c) 2007, Chelsio Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
2. Neither the name of the Chelsio Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
$FreeBSD$
|
||||
|
||||
***************************************************************************/
|
||||
#ifndef CXGB_DEFS_H_
|
||||
#define CXGB_DEFS_H_
|
||||
|
||||
#define VALIDATE_TID 0
|
||||
|
||||
#define TOEPCB(so) ((struct toepcb *)(sototcpcb((so))->t_toe))
|
||||
#define TOE_DEV(so) (TOEPCB((so))->tp_toedev)
|
||||
#define toeptoso(toep) ((toep)->tp_tp->t_inpcb->inp_socket)
|
||||
#define sototoep(so) (sototcpcb((so))->t_toe)
|
||||
|
||||
struct listen_ctx;
|
||||
|
||||
typedef void (*defer_handler_t)(struct toedev *dev, struct mbuf *m);
|
||||
|
||||
void t3tom_register_cpl_handler(unsigned int opcode, cxgb_cpl_handler_func h);
|
||||
void t3_listen_start(struct toedev *dev, struct socket *so, struct t3cdev *cdev);
|
||||
void t3_listen_stop(struct toedev *dev, struct socket *so, struct t3cdev *cdev);
|
||||
int t3_push_frames(struct socket *so, int req_completion);
|
||||
int t3_connect(struct toedev *tdev, struct socket *so, struct rtentry *rt,
|
||||
struct sockaddr *nam);
|
||||
void t3_init_listen_cpl_handlers(void);
|
||||
int t3_init_cpl_io(void);
|
||||
void t3_init_wr_tab(unsigned int wr_len);
|
||||
uint32_t t3_send_rx_credits(struct tcpcb *tp, uint32_t credits, uint32_t dack, int nofail);
|
||||
void t3_cleanup_rbuf(struct tcpcb *tp);
|
||||
|
||||
void t3_init_socket_ops(void);
|
||||
void t3_install_socket_ops(struct socket *so);
|
||||
|
||||
|
||||
void t3_disconnect_acceptq(struct socket *listen_so);
|
||||
void t3_reset_synq(struct listen_ctx *ctx);
|
||||
void t3_defer_reply(struct mbuf *m, struct toedev *dev, defer_handler_t handler);
|
||||
|
||||
struct toepcb *toepcb_alloc(void);
|
||||
void toepcb_hold(struct toepcb *);
|
||||
void toepcb_release(struct toepcb *);
|
||||
void toepcb_init(struct toepcb *);
|
||||
|
||||
void t3_set_rcv_coalesce_enable(struct socket *so, int on_off);
|
||||
void t3_set_keepalive(struct socket *so, int on_off);
|
||||
void t3_set_ddp_tag(struct socket *so, int buf_idx, unsigned int tag);
|
||||
void t3_set_ddp_buf(struct socket *so, int buf_idx, unsigned int offset,
|
||||
unsigned int len);
|
||||
int t3_get_tcb(struct socket *so);
|
||||
|
||||
#endif
|
sys/dev/cxgb/ulp/tom/cxgb_listen.c (new file, 345 lines)
@@ -0,0 +1,345 @@
|
||||
/**************************************************************************
|
||||
|
||||
Copyright (c) 2007, Chelsio Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
2. Neither the name of the Chelsio Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
***************************************************************************/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/fcntl.h>
|
||||
#include <sys/limits.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/mbuf.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/socketvar.h>
|
||||
#include <sys/syslog.h>
|
||||
|
||||
#include <net/if.h>
|
||||
#include <net/route.h>
|
||||
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/in_pcb.h>
|
||||
#include <netinet/in_systm.h>
|
||||
#include <netinet/in_var.h>
|
||||
|
||||
|
||||
#include <dev/cxgb/cxgb_osdep.h>
|
||||
#include <dev/cxgb/sys/mbufq.h>
|
||||
|
||||
#include <netinet/tcp.h>
|
||||
#include <netinet/tcp_var.h>
|
||||
#include <netinet/tcp_fsm.h>
|
||||
|
||||
#include <netinet/tcp_ofld.h>
|
||||
#include <net/route.h>
|
||||
|
||||
#include <dev/cxgb/t3cdev.h>
|
||||
#include <dev/cxgb/common/cxgb_firmware_exports.h>
|
||||
#include <dev/cxgb/common/cxgb_t3_cpl.h>
|
||||
#include <dev/cxgb/common/cxgb_tcb.h>
|
||||
#include <dev/cxgb/common/cxgb_ctl_defs.h>
|
||||
#include <dev/cxgb/cxgb_l2t.h>
|
||||
#include <dev/cxgb/cxgb_offload.h>
|
||||
#include <dev/cxgb/ulp/toecore/cxgb_toedev.h>
|
||||
#include <dev/cxgb/ulp/tom/cxgb_defs.h>
|
||||
#include <dev/cxgb/ulp/tom/cxgb_tom.h>
|
||||
#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h>
|
||||
#include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
|
||||
|
||||
|
||||
static struct listen_info *listen_hash_add(struct tom_data *d, struct socket *so, unsigned int stid);
|
||||
static int listen_hash_del(struct tom_data *d, struct socket *so);
|
||||
|
||||
/*
|
||||
* Process a CPL_CLOSE_LISTSRV_RPL message. If the status is good we release
|
||||
* the STID.
|
||||
*/
|
||||
static int
|
||||
do_close_server_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
|
||||
{
|
||||
struct cpl_close_listserv_rpl *rpl = cplhdr(m);
|
||||
unsigned int stid = GET_TID(rpl);
|
||||
|
||||
if (rpl->status != CPL_ERR_NONE)
|
||||
log(LOG_ERR, "Unexpected CLOSE_LISTSRV_RPL status %u for "
|
||||
"STID %u\n", rpl->status, stid);
|
||||
else {
|
||||
struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx;
|
||||
|
||||
cxgb_free_stid(cdev, stid);
|
||||
free(listen_ctx, M_CXGB);
|
||||
}
|
||||
|
||||
return (CPL_RET_BUF_DONE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Process a CPL_PASS_OPEN_RPL message. Remove the socket from the listen hash
|
||||
* table and free the STID if there was any error, otherwise nothing to do.
|
||||
*/
|
||||
static int
|
||||
do_pass_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
|
||||
{
|
||||
struct cpl_pass_open_rpl *rpl = cplhdr(m);
|
||||
|
||||
if (rpl->status != CPL_ERR_NONE) {
|
||||
int stid = GET_TID(rpl);
|
||||
struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx;
|
||||
struct tom_data *d = listen_ctx->tom_data;
|
||||
struct socket *lso = listen_ctx->lso;
|
||||
|
||||
#if VALIDATE_TID
|
||||
if (!lso)
|
||||
return (CPL_RET_UNKNOWN_TID | CPL_RET_BUF_DONE);
|
||||
#endif
|
||||
/*
|
||||
* Note: It is safe to unconditionally call listen_hash_del()
|
||||
* at this point without risking unhashing a reincarnation of
|
||||
* an already closed socket (i.e., there is no listen, close,
|
||||
* listen, free the sock for the second listen while processing
|
||||
* a message for the first race) because we are still holding
|
||||
* a reference on the socket. It is possible that the unhash
|
||||
* will fail because the socket is already closed, but we can't
|
||||
* unhash the wrong socket because it is impossible for the
|
||||
* socket to which this message refers to have reincarnated.
|
||||
*/
|
||||
listen_hash_del(d, lso);
|
||||
cxgb_free_stid(cdev, stid);
|
||||
#ifdef notyet
|
||||
/*
|
||||
* XXX need to unreference the inpcb
|
||||
* but we have no way of knowing that other TOMs aren't referencing it
|
||||
*/
|
||||
sock_put(lso);
|
||||
#endif
|
||||
free(listen_ctx, M_CXGB);
|
||||
}
|
||||
return CPL_RET_BUF_DONE;
|
||||
}
|
||||
|
||||
void
|
||||
t3_init_listen_cpl_handlers(void)
|
||||
{
|
||||
t3tom_register_cpl_handler(CPL_PASS_OPEN_RPL, do_pass_open_rpl);
|
||||
t3tom_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl);
|
||||
}
|
||||
|
||||
static inline int
|
||||
listen_hashfn(const struct socket *so)
|
||||
{
|
||||
return ((unsigned long)so >> 10) & (LISTEN_INFO_HASH_SIZE - 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Create and add a listen_info entry to the listen hash table. This and the
|
||||
* listen hash table functions below cannot be called from softirqs.
|
||||
*/
|
||||
static struct listen_info *
|
||||
listen_hash_add(struct tom_data *d, struct socket *so, unsigned int stid)
|
||||
{
|
||||
struct listen_info *p;
|
||||
|
||||
p = malloc(sizeof(*p), M_CXGB, M_NOWAIT|M_ZERO);
|
||||
if (p) {
|
||||
int bucket = listen_hashfn(so);
|
||||
|
||||
p->so = so; /* just a key, no need to take a reference */
|
||||
p->stid = stid;
|
||||
mtx_lock(&d->listen_lock);
|
||||
p->next = d->listen_hash_tab[bucket];
|
||||
d->listen_hash_tab[bucket] = p;
|
||||
mtx_unlock(&d->listen_lock);
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
#if 0
|
||||
/*
|
||||
* Given a pointer to a listening socket return its server TID by consulting
|
||||
* the socket->stid map. Returns -1 if the socket is not in the map.
|
||||
*/
|
||||
static int
|
||||
listen_hash_find(struct tom_data *d, struct socket *so)
|
||||
{
|
||||
int stid = -1, bucket = listen_hashfn(so);
|
||||
struct listen_info *p;
|
||||
|
||||
spin_lock(&d->listen_lock);
|
||||
for (p = d->listen_hash_tab[bucket]; p; p = p->next)
|
||||
if (p->sk == sk) {
|
||||
stid = p->stid;
|
||||
break;
|
||||
}
|
||||
spin_unlock(&d->listen_lock);
|
||||
return stid;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Delete the listen_info structure for a listening socket. Returns the server
|
||||
* TID for the socket if it is present in the socket->stid map, or -1.
|
||||
*/
|
||||
static int
|
||||
listen_hash_del(struct tom_data *d, struct socket *so)
|
||||
{
|
||||
int bucket, stid = -1;
|
||||
struct listen_info *p, **prev;
|
||||
|
||||
bucket = listen_hashfn(so);
|
||||
prev = &d->listen_hash_tab[bucket];
|
||||
|
||||
mtx_lock(&d->listen_lock);
|
||||
for (p = *prev; p; prev = &p->next, p = p->next)
|
||||
if (p->so == so) {
|
||||
stid = p->stid;
|
||||
*prev = p->next;
|
||||
free(p, M_CXGB);
|
||||
break;
|
||||
}
|
||||
mtx_unlock(&d->listen_lock);
|
||||
|
||||
return (stid);
|
||||
}
|
||||
|
||||
/*
|
||||
* Start a listening server by sending a passive open request to HW.
|
||||
*/
|
||||
void
|
||||
t3_listen_start(struct toedev *dev, struct socket *so, struct t3cdev *cdev)
|
||||
{
|
||||
int stid;
|
||||
struct mbuf *m;
|
||||
struct cpl_pass_open_req *req;
|
||||
struct tom_data *d = TOM_DATA(dev);
|
||||
struct inpcb *inp = sotoinpcb(so);
|
||||
struct listen_ctx *ctx;
|
||||
|
||||
if (!TOM_TUNABLE(dev, activated))
|
||||
return;
|
||||
|
||||
printf("start listen\n");
|
||||
|
||||
ctx = malloc(sizeof(*ctx), M_CXGB, M_NOWAIT);
|
||||
|
||||
if (!ctx)
|
||||
return;
|
||||
|
||||
ctx->tom_data = d;
|
||||
ctx->lso = so;
|
||||
ctx->ulp_mode = 0; /* DDP is the default */
|
||||
LIST_INIT(&ctx->synq_head);
|
||||
|
||||
stid = cxgb_alloc_stid(d->cdev, d->client, ctx);
|
||||
if (stid < 0)
|
||||
goto free_ctx;
|
||||
|
||||
#ifdef notyet
|
||||
/*
|
||||
* XXX need to mark inpcb as referenced
|
||||
*/
|
||||
sock_hold(sk);
|
||||
#endif
|
||||
m = m_gethdr(M_NOWAIT, MT_DATA);
|
||||
if (m == NULL)
|
||||
goto free_stid;
|
||||
m->m_pkthdr.len = m->m_len = sizeof(*req);
|
||||
|
||||
if (!listen_hash_add(d, so, stid))
|
||||
goto free_all;
|
||||
|
||||
req = mtod(m, struct cpl_pass_open_req *);
|
||||
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
|
||||
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, stid));
|
||||
req->local_port = inp->inp_lport;
|
||||
memcpy(&req->local_ip, &inp->inp_laddr, 4);
|
||||
req->peer_port = 0;
|
||||
req->peer_ip = 0;
|
||||
req->peer_netmask = 0;
|
||||
req->opt0h = htonl(F_DELACK | F_TCAM_BYPASS);
|
||||
req->opt0l = htonl(V_RCV_BUFSIZ(16));
|
||||
req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK));
|
||||
|
||||
m_set_priority(m, CPL_PRIORITY_LISTEN);
|
||||
cxgb_ofld_send(cdev, m);
|
||||
return;
|
||||
|
||||
free_all:
|
||||
m_free(m);
|
||||
free_stid:
|
||||
cxgb_free_stid(cdev, stid);
|
||||
#if 0
|
||||
sock_put(sk);
|
||||
#endif
|
||||
free_ctx:
|
||||
free(ctx, M_CXGB);
|
||||
}
|
||||
|
||||
/*
|
||||
* Stop a listening server by sending a close_listsvr request to HW.
|
||||
* The server TID is freed when we get the reply.
|
||||
*/
|
||||
void
|
||||
t3_listen_stop(struct toedev *dev, struct socket *so, struct t3cdev *cdev)
|
||||
{
|
||||
struct mbuf *m;
|
||||
struct cpl_close_listserv_req *req;
|
||||
struct listen_ctx *lctx;
|
||||
int stid = listen_hash_del(TOM_DATA(dev), so);
|
||||
|
||||
if (stid < 0)
|
||||
return;
|
||||
|
||||
lctx = cxgb_get_lctx(cdev, stid);
|
||||
/*
|
||||
* Do this early so embryonic connections are marked as being aborted
|
||||
* while the stid is still open. This ensures pass_establish messages
|
||||
* that arrive while we are closing the server will be able to locate
|
||||
* the listening socket.
|
||||
*/
|
||||
t3_reset_synq(lctx);
|
||||
|
||||
/* Send the close ASAP to stop further passive opens */
|
||||
m = m_gethdr(M_NOWAIT, MT_DATA);
|
||||
if (m == NULL) {
|
||||
/*
|
||||
* XXX allocate from lowmem cache
|
||||
*/
|
||||
}
|
||||
m->m_pkthdr.len = m->m_len = sizeof(*req);
|
||||
|
||||
req = mtod(m, struct cpl_close_listserv_req *);
|
||||
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
|
||||
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, stid));
|
||||
req->cpu_idx = 0;
|
||||
m_set_priority(m, CPL_PRIORITY_LISTEN);
|
||||
cxgb_ofld_send(cdev, m);
|
||||
|
||||
t3_disconnect_acceptq(so);
|
||||
}
|
sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h (new file, 185 lines)
@@ -0,0 +1,185 @@
|
||||
|
||||
/**************************************************************************
|
||||
|
||||
Copyright (c) 2007, Chelsio Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
2. Neither the name of the Chelsio Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
$FreeBSD$
|
||||
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef T3_DDP_H
|
||||
#define T3_DDP_H
|
||||
|
||||
/* Should be 1 or 2 indicating single or double kernel buffers. */
|
||||
#define NUM_DDP_KBUF 2
|
||||
|
||||
/* min receive window for a connection to be considered for DDP */
|
||||
#define MIN_DDP_RCV_WIN (48 << 10)
|
||||
|
||||
/* amount of Rx window not available to DDP to avoid window exhaustion */
|
||||
#define DDP_RSVD_WIN (16 << 10)
|
||||
|
||||
/* # of sentinel invalid page pods at the end of a group of valid page pods */
|
||||
#define NUM_SENTINEL_PPODS 0
|
||||
|
||||
/* # of pages a pagepod can hold without needing another pagepod */
|
||||
#define PPOD_PAGES 4
|
||||
|
||||
/* page pods are allocated in groups of this size (must be power of 2) */
|
||||
#define PPOD_CLUSTER_SIZE 16
|
||||
|
||||
/* for each TID we reserve this many page pods up front */
|
||||
#define RSVD_PPODS_PER_TID 1
|
||||
|
||||
struct pagepod {
|
||||
uint32_t pp_vld_tid;
|
||||
uint32_t pp_pgsz_tag_color;
|
||||
uint32_t pp_max_offset;
|
||||
uint32_t pp_page_offset;
|
||||
uint64_t pp_rsvd;
|
||||
uint64_t pp_addr[5];
|
||||
};
|
||||
|
||||
#define PPOD_SIZE sizeof(struct pagepod)
|
||||
|
||||
#define S_PPOD_TID 0
|
||||
#define M_PPOD_TID 0xFFFFFF
|
||||
#define V_PPOD_TID(x) ((x) << S_PPOD_TID)
|
||||
|
||||
#define S_PPOD_VALID 24
|
||||
#define V_PPOD_VALID(x) ((x) << S_PPOD_VALID)
|
||||
#define F_PPOD_VALID V_PPOD_VALID(1U)
|
||||
|
||||
#define S_PPOD_COLOR 0
|
||||
#define M_PPOD_COLOR 0x3F
|
||||
#define V_PPOD_COLOR(x) ((x) << S_PPOD_COLOR)
|
||||
|
||||
#define S_PPOD_TAG 6
|
||||
#define M_PPOD_TAG 0xFFFFFF
|
||||
#define V_PPOD_TAG(x) ((x) << S_PPOD_TAG)
|
||||
|
||||
#define S_PPOD_PGSZ 30
|
||||
#define M_PPOD_PGSZ 0x3
|
||||
#define V_PPOD_PGSZ(x) ((x) << S_PPOD_PGSZ)
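/*
 * Illustrative sketch, not part of the original commit: how the pagepod
 * bit-field macros above would typically be combined to initialize a
 * struct pagepod for a DDP buffer.  The helper and its arguments are
 * hypothetical; the driver byte-swaps the words to big-endian for the
 * hardware.
 */
static __inline void
example_fill_pagepod(struct pagepod *p, unsigned int tid, unsigned int tag,
    unsigned int color, unsigned int max_offset, unsigned int page_offset)
{
	p->pp_vld_tid = htobe32(F_PPOD_VALID | V_PPOD_TID(tid));
	p->pp_pgsz_tag_color = htobe32(V_PPOD_PGSZ(0) | V_PPOD_TAG(tag) |
	    V_PPOD_COLOR(color));
	p->pp_max_offset = htobe32(max_offset);
	p->pp_page_offset = htobe32(page_offset);
	p->pp_rsvd = 0;
	/* pp_addr[] would be filled with the bus addresses of the held pages. */
}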
|
||||
|
||||
struct pci_dev;
|
||||
#include <vm/vm.h>
|
||||
#include <vm/vm_page.h>
|
||||
#include <machine/bus.h>
|
||||
|
||||
/* DDP gather lists can specify an offset only for the first page. */
|
||||
struct ddp_gather_list {
|
||||
unsigned int dgl_length;
|
||||
unsigned int dgl_offset;
|
||||
unsigned int dgl_nelem;
|
||||
vm_page_t *dgl_pages;
|
||||
bus_addr_t dgl_phys_addr[0];
|
||||
};
|
||||
|
||||
struct ddp_buf_state {
|
||||
unsigned int cur_offset; /* offset of latest DDP notification */
|
||||
unsigned int flags;
|
||||
struct ddp_gather_list *gl;
|
||||
};
|
||||
|
||||
struct ddp_state {
|
||||
struct pci_dev *pdev;
|
||||
struct ddp_buf_state buf_state[2]; /* per buffer state */
|
||||
int cur_buf;
|
||||
unsigned short kbuf_noinval;
|
||||
unsigned short kbuf_idx; /* which HW buffer is used for kbuf */
|
||||
struct ddp_gather_list *ubuf;
|
||||
unsigned int ubuf_nppods; /* # of page pods for buffer 1 */
|
||||
unsigned int ubuf_tag;
|
||||
unsigned int ubuf_ddp_ready;
|
||||
int get_tcb_count;
|
||||
unsigned int kbuf_posted;
|
||||
int cancel_ubuf;
|
||||
unsigned int kbuf_nppods[NUM_DDP_KBUF];
|
||||
unsigned int kbuf_tag[NUM_DDP_KBUF];
|
||||
struct ddp_gather_list *kbuf[NUM_DDP_KBUF]; /* kernel buffer for DDP prefetch */
|
||||
};
|
||||
|
||||
/* buf_state flags */
|
||||
enum {
|
||||
DDP_BF_NOINVAL = 1 << 0, /* buffer is set to NO_INVALIDATE */
|
||||
DDP_BF_NOCOPY = 1 << 1, /* DDP to final dest, no copy needed */
|
||||
DDP_BF_NOFLIP = 1 << 2, /* buffer flips after GET_TCB_RPL */
|
||||
DDP_BF_PSH = 1 << 3, /* set in skb->flags if a DDP was
|
||||
completed with a segment having the
|
||||
PSH flag set */
|
||||
};
|
||||
|
||||
#ifdef notyet
|
||||
/*
|
||||
* Returns 1 if a UBUF DMA buffer might be active.
|
||||
*/
|
||||
static inline int t3_ddp_ubuf_pending(struct sock *so)
|
||||
{
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
struct ddp_state *p = DDP_STATE(tp);
|
||||
|
||||
/* When the TOM_TUNABLE(ddp) is enabled, we're always in ULP_MODE DDP,
|
||||
* but DDP_STATE() is only valid if the connection actually enabled
|
||||
* DDP.
|
||||
*/
|
||||
if (!p)
|
||||
return 0;
|
||||
|
||||
return (p->buf_state[0].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY)) ||
|
||||
(p->buf_state[1].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY));
|
||||
}
|
||||
#endif
|
||||
|
||||
int t3_setup_ppods(struct socket *so, const struct ddp_gather_list *gl,
|
||||
unsigned int nppods, unsigned int tag, unsigned int maxoff,
|
||||
unsigned int pg_off, unsigned int color);
|
||||
int t3_alloc_ppods(struct tom_data *td, unsigned int n);
|
||||
void t3_free_ppods(struct tom_data *td, unsigned int tag, unsigned int n);
|
||||
void t3_free_ddp_gl(struct pci_dev *pdev, struct ddp_gather_list *gl);
|
||||
int t3_pin_pages(struct pci_dev *pdev, unsigned long uaddr, size_t len,
|
||||
struct ddp_gather_list **newgl,
|
||||
const struct ddp_gather_list *gl);
|
||||
int t3_ddp_copy(const struct mbuf *skb, int offset, struct iovec *to,
|
||||
int len);
|
||||
//void t3_repost_kbuf(struct socket *so, int modulate, int activate);
|
||||
void t3_post_kbuf(struct socket *so, int modulate);
|
||||
int t3_post_ubuf(struct socket *so, const struct iovec *iov, int nonblock,
|
||||
int rcv_flags, int modulate, int post_kbuf);
|
||||
void t3_cancel_ubuf(struct socket *so);
|
||||
int t3_overlay_ubuf(struct socket *so, const struct iovec *iov, int nonblock,
|
||||
int rcv_flags, int modulate, int post_kbuf);
|
||||
int t3_enter_ddp(struct socket *so, unsigned int kbuf_size, unsigned int waitall);
|
||||
void t3_cleanup_ddp(struct socket *so);
|
||||
void t3_release_ddp_resources(struct toepcb *toep);
|
||||
void t3_cancel_ddpbuf(struct socket *so, unsigned int bufidx);
|
||||
void t3_overlay_ddpbuf(struct socket *so, unsigned int bufidx, unsigned int tag0,
|
||||
unsigned int tag1, unsigned int len);
|
||||
void t3_setup_ddpbufs(struct socket *so, unsigned int len0, unsigned int offset0,
|
||||
unsigned int len1, unsigned int offset1,
|
||||
uint64_t ddp_flags, uint64_t flag_mask, int modulate);
|
||||
#endif /* T3_DDP_H */
|
sys/dev/cxgb/ulp/tom/cxgb_toepcb.h (new file, 112 lines)
@@ -0,0 +1,112 @@
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2007, Chelsio Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Neither the name of the Chelsio Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
#ifndef CXGB_TOEPCB_H_
|
||||
#define CXGB_TOEPCB_H_
|
||||
#include <sys/bus.h>
|
||||
#include <dev/cxgb/sys/mbufq.h>
|
||||
|
||||
struct toepcb {
|
||||
struct toedev *tp_toedev;
|
||||
struct l2t_entry *tp_l2t;
|
||||
pr_ctloutput_t *tp_ctloutput;
|
||||
unsigned int tp_tid;
|
||||
int tp_wr_max;
|
||||
int tp_wr_avail;
|
||||
int tp_wr_unacked;
|
||||
int tp_delack_mode;
|
||||
int tp_mtu_idx;
|
||||
int tp_ulp_mode;
|
||||
int tp_qset_idx;
|
||||
int tp_mss_clamp;
|
||||
int tp_qset;
|
||||
int tp_flags;
|
||||
int tp_enqueued_bytes;
|
||||
int tp_page_count;
|
||||
int tp_state;
|
||||
|
||||
tcp_seq tp_iss;
|
||||
tcp_seq tp_delack_seq;
|
||||
tcp_seq tp_rcv_wup;
|
||||
tcp_seq tp_copied_seq;
|
||||
uint64_t tp_write_seq;
|
||||
|
||||
volatile int tp_refcount;
|
||||
vm_page_t *tp_pages;
|
||||
|
||||
struct tcpcb *tp_tp;
|
||||
struct mbuf *tp_m_last;
|
||||
bus_dma_tag_t tp_tx_dmat;
|
||||
bus_dmamap_t tp_dmamap;
|
||||
|
||||
LIST_ENTRY(toepcb) synq_entry;
|
||||
struct mbuf_head wr_list;
|
||||
struct mbuf_head out_of_order_queue;
|
||||
struct ddp_state tp_ddp_state;
|
||||
};
|
||||
|
||||
static inline void
|
||||
reset_wr_list(struct toepcb *toep)
|
||||
{
|
||||
|
||||
mbufq_init(&toep->wr_list);
|
||||
}
|
||||
|
||||
static inline void
|
||||
purge_wr_queue(struct toepcb *toep)
|
||||
{
|
||||
struct mbuf *m;
|
||||
|
||||
while ((m = mbufq_dequeue(&toep->wr_list)) != NULL)
|
||||
m_freem(m);
|
||||
}
|
||||
|
||||
static inline void
|
||||
enqueue_wr(struct toepcb *toep, struct mbuf *m)
|
||||
{
|
||||
|
||||
mbufq_tail(&toep->wr_list, m);
|
||||
}
|
||||
|
||||
static inline struct mbuf *
|
||||
peek_wr(struct toepcb *toep)
|
||||
{
|
||||
|
||||
return (mbufq_peek(&toep->wr_list));
|
||||
}
|
||||
|
||||
static inline struct mbuf *
|
||||
dequeue_wr(struct toepcb *toep)
|
||||
{
|
||||
|
||||
return (mbufq_dequeue(&toep->wr_list));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
sys/dev/cxgb/ulp/tom/cxgb_tom.c (new file, 500 lines)
@@ -0,0 +1,500 @@
|
||||
/**************************************************************************
|
||||
|
||||
Copyright (c) 2007, Chelsio Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
2. Neither the name of the Chelsio Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
***************************************************************************/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/fcntl.h>
|
||||
#include <sys/limits.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/eventhandler.h>
|
||||
#include <sys/mbuf.h>
|
||||
#include <sys/module.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/syslog.h>
|
||||
#include <sys/socketvar.h>
|
||||
#include <sys/taskqueue.h>
|
||||
|
||||
#include <net/if.h>
|
||||
#include <net/route.h>
|
||||
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/in_pcb.h>
|
||||
#include <netinet/in_systm.h>
|
||||
#include <netinet/in_var.h>
|
||||
|
||||
#include <dev/cxgb/cxgb_osdep.h>
|
||||
#include <dev/cxgb/sys/mbufq.h>
|
||||
|
||||
#include <netinet/in_pcb.h>
|
||||
#include <netinet/tcp.h>
|
||||
#include <netinet/tcp_var.h>
|
||||
#include <netinet/tcp_ofld.h>
|
||||
#include <netinet/tcp_fsm.h>
|
||||
#include <net/route.h>
|
||||
|
||||
#include <dev/cxgb/t3cdev.h>
|
||||
#include <dev/cxgb/common/cxgb_firmware_exports.h>
|
||||
#include <dev/cxgb/common/cxgb_tcb.h>
|
||||
#include <dev/cxgb/cxgb_include.h>
|
||||
#include <dev/cxgb/common/cxgb_ctl_defs.h>
|
||||
#include <dev/cxgb/common/cxgb_t3_cpl.h>
|
||||
#include <dev/cxgb/cxgb_offload.h>
|
||||
#include <dev/cxgb/cxgb_l2t.h>
|
||||
#include <dev/cxgb/ulp/toecore/cxgb_toedev.h>
|
||||
#include <dev/cxgb/ulp/tom/cxgb_tom.h>
|
||||
#include <dev/cxgb/ulp/tom/cxgb_defs.h>
|
||||
#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h>
|
||||
#include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
|
||||
|
||||
static int activated = 1;
|
||||
TUNABLE_INT("hw.t3toe.activated", &activated);
|
||||
SYSCTL_NODE(_hw, OID_AUTO, t3toe, CTLFLAG_RD, 0, "T3 toe driver parameters");
|
||||
SYSCTL_UINT(_hw_t3toe, OID_AUTO, activated, CTLFLAG_RDTUN, &activated, 0,
|
||||
"enable TOE at init time");
|
||||
|
||||
static TAILQ_HEAD(, tom_data) cxgb_list;
|
||||
static struct mtx cxgb_list_lock;
|
||||
|
||||
static int t3_toe_attach(struct toedev *dev, const struct offload_id *entry);
|
||||
/*
|
||||
* Handlers for each CPL opcode
|
||||
*/
|
||||
static cxgb_cpl_handler_func tom_cpl_handlers[NUM_CPL_CMDS];
|
||||
|
||||
static eventhandler_tag listen_tag;
|
||||
|
||||
static struct offload_id t3_toe_id_tab[] = {
|
||||
{ TOE_ID_CHELSIO_T3, 0 },
|
||||
{ TOE_ID_CHELSIO_T3B, 0 },
|
||||
{ 0 }
|
||||
};
|
||||
|
||||
static struct tom_info t3_tom_info = {
|
||||
.ti_attach = t3_toe_attach,
|
||||
.ti_id_table = t3_toe_id_tab,
|
||||
.ti_name = "Chelsio-T3"
|
||||
};
|
||||
|
||||
struct cxgb_client t3c_tom_client = {
|
||||
.name = "tom_cxgb3",
|
||||
.remove = NULL,
|
||||
.handlers = tom_cpl_handlers,
|
||||
.redirect = NULL
|
||||
};
|
||||
|
||||
/*
|
||||
* Add an skb to the deferred skb queue for processing from process context.
|
||||
*/
|
||||
void
|
||||
t3_defer_reply(struct mbuf *m, struct toedev *dev, defer_handler_t handler)
|
||||
{
|
||||
struct tom_data *td = TOM_DATA(dev);
|
||||
|
||||
m_set_handler(m, handler);
|
||||
mtx_lock(&td->deferq.lock);
|
||||
|
||||
mbufq_tail(&td->deferq, m);
|
||||
if (mbufq_len(&td->deferq) == 1)
|
||||
taskqueue_enqueue(td->tq, &td->deferq_task);
|
||||
mtx_unlock(&td->deferq.lock);
|
||||
}
|
||||
|
||||
struct toepcb *
|
||||
toepcb_alloc(void)
|
||||
{
|
||||
struct toepcb *toep;
|
||||
|
||||
toep = malloc(sizeof(struct toepcb), M_DEVBUF, M_NOWAIT);
|
||||
|
||||
if (toep == NULL)
|
||||
return (NULL);
|
||||
|
||||
toepcb_init(toep);
|
||||
return (toep);
|
||||
}
|
||||
|
||||
void
|
||||
toepcb_init(struct toepcb *toep)
|
||||
{
|
||||
bzero(toep, sizeof(*toep));
|
||||
toep->tp_refcount = 1;
|
||||
}
|
||||
|
||||
void
|
||||
toepcb_hold(struct toepcb *toep)
|
||||
{
|
||||
atomic_add_acq_int(&toep->tp_refcount, 1);
|
||||
}
|
||||
|
||||
void
|
||||
toepcb_release(struct toepcb *toep)
|
||||
{
|
||||
if (toep->tp_refcount == 1) {
|
||||
printf("doing final toepcb free\n");
|
||||
|
||||
free(toep, M_DEVBUF);
|
||||
return;
|
||||
}
|
||||
|
||||
atomic_add_acq_int(&toep->tp_refcount, -1);
|
||||
}
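/*
 * Illustrative sketch, not part of the original commit: the intended
 * toepcb reference-counting lifecycle using the helpers above.  The
 * function below is hypothetical.
 */
static void
example_toepcb_lifecycle(void)
{
	struct toepcb *toep;

	if ((toep = toepcb_alloc()) == NULL)	/* starts with one reference */
		return;
	toepcb_hold(toep);	/* extra reference, e.g. while a CPL is pending */
	toepcb_release(toep);	/* drop the extra reference */
	toepcb_release(toep);	/* last reference: the structure is freed */
}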
|
||||
|
||||
/*
|
||||
* Add a T3 offload device to the list of devices we are managing.
|
||||
*/
|
||||
static void
|
||||
t3cdev_add(struct tom_data *t)
|
||||
{
|
||||
mtx_lock(&cxgb_list_lock);
|
||||
TAILQ_INSERT_TAIL(&cxgb_list, t, entry);
|
||||
mtx_unlock(&cxgb_list_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate a TOM data structure,
|
||||
* initialize its cpl_handlers
|
||||
* and register it as a T3C client
|
||||
*/
|
||||
static void t3c_tom_add(struct t3cdev *cdev)
|
||||
{
|
||||
int i;
|
||||
unsigned int wr_len;
|
||||
struct tom_data *t;
|
||||
struct toedev *tdev;
|
||||
struct adap_ports *port_info;
|
||||
|
||||
t = malloc(sizeof(*t), M_CXGB, M_NOWAIT|M_ZERO);
|
||||
|
||||
if (!t)
|
||||
return;
|
||||
|
||||
if (cdev->ctl(cdev, GET_WR_LEN, &wr_len) < 0)
|
||||
goto out_free_tom;
|
||||
|
||||
port_info = malloc(sizeof(*port_info), M_CXGB, M_NOWAIT|M_ZERO);
|
||||
if (!port_info)
|
||||
goto out_free_tom;
|
||||
|
||||
if (cdev->ctl(cdev, GET_PORTS, port_info) < 0)
|
||||
goto out_free_all;
|
||||
|
||||
t3_init_wr_tab(wr_len);
|
||||
t->cdev = cdev;
|
||||
t->client = &t3c_tom_client;
|
||||
|
||||
/* Register TCP offload device */
|
||||
tdev = &t->tdev;
|
||||
tdev->tod_ttid = (cdev->type == T3A ?
|
||||
TOE_ID_CHELSIO_T3 : TOE_ID_CHELSIO_T3B);
|
||||
tdev->tod_lldev = cdev->lldev;
|
||||
|
||||
if (register_toedev(tdev, "toe%d")) {
|
||||
printf("unable to register offload device");
|
||||
goto out_free_all;
|
||||
}
|
||||
TOM_DATA(tdev) = t;
|
||||
|
||||
for (i = 0; i < port_info->nports; i++) {
|
||||
struct ifnet *ifp = port_info->lldevs[i];
|
||||
TOEDEV(ifp) = tdev;
|
||||
|
||||
ifp->if_capabilities |= IFCAP_TOE;
|
||||
}
|
||||
t->ports = port_info;
|
||||
|
||||
/* Add device to the list of offload devices */
|
||||
t3cdev_add(t);
|
||||
|
||||
/* Activate TCP offload device */
|
||||
activate_offload(tdev);
|
||||
return;
|
||||
|
||||
out_free_all:
|
||||
free(port_info, M_CXGB);
|
||||
out_free_tom:
|
||||
free(t, M_CXGB);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Process a received packet with an unknown/unexpected CPL opcode.
|
||||
*/
|
||||
static int
|
||||
do_bad_cpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
|
||||
{
|
||||
log(LOG_ERR, "%s: received bad CPL command %u\n", cdev->name,
|
||||
*mtod(m, unsigned int *));
|
||||
|
||||
return (CPL_RET_BUF_DONE | CPL_RET_BAD_MSG);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Add a new handler to the CPL dispatch table. A NULL handler may be supplied
|
||||
* to unregister an existing handler.
|
||||
*/
|
||||
void
|
||||
t3tom_register_cpl_handler(unsigned int opcode, cxgb_cpl_handler_func h)
|
||||
{
|
||||
if (opcode < NUM_CPL_CMDS)
|
||||
tom_cpl_handlers[opcode] = h ? h : do_bad_cpl;
|
||||
else
|
||||
log(LOG_ERR, "Chelsio T3 TOM: handler registration for "
|
||||
"opcode %u failed\n", opcode);
|
||||
}
|
||||
|
||||
/*
|
||||
* Make a preliminary determination if a connection can be offloaded. It's OK
|
||||
* to fail the offload later if we say we can offload here. For now this
|
||||
* always accepts the offload request unless there are IP options.
|
||||
*/
|
||||
static int
|
||||
can_offload(struct toedev *dev, struct socket *so)
|
||||
{
|
||||
struct tom_data *tomd = TOM_DATA(dev);
|
||||
struct t3cdev *cdev = T3CDEV(dev->tod_lldev);
|
||||
struct tid_info *t = &(T3C_DATA(cdev))->tid_maps;
|
||||
|
||||
return sotoinpcb(so)->inp_depend4.inp4_options == NULL &&
|
||||
tomd->conf.activated &&
|
||||
(tomd->conf.max_conn < 0 ||
|
||||
atomic_load_acq_int(&t->tids_in_use) + t->atids_in_use < tomd->conf.max_conn);
|
||||
}
|
||||
|
||||
|
||||
static int tom_ctl(struct toedev *dev, unsigned int req, void *data)
|
||||
{
|
||||
struct tom_data *t = TOM_DATA(dev);
|
||||
struct t3cdev *cdev = t->cdev;
|
||||
|
||||
if (cdev->ctl)
|
||||
return cdev->ctl(cdev, req, data);
|
||||
|
||||
return (EOPNOTSUPP);
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize the CPL dispatch table.
|
||||
*/
|
||||
static void
|
||||
init_cpl_handlers(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NUM_CPL_CMDS; ++i)
|
||||
tom_cpl_handlers[i] = do_bad_cpl;
|
||||
|
||||
t3_init_listen_cpl_handlers();
|
||||
}
|
||||
|
||||
static int
|
||||
t3_toe_attach(struct toedev *dev, const struct offload_id *entry)
|
||||
{
|
||||
struct tom_data *t = TOM_DATA(dev);
|
||||
struct t3cdev *cdev = t->cdev;
|
||||
struct ddp_params ddp;
|
||||
struct ofld_page_info rx_page_info;
|
||||
int err;
|
||||
|
||||
#if 0
|
||||
skb_queue_head_init(&t->deferq);
|
||||
T3_INIT_WORK(&t->deferq_task, process_deferq, t);
|
||||
spin_lock_init(&t->listen_lock);
|
||||
#endif
|
||||
t3_init_tunables(t);
|
||||
mtx_init(&t->listen_lock, "tom data listeners", NULL, MTX_DEF);
|
||||
|
||||
/* Adjust TOE activation for this module */
|
||||
t->conf.activated = activated;
|
||||
|
||||
dev->tod_can_offload = can_offload;
|
||||
dev->tod_connect = t3_connect;
|
||||
dev->tod_ctl = tom_ctl;
|
||||
#if 0
|
||||
#ifndef NETEVENT
|
||||
dev->tod_neigh_update = tom_neigh_update;
|
||||
#endif
|
||||
dev->tod_failover = t3_failover;
|
||||
#endif
|
||||
err = cdev->ctl(cdev, GET_DDP_PARAMS, &ddp);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = cdev->ctl(cdev, GET_RX_PAGE_INFO, &rx_page_info);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
t->ddp_llimit = ddp.llimit;
|
||||
t->ddp_ulimit = ddp.ulimit;
|
||||
t->pdev = ddp.pdev;
|
||||
t->rx_page_size = rx_page_info.page_size;
|
||||
#ifdef notyet
|
||||
/* OK if this fails, we just can't do DDP */
|
||||
t->nppods = (ddp.ulimit + 1 - ddp.llimit) / PPOD_SIZE;
|
||||
t->ppod_map = t3_alloc_mem(t->nppods);
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
spin_lock_init(&t->ppod_map_lock);
|
||||
tom_proc_init(dev);
|
||||
#ifdef CONFIG_SYSCTL
|
||||
t->sysctl = t3_sysctl_register(dev, &t->conf);
|
||||
#endif
|
||||
#endif
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
cxgb_toe_listen(void *unused, int event, struct tcpcb *tp)
{
	struct socket *so = tp->t_inpcb->inp_socket;
	struct tom_data *p;

	switch (event) {
	case OFLD_LISTEN_OPEN:
	case OFLD_LISTEN_CLOSE:
		mtx_lock(&cxgb_list_lock);
		TAILQ_FOREACH(p, &cxgb_list, entry) {
			if (event == OFLD_LISTEN_OPEN)
				t3_listen_start(&p->tdev, so, p->cdev);
			else if (tp->t_state == TCPS_LISTEN) {
				printf("stopping listen on port=%d\n",
				    ntohs(tp->t_inpcb->inp_lport));
				t3_listen_stop(&p->tdev, so, p->cdev);
			}
		}
		mtx_unlock(&cxgb_list_lock);
		break;
	default:
		log(LOG_ERR, "unrecognized listen event %d\n", event);
		break;
	}
}

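/*
 * Walk the TCP pcb list and install hardware listeners for any sockets that
 * are already in the LISTEN state.
 */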
static void
cxgb_register_listeners(void)
{
	struct inpcb *inp;
	struct tcpcb *tp;

	INP_INFO_RLOCK(&tcbinfo);
	LIST_FOREACH(inp, tcbinfo.ipi_listhead, inp_list) {
		tp = intotcpcb(inp);

		if (tp->t_state == TCPS_LISTEN)
			cxgb_toe_listen(NULL, OFLD_LISTEN_OPEN, tp);
	}
	INP_INFO_RUNLOCK(&tcbinfo);
}

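/*
 * Module initialization: set up the CPL handlers and socket operations,
 * register with the TOE device layer, and announce this TOM to the cxgb
 * driver as an offload client.
 */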
static int
t3_tom_init(void)
{

#if 0
	struct socket *sock;
	err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		printk(KERN_ERR "Could not create TCP socket, error %d\n", err);
		return err;
	}

	t3_def_state_change = sock->sk->sk_state_change;
	t3_def_data_ready = sock->sk->sk_data_ready;
	t3_def_error_report = sock->sk->sk_error_report;
	sock_release(sock);
#endif
	init_cpl_handlers();
	if (t3_init_cpl_io() < 0)
		return -1;
	t3_init_socket_ops();

	/* Register with the TOE device layer. */

	if (register_tom(&t3_tom_info) != 0) {
		log(LOG_ERR,
		    "Unable to register Chelsio T3 TCP offload module.\n");
		return -1;
	}

	mtx_init(&cxgb_list_lock, "cxgb tom list", NULL, MTX_DEF);
	listen_tag = EVENTHANDLER_REGISTER(ofld_listen, cxgb_toe_listen, NULL,
	    EVENTHANDLER_PRI_ANY);
	TAILQ_INIT(&cxgb_list);

	/* Register with the offloading devices. */
	t3c_tom_client.add = t3c_tom_add;
	cxgb_register_client(&t3c_tom_client);
	cxgb_register_listeners();
	return (0);
}

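/*
 * Module event handler.  Only MOD_LOAD does real work; unloading the TOE
 * module is not supported.
 */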
static int
t3_tom_load(module_t mod, int cmd, void *arg)
{
	int err = 0;

	switch (cmd) {
	case MOD_LOAD:
		printf("Chelsio T3 TCP offload module loading\n");
		t3_tom_init();
		break;
	case MOD_QUIESCE:
		break;
	case MOD_UNLOAD:
		printf("unloading is not supported for the TOE module\n");
		break;
	case MOD_SHUTDOWN:
		break;
	default:
		err = EOPNOTSUPP;
		break;
	}

	return (err);
}

static moduledata_t mod_data = {
	"t3_tom",
	t3_tom_load,
	0
};
MODULE_VERSION(t3_tom, 1);
MODULE_DEPEND(t3_tom, toecore, 1, 1, 1);
MODULE_DEPEND(t3_tom, if_cxgb, 1, 1, 1);
DECLARE_MODULE(t3_tom, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);

157	sys/dev/cxgb/ulp/tom/cxgb_tom.h	Normal file
@@ -0,0 +1,157 @@
/**************************************************************************

Copyright (c) 2007, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.


$FreeBSD$

***************************************************************************/
#ifndef CXGB_TOM_H_
#define CXGB_TOM_H_
#include <sys/protosw.h>

#define LISTEN_INFO_HASH_SIZE 32

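/*
 * Bookkeeping for a listening socket that has been installed in the
 * hardware.  Entries are chained in the tom_data listen hash table.
 */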
struct listen_info {
	struct listen_info *next;	/* Link to next entry */
	struct socket *so;		/* The listening socket */
	unsigned int stid;		/* The server TID */
};


/*
 * TOM tunable parameters.  They can be manipulated through sysctl(2).
 */
struct tom_tunables {
	int max_host_sndbuf;	/* max host RAM consumed by a sndbuf */
	int tx_hold_thres;	/* push/pull threshold for non-full TX sk_buffs */
	int max_wrs;		/* max # of outstanding WRs per connection */
	int rx_credit_thres;	/* min # of RX credits needed for RX_DATA_ACK */
	int cong_alg;		/* congestion control algorithm */
	int mss;		/* max TX_DATA WR payload size */
	int delack;		/* delayed ACK control */
	int max_conn;		/* maximum number of offloaded connections */
	int soft_backlog_limit;	/* whether the listen backlog limit is soft */
	int ddp;		/* whether to put new connections in DDP mode */
	int ddp_thres;		/* min recvmsg size before activating DDP */
	int ddp_copy_limit;	/* capacity of kernel DDP buffer */
	int ddp_push_wait;	/* whether blocking DDP waits for PSH flag */
	int ddp_rcvcoalesce;	/* whether receive coalescing is enabled */
	int zcopy_sosend_enabled;	/* whether zero-copy sosend is enabled */
	int zcopy_sosend_partial_thres;	/* < is never zcopied */
	int zcopy_sosend_partial_copy;	/* bytes copied in partial zcopy */
	int zcopy_sosend_thres;	/* >= are mostly zcopied */
	int zcopy_sosend_copy;	/* bytes copied in zcopy */
	int zcopy_sosend_ret_pending_dma; /* potentially return while DMA is pending */
	int activated;		/* TOE engine activation state */
};

struct tom_data {
	TAILQ_ENTRY(tom_data) entry;

	struct t3cdev *cdev;
	struct pci_dev *pdev;
	struct toedev tdev;

	struct cxgb_client *client;
	struct tom_tunables conf;
	struct tom_sysctl_table *sysctl;

	/*
	 * The next three locks listen_lock, deferq.lock, and tid_release_lock
	 * are used rarely so we let them potentially share a cacheline.
	 */
	struct listen_info *listen_hash_tab[LISTEN_INFO_HASH_SIZE];
	struct mtx listen_lock;

	struct mbuf_head deferq;
	struct task deferq_task;

	struct socket **tid_release_list;
	struct mtx tid_release_lock;
	struct task tid_release_task;

	volatile int tx_dma_pending;

	unsigned int ddp_llimit;
	unsigned int ddp_ulimit;

	unsigned int rx_page_size;

	u8 *ppod_map;
	unsigned int nppods;
	struct mtx ppod_map_lock;

	struct adap_ports *ports;
	struct taskqueue *tq;
};


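/*
 * Per-listener state used while offloaded connections are being set up;
 * embryonic connections are kept on the synq_head list.
 */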
struct listen_ctx {
	struct socket *lso;
	struct tom_data *tom_data;
	int ulp_mode;
	LIST_HEAD(, toepcb) synq_head;
};

#define TOM_DATA(dev)		(*(struct tom_data **)&(dev)->tod_l4opt)
#define T3C_DEV(sk)		((TOM_DATA(TOE_DEV(sk)))->cdev)
#define TOEP_T3C_DEV(toep)	(TOM_DATA(toep->tp_toedev)->cdev)
#define TOM_TUNABLE(dev, param)	(TOM_DATA(dev)->conf.param)

#define TP_DATASENT		(1 << 0)
#define TP_TX_WAIT_IDLE		(1 << 1)
#define TP_FIN_SENT		(1 << 2)
#define TP_ABORT_RPL_PENDING	(1 << 3)
#define TP_ABORT_SHUTDOWN	(1 << 4)
#define TP_ABORT_RPL_RCVD	(1 << 5)
#define TP_ABORT_REQ_RCVD	(1 << 6)
#define TP_CLOSE_CON_REQUESTED	(1 << 7)
#define TP_SYN_RCVD		(1 << 8)
#define TP_ESTABLISHED		(1 << 9)

void t3_init_tunables(struct tom_data *t);

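/*
 * Allocate an mbuf with a packet header and set its length.  Allocation
 * failure currently panics (a low-memory cache is still to be implemented);
 * the requested length must fit within MHLEN.
 */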
static __inline struct mbuf *
m_gethdr_nofail(int len)
{
	struct mbuf *m;

	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL) {
		panic("implement lowmem cache\n");
	}

	KASSERT(len < MHLEN, ("requested header size too large for mbuf"));
	m->m_pkthdr.len = m->m_len = len;
	return (m);
}

#endif
106	sys/dev/cxgb/ulp/tom/cxgb_tom_sysctl.c	Normal file
@@ -0,0 +1,106 @@
/**************************************************************************

Copyright (c) 2007, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/socketvar.h>

#include <net/if.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>

#include <dev/cxgb/cxgb_osdep.h>
#include <dev/cxgb/sys/mbufq.h>

#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
#include <net/route.h>

#include <dev/cxgb/t3cdev.h>
#include <dev/cxgb/common/cxgb_firmware_exports.h>
#include <dev/cxgb/common/cxgb_tcb.h>
#include <dev/cxgb/common/cxgb_ctl_defs.h>
#include <dev/cxgb/common/cxgb_t3_cpl.h>
#include <dev/cxgb/cxgb_offload.h>
#include <dev/cxgb/cxgb_l2t.h>
#include <dev/cxgb/ulp/toecore/cxgb_toedev.h>
#include <dev/cxgb/ulp/tom/cxgb_tom.h>
#include <dev/cxgb/ulp/tom/cxgb_defs.h>
#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h>

static struct tom_tunables default_tunable_vals = {
	.max_host_sndbuf = 32 * 1024,
	.tx_hold_thres = 0,
	.max_wrs = 15,
	.rx_credit_thres = 15 * 1024,
	.cong_alg = -1,
	.mss = 16384,
	.delack = 1,
	.max_conn = -1,
	.soft_backlog_limit = 0,
	.ddp = 0,
	.ddp_thres = 14 * 4096,
	.ddp_copy_limit = 13 * 4096,
	.ddp_push_wait = 1,
	.ddp_rcvcoalesce = 0,
	.zcopy_sosend_enabled = 0,
	.zcopy_sosend_partial_thres = 40960,
	.zcopy_sosend_partial_copy = 4096 * 3,
	.zcopy_sosend_thres = 128 * 1024,
	.zcopy_sosend_copy = 4096 * 2,
	.zcopy_sosend_ret_pending_dma = 1,
	.activated = 1,
};

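/*
 * Start a TOM instance off with the default tunable values, then apply the
 * per-device limits reported by the underlying t3cdev.
 */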
void
t3_init_tunables(struct tom_data *t)
{
	t->conf = default_tunable_vals;

	/* Now apply device specific fixups. */
	t->conf.mss = T3C_DATA(t->cdev)->tx_max_chunk;
	t->conf.max_wrs = T3C_DATA(t->cdev)->max_wrs;
}