/**************************************************************************

Copyright (c) 2007, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <sys/priv.h>

#include <net/if.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>

#include <dev/cxgb/cxgb_osdep.h>
#include <dev/cxgb/sys/mbufq.h>

#include <netinet/ip.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_offload.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_syncache.h>
#include <netinet/tcp_timer.h>

#include <dev/cxgb/t3cdev.h>
#include <dev/cxgb/common/cxgb_firmware_exports.h>
#include <dev/cxgb/common/cxgb_t3_cpl.h>
#include <dev/cxgb/common/cxgb_tcb.h>
#include <dev/cxgb/common/cxgb_ctl_defs.h>
#include <dev/cxgb/cxgb_l2t.h>
#include <dev/cxgb/cxgb_offload.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/bus.h>
#include <dev/cxgb/sys/mvec.h>
#include <dev/cxgb/ulp/toecore/cxgb_toedev.h>
#include <dev/cxgb/ulp/tom/cxgb_defs.h>
#include <dev/cxgb/ulp/tom/cxgb_tom.h>
#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h>
#include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
#include <dev/cxgb/ulp/tom/cxgb_tcp.h>

/*
 * For ULP connections HW may add headers, e.g., for digests, that aren't part
 * of the messages sent by the host but that are part of the TCP payload and
 * therefore consume TCP sequence space.  Tx connection parameters that
 * operate in TCP sequence space are affected by the HW additions and need to
 * compensate for them to accurately track TCP sequence numbers.  This array
 * contains the compensating extra lengths for ULP packets.  It is indexed by
 * a packet's ULP submode.
 */
const unsigned int t3_ulp_extra_len[] = {0, 4, 4, 8};

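/*
 * Note: the ULP submode used to index this table is a bitmask, and each set
 * bit appears to contribute a 4-byte digest (presumably the iSCSI header and
 * data CRCs), giving {0, 4, 4, 8} for the four combinations.  This reading
 * is inferred from the values above, not from documentation.
 */
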
#ifdef notyet
/*
 * This sk_buff holds a fake header-only TCP segment that we use whenever we
 * need to exploit SW TCP functionality that expects TCP headers, such as
 * tcp_create_openreq_child().  It's a RO buffer that may be used by multiple
 * CPUs without locking.
 */
static struct mbuf *tcphdr_mbuf __read_mostly;
#endif

/*
 * Size of WRs in bytes.  Note that we assume all devices we are handling have
 * the same WR size.
 */
static unsigned int wrlen __read_mostly;

/*
 * The number of WRs needed for an skb depends on the number of page fragments
 * in the skb and whether it has any payload in its main body.  This maps the
 * length of the gather list represented by an skb into the # of necessary WRs.
 */
static unsigned int mbuf_wrs[TX_MAX_SEGS + 1] __read_mostly;

/*
 * Max receive window supported by HW in bytes.  Only a small part of it can
 * be set through option0, the rest needs to be set through RX_DATA_ACK.
 */
#define MAX_RCV_WND ((1U << 27) - 1)

/*
 * Min receive window.  We want it to be large enough to accommodate receive
 * coalescing, handle jumbo frames, and not trigger sender SWS avoidance.
 */
#define MIN_RCV_WND (24 * 1024U)
#define SO_TOS(so) ((sotoinpcb(so)->inp_ip_tos >> 2) & M_TOS)

#define VALIDATE_SEQ 0
#define VALIDATE_SOCK(so)
#define DEBUG_WR 0

extern int tcp_do_autorcvbuf;
extern int tcp_do_autosndbuf;
extern int tcp_autorcvbuf_max;
extern int tcp_autosndbuf_max;

static void t3_send_reset(struct toepcb *toep);
static void send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status);
static inline void free_atid(struct t3cdev *cdev, unsigned int tid);
static void handle_syncache_event(int event, void *arg);

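/*
 * Debug wrapper around sbappend_locked().  It walks both the existing
 * socket-buffer chain and the chain being appended, before and after the
 * append, asserting that every mbuf is either a plain mbuf or an EXT_EXTREF
 * external mbuf and that no m_next pointer has been poisoned.
 */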
static inline void
SBAPPEND(struct sockbuf *sb, struct mbuf *n)
{
        struct mbuf *m;

        m = sb->sb_mb;
        while (m) {
                KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) ||
                    !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n",
                        !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len));
                KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
                        m->m_next, m->m_nextpkt, m->m_flags));
                m = m->m_next;
        }
        m = n;
        while (m) {
                KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) ||
                    !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n",
                        !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len));
                KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
                        m->m_next, m->m_nextpkt, m->m_flags));
                m = m->m_next;
        }
        sbappend_locked(sb, n);
        m = sb->sb_mb;
        while (m) {
                KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
                        m->m_next, m->m_nextpkt, m->m_flags));
                m = m->m_next;
        }
}

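/*
 * Returns nonzero for rev-A T3 adapters, which need a few workarounds below
 * (e.g., posting CPL_ABORT_POST_CLOSE_REQ in t3_send_reset()).
 */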
static inline int
is_t3a(const struct toedev *dev)
{
        return (dev->tod_ttid == TOE_ID_CHELSIO_T3);
}

static void
dump_toepcb(struct toepcb *toep)
{
        DPRINTF("qset_idx=%d qset=%d ulp_mode=%d mtu_idx=%d tid=%d\n",
            toep->tp_qset_idx, toep->tp_qset, toep->tp_ulp_mode,
            toep->tp_mtu_idx, toep->tp_tid);

        DPRINTF("wr_max=%d wr_avail=%d wr_unacked=%d mss_clamp=%d flags=0x%x\n",
            toep->tp_wr_max, toep->tp_wr_avail, toep->tp_wr_unacked,
            toep->tp_mss_clamp, toep->tp_flags);
}

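/*
 * Fallback definition for kernels that don't provide rtalloc2(): behaves
 * like rtalloc1() but returns the routing entry unlocked.
 */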
#ifndef RTALLOC2_DEFINED
static struct rtentry *
rtalloc2(struct sockaddr *dst, int report, u_long ignflags)
{
        struct rtentry *rt = NULL;

        if ((rt = rtalloc1(dst, report, ignflags)) != NULL)
                RT_UNLOCK(rt);

        return (rt);
}
#endif

/*
 * Determine whether to send a CPL message now or defer it.  A message is
 * deferred if the connection is in SYN_SENT since we don't know the TID yet.
 * For connections in other states the message is sent immediately.
 * If through_l2t is set the message is subject to ARP processing, otherwise
 * it is sent directly.
 */
static inline void
send_or_defer(struct toepcb *toep, struct mbuf *m, int through_l2t)
{
        struct tcpcb *tp = toep->tp_tp;

        if (__predict_false(tp->t_state == TCPS_SYN_SENT)) {
                INP_LOCK(tp->t_inpcb);
                mbufq_tail(&toep->out_of_order_queue, m);       // defer
                INP_UNLOCK(tp->t_inpcb);
        } else if (through_l2t)
                l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t);  // send through L2T
        else
                cxgb_ofld_send(TOEP_T3C_DEV(toep), m);          // send directly
}

static inline unsigned int
mkprio(unsigned int cntrl, const struct toepcb *toep)
{
        return (cntrl);
}

/*
 * Populate a TID_RELEASE WR.  The mbuf must be already properly sized.
 */
static inline void
mk_tid_release(struct mbuf *m, const struct toepcb *toep, unsigned int tid)
{
        struct cpl_tid_release *req;

        m_set_priority(m, mkprio(CPL_PRIORITY_SETUP, toep));
        m->m_pkthdr.len = m->m_len = sizeof(*req);
        req = mtod(m, struct cpl_tid_release *);
        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
        req->wr.wr_lo = 0;
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
}

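/*
 * Fill in a TX_DATA work request header for a send of len bytes.  len
 * includes the length of any HW ULP additions; tail is the remainder of the
 * send queue and is used to decide whether to set the SHOVE bit.  The first
 * WR on a connection additionally carries F_TX_INIT with the qset and the
 * send buffer size (in 32KB units).
 */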
static inline void
make_tx_data_wr(struct socket *so, struct mbuf *m, int len, struct mbuf *tail)
{
        struct tcpcb *tp = sototcpcb(so);
        struct toepcb *toep = tp->t_toe;
        struct tx_data_wr *req;

        INP_LOCK_ASSERT(tp->t_inpcb);

        req = mtod(m, struct tx_data_wr *);
        m->m_len = sizeof(*req);
        req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
        req->wr_lo = htonl(V_WR_TID(toep->tp_tid));
        /* len includes the length of any HW ULP additions */
        req->len = htonl(len);
        req->param = htonl(V_TX_PORT(toep->tp_l2t->smt_idx));
        /* V_TX_ULP_SUBMODE sets both the mode and submode */
        req->flags = htonl(V_TX_ULP_SUBMODE(/*skb_ulp_mode(skb)*/ 0) |
            V_TX_URG(/* skb_urgent(skb) */ 0) |
            V_TX_SHOVE((!(tp->t_flags & TF_MORETOCOME) &&
                (tail ? 0 : 1))));
        req->sndseq = htonl(tp->snd_nxt);
        if (__predict_false((toep->tp_flags & TP_DATASENT) == 0)) {
                req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
                    V_TX_CPU_IDX(toep->tp_qset));

                /* Sendbuffer is in units of 32KB. */
                if (tcp_do_autosndbuf && so->so_snd.sb_flags & SB_AUTOSIZE)
                        req->param |= htonl(V_TX_SNDBUF(tcp_autosndbuf_max >> 15));
                else
                        req->param |= htonl(V_TX_SNDBUF(so->so_snd.sb_hiwat >> 15));
                toep->tp_flags |= TP_DATASENT;
        }
}

#define IMM_LEN 64 /* XXX - see WR_LEN in the cxgb driver */

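/*
 * Push pending data in the socket's send buffer to the card as TX_DATA work
 * requests.  Small mbufs are sent as immediate data; larger chains are
 * described by a gather list of up to TX_MAX_SEGS segments.  Each WR consumes
 * mbuf_wrs[count] flow-control credits (stashed in m_pkthdr.csum_data so they
 * can be reclaimed when the corresponding acknowledgment arrives), and a
 * completion is requested once half of tp_wr_max credits are outstanding.
 * Returns the number of bytes pushed.
 */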
int
t3_push_frames(struct socket *so, int req_completion)
{
        struct tcpcb *tp = sototcpcb(so);
        struct toepcb *toep = tp->t_toe;
        struct mbuf *tail, *m0, *last;
        struct t3cdev *cdev;
        struct tom_data *d;
        int i, bytes, count, total_bytes;
        bus_dma_segment_t segs[TX_MAX_SEGS], *segp;

        if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_CLOSED) {
                DPRINTF("tcp state=%d\n", tp->t_state);
                return (0);
        }

        if (so->so_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) {
                DPRINTF("disconnecting\n");
                return (0);
        }

        INP_LOCK_ASSERT(tp->t_inpcb);
        SOCKBUF_LOCK(&so->so_snd);
        d = TOM_DATA(TOE_DEV(so));
        cdev = d->cdev;
        last = tail = so->so_snd.sb_sndptr ? so->so_snd.sb_sndptr : so->so_snd.sb_mb;
        total_bytes = 0;
        DPRINTF("wr_avail=%d tail=%p snd.cc=%d tp_last=%p\n",
            toep->tp_wr_avail, tail, so->so_snd.sb_cc, toep->tp_m_last);

        if (last && toep->tp_m_last == last && so->so_snd.sb_sndptroff != 0) {
                KASSERT(tail, ("sbdrop error"));
                last = tail = tail->m_next;
        }

        if ((toep->tp_wr_avail == 0) || (tail == NULL)) {
                DPRINTF("wr_avail=%d tail=%p\n", toep->tp_wr_avail, tail);
                SOCKBUF_UNLOCK(&so->so_snd);
                return (0);
        }

        toep->tp_m_last = NULL;
        while (toep->tp_wr_avail && (tail != NULL)) {
                count = bytes = 0;
                segp = segs;
                if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) {
                        SOCKBUF_UNLOCK(&so->so_snd);
                        return (0);
                }
                /*
                 * If the data in tail fits as in-line, then
                 * make an immediate data wr.
                 */
                if (tail->m_len <= IMM_LEN) {
                        count = 1;
                        bytes = tail->m_len;
                        last = tail;
                        tail = tail->m_next;
                        m_set_sgl(m0, NULL);
                        m_set_sgllen(m0, 0);
                        make_tx_data_wr(so, m0, bytes, tail);
                        m_append(m0, bytes, mtod(last, caddr_t));
                        KASSERT(!m0->m_next, ("bad append"));
                } else {
                        while ((mbuf_wrs[count + 1] <= toep->tp_wr_avail)
                            && (tail != NULL) && (count < TX_MAX_SEGS-1)) {
                                bytes += tail->m_len;
                                last = tail;
                                count++;
                                /*
                                 * technically an abuse to be using this for a VA
                                 * but less gross than defining my own structure
                                 * or calling pmap_kextract from here :-|
                                 */
                                segp->ds_addr = (bus_addr_t)tail->m_data;
                                segp->ds_len = tail->m_len;
                                DPRINTF("count=%d wr_needed=%d ds_addr=%p ds_len=%d\n",
                                    count, mbuf_wrs[count], tail->m_data, tail->m_len);
                                segp++;
                                tail = tail->m_next;
                        }
                        DPRINTF("wr_avail=%d mbuf_wrs[%d]=%d tail=%p\n",
                            toep->tp_wr_avail, count, mbuf_wrs[count], tail);

                        m_set_sgl(m0, segs);
                        m_set_sgllen(m0, count);
                        make_tx_data_wr(so, m0, bytes, tail);
                }
                m_set_priority(m0, mkprio(CPL_PRIORITY_DATA, toep));

                if (tail) {
                        so->so_snd.sb_sndptr = tail;
                        toep->tp_m_last = NULL;
                } else
                        toep->tp_m_last = so->so_snd.sb_sndptr = last;

                DPRINTF("toep->tp_m_last=%p\n", toep->tp_m_last);

                so->so_snd.sb_sndptroff += bytes;
                total_bytes += bytes;
                toep->tp_write_seq += bytes;
                CTR6(KTR_TOM, "t3_push_frames: wr_avail=%d mbuf_wrs[%d]=%d tail=%p sndptr=%p sndptroff=%d",
                    toep->tp_wr_avail, count, mbuf_wrs[count], tail, so->so_snd.sb_sndptr, so->so_snd.sb_sndptroff);
                if (tail)
                        CTR4(KTR_TOM, "t3_push_frames: total_bytes=%d tp_m_last=%p tailbuf=%p snd_una=0x%08x",
                            total_bytes, toep->tp_m_last, tail->m_data, tp->snd_una);
                else
                        CTR3(KTR_TOM, "t3_push_frames: total_bytes=%d tp_m_last=%p snd_una=0x%08x",
                            total_bytes, toep->tp_m_last, tp->snd_una);

                i = 0;
                while (i < count && m_get_sgllen(m0)) {
                        if ((count - i) >= 3) {
                                CTR6(KTR_TOM,
                                    "t3_push_frames: pa=0x%zx len=%d pa=0x%zx len=%d pa=0x%zx len=%d",
                                    segs[i].ds_addr, segs[i].ds_len, segs[i + 1].ds_addr, segs[i + 1].ds_len,
                                    segs[i + 2].ds_addr, segs[i + 2].ds_len);
                                i += 3;
                        } else if ((count - i) == 2) {
                                CTR4(KTR_TOM,
                                    "t3_push_frames: pa=0x%zx len=%d pa=0x%zx len=%d",
                                    segs[i].ds_addr, segs[i].ds_len, segs[i + 1].ds_addr, segs[i + 1].ds_len);
                                i += 2;
                        } else {
                                CTR2(KTR_TOM, "t3_push_frames: pa=0x%zx len=%d",
                                    segs[i].ds_addr, segs[i].ds_len);
                                i++;
                        }
                }

                /*
                 * remember credits used
                 */
                m0->m_pkthdr.csum_data = mbuf_wrs[count];
                m0->m_pkthdr.len = bytes;
                toep->tp_wr_avail -= mbuf_wrs[count];
                toep->tp_wr_unacked += mbuf_wrs[count];

                if ((req_completion && toep->tp_wr_unacked == mbuf_wrs[count]) ||
                    toep->tp_wr_unacked >= toep->tp_wr_max / 2) {
                        struct work_request_hdr *wr = cplhdr(m0);

                        wr->wr_hi |= htonl(F_WR_COMPL);
                        toep->tp_wr_unacked = 0;
                }
                KASSERT((m0->m_pkthdr.csum_data > 0) &&
                    (m0->m_pkthdr.csum_data <= 4), ("bad credit count %d",
                        m0->m_pkthdr.csum_data));
                m0->m_type = MT_DONTFREE;
                enqueue_wr(toep, m0);
                DPRINTF("sending offload tx with %d bytes in %d segments\n",
                    bytes, count);
                l2t_send(cdev, m0, toep->tp_l2t);
        }
        SOCKBUF_UNLOCK(&so->so_snd);
        return (total_bytes);
}

/*
 * Close a connection by sending a CPL_CLOSE_CON_REQ message.  Cannot fail
 * under any circumstances.  We take the easy way out and always queue the
 * message to the write_queue.  We can optimize the case where the queue is
 * already empty though the optimization is probably not worth it.
 */
static void
close_conn(struct socket *so)
{
        struct mbuf *m;
        struct cpl_close_con_req *req;
        struct tom_data *d;
        struct inpcb *inp = sotoinpcb(so);
        struct tcpcb *tp;
        struct toepcb *toep;
        unsigned int tid;

        INP_LOCK(inp);
        tp = sototcpcb(so);
        toep = tp->t_toe;

        if (tp->t_state != TCPS_SYN_SENT)
                t3_push_frames(so, 1);

        if (toep->tp_flags & TP_FIN_SENT) {
                INP_UNLOCK(inp);
                return;
        }

        tid = toep->tp_tid;

        d = TOM_DATA(toep->tp_toedev);

        m = m_gethdr_nofail(sizeof(*req));

        toep->tp_flags |= TP_FIN_SENT;
        req = mtod(m, struct cpl_close_con_req *);

        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
        req->wr.wr_lo = htonl(V_WR_TID(tid));
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
        req->rsvd = htonl(toep->tp_write_seq);
        INP_UNLOCK(inp);
        /*
         * XXX - need to defer shutdown while there is still data in the queue
         */
        cxgb_ofld_send(d->cdev, m);
}

/*
 * Handle an ARP failure for a CPL_ABORT_REQ.  Change it into a no RST variant
 * and send it along.
 */
static void
abort_arp_failure(struct t3cdev *cdev, struct mbuf *m)
{
        struct cpl_abort_req *req = cplhdr(m);

        req->cmd = CPL_ABORT_NO_RST;
        cxgb_ofld_send(cdev, m);
}

/*
 * Send RX credits through an RX_DATA_ACK CPL message.  If nofail is 0 we are
 * permitted to return without sending the message in case we cannot allocate
 * an mbuf.  Returns the number of credits sent.
 */
uint32_t
t3_send_rx_credits(struct tcpcb *tp, uint32_t credits, uint32_t dack, int nofail)
{
        struct mbuf *m;
        struct cpl_rx_data_ack *req;
        struct toepcb *toep = tp->t_toe;
        struct toedev *tdev = toep->tp_toedev;

        m = m_gethdr_nofail(sizeof(*req));

        DPRINTF("returning %u credits to HW\n", credits);

        req = mtod(m, struct cpl_rx_data_ack *);
        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
        req->wr.wr_lo = 0;
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
        req->credit_dack = htonl(dack | V_RX_CREDITS(credits));
        m_set_priority(m, mkprio(CPL_PRIORITY_ACK, toep));
        cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
        return (credits);
}

/*
 * Send RX_DATA_ACK CPL message to request a modulation timer to be scheduled.
 * This is only used in DDP mode, so we take the opportunity to also set the
 * DACK mode and flush any Rx credits.
 */
void
t3_send_rx_modulate(struct toepcb *toep)
{
        struct mbuf *m;
        struct cpl_rx_data_ack *req;

        m = m_gethdr_nofail(sizeof(*req));

        req = mtod(m, struct cpl_rx_data_ack *);
        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
        req->wr.wr_lo = 0;
        m->m_pkthdr.len = m->m_len = sizeof(*req);

        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
        req->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
            V_RX_DACK_MODE(1) |
            V_RX_CREDITS(toep->tp_copied_seq - toep->tp_rcv_wup));
        m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
        cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
        toep->tp_rcv_wup = toep->tp_copied_seq;
}

/*
 * Handle receipt of an urgent pointer.
 */
static void
handle_urg_ptr(struct socket *so, uint32_t urg_seq)
{
#ifdef URGENT_DATA_SUPPORTED
        struct tcpcb *tp = sototcpcb(so);

        urg_seq--;   /* initially points past the urgent data, per BSD */

        if (tp->urg_data && !after(urg_seq, tp->urg_seq))
                return;                              /* duplicate pointer */
        sk_send_sigurg(sk);
        if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
            !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                tp->copied_seq++;
                if (skb && tp->copied_seq - TCP_SKB_CB(skb)->seq >= skb->len)
                        tom_eat_skb(sk, skb, 0);
        }
        tp->urg_data = TCP_URG_NOTYET;
        tp->urg_seq = urg_seq;
#endif
}

/*
 * Returns true if a socket cannot accept new Rx data.
 */
static inline int
so_no_receive(const struct socket *so)
{
        return (so->so_state & (SS_ISDISCONNECTED|SS_ISDISCONNECTING));
}

/*
 * Process an urgent data notification.
 */
static void
rx_urg_notify(struct toepcb *toep, struct mbuf *m)
{
        struct cpl_rx_urg_notify *hdr = cplhdr(m);
        struct socket *so = toeptoso(toep);

        VALIDATE_SOCK(so);

        if (!so_no_receive(so))
                handle_urg_ptr(so, ntohl(hdr->seq));

        m_freem(m);
}

/*
 * Handler for RX_URG_NOTIFY CPL messages.
 */
static int
do_rx_urg_notify(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
        struct toepcb *toep = (struct toepcb *)ctx;

        rx_urg_notify(toep, m);
        return (0);
}

static __inline int
is_delack_mode_valid(struct toedev *dev, struct toepcb *toep)
{
        return (toep->tp_ulp_mode ||
            (toep->tp_ulp_mode == ULP_MODE_TCPDDP &&
                dev->tod_ttid >= TOE_ID_CHELSIO_T3));
}

/*
 * Set of states for which we should return RX credits.
 */
#define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2)

/*
 * Called after some received data has been read.  It returns RX credits
 * to the HW for the amount of data processed.
 */
void
t3_cleanup_rbuf(struct tcpcb *tp, int copied)
{
        struct toepcb *toep = tp->t_toe;
        struct socket *so;
        struct toedev *dev;
        int dack_mode, must_send, read;
        u32 thres, credits, dack = 0;

        so = tp->t_inpcb->inp_socket;
        if (!((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_FIN_WAIT_1) ||
            (tp->t_state == TCPS_FIN_WAIT_2))) {
                if (copied) {
                        SOCKBUF_LOCK(&so->so_rcv);
                        toep->tp_copied_seq += copied;
                        SOCKBUF_UNLOCK(&so->so_rcv);
                }
                return;
        }

        INP_LOCK_ASSERT(tp->t_inpcb);
        SOCKBUF_LOCK(&so->so_rcv);
        if (copied)
                toep->tp_copied_seq += copied;
        else {
                read = toep->tp_enqueued_bytes - so->so_rcv.sb_cc;
                toep->tp_copied_seq += read;
        }
        credits = toep->tp_copied_seq - toep->tp_rcv_wup;
        toep->tp_enqueued_bytes = so->so_rcv.sb_cc;
        SOCKBUF_UNLOCK(&so->so_rcv);

        if (credits > so->so_rcv.sb_mbmax) {
                printf("copied_seq=%u rcv_wup=%u credits=%u\n",
                    toep->tp_copied_seq, toep->tp_rcv_wup, credits);
                credits = so->so_rcv.sb_mbmax;
        }

        /*
         * XXX this won't accurately reflect credit return - we need
         * to look at the difference between the amount that has been
         * put in the recv sockbuf and what is there now
         */

        if (__predict_false(!credits))
                return;

        dev = toep->tp_toedev;
        thres = TOM_TUNABLE(dev, rx_credit_thres);

        if (__predict_false(thres == 0))
                return;

        if (is_delack_mode_valid(dev, toep)) {
                dack_mode = TOM_TUNABLE(dev, delack);
                if (__predict_false(dack_mode != toep->tp_delack_mode)) {
                        u32 r = tp->rcv_nxt - toep->tp_delack_seq;

                        if (r >= tp->rcv_wnd || r >= 16 * toep->tp_mss_clamp)
                                dack = F_RX_DACK_CHANGE |
                                    V_RX_DACK_MODE(dack_mode);
                }
        } else
                dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1);

        /*
         * For coalescing to work effectively ensure the receive window has
         * at least 16KB left.
         */
        must_send = credits + 16384 >= tp->rcv_wnd;

        if (must_send || credits >= thres)
                toep->tp_rcv_wup += t3_send_rx_credits(tp, credits, dack, must_send);
}

static int
cxgb_toe_disconnect(struct tcpcb *tp)
{
        struct socket *so;

        DPRINTF("cxgb_toe_disconnect\n");

        so = tp->t_inpcb->inp_socket;
        close_conn(so);
        return (0);
}

static int
cxgb_toe_reset(struct tcpcb *tp)
{
        struct toepcb *toep = tp->t_toe;

        t3_send_reset(toep);

        /*
         * unhook from socket
         */
        tp->t_flags &= ~TF_TOE;
        toep->tp_tp = NULL;
        tp->t_toe = NULL;
        return (0);
}

static int
cxgb_toe_send(struct tcpcb *tp)
{
        struct socket *so;

        DPRINTF("cxgb_toe_send\n");
        dump_toepcb(tp->t_toe);

        so = tp->t_inpcb->inp_socket;
        t3_push_frames(so, 1);
        return (0);
}

static int
cxgb_toe_rcvd(struct tcpcb *tp)
{
        INP_LOCK_ASSERT(tp->t_inpcb);
        t3_cleanup_rbuf(tp, 0);

        return (0);
}

static void
cxgb_toe_detach(struct tcpcb *tp)
{
        struct toepcb *toep;

        /*
         * XXX how do we handle teardown in the SYN_SENT state?
         */
        INP_INFO_WLOCK(&tcbinfo);
        toep = tp->t_toe;
        toep->tp_tp = NULL;

        /*
         * unhook from socket
         */
        tp->t_flags &= ~TF_TOE;
        tp->t_toe = NULL;
        INP_INFO_WUNLOCK(&tcbinfo);
}

static struct toe_usrreqs cxgb_toe_usrreqs = {
        .tu_disconnect = cxgb_toe_disconnect,
        .tu_reset = cxgb_toe_reset,
        .tu_send = cxgb_toe_send,
        .tu_rcvd = cxgb_toe_rcvd,
        .tu_detach = cxgb_toe_detach,
        .tu_syncache_event = handle_syncache_event,
};

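/*
 * Build a CPL_SET_TCB_FIELD request that updates the masked bits of the
 * given TCB word to val, and send it (or queue it if the connection is
 * still in SYN_SENT).  no_reply suppresses the completion message from
 * the card.
 */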
static void
__set_tcb_field(struct toepcb *toep, struct mbuf *m, uint16_t word,
    uint64_t mask, uint64_t val, int no_reply)
{
        struct cpl_set_tcb_field *req;

        CTR4(KTR_TCB, "__set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx",
            toep->tp_tid, word, mask, val);

        req = mtod(m, struct cpl_set_tcb_field *);
        m->m_pkthdr.len = m->m_len = sizeof(*req);
        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
        req->wr.wr_lo = 0;
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, toep->tp_tid));
        req->reply = V_NO_REPLY(no_reply);
        req->cpu_idx = 0;
        req->word = htons(word);
        req->mask = htobe64(mask);
        req->val = htobe64(val);

        m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
        send_or_defer(toep, m, 0);
}

static void
t3_set_tcb_field(struct socket *so, uint16_t word, uint64_t mask, uint64_t val)
{
        struct mbuf *m;
        struct tcpcb *tp = sototcpcb(so);
        struct toepcb *toep = tp->t_toe;

        if (toep == NULL)
                return;

        if (tp->t_state == TCPS_CLOSED || (toep->tp_flags & TP_ABORT_SHUTDOWN)) {
                printf("not setting field\n");
                return;
        }

        m = m_gethdr_nofail(sizeof(struct cpl_set_tcb_field));

        __set_tcb_field(toep, m, word, mask, val, 1);
}

/*
 * Set one of the t_flags bits in the TCB.
 */
static void
set_tcb_tflag(struct socket *so, unsigned int bit_pos, int val)
{
        t3_set_tcb_field(so, W_TCB_T_FLAGS1, 1ULL << bit_pos, val << bit_pos);
}

/*
 * Send a SET_TCB_FIELD CPL message to change a connection's Nagle setting.
 */
static void
t3_set_nagle(struct socket *so)
{
        struct tcpcb *tp = sototcpcb(so);

        set_tcb_tflag(so, S_TF_NAGLE, !(tp->t_flags & TF_NODELAY));
}

/*
 * Send a SET_TCB_FIELD CPL message to change a connection's keepalive setting.
 */
void
t3_set_keepalive(struct socket *so, int on_off)
{
        set_tcb_tflag(so, S_TF_KEEPALIVE, on_off);
}

void
t3_set_rcv_coalesce_enable(struct socket *so, int on_off)
{
        set_tcb_tflag(so, S_TF_RCV_COALESCE_ENABLE, on_off);
}

void
t3_set_dack_mss(struct socket *so, int on_off)
{
        set_tcb_tflag(so, S_TF_DACK_MSS, on_off);
}

/*
 * Send a SET_TCB_FIELD CPL message to change a connection's TOS setting.
 */
static void
t3_set_tos(struct socket *so)
{
        t3_set_tcb_field(so, W_TCB_TOS, V_TCB_TOS(M_TCB_TOS),
            V_TCB_TOS(SO_TOS(so)));
}

/*
 * In DDP mode, TP fails to schedule a timer to push RX data to the host when
 * DDP is disabled (data is delivered to freelist).  [Note that the peer should
 * set the PSH bit in the last segment, which would trigger delivery.]
 * We work around the issue by setting a DDP buffer in a partial placed state,
 * which guarantees that TP will schedule a timer.
 */
#define TP_DDP_TIMER_WORKAROUND_MASK\
    (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1) |\
     ((V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |\
       V_TCB_RX_DDP_BUF0_LEN(3)) << 32))
#define TP_DDP_TIMER_WORKAROUND_VAL\
    (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(0) |\
     ((V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)1) | V_TCB_RX_DDP_BUF0_LEN((uint64_t)2)) <<\
      32))

static void
t3_enable_ddp(struct socket *so, int on)
{
        if (on) {
                t3_set_tcb_field(so, W_TCB_RX_DDP_FLAGS, V_TF_DDP_OFF(1),
                    V_TF_DDP_OFF(0));
        } else
                t3_set_tcb_field(so, W_TCB_RX_DDP_FLAGS,
                    V_TF_DDP_OFF(1) |
                    TP_DDP_TIMER_WORKAROUND_MASK,
                    V_TF_DDP_OFF(1) |
                    TP_DDP_TIMER_WORKAROUND_VAL);
}

void
t3_set_ddp_tag(struct socket *so, int buf_idx, unsigned int tag_color)
{
        t3_set_tcb_field(so, W_TCB_RX_DDP_BUF0_TAG + buf_idx,
            V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG),
            tag_color);
}

void
t3_set_ddp_buf(struct socket *so, int buf_idx, unsigned int offset,
    unsigned int len)
{
        if (buf_idx == 0)
                t3_set_tcb_field(so, W_TCB_RX_DDP_BUF0_OFFSET,
                    V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |
                    V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
                    V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset) |
                    V_TCB_RX_DDP_BUF0_LEN((uint64_t)len));
        else
                t3_set_tcb_field(so, W_TCB_RX_DDP_BUF1_OFFSET,
                    V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) |
                    V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN << 32),
                    V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset) |
                    V_TCB_RX_DDP_BUF1_LEN(((uint64_t)len) << 32));
}

static int
t3_set_cong_control(struct socket *so, const char *name)
{
#ifdef CONGESTION_CONTROL_SUPPORTED
        int cong_algo;

        for (cong_algo = 0; cong_algo < ARRAY_SIZE(t3_cong_ops); cong_algo++)
                if (!strcmp(name, t3_cong_ops[cong_algo].name))
                        break;

        if (cong_algo >= ARRAY_SIZE(t3_cong_ops))
                return (EINVAL);
#endif
        return (0);
}

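/*
 * Read back the connection's TCB by issuing a CPL_GET_TCB.  The reply is
 * processed by do_get_tcb_rpl(); requests made while still in SYN_SENT are
 * deferred on the out-of-order queue.
 */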
int
t3_get_tcb(struct socket *so)
{
        struct cpl_get_tcb *req;
        struct tcpcb *tp = sototcpcb(so);
        struct toepcb *toep = tp->t_toe;
        struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA);

        if (!m)
                return (ENOMEM);

        INP_LOCK_ASSERT(tp->t_inpcb);
        m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
        req = mtod(m, struct cpl_get_tcb *);
        m->m_pkthdr.len = m->m_len = sizeof(*req);
        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
        req->wr.wr_lo = 0;
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, toep->tp_tid));
        req->cpuno = htons(toep->tp_qset);
        req->rsvd = 0;
        if (sototcpcb(so)->t_state == TCPS_SYN_SENT)
                mbufq_tail(&toep->out_of_order_queue, m);       // defer
        else
                cxgb_ofld_send(T3C_DEV(so), m);
        return (0);
}

static inline void
so_insert_tid(struct tom_data *d, struct socket *so, unsigned int tid)
{
        struct toepcb *toep = sototoep(so);
        toepcb_hold(toep);

        cxgb_insert_tid(d->cdev, d->client, toep, tid);
}

/**
 *      find_best_mtu - find the entry in the MTU table closest to an MTU
 *      @d: TOM state
 *      @mtu: the target MTU
 *
 *      Returns the index of the value in the MTU table that is closest to but
 *      does not exceed the target MTU.
 */
static unsigned int
find_best_mtu(const struct t3c_data *d, unsigned short mtu)
{
        int i = 0;

        while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu)
                ++i;
        return (i);
}

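/*
 * The 40 bytes subtracted below account for the fixed IP and TCP headers
 * (20 bytes each, no options), converting between path MTU and MSS.
 */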
static unsigned int
select_mss(struct t3c_data *td, struct tcpcb *tp, unsigned int pmtu)
{
        unsigned int idx;

#ifdef notyet
        struct rtentry *dst = sotoinpcb(so)->inp_route.ro_rt;
#endif
        if (tp) {
                tp->t_maxseg = pmtu - 40;
                if (tp->t_maxseg < td->mtus[0] - 40)
                        tp->t_maxseg = td->mtus[0] - 40;
                idx = find_best_mtu(td, tp->t_maxseg + 40);

                tp->t_maxseg = td->mtus[idx] - 40;
        } else
                idx = find_best_mtu(td, pmtu);

        return (idx);
}

static inline void
free_atid(struct t3cdev *cdev, unsigned int tid)
{
        struct toepcb *toep = cxgb_free_atid(cdev, tid);

        if (toep)
                toepcb_release(toep);
}

/*
 * Release resources held by an offload connection (TID, L2T entry, etc.)
 */
static void
t3_release_offload_resources(struct toepcb *toep)
{
        struct tcpcb *tp = toep->tp_tp;
        struct toedev *tdev = toep->tp_toedev;
        struct t3cdev *cdev;
        unsigned int tid = toep->tp_tid;

        if (!tdev)
                return;

        cdev = TOEP_T3C_DEV(toep);
        if (!cdev)
                return;

        toep->tp_qset = 0;
        t3_release_ddp_resources(toep);

#ifdef CTRL_SKB_CACHE
        kfree_skb(CTRL_SKB_CACHE(tp));
        CTRL_SKB_CACHE(tp) = NULL;
#endif

        if (toep->tp_wr_avail != toep->tp_wr_max) {
                purge_wr_queue(toep);
                reset_wr_list(toep);
        }

        if (toep->tp_l2t) {
                l2t_release(L2DATA(cdev), toep->tp_l2t);
                toep->tp_l2t = NULL;
        }
        toep->tp_tp = NULL;
        if (tp) {
                INP_LOCK_ASSERT(tp->t_inpcb);
                tp->t_toe = NULL;
                tp->t_flags &= ~TF_TOE;
        }

        if (toep->tp_state == TCPS_SYN_SENT) {
                free_atid(cdev, tid);
#ifdef notyet
                __skb_queue_purge(&tp->out_of_order_queue);
#endif
        } else {                                        // we have TID
                cxgb_remove_tid(cdev, toep, tid);
                toepcb_release(toep);
        }
#if 0
        log(LOG_INFO, "closing TID %u, state %u\n", tid, tp->t_state);
#endif
}

static void
install_offload_ops(struct socket *so)
{
        struct tcpcb *tp = sototcpcb(so);

        KASSERT(tp->t_toe != NULL, ("toepcb not set"));

        t3_install_socket_ops(so);
        tp->t_flags |= TF_TOE;
        tp->t_tu = &cxgb_toe_usrreqs;
}

/*
 * Determine the receive window scaling factor given a target max
 * receive window.
 */
static __inline int
select_rcv_wscale(int space)
{
        int wscale = 0;

        if (space > MAX_RCV_WND)
                space = MAX_RCV_WND;

        if (tcp_do_rfc1323)
                for (; space > 65535 && wscale < 14; space >>= 1, ++wscale) ;

        return (wscale);
}

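/*
 * Example: a 256KB target window needs wscale 3 (256K >> 3 = 32K <= 65535),
 * and MAX_RCV_WND (2^27 - 1) yields wscale 11; the factor is capped at the
 * RFC 1323 maximum of 14.
 */
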
/*
 * Determine the receive window size for a socket.
 */
static unsigned long
select_rcv_wnd(struct toedev *dev, struct socket *so)
{
        struct tom_data *d = TOM_DATA(dev);
        unsigned int wnd;
        unsigned int max_rcv_wnd;

        if (tcp_do_autorcvbuf)
                wnd = tcp_autorcvbuf_max;
        else
                wnd = so->so_rcv.sb_hiwat;

        /* XXX
         * For receive coalescing to work effectively we need a receive window
         * that can accommodate a coalesced segment.
         */
        if (wnd < MIN_RCV_WND)
                wnd = MIN_RCV_WND;

        /* PR 5138 */
        max_rcv_wnd = (dev->tod_ttid < TOE_ID_CHELSIO_T3C ?
            (uint32_t)d->rx_page_size * 23 :
            MAX_RCV_WND);

        return min(wnd, max_rcv_wnd);
}

/*
 * Assign offload parameters to some socket fields.  This code is used by
 * both active and passive opens.
 */
static inline void
init_offload_socket(struct socket *so, struct toedev *dev, unsigned int tid,
    struct l2t_entry *e, struct rtentry *dst, struct toepcb *toep)
{
        struct tcpcb *tp = sototcpcb(so);
        struct t3c_data *td = T3C_DATA(TOM_DATA(dev)->cdev);

        SOCK_LOCK_ASSERT(so);

        printf("initializing offload socket\n");
        /*
         * We either need to fix push frames to work with sbcompress
         * or we need to add this
         */
        so->so_snd.sb_flags |= SB_NOCOALESCE;
        so->so_rcv.sb_flags |= SB_NOCOALESCE;

        tp->t_toe = toep;
        toep->tp_tp = tp;
        toep->tp_toedev = dev;

        toep->tp_tid = tid;
        toep->tp_l2t = e;
        toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(dev, max_wrs);
        toep->tp_wr_unacked = 0;
        toep->tp_delack_mode = 0;

        toep->tp_mtu_idx = select_mss(td, tp, dst->rt_ifp->if_mtu);
        /*
         * XXX broken
         */
        tp->rcv_wnd = select_rcv_wnd(dev, so);

        toep->tp_ulp_mode = TOM_TUNABLE(dev, ddp) && !(so->so_options & SO_NO_DDP) &&
            tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
        toep->tp_qset_idx = 0;

        reset_wr_list(toep);
        DPRINTF("initialization done\n");
}

/*
 * The next two functions calculate the option 0 value for a socket.
 */
static inline unsigned int
calc_opt0h(struct socket *so, int mtu_idx)
{
        struct tcpcb *tp = sototcpcb(so);
        int wscale = select_rcv_wscale(tp->rcv_wnd);

        return V_NAGLE((tp->t_flags & TF_NODELAY) == 0) |
            V_KEEP_ALIVE((so->so_options & SO_KEEPALIVE) != 0) | F_TCAM_BYPASS |
            V_WND_SCALE(wscale) | V_MSS_IDX(mtu_idx);
}

static inline unsigned int
calc_opt0l(struct socket *so, int ulp_mode)
{
        struct tcpcb *tp = sototcpcb(so);
        unsigned int val;

        val = V_TOS(SO_TOS(so)) | V_ULP_MODE(ulp_mode) |
            V_RCV_BUFSIZ(min(tp->rcv_wnd >> 10, (u32)M_RCV_BUFSIZ));

        DPRINTF("opt0l tos=%08x rcv_wnd=%ld opt0l=%08x\n", SO_TOS(so), tp->rcv_wnd, val);
        return (val);
}

static inline unsigned int
calc_opt2(const struct socket *so, struct toedev *dev)
{
        int flv_valid;

        flv_valid = (TOM_TUNABLE(dev, cong_alg) != -1);

        return (V_FLAVORS_VALID(flv_valid) |
            V_CONG_CONTROL_FLAVOR(flv_valid ? TOM_TUNABLE(dev, cong_alg) : 0));
}

#if DEBUG_WR > 1
static int
count_pending_wrs(const struct toepcb *toep)
{
        const struct mbuf *m;
        int n = 0;

        wr_queue_walk(toep, m)
                n += m->m_pkthdr.csum_data;
        return (n);
}
#endif

#if 0
(((*(struct tom_data **)&(dev)->l4opt)->conf.cong_alg) != -1)
#endif

static void
mk_act_open_req(struct socket *so, struct mbuf *m,
    unsigned int atid, const struct l2t_entry *e)
{
        struct cpl_act_open_req *req;
        struct inpcb *inp = sotoinpcb(so);
        struct tcpcb *tp = intotcpcb(inp);
        struct toepcb *toep = tp->t_toe;
        struct toedev *tdev = TOE_DEV(so);

        m_set_priority((struct mbuf *)m, mkprio(CPL_PRIORITY_SETUP, toep));

        req = mtod(m, struct cpl_act_open_req *);
        m->m_pkthdr.len = m->m_len = sizeof(*req);

        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
        req->wr.wr_lo = 0;
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid));
        req->local_port = inp->inp_lport;
        req->peer_port = inp->inp_fport;
        memcpy(&req->local_ip, &inp->inp_laddr, 4);
        memcpy(&req->peer_ip, &inp->inp_faddr, 4);
        req->opt0h = htonl(calc_opt0h(so, toep->tp_mtu_idx) | V_L2T_IDX(e->idx) |
            V_TX_CHANNEL(e->smt_idx));
        req->opt0l = htonl(calc_opt0l(so, toep->tp_ulp_mode));
        req->params = 0;
        req->opt2 = htonl(calc_opt2(so, tdev));
}

/*
 * Convert an ACT_OPEN_RPL status to an errno.
 */
static int
act_open_rpl_status_to_errno(int status)
{
        switch (status) {
        case CPL_ERR_CONN_RESET:
                return (ECONNREFUSED);
        case CPL_ERR_ARP_MISS:
                return (EHOSTUNREACH);
        case CPL_ERR_CONN_TIMEDOUT:
                return (ETIMEDOUT);
        case CPL_ERR_TCAM_FULL:
                return (ENOMEM);
        case CPL_ERR_CONN_EXIST:
                log(LOG_ERR, "ACTIVE_OPEN_RPL: 4-tuple in use\n");
                return (EADDRINUSE);
        default:
                return (EIO);
        }
}

static void
fail_act_open(struct toepcb *toep, int errno)
{
        struct tcpcb *tp = toep->tp_tp;

        t3_release_offload_resources(toep);
        if (tp) {
                INP_LOCK_ASSERT(tp->t_inpcb);
                tcp_drop(tp, errno);
        }

#ifdef notyet
        TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
#endif
}

/*
 * Handle active open failures.
 */
static void
active_open_failed(struct toepcb *toep, struct mbuf *m)
{
        struct cpl_act_open_rpl *rpl = cplhdr(m);
        struct inpcb *inp;

        INP_INFO_WLOCK(&tcbinfo);
        if (toep->tp_tp == NULL)
                goto done;

        inp = toep->tp_tp->t_inpcb;
        INP_LOCK(inp);

        /*
         * Don't handle connection retry for now
         */
#ifdef notyet
        struct inet_connection_sock *icsk = inet_csk(sk);

        if (rpl->status == CPL_ERR_CONN_EXIST &&
            icsk->icsk_retransmit_timer.function != act_open_retry_timer) {
                icsk->icsk_retransmit_timer.function = act_open_retry_timer;
                sk_reset_timer(so, &icsk->icsk_retransmit_timer,
                    jiffies + HZ / 2);
        } else
#endif
                fail_act_open(toep, act_open_rpl_status_to_errno(rpl->status));
        INP_UNLOCK(inp);
done:
        INP_INFO_WUNLOCK(&tcbinfo);

        m_free(m);
}

/*
 * Return whether a failed active open has allocated a TID
 */
static inline int
act_open_has_tid(int status)
{
        return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
            status != CPL_ERR_ARP_MISS;
}

/*
 * Process an ACT_OPEN_RPL CPL message.
 */
static int
do_act_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
        struct toepcb *toep = (struct toepcb *)ctx;
        struct cpl_act_open_rpl *rpl = cplhdr(m);

        if (cdev->type != T3A && act_open_has_tid(rpl->status))
                cxgb_queue_tid_release(cdev, GET_TID(rpl));

        active_open_failed(toep, m);
        return (0);
}

/*
 * Handle an ARP failure for an active open.  XXX purge ofo queue
 *
 * XXX badly broken for crossed SYNs as the ATID is no longer valid.
 * XXX crossed SYN errors should be generated by PASS_ACCEPT_RPL which should
 * check SOCK_DEAD or sk->sk_sock.  Or maybe generate the error here but don't
 * free the atid.  Hmm.
 */
#ifdef notyet
static void
act_open_req_arp_failure(struct t3cdev *dev, struct mbuf *m)
{
        struct toepcb *toep = m_get_toep(m);
        struct tcpcb *tp = toep->tp_tp;
        struct inpcb *inp = tp->t_inpcb;
        struct socket *so = toeptoso(toep);

        INP_LOCK(inp);
        if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED) {
                fail_act_open(toep, EHOSTUNREACH);
                printf("freeing %p\n", m);

                m_free(m);
        }
        INP_UNLOCK(inp);
}
#endif

/*
 * Send an active open request.
 */
int
t3_connect(struct toedev *tdev, struct socket *so,
    struct rtentry *rt, struct sockaddr *nam)
{
        struct mbuf *m;
        struct l2t_entry *e;
        struct tom_data *d = TOM_DATA(tdev);
        struct inpcb *inp = sotoinpcb(so);
        struct tcpcb *tp = intotcpcb(inp);
        struct toepcb *toep; /* allocated by toepcb_alloc() below */
        int atid;

        toep = toepcb_alloc();
        if (toep == NULL)
                goto out_err;

        if ((atid = cxgb_alloc_atid(d->cdev, d->client, toep)) < 0)
                goto out_err;

        e = t3_l2t_get(d->cdev, rt, rt->rt_ifp, nam);
        if (!e)
                goto free_tid;

        INP_LOCK_ASSERT(inp);
        m = m_gethdr(M_WAITOK, MT_DATA);

#if 0
        m->m_toe.mt_toepcb = tp->t_toe;
        set_arp_failure_handler((struct mbuf *)m, act_open_req_arp_failure);
#endif
        SOCK_LOCK(so);

        init_offload_socket(so, tdev, atid, e, rt, toep);

        install_offload_ops(so);

        mk_act_open_req(so, m, atid, e);
        SOCK_UNLOCK(so);

        soisconnecting(so);
        toep = tp->t_toe;
        m_set_toep(m, tp->t_toe);

        toep->tp_state = TCPS_SYN_SENT;
        l2t_send(d->cdev, (struct mbuf *)m, e);

        if (toep->tp_ulp_mode)
                t3_enable_ddp(so, 0);
        return (0);

free_tid:
        printf("failing connect - free atid\n");
        free_atid(d->cdev, atid);
out_err:
        printf("return ENOMEM\n");
        return (ENOMEM);
}

/*
 * Send an ABORT_REQ message.  Cannot fail.  This routine makes sure we do
 * not send multiple ABORT_REQs for the same connection and also that we do
 * not try to send a message after the connection has closed.
 */
static void
t3_send_reset(struct toepcb *toep)
{
        struct cpl_abort_req *req;
        unsigned int tid = toep->tp_tid;
        int mode = CPL_ABORT_SEND_RST;
        struct tcpcb *tp = toep->tp_tp;
        struct toedev *tdev = toep->tp_toedev;
        struct socket *so = NULL;
        struct mbuf *m;

        if (tp) {
                INP_LOCK_ASSERT(tp->t_inpcb);
                so = toeptoso(toep);
        }

        if (__predict_false((toep->tp_flags & TP_ABORT_SHUTDOWN) ||
            tdev == NULL))
                return;
        toep->tp_flags |= (TP_ABORT_RPL_PENDING|TP_ABORT_SHUTDOWN);

        /* Purge the send queue so we don't send anything after an abort. */
        if (so)
                sbflush(&so->so_snd);
        if ((toep->tp_flags & TP_CLOSE_CON_REQUESTED) && is_t3a(tdev))
                mode |= CPL_ABORT_POST_CLOSE_REQ;

        m = m_gethdr_nofail(sizeof(*req));
        m_set_priority(m, mkprio(CPL_PRIORITY_DATA, toep));
        set_arp_failure_handler(m, abort_arp_failure);

        req = mtod(m, struct cpl_abort_req *);
        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
        req->wr.wr_lo = htonl(V_WR_TID(tid));
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
        req->rsvd0 = tp ? htonl(tp->snd_nxt) : 0;
        req->rsvd1 = !(toep->tp_flags & TP_DATASENT);
        req->cmd = mode;
        if (tp && (tp->t_state == TCPS_SYN_SENT))
                mbufq_tail(&toep->out_of_order_queue, m);       // defer
        else
                l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t);
}

static int
t3_ip_ctloutput(struct socket *so, struct sockopt *sopt)
{
        struct inpcb *inp;
        int error, optval;

        if (sopt->sopt_name == IP_OPTIONS)
                return (ENOPROTOOPT);

        if (sopt->sopt_name != IP_TOS)
                return (EOPNOTSUPP);

        error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);

        if (error)
                return (error);

        if (optval > IPTOS_PREC_CRITIC_ECP && !suser(curthread))
                return (EPERM);

        inp = sotoinpcb(so);
        inp->inp_ip_tos = optval;

        t3_set_tos(so);

        return (0);
}

static int
t3_tcp_ctloutput(struct socket *so, struct sockopt *sopt)
{
        int err = 0;
        size_t copied;

        if (sopt->sopt_name != TCP_CONGESTION &&
            sopt->sopt_name != TCP_NODELAY)
                return (EOPNOTSUPP);

        if (sopt->sopt_name == TCP_CONGESTION) {
                char name[TCP_CA_NAME_MAX];
                int optlen = sopt->sopt_valsize;
                struct tcpcb *tp;

                if (optlen < 1)
                        return (EINVAL);

                err = copyinstr(sopt->sopt_val, name,
                    min(TCP_CA_NAME_MAX - 1, optlen), &copied);
                if (err)
                        return (err);
                if (copied < 1)
                        return (EINVAL);

                tp = sototcpcb(so);
                /*
                 * XXX I need to revisit this
                 */
                if ((err = t3_set_cong_control(so, name)) == 0) {
#ifdef CONGESTION_CONTROL_SUPPORTED
                        tp->t_cong_control = strdup(name, M_CXGB);
#endif
                } else
                        return (err);
        } else {
                int optval, oldval;
                struct inpcb *inp;
                struct tcpcb *tp;

                err = sooptcopyin(sopt, &optval, sizeof optval,
                    sizeof optval);

                if (err)
                        return (err);

                inp = sotoinpcb(so);
                tp = intotcpcb(inp);

                INP_LOCK(inp);

                oldval = tp->t_flags;
                if (optval)
                        tp->t_flags |= TF_NODELAY;
                else
                        tp->t_flags &= ~TF_NODELAY;
                INP_UNLOCK(inp);

                if (oldval != tp->t_flags)
                        t3_set_nagle(so);
        }

        return (0);
}

static int
t3_ctloutput(struct socket *so, struct sockopt *sopt)
{
        int err;

        if (sopt->sopt_level != IPPROTO_TCP)
                err = t3_ip_ctloutput(so, sopt);
        else
                err = t3_tcp_ctloutput(so, sopt);

        if (err != EOPNOTSUPP)
                return (err);

        return (tcp_ctloutput(so, sopt));
}

/*
 * Returns true if we need to explicitly request RST when we receive new data
 * on an RX-closed connection.
 */
static inline int
need_rst_on_excess_rx(const struct toepcb *toep)
{
        return (1);
}

/*
 * Handles Rx data that arrives in a state where the socket isn't accepting
 * new data.
 */
static void
handle_excess_rx(struct toepcb *toep, struct mbuf *m)
{
        if (need_rst_on_excess_rx(toep) && !(toep->tp_flags & TP_ABORT_SHUTDOWN))
                t3_send_reset(toep);
        m_freem(m);
}

/*
 * Process a get_tcb_rpl as a DDP completion (similar to RX_DDP_COMPLETE)
 * by getting the DDP offset from the TCB.
 */
static void
tcb_rpl_as_ddp_complete(struct toepcb *toep, struct mbuf *m)
{
        struct ddp_state *q = &toep->tp_ddp_state;
        struct ddp_buf_state *bsp;
        struct cpl_get_tcb_rpl *hdr;
        unsigned int ddp_offset;
        struct socket *so;
        struct tcpcb *tp;

        uint64_t t;
        __be64 *tcb;

        so = toeptoso(toep);
        tp = toep->tp_tp;

        INP_LOCK_ASSERT(tp->t_inpcb);
        SOCKBUF_LOCK(&so->so_rcv);

        /* Note that we only account for CPL_GET_TCB issued by the DDP code.
         * We really need a cookie in order to dispatch the RPLs.
         */
        q->get_tcb_count--;

        /* It is possible that a previous CPL already invalidated UBUF DDP
         * and moved the cur_buf idx and hence no further processing of this
         * skb is required.  However, the app might be sleeping on
         * !q->get_tcb_count and we need to wake it up.
         */
        if (q->cancel_ubuf && !t3_ddp_ubuf_pending(toep)) {
                struct socket *so = toeptoso(toep);

                m_freem(m);
                if (__predict_true((so->so_state & SS_NOFDREF) == 0))
                        sorwakeup_locked(so);
                else
                        SOCKBUF_UNLOCK(&so->so_rcv);
                return;
        }

        bsp = &q->buf_state[q->cur_buf];
        hdr = cplhdr(m);
        tcb = (__be64 *)(hdr + 1);
        if (q->cur_buf == 0) {
                t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF0_OFFSET) / 2]);
                ddp_offset = t >> (32 + S_TCB_RX_DDP_BUF0_OFFSET);
        } else {
                t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF1_OFFSET) / 2]);
                ddp_offset = t >> S_TCB_RX_DDP_BUF1_OFFSET;
        }
        ddp_offset &= M_TCB_RX_DDP_BUF0_OFFSET;
        m->m_cur_offset = bsp->cur_offset;
        bsp->cur_offset = ddp_offset;
        m->m_len = m->m_pkthdr.len = ddp_offset - m->m_cur_offset;

        CTR5(KTR_TOM,
            "tcb_rpl_as_ddp_complete: idx=%d seq=0x%x hwbuf=%u ddp_offset=%u cur_offset=%u",
            q->cur_buf, tp->rcv_nxt, q->cur_buf, ddp_offset, m->m_cur_offset);
        KASSERT(ddp_offset >= m->m_cur_offset, ("ddp_offset=%u less than cur_offset=%u",
                ddp_offset, m->m_cur_offset));

#ifdef T3_TRACE
        T3_TRACE3(TIDTB(so),
            "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u ddp_offset %u",
            tp->rcv_nxt, q->cur_buf, ddp_offset);
#endif

#if 0
{
        unsigned int ddp_flags, rcv_nxt, rx_hdr_offset, buf_idx;

        t = be64toh(tcb[(31 - W_TCB_RX_DDP_FLAGS) / 2]);
        ddp_flags = (t >> S_TCB_RX_DDP_FLAGS) & M_TCB_RX_DDP_FLAGS;

        t = be64toh(tcb[(31 - W_TCB_RCV_NXT) / 2]);
        rcv_nxt = t >> S_TCB_RCV_NXT;
        rcv_nxt &= M_TCB_RCV_NXT;

        t = be64toh(tcb[(31 - W_TCB_RX_HDR_OFFSET) / 2]);
        rx_hdr_offset = t >> (32 + S_TCB_RX_HDR_OFFSET);
        rx_hdr_offset &= M_TCB_RX_HDR_OFFSET;

        T3_TRACE2(TIDTB(sk),
            "tcb_rpl_as_ddp_complete: DDP FLAGS 0x%x dma up to 0x%x",
            ddp_flags, rcv_nxt - rx_hdr_offset);
        T3_TRACE4(TB(q),
            "tcb_rpl_as_ddp_complete: rcvnxt 0x%x hwbuf %u cur_offset %u cancel %u",
            tp->rcv_nxt, q->cur_buf, bsp->cur_offset, q->cancel_ubuf);
        T3_TRACE3(TB(q),
            "tcb_rpl_as_ddp_complete: TCB rcvnxt 0x%x hwbuf 0x%x ddp_offset %u",
            rcv_nxt - rx_hdr_offset, ddp_flags, ddp_offset);
        T3_TRACE2(TB(q),
            "tcb_rpl_as_ddp_complete: flags0 0x%x flags1 0x%x",
            q->buf_state[0].flags, q->buf_state[1].flags);
}
#endif
        if (__predict_false(so_no_receive(so) && m->m_pkthdr.len)) {
                handle_excess_rx(toep, m);
                return;
        }

#ifdef T3_TRACE
        if ((int)m->m_pkthdr.len < 0) {
                t3_ddp_error(so, "tcb_rpl_as_ddp_complete: neg len");
        }
#endif
        if (bsp->flags & DDP_BF_NOCOPY) {
#ifdef T3_TRACE
                T3_TRACE0(TB(q),
                    "tcb_rpl_as_ddp_complete: CANCEL UBUF");

                if (!q->cancel_ubuf && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
                        printk("!cancel_ubuf");
                        t3_ddp_error(sk, "tcb_rpl_as_ddp_complete: !cancel_ubuf");
                }
#endif
                m->m_ddp_flags = DDP_BF_PSH | DDP_BF_NOCOPY | 1;
                bsp->flags &= ~(DDP_BF_NOCOPY|DDP_BF_NODATA);
                q->cur_buf ^= 1;
        } else if (bsp->flags & DDP_BF_NOFLIP) {

                m->m_ddp_flags = 1;    /* always a kernel buffer */

                /* now HW buffer carries a user buffer */
                bsp->flags &= ~DDP_BF_NOFLIP;
                bsp->flags |= DDP_BF_NOCOPY;

                /* It is possible that the CPL_GET_TCB_RPL doesn't indicate
                 * any new data in which case we're done.  If in addition the
                 * offset is 0, then there wasn't a completion for the kbuf
                 * and we need to decrement the posted count.
                 */
                if (m->m_pkthdr.len == 0) {
                        if (ddp_offset == 0) {
                                q->kbuf_posted--;
                                bsp->flags |= DDP_BF_NODATA;
                        }
                        SOCKBUF_UNLOCK(&so->so_rcv);

                        m_free(m);
                        return;
                }
        } else {
                SOCKBUF_UNLOCK(&so->so_rcv);
                /* This reply is for a CPL_GET_TCB_RPL to cancel the UBUF DDP,
                 * but it got here way late and nobody cares anymore.
                 */
                m_free(m);
                return;
        }

        m->m_ddp_gl = (unsigned char *)bsp->gl;
        m->m_flags |= M_DDP;
        m->m_seq = tp->rcv_nxt;
        tp->rcv_nxt += m->m_pkthdr.len;
        tp->t_rcvtime = ticks;
#ifdef T3_TRACE
        T3_TRACE3(TB(q),
            "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u lskb->len %u",
            m->m_seq, q->cur_buf, m->m_pkthdr.len);
#endif
        CTR3(KTR_TOM, "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u m->m_pktlen %u",
            m->m_seq, q->cur_buf, m->m_pkthdr.len);
        if (m->m_pkthdr.len == 0)
                q->user_ddp_pending = 0;
        else
                SBAPPEND(&so->so_rcv, m);
        if (__predict_true((so->so_state & SS_NOFDREF) == 0))
                sorwakeup_locked(so);
        else
                SOCKBUF_UNLOCK(&so->so_rcv);
}

/*
 * Process a CPL_GET_TCB_RPL.  These can also be generated by the DDP code,
 * in that case they are similar to DDP completions.
 */
static int
do_get_tcb_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
        struct toepcb *toep = (struct toepcb *)ctx;

        /* OK if socket doesn't exist */
        if (toep == NULL) {
                printf("null toep in do_get_tcb_rpl\n");
                return (CPL_RET_BUF_DONE);
        }

        INP_LOCK(toep->tp_tp->t_inpcb);
        tcb_rpl_as_ddp_complete(toep, m);
        INP_UNLOCK(toep->tp_tp->t_inpcb);

        return (0);
}

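/*
 * Called from new_rx_data() for connections in DDP mode.  If the CPL_RX_DATA
 * sequence number is ahead of rcv_nxt, the intervening bytes were placed
 * directly into the current DDP buffer, so dress up the mbuf to describe
 * that placed data before the in-line payload is appended.
 */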
static void
handle_ddp_data(struct toepcb *toep, struct mbuf *m)
{
        struct tcpcb *tp = toep->tp_tp;
        struct socket *so = toeptoso(toep);
        struct ddp_state *q;
        struct ddp_buf_state *bsp;
        struct cpl_rx_data *hdr = cplhdr(m);
        unsigned int rcv_nxt = ntohl(hdr->seq);

        if (tp->rcv_nxt == rcv_nxt)
                return;

        INP_LOCK_ASSERT(tp->t_inpcb);
        SOCKBUF_LOCK(&so->so_rcv);
        q = &toep->tp_ddp_state;
        bsp = &q->buf_state[q->cur_buf];
        KASSERT(SEQ_GT(rcv_nxt, tp->rcv_nxt), ("tp->rcv_nxt=0x%08x decreased rcv_nxt=0x%08x",
                rcv_nxt, tp->rcv_nxt));
        m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
        KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
        CTR3(KTR_TOM, "rcv_nxt=0x%x tp->rcv_nxt=0x%x len=%d",
            rcv_nxt, tp->rcv_nxt, m->m_pkthdr.len);

#ifdef T3_TRACE
        if ((int)m->m_pkthdr.len < 0) {
                t3_ddp_error(so, "handle_ddp_data: neg len");
        }
#endif

        m->m_ddp_gl = (unsigned char *)bsp->gl;
        m->m_flags |= M_DDP;
        m->m_cur_offset = bsp->cur_offset;
        m->m_ddp_flags = DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
        if (bsp->flags & DDP_BF_NOCOPY)
                bsp->flags &= ~DDP_BF_NOCOPY;

        m->m_seq = tp->rcv_nxt;
        tp->rcv_nxt = rcv_nxt;
        bsp->cur_offset += m->m_pkthdr.len;
        if (!(bsp->flags & DDP_BF_NOFLIP))
                q->cur_buf ^= 1;
        /*
         * For now, don't re-enable DDP after a connection fell out of DDP
         * mode.
         */
        q->ubuf_ddp_ready = 0;
        SOCKBUF_UNLOCK(&so->so_rcv);
}

/*
 * Process new data received for a connection.
 */
static void
new_rx_data(struct toepcb *toep, struct mbuf *m)
{
        struct cpl_rx_data *hdr = cplhdr(m);
        struct tcpcb *tp = toep->tp_tp;
        struct socket *so = toeptoso(toep);
        int len = be16toh(hdr->len);

        INP_LOCK(tp->t_inpcb);

        if (__predict_false(so_no_receive(so))) {
                handle_excess_rx(toep, m);
                INP_UNLOCK(tp->t_inpcb);
                TRACE_EXIT;
                return;
        }

        if (toep->tp_ulp_mode == ULP_MODE_TCPDDP)
                handle_ddp_data(toep, m);

        m->m_seq = ntohl(hdr->seq);
        m->m_ulp_mode = 0;                    /* for iSCSI */

#if VALIDATE_SEQ
        if (__predict_false(m->m_seq != tp->rcv_nxt)) {
                log(LOG_ERR,
                    "%s: TID %u: Bad sequence number %u, expected %u\n",
                    TOE_DEV(toeptoso(toep))->name, toep->tp_tid, m->m_seq,
                    tp->rcv_nxt);
                m_freem(m);
                INP_UNLOCK(tp->t_inpcb);
                return;
        }
#endif
        m_adj(m, sizeof(*hdr));

#ifdef URGENT_DATA_SUPPORTED
        /*
         * We don't handle urgent data yet
         */
        if (__predict_false(hdr->urg))
                handle_urg_ptr(so, tp->rcv_nxt + ntohs(hdr->urg));
        if (__predict_false(tp->urg_data == TCP_URG_NOTYET &&
            tp->urg_seq - tp->rcv_nxt < skb->len))
                tp->urg_data = TCP_URG_VALID | skb->data[tp->urg_seq -
                    tp->rcv_nxt];
#endif
        if (__predict_false(hdr->dack_mode != toep->tp_delack_mode)) {
                toep->tp_delack_mode = hdr->dack_mode;
                toep->tp_delack_seq = tp->rcv_nxt;
        }
        CTR6(KTR_TOM, "appending mbuf=%p pktlen=%d m_len=%d len=%d rcv_nxt=0x%x enqueued_bytes=%d",
            m, m->m_pkthdr.len, m->m_len, len, tp->rcv_nxt, toep->tp_enqueued_bytes);

        if (len < m->m_pkthdr.len)
                m->m_pkthdr.len = m->m_len = len;

        tp->rcv_nxt += m->m_pkthdr.len;
        tp->t_rcvtime = ticks;
        toep->tp_enqueued_bytes += m->m_pkthdr.len;
#ifdef T3_TRACE
        T3_TRACE2(TIDTB(sk),
            "new_rx_data: seq 0x%x len %u",
            m->m_seq, m->m_pkthdr.len);
#endif
        INP_UNLOCK(tp->t_inpcb);
        SOCKBUF_LOCK(&so->so_rcv);
        if (sb_notify(&so->so_rcv))
                DPRINTF("rx_data so=%p flags=0x%x len=%d\n", so, so->so_rcv.sb_flags, m->m_pkthdr.len);

        SBAPPEND(&so->so_rcv, m);

#ifdef notyet
        /*
         * We're giving too many credits to the card - but disable this check so we can keep on moving :-|
         */
        KASSERT(so->so_rcv.sb_cc < (so->so_rcv.sb_mbmax << 1),
            ("so=%p, data contents exceed mbmax, sb_cc=%d sb_mbmax=%d",
                so, so->so_rcv.sb_cc, so->so_rcv.sb_mbmax));
#endif

        CTR2(KTR_TOM, "sb_cc=%d sb_mbcnt=%d",
            so->so_rcv.sb_cc, so->so_rcv.sb_mbcnt);

        if (__predict_true((so->so_state & SS_NOFDREF) == 0))
                sorwakeup_locked(so);
        else
                SOCKBUF_UNLOCK(&so->so_rcv);
}

/*
|
|
* Handler for RX_DATA CPL messages.
|
|
*/
|
|
static int
|
|
do_rx_data(struct t3cdev *cdev, struct mbuf *m, void *ctx)
|
|
{
|
|
struct toepcb *toep = (struct toepcb *)ctx;
|
|
|
|
DPRINTF("rx_data len=%d\n", m->m_pkthdr.len);
|
|
|
|
new_rx_data(toep, m);
|
|
|
|
return (0);
|
|
}
|
|
|
|
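/*
 * Process a CPL_RX_DATA_DDP message, i.e., data the adapter placed
 * directly into a posted DDP buffer.  The mbuf carries no payload of its
 * own; m_len/m_cur_offset describe where the bytes landed in the gather
 * list attached through m_ddp_gl.
 */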
static void
new_rx_data_ddp(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp;
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_rx_data_ddp *hdr;
	unsigned int ddp_len, rcv_nxt, ddp_report, end_offset, buf_idx;
	struct socket *so = toeptoso(toep);
	int nomoredata = 0;
	unsigned int delack_mode;

	tp = sototcpcb(so);

	INP_LOCK(tp->t_inpcb);
	if (__predict_false(so_no_receive(so))) {
		handle_excess_rx(toep, m);
		INP_UNLOCK(tp->t_inpcb);
		return;
	}

	q = &toep->tp_ddp_state;
	hdr = cplhdr(m);
	ddp_report = ntohl(hdr->u.ddp_report);
	buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
	bsp = &q->buf_state[buf_idx];

#ifdef T3_TRACE
	T3_TRACE5(TIDTB(sk),
	    "new_rx_data_ddp: tp->rcv_nxt 0x%x cur_offset %u "
	    "hdr seq 0x%x len %u offset %u",
	    tp->rcv_nxt, bsp->cur_offset, ntohl(hdr->seq),
	    ntohs(hdr->len), G_DDP_OFFSET(ddp_report));
	T3_TRACE1(TIDTB(sk),
	    "new_rx_data_ddp: ddp_report 0x%x",
	    ddp_report);
#endif
	CTR4(KTR_TOM,
	    "new_rx_data_ddp: tp->rcv_nxt 0x%x cur_offset %u "
	    "hdr seq 0x%x len %u",
	    tp->rcv_nxt, bsp->cur_offset, ntohl(hdr->seq),
	    ntohs(hdr->len));
	CTR3(KTR_TOM,
	    "new_rx_data_ddp: offset %u ddp_report 0x%x buf_idx=%d",
	    G_DDP_OFFSET(ddp_report), ddp_report, buf_idx);

	ddp_len = ntohs(hdr->len);
	rcv_nxt = ntohl(hdr->seq) + ddp_len;

	delack_mode = G_DDP_DACK_MODE(ddp_report);
	if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
		toep->tp_delack_mode = delack_mode;
		toep->tp_delack_seq = tp->rcv_nxt;
	}

	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt = rcv_nxt;

	tp->t_rcvtime = ticks;
	/*
	 * Store the length in m->m_len.  We are changing the meaning of
	 * m->m_len here, we need to be very careful that nothing from now on
	 * interprets ->len of this packet the usual way.
	 */
	m->m_len = m->m_pkthdr.len = rcv_nxt - m->m_seq;
	INP_UNLOCK(tp->t_inpcb);
	CTR3(KTR_TOM,
	    "new_rx_data_ddp: m_len=%u rcv_nxt 0x%08x rcv_nxt_prev=0x%08x",
	    m->m_len, rcv_nxt, m->m_seq);
	/*
	 * Figure out where the new data was placed in the buffer and store it
	 * in m_cur_offset.  Assumes the buffer offset starts at 0, consumer
	 * needs to account for page pod's pg_offset.
	 */
	end_offset = G_DDP_OFFSET(ddp_report) + ddp_len;
	m->m_cur_offset = end_offset - m->m_pkthdr.len;

	SOCKBUF_LOCK(&so->so_rcv);
	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	bsp->cur_offset = end_offset;
	toep->tp_enqueued_bytes += m->m_pkthdr.len;

	/*
	 * Length is only meaningful for kbuf
	 */
	if (!(bsp->flags & DDP_BF_NOCOPY))
		KASSERT(m->m_len <= bsp->gl->dgl_length,
		    ("length received exceeds ddp pages: len=%d dgl_length=%d",
		    m->m_len, bsp->gl->dgl_length));

	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	KASSERT(m->m_next == NULL, ("m_next=%p", m->m_next));

	/*
	 * Bit 0 of flags stores whether the DDP buffer is completed.
	 * Note that other parts of the code depend on this being in bit 0.
	 */
	if ((bsp->flags & DDP_BF_NOINVAL) && end_offset != bsp->gl->dgl_length) {
		panic("spurious ddp completion");
	} else {
		m->m_ddp_flags = !!(ddp_report & F_DDP_BUF_COMPLETE);
		if (m->m_ddp_flags && !(bsp->flags & DDP_BF_NOFLIP))
			q->cur_buf ^= 1;	/* flip buffers */
	}

	if (bsp->flags & DDP_BF_NOCOPY) {
		m->m_ddp_flags |= (bsp->flags & DDP_BF_NOCOPY);
		bsp->flags &= ~DDP_BF_NOCOPY;
	}

	if (ddp_report & F_DDP_PSH)
		m->m_ddp_flags |= DDP_BF_PSH;
	if (nomoredata)
		m->m_ddp_flags |= DDP_BF_NODATA;

#ifdef notyet
	skb_reset_transport_header(skb);
	tcp_hdr(skb)->fin = 0;	/* changes original hdr->ddp_report */
#endif
	SBAPPEND(&so->so_rcv, m);

	if ((so->so_state & SS_NOFDREF) == 0)
		sorwakeup_locked(so);
	else
		SOCKBUF_UNLOCK(&so->so_rcv);
}

#define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\
    F_DDP_PPOD_PARITY_ERR | F_DDP_PADDING_ERR | F_DDP_OFFSET_ERR |\
    F_DDP_INVALID_TAG | F_DDP_COLOR_ERR | F_DDP_TID_MISMATCH |\
    F_DDP_INVALID_PPOD)

/*
 * Handler for RX_DATA_DDP CPL messages.
 */
static int
do_rx_data_ddp(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = ctx;
	const struct cpl_rx_data_ddp *hdr = cplhdr(m);

	VALIDATE_SOCK(so);

	if (__predict_false(ntohl(hdr->ddpvld_status) & DDP_ERR)) {
		log(LOG_ERR, "RX_DATA_DDP for TID %u reported error 0x%x\n",
		    GET_TID(hdr), G_DDP_VALID(ntohl(hdr->ddpvld_status)));
		return (CPL_RET_BUF_DONE);
	}
#if 0
	skb->h.th = tcphdr_skb->h.th;
#endif
	new_rx_data_ddp(toep, m);
	return (0);
}

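/*
 * Process a CPL_RX_DDP_COMPLETE message: a posted DDP buffer was completed
 * (or flushed), so compute how many bytes landed in it since the previous
 * report, advance rcv_nxt accordingly, and pass an mbuf describing the
 * buffer up to the socket.
 */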
static void
process_ddp_complete(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so = toeptoso(toep);
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_rx_ddp_complete *hdr;
	unsigned int ddp_report, buf_idx, when, delack_mode;
	int nomoredata = 0;

	INP_LOCK(tp->t_inpcb);
	if (__predict_false(so_no_receive(so))) {
		struct inpcb *inp = sotoinpcb(so);

		handle_excess_rx(toep, m);
		INP_UNLOCK(inp);
		return;
	}
	q = &toep->tp_ddp_state;
	hdr = cplhdr(m);
	ddp_report = ntohl(hdr->ddp_report);
	buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
	m->m_pkthdr.csum_data = tp->rcv_nxt;

	SOCKBUF_LOCK(&so->so_rcv);
	bsp = &q->buf_state[buf_idx];
	when = bsp->cur_offset;
	m->m_len = m->m_pkthdr.len = G_DDP_OFFSET(ddp_report) - when;
	tp->rcv_nxt += m->m_len;
	tp->t_rcvtime = ticks;

	delack_mode = G_DDP_DACK_MODE(ddp_report);
	if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
		toep->tp_delack_mode = delack_mode;
		toep->tp_delack_seq = tp->rcv_nxt;
	}
#ifdef notyet
	skb_reset_transport_header(skb);
	tcp_hdr(skb)->fin = 0;	/* changes valid memory past CPL */
#endif
	INP_UNLOCK(tp->t_inpcb);

	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	CTR5(KTR_TOM,
	    "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
	    "ddp_report 0x%x offset %u, len %u",
	    tp->rcv_nxt, bsp->cur_offset, ddp_report,
	    G_DDP_OFFSET(ddp_report), m->m_len);

	bsp->cur_offset += m->m_len;

	if (!(bsp->flags & DDP_BF_NOFLIP)) {
		q->cur_buf ^= 1;	/* flip buffers */
		if (G_DDP_OFFSET(ddp_report) < q->kbuf[0]->dgl_length)
			nomoredata = 1;
	}

	CTR4(KTR_TOM,
	    "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
	    "ddp_report %u offset %u",
	    tp->rcv_nxt, bsp->cur_offset, ddp_report,
	    G_DDP_OFFSET(ddp_report));

	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	m->m_ddp_flags = (bsp->flags & DDP_BF_NOCOPY) | 1;
	if (bsp->flags & DDP_BF_NOCOPY)
		bsp->flags &= ~DDP_BF_NOCOPY;
	if (nomoredata)
		m->m_ddp_flags |= DDP_BF_NODATA;

	SBAPPEND(&so->so_rcv, m);

	if ((so->so_state & SS_NOFDREF) == 0)
		sorwakeup_locked(so);
	else
		SOCKBUF_UNLOCK(&so->so_rcv);
}

/*
 * Handler for RX_DDP_COMPLETE CPL messages.
 */
static int
do_rx_ddp_complete(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = ctx;

	VALIDATE_SOCK(so);
#if 0
	skb->h.th = tcphdr_skb->h.th;
#endif
	process_ddp_complete(toep, m);
	return (0);
}

/*
 * Move a socket to TIME_WAIT state.  We need to make some adjustments to the
 * socket state before calling tcp_time_wait to comply with its expectations.
 */
static void
enter_timewait(struct socket *so)
{
	struct tcpcb *tp = sototcpcb(so);

	INP_LOCK_ASSERT(tp->t_inpcb);
	/*
	 * Bump rcv_nxt for the peer FIN.  We don't do this at the time we
	 * process peer_close because we don't want to carry the peer FIN in
	 * the socket's receive queue and if we increment rcv_nxt without
	 * having the FIN in the receive queue we'll confuse facilities such
	 * as SIOCINQ.
	 */
	tp->rcv_nxt++;

	tp->ts_recent_age = 0;	/* defeat recycling */
	tp->t_srtt = 0;		/* defeat tcp_update_metrics */
	tcp_twstart(tp);
}

/*
 * For TCP DDP a PEER_CLOSE may also be an implicit RX_DDP_COMPLETE.  This
 * function deals with the data that may be reported along with the FIN.
 * Returns -1 if no further processing of the PEER_CLOSE is needed, >= 0 to
 * perform normal FIN-related processing.  In the latter case 1 indicates that
 * there was an implicit RX_DDP_COMPLETE and the mbuf should not be freed, 0
 * that the mbuf can be freed.
 */
static int
handle_peer_close_data(struct socket *so, struct mbuf *m)
{
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_peer_close *req = cplhdr(m);
	unsigned int rcv_nxt = ntohl(req->rcv_nxt) - 1;	/* exclude FIN */

	if (tp->rcv_nxt == rcv_nxt)	/* no data */
		return (0);

	if (__predict_false(so_no_receive(so))) {
		handle_excess_rx(toep, m);

		/*
		 * Although we discard the data we want to process the FIN so
		 * that PEER_CLOSE + data behaves the same as RX_DATA_DDP +
		 * PEER_CLOSE without data.  In particular this PEER_CLOSE
		 * may be what will close the connection.  We return 1 because
		 * handle_excess_rx() already freed the packet.
		 */
		return (1);
	}

	INP_LOCK_ASSERT(tp->t_inpcb);
	q = &toep->tp_ddp_state;
	SOCKBUF_LOCK(&so->so_rcv);
	bsp = &q->buf_state[q->cur_buf];
	m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	m->m_cur_offset = bsp->cur_offset;
	m->m_ddp_flags = DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt = rcv_nxt;
	bsp->cur_offset += m->m_pkthdr.len;
	if (!(bsp->flags & DDP_BF_NOFLIP))
		q->cur_buf ^= 1;
#ifdef notyet
	skb_reset_transport_header(skb);
	tcp_hdr(skb)->fin = 0;	/* changes valid memory past CPL */
#endif
	tp->t_rcvtime = ticks;
	SBAPPEND(&so->so_rcv, m);
	if (__predict_true((so->so_state & SS_NOFDREF) == 0))
		sorwakeup_locked(so);
	else
		SOCKBUF_UNLOCK(&so->so_rcv);
	return (1);
}

/*
 * Handle a peer FIN.
 */
static void
do_peer_fin(struct socket *so, struct mbuf *m)
{
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;
	int keep = 0;
	DPRINTF("do_peer_fin state=%d\n", tp->t_state);

#ifdef T3_TRACE
	T3_TRACE0(TIDTB(sk),"do_peer_fin:");
#endif

	if (!is_t3a(TOE_DEV(so)) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) {
		printf("abort_pending set\n");

		goto out;
	}
	INP_INFO_WLOCK(&tcbinfo);
	INP_LOCK(tp->t_inpcb);
	if (toep->tp_ulp_mode == ULP_MODE_TCPDDP) {
		keep = handle_peer_close_data(so, m);
		if (keep < 0) {
			INP_INFO_WUNLOCK(&tcbinfo);
			INP_UNLOCK(tp->t_inpcb);
			return;
		}
	}
	if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
		socantrcvmore(so);
		/*
		 * If connection is half-synchronized
		 * (ie NEEDSYN flag on) then delay ACK,
		 * so it may be piggybacked when SYN is sent.
		 * Otherwise, since we received a FIN then no
		 * more input can be expected, send ACK now.
		 */
		if (tp->t_flags & TF_NEEDSYN)
			tp->t_flags |= TF_DELACK;
		else
			tp->t_flags |= TF_ACKNOW;
		tp->rcv_nxt++;
	}

	switch (tp->t_state) {
	case TCPS_SYN_RECEIVED:
		tp->t_starttime = ticks;
		/* FALLTHROUGH */
	case TCPS_ESTABLISHED:
		tp->t_state = TCPS_CLOSE_WAIT;
		break;
	case TCPS_FIN_WAIT_1:
		tp->t_state = TCPS_CLOSING;
		break;
	case TCPS_FIN_WAIT_2:
		/*
		 * If we've sent an abort_req we must have sent it too late,
		 * HW will send us a reply telling us so, and this peer_close
		 * is really the last message for this connection and needs to
		 * be treated as an abort_rpl, i.e., transition the connection
		 * to TCP_CLOSE (note that the host stack does this at the
		 * time of generating the RST but we must wait for HW).
		 * Otherwise we enter TIME_WAIT.
		 */
		t3_release_offload_resources(toep);
		if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
			tp = tcp_close(tp);
		} else {
			enter_timewait(so);
		}
		break;
	default:
		log(LOG_ERR,
		    "%s: TID %u received PEER_CLOSE in bad state %d\n",
		    TOE_DEV(so)->tod_name, toep->tp_tid, tp->t_state);
	}
	INP_INFO_WUNLOCK(&tcbinfo);
	if (tp)
		INP_UNLOCK(tp->t_inpcb);

	DPRINTF("waking up waiters on %p rcv_notify=%d flags=0x%x\n", so, sb_notify(&so->so_rcv), so->so_rcv.sb_flags);

#ifdef notyet
	/* Do not send POLL_HUP for half duplex close. */
	if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
	    sk->sk_state == TCP_CLOSE)
		sk_wake_async(so, 1, POLL_HUP);
	else
		sk_wake_async(so, 1, POLL_IN);
#endif

out:
	if (!keep)
		m_free(m);
}

/*
 * Handler for PEER_CLOSE CPL messages.
 */
static int
do_peer_close(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;
	struct socket *so = toeptoso(toep);

	VALIDATE_SOCK(so);

	do_peer_fin(so, m);
	return (0);
}

static void
process_close_con_rpl(struct socket *so, struct mbuf *m)
{
	struct tcpcb *tp = sototcpcb(so);
	struct cpl_close_con_rpl *rpl = cplhdr(m);
	struct toepcb *toep = tp->t_toe;

	tp->snd_una = ntohl(rpl->snd_nxt) - 1;	/* exclude FIN */

	DPRINTF("process_close_con_rpl(%p) state=%d dead=%d\n", so, tp->t_state,
	    !!(so->so_state & SS_NOFDREF));
	if (!is_t3a(TOE_DEV(so)) && (toep->tp_flags & TP_ABORT_RPL_PENDING))
		goto out;

	INP_INFO_WLOCK(&tcbinfo);
	INP_LOCK(tp->t_inpcb);
	switch (tp->t_state) {
	case TCPS_CLOSING:	/* see FIN_WAIT2 case in do_peer_fin */
		t3_release_offload_resources(toep);
		if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
			tp = tcp_close(tp);

		} else {
			enter_timewait(so);
			soisdisconnected(so);
		}
		break;
	case TCPS_LAST_ACK:
		/*
		 * In this state we don't care about pending abort_rpl.
		 * If we've sent abort_req it was post-close and was sent too
		 * late, this close_con_rpl is the actual last message.
		 */
		t3_release_offload_resources(toep);
		tp = tcp_close(tp);
		break;
	case TCPS_FIN_WAIT_1:
		/*
		 * If we can't receive any more
		 * data, then closing user can proceed.
		 * Starting the timer is contrary to the
		 * specification, but if we don't get a FIN
		 * we'll hang forever.
		 *
		 * XXXjl:
		 * we should release the tp also, and use a
		 * compressed state.
		 */
		if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
			int timeout;

			soisdisconnected(so);
			timeout = (tcp_fast_finwait2_recycle) ?
			    tcp_finwait2_timeout : tcp_maxidle;
			tcp_timer_activate(tp, TT_2MSL, timeout);
		}
		tp->t_state = TCPS_FIN_WAIT_2;
		if ((so->so_options & SO_LINGER) && so->so_linger == 0 &&
		    (toep->tp_flags & TP_ABORT_SHUTDOWN) == 0) {
			tp = tcp_drop(tp, 0);
		}

		break;
	default:
		log(LOG_ERR,
		    "%s: TID %u received CLOSE_CON_RPL in bad state %d\n",
		    TOE_DEV(so)->tod_name, toep->tp_tid,
		    tp->t_state);
	}
	INP_INFO_WUNLOCK(&tcbinfo);
	if (tp)
		INP_UNLOCK(tp->t_inpcb);
out:
	m_freem(m);
}

/*
 * Handler for CLOSE_CON_RPL CPL messages.
 */
static int
do_close_con_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;
	struct socket *so = toeptoso(toep);

	VALIDATE_SOCK(so);

	process_close_con_rpl(so, m);
	return (0);
}

/*
 * Process abort replies.  We only process these messages if we anticipate
 * them as the coordination between SW and HW in this area is somewhat lacking
 * and sometimes we get ABORT_RPLs after we are done with the connection that
 * originated the ABORT_REQ.
 */
static void
process_abort_rpl(struct socket *so, struct mbuf *m)
{
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

#ifdef T3_TRACE
	T3_TRACE1(TIDTB(sk),
	    "process_abort_rpl: GTS rpl pending %d",
	    sock_flag(sk, ABORT_RPL_PENDING));
#endif

	INP_INFO_WLOCK(&tcbinfo);
	INP_LOCK(tp->t_inpcb);

	if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
		/*
		 * XXX panic on tcpdrop
		 */
		if (!(toep->tp_flags & TP_ABORT_RPL_RCVD) && !is_t3a(TOE_DEV(so)))
			toep->tp_flags |= TP_ABORT_RPL_RCVD;
		else {
			toep->tp_flags &= ~(TP_ABORT_RPL_RCVD|TP_ABORT_RPL_PENDING);
			if (!(toep->tp_flags & TP_ABORT_REQ_RCVD) ||
			    !is_t3a(TOE_DEV(so))) {
				if (toep->tp_flags & TP_ABORT_REQ_RCVD)
					panic("TP_ABORT_REQ_RCVD set");
				t3_release_offload_resources(toep);
				tp = tcp_close(tp);
			}
		}
	}
	if (tp)
		INP_UNLOCK(tp->t_inpcb);
	INP_INFO_WUNLOCK(&tcbinfo);

	m_free(m);
}

/*
 * Handle an ABORT_RPL_RSS CPL message.
 */
static int
do_abort_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct socket *so;
	struct cpl_abort_rpl_rss *rpl = cplhdr(m);
	struct toepcb *toep;

	/*
	 * Ignore replies to post-close aborts indicating that the abort was
	 * requested too late.  These connections are terminated when we get
	 * PEER_CLOSE or CLOSE_CON_RPL and by the time the abort_rpl_rss
	 * arrives the TID is either no longer used or it has been recycled.
	 */
	if (rpl->status == CPL_ERR_ABORT_FAILED) {
discard:
		m_free(m);
		return (0);
	}

	toep = (struct toepcb *)ctx;

	/*
	 * Sometimes we've already closed the socket, e.g., a post-close
	 * abort races with ABORT_REQ_RSS, the latter frees the socket
	 * expecting the ABORT_REQ will fail with CPL_ERR_ABORT_FAILED,
	 * but FW turns the ABORT_REQ into a regular one and so we get
	 * ABORT_RPL_RSS with status 0 and no socket.  Only on T3A.
	 */
	if (!toep)
		goto discard;

	if (toep->tp_tp == NULL) {
		printf("removing tid for abort\n");
		cxgb_remove_tid(cdev, toep, toep->tp_tid);
		if (toep->tp_l2t)
			l2t_release(L2DATA(cdev), toep->tp_l2t);

		toepcb_release(toep);
		goto discard;
	}

	printf("toep=%p\n", toep);
	printf("tp=%p\n", toep->tp_tp);

	so = toeptoso(toep);	/* <- XXX panic */
	toepcb_hold(toep);
	process_abort_rpl(so, m);
	toepcb_release(toep);
	return (0);
}

/*
 * Convert the status code of an ABORT_REQ into a FreeBSD error code.  Also
 * indicate whether RST should be sent in response.
 */
static int
abort_status_to_errno(struct socket *so, int abort_reason, int *need_rst)
{
	struct tcpcb *tp = sototcpcb(so);

	switch (abort_reason) {
	case CPL_ERR_BAD_SYN:
#if 0
		NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN);	// fall through
#endif
	case CPL_ERR_CONN_RESET:
		// XXX need to handle SYN_RECV due to crossed SYNs
		return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET);
	case CPL_ERR_XMIT_TIMEDOUT:
	case CPL_ERR_PERSIST_TIMEDOUT:
	case CPL_ERR_FINWAIT2_TIMEDOUT:
	case CPL_ERR_KEEPALIVE_TIMEDOUT:
#if 0
		NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT);
#endif
		return (ETIMEDOUT);
	default:
		return (EIO);
	}
}

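/*
 * Fill in the ABORT_RPL work request carried by the given mbuf.
 */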
static inline void
set_abort_rpl_wr(struct mbuf *m, unsigned int tid, int cmd)
{
	struct cpl_abort_rpl *rpl = cplhdr(m);

	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
	rpl->wr.wr_lo = htonl(V_WR_TID(tid));
	m->m_len = m->m_pkthdr.len = sizeof(*rpl);

	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid));
	rpl->cmd = cmd;
}

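/*
 * Send an ABORT_RPL that had to be deferred for lack of memory; by the
 * time this runs send_abort_rpl() has stashed the desired RST status in
 * req->status.
 */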
static void
send_deferred_abort_rpl(struct toedev *tdev, struct mbuf *m)
{
	struct mbuf *reply_mbuf;
	struct cpl_abort_req_rss *req = cplhdr(m);

	reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_abort_rpl));
	m_set_priority(m, CPL_PRIORITY_DATA);
	m->m_len = m->m_pkthdr.len = sizeof(struct cpl_abort_rpl);
	set_abort_rpl_wr(reply_mbuf, GET_TID(req), req->status);
	cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
	m_free(m);
}

/*
 * Returns whether an ABORT_REQ_RSS message is a negative advice.
 */
static inline int
is_neg_adv_abort(unsigned int status)
{
	return (status == CPL_ERR_RTX_NEG_ADVICE ||
	    status == CPL_ERR_PERSIST_NEG_ADVICE);
}

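/*
 * Reply to an ABORT_REQ_RSS message.  If no mbuf is available for the
 * reply right now, stash the RST status in the request and defer.
 */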
static void
send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status)
{
	struct mbuf *reply_mbuf;
	struct cpl_abort_req_rss *req = cplhdr(m);

	reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA);

	if (!reply_mbuf) {
		/* Defer the reply.  Stick rst_status into req->cmd. */
		req->status = rst_status;
		t3_defer_reply(m, tdev, send_deferred_abort_rpl);
		return;
	}

	m_set_priority(reply_mbuf, CPL_PRIORITY_DATA);
	set_abort_rpl_wr(reply_mbuf, GET_TID(req), rst_status);
	m_free(m);

	/*
	 * XXX need to sync with ARP as for SYN_RECV connections we can send
	 * these messages while ARP is pending.  For other connection states
	 * it's not a problem.
	 */
	cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
}

#ifdef notyet
static void
cleanup_syn_rcv_conn(struct socket *child, struct socket *parent)
{
	CXGB_UNIMPLEMENTED();
#ifdef notyet
	struct request_sock *req = child->sk_user_data;

	inet_csk_reqsk_queue_removed(parent, req);
	synq_remove(tcp_sk(child));
	__reqsk_free(req);
	child->sk_user_data = NULL;
#endif
}

/*
 * Performs the actual work to abort a SYN_RECV connection.
 */
static void
do_abort_syn_rcv(struct socket *child, struct socket *parent)
{
	struct tcpcb *parenttp = sototcpcb(parent);
	struct tcpcb *childtp = sototcpcb(child);

	/*
	 * If the server is still open we clean up the child connection,
	 * otherwise the server already did the clean up as it was purging
	 * its SYN queue and the skb was just sitting in its backlog.
	 */
	if (__predict_false(parenttp->t_state == TCPS_LISTEN)) {
		cleanup_syn_rcv_conn(child, parent);
		INP_INFO_WLOCK(&tcbinfo);
		INP_LOCK(childtp->t_inpcb);
		t3_release_offload_resources(childtp->t_toe);
		childtp = tcp_close(childtp);
		INP_INFO_WUNLOCK(&tcbinfo);
		if (childtp)
			INP_UNLOCK(childtp->t_inpcb);
	}
}
#endif

/*
 * Handle abort requests for a SYN_RECV connection.  These need extra work
 * because the socket is on its parent's SYN queue.
 */
static int
abort_syn_rcv(struct socket *so, struct mbuf *m)
{
	CXGB_UNIMPLEMENTED();
#ifdef notyet
	struct socket *parent;
	struct toedev *tdev = TOE_DEV(so);
	struct t3cdev *cdev = TOM_DATA(tdev)->cdev;
	struct socket *oreq = so->so_incomp;
	struct t3c_tid_entry *t3c_stid;
	struct tid_info *t;

	if (!oreq)
		return -1;	/* somehow we are not on the SYN queue */

	t = &(T3C_DATA(cdev))->tid_maps;
	t3c_stid = lookup_stid(t, oreq->ts_recent);
	parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;

	SOCK_LOCK(parent);
	do_abort_syn_rcv(so, parent);
	send_abort_rpl(m, tdev, CPL_ABORT_NO_RST);
	SOCK_UNLOCK(parent);
#endif
	return (0);
}

/*
 * Process abort requests.  If we are waiting for an ABORT_RPL we ignore this
 * request except that we need to reply to it.
 */
static void
process_abort_req(struct socket *so, struct mbuf *m, struct toedev *tdev)
{
	int rst_status = CPL_ABORT_NO_RST;
	const struct cpl_abort_req_rss *req = cplhdr(m);
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	INP_LOCK(tp->t_inpcb);
	if ((toep->tp_flags & TP_ABORT_REQ_RCVD) == 0) {
		toep->tp_flags |= (TP_ABORT_REQ_RCVD|TP_ABORT_SHUTDOWN);
		m_free(m);
		goto skip;
	}

	toep->tp_flags &= ~TP_ABORT_REQ_RCVD;
	/*
	 * Three cases to consider:
	 * a) We haven't sent an abort_req; close the connection.
	 * b) We have sent a post-close abort_req that will get to TP too late
	 *    and will generate a CPL_ERR_ABORT_FAILED reply.  The reply will
	 *    be ignored and the connection should be closed now.
	 * c) We have sent a regular abort_req that will get to TP too late.
	 *    That will generate an abort_rpl with status 0, wait for it.
	 */
	if (((toep->tp_flags & TP_ABORT_RPL_PENDING) == 0) ||
	    (is_t3a(TOE_DEV(so)) && (toep->tp_flags & TP_CLOSE_CON_REQUESTED))) {
		so->so_error = abort_status_to_errno(so, req->status,
		    &rst_status);
		if (__predict_true((so->so_state & SS_NOFDREF) == 0))
			sorwakeup(so);
		/*
		 * SYN_RECV needs special processing.  If abort_syn_rcv()
		 * returns 0 it has taken care of the abort.
		 */
		if ((tp->t_state == TCPS_SYN_RECEIVED) && !abort_syn_rcv(so, m))
			goto skip;

		t3_release_offload_resources(toep);
		tp = tcp_close(tp);
	}
	if (tp)
		INP_UNLOCK(tp->t_inpcb);
	send_abort_rpl(m, tdev, rst_status);
	return;

skip:
	INP_UNLOCK(tp->t_inpcb);
}

/*
 * Handle an ABORT_REQ_RSS CPL message.
 */
static int
do_abort_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	const struct cpl_abort_req_rss *req = cplhdr(m);
	struct toepcb *toep = (struct toepcb *)ctx;
	struct socket *so;
	struct inpcb *inp;

	if (is_neg_adv_abort(req->status)) {
		m_free(m);
		return (0);
	}

	printf("aborting tid=%d\n", toep->tp_tid);

	if ((toep->tp_flags & (TP_SYN_RCVD|TP_ABORT_REQ_RCVD)) == TP_SYN_RCVD) {
		cxgb_remove_tid(cdev, toep, toep->tp_tid);
		toep->tp_flags |= TP_ABORT_REQ_RCVD;
		printf("sending abort rpl\n");

		send_abort_rpl(m, toep->tp_toedev, CPL_ABORT_NO_RST);
		printf("sent\n");
		if (toep->tp_l2t)
			l2t_release(L2DATA(cdev), toep->tp_l2t);

		/*
		 * Unhook
		 */
		toep->tp_tp->t_toe = NULL;
		toep->tp_tp->t_flags &= ~TF_TOE;
		toep->tp_tp = NULL;
		/*
		 * XXX need to call syncache_chkrst - but we don't
		 * have a way of doing that yet
		 */
		toepcb_release(toep);
		printf("abort for unestablished connection :-(\n");
		return (0);
	}
	if (toep->tp_tp == NULL) {
		printf("disconnected toepcb\n");
		/* should be freed momentarily */
		return (0);
	}

	so = toeptoso(toep);
	inp = sotoinpcb(so);

	VALIDATE_SOCK(so);
	toepcb_hold(toep);
	INP_INFO_WLOCK(&tcbinfo);
	process_abort_req(so, m, TOE_DEV(so));
	INP_INFO_WUNLOCK(&tcbinfo);
	toepcb_release(toep);
	return (0);
}
#ifdef notyet
static void
pass_open_abort(struct socket *child, struct socket *parent, struct mbuf *m)
{
	struct toedev *tdev = TOE_DEV(parent);

	do_abort_syn_rcv(child, parent);
	if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) {
		struct cpl_pass_accept_rpl *rpl = cplhdr(m);

		rpl->opt0h = htonl(F_TCAM_BYPASS);
		rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
		cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
	} else
		m_free(m);
}
#endif
static void
handle_pass_open_arp_failure(struct socket *so, struct mbuf *m)
{
	CXGB_UNIMPLEMENTED();

#ifdef notyet
	struct t3cdev *cdev;
	struct socket *parent;
	struct socket *oreq;
	struct t3c_tid_entry *t3c_stid;
	struct tid_info *t;
	struct tcpcb *otp, *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	/*
	 * If the connection is being aborted due to the parent listening
	 * socket going away there's nothing to do, the ABORT_REQ will close
	 * the connection.
	 */
	if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
		m_free(m);
		return;
	}

	oreq = so->so_incomp;
	otp = sototcpcb(oreq);

	cdev = T3C_DEV(so);
	t = &(T3C_DATA(cdev))->tid_maps;
	t3c_stid = lookup_stid(t, otp->ts_recent);
	parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;

	SOCK_LOCK(parent);
	pass_open_abort(so, parent, m);
	SOCK_UNLOCK(parent);
#endif
}

/*
 * Handle an ARP failure for a CPL_PASS_ACCEPT_RPL.  This is treated similarly
 * to an ABORT_REQ_RSS in SYN_RECV as both events need to tear down a SYN_RECV
 * connection.
 */
static void
pass_accept_rpl_arp_failure(struct t3cdev *cdev, struct mbuf *m)
{

#ifdef notyet
	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
	BLOG_SKB_CB(skb)->dev = TOE_DEV(skb->sk);
#endif
	handle_pass_open_arp_failure(m_get_socket(m), m);
}

/*
 * Populate a reject CPL_PASS_ACCEPT_RPL WR.
 */
static void
mk_pass_accept_rpl(struct mbuf *reply_mbuf, struct mbuf *req_mbuf)
{
	struct cpl_pass_accept_req *req = cplhdr(req_mbuf);
	struct cpl_pass_accept_rpl *rpl = cplhdr(reply_mbuf);
	unsigned int tid = GET_TID(req);

	m_set_priority(reply_mbuf, CPL_PRIORITY_SETUP);
	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid));
	rpl->peer_ip = req->peer_ip;	// req->peer_ip not overwritten yet
	rpl->opt0h = htonl(F_TCAM_BYPASS);
	rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
	rpl->opt2 = 0;
	rpl->rsvd = rpl->opt2;	/* workaround for HW bug */
}

/*
 * Send a deferred reject to an accept request.
 */
static void
reject_pass_request(struct toedev *tdev, struct mbuf *m)
{
	struct mbuf *reply_mbuf;

	reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_pass_accept_rpl));
	mk_pass_accept_rpl(reply_mbuf, m);
	cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
	m_free(m);
}

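/*
 * Syncache callback for embryonic offloaded connections.  In both handled
 * cases the reference taken on the toepcb when the entry was added is
 * dropped here.
 */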
static void
handle_syncache_event(int event, void *arg)
{
	struct toepcb *toep = arg;

	switch (event) {
	case TOE_SC_ENTRY_PRESENT:
		/*
		 * entry already exists - free toepcb
		 * and l2t
		 */
		printf("syncache entry present\n");
		toepcb_release(toep);
		break;
	case TOE_SC_DROP:
		/*
		 * The syncache has given up on this entry: either it timed
		 * out or it was evicted.  We need to explicitly release the
		 * tid.
		 */
		printf("syncache entry dropped\n");
		toepcb_release(toep);
		break;
	default:
		log(LOG_ERR, "unknown syncache event %d\n", event);
		break;
	}
}

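/*
 * Build the in_conninfo, TCP options, and template TCP header needed to
 * enter an incoming passive open into the syncache.
 */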
static void
syncache_add_accept_req(struct cpl_pass_accept_req *req, struct socket *lso, struct toepcb *toep)
{
	struct in_conninfo inc;
	struct tcpopt to;
	struct tcphdr th;
	struct inpcb *inp;
	int mss, wsf, sack, ts;
	uint32_t rcv_isn = ntohl(req->rcv_isn);

	bzero(&to, sizeof(struct tcpopt));
	inp = sotoinpcb(lso);

	/*
	 * Fill out information for entering us into the syncache
	 */
	inc.inc_fport = th.th_sport = req->peer_port;
	inc.inc_lport = th.th_dport = req->local_port;
	th.th_seq = req->rcv_isn;
	th.th_flags = TH_SYN;

	toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn + 1;

	inc.inc_isipv6 = 0;
	inc.inc_len = 0;
	inc.inc_faddr.s_addr = req->peer_ip;
	inc.inc_laddr.s_addr = req->local_ip;

	DPRINTF("syncache add of %d:%d %d:%d\n",
	    ntohl(req->local_ip), ntohs(req->local_port),
	    ntohl(req->peer_ip), ntohs(req->peer_port));

	mss = req->tcp_options.mss;
	wsf = req->tcp_options.wsf;
	ts = req->tcp_options.tstamp;
	sack = req->tcp_options.sack;
	to.to_mss = mss;
	to.to_wscale = wsf;
	to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);
	INP_INFO_WLOCK(&tcbinfo);
	INP_LOCK(inp);
	syncache_offload_add(&inc, &to, &th, inp, &lso, &cxgb_toe_usrreqs, toep);
}

/*
 * Process a CPL_PASS_ACCEPT_REQ message.  Does the part that needs the socket
 * lock held.  Note that the sock here is a listening socket that is not owned
 * by the TOE.
 */
static void
process_pass_accept_req(struct socket *so, struct mbuf *m, struct toedev *tdev,
    struct listen_ctx *lctx)
{
	int rt_flags;
	struct l2t_entry *e;
	struct iff_mac tim;
	struct mbuf *reply_mbuf, *ddp_mbuf = NULL;
	struct cpl_pass_accept_rpl *rpl;
	struct cpl_pass_accept_req *req = cplhdr(m);
	unsigned int tid = GET_TID(req);
	struct tom_data *d = TOM_DATA(tdev);
	struct t3cdev *cdev = d->cdev;
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *newtoep;
	struct rtentry *dst;
	struct sockaddr_in nam;
	struct t3c_data *td = T3C_DATA(cdev);

	reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
	if (__predict_false(reply_mbuf == NULL)) {
		if (tdev->tod_ttid == TOE_ID_CHELSIO_T3)
			t3_defer_reply(m, tdev, reject_pass_request);
		else {
			cxgb_queue_tid_release(cdev, tid);
			m_free(m);
		}
		DPRINTF("failed to get reply_mbuf\n");

		goto out;
	}

	if (tp->t_state != TCPS_LISTEN) {
		DPRINTF("socket not in listen state\n");

		goto reject;
	}

	tim.mac_addr = req->dst_mac;
	tim.vlan_tag = ntohs(req->vlan_tag);
	if (cdev->ctl(cdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
		DPRINTF("rejecting from failed GET_IFF_FROM_MAC\n");
		goto reject;
	}

#ifdef notyet
	/*
	 * XXX do route lookup to confirm that we're still listening on this
	 * address
	 */
	if (ip_route_input(skb, req->local_ip, req->peer_ip,
	    G_PASS_OPEN_TOS(ntohl(req->tos_tid)), tim.dev))
		goto reject;
	rt_flags = ((struct rtable *)skb->dst)->rt_flags &
	    (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL);
	dst_release(skb->dst);	// done with the input route, release it
	skb->dst = NULL;

	if ((rt_flags & RTF_LOCAL) == 0)
		goto reject;
#endif
	/*
	 * XXX
	 */
	rt_flags = RTF_LOCAL;
	if ((rt_flags & RTF_LOCAL) == 0)
		goto reject;

	/*
	 * Calculate values and add to syncache
	 */

	newtoep = toepcb_alloc();
	if (newtoep == NULL)
		goto reject;

	bzero(&nam, sizeof(struct sockaddr_in));

	nam.sin_len = sizeof(struct sockaddr_in);
	nam.sin_family = AF_INET;
	nam.sin_addr.s_addr = req->peer_ip;
	dst = rtalloc2((struct sockaddr *)&nam, 1, 0);

	if (dst == NULL) {
		printf("failed to find route\n");
		goto reject;
	}
	e = newtoep->tp_l2t = t3_l2t_get(d->cdev, dst, tim.dev,
	    (struct sockaddr *)&nam);
	if (e == NULL) {
		DPRINTF("failed to get l2t\n");
	}
	/*
	 * Point to our listen socket until accept
	 */
	newtoep->tp_tp = tp;
	newtoep->tp_flags = TP_SYN_RCVD;
	newtoep->tp_tid = tid;
	newtoep->tp_toedev = tdev;
	tp->rcv_wnd = select_rcv_wnd(tdev, so);

	cxgb_insert_tid(cdev, d->client, newtoep, tid);
	SOCK_LOCK(so);
	LIST_INSERT_HEAD(&lctx->synq_head, newtoep, synq_entry);
	SOCK_UNLOCK(so);

	newtoep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && !(so->so_options & SO_NO_DDP) &&
	    tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;

	if (newtoep->tp_ulp_mode) {
		ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA);

		if (ddp_mbuf == NULL)
			newtoep->tp_ulp_mode = 0;
	}

	CTR4(KTR_TOM, "ddp=%d rcv_wnd=%ld min_win=%d ulp_mode=%d",
	    TOM_TUNABLE(tdev, ddp), tp->rcv_wnd, MIN_DDP_RCV_WIN, newtoep->tp_ulp_mode);
	set_arp_failure_handler(reply_mbuf, pass_accept_rpl_arp_failure);
	/*
	 * XXX workaround for lack of syncache drop
	 */
	toepcb_hold(newtoep);
	syncache_add_accept_req(req, so, newtoep);

	rpl = cplhdr(reply_mbuf);
	reply_mbuf->m_pkthdr.len = reply_mbuf->m_len = sizeof(*rpl);
	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	rpl->wr.wr_lo = 0;
	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid));
	rpl->opt2 = htonl(calc_opt2(so, tdev));
	rpl->rsvd = rpl->opt2;	/* workaround for HW bug */
	rpl->peer_ip = req->peer_ip;	// req->peer_ip is not overwritten

	rpl->opt0h = htonl(calc_opt0h(so, select_mss(td, NULL, dst->rt_ifp->if_mtu)) |
	    V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx));
	rpl->opt0l_status = htonl(calc_opt0l(so, newtoep->tp_ulp_mode) |
	    CPL_PASS_OPEN_ACCEPT);

	DPRINTF("opt0l_status=%08x\n", rpl->opt0l_status);

	m_set_priority(reply_mbuf, mkprio(CPL_PRIORITY_SETUP, newtoep));

	l2t_send(cdev, reply_mbuf, e);
	m_free(m);
	if (newtoep->tp_ulp_mode) {
		__set_tcb_field(newtoep, ddp_mbuf, W_TCB_RX_DDP_FLAGS,
		    V_TF_DDP_OFF(1) |
		    TP_DDP_TIMER_WORKAROUND_MASK,
		    V_TF_DDP_OFF(1) |
		    TP_DDP_TIMER_WORKAROUND_VAL, 1);
	} else
		printf("not offloading\n");

	return;
reject:
	if (tdev->tod_ttid == TOE_ID_CHELSIO_T3)
		mk_pass_accept_rpl(reply_mbuf, m);
	else
		mk_tid_release(reply_mbuf, newtoep, tid);
	cxgb_ofld_send(cdev, reply_mbuf);
	m_free(m);
out:
#if 0
	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
#else
	return;
#endif
}

/*
 * Handle a CPL_PASS_ACCEPT_REQ message.
 */
static int
do_pass_accept_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx;
	struct socket *lso = listen_ctx->lso;
	struct tom_data *d = listen_ctx->tom_data;

#if VALIDATE_TID
	struct cpl_pass_accept_req *req = cplhdr(m);
	unsigned int tid = GET_TID(req);
	struct tid_info *t = &(T3C_DATA(cdev))->tid_maps;

	if (unlikely(!lsk)) {
		printk(KERN_ERR "%s: PASS_ACCEPT_REQ had unknown STID %lu\n",
		    cdev->name,
		    (unsigned long)((union listen_entry *)ctx -
		    t->stid_tab));
		return CPL_RET_BUF_DONE;
	}
	if (unlikely(tid >= t->ntids)) {
		printk(KERN_ERR "%s: passive open TID %u too large\n",
		    cdev->name, tid);
		return CPL_RET_BUF_DONE;
	}
	/*
	 * For T3A the current user of the TID may have closed but its last
	 * message(s) may have been backlogged so the TID appears to be still
	 * in use.  Just take the TID away, the connection can close at its
	 * own leisure.  For T3B this situation is a bug.
	 */
	if (!valid_new_tid(t, tid) &&
	    cdev->type != T3A) {
		printk(KERN_ERR "%s: passive open uses existing TID %u\n",
		    cdev->name, tid);
		return CPL_RET_BUF_DONE;
	}
#endif

	process_pass_accept_req(lso, m, &d->tdev, listen_ctx);
	return (0);
}

/*
 * Called when a connection is established to translate the TCP options
 * reported by HW to FreeBSD's native format.
 */
static void
assign_rxopt(struct socket *so, unsigned int opt)
{
	const struct t3c_data *td = T3C_DATA(T3C_DEV(so));
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	INP_LOCK_ASSERT(tp->t_inpcb);

	toep->tp_mss_clamp = td->mtus[G_TCPOPT_MSS(opt)] - 40;
	tp->t_flags |= G_TCPOPT_TSTAMP(opt) ? TF_RCVD_TSTMP : 0;
	tp->t_flags |= G_TCPOPT_SACK(opt) ? TF_SACK_PERMIT : 0;
	tp->t_flags |= G_TCPOPT_WSCALE_OK(opt) ? TF_RCVD_SCALE : 0;
	if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
	    (TF_RCVD_SCALE|TF_REQ_SCALE))
		tp->rcv_scale = tp->request_r_scale;
}

/*
 * Completes some final bits of initialization for just established connections
 * and changes their state to TCP_ESTABLISHED.
 *
 * snd_isn here is the ISN after the SYN, i.e., the true ISN + 1.
 */
static void
make_established(struct socket *so, u32 snd_isn, unsigned int opt)
{
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	toep->tp_write_seq = tp->iss = tp->snd_max = tp->snd_nxt = tp->snd_una = snd_isn;
	assign_rxopt(so, opt);
	so->so_proto->pr_ctloutput = t3_ctloutput;

#if 0
	inet_sk(sk)->id = tp->write_seq ^ jiffies;
#endif
	/*
	 * XXX not clear what rcv_wup maps to
	 */
	/*
	 * Causes the first RX_DATA_ACK to supply any Rx credits we couldn't
	 * pass through opt0.
	 */
	if (tp->rcv_wnd > (M_RCV_BUFSIZ << 10))
		toep->tp_rcv_wup -= tp->rcv_wnd - (M_RCV_BUFSIZ << 10);

	dump_toepcb(toep);

#ifdef notyet
	/*
	 * no clean interface for marking ARP up to date
	 */
	dst_confirm(sk->sk_dst_cache);
#endif
	tp->t_starttime = ticks;
	tp->t_state = TCPS_ESTABLISHED;
	soisconnected(so);
}

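/*
 * Reconstruct the syncache lookup arguments from a CPL_PASS_ESTABLISH
 * message and expand the matching syncache entry into a full socket.
 */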
static int
syncache_expand_establish_req(struct cpl_pass_establish *req, struct socket **so, struct toepcb *toep)
{
	struct in_conninfo inc;
	struct tcpopt to;
	struct tcphdr th;
	int mss, wsf, sack, ts;
	struct mbuf *m = NULL;
	const struct t3c_data *td = T3C_DATA(TOM_DATA(toep->tp_toedev)->cdev);
	unsigned int opt;

#ifdef MAC
#error	"no MAC support"
#endif

	opt = ntohs(req->tcp_opt);

	bzero(&to, sizeof(struct tcpopt));

	/*
	 * Fill out information for entering us into the syncache
	 */
	inc.inc_fport = th.th_sport = req->peer_port;
	inc.inc_lport = th.th_dport = req->local_port;
	th.th_seq = req->rcv_isn;
	th.th_flags = TH_ACK;

	inc.inc_isipv6 = 0;
	inc.inc_len = 0;
	inc.inc_faddr.s_addr = req->peer_ip;
	inc.inc_laddr.s_addr = req->local_ip;

	mss = td->mtus[G_TCPOPT_MSS(opt)] - 40;
	wsf = G_TCPOPT_WSCALE_OK(opt);
	ts = G_TCPOPT_TSTAMP(opt);
	sack = G_TCPOPT_SACK(opt);

	to.to_mss = mss;
	to.to_wscale = G_TCPOPT_SND_WSCALE(opt);
	to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);

	DPRINTF("syncache expand of %d:%d %d:%d mss:%d wsf:%d ts:%d sack:%d\n",
	    ntohl(req->local_ip), ntohs(req->local_port),
	    ntohl(req->peer_ip), ntohs(req->peer_port),
	    mss, wsf, ts, sack);
	return syncache_expand(&inc, &to, &th, so, m);
}

/*
 * Process a CPL_PASS_ESTABLISH message.  XXX a lot of the locking doesn't work
 * if we are in TCP_SYN_RECV due to crossed SYNs
 */
static int
do_pass_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct cpl_pass_establish *req = cplhdr(m);
	struct toepcb *toep = (struct toepcb *)ctx;
	struct tcpcb *tp;
	struct socket *so, *lso;
	struct t3c_data *td = T3C_DATA(cdev);
	// Complete socket initialization now that we have the SND_ISN

	struct toedev *tdev;

	so = lso = toeptoso(toep);
	tdev = toep->tp_toedev;

	SOCK_LOCK(so);
	LIST_REMOVE(toep, synq_entry);
	SOCK_UNLOCK(so);

	INP_INFO_WLOCK(&tcbinfo);
	if (!syncache_expand_establish_req(req, &so, toep)) {
		/*
		 * No entry
		 */
		CXGB_UNIMPLEMENTED();
	}
	if (so == NULL) {
		/*
		 * Couldn't create the socket
		 */
		CXGB_UNIMPLEMENTED();
	}

	/*
	 * XXX workaround for lack of syncache drop
	 */
	toepcb_release(toep);

	tp = sototcpcb(so);
	INP_LOCK(tp->t_inpcb);

	so->so_snd.sb_flags |= SB_NOCOALESCE;
	so->so_rcv.sb_flags |= SB_NOCOALESCE;

	toep->tp_tp = tp;
	toep->tp_flags = 0;
	tp->t_toe = toep;
	reset_wr_list(toep);
	tp->rcv_wnd = select_rcv_wnd(tdev, so);
	tp->rcv_nxt = toep->tp_copied_seq;
	install_offload_ops(so);

	toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(tdev, max_wrs);
	toep->tp_wr_unacked = 0;
	toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
	toep->tp_qset_idx = 0;
	toep->tp_mtu_idx = select_mss(td, tp, toep->tp_l2t->neigh->rt_ifp->if_mtu);

	/*
	 * XXX Cancel any keep alive timer
	 */

	make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));
	INP_INFO_WUNLOCK(&tcbinfo);
	INP_UNLOCK(tp->t_inpcb);

	CTR1(KTR_TOM, "do_pass_establish tid=%u", toep->tp_tid);
	cxgb_log_tcb(cdev->adapter, toep->tp_tid);
#ifdef notyet
	/*
	 * XXX not sure how these checks map to us
	 */
	if (unlikely(sk->sk_socket)) {	// simultaneous opens only
		sk->sk_state_change(sk);
		sk_wake_async(so, 0, POLL_OUT);
	}
	/*
	 * The state for the new connection is now up to date.
	 * Next check if we should add the connection to the parent's
	 * accept queue.  When the parent closes it resets connections
	 * on its SYN queue, so check if we are being reset.  If so we
	 * don't need to do anything more, the coming ABORT_RPL will
	 * destroy this socket.  Otherwise move the connection to the
	 * accept queue.
	 *
	 * Note that we reset the synq before closing the server so if
	 * we are not being reset the stid is still open.
	 */
	if (unlikely(!tp->forward_skb_hint)) {	// removed from synq
		__kfree_skb(skb);
		goto unlock;
	}
#endif
	m_free(m);

	return (0);
}

/*
 * Fill in the right TID for CPL messages waiting in the out-of-order queue
 * and send them to the TOE.
 */
static void
fixup_and_send_ofo(struct socket *so)
{
	struct mbuf *m;
	struct toedev *tdev = TOE_DEV(so);
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;
	unsigned int tid = toep->tp_tid;

	printf("fixup_and_send_ofo\n");

	INP_LOCK_ASSERT(tp->t_inpcb);
	while ((m = mbufq_dequeue(&toep->out_of_order_queue)) != NULL) {
		/*
		 * A variety of messages can be waiting but the fields we'll
		 * be touching are common to all so any message type will do.
		 */
		struct cpl_close_con_req *p = cplhdr(m);

		p->wr.wr_lo = htonl(V_WR_TID(tid));
		OPCODE_TID(p) = htonl(MK_OPCODE_TID(p->ot.opcode, tid));
		cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
	}
}

/*
 * Updates socket state from an active establish CPL message.  Runs with the
 * socket lock held.
 */
static void
socket_act_establish(struct socket *so, struct mbuf *m)
{
	struct cpl_act_establish *req = cplhdr(m);
	u32 rcv_isn = ntohl(req->rcv_isn);	/* real RCV_ISN + 1 */
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	if (__predict_false(tp->t_state != TCPS_SYN_SENT))
		log(LOG_ERR, "TID %u expected SYN_SENT, found %d\n",
		    toep->tp_tid, tp->t_state);

	tp->ts_recent_age = ticks;
	tp->irs = tp->rcv_wnd = tp->rcv_nxt = rcv_isn;
	toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = tp->irs;

	make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));

	/*
	 * Now that we finally have a TID send any CPL messages that we had to
	 * defer for lack of a TID.
	 */
	if (mbufq_len(&toep->out_of_order_queue))
		fixup_and_send_ofo(so);

	if (__predict_false(so->so_state & SS_NOFDREF)) {
		/*
		 * XXX does this even make sense?
		 */
		sorwakeup(so);
	}
	m_free(m);
#ifdef notyet
	/*
	 * XXX assume no write requests permitted while socket connection is
	 * incomplete
	 */
	/*
	 * Currently the send queue must be empty at this point because the
	 * socket layer does not send anything before a connection is
	 * established.  To be future proof though we handle the possibility
	 * that there are pending buffers to send (either TX_DATA or
	 * CLOSE_CON_REQ).  First we need to adjust the sequence number of the
	 * buffers according to the just learned write_seq, and then we send
	 * them on their way.
	 */
	fixup_pending_writeq_buffers(sk);
	if (t3_push_frames(so, 1))
		sk->sk_write_space(sk);
#endif

	toep->tp_state = tp->t_state;
	tcpstat.tcps_connects++;
}

/*
 * Process a CPL_ACT_ESTABLISH message.
 */
static int
do_act_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct cpl_act_establish *req = cplhdr(m);
	unsigned int tid = GET_TID(req);
	unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
	struct toepcb *toep = (struct toepcb *)ctx;
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	struct toedev *tdev;
	struct tom_data *d;

	if (tp == NULL) {
		free_atid(cdev, atid);
		return (0);
	}

	so = toeptoso(toep);
	tdev = TOE_DEV(so);	/* blow up here if link was down */
	d = TOM_DATA(tdev);

	INP_LOCK(tp->t_inpcb);

	/*
	 * It's OK if the TID is currently in use, the owning socket may have
	 * backlogged its last CPL message(s).  Just take it away.
	 */
	toep->tp_tid = tid;
	toep->tp_tp = tp;
	so_insert_tid(d, so, tid);
	free_atid(cdev, atid);
	toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));

	socket_act_establish(so, m);
	INP_UNLOCK(tp->t_inpcb);
	CTR1(KTR_TOM, "do_act_establish tid=%u", toep->tp_tid);
	cxgb_log_tcb(cdev->adapter, toep->tp_tid);

	return (0);
}

/*
 * Process an acknowledgment of WR completion.  Advance snd_una and send the
 * next batch of work requests from the write queue.
 */
static void
wr_ack(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp = toep->tp_tp;
	struct cpl_wr_ack *hdr = cplhdr(m);
	struct socket *so = toeptoso(toep);
	unsigned int credits = ntohs(hdr->credits);
	u32 snd_una = ntohl(hdr->snd_una);
	int bytes = 0;

	CTR2(KTR_SPARE2, "wr_ack: snd_una=%u credits=%d", snd_una, credits);

	INP_LOCK(tp->t_inpcb);

	toep->tp_wr_avail += credits;
	if (toep->tp_wr_unacked > toep->tp_wr_max - toep->tp_wr_avail)
		toep->tp_wr_unacked = toep->tp_wr_max - toep->tp_wr_avail;

	while (credits) {
		struct mbuf *p = peek_wr(toep);

		if (__predict_false(!p)) {
			log(LOG_ERR, "%u WR_ACK credits for TID %u with "
			    "nothing pending, state %u wr_avail=%u\n",
			    credits, toep->tp_tid, tp->t_state, toep->tp_wr_avail);
			break;
		}
		CTR2(KTR_TOM,
		    "wr_ack: p->credits=%d p->bytes=%d", p->m_pkthdr.csum_data, p->m_pkthdr.len);

		KASSERT(p->m_pkthdr.csum_data != 0, ("empty request still on list"));
		if (__predict_false(credits < p->m_pkthdr.csum_data)) {

#if DEBUG_WR > 1
			struct tx_data_wr *w = cplhdr(p);
			log(LOG_ERR,
			    "TID %u got %u WR credits, need %u, len %u, "
			    "main body %u, frags %u, seq # %u, ACK una %u,"
			    " ACK nxt %u, WR_AVAIL %u, WRs pending %u\n",
			    toep->tp_tid, credits, p->csum, p->len,
			    p->len - p->data_len, skb_shinfo(p)->nr_frags,
			    ntohl(w->sndseq), snd_una, ntohl(hdr->snd_nxt),
			    toep->tp_wr_avail, count_pending_wrs(tp) - credits);
#endif
			p->m_pkthdr.csum_data -= credits;
			break;
		} else {
			dequeue_wr(toep);
			credits -= p->m_pkthdr.csum_data;
			bytes += p->m_pkthdr.len;
			CTR3(KTR_TOM,
			    "wr_ack: done with wr of %d bytes remain credits=%d wr credits=%d",
			    p->m_pkthdr.len, credits, p->m_pkthdr.csum_data);

			m_free(p);
		}
	}

#if DEBUG_WR
	check_wr_invariants(tp);
#endif

	if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) {
#if VALIDATE_SEQ
		struct tom_data *d = TOM_DATA(TOE_DEV(so));

		log(LOG_ERR, "%s: unexpected sequence # %u in WR_ACK "
		    "for TID %u, snd_una %u\n", (&d->tdev)->name, snd_una,
		    toep->tp_tid, tp->snd_una);
#endif
		goto out_free;
	}

	if (tp->snd_una != snd_una) {
		tp->snd_una = snd_una;
		tp->ts_recent_age = ticks;
#ifdef notyet
		/*
		 * Keep ARP entry "minty fresh"
		 */
		dst_confirm(sk->sk_dst_cache);
#endif
		if (tp->snd_una == tp->snd_nxt)
			toep->tp_flags &= ~TP_TX_WAIT_IDLE;
	}
	if (bytes) {
		CTR1(KTR_SPARE2, "wr_ack: sbdrop(%d)", bytes);
		SOCKBUF_LOCK(&so->so_snd);
		sbdrop_locked(&so->so_snd, bytes);
		sowwakeup_locked(so);
	}

	if (so->so_snd.sb_sndptroff < so->so_snd.sb_cc)
		t3_push_frames(so, 0);

out_free:
	INP_UNLOCK(tp->t_inpcb);
	m_free(m);
}

/*
 * Handler for TX_DATA_ACK CPL messages.
 */
static int
do_wr_ack(struct t3cdev *dev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;

	VALIDATE_SOCK(so);

	wr_ack(toep, m);
	return 0;
}

/*
 * Handler for TRACE_PKT CPL messages.  Just sink these packets.
 */
static int
do_trace_pkt(struct t3cdev *dev, struct mbuf *m, void *ctx)
{
	m_freem(m);
	return 0;
}

/*
 * Reset a connection that is on a listener's SYN queue or accept queue,
 * i.e., one that has not had a struct socket associated with it.
 * Must be called from process context.
 *
 * Modeled after code in inet_csk_listen_stop().
 */
static void
t3_reset_listen_child(struct socket *child)
{
	struct tcpcb *tp = sototcpcb(child);

	t3_send_reset(tp->t_toe);
}

/*
 * Disconnect offloaded established but not yet accepted connections sitting
 * on a server's accept_queue.  We just send an ABORT_REQ at this point and
 * finish off the disconnect later as we may need to wait for the ABORT_RPL.
 */
void
t3_disconnect_acceptq(struct socket *listen_so)
{
	struct socket *so;
	struct tcpcb *tp;

	TAILQ_FOREACH(so, &listen_so->so_comp, so_list) {
		tp = sototcpcb(so);

		if (tp->t_flags & TF_TOE) {
			INP_LOCK(tp->t_inpcb);
			t3_reset_listen_child(so);
			INP_UNLOCK(tp->t_inpcb);
		}
	}
}

/*
 * Reset offloaded connections sitting on a server's syn queue.  As above
 * we send ABORT_REQ and finish off when we get ABORT_RPL.
 */
void
t3_reset_synq(struct listen_ctx *lctx)
{
	struct toepcb *toep;

	SOCK_LOCK(lctx->lso);
	while (!LIST_EMPTY(&lctx->synq_head)) {
		toep = LIST_FIRST(&lctx->synq_head);
		LIST_REMOVE(toep, synq_entry);
		toep->tp_tp = NULL;
		t3_send_reset(toep);
		cxgb_remove_tid(TOEP_T3C_DEV(toep), toep, toep->tp_tid);
		toepcb_release(toep);
	}
	SOCK_UNLOCK(lctx->lso);
}

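/*
 * Write the page pods describing a DDP gather list into adapter memory at
 * the location implied by tag, one ULP_MEM_WRITE work request per pod.
 * The last NUM_SENTINEL_PPODS pods are written as invalid sentinels.
 */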
int
t3_setup_ppods(struct socket *so, const struct ddp_gather_list *gl,
    unsigned int nppods, unsigned int tag, unsigned int maxoff,
    unsigned int pg_off, unsigned int color)
{
	unsigned int i, j, pidx;
	struct pagepod *p;
	struct mbuf *m;
	struct ulp_mem_io *req;
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;
	unsigned int tid = toep->tp_tid;
	const struct tom_data *td = TOM_DATA(TOE_DEV(so));
	unsigned int ppod_addr = tag * PPOD_SIZE + td->ddp_llimit;

	CTR6(KTR_TOM, "t3_setup_ppods(gl=%p nppods=%u tag=%u maxoff=%u pg_off=%u color=%u)",
	    gl, nppods, tag, maxoff, pg_off, color);

	for (i = 0; i < nppods; ++i) {
		m = m_gethdr_nofail(sizeof(*req) + PPOD_SIZE);
		m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
		req = mtod(m, struct ulp_mem_io *);
		m->m_pkthdr.len = m->m_len = sizeof(*req) + PPOD_SIZE;
		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
		req->wr.wr_lo = 0;
		req->cmd_lock_addr = htonl(V_ULP_MEMIO_ADDR(ppod_addr >> 5) |
		    V_ULPTX_CMD(ULP_MEM_WRITE));
		req->len = htonl(V_ULP_MEMIO_DATA_LEN(PPOD_SIZE / 32) |
		    V_ULPTX_NFLITS(PPOD_SIZE / 8 + 1));

		p = (struct pagepod *)(req + 1);
		if (__predict_false(i < nppods - NUM_SENTINEL_PPODS)) {
			p->pp_vld_tid = htonl(F_PPOD_VALID | V_PPOD_TID(tid));
			p->pp_pgsz_tag_color = htonl(V_PPOD_TAG(tag) |
			    V_PPOD_COLOR(color));
			p->pp_max_offset = htonl(maxoff);
			p->pp_page_offset = htonl(pg_off);
			p->pp_rsvd = 0;
			for (pidx = 4 * i, j = 0; j < 5; ++j, ++pidx)
				p->pp_addr[j] = pidx < gl->dgl_nelem ?
				    htobe64(VM_PAGE_TO_PHYS(gl->dgl_pages[pidx])) : 0;
		} else
			p->pp_vld_tid = 0;	/* mark sentinel page pods invalid */
		send_or_defer(toep, m, 0);
		ppod_addr += PPOD_SIZE;
	}
	return (0);
}
|
|
|
|
/*
 * Build a CPL_BARRIER message as payload of a ULP_TX_PKT command.
 */
static inline void
mk_cpl_barrier_ulp(struct cpl_barrier *b)
{
	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)b;

	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*b) / 8));
	b->opcode = CPL_BARRIER;
}

/*
 * Build a CPL_GET_TCB message as payload of a ULP_TX_PKT command.
 */
static inline void
mk_get_tcb_ulp(struct cpl_get_tcb *req, unsigned int tid, unsigned int cpuno)
{
	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;

	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, tid));
	req->cpuno = htons(cpuno);
}

/*
 * Build a CPL_SET_TCB_FIELD message as payload of a ULP_TX_PKT command.
 */
static inline void
mk_set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
    unsigned int word, uint64_t mask, uint64_t val)
{
	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;

	CTR4(KTR_TCB, "mk_set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx)",
	    tid, word, mask, val);
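
	/*
	 * The embedded SET_TCB_FIELDs built here never request a completion
	 * (V_NO_REPLY(1) below); callers that need to observe the effect
	 * read the TCB back with a CPL_GET_TCB instead.
	 */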
	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
	req->reply = V_NO_REPLY(1);
	req->cpu_idx = 0;
	req->word = htons(word);
	req->mask = htobe64(mask);
	req->val = htobe64(val);
}

/*
 * Build a CPL_RX_DATA_ACK message as payload of a ULP_TX_PKT command.
 */
static void
mk_rx_data_ack_ulp(struct socket *so, struct cpl_rx_data_ack *ack,
    unsigned int tid, unsigned int credits)
{
	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)ack;

	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*ack) / 8));
	OPCODE_TID(ack) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, tid));
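	/*
	 * Return RX credits and, via F_RX_DACK_CHANGE, apply the delayed
	 * ack mode configured by the per-device "delack" tunable.
	 */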
	ack->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
	    V_RX_DACK_MODE(TOM_TUNABLE(TOE_DEV(so), delack)) |
	    V_RX_CREDITS(credits));
}

void
t3_cancel_ddpbuf(struct toepcb *toep, unsigned int bufidx)
{
	unsigned int wrlen;
	struct mbuf *m;
	struct work_request_hdr *wr;
	struct cpl_barrier *lock;
	struct cpl_set_tcb_field *req;
	struct cpl_get_tcb *getreq;
	struct ddp_state *p = &toep->tp_ddp_state;
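
	/*
	 * The compound WR built below carries, in order: a CPL_BARRIER, a
	 * CPL_SET_TCB_FIELD updating the DDP flags, a CPL_GET_TCB to read
	 * back how far the HW got, and a closing CPL_BARRIER, which is why
	 * wrlen accounts for two barriers.
	 */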
	SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
	wrlen = sizeof(*wr) + sizeof(*req) + 2 * sizeof(*lock) +
	    sizeof(*getreq);
	m = m_gethdr_nofail(wrlen);
	m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
	wr = mtod(m, struct work_request_hdr *);
	bzero(wr, wrlen);

	wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
	m->m_pkthdr.len = m->m_len = wrlen;

	lock = (struct cpl_barrier *)(wr + 1);
	mk_cpl_barrier_ulp(lock);

	req = (struct cpl_set_tcb_field *)(lock + 1);

	CTR1(KTR_TCB, "t3_cancel_ddpbuf(bufidx=%u)", bufidx);

	/*
	 * Hmmm, not sure if this is actually a good thing: reactivating
	 * the other buffer might be an issue if it has been completed
	 * already.  However, that is unlikely, since the fact that the UBUF
	 * is not completed indicates that there is no outstanding data.
	 */
	if (bufidx == 0)
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
		    V_TF_DDP_ACTIVE_BUF(1) |
		    V_TF_DDP_BUF0_VALID(1),
		    V_TF_DDP_ACTIVE_BUF(1));
	else
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
		    V_TF_DDP_ACTIVE_BUF(1) |
		    V_TF_DDP_BUF1_VALID(1), 0);

	getreq = (struct cpl_get_tcb *)(req + 1);
	mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset);

	mk_cpl_barrier_ulp((struct cpl_barrier *)(getreq + 1));

	/* Keep track of the number of outstanding CPL_GET_TCB requests. */
	p->get_tcb_count++;

#ifdef T3_TRACE
	T3_TRACE1(TIDTB(so),
	    "t3_cancel_ddpbuf: bufidx %u", bufidx);
#endif
	cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
}

/**
 * t3_overlay_ddpbuf - overlay an existing DDP buffer with a new one
 * @toep: the toepcb associated with the buffers
 * @bufidx: index of HW DDP buffer (0 or 1)
 * @tag0: new tag for HW buffer 0
 * @tag1: new tag for HW buffer 1
 * @len: new length for HW buf @bufidx
 *
 * Sends a compound WR to overlay a new DDP buffer on top of an existing
 * buffer by changing the buffer tag and length and setting the valid and
 * active flag accordingly.  The caller must ensure the new buffer is at
 * least as big as the existing one.  Since we typically reprogram both HW
 * buffers this function sets both tags for convenience.  Read the TCB to
 * determine how much data was written into the buffer before the overlay
 * took place.
 */
void
t3_overlay_ddpbuf(struct toepcb *toep, unsigned int bufidx, unsigned int tag0,
    unsigned int tag1, unsigned int len)
{
	unsigned int wrlen;
	struct mbuf *m;
	struct work_request_hdr *wr;
	struct cpl_get_tcb *getreq;
	struct cpl_set_tcb_field *req;
	struct ddp_state *p = &toep->tp_ddp_state;

	CTR4(KTR_TCB, "t3_overlay_ddpbuf(bufidx=%u tag0=%u tag1=%u len=%u)",
	    bufidx, tag0, tag1, len);
	SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
	wrlen = sizeof(*wr) + 3 * sizeof(*req) + sizeof(*getreq);
	m = m_gethdr_nofail(wrlen);
	m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
	wr = mtod(m, struct work_request_hdr *);
	m->m_pkthdr.len = m->m_len = wrlen;
	bzero(wr, wrlen);

	/*
	 * Set the ATOMIC flag to make sure that TP processes the following
	 * CPLs in an atomic manner and no wire segments can be interleaved.
	 */
	wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
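	/*
	 * BUF0_TAG and BUF1_TAG sit in adjacent 32-bit TCB fields starting
	 * at W_TCB_RX_DDP_BUF0_TAG, so a single 64-bit masked SET_TCB_FIELD
	 * (with the BUF1 half shifted up by 32 bits) reprograms both tags
	 * at once.
	 */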
	req = (struct cpl_set_tcb_field *)(wr + 1);
	mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_TAG,
	    V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG) |
	    V_TCB_RX_DDP_BUF1_TAG(M_TCB_RX_DDP_BUF1_TAG) << 32,
	    V_TCB_RX_DDP_BUF0_TAG(tag0) |
	    V_TCB_RX_DDP_BUF1_TAG((uint64_t)tag1) << 32);
	req++;
	if (bufidx == 0) {
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_LEN,
		    V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
		    V_TCB_RX_DDP_BUF0_LEN((uint64_t)len));
		req++;
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
		    V_TF_DDP_PUSH_DISABLE_0(1) |
		    V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
		    V_TF_DDP_PUSH_DISABLE_0(0) |
		    V_TF_DDP_BUF0_VALID(1));
	} else {
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_LEN,
		    V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN),
		    V_TCB_RX_DDP_BUF1_LEN((uint64_t)len));
		req++;
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
		    V_TF_DDP_PUSH_DISABLE_1(1) |
		    V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
		    V_TF_DDP_PUSH_DISABLE_1(0) |
		    V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1));
	}

	getreq = (struct cpl_get_tcb *)(req + 1);
	mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset);

	/* Keep track of the number of outstanding CPL_GET_TCB requests. */
	p->get_tcb_count++;

#ifdef T3_TRACE
	T3_TRACE4(TIDTB(sk),
	    "t3_overlay_ddpbuf: bufidx %u tag0 %u tag1 %u "
	    "len %d",
	    bufidx, tag0, tag1, len);
#endif
	cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
}

/*
 * Sends a compound WR containing all the CPL messages needed to program the
 * two HW DDP buffers, namely optionally setting up the length and offset of
 * each buffer, programming the DDP flags, and optionally sending RX_DATA_ACK.
 */
void
t3_setup_ddpbufs(struct toepcb *toep, unsigned int len0, unsigned int offset0,
    unsigned int len1, unsigned int offset1,
    uint64_t ddp_flags, uint64_t flag_mask, int modulate)
{
	unsigned int wrlen;
	struct mbuf *m;
	struct work_request_hdr *wr;
	struct cpl_set_tcb_field *req;

	CTR6(KTR_TCB,
	    "t3_setup_ddpbufs(len0=%u offset0=%u len1=%u offset1=%u ddp_flags=0x%08x%08x)",
	    len0, offset0, len1, offset1, ddp_flags >> 32, ddp_flags & 0xffffffff);
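
	/*
	 * Size the WR for only the pieces actually sent: the flags update
	 * is always present, the per-buffer offset/length updates are
	 * included only for non-zero lengths, and a trailing RX_DATA_ACK
	 * is appended only when modulating.
	 */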
	SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
	wrlen = sizeof(*wr) + sizeof(*req) + (len0 ? sizeof(*req) : 0) +
	    (len1 ? sizeof(*req) : 0) +
	    (modulate ? sizeof(struct cpl_rx_data_ack) : 0);
	m = m_gethdr_nofail(wrlen);
	m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
	wr = mtod(m, struct work_request_hdr *);
	bzero(wr, wrlen);

	wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
	m->m_pkthdr.len = m->m_len = wrlen;

	req = (struct cpl_set_tcb_field *)(wr + 1);
	if (len0) {			/* program buffer 0 offset and length */
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_OFFSET,
		    V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |
		    V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
		    V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset0) |
		    V_TCB_RX_DDP_BUF0_LEN((uint64_t)len0));
		req++;
	}
	if (len1) {			/* program buffer 1 offset and length */
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_OFFSET,
		    V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) |
		    V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN) << 32,
		    V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset1) |
		    V_TCB_RX_DDP_BUF1_LEN((uint64_t)len1) << 32);
		req++;
	}

	mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, flag_mask,
	    ddp_flags);
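
	/*
	 * When modulating, piggy-back an RX_DATA_ACK that returns the
	 * receive window credits consumed since the last update
	 * (tp_copied_seq - tp_rcv_wup) and advance tp_rcv_wup to match.
	 */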
	if (modulate) {
		mk_rx_data_ack_ulp(toeptoso(toep),
		    (struct cpl_rx_data_ack *)(req + 1), toep->tp_tid,
		    toep->tp_copied_seq - toep->tp_rcv_wup);
		toep->tp_rcv_wup = toep->tp_copied_seq;
	}

#ifdef T3_TRACE
	T3_TRACE5(TIDTB(sk),
	    "t3_setup_ddpbufs: len0 %u len1 %u ddp_flags 0x%08x%08x "
	    "modulate %d",
	    len0, len1, ddp_flags >> 32, ddp_flags & 0xffffffff,
	    modulate);
#endif

	cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
}

void
t3_init_wr_tab(unsigned int wr_len)
{
	int i;

	if (mbuf_wrs[1])	/* already initialized */
		return;
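
	/*
	 * mbuf_wrs[i] is the number of work requests needed to send an mbuf
	 * chain with i scatter/gather entries: each SG entry averages 1.5
	 * flits (hence (3 * i) / 2, rounded up), plus 3 flits of headers;
	 * chains that exceed wr_len flits are split into continuation WRs.
	 */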
	for (i = 1; i < ARRAY_SIZE(mbuf_wrs); i++) {
		int sgl_len = (3 * i) / 2 + (i & 1);

		sgl_len += 3;
		mbuf_wrs[i] = sgl_len <= wr_len ?
		    1 : 1 + (sgl_len - 2) / (wr_len - 1);
	}

	wrlen = wr_len * 8;
}

int
t3_init_cpl_io(void)
{
#ifdef notyet
	tcphdr_skb = alloc_skb(sizeof(struct tcphdr), GFP_KERNEL);
	if (!tcphdr_skb) {
		log(LOG_ERR,
		    "Chelsio TCP offload: can't allocate sk_buff\n");
		return -1;
	}
	skb_put(tcphdr_skb, sizeof(struct tcphdr));
	tcphdr_skb->h.raw = tcphdr_skb->data;
	memset(tcphdr_skb->data, 0, tcphdr_skb->len);
#endif
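
	/*
	 * Register a handler for each CPL opcode this module consumes; the
	 * TOE core dispatches incoming CPL messages to these by opcode.
	 */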
	t3tom_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish);
	t3tom_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl);
	t3tom_register_cpl_handler(CPL_TX_DMA_ACK, do_wr_ack);
	t3tom_register_cpl_handler(CPL_RX_DATA, do_rx_data);
	t3tom_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl);
	t3tom_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close);
	t3tom_register_cpl_handler(CPL_PASS_ESTABLISH, do_pass_establish);
	t3tom_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
	t3tom_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req);
	t3tom_register_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl);
	t3tom_register_cpl_handler(CPL_RX_DATA_DDP, do_rx_data_ddp);
	t3tom_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_rx_ddp_complete);
	t3tom_register_cpl_handler(CPL_RX_URG_NOTIFY, do_rx_urg_notify);
	t3tom_register_cpl_handler(CPL_TRACE_PKT, do_trace_pkt);
	t3tom_register_cpl_handler(CPL_GET_TCB_RPL, do_get_tcb_rpl);
	return (0);
}