Support for TCP DDP (Direct Data Placement) in the T4 TOE module.

Basically, this is automatic rx zero copy when feasible.  TCP payload is
DMA'd directly into the userspace buffer described by the uio submitted
in soreceive by an application.

- Works with sockets that are being handled by the TCP offload engine
  of a T4 chip (you need the t4_tom.ko module loaded after cxgbe, and an
  "ifconfig +toe" on the cxgbe interface).
- Does not require any modification to the application.
- Not enabled by default.  Use hw.t4nex.<X>.toe.ddp="1" to enable it.
This commit is contained in:
Navdeep Parhar 2012-08-17 00:49:29 +00:00
parent 5f7a640879
commit e682d02e12
10 changed files with 1487 additions and 53 deletions

View File

@ -161,10 +161,12 @@ struct pagepod {
#define S_PPOD_TAG 6
#define M_PPOD_TAG 0xFFFFFF
#define V_PPOD_TAG(x) ((x) << S_PPOD_TAG)
#define G_PPOD_TAG(x) (((x) >> S_PPOD_TAG) & M_PPOD_TAG)
#define S_PPOD_PGSZ 30
#define M_PPOD_PGSZ 0x3
#define V_PPOD_PGSZ(x) ((x) << S_PPOD_PGSZ)
#define G_PPOD_PGSZ(x) (((x) >> S_PPOD_PGSZ) & M_PPOD_PGSZ)
#define S_PPOD_TID 32
#define M_PPOD_TID 0xFFFFFF

View File

@ -792,6 +792,14 @@ struct cpl_set_tcb_field {
__be64 val;
};
/*
 * CPL_SET_TCB_FIELD message body: opcode/tid followed by the parameters of
 * the TCB update.  All multi-byte fields are big-endian (__beNN types).
 * NOTE(review): presumably identical to struct cpl_set_tcb_field minus its
 * leading work request header, so that it can be embedded in other work
 * requests -- confirm against that definition.
 */
struct cpl_set_tcb_field_core {
union opcode_tid ot;
__be16 reply_ctrl;
/* NOTE(review): presumably laid out per the S_WORD/M_WORD macros that
 * describe cpl_set_tcb_field.word_cookie -- confirm. */
__be16 word_cookie;
__be64 mask;
__be64 val;
};
/* cpl_set_tcb_field.word_cookie fields */
#define S_WORD 0
#define M_WORD 0x1F
@ -1376,6 +1384,11 @@ struct cpl_rx_data_ack {
__be32 credit_dack;
};
/*
 * CPL_RX_DATA_ACK message body: opcode/tid followed by the credit/delayed-ack
 * word (big-endian).
 * NOTE(review): presumably identical to struct cpl_rx_data_ack minus its
 * leading work request header -- confirm against that definition.
 */
struct cpl_rx_data_ack_core {
union opcode_tid ot;
__be32 credit_dack;
};
/* cpl_rx_data_ack.ack_seq fields */
#define S_RX_CREDITS 0
#define M_RX_CREDITS 0x3FFFFFF

View File

@ -31,13 +31,16 @@
#ifndef __T4_OFFLOAD_H__
#define __T4_OFFLOAD_H__
#define INIT_ULPTX_WR(w, wrlen, atomic, tid) do { \
(w)->wr.wr_hi = htonl(V_FW_WR_OP(FW_ULPTX_WR) | V_FW_WR_ATOMIC(atomic)); \
(w)->wr.wr_mid = htonl(V_FW_WR_LEN16(DIV_ROUND_UP(wrlen, 16)) | \
#define INIT_ULPTX_WRH(w, wrlen, atomic, tid) do { \
(w)->wr_hi = htonl(V_FW_WR_OP(FW_ULPTX_WR) | V_FW_WR_ATOMIC(atomic)); \
(w)->wr_mid = htonl(V_FW_WR_LEN16(DIV_ROUND_UP(wrlen, 16)) | \
V_FW_WR_FLOWID(tid)); \
(w)->wr.wr_lo = cpu_to_be64(0); \
(w)->wr_lo = cpu_to_be64(0); \
} while (0)
#define INIT_ULPTX_WR(w, wrlen, atomic, tid) \
INIT_ULPTX_WRH(&((w)->wr), wrlen, atomic, tid)
#define INIT_TP_WR(w, tid) do { \
(w)->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) | \
V_FW_WR_IMMDLEN(sizeof(*w) - sizeof(w->wr))); \

View File

@ -247,10 +247,14 @@ calc_opt2a(struct socket *so)
opt2 |= F_RX_COALESCE_VALID | V_RX_COALESCE(M_RX_COALESCE);
opt2 |= F_RSS_QUEUE_VALID | V_RSS_QUEUE(toep->ofld_rxq->iq.abs_id);
#ifdef USE_DDP_RX_FLOW_CONTROL
if (toep->ulp_mode == ULP_MODE_TCPDDP)
opt2 |= F_RX_FC_VALID | F_RX_FC_DDP;
#endif
return (htobe32(opt2));
}
void
t4_init_connect_cpl_handlers(struct adapter *sc)
{
@ -320,7 +324,10 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt,
toep->tid = atid;
toep->l2te = e;
toep->ulp_mode = ULP_MODE_NONE;
if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0)
set_tcpddp_ulp_mode(toep);
else
toep->ulp_mode = ULP_MODE_NONE;
SOCKBUF_LOCK(&so->so_rcv);
/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
toep->rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);

View File

@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
#include "common/t4_tcb.h"
#include "tom/t4_tom_l2t.h"
#include "tom/t4_tom.h"
@ -299,12 +300,14 @@ make_established(struct toepcb *toep, uint32_t snd_isn, uint32_t rcv_isn,
}
static int
send_rx_credits(struct adapter *sc, struct toepcb *toep, uint32_t credits)
send_rx_credits(struct adapter *sc, struct toepcb *toep, int credits)
{
struct wrqe *wr;
struct cpl_rx_data_ack *req;
uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1);
KASSERT(credits >= 0, ("%s: %d credits", __func__, credits));
wr = alloc_wrqe(sizeof(*req), toep->ctrlq);
if (wr == NULL)
return (0);
@ -323,25 +326,28 @@ t4_rcvd(struct toedev *tod, struct tcpcb *tp)
struct adapter *sc = tod->tod_softc;
struct inpcb *inp = tp->t_inpcb;
struct socket *so = inp->inp_socket;
struct sockbuf *so_rcv = &so->so_rcv;
struct sockbuf *sb = &so->so_rcv;
struct toepcb *toep = tp->t_toe;
int must_send;
int credits;
INP_WLOCK_ASSERT(inp);
SOCKBUF_LOCK(so_rcv);
KASSERT(toep->enqueued >= so_rcv->sb_cc,
("%s: so_rcv->sb_cc > enqueued", __func__));
toep->rx_credits += toep->enqueued - so_rcv->sb_cc;
toep->enqueued = so_rcv->sb_cc;
SOCKBUF_UNLOCK(so_rcv);
SOCKBUF_LOCK(sb);
KASSERT(toep->sb_cc >= sb->sb_cc,
("%s: sb %p has more data (%d) than last time (%d).",
__func__, sb, sb->sb_cc, toep->sb_cc));
toep->rx_credits += toep->sb_cc - sb->sb_cc;
toep->sb_cc = sb->sb_cc;
credits = toep->rx_credits;
SOCKBUF_UNLOCK(sb);
must_send = toep->rx_credits + 16384 >= tp->rcv_wnd;
if (must_send || toep->rx_credits >= 15 * 1024) {
int credits;
if (credits > 0 &&
(credits + 16384 >= tp->rcv_wnd || credits >= 15 * 1024)) {
credits = send_rx_credits(sc, toep, toep->rx_credits);
credits = send_rx_credits(sc, toep, credits);
SOCKBUF_LOCK(sb);
toep->rx_credits -= credits;
SOCKBUF_UNLOCK(sb);
tp->rcv_wnd += credits;
tp->rcv_adv += credits;
}
@ -537,7 +543,8 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep)
KASSERT(toepcb_flag(toep, TPF_FLOWC_WR_SENT),
("%s: flowc_wr not sent for tid %u.", __func__, toep->tid));
if (toep->ulp_mode != ULP_MODE_NONE)
if (__predict_false(toep->ulp_mode != ULP_MODE_NONE &&
toep->ulp_mode != ULP_MODE_TCPDDP))
CXGBE_UNIMPLEMENTED("ulp_mode");
/*
@ -765,7 +772,8 @@ do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
struct toepcb *toep = lookup_tid(sc, tid);
struct inpcb *inp = toep->inp;
struct tcpcb *tp = NULL;
struct socket *so = NULL;
struct socket *so;
struct sockbuf *sb;
#ifdef INVARIANTS
unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif
@ -785,10 +793,35 @@ do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
if (toepcb_flag(toep, TPF_ABORT_SHUTDOWN))
goto done;
so = inp->inp_socket;
socantrcvmore(so);
tp->rcv_nxt++; /* FIN */
so = inp->inp_socket;
sb = &so->so_rcv;
SOCKBUF_LOCK(sb);
if (__predict_false(toep->ddp_flags & (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE))) {
m = m_get(M_NOWAIT, MT_DATA);
if (m == NULL)
CXGBE_UNIMPLEMENTED("mbuf alloc failure");
m->m_len = be32toh(cpl->rcv_nxt) - tp->rcv_nxt;
m->m_flags |= M_DDP; /* Data is already where it should be */
m->m_data = "nothing to see here";
tp->rcv_nxt = be32toh(cpl->rcv_nxt);
toep->ddp_flags &= ~(DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE);
KASSERT(toep->sb_cc >= sb->sb_cc,
("%s: sb %p has more data (%d) than last time (%d).",
__func__, sb, sb->sb_cc, toep->sb_cc));
toep->rx_credits += toep->sb_cc - sb->sb_cc;
#ifdef USE_DDP_RX_FLOW_CONTROL
toep->rx_credits -= m->m_len; /* adjust for F_RX_FC_DDP */
#endif
sbappendstream_locked(sb, m);
toep->sb_cc = sb->sb_cc;
}
socantrcvmore_locked(so); /* unlocks the sockbuf */
KASSERT(tp->rcv_nxt == be32toh(cpl->rcv_nxt),
("%s: rcv_nxt mismatch: %u %u", __func__, tp->rcv_nxt,
be32toh(cpl->rcv_nxt)));
@ -1046,7 +1079,8 @@ do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
struct inpcb *inp = toep->inp;
struct tcpcb *tp;
struct socket *so;
struct sockbuf *so_rcv;
struct sockbuf *sb;
int len;
if (__predict_false(toepcb_flag(toep, TPF_SYNQE))) {
/*
@ -1064,11 +1098,12 @@ do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
/* strip off CPL header */
m_adj(m, sizeof(*cpl));
len = m->m_pkthdr.len;
INP_WLOCK(inp);
if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
__func__, tid, m->m_pkthdr.len, inp->inp_flags);
__func__, tid, len, inp->inp_flags);
INP_WUNLOCK(inp);
m_freem(m);
return (0);
@ -1084,21 +1119,20 @@ do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
}
#endif
tp->rcv_nxt += m->m_pkthdr.len;
KASSERT(tp->rcv_wnd >= m->m_pkthdr.len,
("%s: negative window size", __func__));
tp->rcv_wnd -= m->m_pkthdr.len;
tp->rcv_nxt += len;
KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__));
tp->rcv_wnd -= len;
tp->t_rcvtime = ticks;
so = inp_inpcbtosocket(inp);
so_rcv = &so->so_rcv;
SOCKBUF_LOCK(so_rcv);
sb = &so->so_rcv;
SOCKBUF_LOCK(sb);
if (__predict_false(so_rcv->sb_state & SBS_CANTRCVMORE)) {
if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)",
__func__, tid, m->m_pkthdr.len);
__func__, tid, len);
m_freem(m);
SOCKBUF_UNLOCK(so_rcv);
SOCKBUF_UNLOCK(sb);
INP_WUNLOCK(inp);
INP_INFO_WLOCK(&V_tcbinfo);
@ -1112,23 +1146,76 @@ do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
}
/* receive buffer autosize */
if (so_rcv->sb_flags & SB_AUTOSIZE &&
if (sb->sb_flags & SB_AUTOSIZE &&
V_tcp_do_autorcvbuf &&
so_rcv->sb_hiwat < V_tcp_autorcvbuf_max &&
m->m_pkthdr.len > (sbspace(so_rcv) / 8 * 7)) {
unsigned int hiwat = so_rcv->sb_hiwat;
sb->sb_hiwat < V_tcp_autorcvbuf_max &&
len > (sbspace(sb) / 8 * 7)) {
unsigned int hiwat = sb->sb_hiwat;
unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc,
V_tcp_autorcvbuf_max);
if (!sbreserve_locked(so_rcv, newsize, so, NULL))
so_rcv->sb_flags &= ~SB_AUTOSIZE;
if (!sbreserve_locked(sb, newsize, so, NULL))
sb->sb_flags &= ~SB_AUTOSIZE;
else
toep->rx_credits += newsize - hiwat;
}
toep->enqueued += m->m_pkthdr.len;
sbappendstream_locked(so_rcv, m);
if (toep->ulp_mode == ULP_MODE_TCPDDP) {
int changed = !(toep->ddp_flags & DDP_ON) ^ cpl->ddp_off;
if (changed) {
if (__predict_false(!(toep->ddp_flags & DDP_SC_REQ))) {
/* XXX: handle this if legitimate */
panic("%s: unexpected DDP state change %d",
__func__, cpl->ddp_off);
}
toep->ddp_flags ^= DDP_ON | DDP_SC_REQ;
}
if ((toep->ddp_flags & DDP_OK) == 0 &&
time_uptime >= toep->ddp_disabled + DDP_RETRY_WAIT) {
toep->ddp_score = DDP_LOW_SCORE;
toep->ddp_flags |= DDP_OK;
CTR3(KTR_CXGBE, "%s: tid %u DDP_OK @ %u",
__func__, tid, time_uptime);
}
if (toep->ddp_flags & DDP_ON) {
/*
* CPL_RX_DATA with DDP on can only be an indicate. Ask
* soreceive to post a buffer or disable DDP. The
* payload that arrived in this indicate is appended to
* the socket buffer as usual.
*/
#if 0
CTR5(KTR_CXGBE,
"%s: tid %u (0x%x) DDP indicate (seq 0x%x, len %d)",
__func__, tid, toep->flags, be32toh(cpl->seq), len);
#endif
sb->sb_flags |= SB_DDP_INDICATE;
} else if ((toep->ddp_flags & (DDP_OK|DDP_SC_REQ)) == DDP_OK &&
tp->rcv_wnd > DDP_RSVD_WIN && len >= sc->tt.ddp_thres) {
/*
* DDP allowed but isn't on (and a request to switch it
* on isn't pending either), and conditions are ripe for
* it to work. Switch it on.
*/
enable_ddp(sc, toep);
}
}
KASSERT(toep->sb_cc >= sb->sb_cc,
("%s: sb %p has more data (%d) than last time (%d).",
__func__, sb, sb->sb_cc, toep->sb_cc));
toep->rx_credits += toep->sb_cc - sb->sb_cc;
sbappendstream_locked(sb, m);
toep->sb_cc = sb->sb_cc;
sorwakeup_locked(so);
SOCKBUF_UNLOCK_ASSERT(so_rcv);
SOCKBUF_UNLOCK_ASSERT(sb);
INP_WUNLOCK(inp);
return (0);

1223
sys/dev/cxgbe/tom/t4_ddp.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -881,7 +881,7 @@ t4opt_to_tcpopt(const struct tcp_options *t4opt, struct tcpopt *to)
*/
static uint32_t
calc_opt2p(struct adapter *sc, struct port_info *pi, int rxqid,
const struct tcp_options *tcpopt, struct tcphdr *th)
const struct tcp_options *tcpopt, struct tcphdr *th, int ulp_mode)
{
uint32_t opt2 = 0;
struct sge_ofld_rxq *ofld_rxq = &sc->sge.ofld_rxq[rxqid];
@ -902,6 +902,11 @@ calc_opt2p(struct adapter *sc, struct port_info *pi, int rxqid,
opt2 |= F_RX_COALESCE_VALID | V_RX_COALESCE(M_RX_COALESCE);
opt2 |= F_RSS_QUEUE_VALID | V_RSS_QUEUE(ofld_rxq->iq.abs_id);
#ifdef USE_DDP_RX_FLOW_CONTROL
if (ulp_mode == ULP_MODE_TCPDDP)
opt2 |= F_RX_FC_VALID | F_RX_FC_DDP;
#endif
return htobe32(opt2);
}
@ -985,7 +990,7 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
struct l2t_entry *e = NULL;
struct rtentry *rt;
struct sockaddr_in nam;
int rscale, mtu_idx, rx_credits, rxqid;
int rscale, mtu_idx, rx_credits, rxqid, ulp_mode;
struct synq_entry *synqe = NULL;
int reject_reason;
uint16_t vid;
@ -1108,9 +1113,13 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
get_qids_from_mbuf(m, NULL, &rxqid);
INIT_TP_WR_MIT_CPL(rpl, CPL_PASS_ACCEPT_RPL, tid);
rpl->opt0 = calc_opt0(so, pi, e, mtu_idx, rscale, rx_credits,
ULP_MODE_NONE);
rpl->opt2 = calc_opt2p(sc, pi, rxqid, &cpl->tcpopt, &th);
if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0) {
ulp_mode = ULP_MODE_TCPDDP;
synqe_set_flag(synqe, TPF_SYNQE_TCPDDP);
} else
ulp_mode = ULP_MODE_NONE;
rpl->opt0 = calc_opt0(so, pi, e, mtu_idx, rscale, rx_credits, ulp_mode);
rpl->opt2 = calc_opt2p(sc, pi, rxqid, &cpl->tcpopt, &th, ulp_mode);
synqe->tid = tid;
synqe->lctx = lctx;
@ -1313,7 +1322,10 @@ do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
}
toep->tid = tid;
toep->l2te = &sc->l2t->l2tab[synqe->l2e_idx];
toep->ulp_mode = ULP_MODE_NONE;
if (synqe_flag(synqe, TPF_SYNQE_TCPDDP))
set_tcpddp_ulp_mode(toep);
else
toep->ulp_mode = ULP_MODE_NONE;
/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
toep->rx_credits = synqe->rcv_bufsize;

View File

@ -55,6 +55,9 @@ __FBSDID("$FreeBSD$");
#include "tom/t4_tom_l2t.h"
#include "tom/t4_tom.h"
static struct protosw ddp_protosw;
static struct pr_usrreqs ddp_usrreqs;
/* Module ops */
static int t4_tom_mod_load(void);
static int t4_tom_mod_unload(void);
@ -167,6 +170,8 @@ offload_socket(struct socket *so, struct toepcb *toep)
sb = &so->so_rcv;
SOCKBUF_LOCK(sb);
sb->sb_flags |= SB_NOCOALESCE;
if (toep->ulp_mode == ULP_MODE_TCPDDP)
so->so_proto = &ddp_protosw;
SOCKBUF_UNLOCK(sb);
/* Update TCP PCB */
@ -235,6 +240,9 @@ release_offload_resources(struct toepcb *toep)
CTR4(KTR_CXGBE, "%s: toep %p (tid %d, l2te %p)",
__func__, toep, tid, toep->l2te);
if (toep->ulp_mode == ULP_MODE_TCPDDP)
release_ddp_resources(toep);
if (toep->l2te)
t4_l2t_release(toep->l2te);
@ -568,6 +576,8 @@ free_tom_data(struct adapter *sc, struct tom_data *td)
("%s: lctx hash table is not empty.", __func__));
t4_uninit_l2t_cpl_handlers(sc);
t4_uninit_cpl_io_handlers(sc);
t4_uninit_ddp(sc, td);
if (td->listen_mask != 0)
hashdestroy(td->listen_hash, M_CXGBE, td->listen_mask);
@ -613,6 +623,8 @@ t4_tom_activate(struct adapter *sc)
if (rc != 0)
goto done;
t4_init_ddp(sc, td);
/* CPL handlers */
t4_init_connect_cpl_handlers(sc);
t4_init_l2t_cpl_handlers(sc);
@ -688,6 +700,16 @@ static int
t4_tom_mod_load(void)
{
int rc;
struct protosw *tcp_protosw;
tcp_protosw = pffindproto(PF_INET, IPPROTO_TCP, SOCK_STREAM);
if (tcp_protosw == NULL)
return (ENOPROTOOPT);
bcopy(tcp_protosw, &ddp_protosw, sizeof(ddp_protosw));
bcopy(tcp_protosw->pr_usrreqs, &ddp_usrreqs, sizeof(ddp_usrreqs));
ddp_usrreqs.pru_soreceive = t4_soreceive_ddp;
ddp_protosw.pr_usrreqs = &ddp_usrreqs;
rc = t4_register_uld(&tom_uld_info);
if (rc != 0)

View File

@ -46,6 +46,13 @@
*/
#define MAX_RCV_WND ((1U << 27) - 1)
#define DDP_RSVD_WIN (16 * 1024U)
#define SB_DDP_INDICATE SB_IN_TOE /* soreceive must respond to indicate */
#define M_DDP M_PROTO1
#define USE_DDP_RX_FLOW_CONTROL
/* TOE PCB flags */
enum {
TPF_ATTACHED, /* a tcpcb refers to this toepcb */
@ -58,6 +65,15 @@ enum {
TPF_CPL_PENDING, /* haven't received the last CPL */
TPF_SYNQE, /* synq_entry, not really a toepcb */
TPF_SYNQE_NEEDFREE, /* synq_entry was allocated externally */
TPF_SYNQE_TCPDDP, /* ulp_mode TCPDDP when toepcb is allocated */
};
/*
 * Bits kept in a toepcb's ddp_flags.  set_tcpddp_ulp_mode() starts a
 * connection out with only DDP_OK set.  These are bit masks (tested and
 * updated with &, |, ^), unlike the TPF_* values, which are bit numbers.
 */
enum {
DDP_OK = (1 << 0), /* OK to turn on DDP */
DDP_SC_REQ = (1 << 1), /* state change (on/off) requested */
DDP_ON = (1 << 2), /* DDP is turned on */
DDP_BUF0_ACTIVE = (1 << 3), /* buffer 0 in use (not invalidated) */
DDP_BUF1_ACTIVE = (1 << 4), /* buffer 1 in use (not invalidated) */
};
struct ofld_tx_sdesc {
@ -65,6 +81,22 @@ struct ofld_tx_sdesc {
uint8_t tx_credits; /* firmware tx credits (unit is 16B) */
};
/*
 * A run of page pods ("ppods") that is in use, plus a count of the free pods
 * that immediately follow it.  Regions are linked into a TAILQ whose head
 * type is struct ppod_head; one region is embedded in every struct
 * ddp_buffer.
 */
struct ppod_region {
TAILQ_ENTRY(ppod_region) link;
int used; /* # of pods used by this region */
int free; /* # of contiguous pods free right after this region */
};
/*
 * State for one DDP buffer.  A toepcb carries pointers to two of these
 * (db[0] and db[1]).
 * NOTE(review): offset/len presumably describe the byte range of the
 * receive buffer and pages/npages the pages that back it; the code that
 * fills them in (t4_ddp.c) is not visible here -- confirm.
 */
struct ddp_buffer {
uint32_t tag; /* includes color, page pod addr, and DDP page size */
int nppods;
int offset;
int len;
struct ppod_region ppod_region;
int npages;
vm_page_t *pages;
};
struct toepcb {
TAILQ_ENTRY(toepcb) link; /* toep_list */
unsigned int flags; /* miscellaneous flags */
@ -77,11 +109,16 @@ struct toepcb {
struct l2t_entry *l2te; /* L2 table entry used by this connection */
int tid; /* Connection identifier */
unsigned int tx_credits;/* tx WR credits (in 16 byte units) remaining */
unsigned int enqueued; /* # of bytes added to so_rcv (not yet read) */
unsigned int sb_cc; /* last noted value of so_rcv->sb_cc */
int rx_credits; /* rx credits (in bytes) to be returned to hw */
unsigned int ulp_mode; /* ULP mode */
unsigned int ddp_flags;
struct ddp_buffer *db[2];
time_t ddp_disabled;
uint8_t ddp_score;
/* Tx software descriptor */
uint8_t txsd_total;
uint8_t txsd_pidx;
@ -118,6 +155,19 @@ toepcb_clr_flag(struct toepcb *toep, int flag)
clrbit(&toep->flags, flag);
}
#define DDP_RETRY_WAIT 5 /* seconds to wait before re-enabling DDP */
#define DDP_LOW_SCORE 1
#define DDP_HIGH_SCORE 3
/*
 * Mark a toepcb as using the TCP DDP ULP mode and give its DDP bookkeeping
 * its starting values: DDP is permitted (DDP_OK) but not on, no state change
 * is pending, neither buffer is active, and the score is at its low mark.
 */
static inline void
set_tcpddp_ulp_mode(struct toepcb *toep)
{

	/* Plain assignment (not |=) so every other DDP flag bit is cleared. */
	toep->ddp_flags = DDP_OK;
	toep->ddp_score = DDP_LOW_SCORE;
	toep->ulp_mode = ULP_MODE_TCPDDP;
}
/*
* Compressed state for embryonic connections for a listener. Barely fits in
* 64B, try not to grow it further.
@ -171,6 +221,8 @@ struct listen_ctx {
TAILQ_HEAD(, synq_entry) synq;
};
TAILQ_HEAD(ppod_head, ppod_region);
struct tom_data {
struct toedev tod;
@ -178,10 +230,16 @@ struct tom_data {
struct mtx toep_list_lock;
TAILQ_HEAD(, toepcb) toep_list;
struct mtx lctx_hash_lock;
LIST_HEAD(, listen_ctx) *listen_hash;
u_long listen_mask;
int lctx_count; /* # of lctx in the hash table */
struct mtx lctx_hash_lock;
struct mtx ppod_lock;
int nppods;
int nppods_free; /* # of available ppods */
int nppods_free_head; /* # of available ppods at the beginning */
struct ppod_head ppods;
};
static inline struct tom_data *
@ -248,4 +306,11 @@ int t4_send_rst(struct toedev *, struct tcpcb *);
void t4_set_tcb_field(struct adapter *, struct toepcb *, uint16_t, uint64_t,
uint64_t);
/* t4_ddp.c */
void t4_init_ddp(struct adapter *, struct tom_data *);
void t4_uninit_ddp(struct adapter *, struct tom_data *);
int t4_soreceive_ddp(struct socket *, struct sockaddr **, struct uio *,
struct mbuf **, struct mbuf **, int *);
void enable_ddp(struct adapter *, struct toepcb *toep);
void release_ddp_resources(struct toepcb *toep);
#endif

View File

@ -8,7 +8,7 @@ CXGBE = ${.CURDIR}/../../../dev/cxgbe
.PATH: ${CXGBE}/tom
KMOD = t4_tom
SRCS = t4_tom.c t4_connect.c t4_listen.c t4_cpl_io.c t4_tom_l2t.c
SRCS = t4_tom.c t4_connect.c t4_listen.c t4_cpl_io.c t4_tom_l2t.c t4_ddp.c
SRCS+= device_if.h bus_if.h pci_if.h
SRCS+= opt_inet.h