cxgbe/tom: Add VIMAGE support to the TOE driver.
Active Open:
- Save the socket's vnet at the time of the active open (t4_connect) and
  switch to it when processing the reply (do_act_open_rpl or
  do_act_establish).

Passive Open:
- Save the listening socket's vnet in the driver's listen_ctx and switch
  to it when processing incoming SYNs for the socket.
- Reject SYNs that arrive on an ifnet that's not in the same vnet as the
  listening socket.

CLIP (Compressed Local IPv6) table:
- Add only those IPv6 addresses to the CLIP that are in a vnet associated
  with one of the card's ifnets.

Misc:
- Set vnet from the toepcb when processing TCP state transitions.
- The kernel sets the vnet when calling the driver's output routine, so
  t4_push_frames already runs in the proper vnet context.  One exception
  is when incoming credits trigger tx within the driver's ithread.  Set
  the vnet explicitly in do_fw4_ack for that case.

MFC after: 3 days
Sponsored by: Chelsio Communications
This commit is contained in:
parent
8eff25dda5
commit
989ae30975
@ -126,6 +126,7 @@ do_act_establish(struct sge_iq *iq, const struct rss_header *rss,
|
||||
CTR3(KTR_CXGBE, "%s: atid %u, tid %u", __func__, atid, tid);
|
||||
free_atid(sc, atid);
|
||||
|
||||
CURVNET_SET(toep->vnet);
|
||||
INP_WLOCK(inp);
|
||||
toep->tid = tid;
|
||||
insert_tid(sc, tid, toep, inp->inp_vflag & INP_IPV6 ? 2 : 1);
|
||||
@ -141,6 +142,7 @@ do_act_establish(struct sge_iq *iq, const struct rss_header *rss,
|
||||
make_established(toep, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt);
|
||||
done:
|
||||
INP_WUNLOCK(inp);
|
||||
CURVNET_RESTORE();
|
||||
return (0);
|
||||
}
|
||||
|
||||
@ -178,6 +180,7 @@ act_open_failure_cleanup(struct adapter *sc, u_int atid, u_int status)
|
||||
free_atid(sc, atid);
|
||||
toep->tid = -1;
|
||||
|
||||
CURVNET_SET(toep->vnet);
|
||||
if (status != EAGAIN)
|
||||
INP_INFO_RLOCK(&V_tcbinfo);
|
||||
INP_WLOCK(inp);
|
||||
@ -185,6 +188,7 @@ act_open_failure_cleanup(struct adapter *sc, u_int atid, u_int status)
|
||||
final_cpl_received(toep); /* unlocks inp */
|
||||
if (status != EAGAIN)
|
||||
INP_INFO_RUNLOCK(&V_tcbinfo);
|
||||
CURVNET_RESTORE();
|
||||
}
|
||||
|
||||
/*
|
||||
@ -360,6 +364,7 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt,
|
||||
if (wr == NULL)
|
||||
DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM);
|
||||
|
||||
toep->vnet = so->so_vnet;
|
||||
if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0)
|
||||
set_tcpddp_ulp_mode(toep);
|
||||
else
|
||||
|
@ -306,7 +306,6 @@ make_established(struct toepcb *toep, uint32_t snd_isn, uint32_t rcv_isn,
|
||||
uint16_t tcpopt = be16toh(opt);
|
||||
struct flowc_tx_params ftxp;
|
||||
|
||||
CURVNET_SET(so->so_vnet);
|
||||
INP_WLOCK_ASSERT(inp);
|
||||
KASSERT(tp->t_state == TCPS_SYN_SENT ||
|
||||
tp->t_state == TCPS_SYN_RECEIVED,
|
||||
@ -357,7 +356,6 @@ make_established(struct toepcb *toep, uint32_t snd_isn, uint32_t rcv_isn,
|
||||
send_flowc_wr(toep, &ftxp);
|
||||
|
||||
soisconnected(so);
|
||||
CURVNET_RESTORE();
|
||||
}
|
||||
|
||||
static int
|
||||
@ -1146,6 +1144,7 @@ do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
|
||||
|
||||
KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
|
||||
|
||||
CURVNET_SET(toep->vnet);
|
||||
INP_INFO_RLOCK(&V_tcbinfo);
|
||||
INP_WLOCK(inp);
|
||||
tp = intotcpcb(inp);
|
||||
@ -1191,6 +1190,7 @@ do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
|
||||
tcp_twstart(tp);
|
||||
INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
|
||||
INP_INFO_RUNLOCK(&V_tcbinfo);
|
||||
CURVNET_RESTORE();
|
||||
|
||||
INP_WLOCK(inp);
|
||||
final_cpl_received(toep);
|
||||
@ -1203,6 +1203,7 @@ do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
|
||||
done:
|
||||
INP_WUNLOCK(inp);
|
||||
INP_INFO_RUNLOCK(&V_tcbinfo);
|
||||
CURVNET_RESTORE();
|
||||
return (0);
|
||||
}
|
||||
|
||||
@ -1229,6 +1230,7 @@ do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss,
|
||||
KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
|
||||
KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
|
||||
|
||||
CURVNET_SET(toep->vnet);
|
||||
INP_INFO_RLOCK(&V_tcbinfo);
|
||||
INP_WLOCK(inp);
|
||||
tp = intotcpcb(inp);
|
||||
@ -1248,6 +1250,7 @@ do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss,
|
||||
release:
|
||||
INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
|
||||
INP_INFO_RUNLOCK(&V_tcbinfo);
|
||||
CURVNET_RESTORE();
|
||||
|
||||
INP_WLOCK(inp);
|
||||
final_cpl_received(toep); /* no more CPLs expected */
|
||||
@ -1272,6 +1275,7 @@ do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss,
|
||||
done:
|
||||
INP_WUNLOCK(inp);
|
||||
INP_INFO_RUNLOCK(&V_tcbinfo);
|
||||
CURVNET_RESTORE();
|
||||
return (0);
|
||||
}
|
||||
|
||||
@ -1345,6 +1349,7 @@ do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
|
||||
}
|
||||
|
||||
inp = toep->inp;
|
||||
CURVNET_SET(toep->vnet);
|
||||
INP_INFO_RLOCK(&V_tcbinfo); /* for tcp_close */
|
||||
INP_WLOCK(inp);
|
||||
|
||||
@ -1380,6 +1385,7 @@ do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
|
||||
final_cpl_received(toep);
|
||||
done:
|
||||
INP_INFO_RUNLOCK(&V_tcbinfo);
|
||||
CURVNET_RESTORE();
|
||||
send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
|
||||
return (0);
|
||||
}
|
||||
@ -1501,18 +1507,21 @@ do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
|
||||
DDP_UNLOCK(toep);
|
||||
INP_WUNLOCK(inp);
|
||||
|
||||
CURVNET_SET(toep->vnet);
|
||||
INP_INFO_RLOCK(&V_tcbinfo);
|
||||
INP_WLOCK(inp);
|
||||
tp = tcp_drop(tp, ECONNRESET);
|
||||
if (tp)
|
||||
INP_WUNLOCK(inp);
|
||||
INP_INFO_RUNLOCK(&V_tcbinfo);
|
||||
CURVNET_RESTORE();
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* receive buffer autosize */
|
||||
CURVNET_SET(so->so_vnet);
|
||||
MPASS(toep->vnet == so->so_vnet);
|
||||
CURVNET_SET(toep->vnet);
|
||||
if (sb->sb_flags & SB_AUTOSIZE &&
|
||||
V_tcp_do_autorcvbuf &&
|
||||
sb->sb_hiwat < V_tcp_autorcvbuf_max &&
|
||||
@ -1713,10 +1722,12 @@ do_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
|
||||
tid);
|
||||
#endif
|
||||
toep->flags &= ~TPF_TX_SUSPENDED;
|
||||
CURVNET_SET(toep->vnet);
|
||||
if (toep->ulp_mode == ULP_MODE_ISCSI)
|
||||
t4_push_pdus(sc, toep, plen);
|
||||
else
|
||||
t4_push_frames(sc, toep, plen);
|
||||
CURVNET_RESTORE();
|
||||
} else if (plen > 0) {
|
||||
struct sockbuf *sb = &so->so_snd;
|
||||
int sbu;
|
||||
@ -2143,7 +2154,7 @@ t4_aiotx_task(void *context, int pending)
|
||||
struct socket *so = inp->inp_socket;
|
||||
struct kaiocb *job;
|
||||
|
||||
CURVNET_SET(so->so_vnet);
|
||||
CURVNET_SET(toep->vnet);
|
||||
SOCKBUF_LOCK(&so->so_snd);
|
||||
while (!TAILQ_EMPTY(&toep->aiotx_jobq) && sowriteable(so)) {
|
||||
job = TAILQ_FIRST(&toep->aiotx_jobq);
|
||||
|
@ -546,7 +546,8 @@ handle_ddp_data(struct toepcb *toep, __be32 ddp_report, __be32 rcv_nxt, int len)
|
||||
#endif
|
||||
|
||||
/* receive buffer autosize */
|
||||
CURVNET_SET(so->so_vnet);
|
||||
MPASS(toep->vnet == so->so_vnet);
|
||||
CURVNET_SET(toep->vnet);
|
||||
SOCKBUF_LOCK(sb);
|
||||
if (sb->sb_flags & SB_AUTOSIZE &&
|
||||
V_tcp_do_autorcvbuf &&
|
||||
|
@ -222,6 +222,7 @@ alloc_lctx(struct adapter *sc, struct inpcb *inp, struct vi_info *vi)
|
||||
TAILQ_INIT(&lctx->synq);
|
||||
|
||||
lctx->inp = inp;
|
||||
lctx->vnet = inp->inp_socket->so_vnet;
|
||||
in_pcbref(inp);
|
||||
|
||||
return (lctx);
|
||||
@ -1200,6 +1201,8 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
|
||||
|
||||
pi = sc->port[G_SYN_INTF(be16toh(cpl->l2info))];
|
||||
|
||||
CURVNET_SET(lctx->vnet);
|
||||
|
||||
/*
|
||||
* Use the MAC index to lookup the associated VI. If this SYN
|
||||
* didn't match a perfect MAC filter, punt.
|
||||
@ -1274,6 +1277,13 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
|
||||
ntids = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Don't offload if the ifnet that the SYN came in on is not in the same
|
||||
* vnet as the listening socket.
|
||||
*/
|
||||
if (lctx->vnet != ifp->if_vnet)
|
||||
REJECT_PASS_ACCEPT();
|
||||
|
||||
e = get_l2te_for_nexthop(pi, ifp, &inc);
|
||||
if (e == NULL)
|
||||
REJECT_PASS_ACCEPT();
|
||||
@ -1313,7 +1323,6 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
|
||||
REJECT_PASS_ACCEPT();
|
||||
}
|
||||
so = inp->inp_socket;
|
||||
CURVNET_SET(so->so_vnet);
|
||||
|
||||
mtu_idx = find_best_mtu_idx(sc, &inc, be16toh(cpl->tcpopt.mss));
|
||||
rscale = cpl->tcpopt.wsf && V_tcp_do_rfc1323 ? select_rcv_wscale() : 0;
|
||||
@ -1360,7 +1369,6 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
|
||||
*/
|
||||
toe_syncache_add(&inc, &to, &th, inp, tod, synqe);
|
||||
INP_UNLOCK_ASSERT(inp); /* ok to assert, we have a ref on the inp */
|
||||
CURVNET_RESTORE();
|
||||
|
||||
/*
|
||||
* If we replied during syncache_add (synqe->wr has been consumed),
|
||||
@ -1415,10 +1423,12 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
|
||||
return (__LINE__);
|
||||
}
|
||||
INP_WUNLOCK(inp);
|
||||
CURVNET_RESTORE();
|
||||
|
||||
release_synqe(synqe); /* extra hold */
|
||||
return (0);
|
||||
reject:
|
||||
CURVNET_RESTORE();
|
||||
CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid,
|
||||
reject_reason);
|
||||
|
||||
@ -1490,6 +1500,7 @@ do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
|
||||
KASSERT(synqe->flags & TPF_SYNQE,
|
||||
("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe));
|
||||
|
||||
CURVNET_SET(lctx->vnet);
|
||||
INP_INFO_RLOCK(&V_tcbinfo); /* for syncache_expand */
|
||||
INP_WLOCK(inp);
|
||||
|
||||
@ -1507,6 +1518,7 @@ do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
|
||||
|
||||
INP_WUNLOCK(inp);
|
||||
INP_INFO_RUNLOCK(&V_tcbinfo);
|
||||
CURVNET_RESTORE();
|
||||
return (0);
|
||||
}
|
||||
|
||||
@ -1532,6 +1544,7 @@ do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
|
||||
send_reset_synqe(TOEDEV(ifp), synqe);
|
||||
INP_WUNLOCK(inp);
|
||||
INP_INFO_RUNLOCK(&V_tcbinfo);
|
||||
CURVNET_RESTORE();
|
||||
return (0);
|
||||
}
|
||||
toep->tid = tid;
|
||||
@ -1568,6 +1581,8 @@ do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
|
||||
/* New connection inpcb is already locked by syncache_expand(). */
|
||||
new_inp = sotoinpcb(so);
|
||||
INP_WLOCK_ASSERT(new_inp);
|
||||
MPASS(so->so_vnet == lctx->vnet);
|
||||
toep->vnet = lctx->vnet;
|
||||
|
||||
/*
|
||||
* This is for the unlikely case where the syncache entry that we added
|
||||
@ -1591,6 +1606,7 @@ do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
|
||||
if (inp != NULL)
|
||||
INP_WUNLOCK(inp);
|
||||
INP_INFO_RUNLOCK(&V_tcbinfo);
|
||||
CURVNET_RESTORE();
|
||||
release_synqe(synqe);
|
||||
|
||||
return (0);
|
||||
|
@ -799,74 +799,96 @@ update_clip_table(struct adapter *sc, struct tom_data *td)
|
||||
struct in6_addr *lip, tlip;
|
||||
struct clip_head stale;
|
||||
struct clip_entry *ce, *ce_temp;
|
||||
int rc, gen = atomic_load_acq_int(&in6_ifaddr_gen);
|
||||
struct vi_info *vi;
|
||||
int rc, gen, i, j;
|
||||
uintptr_t last_vnet;
|
||||
|
||||
ASSERT_SYNCHRONIZED_OP(sc);
|
||||
|
||||
IN6_IFADDR_RLOCK(&in6_ifa_tracker);
|
||||
mtx_lock(&td->clip_table_lock);
|
||||
|
||||
gen = atomic_load_acq_int(&in6_ifaddr_gen);
|
||||
if (gen == td->clip_gen)
|
||||
goto done;
|
||||
|
||||
TAILQ_INIT(&stale);
|
||||
TAILQ_CONCAT(&stale, &td->clip_table, link);
|
||||
|
||||
TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
|
||||
lip = &ia->ia_addr.sin6_addr;
|
||||
|
||||
KASSERT(!IN6_IS_ADDR_MULTICAST(lip),
|
||||
("%s: mcast address in in6_ifaddr list", __func__));
|
||||
|
||||
if (IN6_IS_ADDR_LOOPBACK(lip))
|
||||
/*
|
||||
* last_vnet optimizes the common cases where all if_vnet = NULL (no
|
||||
* VIMAGE) or all if_vnet = vnet0.
|
||||
*/
|
||||
last_vnet = (uintptr_t)(-1);
|
||||
for_each_port(sc, i)
|
||||
for_each_vi(sc->port[i], j, vi) {
|
||||
if (last_vnet == (uintptr_t)vi->ifp->if_vnet)
|
||||
continue;
|
||||
if (IN6_IS_SCOPE_EMBED(lip)) {
|
||||
/* Remove the embedded scope */
|
||||
tlip = *lip;
|
||||
lip = &tlip;
|
||||
in6_clearscope(lip);
|
||||
}
|
||||
/*
|
||||
* XXX: how to weed out the link local address for the loopback
|
||||
* interface? It's fe80::1 usually (always?).
|
||||
*/
|
||||
|
||||
/*
|
||||
* If it's in the main list then we already know it's not stale.
|
||||
*/
|
||||
TAILQ_FOREACH(ce, &td->clip_table, link) {
|
||||
if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip))
|
||||
goto next;
|
||||
}
|
||||
/* XXX: races with if_vmove */
|
||||
CURVNET_SET(vi->ifp->if_vnet);
|
||||
TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
|
||||
lip = &ia->ia_addr.sin6_addr;
|
||||
|
||||
/*
|
||||
* If it's in the stale list we should move it to the main list.
|
||||
*/
|
||||
TAILQ_FOREACH(ce, &stale, link) {
|
||||
if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) {
|
||||
TAILQ_REMOVE(&stale, ce, link);
|
||||
TAILQ_INSERT_TAIL(&td->clip_table, ce, link);
|
||||
goto next;
|
||||
KASSERT(!IN6_IS_ADDR_MULTICAST(lip),
|
||||
("%s: mcast address in in6_ifaddr list", __func__));
|
||||
|
||||
if (IN6_IS_ADDR_LOOPBACK(lip))
|
||||
continue;
|
||||
if (IN6_IS_SCOPE_EMBED(lip)) {
|
||||
/* Remove the embedded scope */
|
||||
tlip = *lip;
|
||||
lip = &tlip;
|
||||
in6_clearscope(lip);
|
||||
}
|
||||
}
|
||||
/*
|
||||
* XXX: how to weed out the link local address for the
|
||||
* loopback interface? It's fe80::1 usually (always?).
|
||||
*/
|
||||
|
||||
/* A new IP6 address; add it to the CLIP table */
|
||||
ce = malloc(sizeof(*ce), M_CXGBE, M_NOWAIT);
|
||||
memcpy(&ce->lip, lip, sizeof(ce->lip));
|
||||
ce->refcount = 0;
|
||||
rc = add_lip(sc, lip);
|
||||
if (rc == 0)
|
||||
TAILQ_INSERT_TAIL(&td->clip_table, ce, link);
|
||||
else {
|
||||
char ip[INET6_ADDRSTRLEN];
|
||||
/*
|
||||
* If it's in the main list then we already know it's
|
||||
* not stale.
|
||||
*/
|
||||
TAILQ_FOREACH(ce, &td->clip_table, link) {
|
||||
if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip))
|
||||
goto next;
|
||||
}
|
||||
|
||||
inet_ntop(AF_INET6, &ce->lip, &ip[0], sizeof(ip));
|
||||
log(LOG_ERR, "%s: could not add %s (%d)\n",
|
||||
__func__, ip, rc);
|
||||
free(ce, M_CXGBE);
|
||||
}
|
||||
/*
|
||||
* If it's in the stale list we should move it to the
|
||||
* main list.
|
||||
*/
|
||||
TAILQ_FOREACH(ce, &stale, link) {
|
||||
if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) {
|
||||
TAILQ_REMOVE(&stale, ce, link);
|
||||
TAILQ_INSERT_TAIL(&td->clip_table, ce,
|
||||
link);
|
||||
goto next;
|
||||
}
|
||||
}
|
||||
|
||||
/* A new IP6 address; add it to the CLIP table */
|
||||
ce = malloc(sizeof(*ce), M_CXGBE, M_NOWAIT);
|
||||
memcpy(&ce->lip, lip, sizeof(ce->lip));
|
||||
ce->refcount = 0;
|
||||
rc = add_lip(sc, lip);
|
||||
if (rc == 0)
|
||||
TAILQ_INSERT_TAIL(&td->clip_table, ce, link);
|
||||
else {
|
||||
char ip[INET6_ADDRSTRLEN];
|
||||
|
||||
inet_ntop(AF_INET6, &ce->lip, &ip[0],
|
||||
sizeof(ip));
|
||||
log(LOG_ERR, "%s: could not add %s (%d)\n",
|
||||
__func__, ip, rc);
|
||||
free(ce, M_CXGBE);
|
||||
}
|
||||
next:
|
||||
continue;
|
||||
continue;
|
||||
}
|
||||
CURVNET_RESTORE();
|
||||
last_vnet = (uintptr_t)vi->ifp->if_vnet;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -141,6 +141,7 @@ struct toepcb {
|
||||
int refcount;
|
||||
struct tom_data *td;
|
||||
struct inpcb *inp; /* backpointer to host stack's PCB */
|
||||
struct vnet *vnet;
|
||||
struct vi_info *vi; /* virtual interface */
|
||||
struct sge_wrq *ofld_txq;
|
||||
struct sge_ofld_rxq *ofld_rxq;
|
||||
@ -232,6 +233,7 @@ struct listen_ctx {
|
||||
struct stid_region stid_region;
|
||||
int flags;
|
||||
struct inpcb *inp; /* listening socket's inp */
|
||||
struct vnet *vnet;
|
||||
struct sge_wrq *ctrlq;
|
||||
struct sge_ofld_rxq *ofld_rxq;
|
||||
struct clip_entry *ce;
|
||||
|
Loading…
Reference in New Issue
Block a user