cxgbe/tom: Add VIMAGE support to the TOE driver.

Active Open:
- Save the socket's vnet at the time of the active open (t4_connect) and
  switch to it when processing the reply (do_act_open_rpl or
  do_act_establish).

Passive Open:
- Save the listening socket's vnet in the driver's listen_ctx and switch
  to it when processing incoming SYNs for the socket.
- Reject SYNs that arrive on an ifnet that's not in the same vnet as the
  listening socket.

CLIP (Compressed Local IPv6) table:
- Add only those IPv6 addresses to the CLIP table that are in a vnet
  associated with one of the card's ifnets.

Misc:
- Set vnet from the toepcb when processing TCP state transitions.
- The kernel sets the vnet when calling the driver's output routine,
  so t4_push_frames already runs in the proper vnet context.  The one
  exception is when incoming credits trigger tx within the driver's
  ithread.  Set the vnet explicitly in do_fw4_ack for that case.

MFC after:	3 days
Sponsored by:	Chelsio Communications
This commit is contained in:
np 2017-01-11 23:48:17 +00:00
parent 8eff25dda5
commit 989ae30975
6 changed files with 113 additions and 56 deletions

View File

@ -126,6 +126,7 @@ do_act_establish(struct sge_iq *iq, const struct rss_header *rss,
CTR3(KTR_CXGBE, "%s: atid %u, tid %u", __func__, atid, tid);
free_atid(sc, atid);
CURVNET_SET(toep->vnet);
INP_WLOCK(inp);
toep->tid = tid;
insert_tid(sc, tid, toep, inp->inp_vflag & INP_IPV6 ? 2 : 1);
@ -141,6 +142,7 @@ do_act_establish(struct sge_iq *iq, const struct rss_header *rss,
make_established(toep, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt);
done:
INP_WUNLOCK(inp);
CURVNET_RESTORE();
return (0);
}
@ -178,6 +180,7 @@ act_open_failure_cleanup(struct adapter *sc, u_int atid, u_int status)
free_atid(sc, atid);
toep->tid = -1;
CURVNET_SET(toep->vnet);
if (status != EAGAIN)
INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
@ -185,6 +188,7 @@ act_open_failure_cleanup(struct adapter *sc, u_int atid, u_int status)
final_cpl_received(toep); /* unlocks inp */
if (status != EAGAIN)
INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
}
/*
@ -360,6 +364,7 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt,
if (wr == NULL)
DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM);
toep->vnet = so->so_vnet;
if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0)
set_tcpddp_ulp_mode(toep);
else

View File

@ -306,7 +306,6 @@ make_established(struct toepcb *toep, uint32_t snd_isn, uint32_t rcv_isn,
uint16_t tcpopt = be16toh(opt);
struct flowc_tx_params ftxp;
CURVNET_SET(so->so_vnet);
INP_WLOCK_ASSERT(inp);
KASSERT(tp->t_state == TCPS_SYN_SENT ||
tp->t_state == TCPS_SYN_RECEIVED,
@ -357,7 +356,6 @@ make_established(struct toepcb *toep, uint32_t snd_isn, uint32_t rcv_isn,
send_flowc_wr(toep, &ftxp);
soisconnected(so);
CURVNET_RESTORE();
}
static int
@ -1146,6 +1144,7 @@ do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
CURVNET_SET(toep->vnet);
INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
tp = intotcpcb(inp);
@ -1191,6 +1190,7 @@ do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
tcp_twstart(tp);
INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
INP_WLOCK(inp);
final_cpl_received(toep);
@ -1203,6 +1203,7 @@ do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
done:
INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return (0);
}
@ -1229,6 +1230,7 @@ do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss,
KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
CURVNET_SET(toep->vnet);
INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
tp = intotcpcb(inp);
@ -1248,6 +1250,7 @@ do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss,
release:
INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
INP_WLOCK(inp);
final_cpl_received(toep); /* no more CPLs expected */
@ -1272,6 +1275,7 @@ do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss,
done:
INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return (0);
}
@ -1345,6 +1349,7 @@ do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
}
inp = toep->inp;
CURVNET_SET(toep->vnet);
INP_INFO_RLOCK(&V_tcbinfo); /* for tcp_close */
INP_WLOCK(inp);
@ -1380,6 +1385,7 @@ do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
final_cpl_received(toep);
done:
INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
return (0);
}
@ -1501,18 +1507,21 @@ do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
DDP_UNLOCK(toep);
INP_WUNLOCK(inp);
CURVNET_SET(toep->vnet);
INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
tp = tcp_drop(tp, ECONNRESET);
if (tp)
INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return (0);
}
/* receive buffer autosize */
CURVNET_SET(so->so_vnet);
MPASS(toep->vnet == so->so_vnet);
CURVNET_SET(toep->vnet);
if (sb->sb_flags & SB_AUTOSIZE &&
V_tcp_do_autorcvbuf &&
sb->sb_hiwat < V_tcp_autorcvbuf_max &&
@ -1713,10 +1722,12 @@ do_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
tid);
#endif
toep->flags &= ~TPF_TX_SUSPENDED;
CURVNET_SET(toep->vnet);
if (toep->ulp_mode == ULP_MODE_ISCSI)
t4_push_pdus(sc, toep, plen);
else
t4_push_frames(sc, toep, plen);
CURVNET_RESTORE();
} else if (plen > 0) {
struct sockbuf *sb = &so->so_snd;
int sbu;
@ -2143,7 +2154,7 @@ t4_aiotx_task(void *context, int pending)
struct socket *so = inp->inp_socket;
struct kaiocb *job;
CURVNET_SET(so->so_vnet);
CURVNET_SET(toep->vnet);
SOCKBUF_LOCK(&so->so_snd);
while (!TAILQ_EMPTY(&toep->aiotx_jobq) && sowriteable(so)) {
job = TAILQ_FIRST(&toep->aiotx_jobq);

View File

@ -546,7 +546,8 @@ handle_ddp_data(struct toepcb *toep, __be32 ddp_report, __be32 rcv_nxt, int len)
#endif
/* receive buffer autosize */
CURVNET_SET(so->so_vnet);
MPASS(toep->vnet == so->so_vnet);
CURVNET_SET(toep->vnet);
SOCKBUF_LOCK(sb);
if (sb->sb_flags & SB_AUTOSIZE &&
V_tcp_do_autorcvbuf &&

View File

@ -222,6 +222,7 @@ alloc_lctx(struct adapter *sc, struct inpcb *inp, struct vi_info *vi)
TAILQ_INIT(&lctx->synq);
lctx->inp = inp;
lctx->vnet = inp->inp_socket->so_vnet;
in_pcbref(inp);
return (lctx);
@ -1200,6 +1201,8 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
pi = sc->port[G_SYN_INTF(be16toh(cpl->l2info))];
CURVNET_SET(lctx->vnet);
/*
* Use the MAC index to lookup the associated VI. If this SYN
* didn't match a perfect MAC filter, punt.
@ -1274,6 +1277,13 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
ntids = 1;
}
/*
* Don't offload if the ifnet that the SYN came in on is not in the same
* vnet as the listening socket.
*/
if (lctx->vnet != ifp->if_vnet)
REJECT_PASS_ACCEPT();
e = get_l2te_for_nexthop(pi, ifp, &inc);
if (e == NULL)
REJECT_PASS_ACCEPT();
@ -1313,7 +1323,6 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
REJECT_PASS_ACCEPT();
}
so = inp->inp_socket;
CURVNET_SET(so->so_vnet);
mtu_idx = find_best_mtu_idx(sc, &inc, be16toh(cpl->tcpopt.mss));
rscale = cpl->tcpopt.wsf && V_tcp_do_rfc1323 ? select_rcv_wscale() : 0;
@ -1360,7 +1369,6 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
*/
toe_syncache_add(&inc, &to, &th, inp, tod, synqe);
INP_UNLOCK_ASSERT(inp); /* ok to assert, we have a ref on the inp */
CURVNET_RESTORE();
/*
* If we replied during syncache_add (synqe->wr has been consumed),
@ -1415,10 +1423,12 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
return (__LINE__);
}
INP_WUNLOCK(inp);
CURVNET_RESTORE();
release_synqe(synqe); /* extra hold */
return (0);
reject:
CURVNET_RESTORE();
CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid,
reject_reason);
@ -1490,6 +1500,7 @@ do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
KASSERT(synqe->flags & TPF_SYNQE,
("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe));
CURVNET_SET(lctx->vnet);
INP_INFO_RLOCK(&V_tcbinfo); /* for syncache_expand */
INP_WLOCK(inp);
@ -1507,6 +1518,7 @@ do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return (0);
}
@ -1532,6 +1544,7 @@ do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
send_reset_synqe(TOEDEV(ifp), synqe);
INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return (0);
}
toep->tid = tid;
@ -1568,6 +1581,8 @@ do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
/* New connection inpcb is already locked by syncache_expand(). */
new_inp = sotoinpcb(so);
INP_WLOCK_ASSERT(new_inp);
MPASS(so->so_vnet == lctx->vnet);
toep->vnet = lctx->vnet;
/*
* This is for the unlikely case where the syncache entry that we added
@ -1591,6 +1606,7 @@ do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
if (inp != NULL)
INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
release_synqe(synqe);
return (0);

View File

@ -799,74 +799,96 @@ update_clip_table(struct adapter *sc, struct tom_data *td)
struct in6_addr *lip, tlip;
struct clip_head stale;
struct clip_entry *ce, *ce_temp;
int rc, gen = atomic_load_acq_int(&in6_ifaddr_gen);
struct vi_info *vi;
int rc, gen, i, j;
uintptr_t last_vnet;
ASSERT_SYNCHRONIZED_OP(sc);
IN6_IFADDR_RLOCK(&in6_ifa_tracker);
mtx_lock(&td->clip_table_lock);
gen = atomic_load_acq_int(&in6_ifaddr_gen);
if (gen == td->clip_gen)
goto done;
TAILQ_INIT(&stale);
TAILQ_CONCAT(&stale, &td->clip_table, link);
TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
lip = &ia->ia_addr.sin6_addr;
KASSERT(!IN6_IS_ADDR_MULTICAST(lip),
("%s: mcast address in in6_ifaddr list", __func__));
if (IN6_IS_ADDR_LOOPBACK(lip))
/*
* last_vnet optimizes the common cases where all if_vnet = NULL (no
* VIMAGE) or all if_vnet = vnet0.
*/
last_vnet = (uintptr_t)(-1);
for_each_port(sc, i)
for_each_vi(sc->port[i], j, vi) {
if (last_vnet == (uintptr_t)vi->ifp->if_vnet)
continue;
if (IN6_IS_SCOPE_EMBED(lip)) {
/* Remove the embedded scope */
tlip = *lip;
lip = &tlip;
in6_clearscope(lip);
}
/*
* XXX: how to weed out the link local address for the loopback
* interface? It's fe80::1 usually (always?).
*/
/*
* If it's in the main list then we already know it's not stale.
*/
TAILQ_FOREACH(ce, &td->clip_table, link) {
if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip))
goto next;
}
/* XXX: races with if_vmove */
CURVNET_SET(vi->ifp->if_vnet);
TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
lip = &ia->ia_addr.sin6_addr;
/*
* If it's in the stale list we should move it to the main list.
*/
TAILQ_FOREACH(ce, &stale, link) {
if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) {
TAILQ_REMOVE(&stale, ce, link);
TAILQ_INSERT_TAIL(&td->clip_table, ce, link);
goto next;
KASSERT(!IN6_IS_ADDR_MULTICAST(lip),
("%s: mcast address in in6_ifaddr list", __func__));
if (IN6_IS_ADDR_LOOPBACK(lip))
continue;
if (IN6_IS_SCOPE_EMBED(lip)) {
/* Remove the embedded scope */
tlip = *lip;
lip = &tlip;
in6_clearscope(lip);
}
}
/*
* XXX: how to weed out the link local address for the
* loopback interface? It's fe80::1 usually (always?).
*/
/* A new IP6 address; add it to the CLIP table */
ce = malloc(sizeof(*ce), M_CXGBE, M_NOWAIT);
memcpy(&ce->lip, lip, sizeof(ce->lip));
ce->refcount = 0;
rc = add_lip(sc, lip);
if (rc == 0)
TAILQ_INSERT_TAIL(&td->clip_table, ce, link);
else {
char ip[INET6_ADDRSTRLEN];
/*
* If it's in the main list then we already know it's
* not stale.
*/
TAILQ_FOREACH(ce, &td->clip_table, link) {
if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip))
goto next;
}
inet_ntop(AF_INET6, &ce->lip, &ip[0], sizeof(ip));
log(LOG_ERR, "%s: could not add %s (%d)\n",
__func__, ip, rc);
free(ce, M_CXGBE);
}
/*
* If it's in the stale list we should move it to the
* main list.
*/
TAILQ_FOREACH(ce, &stale, link) {
if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) {
TAILQ_REMOVE(&stale, ce, link);
TAILQ_INSERT_TAIL(&td->clip_table, ce,
link);
goto next;
}
}
/* A new IP6 address; add it to the CLIP table */
ce = malloc(sizeof(*ce), M_CXGBE, M_NOWAIT);
memcpy(&ce->lip, lip, sizeof(ce->lip));
ce->refcount = 0;
rc = add_lip(sc, lip);
if (rc == 0)
TAILQ_INSERT_TAIL(&td->clip_table, ce, link);
else {
char ip[INET6_ADDRSTRLEN];
inet_ntop(AF_INET6, &ce->lip, &ip[0],
sizeof(ip));
log(LOG_ERR, "%s: could not add %s (%d)\n",
__func__, ip, rc);
free(ce, M_CXGBE);
}
next:
continue;
continue;
}
CURVNET_RESTORE();
last_vnet = (uintptr_t)vi->ifp->if_vnet;
}
/*

View File

@ -141,6 +141,7 @@ struct toepcb {
int refcount;
struct tom_data *td;
struct inpcb *inp; /* backpointer to host stack's PCB */
struct vnet *vnet;
struct vi_info *vi; /* virtual interface */
struct sge_wrq *ofld_txq;
struct sge_ofld_rxq *ofld_rxq;
@ -232,6 +233,7 @@ struct listen_ctx {
struct stid_region stid_region;
int flags;
struct inpcb *inp; /* listening socket's inp */
struct vnet *vnet;
struct sge_wrq *ctrlq;
struct sge_ofld_rxq *ofld_rxq;
struct clip_entry *ce;