ktls: Accurately track if ifnet ktls is enabled

This allows us to avoid spurious calls to ktls_disable_ifnet()

When we implemented ifnet kTLS, we set a flag in the tx socket
buffer (SB_TLS_IFNET) to indicate ifnet kTLS.  This flag meant that
ifnet kTLS was active on a socket now, or had been at some point in
the past.  Later,
I added code to switch ifnet ktls sessions to software in the case
of lossy TCP connections that have a high retransmit rate.
Because TCP was using SB_TLS_IFNET to know if it needed to do math
to calculate the retransmit ratio and potentially call into
ktls_disable_ifnet(), it was doing unneeded work long after
a session was moved to software.
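
For illustration, a minimal sketch of the old gating pattern (the
retransmit-ratio helper below is hypothetical shorthand; the real
accounting in the TCP stacks is more involved).  Because
SB_TLS_IFNET was never cleared, this work kept running even after
the session had moved to software:

	/* Old, sticky gate: true forever once ifnet kTLS was ever used. */
	if ((so->so_snd.sb_flags & SB_TLS_IFNET) != 0) {
		/* tcp_rexmit_pct() is a hypothetical helper. */
		if (tcp_rexmit_pct(tp) > ktls_ifnet_max_rexmit_pct)
			ktls_disable_ifnet(tp);
	}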

This patch carefully tracks whether or not ifnet ktls is still enabled
on a TCP connection.  Because the inp is now embedded in the tcpcb, and
because TCP is the most frequent accessor of this state, it made sense to
move this from the socket buffer flags to the tcpcb. Because we now need
reliable access to the tcpcb, we take a ref on the inp when creating a tx
ktls session.
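
With this change the gate becomes a cheap tcpcb bit test that turns
off as soon as the tx session leaves ifnet mode (a sketch using the
same hypothetical helper as above):

	/*
	 * New gate: t_nic_ktls_xmit is cleared when the ifnet tx
	 * session is destroyed, e.g. after a switch to software kTLS.
	 */
	if (tp->t_nic_ktls_xmit != 0 &&
	    tcp_rexmit_pct(tp) > ktls_ifnet_max_rexmit_pct)
		ktls_disable_ifnet(tp);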

While here, I noticed that rack/bbr were incorrectly implementing
tfb_hwtls_change(), and applying the change to all pending sends,
when it should apply only to future sends.
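
For reference, a sketch of the corrected callback behavior, mirroring
the rack_hw_tls_change() hunk in rack.c below (the function name here
is illustrative): only state consulted for future sends is updated,
and in-flight rsm's are left untouched:

	static void
	example_hwtls_change(struct tcpcb *tp, int chg)
	{
		struct tcp_rack *rack;

		rack = (struct tcp_rack *)tp->t_fb_ptr;
		/* Affects only packets queued from now on. */
		rack->r_ctl.fsb.hw_tls = chg ? 1 : 0;
	}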

This change reduces spurious calls to ktls_disable_ifnet() by 95% or so
in a Netflix CDN environment.

Reviewed by: markj, rrs
Sponsored by: Netflix
Differential Revision: https://reviews.freebsd.org/D38380
Author: Andrew Gallatin
Date:   2023-02-08 15:37:08 -05:00
Commit: c0e4090e3d
Parent: ce6a0c776b

8 changed files with 126 additions and 49 deletions

--- a/sys/kern/uipc_ktls.c
+++ b/sys/kern/uipc_ktls.c

@@ -222,6 +222,11 @@ static COUNTER_U64_DEFINE_EARLY(ktls_ifnet_disable_ok);
 SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, ifnet_disable_ok, CTLFLAG_RD,
     &ktls_ifnet_disable_ok, "TLS sessions able to switch to SW from ifnet");
 
+static COUNTER_U64_DEFINE_EARLY(ktls_destroy_task);
+SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, destroy_task, CTLFLAG_RD,
+    &ktls_destroy_task,
+    "Number of times ktls session was destroyed via taskqueue");
+
 SYSCTL_NODE(_kern_ipc_tls, OID_AUTO, sw, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
     "Software TLS session stats");
 SYSCTL_NODE(_kern_ipc_tls, OID_AUTO, ifnet, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
@@ -619,10 +624,14 @@ ktls_create_session(struct socket *so, struct tls_enable *en,
 	counter_u64_add(ktls_offload_active, 1);
 
 	refcount_init(&tls->refcount, 1);
-	if (direction == KTLS_RX)
+	if (direction == KTLS_RX) {
 		TASK_INIT(&tls->reset_tag_task, 0, ktls_reset_receive_tag, tls);
-	else
+	} else {
 		TASK_INIT(&tls->reset_tag_task, 0, ktls_reset_send_tag, tls);
+		tls->inp = so->so_pcb;
+		in_pcbref(tls->inp);
+		tls->tx = true;
+	}
 
 	tls->wq_index = ktls_get_cpu(so);
@@ -757,12 +766,16 @@ ktls_clone_session(struct ktls_session *tls, int direction)
 	counter_u64_add(ktls_offload_active, 1);
 
 	refcount_init(&tls_new->refcount, 1);
-	if (direction == KTLS_RX)
+	if (direction == KTLS_RX) {
 		TASK_INIT(&tls_new->reset_tag_task, 0, ktls_reset_receive_tag,
 		    tls_new);
-	else
+	} else {
 		TASK_INIT(&tls_new->reset_tag_task, 0, ktls_reset_send_tag,
 		    tls_new);
+		tls_new->inp = tls->inp;
+		tls_new->tx = true;
+		in_pcbref(tls_new->inp);
+	}
 
 	/* Copy fields from existing session. */
 	tls_new->params = tls->params;
@@ -1272,6 +1285,7 @@ ktls_enable_tx(struct socket *so, struct tls_enable *en)
 {
 	struct ktls_session *tls;
 	struct inpcb *inp;
+	struct tcpcb *tp;
 	int error;
 
 	if (!ktls_offload_enable)
@@ -1336,8 +1350,13 @@ ktls_enable_tx(struct socket *so, struct tls_enable *en)
 	SOCKBUF_LOCK(&so->so_snd);
 	so->so_snd.sb_tls_seqno = be64dec(en->rec_seq);
 	so->so_snd.sb_tls_info = tls;
-	if (tls->mode != TCP_TLS_MODE_SW)
-		so->so_snd.sb_flags |= SB_TLS_IFNET;
+	if (tls->mode != TCP_TLS_MODE_SW) {
+		tp = intotcpcb(inp);
+		MPASS(tp->t_nic_ktls_xmit == 0);
+		tp->t_nic_ktls_xmit = 1;
+		if (tp->t_fb->tfb_hwtls_change != NULL)
+			(*tp->t_fb->tfb_hwtls_change)(tp, 1);
+	}
 	SOCKBUF_UNLOCK(&so->so_snd);
 	INP_WUNLOCK(inp);
 	SOCK_IO_SEND_UNLOCK(so);
@@ -1438,6 +1457,7 @@ ktls_set_tx_mode(struct socket *so, int mode)
 {
 	struct ktls_session *tls, *tls_new;
 	struct inpcb *inp;
+	struct tcpcb *tp;
 	int error;
 
 	if (SOLISTENING(so))
@@ -1452,6 +1472,20 @@
 	inp = so->so_pcb;
 	INP_WLOCK_ASSERT(inp);
+	tp = intotcpcb(inp);
+
+	if (mode == TCP_TLS_MODE_IFNET) {
+		/* Don't allow enabling ifnet ktls multiple times */
+		if (tp->t_nic_ktls_xmit)
+			return (EALREADY);
+		/*
+		 * Don't enable ifnet ktls if we disabled it due to an
+		 * excessive retransmission rate
+		 */
+		if (tp->t_nic_ktls_xmit_dis)
+			return (ENXIO);
+	}
+
 	SOCKBUF_LOCK(&so->so_snd);
 	tls = so->so_snd.sb_tls_info;
 	if (tls == NULL) {
@@ -1507,8 +1541,12 @@
 	INP_WLOCK(inp);
 	SOCKBUF_LOCK(&so->so_snd);
 	so->so_snd.sb_tls_info = tls_new;
-	if (tls_new->mode != TCP_TLS_MODE_SW)
-		so->so_snd.sb_flags |= SB_TLS_IFNET;
+	if (tls_new->mode != TCP_TLS_MODE_SW) {
+		MPASS(tp->t_nic_ktls_xmit == 0);
+		tp->t_nic_ktls_xmit = 1;
+		if (tp->t_fb->tfb_hwtls_change != NULL)
+			(*tp->t_fb->tfb_hwtls_change)(tp, 1);
+	}
 	SOCKBUF_UNLOCK(&so->so_snd);
 	SOCK_IO_SEND_UNLOCK(so);
@@ -1662,8 +1700,7 @@ ktls_reset_send_tag(void *context, int pending)
 		mtx_pool_lock(mtxpool_sleep, tls);
 		tls->reset_pending = false;
 		mtx_pool_unlock(mtxpool_sleep, tls);
-		if (!in_pcbrele_wlocked(inp))
-			INP_WUNLOCK(inp);
+		INP_WUNLOCK(inp);
 
 		counter_u64_add(ktls_ifnet_reset, 1);
@@ -1674,18 +1711,15 @@ ktls_reset_send_tag(void *context, int pending)
 	} else {
 		NET_EPOCH_ENTER(et);
 		INP_WLOCK(inp);
-		if (!in_pcbrele_wlocked(inp)) {
-			if (!(inp->inp_flags & INP_DROPPED)) {
-				tp = intotcpcb(inp);
-				CURVNET_SET(inp->inp_vnet);
-				tp = tcp_drop(tp, ECONNABORTED);
-				CURVNET_RESTORE();
-				if (tp != NULL)
-					INP_WUNLOCK(inp);
-				counter_u64_add(ktls_ifnet_reset_dropped, 1);
-			} else
-				INP_WUNLOCK(inp);
-		}
+		if (!(inp->inp_flags & INP_DROPPED)) {
+			tp = intotcpcb(inp);
+			CURVNET_SET(inp->inp_vnet);
+			tp = tcp_drop(tp, ECONNABORTED);
+			CURVNET_RESTORE();
+			if (tp != NULL)
+				counter_u64_add(ktls_ifnet_reset_dropped, 1);
+		}
+		INP_WUNLOCK(inp);
 		NET_EPOCH_EXIT(et);
 
 		counter_u64_add(ktls_ifnet_reset_failed, 1);
@@ -1746,8 +1780,6 @@ ktls_output_eagain(struct inpcb *inp, struct ktls_session *tls)
 	mtx_pool_lock(mtxpool_sleep, tls);
 	if (!tls->reset_pending) {
 		(void) ktls_hold(tls);
-		in_pcbref(inp);
-		tls->inp = inp;
 		tls->reset_pending = true;
 		taskqueue_enqueue(taskqueue_thread, &tls->reset_tag_task);
 	}
@@ -1790,11 +1822,55 @@ ktls_modify_txrtlmt(struct ktls_session *tls, uint64_t max_pacing_rate)
 #endif
 #endif
 
+static void
+ktls_destroy_help(void *context, int pending __unused)
+{
+	ktls_destroy(context);
+}
+
 void
 ktls_destroy(struct ktls_session *tls)
 {
+	struct inpcb *inp;
+	struct tcpcb *tp;
+	bool wlocked;
+
 	MPASS(tls->refcount == 0);
 
+	inp = tls->inp;
+	if (tls->tx) {
+		wlocked = INP_WLOCKED(inp);
+		if (!wlocked && !INP_TRY_WLOCK(inp)) {
+			/*
+			 * rwlocks read locks are anonymous, and there
+			 * is no way to know if our current thread
+			 * holds an rlock on the inp. As a rough
+			 * estimate, check to see if the thread holds
+			 * *any* rlocks at all. If it does not, then we
+			 * know that we don't hold the inp rlock, and
+			 * can safely take the wlock
+			 */
+			if (curthread->td_rw_rlocks == 0) {
+				INP_WLOCK(inp);
+			} else {
+				/*
+				 * We might hold the rlock, so let's
+				 * do the destroy in a taskqueue
+				 * context to avoid a potential
+				 * deadlock. This should be very
+				 * rare.
+				 */
+				counter_u64_add(ktls_destroy_task, 1);
+				TASK_INIT(&tls->destroy_task, 0,
+				    ktls_destroy_help, tls);
+				(void)taskqueue_enqueue(taskqueue_thread,
+				    &tls->destroy_task);
+				return;
+			}
+		}
+	}
+
 	if (tls->sequential_records) {
 		struct mbuf *m, *n;
 		int page_count;
@@ -1841,6 +1917,12 @@ ktls_destroy(struct ktls_session *tls)
 		m_snd_tag_rele(tls->snd_tag);
 		if (tls->rx_ifp != NULL)
 			if_rele(tls->rx_ifp);
+		if (tls->tx) {
+			INP_WLOCK_ASSERT(inp);
+			tp = intotcpcb(inp);
+			MPASS(tp->t_nic_ktls_xmit == 1);
+			tp->t_nic_ktls_xmit = 0;
+		}
 		break;
 #ifdef TCP_OFFLOAD
 	case TCP_TLS_MODE_TOE:
@@ -1870,6 +1952,11 @@ ktls_destroy(struct ktls_session *tls)
 		tls->params.cipher_key = NULL;
 		tls->params.cipher_key_len = 0;
 	}
+	if (tls->tx) {
+		INP_WLOCK_ASSERT(inp);
+		if (!in_pcbrele_wlocked(inp) && !wlocked)
+			INP_WUNLOCK(inp);
+	}
 	explicit_bzero(tls->params.iv, sizeof(tls->params.iv));
 
 	uma_zfree(ktls_session_zone, tls);
@@ -3213,8 +3300,7 @@ ktls_disable_ifnet_help(void *context, int pending __unused)
 	CURVNET_SET(so->so_vnet);
 	sorele(so);
 	CURVNET_RESTORE();
-	if (!in_pcbrele_wlocked(inp))
-		INP_WUNLOCK(inp);
+	INP_WUNLOCK(inp);
 	ktls_free(tls);
 }
@@ -3245,22 +3331,19 @@ ktls_disable_ifnet(void *arg)
 	so = inp->inp_socket;
 	SOCK_LOCK(so);
 	tls = so->so_snd.sb_tls_info;
-	if (tls->disable_ifnet_pending) {
+	if (tp->t_nic_ktls_xmit_dis == 1) {
 		SOCK_UNLOCK(so);
 		return;
 	}
 
 	/*
-	 * note that disable_ifnet_pending is never cleared; disabling
-	 * ifnet can only be done once per session, so we never want
+	 * note that t_nic_ktls_xmit_dis is never cleared; disabling
+	 * ifnet can only be done once per connection, so we never want
 	 * to do it again
 	 */
 	(void)ktls_hold(tls);
-	in_pcbref(inp);
 	soref(so);
-	tls->disable_ifnet_pending = true;
-	tls->inp = inp;
+	tp->t_nic_ktls_xmit_dis = 1;
 	SOCK_UNLOCK(so);
 	TASK_INIT(&tls->disable_ifnet_task, 0, ktls_disable_ifnet_help, tls);
 	(void)taskqueue_enqueue(taskqueue_thread, &tls->disable_ifnet_task);

--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c

@@ -227,7 +227,7 @@ tcp_default_output(struct tcpcb *tp)
 	isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
 #endif
 #ifdef KERN_TLS
-	const bool hw_tls = (so->so_snd.sb_flags & SB_TLS_IFNET) != 0;
+	const bool hw_tls = tp->t_nic_ktls_xmit != 0;
 #else
 	const bool hw_tls = false;
 #endif

--- a/sys/netinet/tcp_ratelimit.c
+++ b/sys/netinet/tcp_ratelimit.c

@@ -1350,7 +1350,7 @@ tcp_set_pacing_rate(struct tcpcb *tp, struct ifnet *ifp,
 	}
 #ifdef KERN_TLS
 	tls = NULL;
-	if (tptosocket(tp)->so_snd.sb_flags & SB_TLS_IFNET) {
+	if (tp->t_nic_ktls_xmit != 0) {
 		tls = tptosocket(tp)->so_snd.sb_tls_info;
 
 		if ((ifp->if_capenable & IFCAP_TXTLS_RTLMT) == 0 ||
@@ -1413,7 +1413,7 @@ tcp_chg_pacing_rate(const struct tcp_hwrate_limit_table *crte,
 	}
 #ifdef KERN_TLS
-	if (tptosocket(tp)->so_snd.sb_flags & SB_TLS_IFNET) {
+	if (tp->t_nic_ktls_xmit) {
 		tls = tptosocket(tp)->so_snd.sb_tls_info;
 		if (tls->mode != TCP_TLS_MODE_IFNET)
 			tls = NULL;

--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c

@@ -11861,7 +11861,7 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
 	inp = bbr->rc_inp;
 	so = inp->inp_socket;
 	sb = &so->so_snd;
-	if (sb->sb_flags & SB_TLS_IFNET)
+	if (tp->t_nic_ktls_xmit)
 		hw_tls = 1;
 	else
 		hw_tls = 0;

--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c

@@ -18202,7 +18202,7 @@ rack_output(struct tcpcb *tp)
 	 * and initialize the header from the template for sends on this
 	 * connection.
 	 */
-	hw_tls = (sb->sb_flags & SB_TLS_IFNET) != 0;
+	hw_tls = tp->t_nic_ktls_xmit != 0;
 	if (len) {
 		uint32_t max_val;
 		uint32_t moff;
@@ -20183,20 +20183,10 @@
 static void
 rack_hw_tls_change(struct tcpcb *tp, int chg)
 {
-	/*
-	 * HW tls state has changed.. fix all
-	 * rsm's in flight.
-	 */
+	/* Update HW tls state */
 	struct tcp_rack *rack;
-	struct rack_sendmap *rsm;
 
 	rack = (struct tcp_rack *)tp->t_fb_ptr;
-	RB_FOREACH(rsm, rack_rb_tree_head, &rack->r_ctl.rc_mtree) {
-		if (chg)
-			rsm->r_hw_tls = 1;
-		else
-			rsm->r_hw_tls = 0;
-	}
 	if (chg)
 		rack->r_ctl.fsb.hw_tls = 1;
 	else

--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h

@@ -209,6 +209,9 @@ struct tcpcb {
 	tcp_seq snd_recover;		/* for use in NewReno Fast Recovery */
 	char	t_oobflags;		/* have some */
 	char	t_iobc;			/* input character */
+	uint8_t	t_nic_ktls_xmit:1,	/* active nic ktls xmit sessions */
+		t_nic_ktls_xmit_dis:1,	/* disabled nic xmit ktls? */
+		t_nic_ktls_spare:6;	/* spare nic ktls */
 	int	t_rxtcur;		/* current retransmit value (ticks) */
 	int	t_rxtshift;		/* log(2) of rexmt exp. backoff */

--- a/sys/sys/ktls.h
+++ b/sys/sys/ktls.h

@@ -194,13 +194,14 @@ struct ktls_session {
 	struct ifnet *rx_ifp;
 	u_short rx_vlan_id;
 	bool reset_pending;
-	bool disable_ifnet_pending;
+	bool tx;
 	bool sync_dispatch;
 	bool sequential_records;
 
 	/* Only used for TLS 1.0. */
 	uint64_t next_seqno;
 	STAILQ_HEAD(, mbuf) pending_records;
+	struct task destroy_task;
 } __aligned(CACHE_LINE_SIZE);
 
 extern unsigned int ktls_ifnet_max_rexmit_pct;

--- a/sys/sys/sockbuf.h
+++ b/sys/sys/sockbuf.h

@@ -52,7 +52,7 @@
 #define	SB_AUTOSIZE	0x800		/* automatically size socket buffer */
 #define	SB_STOP		0x1000		/* backpressure indicator */
 #define	SB_AIO_RUNNING	0x2000		/* AIO operation running */
-#define	SB_TLS_IFNET	0x4000		/* has used / is using ifnet KTLS */
+#define	SB_UNUSED	0x4000		/* previously used for SB_TLS_IFNET */
 #define	SB_TLS_RX_RESYNC 0x8000		/* KTLS RX lost HW sync */
 
 #define	SBS_CANTSENDMORE	0x0010	/* can't send more data to peer */