From 521eac97f33d2d4dc448e307773d57b64935ce72 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Thu, 29 Oct 2020 00:23:16 +0000 Subject: [PATCH] Support hardware rate limiting (pacing) with TLS offload. - Add a new send tag type for a send tag that supports both rate limiting (packet pacing) and TLS offload (mostly similar to D22669 but adds a separate structure when allocating the new tag type). - When allocating a send tag for TLS offload, check to see if the connection already has a pacing rate. If so, allocate a tag that supports both rate limiting and TLS offload rather than a plain TLS offload tag. - When setting an initial rate on an existing ifnet KTLS connection, set the rate in the TCP control block inp and then reset the TLS send tag (via ktls_output_eagain) to reallocate a TLS + ratelimit send tag. This allocates the TLS send tag asynchronously from a task queue, so the TLS rate limit tag alloc is always sleepable. - When modifying a rate on a connection using KTLS, look for a TLS send tag. If the send tag is only a plain TLS send tag, assume we failed to allocate a TLS ratelimit tag (either during the TCP_TXTLS_ENABLE socket option, or during the send tag reset triggered by ktls_output_eagain) and ignore the new rate. If the send tag is a ratelimit TLS send tag, change the rate on the TLS tag and leave the inp tag alone. - Lock the inp lock when setting sb_tls_info for a socket send buffer so that the routines in tcp_ratelimit can safely dereference the pointer without needing to grab the socket buffer lock. - Add an IFCAP_TXTLS_RTLMT capability flag and associated administrative controls in ifconfig(8). TLS rate limit tags are only allocated if this capability is enabled. Note that TLS offload (whether unlimited or rate limited) always requires IFCAP_TXTLS[46]. 
Reviewed by: gallatin, hselasky Relnotes: yes Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D26691 --- sbin/ifconfig/ifconfig.8 | 6 ++- sbin/ifconfig/ifconfig.c | 4 +- sys/dev/mlx5/mlx5_en/mlx5_en_main.c | 8 +++- sys/kern/uipc_ktls.c | 63 ++++++++++++++++++++++-- sys/net/if.h | 1 + sys/net/if_var.h | 13 ++++- sys/net/if_vlan.c | 8 ++-- sys/netinet/tcp_ratelimit.c | 74 +++++++++++++++++++++++++---- sys/sys/ktls.h | 3 ++ 9 files changed, 159 insertions(+), 21 deletions(-) diff --git a/sbin/ifconfig/ifconfig.8 b/sbin/ifconfig/ifconfig.8 index 8e83fbcc7730..54343ba489ee 100644 --- a/sbin/ifconfig/ifconfig.8 +++ b/sbin/ifconfig/ifconfig.8 @@ -28,7 +28,7 @@ .\" From: @(#)ifconfig.8 8.3 (Berkeley) 1/5/94 .\" $FreeBSD$ .\" -.Dd October 25, 2020 +.Dd October 28, 2020 .Dt IFCONFIG 8 .Os .Sh NAME @@ -561,6 +561,10 @@ It will always disable TLS for .Xr ip 4 and .Xr ip6 4 . +.It Cm txtlsrtlmt +Enable use of rate limiting (packet pacing) for TLS offload. +.It Fl txtlsrtlmt +Disable use of rate limiting for TLS offload. .It Cm nomap If the driver supports unmapped network buffers, enable them on the interface. diff --git a/sbin/ifconfig/ifconfig.c b/sbin/ifconfig/ifconfig.c index df3514ef5bd8..e47d0000c7ab 100644 --- a/sbin/ifconfig/ifconfig.c +++ b/sbin/ifconfig/ifconfig.c @@ -1345,7 +1345,7 @@ unsetifdescr(const char *val, int value, int s, const struct afswtch *afp) "\10VLAN_HWCSUM\11TSO4\12TSO6\13LRO\14WOL_UCAST\15WOL_MCAST\16WOL_MAGIC" \ "\17TOE4\20TOE6\21VLAN_HWFILTER\23VLAN_HWTSO\24LINKSTATE\25NETMAP" \ "\26RXCSUM_IPV6\27TXCSUM_IPV6\31TXRTLMT\32HWRXTSTMP\33NOMAP\34TXTLS4\35TXTLS6" \ -"\36VXLAN_HWCSUM\37VXLAN_HWTSO" +"\36VXLAN_HWCSUM\37VXLAN_HWTSO\40TXTLS_RTLMT" /* * Print the status of the interface. 
If an address family was @@ -1685,6 +1685,8 @@ static struct cmd basic_cmds[] = { DEF_CMD("-wol_magic", -IFCAP_WOL_MAGIC, setifcap), DEF_CMD("txrtlmt", IFCAP_TXRTLMT, setifcap), DEF_CMD("-txrtlmt", -IFCAP_TXRTLMT, setifcap), + DEF_CMD("txtlsrtlmt", IFCAP_TXTLS_RTLMT, setifcap), + DEF_CMD("-txtlsrtlmt", -IFCAP_TXTLS_RTLMT, setifcap), DEF_CMD("hwrxtstmp", IFCAP_HWRXTSTMP, setifcap), DEF_CMD("-hwrxtstmp", -IFCAP_HWRXTSTMP, setifcap), DEF_CMD("normal", -IFF_LINK0, setifflags), diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c index bb899f053468..39f03b999110 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c @@ -3349,6 +3349,10 @@ mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data) ifp->if_capenable ^= IFCAP_TXTLS4; if (mask & IFCAP_TXTLS6) ifp->if_capenable ^= IFCAP_TXTLS6; +#ifdef RATELIMIT + if (mask & IFCAP_TXTLS_RTLMT) + ifp->if_capenable ^= IFCAP_TXTLS_RTLMT; +#endif if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) @@ -4320,7 +4324,9 @@ mlx5e_create_ifp(struct mlx5_core_dev *mdev) ifp->if_capabilities |= IFCAP_HWSTATS | IFCAP_HWRXTSTMP; ifp->if_capabilities |= IFCAP_NOMAP; ifp->if_capabilities |= IFCAP_TXTLS4 | IFCAP_TXTLS6; - ifp->if_capabilities |= IFCAP_TXRTLMT; +#ifdef RATELIMIT + ifp->if_capabilities |= IFCAP_TXRTLMT | IFCAP_TXTLS_RTLMT; +#endif ifp->if_snd_tag_alloc = mlx5e_snd_tag_alloc; ifp->if_snd_tag_free = mlx5e_snd_tag_free; ifp->if_snd_tag_modify = mlx5e_snd_tag_modify; diff --git a/sys/kern/uipc_ktls.c b/sys/kern/uipc_ktls.c index ac90e1d45f51..c048f708ecfe 100644 --- a/sys/kern/uipc_ktls.c +++ b/sys/kern/uipc_ktls.c @@ -814,12 +814,24 @@ ktls_alloc_snd_tag(struct inpcb *inp, struct ktls_session *tls, bool force, ifp = nh->nh_ifp; if_ref(ifp); - params.hdr.type = IF_SND_TAG_TYPE_TLS; + /* + * Allocate a TLS + ratelimit tag if the connection has an + * existing pacing rate. 
+ */ + if (tp->t_pacing_rate != -1 && + (ifp->if_capenable & IFCAP_TXTLS_RTLMT) != 0) { + params.hdr.type = IF_SND_TAG_TYPE_TLS_RATE_LIMIT; + params.tls_rate_limit.inp = inp; + params.tls_rate_limit.tls = tls; + params.tls_rate_limit.max_rate = tp->t_pacing_rate; + } else { + params.hdr.type = IF_SND_TAG_TYPE_TLS; + params.tls.inp = inp; + params.tls.tls = tls; + } params.hdr.flowid = inp->inp_flowid; params.hdr.flowtype = inp->inp_flowtype; params.hdr.numa_domain = inp->inp_numa_domain; - params.tls.inp = inp; - params.tls.tls = tls; INP_RUNLOCK(inp); if (ifp->if_snd_tag_alloc == NULL) { @@ -1034,6 +1046,7 @@ int ktls_enable_tx(struct socket *so, struct tls_enable *en) { struct ktls_session *tls; + struct inpcb *inp; int error; if (!ktls_offload_enable) @@ -1086,12 +1099,20 @@ ktls_enable_tx(struct socket *so, struct tls_enable *en) return (error); } + /* + * Write lock the INP when setting sb_tls_info so that + * routines in tcp_ratelimit.c can read sb_tls_info while + * holding the INP lock. + */ + inp = so->so_pcb; + INP_WLOCK(inp); SOCKBUF_LOCK(&so->so_snd); so->so_snd.sb_tls_seqno = be64dec(en->rec_seq); so->so_snd.sb_tls_info = tls; if (tls->mode != TCP_TLS_MODE_SW) so->so_snd.sb_flags |= SB_TLS_IFNET; SOCKBUF_UNLOCK(&so->so_snd); + INP_WUNLOCK(inp); sbunlock(&so->so_snd); counter_u64_add(ktls_offload_total, 1); @@ -1344,6 +1365,42 @@ ktls_output_eagain(struct inpcb *inp, struct ktls_session *tls) mtx_pool_unlock(mtxpool_sleep, tls); return (ENOBUFS); } + +#ifdef RATELIMIT +int +ktls_modify_txrtlmt(struct ktls_session *tls, uint64_t max_pacing_rate) +{ + union if_snd_tag_modify_params params = { + .rate_limit.max_rate = max_pacing_rate, + .rate_limit.flags = M_NOWAIT, + }; + struct m_snd_tag *mst; + struct ifnet *ifp; + int error; + + /* Can't get to the inp, but it should be locked. */ + /* INP_LOCK_ASSERT(inp); */ + + MPASS(tls->mode == TCP_TLS_MODE_IFNET); + + if (tls->snd_tag == NULL) { + /* + * Resetting send tag, ignore this change. 
The + * pending reset may or may not see this updated rate + * in the tcpcb. If it doesn't, we will just lose + * this rate change. + */ + return (0); + } + + MPASS(tls->snd_tag != NULL); + MPASS(tls->snd_tag->type == IF_SND_TAG_TYPE_TLS_RATE_LIMIT); + + mst = tls->snd_tag; + ifp = mst->ifp; + return (ifp->if_snd_tag_modify(mst, &params)); +} +#endif #endif void diff --git a/sys/net/if.h b/sys/net/if.h index be306dfbb378..6d9cc906d383 100644 --- a/sys/net/if.h +++ b/sys/net/if.h @@ -250,6 +250,7 @@ struct if_data { #define IFCAP_TXTLS6 0x10000000 /* can do TLS encryption and segmentation for TCP6 */ #define IFCAP_VXLAN_HWCSUM 0x20000000 /* can do IFCAN_HWCSUM on VXLANs */ #define IFCAP_VXLAN_HWTSO 0x40000000 /* can do IFCAP_TSO on VXLANs */ +#define IFCAP_TXTLS_RTLMT 0x80000000 /* can do TLS with rate limiting */ #define IFCAP_HWCSUM_IPV6 (IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6) diff --git a/sys/net/if_var.h b/sys/net/if_var.h index 67d95747a778..65d6fb472d90 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -191,7 +191,8 @@ struct m_snd_tag; #define IF_SND_TAG_TYPE_RATE_LIMIT 0 #define IF_SND_TAG_TYPE_UNLIMITED 1 #define IF_SND_TAG_TYPE_TLS 2 -#define IF_SND_TAG_TYPE_MAX 3 +#define IF_SND_TAG_TYPE_TLS_RATE_LIMIT 3 +#define IF_SND_TAG_TYPE_MAX 4 struct if_snd_tag_alloc_header { uint32_t type; /* send tag type, see IF_SND_TAG_XXX */ @@ -213,6 +214,13 @@ struct if_snd_tag_alloc_tls { const struct ktls_session *tls; }; +struct if_snd_tag_alloc_tls_rate_limit { + struct if_snd_tag_alloc_header hdr; + struct inpcb *inp; + const struct ktls_session *tls; + uint64_t max_rate; /* in bytes/s */ +}; + struct if_snd_tag_rate_limit_params { uint64_t max_rate; /* in bytes/s */ uint32_t queue_level; /* 0 (empty) .. 
65535 (full) */ @@ -226,16 +234,19 @@ union if_snd_tag_alloc_params { struct if_snd_tag_alloc_rate_limit rate_limit; struct if_snd_tag_alloc_rate_limit unlimited; struct if_snd_tag_alloc_tls tls; + struct if_snd_tag_alloc_tls_rate_limit tls_rate_limit; }; union if_snd_tag_modify_params { struct if_snd_tag_rate_limit_params rate_limit; struct if_snd_tag_rate_limit_params unlimited; + struct if_snd_tag_rate_limit_params tls_rate_limit; }; union if_snd_tag_query_params { struct if_snd_tag_rate_limit_params rate_limit; struct if_snd_tag_rate_limit_params unlimited; + struct if_snd_tag_rate_limit_params tls_rate_limit; }; /* Query return flags */ diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c index 192dba7eed82..920c65323ae7 100644 --- a/sys/net/if_vlan.c +++ b/sys/net/if_vlan.c @@ -1782,10 +1782,10 @@ vlan_capabilities(struct ifvlan *ifv) * this ever changes, then a new IFCAP_VLAN_TXTLS can be * defined. */ - if (p->if_capabilities & IFCAP_TXTLS) - cap |= p->if_capabilities & IFCAP_TXTLS; - if (p->if_capenable & IFCAP_TXTLS) - ena |= mena & IFCAP_TXTLS; + if (p->if_capabilities & (IFCAP_TXTLS | IFCAP_TXTLS_RTLMT)) + cap |= p->if_capabilities & (IFCAP_TXTLS | IFCAP_TXTLS_RTLMT); + if (p->if_capenable & (IFCAP_TXTLS | IFCAP_TXTLS_RTLMT)) + ena |= mena & (IFCAP_TXTLS | IFCAP_TXTLS_RTLMT); ifp->if_capabilities = cap; ifp->if_capenable = ena; diff --git a/sys/netinet/tcp_ratelimit.c b/sys/netinet/tcp_ratelimit.c index 161ed0b5f33a..2ec2752e46ef 100644 --- a/sys/netinet/tcp_ratelimit.c +++ b/sys/netinet/tcp_ratelimit.c @@ -1219,6 +1219,9 @@ tcp_set_pacing_rate(struct tcpcb *tp, struct ifnet *ifp, uint64_t bytes_per_sec, int flags, int *error) { const struct tcp_hwrate_limit_table *rte; +#ifdef KERN_TLS + struct ktls_session *tls; +#endif INP_WLOCK_ASSERT(tp->t_inpcb); @@ -1233,17 +1236,30 @@ tcp_set_pacing_rate(struct tcpcb *tp, struct ifnet *ifp, return (NULL); } #ifdef KERN_TLS + tls = NULL; if (tp->t_inpcb->inp_socket->so_snd.sb_flags & SB_TLS_IFNET) { - /* - * We 
currently can't do both TLS and hardware - * pacing - */ - if (error) - *error = EINVAL; - return (NULL); + tls = tp->t_inpcb->inp_socket->so_snd.sb_tls_info; + + if ((ifp->if_capenable & IFCAP_TXTLS_RTLMT) == 0 || + tls->mode != TCP_TLS_MODE_IFNET) { + if (error) + *error = ENODEV; + return (NULL); + } } #endif rte = rt_setup_rate(tp->t_inpcb, ifp, bytes_per_sec, flags, error); +#ifdef KERN_TLS + if (rte != NULL && tls != NULL && tls->snd_tag != NULL) { + /* + * Fake a route change error to reset the TLS + * send tag. This will convert the existing + * tag to a TLS ratelimit tag. + */ + MPASS(tls->snd_tag->type == IF_SND_TAG_TYPE_TLS); + ktls_output_eagain(tp->t_inpcb, tls); + } +#endif } else { /* * We are modifying a rate, wrong interface? @@ -1264,13 +1280,39 @@ tcp_chg_pacing_rate(const struct tcp_hwrate_limit_table *crte, { const struct tcp_hwrate_limit_table *nrte; const struct tcp_rate_set *rs; +#ifdef KERN_TLS + struct ktls_session *tls = NULL; +#endif int is_indirect = 0; int err; INP_WLOCK_ASSERT(tp->t_inpcb); - if ((tp->t_inpcb->inp_snd_tag == NULL) || - (crte == NULL)) { + if (crte == NULL) { + /* Wrong interface */ + if (error) + *error = EINVAL; + return (NULL); + } + +#ifdef KERN_TLS + if (tp->t_inpcb->inp_socket->so_snd.sb_flags & SB_TLS_IFNET) { + tls = tp->t_inpcb->inp_socket->so_snd.sb_tls_info; + MPASS(tls->mode == TCP_TLS_MODE_IFNET); + if (tls->snd_tag != NULL && + tls->snd_tag->type != IF_SND_TAG_TYPE_TLS_RATE_LIMIT) { + /* + * NIC probably doesn't support ratelimit TLS + * tags if it didn't allocate one when an + * existing rate was present, so ignore. 
+ */ + if (error) + *error = EOPNOTSUPP; + return (NULL); + } + } +#endif + if (tp->t_inpcb->inp_snd_tag == NULL) { /* Wrong interface */ if (error) *error = EINVAL; @@ -1327,7 +1369,12 @@ tcp_chg_pacing_rate(const struct tcp_hwrate_limit_table *crte, return (NULL); } /* Change rates to our new entry */ - err = in_pcbmodify_txrtlmt(tp->t_inpcb, nrte->rate); +#ifdef KERN_TLS + if (tls != NULL) + err = ktls_modify_txrtlmt(tls, nrte->rate); + else +#endif + err = in_pcbmodify_txrtlmt(tp->t_inpcb, nrte->rate); if (err) { if (error) *error = err; @@ -1365,6 +1412,13 @@ tcp_rel_pacing_rate(const struct tcp_hwrate_limit_table *crte, struct tcpcb *tp) rs_defer_destroy(rs); mtx_unlock(&rs_mtx); } + + /* + * XXX: If this connection is using ifnet TLS, should we + * switch it to using an unlimited rate, or perhaps use + * ktls_output_eagain() to reset the send tag to a plain + * TLS tag? + */ in_pcbdetach_txrtlmt(tp->t_inpcb); } diff --git a/sys/sys/ktls.h b/sys/sys/ktls.h index edbfe53f51ba..8d591888466c 100644 --- a/sys/sys/ktls.h +++ b/sys/sys/ktls.h @@ -222,6 +222,9 @@ int ktls_get_rx_mode(struct socket *so); int ktls_set_tx_mode(struct socket *so, int mode); int ktls_get_tx_mode(struct socket *so); int ktls_output_eagain(struct inpcb *inp, struct ktls_session *tls); +#ifdef RATELIMIT +int ktls_modify_txrtlmt(struct ktls_session *tls, uint64_t max_pacing_rate); +#endif static inline struct ktls_session * ktls_hold(struct ktls_session *tls)