Path MTU discovery hooks for offloaded TCP connections.

Notify the TOE driver when an ICMP type 3 code 4 (Fragmentation
needed and DF set) message is received for an offloaded connection.
This gives the driver an opportunity to lower the path MTU for the
connection and resume transmission, much like what the kernel does for
the connections that it handles.

Reviewed by:	glebius@
Sponsored by:	Chelsio Communications
Differential Revision:	https://reviews.freebsd.org/D29755
This commit is contained in:
Navdeep Parhar 2021-04-12 17:25:22 -07:00
parent 652908599b
commit 01d74fe1ff
5 changed files with 81 additions and 26 deletions

View File

@ -219,3 +219,14 @@ tcp_offload_detach(struct tcpcb *tp)
tod->tod_pcb_detach(tod, tp);
}
/*
 * Hand a path MTU update for an offloaded connection to its TOE driver.
 *
 * tp must have a TOE device attached (tp->tod != NULL) and the inpcb
 * write-lock assertion on tp->t_inpcb must hold.  seq and mtu are passed
 * to the driver's tod_pmtu_update method unchanged.
 */
void
tcp_offload_pmtu_update(struct tcpcb *tp, tcp_seq seq, int mtu)
{
	struct toedev *toe;

	toe = tp->tod;
	KASSERT(toe != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
	INP_WLOCK_ASSERT(tp->t_inpcb);

	/* The driver decides what to do with the update. */
	toe->tod_pmtu_update(toe, tp, seq, mtu);
}

View File

@ -36,6 +36,8 @@
#error "no user-serviceable parts inside"
#endif
#include <netinet/tcp.h>
extern int registered_toedevs;
int tcp_offload_connect(struct socket *, struct sockaddr *);
@ -48,5 +50,6 @@ void tcp_offload_ctloutput(struct tcpcb *, int, int);
void tcp_offload_tcp_info(struct tcpcb *, struct tcp_info *);
int tcp_offload_alloc_tls_session(struct tcpcb *, struct ktls_session *, int);
void tcp_offload_detach(struct tcpcb *);
void tcp_offload_pmtu_update(struct tcpcb *, tcp_seq, int);
#endif

View File

@ -2791,6 +2791,21 @@ SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred,
#endif /* INET6 */
#ifdef INET
/*
 * Pick the path MTU to try next after a fragmentation-needed message.
 *
 * icp is the received ICMP header and ip is the IP header it refers to.
 * The MTU advertised in the ICMP message is used when it is non-zero;
 * otherwise ip_next_mtu() is asked for the next smaller value based on
 * the length of the offending packet.  The result is never smaller than
 * V_tcp_minmss + sizeof(struct tcpiphdr).
 */
static inline int
tcp_next_pmtu(const struct icmp *icp, const struct ip *ip)
{
	int next;

	next = ntohs(icp->icmp_nextmtu);
	if (next == 0) {
		/* No alternative MTU was proposed, try the next smaller one. */
		next = ip_next_mtu(ntohs(ip->ip_len), 1);
	}

	/* Enforce the floor: minimum MSS plus the TCP/IP header. */
	if (next < V_tcp_minmss + sizeof(struct tcpiphdr)) {
		next = V_tcp_minmss + sizeof(struct tcpiphdr);
	}

	return (next);
}
static void
tcp_ctlinput_with_port(int cmd, struct sockaddr *sa, void *vip, uint16_t port)
{
@ -2846,6 +2861,17 @@ tcp_ctlinput_with_port(int cmd, struct sockaddr *sa, void *vip, uint16_t port)
!(inp->inp_flags & INP_DROPPED) &&
!(inp->inp_socket == NULL)) {
tp = intotcpcb(inp);
#ifdef TCP_OFFLOAD
if (tp->t_flags & TF_TOE && cmd == PRC_MSGSIZE) {
/*
* MTU discovery for offloaded connections. Let
* the TOE driver verify seq# and process it.
*/
mtu = tcp_next_pmtu(icp, ip);
tcp_offload_pmtu_update(tp, icmp_tcp_seq, mtu);
goto out;
}
#endif
if (tp->t_port != port) {
goto out;
}
@ -2853,24 +2879,11 @@ tcp_ctlinput_with_port(int cmd, struct sockaddr *sa, void *vip, uint16_t port)
SEQ_LT(ntohl(icmp_tcp_seq), tp->snd_max)) {
if (cmd == PRC_MSGSIZE) {
/*
* MTU discovery:
* If we got a needfrag set the MTU
* in the route to the suggested new
* value (if given) and then notify.
* MTU discovery: we got a needfrag and
* will potentially try a lower MTU.
*/
mtu = ntohs(icp->icmp_nextmtu);
/*
* If no alternative MTU was
* proposed, try the next smaller
* one.
*/
if (!mtu)
mtu = ip_next_mtu(
ntohs(ip->ip_len), 1);
if (mtu < V_tcp_minmss +
sizeof(struct tcpiphdr))
mtu = V_tcp_minmss +
sizeof(struct tcpiphdr);
mtu = tcp_next_pmtu(icp, ip);
/*
* Only process the offered MTU if it
* is smaller than the current one.
@ -2948,6 +2961,20 @@ tcp_ctlinput_viaudp(int cmd, struct sockaddr *sa, void *vip, void *unused)
#endif /* INET */
#ifdef INET6
/*
 * Pick the path MTU to try next after an ICMPv6 message carrying an MTU.
 *
 * The MTU advertised in icmp6 is returned when it is at least IPV6_MMTU.
 * If no alternative MTU was proposed, or the proposed MTU was too small,
 * IPV6_MMTU - 8 is returned instead.
 */
static inline int
tcp6_next_pmtu(const struct icmp6_hdr *icmp6)
{
	int proposed;

	proposed = ntohl(icmp6->icmp6_mtu);
	if (proposed >= IPV6_MMTU)
		return (proposed);

	/*
	 * XXXNP: what is the adjustment for?
	 * NOTE(review): presumably room for the 8-byte IPv6 Fragment header
	 * (RFC 8200, section 5) -- confirm.
	 */
	return (IPV6_MMTU - 8);
}
static void
tcp6_ctlinput_with_port(int cmd, struct sockaddr *sa, void *d, uint16_t port)
{
@ -3039,6 +3066,14 @@ tcp6_ctlinput_with_port(int cmd, struct sockaddr *sa, void *d, uint16_t port)
!(inp->inp_flags & INP_DROPPED) &&
!(inp->inp_socket == NULL)) {
tp = intotcpcb(inp);
#ifdef TCP_OFFLOAD
if (tp->t_flags & TF_TOE && cmd == PRC_MSGSIZE) {
/* MTU discovery for offloaded connections. */
mtu = tcp6_next_pmtu(icmp6);
tcp_offload_pmtu_update(tp, icmp_tcp_seq, mtu);
goto out;
}
#endif
if (tp->t_port != port) {
goto out;
}
@ -3051,15 +3086,8 @@ tcp6_ctlinput_with_port(int cmd, struct sockaddr *sa, void *d, uint16_t port)
* in the route to the suggested new
* value (if given) and then notify.
*/
mtu = ntohl(icmp6->icmp6_mtu);
/*
* If no alternative MTU was
* proposed, or the proposed
* MTU was too small, set to
* the min.
*/
if (mtu < IPV6_MMTU)
mtu = IPV6_MMTU - 8;
mtu = tcp6_next_pmtu(icmp6);
bzero(&inc, sizeof(inc));
inc.inc_fibnum = M_GETFIB(m);
inc.inc_flags |= INC_ISIPV6;

View File

@ -199,6 +199,14 @@ toedev_alloc_tls_session(struct toedev *tod __unused, struct tcpcb *tp __unused,
return (EINVAL);
}
/*
 * Default tod_pmtu_update method installed by init_toedev(): ignores the
 * path MTU update and does nothing.
 */
static void
toedev_pmtu_update(struct toedev *tod __unused, struct tcpcb *tp __unused,
    tcp_seq seq __unused, int mtu __unused)
{
	/* Intentionally empty. */
}
/*
* Inform one or more TOE devices about a listening socket.
*/
@ -290,6 +298,7 @@ init_toedev(struct toedev *tod)
tod->tod_ctloutput = toedev_ctloutput;
tod->tod_tcp_info = toedev_tcp_info;
tod->tod_alloc_tls_session = toedev_alloc_tls_session;
tod->tod_pmtu_update = toedev_pmtu_update;
}
/*

View File

@ -35,6 +35,7 @@
#error "no user-serviceable parts inside"
#endif
#include <netinet/tcp.h>
#include <sys/_eventhandler.h>
struct tcpopt;
@ -114,6 +115,9 @@ struct toedev {
/* Create a TLS session */
int (*tod_alloc_tls_session)(struct toedev *, struct tcpcb *,
struct ktls_session *, int);
/* ICMP fragmentation-needed received, adjust PMTU. */
void (*tod_pmtu_update)(struct toedev *, struct tcpcb *, tcp_seq, int);
};
typedef void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *);