First step of TSO (TCP segmentation offload) support in our network stack.

 o add IFCAP_TSO[46] for drivers to announce this capability for IPv4 and IPv6
 o add CSUM_TSO flag to mbuf pkthdr csum_flags field
 o add tso_segsz field to mbuf pkthdr
 o enhance ip_output() packet length check to allow for large TSO packets
 o extend tcp_maxmtu[46]() with a flag pointer to pass interface capabilities
 o adjust all callers of tcp_maxmtu[46]() accordingly

Discussed on:	-current, -net
Sponsored by:	TCP/IP Optimization Fundraise 2005
This commit is contained in:
Andre Oppermann 2006-09-06 21:51:59 +00:00
parent 723e155201
commit 233dcce118
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=162084
9 changed files with 82 additions and 27 deletions

View File

@ -206,8 +206,11 @@ struct if_data {
#define IFCAP_JUMBO_MTU 0x0020 /* 9000 byte MTU supported */
#define IFCAP_POLLING 0x0040 /* driver supports polling */
#define IFCAP_VLAN_HWCSUM 0x0080 /* can do IFCAP_HWCSUM on VLANs */
#define IFCAP_TSO4 0x0100 /* can do TCP Segmentation Offload */
#define IFCAP_TSO6 0x0200 /* can do TCP6 Segmentation Offload */
#define IFCAP_HWCSUM (IFCAP_RXCSUM | IFCAP_TXCSUM)
#define IFCAP_TSO (IFCAP_TSO4 | IFCAP_TSO6)
#define IFQ_MAXLEN 50
#define IFNET_SLOWHZ 1 /* granularity is 1 second */

View File

@ -495,19 +495,29 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro,
/*
* If small enough for interface, or the interface will take
* care of the fragmentation for us, can just send directly.
* care of the fragmentation for us, we can just send directly.
*/
if (ip->ip_len <= ifp->if_mtu || (ifp->if_hwassist & CSUM_FRAGMENT &&
((ip->ip_off & IP_DF) == 0))) {
if (ip->ip_len <= ifp->if_mtu ||
(m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 ||
((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) {
ip->ip_len = htons(ip->ip_len);
ip->ip_off = htons(ip->ip_off);
ip->ip_sum = 0;
if (sw_csum & CSUM_DELAY_IP)
ip->ip_sum = in_cksum(m, hlen);
/* Record statistics for this interface address. */
/*
* Record statistics for this interface address.
* With CSUM_TSO the byte/packet count will be slightly
* incorrect because we count the IP+TCP headers only
* once instead of for every generated packet.
*/
if (!(flags & IP_FORWARDING) && ia) {
ia->ia_ifa.if_opackets++;
if (m->m_pkthdr.csum_flags & CSUM_TSO)
ia->ia_ifa.if_opackets +=
m->m_pkthdr.len / m->m_pkthdr.tso_segsz;
else
ia->ia_ifa.if_opackets++;
ia->ia_ifa.if_obytes += m->m_pkthdr.len;
}
#ifdef IPSEC
@ -529,7 +539,8 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro,
goto done;
}
if (ip->ip_off & IP_DF) {
/* Balk when the DF bit is set or the interface doesn't support TSO. */
if ((ip->ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) {
error = EMSGSIZE;
/*
* This case can happen if the user changed the MTU

View File

@ -2857,6 +2857,7 @@ tcp_mss(tp, offer)
struct socket *so;
struct hc_metrics_lite metrics;
int origoffer = offer;
int mtuflags = 0;
#ifdef INET6
int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
size_t min_protoh = isipv6 ?
@ -2869,12 +2870,12 @@ tcp_mss(tp, offer)
/* initialize */
#ifdef INET6
if (isipv6) {
maxmtu = tcp_maxmtu6(&inp->inp_inc);
maxmtu = tcp_maxmtu6(&inp->inp_inc, &mtuflags);
tp->t_maxopd = tp->t_maxseg = tcp_v6mssdflt;
} else
#endif
{
maxmtu = tcp_maxmtu(&inp->inp_inc);
maxmtu = tcp_maxmtu(&inp->inp_inc, &mtuflags);
tp->t_maxopd = tp->t_maxseg = tcp_mssdflt;
}
so = inp->inp_socket;
@ -3081,6 +3082,10 @@ tcp_mss(tp, offer)
tp->snd_cwnd = mss * ss_fltsz_local;
else
tp->snd_cwnd = mss * ss_fltsz;
/* Check the interface for TSO capabilities. */
if (mtuflags & CSUM_TSO)
tp->t_flags |= TF_TSO;
}
/*
@ -3103,14 +3108,14 @@ tcp_mssopt(inc)
#ifdef INET6
if (isipv6) {
mss = tcp_v6mssdflt;
maxmtu = tcp_maxmtu6(inc);
maxmtu = tcp_maxmtu6(inc, NULL);
thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
min_protoh = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
} else
#endif
{
mss = tcp_mssdflt;
maxmtu = tcp_maxmtu(inc);
maxmtu = tcp_maxmtu(inc, NULL);
thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
min_protoh = sizeof(struct tcpiphdr);
}

View File

@ -2857,6 +2857,7 @@ tcp_mss(tp, offer)
struct socket *so;
struct hc_metrics_lite metrics;
int origoffer = offer;
int mtuflags = 0;
#ifdef INET6
int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
size_t min_protoh = isipv6 ?
@ -2869,12 +2870,12 @@ tcp_mss(tp, offer)
/* initialize */
#ifdef INET6
if (isipv6) {
maxmtu = tcp_maxmtu6(&inp->inp_inc);
maxmtu = tcp_maxmtu6(&inp->inp_inc, &mtuflags);
tp->t_maxopd = tp->t_maxseg = tcp_v6mssdflt;
} else
#endif
{
maxmtu = tcp_maxmtu(&inp->inp_inc);
maxmtu = tcp_maxmtu(&inp->inp_inc, &mtuflags);
tp->t_maxopd = tp->t_maxseg = tcp_mssdflt;
}
so = inp->inp_socket;
@ -3081,6 +3082,10 @@ tcp_mss(tp, offer)
tp->snd_cwnd = mss * ss_fltsz_local;
else
tp->snd_cwnd = mss * ss_fltsz;
/* Check the interface for TSO capabilities. */
if (mtuflags & CSUM_TSO)
tp->t_flags |= TF_TSO;
}
/*
@ -3103,14 +3108,14 @@ tcp_mssopt(inc)
#ifdef INET6
if (isipv6) {
mss = tcp_v6mssdflt;
maxmtu = tcp_maxmtu6(inc);
maxmtu = tcp_maxmtu6(inc, NULL);
thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
min_protoh = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
} else
#endif
{
mss = tcp_mssdflt;
maxmtu = tcp_maxmtu(inc);
maxmtu = tcp_maxmtu(inc, NULL);
thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
min_protoh = sizeof(struct tcpiphdr);
}

View File

@ -1252,7 +1252,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
* or route MTU. tcp_mtudisc()
* will do right thing by itself.
*/
if (mtu <= tcp_maxmtu(&inc))
if (mtu <= tcp_maxmtu(&inc, NULL))
tcp_hc_updatemtu(&inc, mtu);
}
@ -1531,9 +1531,9 @@ tcp_mtudisc(struct inpcb *inp, int errno)
maxmtu = tcp_hc_getmtu(&inp->inp_inc); /* IPv4 and IPv6 */
romtu =
#ifdef INET6
isipv6 ? tcp_maxmtu6(&inp->inp_inc) :
isipv6 ? tcp_maxmtu6(&inp->inp_inc, NULL) :
#endif /* INET6 */
tcp_maxmtu(&inp->inp_inc);
tcp_maxmtu(&inp->inp_inc, NULL);
if (!maxmtu)
maxmtu = romtu;
else
@ -1610,7 +1610,7 @@ tcp_mtudisc(struct inpcb *inp, int errno)
* to get the interface MTU.
*/
u_long
tcp_maxmtu(struct in_conninfo *inc)
tcp_maxmtu(struct in_conninfo *inc, int *flags)
{
struct route sro;
struct sockaddr_in *dst;
@ -1633,6 +1633,13 @@ tcp_maxmtu(struct in_conninfo *inc)
maxmtu = ifp->if_mtu;
else
maxmtu = min(sro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu);
/* Report additional interface capabilities. */
if (flags != NULL) {
if (ifp->if_capenable & IFCAP_TSO4 &&
ifp->if_hwassist & CSUM_TSO)
*flags |= CSUM_TSO;
}
RTFREE(sro.ro_rt);
}
return (maxmtu);
@ -1640,7 +1647,7 @@ tcp_maxmtu(struct in_conninfo *inc)
#ifdef INET6
u_long
tcp_maxmtu6(struct in_conninfo *inc)
tcp_maxmtu6(struct in_conninfo *inc, int *flags)
{
struct route_in6 sro6;
struct ifnet *ifp;
@ -1662,6 +1669,13 @@ tcp_maxmtu6(struct in_conninfo *inc)
else
maxmtu = min(sro6.ro_rt->rt_rmx.rmx_mtu,
IN6_LINKMTU(sro6.ro_rt->rt_ifp));
/* Report additional interface capabilities. */
if (flags != NULL) {
if (ifp->if_capenable & IFCAP_TSO6 &&
ifp->if_hwassist & CSUM_TSO)
*flags |= CSUM_TSO;
}
RTFREE(sro6.ro_rt);
}

View File

@ -1252,7 +1252,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
* or route MTU. tcp_mtudisc()
* will do right thing by itself.
*/
if (mtu <= tcp_maxmtu(&inc))
if (mtu <= tcp_maxmtu(&inc, NULL))
tcp_hc_updatemtu(&inc, mtu);
}
@ -1531,9 +1531,9 @@ tcp_mtudisc(struct inpcb *inp, int errno)
maxmtu = tcp_hc_getmtu(&inp->inp_inc); /* IPv4 and IPv6 */
romtu =
#ifdef INET6
isipv6 ? tcp_maxmtu6(&inp->inp_inc) :
isipv6 ? tcp_maxmtu6(&inp->inp_inc, NULL) :
#endif /* INET6 */
tcp_maxmtu(&inp->inp_inc);
tcp_maxmtu(&inp->inp_inc, NULL);
if (!maxmtu)
maxmtu = romtu;
else
@ -1610,7 +1610,7 @@ tcp_mtudisc(struct inpcb *inp, int errno)
* to get the interface MTU.
*/
u_long
tcp_maxmtu(struct in_conninfo *inc)
tcp_maxmtu(struct in_conninfo *inc, int *flags)
{
struct route sro;
struct sockaddr_in *dst;
@ -1633,6 +1633,13 @@ tcp_maxmtu(struct in_conninfo *inc)
maxmtu = ifp->if_mtu;
else
maxmtu = min(sro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu);
/* Report additional interface capabilities. */
if (flags != NULL) {
if (ifp->if_capenable & IFCAP_TSO4 &&
ifp->if_hwassist & CSUM_TSO)
*flags |= CSUM_TSO;
}
RTFREE(sro.ro_rt);
}
return (maxmtu);
@ -1640,7 +1647,7 @@ tcp_maxmtu(struct in_conninfo *inc)
#ifdef INET6
u_long
tcp_maxmtu6(struct in_conninfo *inc)
tcp_maxmtu6(struct in_conninfo *inc, int *flags)
{
struct route_in6 sro6;
struct ifnet *ifp;
@ -1662,6 +1669,13 @@ tcp_maxmtu6(struct in_conninfo *inc)
else
maxmtu = min(sro6.ro_rt->rt_rmx.rmx_mtu,
IN6_LINKMTU(sro6.ro_rt->rt_ifp));
/* Report additional interface capabilities. */
if (flags != NULL) {
if (ifp->if_capenable & IFCAP_TSO6 &&
ifp->if_hwassist & CSUM_TSO)
*flags |= CSUM_TSO;
}
RTFREE(sro6.ro_rt);
}

View File

@ -114,6 +114,7 @@ struct tcpcb {
#define TF_WASFRECOVERY 0x200000 /* was in NewReno Fast Recovery */
#define TF_SIGNATURE 0x400000 /* require MD5 digests (RFC2385) */
#define TF_FORCEDATA 0x800000 /* force out a byte */
#define TF_TSO 0x1000000 /* TSO enabled on this connection */
tcp_seq snd_una; /* send unacknowledged */
tcp_seq snd_max; /* highest sequence number sent;
@ -508,8 +509,8 @@ void tcp_init(void);
void tcp_fini(void *);
void tcp_reass_init(void);
void tcp_input(struct mbuf *, int);
u_long tcp_maxmtu(struct in_conninfo *);
u_long tcp_maxmtu6(struct in_conninfo *);
u_long tcp_maxmtu(struct in_conninfo *, int *);
u_long tcp_maxmtu6(struct in_conninfo *, int *);
void tcp_mss(struct tcpcb *, int);
int tcp_mssopt(struct in_conninfo *);
struct inpcb *

View File

@ -1132,7 +1132,7 @@ icmp6_mtudisc_update(ip6cp, validated)
if (in6_setscope(&inc.inc6_faddr, m->m_pkthdr.rcvif, NULL))
return;
if (mtu < tcp_maxmtu6(&inc)) {
if (mtu < tcp_maxmtu6(&inc, NULL)) {
tcp_hc_updatemtu(&inc, mtu);
icmp6stat.icp6s_pmtuchg++;
}

View File

@ -110,6 +110,7 @@ struct pkthdr {
/* variables for hardware checksum */
int csum_flags; /* flags regarding checksum */
int csum_data; /* data field used by csum routines */
u_int16_t tso_segsz; /* TSO segment size */
SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */
};
@ -216,6 +217,7 @@ struct mbuf {
#define CSUM_UDP 0x0004 /* will csum UDP */
#define CSUM_IP_FRAGS 0x0008 /* will csum IP fragments */
#define CSUM_FRAGMENT 0x0010 /* will do IP fragmentation */
#define CSUM_TSO 0x0020 /* will do TSO */
#define CSUM_IP_CHECKED 0x0100 /* did csum IP */
#define CSUM_IP_VALID 0x0200 /* ... the csum is valid */