It turns out that too many drivers are not only parsing the L2/3/4
headers for TSO but also for generic checksum offloading.  Ideally we
would only have one common function shared amongst all drivers, and
perhaps when updating them for IPv6 we should introduce that.
Eventually we should provide the meta information along with mbufs to
avoid (re-)parsing entirely.

To avoid breaking IPv6 (checksums and offload) and to be able to MFC the
changes without risking breakage in 3rd party drivers, duplicate the v4
framework, as other OSes have done as well.

Introduce interface capability flags for TX/RX checksum offload with
IPv6, to allow independent toggling (where possible).  Add CSUM_*_IPV6
flags for UDP/TCP over IPv6, and reserve further flags for SCTP and IPv6
fragmentation.  Define CSUM_DELAY_DATA_IPV6 as we do for legacy IP and
add CSUM_DATA_VALID_IPV6 as an alias for CSUM_DATA_VALID.

This pretty much brings IPv6 handling in line with IPv4.
TSO is still handled in a different way and not via if_hwassist.

Update ifconfig to allow (un)setting of the new capability flags.
Update loopback to announce the new capabilities and if_hwassist flags.

Individual driver updates will have to follow, as will SCTP.

Reported by:	gallatin, dim, ..
Reviewed by:	gallatin (glanced at?)
MFC after:	3 days
X-MFC with:	r235961,235959,235958
This commit is contained in:
bz 2012-05-28 09:30:13 +00:00
parent 290c33a04a
commit ac429c7044
14 changed files with 85 additions and 31 deletions

View File

@ -28,7 +28,7 @@
.\" From: @(#)ifconfig.8 8.3 (Berkeley) 1/5/94
.\" $FreeBSD$
.\"
.Dd May 19, 2012
.Dd May 27, 2012
.Dt IFCONFIG 8
.Os
.Sh NAME
@ -372,16 +372,32 @@ This is useful for devices which have multiple physical layer interfaces
.It Cm name Ar name
Set the interface name to
.Ar name .
.It Cm rxcsum , txcsum
.It Cm rxcsum , txcsum , rxcsum6 , txcsum6
If the driver supports user-configurable checksum offloading,
enable receive (or transmit) checksum offloading on the interface.
The feature can be turned on selectively per protocol family.
Use
.Cm rxcsum6 , txcsum6
for
.Xr ip6 4
or
.Cm rxcsum , txcsum
otherwise.
Some drivers may not be able to enable these flags independently
of each other, so setting one may also set the other.
The driver will offload as much checksum work as it can reliably
support; the exact level of offloading varies between drivers.
.It Fl rxcsum , txcsum
.It Fl rxcsum , txcsum , rxcsum6 , txcsum6
If the driver supports user-configurable checksum offloading,
disable receive (or transmit) checksum offloading on the interface.
The feature can be turned off selectively per protocol family.
Use
.Fl rxcsum6 , txcsum6
for
.Xr ip6 4
or
.Fl rxcsum , txcsum
otherwise.
These settings may not always be independent of each other.
.It Cm tso
If the driver supports

View File

@ -916,7 +916,8 @@ unsetifdescr(const char *val, int value, int s, const struct afswtch *afp)
#define IFCAPBITS \
"\020\1RXCSUM\2TXCSUM\3NETCONS\4VLAN_MTU\5VLAN_HWTAGGING\6JUMBO_MTU\7POLLING" \
"\10VLAN_HWCSUM\11TSO4\12TSO6\13LRO\14WOL_UCAST\15WOL_MCAST\16WOL_MAGIC" \
"\21VLAN_HWFILTER\23VLAN_HWTSO\24LINKSTATE\25NETMAP"
"\21VLAN_HWFILTER\23VLAN_HWTSO\24LINKSTATE\25NETMAP" \
"\26IFCAP_RXCSUM_IPV6\27IFCAP_TXCSUM_IPV6"
/*
* Print the status of the interface. If an address family was
@ -1193,6 +1194,10 @@ static struct cmd basic_cmds[] = {
DEF_CMD("-monitor", -IFF_MONITOR, setifflags),
DEF_CMD("staticarp", IFF_STATICARP, setifflags),
DEF_CMD("-staticarp", -IFF_STATICARP, setifflags),
DEF_CMD("rxcsum6", IFCAP_RXCSUM_IPV6, setifcap),
DEF_CMD("-rxcsum6", -IFCAP_RXCSUM_IPV6, setifcap),
DEF_CMD("txcsum6", IFCAP_TXCSUM_IPV6, setifcap),
DEF_CMD("-txcsum6", -IFCAP_TXCSUM_IPV6, setifcap),
DEF_CMD("rxcsum", IFCAP_RXCSUM, setifcap),
DEF_CMD("-rxcsum", -IFCAP_RXCSUM, setifcap),
DEF_CMD("txcsum", IFCAP_TXCSUM, setifcap),

View File

@ -230,6 +230,10 @@ struct if_data {
#define IFCAP_VLAN_HWTSO 0x40000 /* can do IFCAP_TSO on VLANs */
#define IFCAP_LINKSTATE 0x80000 /* the runtime link state is dynamic */
#define IFCAP_NETMAP 0x100000 /* netmap mode supported/enabled */
#define IFCAP_RXCSUM_IPV6 0x200000 /* can offload checksum on IPv6 RX */
#define IFCAP_TXCSUM_IPV6 0x400000 /* can offload checksum on IPv6 TX */
#define IFCAP_HWCSUM_IPV6 (IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6)
#define IFCAP_HWCSUM (IFCAP_RXCSUM | IFCAP_TXCSUM)
#define IFCAP_TSO (IFCAP_TSO4 | IFCAP_TSO6)

View File

@ -92,7 +92,9 @@
#endif
#define LO_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP)
#define LO_CSUM_SET (CSUM_DATA_VALID | CSUM_PSEUDO_HDR | \
#define LO_CSUM_FEATURES6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP)
#define LO_CSUM_SET (CSUM_DATA_VALID | CSUM_DATA_VALID_IPV6 | \
CSUM_PSEUDO_HDR | \
CSUM_IP_CHECKED | CSUM_IP_VALID | \
CSUM_SCTP_VALID)
@ -143,8 +145,9 @@ lo_clone_create(struct if_clone *ifc, int unit, caddr_t params)
ifp->if_ioctl = loioctl;
ifp->if_output = looutput;
ifp->if_snd.ifq_maxlen = ifqmaxlen;
ifp->if_capabilities = ifp->if_capenable = IFCAP_HWCSUM;
ifp->if_hwassist = LO_CSUM_FEATURES;
ifp->if_capabilities = ifp->if_capenable =
IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
ifp->if_hwassist = LO_CSUM_FEATURES | LO_CSUM_FEATURES6;
if_attach(ifp);
bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
if (V_loif == NULL)
@ -247,12 +250,19 @@ looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
#if 1 /* XXX */
switch (dst->sa_family) {
case AF_INET:
case AF_INET6:
if (ifp->if_capenable & IFCAP_RXCSUM) {
m->m_pkthdr.csum_data = 0xffff;
m->m_pkthdr.csum_flags = LO_CSUM_SET;
}
m->m_pkthdr.csum_flags &= ~LO_CSUM_FEATURES;
break;
case AF_INET6:
if (ifp->if_capenable & IFCAP_RXCSUM_IPV6) {
m->m_pkthdr.csum_data = 0xffff;
m->m_pkthdr.csum_flags = LO_CSUM_SET;
}
m->m_pkthdr.csum_flags &= ~LO_CSUM_FEATURES6;
break;
case AF_IPX:
case AF_APPLETALK:
break;
@ -436,10 +446,15 @@ loioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
ifp->if_capenable ^= IFCAP_RXCSUM;
if ((mask & IFCAP_TXCSUM) != 0)
ifp->if_capenable ^= IFCAP_TXCSUM;
if ((mask & IFCAP_RXCSUM_IPV6) != 0)
ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
if ((mask & IFCAP_TXCSUM_IPV6) != 0)
ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
ifp->if_hwassist = 0;
if (ifp->if_capenable & IFCAP_TXCSUM)
ifp->if_hwassist = LO_CSUM_FEATURES;
else
ifp->if_hwassist = 0;
if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
ifp->if_hwassist |= LO_CSUM_FEATURES6;
break;
default:

View File

@ -589,7 +589,7 @@ tcp_input(struct mbuf *m, int off0)
ip6 = mtod(m, struct ip6_hdr *);
th = (struct tcphdr *)((caddr_t)ip6 + off0);
tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0;
if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) {
if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
th->th_sum = m->m_pkthdr.csum_data;
else

View File

@ -1047,7 +1047,6 @@ tcp_output(struct tcpcb *tp)
* checksum extended header and data.
*/
m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */
m->m_pkthdr.csum_flags = CSUM_TCP;
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
#ifdef INET6
if (isipv6) {
@ -1055,6 +1054,7 @@ tcp_output(struct tcpcb *tp)
* ip6_plen does not need to be filled now, and will be filled
* in ip6_output.
*/
m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) +
optlen + len, IPPROTO_TCP, 0);
}
@ -1064,6 +1064,7 @@ tcp_output(struct tcpcb *tp)
#endif
#ifdef INET
{
m->m_pkthdr.csum_flags = CSUM_TCP;
th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htons(sizeof(struct tcphdr) + IPPROTO_TCP + len + optlen));

View File

@ -619,10 +619,10 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
nth->th_win = htons((u_short)win);
nth->th_urp = 0;
m->m_pkthdr.csum_flags = CSUM_TCP;
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
#ifdef INET6
if (isipv6) {
m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
nth->th_sum = in6_cksum_pseudo(ip6,
tlen - sizeof(struct ip6_hdr), IPPROTO_TCP, 0);
ip6->ip6_hlim = in6_selecthlim(tp != NULL ? tp->t_inpcb :
@ -634,6 +634,7 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
#endif
#ifdef INET
{
m->m_pkthdr.csum_flags = CSUM_TCP;
nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p)));
}

View File

@ -1473,10 +1473,10 @@ syncache_respond(struct syncache *sc)
optlen = 0;
M_SETFIB(m, sc->sc_inc.inc_fibnum);
m->m_pkthdr.csum_flags = CSUM_TCP;
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
#ifdef INET6
if (sc->sc_inc.inc_flags & INC_ISIPV6) {
m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
th->th_sum = in6_cksum_pseudo(ip6, tlen + optlen - hlen,
IPPROTO_TCP, 0);
ip6->ip6_hlim = in6_selecthlim(NULL, NULL);
@ -1488,6 +1488,7 @@ syncache_respond(struct syncache *sc)
#endif
#ifdef INET
{
m->m_pkthdr.csum_flags = CSUM_TCP;
th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htons(tlen + optlen - hlen + IPPROTO_TCP));
error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL);

View File

@ -574,10 +574,10 @@ tcp_twrespond(struct tcptw *tw, int flags)
th->th_flags = flags;
th->th_win = htons(tw->last_win);
m->m_pkthdr.csum_flags = CSUM_TCP;
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
#ifdef INET6
if (isipv6) {
m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
th->th_sum = in6_cksum_pseudo(ip6,
sizeof(struct tcphdr) + optlen, IPPROTO_TCP, 0);
ip6->ip6_hlim = in6_selecthlim(inp, NULL);
@ -590,6 +590,7 @@ tcp_twrespond(struct tcptw *tw, int flags)
#endif
#ifdef INET
{
m->m_pkthdr.csum_flags = CSUM_TCP;
th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP));
ip->ip_len = m->m_pkthdr.len;

View File

@ -581,9 +581,9 @@ ip6_forward(struct mbuf *m, int srcrt)
m->m_flags |= M_FASTFWD_OURS;
if (m->m_pkthdr.rcvif == NULL)
m->m_pkthdr.rcvif = V_loif;
if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
m->m_pkthdr.csum_flags |=
CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR;
m->m_pkthdr.csum_data = 0xffff;
}
#ifdef SCTP
@ -601,9 +601,9 @@ ip6_forward(struct mbuf *m, int srcrt)
if (m->m_flags & M_FASTFWD_OURS) {
if (m->m_pkthdr.rcvif == NULL)
m->m_pkthdr.rcvif = V_loif;
if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
m->m_pkthdr.csum_flags |=
CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR;
m->m_pkthdr.csum_data = 0xffff;
}
#ifdef SCTP

View File

@ -291,6 +291,7 @@ ip6_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error,
/*
* Do delayed checksums now because we send before
* this is done in the normal processing path.
* XXX-BZ CSUM_DELAY_DATA_IPV6?
*/
if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
ipseclog((LOG_DEBUG,

View File

@ -190,7 +190,7 @@ in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset)
u_short csum;
csum = in_cksum_skip(m, offset + plen, offset);
if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
if (m->m_pkthdr.csum_flags & CSUM_UDP_IPV6 && csum == 0)
csum = 0xffff;
offset += m->m_pkthdr.csum_data; /* checksum offset */
@ -885,9 +885,9 @@ skip_ipsec2:;
m->m_flags |= M_FASTFWD_OURS;
if (m->m_pkthdr.rcvif == NULL)
m->m_pkthdr.rcvif = V_loif;
if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
m->m_pkthdr.csum_flags |=
CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR;
m->m_pkthdr.csum_data = 0xffff;
}
#ifdef SCTP
@ -905,9 +905,9 @@ skip_ipsec2:;
if (m->m_flags & M_FASTFWD_OURS) {
if (m->m_pkthdr.rcvif == NULL)
m->m_pkthdr.rcvif = V_loif;
if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
m->m_pkthdr.csum_flags |=
CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR;
m->m_pkthdr.csum_data = 0xffff;
}
#ifdef SCTP
@ -960,8 +960,8 @@ skip_ipsec2:;
* XXX-BZ Need a framework to know when the NIC can handle it, even
* with ext. hdrs.
*/
if (sw_csum & CSUM_DELAY_DATA) {
sw_csum &= ~CSUM_DELAY_DATA;
if (sw_csum & CSUM_DELAY_DATA_IPV6) {
sw_csum &= ~CSUM_DELAY_DATA_IPV6;
in6_delayed_cksum(m, plen, sizeof(struct ip6_hdr));
}
#ifdef SCTP
@ -1076,9 +1076,9 @@ skip_ipsec2:;
* fragmented packets, then do it here.
* XXX-BZ handle the hw offloading case. Need flags.
*/
if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
in6_delayed_cksum(m, plen, hlen);
m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
}
#ifdef SCTP
if (m->m_pkthdr.csum_flags & CSUM_SCTP) {

View File

@ -230,7 +230,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
goto badunlocked;
}
if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) {
if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
uh_sum = m->m_pkthdr.csum_data;
else
@ -784,7 +784,7 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
ip6->ip6_dst = *faddr;
udp6->uh_sum = in6_cksum_pseudo(ip6, plen, IPPROTO_UDP, 0);
m->m_pkthdr.csum_flags = CSUM_UDP;
m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
flags = 0;

View File

@ -283,15 +283,24 @@ struct mbuf {
#define CSUM_FRAGMENT 0x0010 /* will do IP fragmentation */
#define CSUM_TSO 0x0020 /* will do TSO */
#define CSUM_SCTP 0x0040 /* will csum SCTP */
/* CSUM_SCTP_IPV6 0x0080 will csum IPv6/SCTP */
#define CSUM_IP_CHECKED 0x0100 /* did csum IP */
#define CSUM_IP_VALID 0x0200 /* ... the csum is valid */
#define CSUM_DATA_VALID 0x0400 /* csum_data field is valid */
#define CSUM_PSEUDO_HDR 0x0800 /* csum_data has pseudo hdr */
#define CSUM_SCTP_VALID 0x1000 /* SCTP checksum is valid */
#define CSUM_UDP_IPV6 0x2000 /* will csum IPv6/UDP */
#define CSUM_TCP_IPV6 0x4000 /* will csum IPv6/TCP */
/* CSUM_TSO_IPV6 0x8000 will do IPv6/TSO */
/* CSUM_FRAGMENT_IPV6 0x10000 will do IPv6 fragmentation */
#define CSUM_DELAY_DATA_IPV6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6)
#define CSUM_DATA_VALID_IPV6 CSUM_DATA_VALID
#define CSUM_DELAY_DATA (CSUM_TCP | CSUM_UDP)
#define CSUM_DELAY_IP (CSUM_IP) /* XXX add ipv6 here too? */
#define CSUM_DELAY_IP (CSUM_IP) /* Only v4, no v6 IP hdr csum */
/*
* mbuf types.