Add support for hardware Tx and Rx checksums to VNIC driver

- The network controller verifies Rx TCP/UDP/SCTP checksums by default.
  Communicate this to the stack when the packet is not marked as erroneous
  to avoid redundant checksum calculation in kernel.
- It is not uncommon to get the mbuf with m_len that is less than
  the minimal size for the IP, TCP, UDP, etc. when HW checsumming
  is enabled. To avoid data corruption performed by the HW that is
  intended to write IP and TCP/UDP/SCTP checksums to the data segment,
  the mbuf needs to be pulled up by the required number of bytes.
- Make sure that one can modify the mbufs that require checsum calculation
  rather than check for NULL mbuf on each transmission.

Reviewed by:   wma
Obtained from: Semihalf
Sponsored by:  Cavium
Differential Revision: https://reviews.freebsd.org/D5320
This commit is contained in:
Zbigniew Bodek 2016-02-25 14:12:51 +00:00
parent ccf8caad14
commit 856dce91c0
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=296030
2 changed files with 155 additions and 7 deletions

View File

@ -353,6 +353,13 @@ nicvf_setup_ifnet(struct nicvf *nic)
if_setmtu(ifp, ETHERMTU);
if_setcapabilities(ifp, IFCAP_VLAN_MTU);
/*
* HW offload capabilities
*/
/* IP/TCP/UDP HW checksums */
if_setcapabilitiesbit(ifp, IFCAP_HWCSUM, 0);
if_sethwassistbits(ifp, (CSUM_IP | CSUM_TCP | CSUM_UDP), 0);
#ifdef DEVICE_POLLING
#error "DEVICE_POLLING not supported in VNIC driver yet"
if_setcapabilitiesbit(ifp, IFCAP_POLLING, 0);
@ -500,6 +507,10 @@ nicvf_if_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
/* No work to do except acknowledge the change took. */
ifp->if_capenable ^= IFCAP_VLAN_MTU;
}
if (mask & IFCAP_TXCSUM)
ifp->if_capenable ^= IFCAP_TXCSUM;
if (mask & IFCAP_RXCSUM)
ifp->if_capenable ^= IFCAP_RXCSUM;
break;
default:
@ -591,6 +602,7 @@ nicvf_if_transmit(struct ifnet *ifp, struct mbuf *mbuf)
struct nicvf *nic = if_getsoftc(ifp);
struct queue_set *qs = nic->qs;
struct snd_queue *sq;
struct mbuf *mtmp;
int qidx;
int err = 0;
@ -610,16 +622,24 @@ nicvf_if_transmit(struct ifnet *ifp, struct mbuf *mbuf)
if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
IFF_DRV_RUNNING) {
if (mbuf != NULL)
err = drbr_enqueue(ifp, sq->br, mbuf);
err = drbr_enqueue(ifp, sq->br, mbuf);
return (err);
}
if (mbuf != NULL) {
if (mbuf->m_next != NULL &&
(mbuf->m_pkthdr.csum_flags &
(CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP)) != 0) {
if (M_WRITABLE(mbuf) == 0) {
mtmp = m_dup(mbuf, M_NOWAIT);
m_freem(mbuf);
if (mtmp == NULL)
return (ENOBUFS);
mbuf = mtmp;
}
}
err = drbr_enqueue(ifp, sq->br, mbuf);
if (err != 0)
return (err);
}
taskqueue_enqueue(sq->snd_taskq, &sq->snd_task);

View File

@ -29,6 +29,9 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bitset.h>
@ -64,6 +67,16 @@ __FBSDID("$FreeBSD$");
#include <net/if_media.h>
#include <net/ifq.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/sctp.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
@ -1658,11 +1671,17 @@ nicvf_sq_free_used_descs(struct nicvf *nic, struct snd_queue *sq, int qidx)
* Add SQ HEADER subdescriptor.
* First subdescriptor for every send descriptor.
*/
static __inline void
static __inline int
nicvf_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry,
int subdesc_cnt, struct mbuf *mbuf, int len)
{
struct sq_hdr_subdesc *hdr;
struct ether_vlan_header *eh;
#ifdef INET
struct ip *ip;
#endif
uint16_t etype;
int ehdrlen, iphlen, poff;
hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
sq->snd_buff[qentry].mbuf = mbuf;
@ -1675,7 +1694,93 @@ nicvf_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry,
hdr->subdesc_cnt = subdesc_cnt;
hdr->tot_len = len;
/* ARM64TODO: Implement HW checksums calculation */
if (mbuf->m_pkthdr.csum_flags != 0) {
hdr->csum_l3 = 1; /* Enable IP csum calculation */
eh = mtod(mbuf, struct ether_vlan_header *);
if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
etype = ntohs(eh->evl_proto);
} else {
ehdrlen = ETHER_HDR_LEN;
etype = ntohs(eh->evl_encap_proto);
}
if (mbuf->m_len < ehdrlen + sizeof(struct ip)) {
mbuf = m_pullup(mbuf, ehdrlen + sizeof(struct ip));
sq->snd_buff[qentry].mbuf = mbuf;
if (mbuf == NULL)
return (ENOBUFS);
}
switch (etype) {
#ifdef INET6
case ETHERTYPE_IPV6:
/* ARM64TODO: Add support for IPv6 */
hdr->csum_l3 = 0;
sq->snd_buff[qentry].mbuf = NULL;
return (ENXIO);
#endif
#ifdef INET
case ETHERTYPE_IP:
ip = (struct ip *)(mbuf->m_data + ehdrlen);
ip->ip_sum = 0;
iphlen = ip->ip_hl << 2;
poff = ehdrlen + iphlen;
switch (ip->ip_p) {
case IPPROTO_TCP:
if ((mbuf->m_pkthdr.csum_flags & CSUM_TCP) == 0)
break;
if (mbuf->m_len < (poff + sizeof(struct tcphdr))) {
mbuf = m_pullup(mbuf, poff + sizeof(struct tcphdr));
sq->snd_buff[qentry].mbuf = mbuf;
if (mbuf == NULL)
return (ENOBUFS);
}
hdr->csum_l4 = SEND_L4_CSUM_TCP;
break;
case IPPROTO_UDP:
if ((mbuf->m_pkthdr.csum_flags & CSUM_UDP) == 0)
break;
if (mbuf->m_len < (poff + sizeof(struct udphdr))) {
mbuf = m_pullup(mbuf, poff + sizeof(struct udphdr));
sq->snd_buff[qentry].mbuf = mbuf;
if (mbuf == NULL)
return (ENOBUFS);
}
hdr->csum_l4 = SEND_L4_CSUM_UDP;
break;
case IPPROTO_SCTP:
if ((mbuf->m_pkthdr.csum_flags & CSUM_SCTP) == 0)
break;
if (mbuf->m_len < (poff + sizeof(struct sctphdr))) {
mbuf = m_pullup(mbuf, poff + sizeof(struct sctphdr));
sq->snd_buff[qentry].mbuf = mbuf;
if (mbuf == NULL)
return (ENOBUFS);
}
hdr->csum_l4 = SEND_L4_CSUM_SCTP;
break;
default:
break;
}
break;
#endif
default:
hdr->csum_l3 = 0;
return (0);
}
hdr->l3_offset = ehdrlen;
hdr->l4_offset = ehdrlen + iphlen;
} else
hdr->csum_l3 = 0;
return (0);
}
/*
@ -1734,8 +1839,12 @@ nicvf_tx_mbuf_locked(struct snd_queue *sq, struct mbuf *mbuf)
qentry = nicvf_get_sq_desc(sq, subdesc_cnt);
/* Add SQ header subdesc */
nicvf_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, mbuf,
err = nicvf_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, mbuf,
mbuf->m_pkthdr.len);
if (err != 0) {
bus_dmamap_unload(sq->snd_buff_dmat, snd_buff->dmap);
return (err);
}
/* Add SQ gather subdescs */
for (seg = 0; seg < nsegs; seg++) {
@ -1806,6 +1915,25 @@ nicvf_get_rcv_mbuf(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
m_fixhdr(mbuf);
mbuf->m_pkthdr.flowid = cqe_rx->rq_idx;
M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE);
if (__predict_true((if_getcapenable(nic->ifp) & IFCAP_RXCSUM) != 0)) {
/*
* HW by default verifies IP & TCP/UDP/SCTP checksums
*/
/* XXX: Do we need to include IP with options too? */
if (__predict_true(cqe_rx->l3_type == L3TYPE_IPV4 ||
cqe_rx->l3_type == L3TYPE_IPV6)) {
mbuf->m_pkthdr.csum_flags =
(CSUM_IP_CHECKED | CSUM_IP_VALID);
}
if (cqe_rx->l4_type == L4TYPE_TCP ||
cqe_rx->l4_type == L4TYPE_UDP ||
cqe_rx->l4_type == L4TYPE_SCTP) {
mbuf->m_pkthdr.csum_flags |=
(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
mbuf->m_pkthdr.csum_data = htons(0xffff);
}
}
}
return (mbuf);