Improve mxge's receive performance for IPv6:
- Add support for IPv6 rx csum offload
- Finally switch mxge from using its own driver LRO to using tcp_lro

MFC after: 7 days
Sponsored by: Myricom Inc.
This commit is contained in:
parent
fe9a760737
commit
26dd49c61d
@ -1742,7 +1742,6 @@ mwlboot.fw optional mwlfw \
|
||||
no-obj no-implicit-rule \
|
||||
clean "mwlboot.fw"
|
||||
dev/mxge/if_mxge.c optional mxge pci
|
||||
dev/mxge/mxge_lro.c optional mxge pci
|
||||
dev/mxge/mxge_eth_z8e.c optional mxge pci
|
||||
dev/mxge/mxge_ethp_z8e.c optional mxge pci
|
||||
dev/mxge/mxge_rss_eth_z8e.c optional mxge pci
|
||||
|
@ -64,6 +64,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <netinet/ip.h>
|
||||
#include <netinet/ip6.h>
|
||||
#include <netinet/tcp.h>
|
||||
#include <netinet/tcp_lro.h>
|
||||
#include <netinet6/ip6_var.h>
|
||||
|
||||
#include <machine/bus.h>
|
||||
@ -102,7 +103,6 @@ static int mxge_intr_coal_delay = 30;
|
||||
static int mxge_deassert_wait = 1;
|
||||
static int mxge_flow_control = 1;
|
||||
static int mxge_verbose = 0;
|
||||
static int mxge_lro_cnt = 8;
|
||||
static int mxge_ticks;
|
||||
static int mxge_max_slices = 1;
|
||||
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
|
||||
@ -1311,9 +1311,9 @@ mxge_reset(mxge_softc_t *sc, int interrupts_setup)
|
||||
ss->tx.stall = 0;
|
||||
ss->rx_big.cnt = 0;
|
||||
ss->rx_small.cnt = 0;
|
||||
ss->lro_bad_csum = 0;
|
||||
ss->lro_queued = 0;
|
||||
ss->lro_flushed = 0;
|
||||
ss->lc.lro_bad_csum = 0;
|
||||
ss->lc.lro_queued = 0;
|
||||
ss->lc.lro_flushed = 0;
|
||||
if (ss->fw_stats != NULL) {
|
||||
bzero(ss->fw_stats, sizeof *ss->fw_stats);
|
||||
}
|
||||
@ -1413,50 +1413,6 @@ mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
|
||||
return err;
|
||||
}
|
||||
|
||||
static int
|
||||
mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
|
||||
{
|
||||
struct ifnet *ifp;
|
||||
int err = 0;
|
||||
|
||||
ifp = sc->ifp;
|
||||
if (lro_cnt == 0)
|
||||
ifp->if_capenable &= ~IFCAP_LRO;
|
||||
else
|
||||
ifp->if_capenable |= IFCAP_LRO;
|
||||
sc->lro_cnt = lro_cnt;
|
||||
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
|
||||
mxge_close(sc, 0);
|
||||
err = mxge_open(sc);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static int
|
||||
mxge_change_lro(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
mxge_softc_t *sc;
|
||||
unsigned int lro_cnt;
|
||||
int err;
|
||||
|
||||
sc = arg1;
|
||||
lro_cnt = sc->lro_cnt;
|
||||
err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
|
||||
if (err != 0)
|
||||
return err;
|
||||
|
||||
if (lro_cnt == sc->lro_cnt)
|
||||
return 0;
|
||||
|
||||
if (lro_cnt > 128)
|
||||
return EINVAL;
|
||||
|
||||
mtx_lock(&sc->driver_mtx);
|
||||
err = mxge_change_lro_locked(sc, lro_cnt);
|
||||
mtx_unlock(&sc->driver_mtx);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int
|
||||
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
@ -1653,14 +1609,6 @@ mxge_add_sysctls(mxge_softc_t *sc)
|
||||
CTLFLAG_RW, &mxge_verbose,
|
||||
0, "verbose printing");
|
||||
|
||||
/* lro */
|
||||
SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
|
||||
"lro_cnt",
|
||||
CTLTYPE_INT|CTLFLAG_RW, sc,
|
||||
0, mxge_change_lro,
|
||||
"I", "number of lro merge queues");
|
||||
|
||||
|
||||
/* add counters exported for debugging from all slices */
|
||||
sysctl_ctx_init(&sc->slice_sysctl_ctx);
|
||||
sc->slice_sysctl_tree =
|
||||
@ -1686,11 +1634,15 @@ mxge_add_sysctls(mxge_softc_t *sc)
|
||||
CTLFLAG_RD, &ss->rx_big.cnt,
|
||||
0, "rx_small_cnt");
|
||||
SYSCTL_ADD_INT(ctx, children, OID_AUTO,
|
||||
"lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
|
||||
"lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
|
||||
0, "number of lro merge queues flushed");
|
||||
|
||||
SYSCTL_ADD_INT(ctx, children, OID_AUTO,
|
||||
"lro_queued", CTLFLAG_RD, &ss->lro_queued,
|
||||
"lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
|
||||
0, "number of bad csums preventing LRO");
|
||||
|
||||
SYSCTL_ADD_INT(ctx, children, OID_AUTO,
|
||||
"lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
|
||||
0, "number of frames appended to lro merge"
|
||||
"queues");
|
||||
|
||||
@ -2534,6 +2486,64 @@ mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
|
||||
return err;
|
||||
}
|
||||
|
||||
#ifdef INET6
|
||||
|
||||
/*
 * Compute the folded 16-bit one's-complement sum of `len` bytes at `raw`.
 *
 * Words are summed exactly as they are loaded from memory (no byte
 * swapping), and the 32-bit accumulator is folded twice so that the
 * end-around carry is included.  The result is NOT complemented.
 *
 * An odd `len` is handled by treating the final byte as the first byte of
 * a 16-bit word whose second byte is zero; only `len` bytes are ever read.
 * A `len` of zero (or less) yields 0.
 */
static uint16_t
mxge_csum_generic(uint16_t *raw, int len)
{
	uint32_t csum = 0;

	for (; len > 1; len -= 2)
		csum += *raw++;

	if (len == 1) {
		/* Zero-pad the trailing byte instead of loading a full word. */
		union {
			uint8_t b[2];
			uint16_t w;
		} tail = { .b = { *(const uint8_t *)raw, 0 } };

		csum += tail.w;
	}

	csum = (csum >> 16) + (csum & 0xffff);
	csum = (csum >> 16) + (csum & 0xffff);
	return (uint16_t)csum;
}
|
||||
|
||||
static inline uint16_t
|
||||
mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum)
|
||||
{
|
||||
uint32_t partial;
|
||||
int nxt, cksum_offset;
|
||||
struct ip6_hdr *ip6 = p;
|
||||
uint16_t c;
|
||||
|
||||
nxt = ip6->ip6_nxt;
|
||||
cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN;
|
||||
if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
|
||||
cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN,
|
||||
IPPROTO_IPV6, &nxt);
|
||||
if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
|
||||
return (1);
|
||||
}
|
||||
|
||||
/*
|
||||
* IPv6 headers do not contain a checksum, and hence
|
||||
* do not checksum to zero, so they don't "fall out"
|
||||
* of the partial checksum calculation like IPv4
|
||||
* headers do. We need to fix the partial checksum by
|
||||
* subtracting the checksum of the IPv6 header.
|
||||
*/
|
||||
|
||||
partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset -
|
||||
ETHER_HDR_LEN);
|
||||
csum += ~partial;
|
||||
csum += (csum < ~partial);
|
||||
csum = (csum >> 16) + (csum & 0xFFFF);
|
||||
csum = (csum >> 16) + (csum & 0xFFFF);
|
||||
c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt,
|
||||
csum);
|
||||
|
||||
// printf("%d %d %x %x %x %x %x\n", m->m_pkthdr.len, cksum_offset, c, csum, ocsum, partial, d);
|
||||
c ^= 0xffff;
|
||||
return (c);
|
||||
}
|
||||
#endif /* INET6 */
|
||||
/*
|
||||
* Myri10GE hardware checksums are not valid if the sender
|
||||
* padded the frame with non-zero padding. This is because
|
||||
@ -2547,26 +2557,39 @@ static inline uint16_t
|
||||
mxge_rx_csum(struct mbuf *m, int csum)
|
||||
{
|
||||
struct ether_header *eh;
|
||||
#ifdef INET
|
||||
struct ip *ip;
|
||||
uint16_t c;
|
||||
#endif
|
||||
int cap = m->m_pkthdr.rcvif->if_capenable;
|
||||
uint16_t c, etype;
|
||||
|
||||
|
||||
eh = mtod(m, struct ether_header *);
|
||||
|
||||
/* only deal with IPv4 TCP & UDP for now */
|
||||
if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
|
||||
return 1;
|
||||
ip = (struct ip *)(eh + 1);
|
||||
if (__predict_false(ip->ip_p != IPPROTO_TCP &&
|
||||
ip->ip_p != IPPROTO_UDP))
|
||||
return 1;
|
||||
etype = ntohs(eh->ether_type);
|
||||
switch (etype) {
|
||||
#ifdef INET
|
||||
c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
|
||||
htonl(ntohs(csum) + ntohs(ip->ip_len) +
|
||||
- (ip->ip_hl << 2) + ip->ip_p));
|
||||
#else
|
||||
c = 1;
|
||||
case ETHERTYPE_IP:
|
||||
if ((cap & IFCAP_RXCSUM) == 0)
|
||||
return (1);
|
||||
ip = (struct ip *)(eh + 1);
|
||||
if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)
|
||||
return (1);
|
||||
c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
|
||||
htonl(ntohs(csum) + ntohs(ip->ip_len) -
|
||||
(ip->ip_hl << 2) + ip->ip_p));
|
||||
c ^= 0xffff;
|
||||
break;
|
||||
#endif
|
||||
c ^= 0xffff;
|
||||
#ifdef INET6
|
||||
case ETHERTYPE_IPV6:
|
||||
if ((cap & IFCAP_RXCSUM_IPV6) == 0)
|
||||
return (1);
|
||||
c = mxge_rx_csum6((eh + 1), m, csum);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
c = 1;
|
||||
}
|
||||
return (c);
|
||||
}
|
||||
|
||||
@ -2628,7 +2651,8 @@ mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
|
||||
|
||||
|
||||
static inline void
|
||||
mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
|
||||
mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len,
|
||||
uint32_t csum, int lro)
|
||||
{
|
||||
mxge_softc_t *sc;
|
||||
struct ifnet *ifp;
|
||||
@ -2637,7 +2661,6 @@ mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
|
||||
mxge_rx_ring_t *rx;
|
||||
bus_dmamap_t old_map;
|
||||
int idx;
|
||||
uint16_t tcpudp_csum;
|
||||
|
||||
sc = ss->sc;
|
||||
ifp = sc->ifp;
|
||||
@ -2674,14 +2697,18 @@ mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
|
||||
mxge_vlan_tag_remove(m, &csum);
|
||||
}
|
||||
/* if the checksum is valid, mark it in the mbuf header */
|
||||
if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
|
||||
if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
|
||||
return;
|
||||
/* otherwise, it was a UDP frame, or a TCP frame which
|
||||
we could not do LRO on. Tell the stack that the
|
||||
checksum is good */
|
||||
|
||||
if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
|
||||
(0 == mxge_rx_csum(m, csum))) {
|
||||
/* Tell the stack that the checksum is good */
|
||||
m->m_pkthdr.csum_data = 0xffff;
|
||||
m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
|
||||
m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
|
||||
CSUM_DATA_VALID;
|
||||
|
||||
#if defined(INET) || defined (INET6)
|
||||
if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0)))
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
/* flowid only valid if RSS hashing is enabled */
|
||||
if (sc->num_slices > 1) {
|
||||
@ -2693,7 +2720,8 @@ mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
|
||||
}
|
||||
|
||||
static inline void
|
||||
mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
|
||||
mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len,
|
||||
uint32_t csum, int lro)
|
||||
{
|
||||
mxge_softc_t *sc;
|
||||
struct ifnet *ifp;
|
||||
@ -2702,7 +2730,6 @@ mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
|
||||
mxge_rx_ring_t *rx;
|
||||
bus_dmamap_t old_map;
|
||||
int idx;
|
||||
uint16_t tcpudp_csum;
|
||||
|
||||
sc = ss->sc;
|
||||
ifp = sc->ifp;
|
||||
@ -2739,14 +2766,17 @@ mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
|
||||
mxge_vlan_tag_remove(m, &csum);
|
||||
}
|
||||
/* if the checksum is valid, mark it in the mbuf header */
|
||||
if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
|
||||
if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
|
||||
return;
|
||||
/* otherwise, it was a UDP frame, or a TCP frame which
|
||||
we could not do LRO on. Tell the stack that the
|
||||
checksum is good */
|
||||
if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
|
||||
(0 == mxge_rx_csum(m, csum))) {
|
||||
/* Tell the stack that the checksum is good */
|
||||
m->m_pkthdr.csum_data = 0xffff;
|
||||
m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
|
||||
m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
|
||||
CSUM_DATA_VALID;
|
||||
|
||||
#if defined(INET) || defined (INET6)
|
||||
if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum)))
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
/* flowid only valid if RSS hashing is enabled */
|
||||
if (sc->num_slices > 1) {
|
||||
@ -2764,16 +2794,17 @@ mxge_clean_rx_done(struct mxge_slice_state *ss)
|
||||
int limit = 0;
|
||||
uint16_t length;
|
||||
uint16_t checksum;
|
||||
int lro;
|
||||
|
||||
|
||||
lro = ss->sc->ifp->if_capenable & IFCAP_LRO;
|
||||
while (rx_done->entry[rx_done->idx].length != 0) {
|
||||
length = ntohs(rx_done->entry[rx_done->idx].length);
|
||||
rx_done->entry[rx_done->idx].length = 0;
|
||||
checksum = rx_done->entry[rx_done->idx].checksum;
|
||||
if (length <= (MHLEN - MXGEFW_PAD))
|
||||
mxge_rx_done_small(ss, length, checksum);
|
||||
mxge_rx_done_small(ss, length, checksum, lro);
|
||||
else
|
||||
mxge_rx_done_big(ss, length, checksum);
|
||||
mxge_rx_done_big(ss, length, checksum, lro);
|
||||
rx_done->cnt++;
|
||||
rx_done->idx = rx_done->cnt & rx_done->mask;
|
||||
|
||||
@ -2781,11 +2812,11 @@ mxge_clean_rx_done(struct mxge_slice_state *ss)
|
||||
if (__predict_false(++limit > rx_done->mask / 2))
|
||||
break;
|
||||
}
|
||||
#ifdef INET
|
||||
while (!SLIST_EMPTY(&ss->lro_active)) {
|
||||
struct lro_entry *lro = SLIST_FIRST(&ss->lro_active);
|
||||
SLIST_REMOVE_HEAD(&ss->lro_active, next);
|
||||
mxge_lro_flush(ss, lro);
|
||||
#if defined(INET) || defined (INET6)
|
||||
while (!SLIST_EMPTY(&ss->lc.lro_active)) {
|
||||
struct lro_entry *lro = SLIST_FIRST(&ss->lc.lro_active);
|
||||
SLIST_REMOVE_HEAD(&ss->lc.lro_active, next);
|
||||
tcp_lro_flush(&ss->lc, lro);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@ -3153,15 +3184,11 @@ mxge_init(void *arg)
|
||||
static void
|
||||
mxge_free_slice_mbufs(struct mxge_slice_state *ss)
|
||||
{
|
||||
struct lro_entry *lro_entry;
|
||||
int i;
|
||||
|
||||
while (!SLIST_EMPTY(&ss->lro_free)) {
|
||||
lro_entry = SLIST_FIRST(&ss->lro_free);
|
||||
SLIST_REMOVE_HEAD(&ss->lro_free, next);
|
||||
free(lro_entry, M_DEVBUF);
|
||||
}
|
||||
|
||||
#if defined(INET) || defined(INET6)
|
||||
tcp_lro_free(&ss->lc);
|
||||
#endif
|
||||
for (i = 0; i <= ss->rx_big.mask; i++) {
|
||||
if (ss->rx_big.info[i].m == NULL)
|
||||
continue;
|
||||
@ -3545,26 +3572,17 @@ mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
|
||||
mxge_softc_t *sc;
|
||||
mxge_cmd_t cmd;
|
||||
bus_dmamap_t map;
|
||||
struct lro_entry *lro_entry;
|
||||
int err, i, slice;
|
||||
|
||||
|
||||
sc = ss->sc;
|
||||
slice = ss - sc->ss;
|
||||
|
||||
SLIST_INIT(&ss->lro_free);
|
||||
SLIST_INIT(&ss->lro_active);
|
||||
|
||||
for (i = 0; i < sc->lro_cnt; i++) {
|
||||
lro_entry = (struct lro_entry *)
|
||||
malloc(sizeof (*lro_entry), M_DEVBUF,
|
||||
M_NOWAIT | M_ZERO);
|
||||
if (lro_entry == NULL) {
|
||||
sc->lro_cnt = i;
|
||||
break;
|
||||
}
|
||||
SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
|
||||
}
|
||||
#if defined(INET) || defined(INET6)
|
||||
(void)tcp_lro_init(&ss->lc);
|
||||
#endif
|
||||
ss->lc.ifp = sc->ifp;
|
||||
|
||||
/* get the lanai pointers to the send and receive rings */
|
||||
|
||||
err = 0;
|
||||
@ -4219,10 +4237,8 @@ mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
|
||||
} else if (mask & IFCAP_RXCSUM) {
|
||||
if (IFCAP_RXCSUM & ifp->if_capenable) {
|
||||
ifp->if_capenable &= ~IFCAP_RXCSUM;
|
||||
sc->csum_flag = 0;
|
||||
} else {
|
||||
ifp->if_capenable |= IFCAP_RXCSUM;
|
||||
sc->csum_flag = 1;
|
||||
}
|
||||
}
|
||||
if (mask & IFCAP_TSO4) {
|
||||
@ -4249,16 +4265,12 @@ mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
|
||||
ifp->if_hwassist |= (CSUM_TCP_IPV6
|
||||
| CSUM_UDP_IPV6);
|
||||
}
|
||||
#ifdef NOTYET
|
||||
} else if (mask & IFCAP_RXCSUM6) {
|
||||
if (IFCAP_RXCSUM6 & ifp->if_capenable) {
|
||||
ifp->if_capenable &= ~IFCAP_RXCSUM6;
|
||||
sc->csum_flag = 0;
|
||||
} else if (mask & IFCAP_RXCSUM_IPV6) {
|
||||
if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) {
|
||||
ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6;
|
||||
} else {
|
||||
ifp->if_capenable |= IFCAP_RXCSUM6;
|
||||
sc->csum_flag = 1;
|
||||
ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
if (mask & IFCAP_TSO6) {
|
||||
if (IFCAP_TSO6 & ifp->if_capenable) {
|
||||
@ -4274,12 +4286,8 @@ mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
|
||||
}
|
||||
#endif /*IFCAP_TSO6 */
|
||||
|
||||
if (mask & IFCAP_LRO) {
|
||||
if (IFCAP_LRO & ifp->if_capenable)
|
||||
err = mxge_change_lro_locked(sc, 0);
|
||||
else
|
||||
err = mxge_change_lro_locked(sc, mxge_lro_cnt);
|
||||
}
|
||||
if (mask & IFCAP_LRO)
|
||||
ifp->if_capenable ^= IFCAP_LRO;
|
||||
if (mask & IFCAP_VLAN_HWTAGGING)
|
||||
ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
|
||||
if (mask & IFCAP_VLAN_HWTSO)
|
||||
@ -4326,14 +4334,11 @@ mxge_fetch_tunables(mxge_softc_t *sc)
|
||||
TUNABLE_INT_FETCH("hw.mxge.verbose",
|
||||
&mxge_verbose);
|
||||
TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
|
||||
TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
|
||||
TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
|
||||
TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
|
||||
TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
|
||||
TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
|
||||
TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
|
||||
if (sc->lro_cnt != 0)
|
||||
mxge_lro_cnt = sc->lro_cnt;
|
||||
|
||||
if (bootverbose)
|
||||
mxge_verbose = 1;
|
||||
@ -4897,8 +4902,9 @@ mxge_attach(device_t dev)
|
||||
|
||||
if_initbaudrate(ifp, IF_Gbps(10));
|
||||
ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
|
||||
IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6;
|
||||
#ifdef INET
|
||||
IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
|
||||
IFCAP_RXCSUM_IPV6;
|
||||
#if defined(INET) || defined(INET6)
|
||||
ifp->if_capabilities |= IFCAP_LRO;
|
||||
#endif
|
||||
|
||||
@ -4929,7 +4935,6 @@ mxge_attach(device_t dev)
|
||||
ifp->if_capenable = ifp->if_capabilities;
|
||||
if (sc->lro_cnt == 0)
|
||||
ifp->if_capenable &= ~IFCAP_LRO;
|
||||
sc->csum_flag = 1;
|
||||
ifp->if_init = mxge_init;
|
||||
ifp->if_softc = sc;
|
||||
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
|
||||
|
@ -194,31 +194,6 @@ typedef struct
|
||||
char mtx_name[16];
|
||||
} mxge_tx_ring_t;
|
||||
|
||||
struct lro_entry;
|
||||
struct lro_entry
|
||||
{
|
||||
SLIST_ENTRY(lro_entry) next;
|
||||
struct mbuf *m_head;
|
||||
struct mbuf *m_tail;
|
||||
int timestamp;
|
||||
struct ip *ip;
|
||||
uint32_t tsval;
|
||||
uint32_t tsecr;
|
||||
uint32_t source_ip;
|
||||
uint32_t dest_ip;
|
||||
uint32_t next_seq;
|
||||
uint32_t ack_seq;
|
||||
uint32_t len;
|
||||
uint32_t data_csum;
|
||||
uint16_t window;
|
||||
uint16_t source_port;
|
||||
uint16_t dest_port;
|
||||
uint16_t append_cnt;
|
||||
uint16_t mss;
|
||||
|
||||
};
|
||||
SLIST_HEAD(lro_head, lro_entry);
|
||||
|
||||
struct mxge_softc;
|
||||
typedef struct mxge_softc mxge_softc_t;
|
||||
|
||||
@ -236,11 +211,7 @@ struct mxge_slice_state {
|
||||
u_long omcasts;
|
||||
u_long oerrors;
|
||||
int if_drv_flags;
|
||||
struct lro_head lro_active;
|
||||
struct lro_head lro_free;
|
||||
int lro_queued;
|
||||
int lro_flushed;
|
||||
int lro_bad_csum;
|
||||
struct lro_ctrl lc;
|
||||
mxge_dma_t fw_stats_dma;
|
||||
struct sysctl_oid *sysctl_tree;
|
||||
struct sysctl_ctx_list sysctl_ctx;
|
||||
@ -250,7 +221,6 @@ struct mxge_slice_state {
|
||||
struct mxge_softc {
|
||||
struct ifnet* ifp;
|
||||
struct mxge_slice_state *ss;
|
||||
int csum_flag; /* rx_csums? */
|
||||
int tx_boundary; /* boundary transmits cannot cross*/
|
||||
int lro_cnt;
|
||||
bus_dma_tag_t parent_dmat;
|
||||
|
@ -1,357 +0,0 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright (c) 2007-2008, Myricom Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
2. Neither the name of the Myricom Inc, nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
***************************************************************************/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/endian.h>
|
||||
#include <sys/mbuf.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/bus.h>
|
||||
|
||||
#include <net/if.h>
|
||||
#include <net/ethernet.h>
|
||||
#include <net/if_media.h>
|
||||
|
||||
#include <netinet/in_systm.h>
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/ip.h>
|
||||
#include <netinet/tcp.h>
|
||||
|
||||
#include <machine/bus.h>
|
||||
#include <machine/in_cksum.h>
|
||||
|
||||
#include <dev/mxge/mxge_mcp.h>
|
||||
#include <dev/mxge/if_mxge_var.h>
|
||||
|
||||
#include "opt_inet.h"
|
||||
|
||||
#ifdef INET
|
||||
|
||||
/*
 * Compute the folded 16-bit one's-complement sum of `len` bytes at `raw`.
 *
 * The main loop consumes four bytes per iteration; a remaining 16-bit word
 * and a remaining single byte (zero-padded to a word) are then added, so
 * `len` no longer has to be a multiple of 4 and no byte beyond `len` is
 * read.  Words are summed as loaded from memory and the result is not
 * complemented.
 */
static uint16_t
mxge_csum_generic(uint16_t *raw, int len)
{
	uint32_t csum = 0;

	for (; len >= 4; len -= 4) {
		csum += raw[0];
		csum += raw[1];
		raw += 2;
	}
	if (len >= 2) {
		csum += *raw++;
		len -= 2;
	}
	if (len == 1) {
		/* Zero-pad the trailing byte instead of loading a full word. */
		union {
			uint8_t b[2];
			uint16_t w;
		} tail = { .b = { *(const uint8_t *)raw, 0 } };

		csum += tail.w;
	}

	csum = (csum >> 16) + (csum & 0xffff);
	csum = (csum >> 16) + (csum & 0xffff);
	return (uint16_t)csum;
}
|
||||
|
||||
|
||||
void
|
||||
mxge_lro_flush(struct mxge_slice_state *ss, struct lro_entry *lro)
|
||||
{
|
||||
mxge_softc_t *mgp = ss->sc;
|
||||
struct ifnet *ifp;
|
||||
struct ip *ip;
|
||||
struct tcphdr *tcp;
|
||||
uint32_t *ts_ptr;
|
||||
uint32_t tcplen, tcp_csum;
|
||||
|
||||
if (lro->append_cnt) {
|
||||
/* incorporate the new len into the ip header and
|
||||
* re-calculate the checksum */
|
||||
ip = lro->ip;
|
||||
ip->ip_len = htons(lro->len - ETHER_HDR_LEN);
|
||||
ip->ip_sum = 0;
|
||||
ip->ip_sum = 0xffff ^
|
||||
mxge_csum_generic((uint16_t*)ip,
|
||||
sizeof (*ip));
|
||||
|
||||
lro->m_head->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
|
||||
CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
|
||||
lro->m_head->m_pkthdr.csum_data = 0xffff;
|
||||
lro->m_head->m_pkthdr.len = lro->len;
|
||||
|
||||
/* incorporate the latest ack into the tcp header */
|
||||
tcp = (struct tcphdr *) (ip + 1);
|
||||
tcp->th_ack = lro->ack_seq;
|
||||
tcp->th_win = lro->window;
|
||||
/* incorporate latest timestamp into the tcp header */
|
||||
if (lro->timestamp) {
|
||||
ts_ptr = (uint32_t *)(tcp + 1);
|
||||
ts_ptr[1] = htonl(lro->tsval);
|
||||
ts_ptr[2] = lro->tsecr;
|
||||
}
|
||||
/*
|
||||
* update checksum in tcp header by re-calculating the
|
||||
* tcp pseudoheader checksum, and adding it to the checksum
|
||||
* of the tcp payload data
|
||||
*/
|
||||
tcp->th_sum = 0;
|
||||
tcplen = lro->len - sizeof(*ip) - ETHER_HDR_LEN;
|
||||
tcp_csum = lro->data_csum;
|
||||
tcp_csum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
|
||||
htons(tcplen + IPPROTO_TCP));
|
||||
tcp_csum += mxge_csum_generic((uint16_t*)tcp,
|
||||
tcp->th_off << 2);
|
||||
tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
|
||||
tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
|
||||
#if 0
|
||||
IOLog("pseudo = 0x%x, generic = 0x%x, sum = %x\n",
|
||||
in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
|
||||
htons(tcplen + IPPROTO_TCP)),
|
||||
mxge_csum_generic((uint16_t*)tcp,
|
||||
tcp->th_off << 2),
|
||||
htons(0xffff ^ tcp_csum));
|
||||
#endif
|
||||
tcp->th_sum = 0xffff ^ tcp_csum;
|
||||
}
|
||||
ifp = mgp->ifp;
|
||||
(*ifp->if_input)(mgp->ifp, lro->m_head);
|
||||
ss->lro_queued += lro->append_cnt + 1;
|
||||
ss->lro_flushed++;
|
||||
lro->m_head = NULL;
|
||||
lro->timestamp = 0;
|
||||
lro->append_cnt = 0;
|
||||
SLIST_INSERT_HEAD(&ss->lro_free, lro, next);
|
||||
}
|
||||
|
||||
int
|
||||
mxge_lro_rx(struct mxge_slice_state *ss, struct mbuf *m_head, uint32_t csum)
|
||||
{
|
||||
struct ether_header *eh;
|
||||
struct ip *ip;
|
||||
struct tcphdr *tcp;
|
||||
uint32_t *ts_ptr;
|
||||
struct mbuf *m_nxt, *m_tail;
|
||||
struct lro_entry *lro;
|
||||
int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len;
|
||||
int opt_bytes, trim;
|
||||
uint32_t seq, tmp_csum, device_mtu;
|
||||
|
||||
eh = mtod(m_head, struct ether_header *);
|
||||
if (eh->ether_type != htons(ETHERTYPE_IP))
|
||||
return 1;
|
||||
ip = (struct ip *) (eh + 1);
|
||||
if (ip->ip_p != IPPROTO_TCP)
|
||||
return 1;
|
||||
|
||||
/* ensure there are no options */
|
||||
if ((ip->ip_hl << 2) != sizeof (*ip))
|
||||
return -1;
|
||||
|
||||
/* .. and the packet is not fragmented */
|
||||
if (ip->ip_off & htons(IP_MF|IP_OFFMASK))
|
||||
return -1;
|
||||
|
||||
/* verify that the IP header checksum is correct */
|
||||
tmp_csum = mxge_csum_generic((uint16_t *)ip, sizeof (*ip));
|
||||
if (__predict_false((tmp_csum ^ 0xffff) != 0)) {
|
||||
ss->lro_bad_csum++;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* find the TCP header */
|
||||
tcp = (struct tcphdr *) (ip + 1);
|
||||
|
||||
/* ensure no bits set besides ack or psh */
|
||||
if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
|
||||
return -1;
|
||||
|
||||
/* check for timestamps. Since the only option we handle are
|
||||
timestamps, we only have to handle the simple case of
|
||||
aligned timestamps */
|
||||
|
||||
opt_bytes = (tcp->th_off << 2) - sizeof (*tcp);
|
||||
tcp_hdr_len = sizeof (*tcp) + opt_bytes;
|
||||
ts_ptr = (uint32_t *)(tcp + 1);
|
||||
if (opt_bytes != 0) {
|
||||
if (__predict_false(opt_bytes != TCPOLEN_TSTAMP_APPA) ||
|
||||
(*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))
|
||||
return -1;
|
||||
}
|
||||
|
||||
ip_len = ntohs(ip->ip_len);
|
||||
tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip);
|
||||
|
||||
|
||||
/*
|
||||
* If frame is padded beyond the end of the IP packet,
|
||||
* then we must trim the extra bytes off the end.
|
||||
*/
|
||||
tot_len = m_head->m_pkthdr.len;
|
||||
trim = tot_len - (ip_len + ETHER_HDR_LEN);
|
||||
if (trim != 0) {
|
||||
if (trim < 0) {
|
||||
/* truncated packet */
|
||||
return -1;
|
||||
}
|
||||
m_adj(m_head, -trim);
|
||||
tot_len = m_head->m_pkthdr.len;
|
||||
}
|
||||
|
||||
m_nxt = m_head;
|
||||
m_tail = NULL; /* -Wuninitialized */
|
||||
while (m_nxt != NULL) {
|
||||
m_tail = m_nxt;
|
||||
m_nxt = m_tail->m_next;
|
||||
}
|
||||
|
||||
hlen = ip_len + ETHER_HDR_LEN - tcp_data_len;
|
||||
seq = ntohl(tcp->th_seq);
|
||||
|
||||
SLIST_FOREACH(lro, &ss->lro_active, next) {
|
||||
if (lro->source_port == tcp->th_sport &&
|
||||
lro->dest_port == tcp->th_dport &&
|
||||
lro->source_ip == ip->ip_src.s_addr &&
|
||||
lro->dest_ip == ip->ip_dst.s_addr) {
|
||||
/* Try to append it */
|
||||
|
||||
if (__predict_false(seq != lro->next_seq ||
|
||||
(tcp_data_len == 0 &&
|
||||
lro->ack_seq == tcp->th_ack))) {
|
||||
/* out of order packet or dup ack */
|
||||
SLIST_REMOVE(&ss->lro_active, lro,
|
||||
lro_entry, next);
|
||||
mxge_lro_flush(ss, lro);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (opt_bytes) {
|
||||
uint32_t tsval = ntohl(*(ts_ptr + 1));
|
||||
/* make sure timestamp values are increasing */
|
||||
if (__predict_false(lro->tsval > tsval ||
|
||||
*(ts_ptr + 2) == 0)) {
|
||||
return -1;
|
||||
}
|
||||
lro->tsval = tsval;
|
||||
lro->tsecr = *(ts_ptr + 2);
|
||||
}
|
||||
|
||||
lro->next_seq += tcp_data_len;
|
||||
lro->ack_seq = tcp->th_ack;
|
||||
lro->window = tcp->th_win;
|
||||
lro->append_cnt++;
|
||||
if (tcp_data_len == 0) {
|
||||
m_freem(m_head);
|
||||
return 0;
|
||||
}
|
||||
/* subtract off the checksum of the tcp header
|
||||
* from the hardware checksum, and add it to the
|
||||
* stored tcp data checksum. Byteswap the checksum
|
||||
* if the total length so far is odd
|
||||
*/
|
||||
tmp_csum = mxge_csum_generic((uint16_t*)tcp,
|
||||
tcp_hdr_len);
|
||||
csum = csum + (tmp_csum ^ 0xffff);
|
||||
csum = (csum & 0xffff) + (csum >> 16);
|
||||
csum = (csum & 0xffff) + (csum >> 16);
|
||||
if (lro->len & 0x1) {
|
||||
/* Odd number of bytes so far, flip bytes */
|
||||
csum = ((csum << 8) | (csum >> 8)) & 0xffff;
|
||||
}
|
||||
csum = csum + lro->data_csum;
|
||||
csum = (csum & 0xffff) + (csum >> 16);
|
||||
csum = (csum & 0xffff) + (csum >> 16);
|
||||
lro->data_csum = csum;
|
||||
|
||||
lro->len += tcp_data_len;
|
||||
|
||||
/* adjust mbuf so that m->m_data points to
|
||||
the first byte of the payload */
|
||||
m_adj(m_head, hlen);
|
||||
/* append mbuf chain */
|
||||
lro->m_tail->m_next = m_head;
|
||||
/* advance the last pointer */
|
||||
lro->m_tail = m_tail;
|
||||
/* flush packet if required */
|
||||
device_mtu = ss->sc->ifp->if_mtu;
|
||||
if (lro->len > (65535 - device_mtu)) {
|
||||
SLIST_REMOVE(&ss->lro_active, lro,
|
||||
lro_entry, next);
|
||||
mxge_lro_flush(ss, lro);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (SLIST_EMPTY(&ss->lro_free))
|
||||
return -1;
|
||||
|
||||
/* start a new chain */
|
||||
lro = SLIST_FIRST(&ss->lro_free);
|
||||
SLIST_REMOVE_HEAD(&ss->lro_free, next);
|
||||
SLIST_INSERT_HEAD(&ss->lro_active, lro, next);
|
||||
lro->source_port = tcp->th_sport;
|
||||
lro->dest_port = tcp->th_dport;
|
||||
lro->source_ip = ip->ip_src.s_addr;
|
||||
lro->dest_ip = ip->ip_dst.s_addr;
|
||||
lro->next_seq = seq + tcp_data_len;
|
||||
lro->mss = tcp_data_len;
|
||||
lro->ack_seq = tcp->th_ack;
|
||||
lro->window = tcp->th_win;
|
||||
|
||||
/* save the checksum of just the TCP payload by
|
||||
* subtracting off the checksum of the TCP header from
|
||||
* the entire hardware checksum
|
||||
* Since IP header checksum is correct, checksum over
|
||||
* the IP header is -0. Substracting -0 is unnecessary.
|
||||
*/
|
||||
tmp_csum = mxge_csum_generic((uint16_t*)tcp, tcp_hdr_len);
|
||||
csum = csum + (tmp_csum ^ 0xffff);
|
||||
csum = (csum & 0xffff) + (csum >> 16);
|
||||
csum = (csum & 0xffff) + (csum >> 16);
|
||||
lro->data_csum = csum;
|
||||
|
||||
lro->ip = ip;
|
||||
/* record timestamp if it is present */
|
||||
if (opt_bytes) {
|
||||
lro->timestamp = 1;
|
||||
lro->tsval = ntohl(*(ts_ptr + 1));
|
||||
lro->tsecr = *(ts_ptr + 2);
|
||||
}
|
||||
lro->len = tot_len;
|
||||
lro->m_head = m_head;
|
||||
lro->m_tail = m_tail;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* INET */
|
||||
/*
|
||||
This file uses Myri10GE driver indentation.
|
||||
|
||||
Local Variables:
|
||||
c-file-style:"linux"
|
||||
tab-width:8
|
||||
End:
|
||||
*/
|
@ -3,6 +3,6 @@
|
||||
.PATH: ${.CURDIR}/../../../dev/mxge
|
||||
|
||||
KMOD= if_mxge
|
||||
SRCS= if_mxge.c mxge_lro.c device_if.h bus_if.h pci_if.h opt_inet.h opt_inet6.h
|
||||
SRCS= if_mxge.c device_if.h bus_if.h pci_if.h opt_inet.h opt_inet6.h
|
||||
|
||||
.include <bsd.kmod.mk>
|
||||
|
Loading…
Reference in New Issue
Block a user