diff --git a/sys/dev/cxgb/cxgb_sge.c b/sys/dev/cxgb/cxgb_sge.c index a7499001af30..d70a94877fcd 100644 --- a/sys/dev/cxgb/cxgb_sge.c +++ b/sys/dev/cxgb/cxgb_sge.c @@ -3579,11 +3579,11 @@ t3_add_configured_sysctls(adapter_t *sc) CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL], 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue"); - SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued", + SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_queued", CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL); - SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed", + SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_flushed", CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL); - SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum", + SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_bad_csum", CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL); SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt", CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL); diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c index 743a399074c0..b43cd5fdf108 100644 --- a/sys/dev/cxgbe/t4_sge.c +++ b/sys/dev/cxgbe/t4_sge.c @@ -2939,9 +2939,9 @@ alloc_rxq(struct vi_info *vi, struct sge_rxq *rxq, int intr_idx, int idx, CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cidx, 0, sysctl_uint16, "I", "consumer index"); #if defined(INET) || defined(INET6) - SYSCTL_ADD_INT(&vi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD, + SYSCTL_ADD_U64(&vi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD, &rxq->lro.lro_queued, 0, NULL); - SYSCTL_ADD_INT(&vi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD, + SYSCTL_ADD_U64(&vi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD, &rxq->lro.lro_flushed, 0, NULL); #endif SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD, diff --git a/sys/dev/e1000/if_igb.c b/sys/dev/e1000/if_igb.c index 5b172dfe017f..5be7836cb8f8 100644 --- a/sys/dev/e1000/if_igb.c +++ b/sys/dev/e1000/if_igb.c @@ -5914,10 +5914,10 @@ igb_add_hw_stats(struct adapter *adapter) SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &rxr->rx_bytes, "Queue Bytes Received"); - SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued", + SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_queued", CTLFLAG_RD, &lro->lro_queued, 0, "LRO Queued"); - SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed", + SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_flushed", CTLFLAG_RD, &lro->lro_flushed, 0, "LRO Flushed"); } diff --git a/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c b/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c index fd23db90e9d7..29ceb2ab01ab 100644 --- a/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c +++ b/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c @@ -405,9 +405,9 @@ netvsc_attach(device_t dev) ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); - SYSCTL_ADD_INT(ctx, child, OID_AUTO, "lro_queued", + SYSCTL_ADD_U64(ctx, child, OID_AUTO, "lro_queued", CTLFLAG_RW, &sc->hn_lro.lro_queued, 0, "LRO queued"); - SYSCTL_ADD_INT(ctx, child, OID_AUTO, "lro_flushed", + SYSCTL_ADD_U64(ctx, child, OID_AUTO, "lro_flushed", CTLFLAG_RW, &sc->hn_lro.lro_flushed, 0, "LRO flushed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "lro_tried", CTLFLAG_RW, &sc->hn_lro_tried, "# of LRO tries"); diff --git a/sys/dev/ixgbe/if_ix.c b/sys/dev/ixgbe/if_ix.c index 90d70d98a464..67cc4bae2ce2 100644 --- a/sys/dev/ixgbe/if_ix.c +++ b/sys/dev/ixgbe/if_ix.c @@ -4476,10 +4476,10 @@ ixgbe_add_hw_stats(struct adapter *adapter) SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_copies", CTLFLAG_RD, &rxr->rx_copies, "Copied RX Frames"); - SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued", + SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_queued", CTLFLAG_RD, &lro->lro_queued, 0, "LRO Queued"); - SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed", + SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_flushed", CTLFLAG_RD, &lro->lro_flushed, 0, "LRO Flushed"); } diff --git a/sys/dev/ixgbe/if_ixv.c b/sys/dev/ixgbe/if_ixv.c index 20358ad1a9b0..7b6e3d136e78 100644 --- a/sys/dev/ixgbe/if_ixv.c +++ b/sys/dev/ixgbe/if_ixv.c @@ -2167,10 +2167,10 @@ ixv_print_debug_info(struct adapter *adapter) rxr->me, (long long)rxr->rx_packets); device_printf(dev,"RX(%d) Bytes Received: %lu\n", rxr->me, (long)rxr->rx_bytes); - device_printf(dev,"RX(%d) LRO Queued= %d\n", - rxr->me, lro->lro_queued); - device_printf(dev,"RX(%d) LRO Flushed= %d\n", - rxr->me, lro->lro_flushed); + device_printf(dev,"RX(%d) LRO Queued= %lld\n", + rxr->me, (long long)lro->lro_queued); + device_printf(dev,"RX(%d) LRO Flushed= %lld\n", + rxr->me, (long long)lro->lro_flushed); device_printf(dev,"TX(%d) Packets Sent: %lu\n", txr->me, (long)txr->total_packets); device_printf(dev,"TX(%d) NO Desc Avail: %lu\n", diff --git a/sys/dev/mxge/if_mxge.c b/sys/dev/mxge/if_mxge.c index 983caa30bc29..ba2af0c26d35 100644 --- a/sys/dev/mxge/if_mxge.c +++ b/sys/dev/mxge/if_mxge.c @@ -1637,15 +1637,15 @@ mxge_add_sysctls(mxge_softc_t *sc) "rx_big_cnt", CTLFLAG_RD, &ss->rx_big.cnt, 0, "rx_small_cnt"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, + SYSCTL_ADD_U64(ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed, 0, "number of lro merge queues flushed"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, + SYSCTL_ADD_U64(ctx, children, OID_AUTO, "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum, 0, "number of bad csums preventing LRO"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, + SYSCTL_ADD_U64(ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued, 0, "number of frames appended to lro merge" "queues"); diff --git a/sys/netinet/tcp_lro.c b/sys/netinet/tcp_lro.c index 83ab2e2072d4..566334348810 100644 --- a/sys/netinet/tcp_lro.c +++ b/sys/netinet/tcp_lro.c @@ -2,6 +2,7 @@ * Copyright (c) 2007, Myricom Inc. * Copyright (c) 2008, Intel Corporation. * Copyright (c) 2012 The FreeBSD Foundation + * Copyright (c) 2016 Mellanox Technologies. * All rights reserved. * * Portions of this software were developed by Bjoern Zeeb @@ -58,9 +59,7 @@ __FBSDID("$FreeBSD$"); #include -#ifndef LRO_ENTRIES -#define LRO_ENTRIES 8 /* # of LRO entries per RX queue. */ -#endif +static MALLOC_DEFINE(M_LRO, "LRO", "LRO control structures"); #define TCP_LRO_UPDATE_CSUM 1 #ifndef TCP_LRO_UPDATE_CSUM @@ -69,43 +68,74 @@ __FBSDID("$FreeBSD$"); int tcp_lro_init(struct lro_ctrl *lc) +{ + return (tcp_lro_init_args(lc, NULL, TCP_LRO_ENTRIES, 0)); +} + +int +tcp_lro_init_args(struct lro_ctrl *lc, struct ifnet *ifp, + unsigned lro_entries, unsigned lro_mbufs) { struct lro_entry *le; - int error, i; + size_t size; + unsigned i; lc->lro_bad_csum = 0; lc->lro_queued = 0; lc->lro_flushed = 0; lc->lro_cnt = 0; + lc->lro_mbuf_count = 0; + lc->lro_mbuf_max = lro_mbufs; + lc->lro_cnt = lro_entries; + lc->ifp = ifp; SLIST_INIT(&lc->lro_free); SLIST_INIT(&lc->lro_active); - error = 0; - for (i = 0; i < LRO_ENTRIES; i++) { - le = (struct lro_entry *)malloc(sizeof(*le), M_DEVBUF, - M_NOWAIT | M_ZERO); - if (le == NULL) { - if (i == 0) - error = ENOMEM; - break; - } - lc->lro_cnt = i + 1; - SLIST_INSERT_HEAD(&lc->lro_free, le, next); - } + /* compute size to allocate */ + size = (lro_mbufs * sizeof(struct mbuf *)) + + (lro_entries * sizeof(*le)); + lc->lro_mbuf_data = (struct mbuf **) + malloc(size, M_LRO, M_NOWAIT | M_ZERO); - return (error); + /* check for out of memory */ + if (lc->lro_mbuf_data == NULL) { + memset(lc, 0, sizeof(*lc)); + return (ENOMEM); + } + /* compute offset for LRO entries */ + le = (struct lro_entry *) + (lc->lro_mbuf_data + lro_mbufs); + + /* setup linked list */ + for (i = 0; i != lro_entries; i++) + SLIST_INSERT_HEAD(&lc->lro_free, le + i, next); + + return (0); } void tcp_lro_free(struct lro_ctrl *lc) { struct lro_entry *le; + unsigned x; - while (!SLIST_EMPTY(&lc->lro_free)) { - le = SLIST_FIRST(&lc->lro_free); - SLIST_REMOVE_HEAD(&lc->lro_free, next); - free(le, M_DEVBUF); + /* reset LRO free list */ + SLIST_INIT(&lc->lro_free); + + /* free active mbufs, if any */ + while ((le = SLIST_FIRST(&lc->lro_active)) != NULL) { + SLIST_REMOVE_HEAD(&lc->lro_active, next); + m_freem(le->m_head); } + + /* free mbuf array, if any */ + for (x = 0; x != lc->lro_mbuf_count; x++) + m_freem(lc->lro_mbuf_data[x]); + lc->lro_mbuf_count = 0; + + /* free allocated memory, if any */ + free(lc->lro_mbuf_data, M_LRO); + lc->lro_mbuf_data = NULL; } #ifdef TCP_LRO_UPDATE_CSUM @@ -305,6 +335,83 @@ tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le) SLIST_INSERT_HEAD(&lc->lro_free, le, next); } +static int +tcp_lro_mbuf_compare_header(const void *ppa, const void *ppb) +{ + const struct mbuf *ma = *((const struct mbuf * const *)ppa); + const struct mbuf *mb = *((const struct mbuf * const *)ppb); + int ret; + + ret = M_HASHTYPE_GET(ma) - M_HASHTYPE_GET(mb); + if (ret != 0) + goto done; + + ret = ma->m_pkthdr.flowid - mb->m_pkthdr.flowid; + if (ret != 0) + goto done; + + ret = TCP_LRO_SEQUENCE(ma) - TCP_LRO_SEQUENCE(mb); +done: + return (ret); +} + +void +tcp_lro_flush_all(struct lro_ctrl *lc) +{ + struct lro_entry *le; + uint32_t hashtype; + uint32_t flowid; + unsigned x; + + /* check if no mbufs to flush */ + if (__predict_false(lc->lro_mbuf_count == 0)) + goto done; + + /* sort all mbufs according to stream */ + qsort(lc->lro_mbuf_data, lc->lro_mbuf_count, sizeof(struct mbuf *), + &tcp_lro_mbuf_compare_header); + + /* input data into LRO engine, stream by stream */ + flowid = 0; + hashtype = M_HASHTYPE_NONE; + for (x = 0; x != lc->lro_mbuf_count; x++) { + struct mbuf *mb; + + mb = lc->lro_mbuf_data[x]; + + /* check for new stream */ + if (mb->m_pkthdr.flowid != flowid || + M_HASHTYPE_GET(mb) != hashtype) { + flowid = mb->m_pkthdr.flowid; + hashtype = M_HASHTYPE_GET(mb); + + /* flush active streams */ + while ((le = SLIST_FIRST(&lc->lro_active)) != NULL) { + SLIST_REMOVE_HEAD(&lc->lro_active, next); + tcp_lro_flush(lc, le); + } + } +#ifdef TCP_LRO_RESET_SEQUENCE + /* reset sequence number */ + TCP_LRO_SEQUENCE(mb) = 0; +#endif + /* add packet to LRO engine */ + if (tcp_lro_rx(lc, mb, 0) != 0) { + /* input packet to network layer */ + (*lc->ifp->if_input)(lc->ifp, mb); + lc->lro_queued++; + lc->lro_flushed++; + } + } +done: + /* flush active streams */ + while ((le = SLIST_FIRST(&lc->lro_active)) != NULL) { + SLIST_REMOVE_HEAD(&lc->lro_active, next); + tcp_lro_flush(lc, le); + } + lc->lro_mbuf_count = 0; +} + #ifdef INET6 static int tcp_lro_rx_ipv6(struct lro_ctrl *lc, struct mbuf *m, struct ip6_hdr *ip6, @@ -633,4 +740,37 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum) return (0); } +void +tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb) +{ + /* sanity checks */ + if (__predict_false(lc->ifp == NULL || lc->lro_mbuf_data == NULL || + lc->lro_mbuf_max == 0)) { + /* packet drop */ + m_freem(mb); + return; + } + + /* check if packet is not LRO capable */ + if (__predict_false(mb->m_pkthdr.csum_flags == 0 || + (lc->ifp->if_capenable & IFCAP_LRO) == 0)) { + lc->lro_flushed++; + lc->lro_queued++; + + /* input packet to network layer */ + (*lc->ifp->if_input) (lc->ifp, mb); + return; + } + + /* check if array is full */ + if (__predict_false(lc->lro_mbuf_count == lc->lro_mbuf_max)) + tcp_lro_flush_all(lc); + + /* store sequence number */ + TCP_LRO_SEQUENCE(mb) = lc->lro_mbuf_count; + + /* enter mbuf */ + lc->lro_mbuf_data[lc->lro_mbuf_count++] = mb; +} + /* end */ diff --git a/sys/netinet/tcp_lro.h b/sys/netinet/tcp_lro.h index ab6d74ac900b..48c679dec340 100644 --- a/sys/netinet/tcp_lro.h +++ b/sys/netinet/tcp_lro.h @@ -1,6 +1,7 @@ /*- * Copyright (c) 2006, Myricom Inc. * Copyright (c) 2008, Intel Corporation. + * Copyright (c) 2016 Mellanox Technologies. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,6 +33,14 @@ #include +#ifndef TCP_LRO_ENTRIES +/* Define default number of LRO entries per RX queue */ +#define TCP_LRO_ENTRIES 8 +#endif + +#define TCP_LRO_SEQUENCE(mb) \ + (mb)->m_pkthdr.PH_loc.thirtytwo[0] + struct lro_entry { SLIST_ENTRY(lro_entry) next; @@ -75,20 +84,26 @@ SLIST_HEAD(lro_head, lro_entry); /* NB: This is part of driver structs. */ struct lro_ctrl { struct ifnet *ifp; - int lro_queued; - int lro_flushed; - int lro_bad_csum; - int lro_cnt; + struct mbuf **lro_mbuf_data; + uint64_t lro_queued; + uint64_t lro_flushed; + uint64_t lro_bad_csum; + unsigned lro_cnt; + unsigned lro_mbuf_count; + unsigned lro_mbuf_max; struct lro_head lro_active; struct lro_head lro_free; }; int tcp_lro_init(struct lro_ctrl *); +int tcp_lro_init_args(struct lro_ctrl *, struct ifnet *, unsigned, unsigned); void tcp_lro_free(struct lro_ctrl *); void tcp_lro_flush_inactive(struct lro_ctrl *, const struct timeval *); void tcp_lro_flush(struct lro_ctrl *, struct lro_entry *); +void tcp_lro_flush_all(struct lro_ctrl *); int tcp_lro_rx(struct lro_ctrl *, struct mbuf *, uint32_t); +void tcp_lro_queue_mbuf(struct lro_ctrl *, struct mbuf *); #define TCP_LRO_CANNOT -1 #define TCP_LRO_NOT_SUPPORTED 1 diff --git a/sys/sys/param.h b/sys/sys/param.h index 6e25339e1f33..be0e73712631 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -58,7 +58,7 @@ * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1100094 /* Master, propagated to newvers */ +#define __FreeBSD_version 1100095 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,