e936121d31
- Add optimizing LRO wrapper which pre-sorts all incoming packets according to the hash type and flowid. This prevents exhaustion of the LRO entries due to too many connections at the same time. Testing using a larger number of higher bandwidth TCP connections showed that the incoming ACK packet aggregation rate increased from ~1.3:1 to almost 3:1. Another test showed that for a number of TCP connections greater than 16 per hardware receive ring, where 8 TCP connections was the LRO active entry limit, there was a significant improvement in throughput due to being able to fully aggregate more than 8 TCP stream. For very few very high bandwidth TCP streams, the optimizing LRO wrapper will add CPU usage instead of reducing CPU usage. This is expected. Network drivers which want to use the optimizing LRO wrapper needs to call "tcp_lro_queue_mbuf()" instead of "tcp_lro_rx()" and "tcp_lro_flush_all()" instead of "tcp_lro_flush()". Further the LRO control structure must be initialized using "tcp_lro_init_args()" passing a non-zero number into the "lro_mbufs" argument. - Make LRO statistics 64-bit. Previously 32-bit integers were used for statistics which can be prone to wrap-around. Fix this while at it and update all SYSCTL's which expose LRO statistics. - Ensure all data is freed when destroying a LRO control structures, especially leftover LRO entries. - Reduce number of memory allocations needed when setting up a LRO control structure by precomputing the total amount of memory needed. - Add own memory allocation counter for LRO. - Bump the FreeBSD version to force recompilation of all KLDs due to change of the LRO control structure size. Sponsored by: Mellanox Technologies Reviewed by: gallatin, sbruno, rrs, gnn, transport Tested by: Netflix Differential Revision: https://reviews.freebsd.org/D4914
112 lines
3.4 KiB
C
112 lines
3.4 KiB
C
/*-
|
|
* Copyright (c) 2006, Myricom Inc.
|
|
* Copyright (c) 2008, Intel Corporation.
|
|
* Copyright (c) 2016 Mellanox Technologies.
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*
|
|
* $FreeBSD$
|
|
*/
|
|
|
|
#ifndef _TCP_LRO_H_
|
|
#define _TCP_LRO_H_
|
|
|
|
#include <sys/time.h>
|
|
|
|
#ifndef TCP_LRO_ENTRIES
|
|
/* Define default number of LRO entries per RX queue */
|
|
#define TCP_LRO_ENTRIES 8
|
|
#endif
|
|
|
|
#define TCP_LRO_SEQUENCE(mb) \
|
|
(mb)->m_pkthdr.PH_loc.thirtytwo[0]
|
|
|
|
struct lro_entry
|
|
{
|
|
SLIST_ENTRY(lro_entry) next;
|
|
struct mbuf *m_head;
|
|
struct mbuf *m_tail;
|
|
union {
|
|
struct ip *ip4;
|
|
struct ip6_hdr *ip6;
|
|
} leip;
|
|
union {
|
|
in_addr_t s_ip4;
|
|
struct in6_addr s_ip6;
|
|
} lesource;
|
|
union {
|
|
in_addr_t d_ip4;
|
|
struct in6_addr d_ip6;
|
|
} ledest;
|
|
uint16_t source_port;
|
|
uint16_t dest_port;
|
|
uint16_t eh_type; /* EthernetHeader type. */
|
|
uint16_t append_cnt;
|
|
uint32_t p_len; /* IP header payload length. */
|
|
uint32_t ulp_csum; /* TCP, etc. checksum. */
|
|
uint32_t next_seq; /* tcp_seq */
|
|
uint32_t ack_seq; /* tcp_seq */
|
|
uint32_t tsval;
|
|
uint32_t tsecr;
|
|
uint16_t window;
|
|
uint16_t timestamp; /* flag, not a TCP hdr field. */
|
|
struct timeval mtime;
|
|
};
|
|
SLIST_HEAD(lro_head, lro_entry);
|
|
|
|
#define le_ip4 leip.ip4
|
|
#define le_ip6 leip.ip6
|
|
#define source_ip4 lesource.s_ip4
|
|
#define dest_ip4 ledest.d_ip4
|
|
#define source_ip6 lesource.s_ip6
|
|
#define dest_ip6 ledest.d_ip6
|
|
|
|
/* NB: This is part of driver structs. */
|
|
struct lro_ctrl {
|
|
struct ifnet *ifp;
|
|
struct mbuf **lro_mbuf_data;
|
|
uint64_t lro_queued;
|
|
uint64_t lro_flushed;
|
|
uint64_t lro_bad_csum;
|
|
unsigned lro_cnt;
|
|
unsigned lro_mbuf_count;
|
|
unsigned lro_mbuf_max;
|
|
|
|
struct lro_head lro_active;
|
|
struct lro_head lro_free;
|
|
};
|
|
|
|
int tcp_lro_init(struct lro_ctrl *);
|
|
int tcp_lro_init_args(struct lro_ctrl *, struct ifnet *, unsigned, unsigned);
|
|
void tcp_lro_free(struct lro_ctrl *);
|
|
void tcp_lro_flush_inactive(struct lro_ctrl *, const struct timeval *);
|
|
void tcp_lro_flush(struct lro_ctrl *, struct lro_entry *);
|
|
void tcp_lro_flush_all(struct lro_ctrl *);
|
|
int tcp_lro_rx(struct lro_ctrl *, struct mbuf *, uint32_t);
|
|
void tcp_lro_queue_mbuf(struct lro_ctrl *, struct mbuf *);
|
|
|
|
#define TCP_LRO_CANNOT -1
|
|
#define TCP_LRO_NOT_SUPPORTED 1
|
|
|
|
#endif /* _TCP_LRO_H_ */
|