Update the LRO processing code so that we can support

further CPU enhancements for compressed acks. These
are acks that are compressed into an mbuf. The transport
has to be aware of how to process these, and an upcoming
update to rack will do so. You need the rack changes
to actually test and validate these since if the transport
does not support mbuf compression, then the old code paths
stay in place. This commit also removes the concept
of logging when you don't hold a lock (which was quite
dangerous; it was only meant for some early debugging but
had been left in the code).

Sponsored by: Netflix Inc.
Differential Revision: https://reviews.freebsd.org/D28374
This commit is contained in:
Randall Stewart 2021-01-27 12:09:32 -05:00
parent 4a7d84058d
commit 69a34e8d02
6 changed files with 830 additions and 124 deletions

View File

@ -731,8 +731,8 @@ int inp_so_options(const struct inpcb *inp);
/*
* Flags for inp_flags2.
*/
#define INP_2UNUSED1 0x00000001
#define INP_2UNUSED2 0x00000002
#define INP_MBUF_L_ACKS 0x00000001 /* We need large mbufs for ack compression */
#define INP_MBUF_ACKCMP 0x00000002 /* TCP mbuf ack compression ok */
#define INP_PCBGROUPWILD 0x00000004 /* in pcbgroup wildcard list */
#define INP_REUSEPORT 0x00000008 /* SO_REUSEPORT option is set */
#define INP_FREED 0x00000010 /* inp itself is not valid */

File diff suppressed because it is too large Load Diff

View File

@ -40,6 +40,29 @@
#define TCP_LRO_ENTRIES 8
#endif
/*
* Flags for an ACK entry used for compression.
* The bottom 8 bits hold the th_flags.
* LRO itself adds only the TSTMP flags,
* to indicate whether either type of
* timestamp is filled in, and the
* HAS_TSTMP flag to indicate that the
* TCP timestamp option is valid.
*
* The other 5 flag bits are for processing
* by a stack.
*
*/
#define TSTMP_LRO 0x0100
#define TSTMP_HDWR 0x0200
#define HAS_TSTMP 0x0400
/* Flags in LRO entry */
#define CAN_USE_ACKCMP 0x0001
#define HAS_COMP_ENTRIES 0x0002
struct inpcb;
struct lro_entry {
LIST_ENTRY(lro_entry) next;
LIST_ENTRY(lro_entry) hash_next;
@ -47,6 +70,7 @@ struct lro_entry {
struct mbuf *m_tail;
struct mbuf *m_last_mbuf;
struct mbuf *m_prev_last;
struct inpcb *inp;
union {
struct ip *ip4;
struct ip6_hdr *ip6;
@ -75,6 +99,9 @@ struct lro_entry {
uint16_t need_wakeup;
uint16_t mbuf_cnt; /* Count of mbufs collected see note */
uint16_t mbuf_appended;
uint16_t cmp_ack_cnt;
uint16_t flags;
uint16_t strip_cnt;
struct timeval mtime;
};
/*
@ -103,6 +130,7 @@ struct lro_mbuf_sort {
struct lro_ctrl {
struct ifnet *ifp;
struct lro_mbuf_sort *lro_mbuf_data;
struct timeval lro_last_flush;
uint64_t lro_queued;
uint64_t lro_flushed;
uint64_t lro_bad_csum;
@ -118,6 +146,23 @@ struct lro_ctrl {
struct lro_head lro_free;
};
/*
 * One ACK entry used for ack compression. The TSTMP_LRO, TSTMP_HDWR and
 * HAS_TSTMP bits in 'flags' say which of the timestamp fields are valid;
 * the low 8 bits of 'flags' carry the th_flags. The fields, including the
 * trailing pad, add up to 32 bytes.
 */
struct tcp_ackent {
uint64_t timestamp; /* hardware or software timestamp, valid if TSTMP_LRO or TSTMP_HDWR set */
uint32_t seq; /* th_seq value */
uint32_t ack; /* th_ack value */
uint32_t ts_value; /* TS option value, valid if HAS_TSTMP is set */
uint32_t ts_echo; /* TS option echo, valid if HAS_TSTMP is set */
uint16_t win; /* TCP window */
uint16_t flags; /* Flags to say if TS is present and type of timestamp and th_flags */
uint8_t codepoint; /* IP level codepoint including ECN bits */
uint8_t ack_val_set; /* Classification of ack used by the stack */
uint8_t pad[2]; /* To 32 byte boundary */
};
/* We use two M_PROTO flags on the mbuf */
#define M_ACKCMP M_PROTO4 /* Indicates LRO is sending in an ack-compression mbuf */
#define M_LRO_EHDRSTRP M_PROTO6 /* Indicates that LRO has stripped the ethernet header */
#define TCP_LRO_LENGTH_MAX 65535
#define TCP_LRO_ACKCNT_MAX 65535 /* unlimited */

View File

@ -1236,6 +1236,13 @@ tcp_init(void)
tcp_inp_lro_single_push = counter_u64_alloc(M_WAITOK);
tcp_inp_lro_locks_taken = counter_u64_alloc(M_WAITOK);
tcp_inp_lro_sack_wake = counter_u64_alloc(M_WAITOK);
tcp_extra_mbuf = counter_u64_alloc(M_WAITOK);
tcp_would_have_but = counter_u64_alloc(M_WAITOK);
tcp_comp_total = counter_u64_alloc(M_WAITOK);
tcp_uncomp_total = counter_u64_alloc(M_WAITOK);
tcp_csum_hardware = counter_u64_alloc(M_WAITOK);
tcp_csum_hardware_w_ph = counter_u64_alloc(M_WAITOK);
tcp_csum_software = counter_u64_alloc(M_WAITOK);
#ifdef TCPPCAP
tcp_pcap_init();
#endif

View File

@ -984,6 +984,13 @@ extern counter_u64_t tcp_inp_lro_compressed;
extern counter_u64_t tcp_inp_lro_single_push;
extern counter_u64_t tcp_inp_lro_locks_taken;
extern counter_u64_t tcp_inp_lro_sack_wake;
extern counter_u64_t tcp_extra_mbuf;
extern counter_u64_t tcp_would_have_but;
extern counter_u64_t tcp_comp_total;
extern counter_u64_t tcp_uncomp_total;
extern counter_u64_t tcp_csum_hardware;
extern counter_u64_t tcp_csum_hardware_w_ph;
extern counter_u64_t tcp_csum_software;
#ifdef NETFLIX_EXP_DETECTION
/* Various SACK attack thresholds */

View File

@ -205,6 +205,7 @@ struct pkthdr {
#define csum_data PH_per.thirtytwo[1] /* inbound from hardware up */
#define lro_len PH_loc.sixteen[0] /* inbound during LRO (no reassembly) */
#define lro_csum PH_loc.sixteen[1] /* inbound during LRO (no reassembly) */
#define lro_etype PH_loc.sixteen[2] /* inbound during LRO (no reassembly) */
/* Note PH_loc is used during IP reassembly (all 8 bytes as a ptr) */
/*