tcp: Rack - in the absence of LRO fixed rate pacing (loopback or interfaces with no LRO) does not work correctly.

Rack is capable of fixed rate or dynamic rate pacing. Both of these can get mixed up when
LRO is not available. This is because LRO will hold off waking up the tcp connection to
processing the inbound packets until the pacing timer is up. Without LRO the pacing only
sort-of works. Sometimes we pace correctly, other times not so much.

This set of changes will make it so pacing works properly in the absence of LRO.

Reviewed by: tuexen
Sponsored by: Netflix Inc
Differential Revision:https://reviews.freebsd.org/D39494
This commit is contained in:
Randall Stewart 2023-04-10 16:33:56 -04:00
parent c3dbcadca2
commit a2b33c9a7a

View File

@ -16433,6 +16433,9 @@ rack_do_compressed_ack_processing(struct tcpcb *tp, struct socket *so, struct mb
return (0);
}
#define TCP_LRO_TS_OPTION \
ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | \
(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)
static int
rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
@ -16458,6 +16461,8 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
struct tcp_rack *rack;
struct rack_sendmap *rsm;
int32_t prev_state = 0;
int no_output = 0;
int slot_remaining = 0;
#ifdef TCP_ACCOUNTING
int ack_val_set = 0xf;
#endif
@ -16480,6 +16485,43 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
*/
rack_deferred_init(tp, rack);
}
/*
* Check to see if we need to skip any output plans. This
* can happen in the non-LRO path where we are pacing and
* must process the ack coming in but need to defer sending
* anything becase a pacing timer is running.
*/
us_cts = tcp_tv_to_usectick(tv);
if ((rack->rc_always_pace == 1) &&
(rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) &&
(TSTMP_LT(us_cts, rack->r_ctl.rc_last_output_to))) {
/*
* Ok conditions are right for queuing the packets
* but we do have to check the flags in the inp, it
* could be, if a sack is present, we want to be awoken and
* so should process the packets.
*/
slot_remaining = rack->r_ctl.rc_last_output_to - us_cts;
if (rack->rc_inp->inp_flags2 & INP_DONT_SACK_QUEUE) {
no_output = 1;
} else {
/*
* If there is no options, or just a
* timestamp option, we will want to queue
* the packets. This is the same that LRO does
* and will need to change with accurate ECN.
*/
uint32_t *ts_ptr;
int optlen;
optlen = (th->th_off << 2) - sizeof(struct tcphdr);
ts_ptr = (uint32_t *)(th + 1);
if ((optlen == 0) ||
((optlen == TCPOLEN_TSTAMP_APPA) &&
(*ts_ptr == TCP_LRO_TS_OPTION)))
no_output = 1;
}
}
if (m->m_flags & M_ACKCMP) {
/*
* All compressed ack's are ack's by definition so
@ -16914,7 +16956,7 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
}
}
#endif
if (nxt_pkt == 0) {
if ((nxt_pkt == 0) && (no_output == 0)) {
if ((rack->r_wanted_output != 0) || (rack->r_fast_output != 0)) {
do_output_now:
if (tcp_output(tp) < 0) {
@ -16927,6 +16969,16 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
}
rack_start_hpts_timer(rack, tp, cts, 0, 0, 0);
rack_free_trim(rack);
} else if ((no_output == 1) &&
(nxt_pkt == 0) &&
(tcp_in_hpts(rack->rc_inp) == 0)) {
/*
* We are not in hpts and we had a pacing timer up. Use
* the remaining time (slot_remaining) to restart the timer.
*/
KASSERT ((slot_remaining != 0), ("slot remaining is zero for rack:%p tp:%p", rack, tp));
rack_start_hpts_timer(rack, tp, cts, slot_remaining, 0, 0);
rack_free_trim(rack);
}
/* Update any rounds needed */
if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp))