TCP: remove special treatment for hardware (ifnet) TLS

Remove most special treatment for ifnet TLS in the TCP stack, except
for code to avoid mixing handshakes and bulk data.

This code made heroic efforts to send down entire TLS records to
NICs. It was added to improve the PCIe bus efficiency of older TLS
offload NICs which did not keep state per-session, and so would need
to re-DMA the first part(s) of a TLS record if a TLS record was sent
in multiple TCP packets or TSOs. Newer TLS offload NICs do not need
this feature.
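
To picture the overhead this avoided: a NIC that keeps no per-session cipher state must re-read a split record from its beginning before it can encrypt the later pieces. A rough user-space sketch of the cost (illustrative only, not code from this change; redma_bytes is a made-up name):

#include <stdio.h>

/*
 * Total bytes a stateless TLS-offload NIC must DMA to send one TLS
 * record of rec_len bytes in chunks of at most chunk_len bytes: for
 * every chunk after the first, it re-reads the record from offset 0
 * up to the chunk so it can recompute the cipher state.
 */
static unsigned long long
redma_bytes(unsigned long long rec_len, unsigned long long chunk_len)
{
	unsigned long long off, now, total = 0;

	for (off = 0; off < rec_len; off += chunk_len) {
		now = (rec_len - off < chunk_len) ? rec_len - off : chunk_len;
		total += off + now;	/* re-read [0, off), then payload */
	}
	return (total);
}

int
main(void)
{
	/* A 16KB record in four 4KB TSOs: 40960 bytes DMAed to send 16384. */
	printf("%llu\n", redma_bytes(16384, 4096));
	return (0);
}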

At Netflix, we've run extensive QoE tests which show that this feature
reduces client quality metrics, presumably because the effort to send
TLS records atomically causes the server to both wait too long to send
data (leading to buffers running dry), and to send too much data at
once (leading to packet loss).

Reviewed by:	hselasky, jhb, rrs
Sponsored by:	Netflix
Differential Revision:	https://reviews.freebsd.org/D26103
Andrew Gallatin 2020-08-19 17:59:06 +00:00
parent 697718b9b6
commit b99781834f
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=364405
3 changed files with 21 additions and 301 deletions


@@ -1957,17 +1957,6 @@ tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen,
*pkthdrlen = len_cp;
break;
}
/*
* Don't end a send in the middle of a TLS
* record if it spans multiple TLS records.
*/
if (tls != NULL && (m != start) && len < m->m_len) {
*plen = len_cp;
if (pkthdrlen != NULL)
*pkthdrlen = len_cp;
break;
}
}
#endif
mlen = min(len, m->m_len - off);
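
The removed check above breaks out of the mbuf-copy loop rather than let a send end mid-record. Its effect can be restated as a simple clamp (a sketch under that reading; clamp_to_record_boundary is a hypothetical name, not a kernel function):

/*
 * Trim a requested send length so it ends on a TLS record boundary,
 * but never clamp below one record: a send shorter than one record
 * still goes out, mirroring the (m != start) guard above.
 */
static size_t
clamp_to_record_boundary(size_t want, size_t rec_len)
{
	if (want > rec_len && (want % rec_len) != 0)
		return (want - (want % rec_len));
	return (want);
}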


@@ -38,7 +38,6 @@ __FBSDID("$FreeBSD$");
#include "opt_ipsec.h"
#include "opt_tcpdebug.h"
#include "opt_ratelimit.h"
#include "opt_kern_tls.h"
#include <sys/param.h>
#include <sys/arb.h>
#include <sys/module.h>
@@ -52,9 +51,6 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#ifdef KERN_TLS
#include <sys/ktls.h>
#endif
#include <sys/sysctl.h>
#include <sys/systm.h>
#ifdef STATS
@@ -4600,15 +4596,6 @@ bbr_timeout_tlp(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
bbr_set_state(tp, bbr, 0);
BBR_STAT_INC(bbr_tlp_tot);
maxseg = tp->t_maxseg - bbr->rc_last_options;
#ifdef KERN_TLS
if (bbr->rc_inp->inp_socket->so_snd.sb_flags & SB_TLS_IFNET) {
/*
* For hardware TLS we do *not* want to send
* new data.
*/
goto need_retran;
}
#endif
/*
* A TLP timer has expired. We have been idle for 2 rtts. So we now
* need to figure out how to force a full MSS segment out.
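
The branch deleted above forced every tail-loss probe to retransmit old data under ifnet TLS, so a probe never pushed out a fresh partial TLS record. A standalone restatement (tlp_should_retransmit is an illustrative name, not a kernel function):

/*
 * Decide whether a tail-loss probe must use the retransmit path.
 * The removed KERN_TLS branch answered "yes" unconditionally when
 * the sockbuf was in ifnet-TLS mode; otherwise a probe retransmits
 * only when no new data is available beyond what is outstanding.
 */
static int
tlp_should_retransmit(int hw_tls, unsigned long avail, unsigned long out)
{
	if (hw_tls)
		return (1);	/* removed behavior: never probe with new data */
	return (avail <= out);
}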
@@ -5802,8 +5789,6 @@ tcp_bbr_tso_size_check(struct tcp_bbr *bbr, uint32_t cts)
* Note we do not set any TSO size until we are past the initial
* window. Before that we generally use either a single MSS
* or we use the full IW size (so we burst an IW at a time)
* Also note that Hardware-TLS is special and does alternate
* things to minimize PCI Bus Bandwidth use.
*/
if (bbr->rc_tp->t_maxseg > bbr->rc_last_options) {
@@ -5811,19 +5796,12 @@ tcp_bbr_tso_size_check(struct tcp_bbr *bbr, uint32_t cts)
} else {
maxseg = BBR_MIN_SEG - bbr->rc_last_options;
}
#ifdef KERN_TLS
if (bbr->rc_inp->inp_socket->so_snd.sb_flags & SB_TLS_IFNET) {
tls_seg = ctf_get_opt_tls_size(bbr->rc_inp->inp_socket, bbr->rc_tp->snd_wnd);
bbr->r_ctl.rc_pace_min_segs = (tls_seg + bbr->rc_last_options);
}
#endif
old_tso = bbr->r_ctl.rc_pace_max_segs;
if (bbr->rc_past_init_win == 0) {
/*
* Not enough data has been acknowledged to make a
- * judgement unless we are hardware TLS. Set up
- * the initial TSO based on if we are sending a
- * full IW at once or not.
+ * judgement. Set up the initial TSO based on if we
+ * are sending a full IW at once or not.
*/
if (bbr->rc_use_google)
bbr->r_ctl.rc_pace_max_segs = ((bbr->rc_tp->t_maxseg - bbr->rc_last_options) * 2);
@@ -5833,22 +5811,10 @@ tcp_bbr_tso_size_check(struct tcp_bbr *bbr, uint32_t cts)
bbr->r_ctl.rc_pace_max_segs = bbr->rc_tp->t_maxseg - bbr->rc_last_options;
if (bbr->r_ctl.rc_pace_min_segs != bbr->rc_tp->t_maxseg)
bbr->r_ctl.rc_pace_min_segs = bbr->rc_tp->t_maxseg;
#ifdef KERN_TLS
if ((bbr->rc_inp->inp_socket->so_snd.sb_flags & SB_TLS_IFNET) && tls_seg) {
/*
* For hardware TLS we set our min to the tls_seg size.
*/
bbr->r_ctl.rc_pace_max_segs = tls_seg;
bbr->r_ctl.rc_pace_min_segs = tls_seg + bbr->rc_last_options;
}
#endif
if (bbr->r_ctl.rc_pace_max_segs == 0) {
bbr->r_ctl.rc_pace_max_segs = maxseg;
}
bbr_log_type_tsosize(bbr, cts, bbr->r_ctl.rc_pace_max_segs, tls_seg, old_tso, maxseg, 0);
#ifdef KERN_TLS
if ((bbr->rc_inp->inp_socket->so_snd.sb_flags & SB_TLS_IFNET) == 0)
#endif
bbr_adjust_for_hw_pacing(bbr, cts);
return;
}
@@ -5941,41 +5907,17 @@ tcp_bbr_tso_size_check(struct tcp_bbr *bbr, uint32_t cts)
new_tso = maxseg * bbr->r_ctl.bbr_hptsi_segments_floor;
if (new_tso > PACE_MAX_IP_BYTES)
new_tso = rounddown(PACE_MAX_IP_BYTES, maxseg);
/* Enforce an utter maximum if we are not HW-TLS */
#ifdef KERN_TLS
if ((bbr->rc_inp->inp_socket->so_snd.sb_flags & SB_TLS_IFNET) == 0)
#endif
if (bbr->r_ctl.bbr_utter_max && (new_tso > (bbr->r_ctl.bbr_utter_max * maxseg))) {
new_tso = bbr->r_ctl.bbr_utter_max * maxseg;
}
#ifdef KERN_TLS
if (tls_seg) {
/*
* Lets move the output size
* up to 1 or more TLS record sizes.
*/
uint32_t temp;
temp = roundup(new_tso, tls_seg);
new_tso = temp;
/* Back down if needed to under a full frame */
while (new_tso > PACE_MAX_IP_BYTES)
new_tso -= tls_seg;
/* Enforce an utter maximum. */
if (bbr->r_ctl.bbr_utter_max && (new_tso > (bbr->r_ctl.bbr_utter_max * maxseg))) {
new_tso = bbr->r_ctl.bbr_utter_max * maxseg;
}
}
#endif
if (old_tso != new_tso) {
/* Only log changes */
bbr_log_type_tsosize(bbr, cts, new_tso, tls_seg, old_tso, maxseg, 0);
bbr->r_ctl.rc_pace_max_segs = new_tso;
}
- #ifdef KERN_TLS
- if ((bbr->rc_inp->inp_socket->so_snd.sb_flags & SB_TLS_IFNET) &&
- tls_seg) {
- bbr->r_ctl.rc_pace_min_segs = tls_seg + bbr->rc_last_options;
- } else
- #endif
- /* We have hardware pacing and not hardware TLS! */
- bbr_adjust_for_hw_pacing(bbr, cts);
+ /* We have hardware pacing! */
+ bbr_adjust_for_hw_pacing(bbr, cts);
}
static void
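
For reference, the rounding the deleted KERN_TLS block performed on the TSO burst size can be sketched standalone. Assumptions in this sketch: PACE_MAX_IP_BYTES is the usual ~64K IP length cap, roundup() is the familiar sys/param.h macro, and round_tso_to_tls is a made-up name:

#define PACE_MAX_IP_BYTES	65535	/* assumed ~64K cap */
#define roundup(x, y)	((((x) + ((y) - 1)) / (y)) * (y))

/*
 * Lift the TSO burst to a whole number of TLS records, back it
 * down under the IP length limit, then apply the "utter max" cap,
 * as the removed block did.
 */
static unsigned int
round_tso_to_tls(unsigned int new_tso, unsigned int tls_seg,
    unsigned int utter_max_bytes)
{
	new_tso = roundup(new_tso, tls_seg);
	while (new_tso > PACE_MAX_IP_BYTES)
		new_tso -= tls_seg;
	if (utter_max_bytes != 0 && new_tso > utter_max_bytes)
		new_tso = utter_max_bytes;
	return (new_tso);
}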
@@ -12094,7 +12036,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
volatile int32_t sack_rxmit;
struct bbr_sendmap *rsm = NULL;
int32_t tso, mtu;
int force_tso = 0;
struct tcpopt to;
int32_t slot = 0;
struct inpcb *inp;
@@ -12113,11 +12054,9 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
inp = bbr->rc_inp;
so = inp->inp_socket;
sb = &so->so_snd;
#ifdef KERN_TLS
if (sb->sb_flags & SB_TLS_IFNET)
hw_tls = 1;
else
#endif
hw_tls = 0;
kern_prefetch(sb, &maxseg);
maxseg = tp->t_maxseg - bbr->rc_last_options;
@@ -12423,9 +12362,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
} else
len = rsm->r_end - rsm->r_start;
if ((bbr->rc_resends_use_tso == 0) &&
#ifdef KERN_TLS
((sb->sb_flags & SB_TLS_IFNET) == 0) &&
#endif
(len > maxseg)) {
len = maxseg;
more_to_rxt = 1;
@@ -13199,13 +13135,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
* length beyond the t_maxseg length. Clear the FIN bit because we
* cut off the tail of the segment.
*/
#ifdef KERN_TLS
/* force TSO for so TLS offload can get mss */
if (sb->sb_flags & SB_TLS_IFNET) {
force_tso = 1;
}
#endif
if (len > maxseg) {
if (len != 0 && (flags & TH_FIN)) {
flags &= ~TH_FIN;
@@ -13239,8 +13168,7 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
* Prevent the last segment from being fractional
* unless the send sockbuf can be emptied:
*/
- if (((sb_offset + len) < sbavail(sb)) &&
- (hw_tls == 0)) {
+ if ((sb_offset + len) < sbavail(sb)) {
moff = len % (uint32_t)maxseg;
if (moff != 0) {
len -= moff;
@@ -13432,7 +13360,7 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
, &filled_all
#endif
);
- if (len <= maxseg && !force_tso) {
+ if (len <= maxseg) {
/*
* Must have run out of mbufs for the copy;
* shorten it to no longer need tso. Lets
@@ -13758,8 +13686,8 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
* header checksum is always provided. XXX: Fixme: This is currently
* not the case for IPv6.
*/
- if (tso || force_tso) {
- KASSERT(force_tso || len > maxseg,
+ if (tso) {
+ KASSERT(len > maxseg,
("%s: len:%d <= tso_segsz:%d", __func__, len, maxseg));
m->m_pkthdr.csum_flags |= CSUM_TSO;
csum_flags |= CSUM_TSO;
@@ -13938,35 +13866,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
bbr->bbr_segs_rcvd = 0;
if (len == 0)
counter_u64_add(bbr_out_size[TCP_MSS_ACCT_SNDACK], 1);
else if (hw_tls) {
if (filled_all ||
(len >= bbr->r_ctl.rc_pace_max_segs))
BBR_STAT_INC(bbr_meets_tso_thresh);
else {
if (doing_tlp) {
BBR_STAT_INC(bbr_miss_tlp);
bbr_log_type_hrdwtso(tp, bbr, len, 1, what_we_can);
} else if (rsm) {
BBR_STAT_INC(bbr_miss_retran);
bbr_log_type_hrdwtso(tp, bbr, len, 2, what_we_can);
} else if ((ctf_outstanding(tp) + bbr->r_ctl.rc_pace_max_segs) > sbavail(sb)) {
BBR_STAT_INC(bbr_miss_tso_app);
bbr_log_type_hrdwtso(tp, bbr, len, 3, what_we_can);
} else if ((ctf_flight_size(tp, (bbr->r_ctl.rc_sacked +
bbr->r_ctl.rc_lost_bytes)) + bbr->r_ctl.rc_pace_max_segs) > tp->snd_cwnd) {
BBR_STAT_INC(bbr_miss_tso_cwnd);
bbr_log_type_hrdwtso(tp, bbr, len, 4, what_we_can);
} else if ((ctf_outstanding(tp) + bbr->r_ctl.rc_pace_max_segs) > tp->snd_wnd) {
BBR_STAT_INC(bbr_miss_tso_rwnd);
bbr_log_type_hrdwtso(tp, bbr, len, 5, what_we_can);
} else {
BBR_STAT_INC(bbr_miss_unknown);
bbr_log_type_hrdwtso(tp, bbr, len, 6, what_we_can);
}
}
}
/* Do accounting for new sends */
if ((len > 0) && (rsm == NULL)) {
int idx;
@@ -14286,7 +14185,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
(bbr->r_ctl.rc_pace_max_segs > tp->t_maxseg) &&
(doing_tlp == 0) &&
(tso == 0) &&
(hw_tls == 0) &&
(len > 0) &&
((flags & TH_RST) == 0) &&
((flags & TH_SYN) == 0) &&


@@ -32,7 +32,6 @@ __FBSDID("$FreeBSD$");
#include "opt_ipsec.h"
#include "opt_tcpdebug.h"
#include "opt_ratelimit.h"
#include "opt_kern_tls.h"
#include <sys/param.h>
#include <sys/arb.h>
#include <sys/module.h>
@@ -48,9 +47,6 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h> /* for proc0 declaration */
#include <sys/socket.h>
#include <sys/socketvar.h>
#ifdef KERN_TLS
#include <sys/ktls.h>
#endif
#include <sys/sysctl.h>
#include <sys/systm.h>
#ifdef STATS
@@ -200,7 +196,6 @@ static int32_t rack_non_rxt_use_cr = 0; /* does a non-rxt in recovery use the co
static int32_t rack_persist_min = 250; /* 250ms */
static int32_t rack_persist_max = 2000; /* 2 Second */
static int32_t rack_sack_not_required = 0; /* set to one to allow non-sack to use rack */
static int32_t rack_hw_tls_max_seg = 3; /* 3 means use hw-tls single segment */
static int32_t rack_default_init_window = 0; /* Use system default */
static int32_t rack_limit_time_with_srtt = 0;
static int32_t rack_hw_pace_adjust = 0;
@@ -349,15 +344,6 @@ counter_u64_t rack_collapsed_win;
counter_u64_t rack_tlp_does_nada;
counter_u64_t rack_try_scwnd;
/* Counters for HW TLS */
counter_u64_t rack_tls_rwnd;
counter_u64_t rack_tls_cwnd;
counter_u64_t rack_tls_app;
counter_u64_t rack_tls_other;
counter_u64_t rack_tls_filled;
counter_u64_t rack_tls_rxt;
counter_u64_t rack_tls_tlp;
/* Temp CPU counters */
counter_u64_t rack_find_high;
@@ -564,13 +550,6 @@ sysctl_rack_clear(SYSCTL_HANDLER_ARGS)
counter_u64_zero(rack_alloc_limited_conns);
counter_u64_zero(rack_split_limited);
counter_u64_zero(rack_find_high);
counter_u64_zero(rack_tls_rwnd);
counter_u64_zero(rack_tls_cwnd);
counter_u64_zero(rack_tls_app);
counter_u64_zero(rack_tls_other);
counter_u64_zero(rack_tls_filled);
counter_u64_zero(rack_tls_rxt);
counter_u64_zero(rack_tls_tlp);
counter_u64_zero(rack_sack_attacks_detected);
counter_u64_zero(rack_sack_attacks_reversed);
counter_u64_zero(rack_sack_used_next_merge);
@@ -627,11 +606,6 @@ rack_init_sysctls(void)
OID_AUTO, "rate_sample_method", CTLFLAG_RW,
&rack_rate_sample_method , USE_RTT_LOW,
"What method should we use for rate sampling 0=high, 1=low ");
SYSCTL_ADD_S32(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_sysctl_root),
OID_AUTO, "hw_tlsmax", CTLFLAG_RW,
&rack_hw_tls_max_seg , 3,
"What is the maximum number of full TLS records that will be sent at once");
/* Probe rtt related controls */
rack_probertt = SYSCTL_ADD_NODE(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_sysctl_root),
@@ -1449,48 +1423,6 @@ rack_init_sysctls(void)
&rack_try_scwnd,
"Total number of scwnd attempts");
rack_tls_rwnd = counter_u64_alloc(M_WAITOK);
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_counters),
OID_AUTO, "tls_rwnd", CTLFLAG_RD,
&rack_tls_rwnd,
"Total hdwr tls rwnd limited");
rack_tls_cwnd = counter_u64_alloc(M_WAITOK);
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_counters),
OID_AUTO, "tls_cwnd", CTLFLAG_RD,
&rack_tls_cwnd,
"Total hdwr tls cwnd limited");
rack_tls_app = counter_u64_alloc(M_WAITOK);
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_counters),
OID_AUTO, "tls_app", CTLFLAG_RD,
&rack_tls_app,
"Total hdwr tls app limited");
rack_tls_other = counter_u64_alloc(M_WAITOK);
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_counters),
OID_AUTO, "tls_other", CTLFLAG_RD,
&rack_tls_other,
"Total hdwr tls other limited");
rack_tls_filled = counter_u64_alloc(M_WAITOK);
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_counters),
OID_AUTO, "tls_filled", CTLFLAG_RD,
&rack_tls_filled,
"Total hdwr tls filled");
rack_tls_rxt = counter_u64_alloc(M_WAITOK);
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_counters),
OID_AUTO, "tls_rxt", CTLFLAG_RD,
&rack_tls_rxt,
"Total hdwr rxt");
rack_tls_tlp = counter_u64_alloc(M_WAITOK);
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_counters),
OID_AUTO, "tls_tlp", CTLFLAG_RD,
&rack_tls_tlp,
"Total hdwr tls tlp");
rack_per_timer_hole = counter_u64_alloc(M_WAITOK);
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_counters),
@@ -2247,13 +2179,6 @@ rack_counter_destroy(void)
counter_u64_free(rack_collapsed_win);
counter_u64_free(rack_tlp_does_nada);
counter_u64_free(rack_try_scwnd);
counter_u64_free(rack_tls_rwnd);
counter_u64_free(rack_tls_cwnd);
counter_u64_free(rack_tls_app);
counter_u64_free(rack_tls_other);
counter_u64_free(rack_tls_filled);
counter_u64_free(rack_tls_rxt);
counter_u64_free(rack_tls_tlp);
counter_u64_free(rack_per_timer_hole);
COUNTER_ARRAY_FREE(rack_out_size, TCP_MSS_ACCT_SIZE);
COUNTER_ARRAY_FREE(rack_opts_arry, RACK_OPTS_SIZE);
@@ -5273,15 +5198,6 @@ rack_timeout_tlp(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
if (rack->r_state && (rack->r_state != tp->t_state))
rack_set_state(tp, rack);
so = tp->t_inpcb->inp_socket;
#ifdef KERN_TLS
if (rack->rc_inp->inp_socket->so_snd.sb_flags & SB_TLS_IFNET) {
/*
* For hardware TLS we do *not* want to send
* new data, lets instead just do a retransmission.
*/
goto need_retran;
}
#endif
avail = sbavail(&so->so_snd);
out = tp->snd_max - tp->snd_una;
if (out > tp->snd_wnd) {
@@ -8206,11 +8122,8 @@ rack_check_bottom_drag(struct tcpcb *tp,
uint32_t segsiz, minseg;
segsiz = ctf_fixed_maxseg(tp);
- if (so->so_snd.sb_flags & SB_TLS_IFNET) {
- minseg = rack->r_ctl.rc_pace_min_segs;
- } else {
- minseg = segsiz;
- }
+ minseg = segsiz;
if (tp->snd_max == tp->snd_una) {
/*
* We are doing dynamic pacing and we are way
@@ -10384,24 +10297,13 @@ static void
rack_set_pace_segments(struct tcpcb *tp, struct tcp_rack *rack, uint32_t line)
{
uint64_t bw_est, rate_wanted;
uint32_t tls_seg = 0;
int chged = 0;
uint32_t user_max;
user_max = ctf_fixed_maxseg(tp) * rack->rc_user_set_max_segs;
- #ifdef KERN_TLS
- if (rack->rc_inp->inp_socket->so_snd.sb_flags & SB_TLS_IFNET) {
- tls_seg = ctf_get_opt_tls_size(rack->rc_inp->inp_socket, rack->rc_tp->snd_wnd);
- if (tls_seg != rack->r_ctl.rc_pace_min_segs)
- chged = 1;
- rack->r_ctl.rc_pace_min_segs = tls_seg;
- } else
- #endif
- {
- if (ctf_fixed_maxseg(tp) != rack->r_ctl.rc_pace_min_segs)
- chged = 1;
- rack->r_ctl.rc_pace_min_segs = ctf_fixed_maxseg(tp);
- }
+ if (ctf_fixed_maxseg(tp) != rack->r_ctl.rc_pace_min_segs)
+ chged = 1;
+ rack->r_ctl.rc_pace_min_segs = ctf_fixed_maxseg(tp);
if (rack->use_fixed_rate || rack->rc_force_max_seg) {
if (user_max != rack->r_ctl.rc_pace_max_segs)
chged = 1;
@@ -10458,31 +10360,8 @@ rack_set_pace_segments(struct tcpcb *tp, struct tcp_rack *rack, uint32_t line)
chged = 1;
rack->r_ctl.rc_pace_max_segs = PACE_MAX_IP_BYTES;
}
#ifdef KERN_TLS
uint32_t orig;
if (tls_seg != 0) {
orig = rack->r_ctl.rc_pace_max_segs;
if (rack_hw_tls_max_seg > 1) {
rack->r_ctl.rc_pace_max_segs /= tls_seg;
if (rack_hw_tls_max_seg > rack->r_ctl.rc_pace_max_segs)
rack->r_ctl.rc_pace_max_segs = rack_hw_tls_max_seg;
} else {
rack->r_ctl.rc_pace_max_segs = 1;
}
if (rack->r_ctl.rc_pace_max_segs == 0)
rack->r_ctl.rc_pace_max_segs = 1;
rack->r_ctl.rc_pace_max_segs *= tls_seg;
if (rack->r_ctl.rc_pace_max_segs > PACE_MAX_IP_BYTES) {
/* We can't go over the max bytes (usually 64k) */
rack->r_ctl.rc_pace_max_segs = ((PACE_MAX_IP_BYTES / tls_seg) * tls_seg);
}
if (orig != rack->r_ctl.rc_pace_max_segs)
chged = 1;
}
#endif
if (chged)
- rack_log_type_hrdwtso(tp, rack, tls_seg, rack->rc_inp->inp_socket->so_snd.sb_flags, line, 2);
+ rack_log_type_hrdwtso(tp, rack, 0, rack->rc_inp->inp_socket->so_snd.sb_flags, line, 2);
}
static int
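
The sizing deleted above expressed RACK's pacing burst as a whole number of TLS records, bounded by the rack_hw_tls_max_seg sysctl and the IP length cap. A standalone restatement (a sketch; tls_pace_max is a hypothetical name, PACE_MAX_IP_BYTES assumed as in the earlier sketch):

#define PACE_MAX_IP_BYTES	65535	/* assumed ~64K IP length cap */

static unsigned int
tls_pace_max(unsigned int pace_max_bytes, unsigned int tls_seg,
    unsigned int hw_tls_max_recs)
{
	unsigned int nrecs;

	if (hw_tls_max_recs > 1) {
		nrecs = pace_max_bytes / tls_seg;
		/* a floor, as in the removed code, despite the "max" name */
		if (hw_tls_max_recs > nrecs)
			nrecs = hw_tls_max_recs;
	} else
		nrecs = 1;
	if (nrecs == 0)
		nrecs = 1;
	pace_max_bytes = nrecs * tls_seg;
	if (pace_max_bytes > PACE_MAX_IP_BYTES)
		pace_max_bytes = (PACE_MAX_IP_BYTES / tls_seg) * tls_seg;
	return (pace_max_bytes);
}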
@@ -11669,12 +11548,6 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str
slot = (uint32_t)res;
orig_val = rack->r_ctl.rc_pace_max_segs;
rack_set_pace_segments(rack->rc_tp, rack, __LINE__);
#ifdef KERN_TLS
/* For TLS we need to override this, possibly */
if (rack->rc_inp->inp_socket->so_snd.sb_flags & SB_TLS_IFNET) {
rack_set_pace_segments(rack->rc_tp, rack, __LINE__);
}
#endif
/* Did we change the TSO size, if so log it */
if (rack->r_ctl.rc_pace_max_segs != orig_val)
rack_log_pacing_delay_calc(rack, len, slot, orig_val, 0, 0, 15, __LINE__, NULL);
@@ -12040,7 +11913,6 @@ rack_output(struct tcpcb *tp)
uint32_t cwnd_to_use;
int32_t do_a_prefetch;
int32_t prefetch_rsm = 0;
int force_tso = 0;
int32_t orig_len;
struct timeval tv;
int32_t prefetch_so_done = 0;
@@ -12062,9 +11934,7 @@ rack_output(struct tcpcb *tp)
kern_prefetch(sb, &do_a_prefetch);
do_a_prefetch = 1;
hpts_calling = inp->inp_hpts_calls;
#ifdef KERN_TLS
hw_tls = (so->so_snd.sb_flags & SB_TLS_IFNET) != 0;
#endif
NET_EPOCH_ASSERT();
INP_WLOCK_ASSERT(inp);
@@ -12206,11 +12076,7 @@ rack_output(struct tcpcb *tp)
tso = 0;
mtu = 0;
segsiz = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs);
- if (so->so_snd.sb_flags & SB_TLS_IFNET) {
- minseg = rack->r_ctl.rc_pace_min_segs;
- } else {
- minseg = segsiz;
- }
+ minseg = segsiz;
sb_offset = tp->snd_max - tp->snd_una;
cwnd_to_use = rack->r_ctl.cwnd_to_use = tp->snd_cwnd;
#ifdef NETFLIX_SHARED_CWND
@@ -13243,12 +13109,6 @@ rack_output(struct tcpcb *tp)
ipoptlen += ipsec_optlen;
#endif
#ifdef KERN_TLS
/* force TSO for so TLS offload can get mss */
if (sb->sb_flags & SB_TLS_IFNET) {
force_tso = 1;
}
#endif
/*
* Adjust data length if insertion of options will bump the packet
* length beyond the t_maxseg length. Clear the FIN bit because we
@@ -13288,8 +13148,7 @@ rack_output(struct tcpcb *tp)
* unless the send sockbuf can be emptied:
*/
max_len = (tp->t_maxseg - optlen);
- if (((sb_offset + len) < sbavail(sb)) &&
- (hw_tls == 0)) {
+ if ((sb_offset + len) < sbavail(sb)) {
moff = len % (u_int)max_len;
if (moff != 0) {
mark = 3;
@@ -13761,8 +13620,8 @@ rack_output(struct tcpcb *tp)
* header checksum is always provided. XXX: Fixme: This is currently
* not the case for IPv6.
*/
- if (tso || force_tso) {
- KASSERT(force_tso || len > tp->t_maxseg - optlen,
+ if (tso) {
+ KASSERT(len > tp->t_maxseg - optlen,
("%s: len <= tso_segsz", __func__));
m->m_pkthdr.csum_flags |= CSUM_TSO;
m->m_pkthdr.tso_segsz = tp->t_maxseg - optlen;
@@ -13969,32 +13828,6 @@ rack_output(struct tcpcb *tp)
else
counter_u64_add(rack_out_size[idx], 1);
}
if (hw_tls && len > 0) {
if (filled_all) {
counter_u64_add(rack_tls_filled, 1);
rack_log_type_hrdwtso(tp, rack, len, 0, orig_len, 1);
} else {
if (rsm) {
counter_u64_add(rack_tls_rxt, 1);
rack_log_type_hrdwtso(tp, rack, len, 2, orig_len, 1);
} else if (doing_tlp) {
counter_u64_add(rack_tls_tlp, 1);
rack_log_type_hrdwtso(tp, rack, len, 3, orig_len, 1);
} else if ( (ctf_outstanding(tp) + minseg) > sbavail(sb)) {
counter_u64_add(rack_tls_app, 1);
rack_log_type_hrdwtso(tp, rack, len, 4, orig_len, 1);
} else if ((ctf_flight_size(tp, rack->r_ctl.rc_sacked) + minseg) > cwnd_to_use) {
counter_u64_add(rack_tls_cwnd, 1);
rack_log_type_hrdwtso(tp, rack, len, 5, orig_len, 1);
} else if ((ctf_outstanding(tp) + minseg) > tp->snd_wnd) {
counter_u64_add(rack_tls_rwnd, 1);
rack_log_type_hrdwtso(tp, rack, len, 6, orig_len, 1);
} else {
rack_log_type_hrdwtso(tp, rack, len, 7, orig_len, 1);
counter_u64_add(rack_tls_other, 1);
}
}
}
}
if (rack->rack_no_prr == 0) {
if (sub_from_prr && (error == 0)) {