This brings FreeBSD into sync with the Netflix versions of rack and bbr.

This fixes several breakages (panics) that have been reported since the tcp_lro code was committed. Quite a few new features are now in rack (perfecting of DGP -- Dynamic Goodput Pacing -- among the largest). There is also support for ack-war prevention. Documents coming soon on rack.

Sponsored by: Netflix
Reviewed by: rscheff, mtuexen
Differential Revision: https://reviews.freebsd.org/D30036
parent 0ec3e99111
commit 5d8fd932e4
@@ -91,15 +91,20 @@ struct cc_var {
 		struct sctp_nets *sctp;
 	} ccvc;
 	uint16_t nsegs;	/* # segments coalesced into current chain. */
+	uint8_t labc;	/* Don't use system abc, use the passed-in value. */
 };

 /* cc_var flags. */
 #define CCF_ABC_SENTAWND	0x0001	/* ABC counted cwnd worth of bytes? */
 #define CCF_CWND_LIMITED	0x0002	/* Are we currently cwnd limited? */
-#define CCF_UNUSED1		0x0004	/* unused */
+#define CCF_USE_LOCAL_ABC	0x0004	/* Don't use the system l_abc val */
 #define CCF_ACKNOW		0x0008	/* Will this ack be sent now? */
 #define CCF_IPHDR_CE		0x0010	/* Does this packet set CE bit? */
 #define CCF_TCPHDR_CWR		0x0020	/* Does this packet set CWR bit? */
 #define CCF_MAX_CWND		0x0040	/* Have we reached maximum cwnd? */
 #define CCF_CHG_MAX_CWND	0x0080	/* Cubic max_cwnd changed, for K */
 #define CCF_USR_IWND		0x0100	/* User specified initial window */
 #define CCF_USR_IWND_INIT_NSEG	0x0200	/* Convert segs to bytes on conn init */

 /* ACK types passed to the ack_received() hook. */
 #define CC_ACK		0x0001	/* Regular in sequence ACK. */
@@ -86,8 +86,8 @@ static void newreno_cong_signal(struct cc_var *ccv, uint32_t type);
 static void newreno_post_recovery(struct cc_var *ccv);
 static int newreno_ctl_output(struct cc_var *ccv, struct sockopt *sopt, void *buf);

-VNET_DEFINE_STATIC(uint32_t, newreno_beta) = 50;
-VNET_DEFINE_STATIC(uint32_t, newreno_beta_ecn) = 80;
+VNET_DEFINE(uint32_t, newreno_beta) = 50;
+VNET_DEFINE(uint32_t, newreno_beta_ecn) = 80;
 #define V_newreno_beta VNET(newreno_beta)
 #define V_newreno_beta_ecn VNET(newreno_beta_ecn)

@@ -101,11 +101,6 @@ struct cc_algo newreno_cc_algo = {
 	.ctl_output = newreno_ctl_output,
 };

-struct newreno {
-	uint32_t beta;
-	uint32_t beta_ecn;
-};
-
 static inline struct newreno *
 newreno_malloc(struct cc_var *ccv)
 {

@@ -182,9 +177,15 @@ newreno_ack_received(struct cc_var *ccv, uint16_t type)
			 * XXXLAS: Find a way to signal SS after RTO that
			 * doesn't rely on tcpcb vars.
			 */
+			uint16_t abc_val;
+
+			if (ccv->flags & CCF_USE_LOCAL_ABC)
+				abc_val = ccv->labc;
+			else
+				abc_val = V_tcp_abc_l_var;
			if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max))
				incr = min(ccv->bytes_this_ack,
-				    ccv->nsegs * V_tcp_abc_l_var *
+				    ccv->nsegs * abc_val *
				    CCV(ccv, t_maxseg));
			else
				incr = min(ccv->bytes_this_ack, CCV(ccv, t_maxseg));
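Pulled out of the kernel context, the increment rule above reduces to a small pure function. The standalone sketch below is illustrative only (the names and the standalone shape are not part of the diff) and shows the branch taken when snd_nxt == snd_max:

#include <stdint.h>

/*
 * Illustrative restatement of the ABC increment above: a connection-local
 * override (CCF_USE_LOCAL_ABC / labc) takes precedence over the global
 * V_tcp_abc_l_var sysctl, and cwnd growth is capped at abc_val segments
 * worth of data per coalesced ack.
 */
static uint32_t
abc_cwnd_incr(uint32_t bytes_this_ack, uint16_t nsegs, uint32_t maxseg,
    int use_local_abc, uint8_t labc, uint16_t sys_abc_l_var)
{
	uint16_t abc_val;
	uint32_t cap;

	if (use_local_abc)
		abc_val = labc;
	else
		abc_val = sys_abc_l_var;
	cap = (uint32_t)nsegs * abc_val * maxseg;
	return (bytes_this_ack < cap ? bytes_this_ack : cap);
}

This is what lets a pacing stack such as rack hand the congestion module its own ABC multiplier (via ccv->labc) without touching the system-wide sysctl.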
@@ -237,11 +238,19 @@ newreno_cong_signal(struct cc_var *ccv, uint32_t type)
 	u_int mss;

 	cwin = CCV(ccv, snd_cwnd);
-	mss = tcp_maxseg(ccv->ccvc.tcp);
+	mss = tcp_fixed_maxseg(ccv->ccvc.tcp);
 	nreno = ccv->cc_data;
 	beta = (nreno == NULL) ? V_newreno_beta : nreno->beta;
 	beta_ecn = (nreno == NULL) ? V_newreno_beta_ecn : nreno->beta_ecn;
-	if (V_cc_do_abe && type == CC_ECN)
+	/*
+	 * Note that we only change the backoff for ECN if the
+	 * global sysctl V_cc_do_abe is set <or> the stack itself
+	 * has set a flag in our newreno_flags (due to pacing) telling
+	 * us to use the lower valued back-off.
+	 */
+	if (V_cc_do_abe ||
+	    (nreno && (nreno->newreno_flags & CC_NEWRENO_BETA_ECN) && (type == CC_ECN)))
 		factor = beta_ecn;
 	else
 		factor = beta;

@@ -260,8 +269,7 @@ newreno_cong_signal(struct cc_var *ccv, uint32_t type)
 	    V_cc_do_abe && V_cc_abe_frlossreduce)) {
 		CCV(ccv, snd_ssthresh) =
 		    ((uint64_t)CCV(ccv, snd_ssthresh) *
-		    (uint64_t)beta) /
-		    (100ULL * (uint64_t)beta_ecn);
+		    (uint64_t)beta) / (uint64_t)beta_ecn;
 	}
 	if (!IN_CONGRECOVERY(CCV(ccv, t_flags)))
 		CCV(ccv, snd_ssthresh) = cwin;
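As a quick sanity check of the corrected scaling, using the defaults defined earlier (beta = 50, beta_ecn = 80): with snd_ssthresh = 80000 the new expression gives 80000 * 50 / 80 = 50000, i.e. the ECN backoff already taken is exchanged for the full loss backoff. The old divisor of 100 * beta_ecn would have produced 80000 * 50 / 8000 = 500, shrinking ssthresh by a spurious extra factor of 100.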
@@ -344,7 +352,7 @@ newreno_ctl_output(struct cc_var *ccv, struct sockopt *sopt, void *buf)
 			nreno->beta = opt->val;
 			break;
 		case CC_NEWRENO_BETA_ECN:
-			if (!V_cc_do_abe)
+			if ((!V_cc_do_abe) && ((nreno->newreno_flags & CC_NEWRENO_BETA_ECN) == 0))
 				return (EACCES);
 			nreno->beta_ecn = opt->val;
 			break;
@@ -31,12 +31,17 @@

 #define CCALGONAME_NEWRENO "newreno"

+struct newreno {
+	uint32_t beta;
+	uint32_t beta_ecn;
+	uint32_t newreno_flags;
+};
+
 struct cc_newreno_opts {
 	int name;
 	uint32_t val;
 };

-#define CC_NEWRENO_BETA		1
-#define CC_NEWRENO_BETA_ECN	2
+#define CC_NEWRENO_BETA		1	/* Beta for normal DUP-ACK/Sack recovery */
+#define CC_NEWRENO_BETA_ECN	2	/* ECN Beta for Abe */

 #endif /* _CC_NEWRENO_H */
@@ -181,13 +181,24 @@ struct tcphdr {
 #define TCP_TXTLS_MODE	40	/* Transmit TLS mode */
 #define TCP_RXTLS_ENABLE 41	/* TLS framing and encryption for receive */
 #define TCP_RXTLS_MODE	42	/* Receive TLS mode */
 #define TCP_IWND_NB	43	/* Override initial window (units: bytes) */
 #define TCP_IWND_NSEG	44	/* Override initial window (units: MSS segs) */
 #define TCP_LOGID_CNT	46	/* get number of connections with the same ID */
 #define TCP_LOG_TAG	47	/* configure tag for grouping logs */
 #define TCP_USER_LOG	48	/* userspace log event */
 #define TCP_CONGESTION	64	/* get/set congestion control algorithm */
 #define TCP_CCALGOOPT	65	/* get/set cc algorithm specific options */
 #define TCP_MAXUNACKTIME 68	/* maximum time without making progress (sec) */
 #define TCP_MAXPEAKRATE	69	/* maximum peak rate allowed (kbps) */
 #define TCP_IDLE_REDUCE	70	/* Reduce cwnd on idle input */
 #define TCP_REMOTE_UDP_ENCAPS_PORT 71	/* Enable TCP over UDP tunneling via the specified port */
 #define TCP_DELACK	72	/* socket option for delayed ack */
 #define TCP_FIN_IS_RST	73	/* A fin from the peer is treated as a RST */
 #define TCP_LOG_LIMIT	74	/* Limit to number of records in tcp-log */
 #define TCP_SHARED_CWND_ALLOWED 75	/* Use of a shared cwnd is allowed */
 #define TCP_PROC_ACCOUNTING 76	/* Do accounting on tcp cpu usage and counts */
 #define TCP_USE_CMP_ACKS 77	/* The transport can handle the Compressed mbuf acks */
 #define TCP_PERF_INFO	78	/* retrieve accounting counters */
 #define TCP_KEEPINIT	128	/* N, time to establish connection */
 #define TCP_KEEPIDLE	256	/* L,N,X start keepalives after this period */
 #define TCP_KEEPINTVL	512	/* L,N interval between keepalives */

@@ -201,7 +212,7 @@ struct tcphdr {
 #define TCP_RACK_MBUF_QUEUE 1050	/* Do we allow mbuf queuing if supported */
 #define TCP_RACK_PROP	1051	/* RACK proportional rate reduction (bool) */
 #define TCP_RACK_TLP_REDUCE 1052	/* RACK TLP cwnd reduction (bool) */
 #define TCP_RACK_PACE_REDUCE 1053	/* RACK Pacing reduction factor (divisor) */
 #define TCP_RACK_PACE_MAX_SEG 1054	/* Max TSO size we will send */
 #define TCP_RACK_PACE_ALWAYS 1055	/* Use the always pace method */
 #define TCP_RACK_PROP_RATE 1056	/* The proportional reduction rate */

@@ -284,6 +295,16 @@ struct tcphdr {
 #define TCP_RACK_PACE_TO_FILL 1127	/* If we are not in recovery, always pace to fill the cwnd in 1 RTT */
 #define TCP_SHARED_CWND_TIME_LIMIT 1128	/* we should limit to low time values the scwnd life */
 #define TCP_RACK_PROFILE 1129	/* Select a profile that sets multiple options */
+#define TCP_HDWR_RATE_CAP 1130	/* Allow hardware rates to cap pacing rate */
+#define TCP_PACING_RATE_CAP 1131	/* Highest rate allowed in pacing in bytes per second (uint64_t) */
+#define TCP_HDWR_UP_ONLY 1132	/* Allow the pacing rate to climb but not descend (with the exception of fill-cw) */
+#define TCP_RACK_ABC_VAL 1133	/* Set a local ABC value different than the system default */
+#define TCP_REC_ABC_VAL 1134	/* Do we use the ABC value for recovery or the override one from sysctl */
+#define TCP_RACK_MEASURE_CNT 1135	/* How many measurements are required in GP pacing */
+#define TCP_DEFER_OPTIONS 1136	/* Defer options until the proper number of measurements occur, does not defer TCP_RACK_MEASURE_CNT */
+#define TCP_FAST_RSM_HACK 1137	/* Do we do the broken thing where we don't twiddle the TLP bits properly in fast_rsm_output? */
+#define TCP_RACK_PACING_BETA 1138	/* Changing the beta for pacing */
+#define TCP_RACK_PACING_BETA_ECN 1139	/* Changing the beta for ecn with pacing */

 /* Start of reserved space for third-party user-settable options. */
 #define TCP_VENDOR	SO_VENDOR

@@ -295,6 +316,7 @@ struct tcphdr {
 #define TCPI_OPT_WSCALE		0x04
 #define TCPI_OPT_ECN		0x08
 #define TCPI_OPT_TOE		0x10
 #define TCPI_OPT_TFO		0x20

 /* Maximum length of log ID. */
 #define TCP_LOG_ID_LEN	64
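The values above are ordinary IPPROTO_TCP socket options. A rough userland sketch of exercising two of the new ones follows; the profile number 1 is an arbitrary example, and both calls only succeed when the connection is attached to a stack (such as rack) that implements them:

#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdint.h>
#include <stdio.h>

static void
tune_rack_socket(int fd)
{
	int profile = 1;		/* example: canned option profile */
	uint64_t cap = 12500000ULL;	/* example: 12.5 MB/s pacing cap */

	/* TCP_RACK_PROFILE selects a profile that sets multiple options. */
	if (setsockopt(fd, IPPROTO_TCP, TCP_RACK_PROFILE,
	    &profile, sizeof(profile)) == -1)
		perror("TCP_RACK_PROFILE");
	/* TCP_PACING_RATE_CAP takes a uint64_t in bytes per second. */
	if (setsockopt(fd, IPPROTO_TCP, TCP_PACING_RATE_CAP,
	    &cap, sizeof(cap)) == -1)
		perror("TCP_PACING_RATE_CAP");
}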
sys/netinet/tcp_accounting.h (new file, 39 lines)
@@ -0,0 +1,39 @@
#ifndef __tcp_accounting_h__
#define __tcp_accounting_h__
/*
 * Return values from tcp_do_ack_accounting
 * and indexes into the tcp_proc_time[]
 * array.
 */
#define ACK_BEHIND	0
#define ACK_SACK	1
#define ACK_CUMACK	2
#define ACK_CUMACK_SACK	3
#define ACK_DUPACK	4
#define ACK_RWND	5
/* Added values for tracking output too */
#define SND_BLOCKED	6
#define SND_LIMITED	7
#define SND_OUT_DATA	8
#define SND_OUT_ACK	9
#define SND_OUT_FAIL	10
/* We also count in the counts array two added (MSS sent and ACKS In) */
#define CNT_OF_MSS_OUT	11
#define CNT_OF_ACKS_IN	12

/* for the tcpcb we add two more cycle counters */
#define CYC_HANDLE_MAP	11
#define CYC_HANDLE_ACK	12

/* Should the tp->xxx arrays be alloc'ed? */
/* #define TCP_NUM_PROC_COUNTERS 11 defined in tcp_var.h */
/* #define TCP_NUM_CNT_COUNTERS 13 defined in tcp_var.h */

#ifdef _KERNEL
#ifdef TCP_ACCOUNTING
extern counter_u64_t tcp_cnt_counters[TCP_NUM_CNT_COUNTERS];
extern counter_u64_t tcp_proc_time[TCP_NUM_PROC_COUNTERS];
#endif
#endif

#endif
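As a sketch of how a consumer of this header might charge the counters (kernel-side, under options TCP_ACCOUNTING; tcp_do_ack_accounting itself is not part of this diff, so the helper below is purely illustrative):

#ifdef TCP_ACCOUNTING
/*
 * Illustrative only: charge cycles spent handling an ack of class
 * ack_type (e.g. ACK_CUMACK) and count the acks seen.
 * counter_u64_add() is the stock per-CPU counter(9) primitive.
 */
static inline void
tcp_acct_charge(int ack_type, uint64_t cycles, uint64_t acks)
{
	counter_u64_add(tcp_proc_time[ack_type], cycles);
	counter_u64_add(tcp_cnt_counters[CNT_OF_ACKS_IN], acks);
}
#endif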
@@ -526,7 +526,7 @@ cc_post_recovery(struct tcpcb *tp, struct tcphdr *th)
 	(V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))

 void inline
-cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos)
+cc_ecnpkt_handler_flags(struct tcpcb *tp, uint16_t flags, uint8_t iptos)
 {
 	INP_WLOCK_ASSERT(tp->t_inpcb);

@@ -544,7 +544,7 @@ cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos)
 		break;
 	}

-	if (th->th_flags & TH_CWR)
+	if (flags & TH_CWR)
 		tp->ccv->flags |= CCF_TCPHDR_CWR;
 	else
 		tp->ccv->flags &= ~CCF_TCPHDR_CWR;

@@ -558,6 +558,12 @@ cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos)
 	}
 }

+void inline
+cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos)
+{
+	cc_ecnpkt_handler_flags(tp, th->th_flags, iptos);
+}
+
 /*
  * TCP input handling is split into multiple parts:
  *   tcp6_input is a thin wrapper around tcp_input for the extended
@@ -174,7 +174,7 @@ enum tcp_log_events {
 	TCP_LOG_IN = 1,		/* Incoming packet                 1 */
 	TCP_LOG_OUT,		/* Transmit (without other event)  2 */
 	TCP_LOG_RTO,		/* Retransmit timeout              3 */
-	TCP_LOG_TF_ACK,		/* Transmit due to TF_ACK          4 */
+	TCP_LOG_SB_WAKE,	/* Awaken socket buffer            4 */
 	TCP_LOG_BAD_RETRAN,	/* Detected bad retransmission     5 */
 	TCP_LOG_PRR,		/* Doing PRR                       6 */
 	TCP_LOG_REORDER,	/* Detected reorder                7 */

@@ -200,7 +200,7 @@ enum tcp_log_events {
 	BBR_LOG_DOSEG_DONE,	/* hpts do_segment completes       27 */
 	BBR_LOG_EXIT_GAIN,	/* hpts do_segment completes       28 */
 	BBR_LOG_THRESH_CALC,	/* Doing threshold calculation     29 */
-	BBR_LOG_EXTRACWNDGAIN,	/* Removed                         30 */
+	TCP_LOG_MAPCHG,		/* Map Changes to the sendmap      30 */
 	TCP_LOG_USERSEND,	/* User level sends data           31 */
 	BBR_RSM_CLEARED,	/* RSM cleared of ACK flags        32 */
 	BBR_LOG_STATE_TARGET,	/* Log of target at state          33 */

@@ -232,7 +232,9 @@ enum tcp_log_events {
 	TCP_LOG_USER_EVENT,	/* User space event data           59 */
 	TCP_LOG_SENDFILE,	/* sendfile() logging for TCP connections 60 */
 	TCP_LOG_HTTP_T,		/* logging of http request tracking 61 */
-	TCP_LOG_END		/* End (keep at end)               62 */
+	TCP_LOG_ACCOUNTING,	/* Log of TCP Accounting data      62 */
+	TCP_LOG_FSB,		/* FSB information                 63 */
+	TCP_LOG_END		/* End (keep at end)               64 */
 };

 enum tcp_log_states {
@@ -367,11 +367,22 @@ rl_add_syctl_entries(struct sysctl_oid *rl_sysctl_root, struct tcp_rate_set *rs)
 			    OID_AUTO, "pacetime", CTLFLAG_RD,
 			    &rs->rs_rlt[i].time_between, 0,
 			    "Time hardware inserts between 1500 byte sends");
-			SYSCTL_ADD_U64(&rs->sysctl_ctx,
+			SYSCTL_ADD_LONG(&rs->sysctl_ctx,
 			    SYSCTL_CHILDREN(rl_rate_num),
 			    OID_AUTO, "rate", CTLFLAG_RD,
-			    &rs->rs_rlt[i].rate, 0,
+			    &rs->rs_rlt[i].rate,
 			    "Rate in bytes per second");
+			SYSCTL_ADD_LONG(&rs->sysctl_ctx,
+			    SYSCTL_CHILDREN(rl_rate_num),
+			    OID_AUTO, "using", CTLFLAG_RD,
+			    &rs->rs_rlt[i].using,
+			    "Number of flows using");
+			SYSCTL_ADD_LONG(&rs->sysctl_ctx,
+			    SYSCTL_CHILDREN(rl_rate_num),
+			    OID_AUTO, "enobufs", CTLFLAG_RD,
+			    &rs->rs_rlt[i].rs_num_enobufs,
+			    "Number of enobufs logged on this rate");
 		}
 	}
 #endif

@@ -667,6 +678,8 @@ rt_setup_new_rs(struct ifnet *ifp, int *error)
 		 */
 		rs->rs_rlt[i].ptbl = rs;
 		rs->rs_rlt[i].tag = NULL;
+		rs->rs_rlt[i].using = 0;
+		rs->rs_rlt[i].rs_num_enobufs = 0;
 		/*
 		 * Calculate the time between.
 		 */

@@ -1063,16 +1076,28 @@ rt_find_real_interface(struct ifnet *ifp, struct inpcb *inp, int *error)
static void
rl_increment_using(const struct tcp_hwrate_limit_table *rte)
{
	struct tcp_hwrate_limit_table *decon_rte;

	decon_rte = __DECONST(struct tcp_hwrate_limit_table *, rte);
	atomic_add_long(&decon_rte->using, 1);
}

static void
rl_decrement_using(const struct tcp_hwrate_limit_table *rte)
{
	struct tcp_hwrate_limit_table *decon_rte;

	decon_rte = __DECONST(struct tcp_hwrate_limit_table *, rte);
	atomic_subtract_long(&decon_rte->using, 1);
}

void
tcp_rl_log_enobuf(const struct tcp_hwrate_limit_table *rte)
{
	struct tcp_hwrate_limit_table *decon_rte;

	decon_rte = __DECONST(struct tcp_hwrate_limit_table *, rte);
	atomic_add_long(&decon_rte->rs_num_enobufs, 1);
}

/*
@@ -43,7 +43,9 @@ struct m_snd_tag;
 struct tcp_hwrate_limit_table {
 	const struct tcp_rate_set *ptbl;	/* Pointer to parent table */
 	struct m_snd_tag *tag;	/* Send tag if needed (chelsio) */
-	uint64_t rate;		/* Rate we get in Bytes per second (Bps) */
+	long	rate;		/* Rate we get in Bytes per second (Bps) */
+	long	using;		/* How many flows are using this hdwr rate. */
+	long	rs_num_enobufs;
 	uint32_t time_between;	/* Time-Gap between packets at this rate */
 	uint32_t flags;
 };
@@ -156,6 +156,17 @@ SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, globalholes, CTLFLAG_VNET | CTLFLAG_RD,
    &VNET_NAME(tcp_sack_globalholes), 0,
    "Global number of TCP SACK holes currently allocated");

int
tcp_dsack_block_exists(struct tcpcb *tp)
{
	/* Return true if a DSACK block exists */
	if (tp->rcv_numsacks == 0)
		return (0);
	if (SEQ_LEQ(tp->sackblks[0].end, tp->rcv_nxt))
		return (1);
	return (0);
}

/*
 * This function will find overlaps with the currently stored sackblocks
 * and add any overlap as a dsack block upfront
@@ -3930,6 +3930,9 @@ bbr_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type, struct bbr_s
 	struct tcp_bbr *bbr;

 	INP_WLOCK_ASSERT(tp->t_inpcb);
+#ifdef STATS
+	stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_CSIG, type);
+#endif
 	bbr = (struct tcp_bbr *)tp->t_fb_ptr;
 	switch (type) {
 	case CC_NDUPACK:

@@ -4403,6 +4406,7 @@ bbr_clone_rsm(struct tcp_bbr *bbr, struct bbr_sendmap *nrsm, struct bbr_sendmap
 	nrsm->r_start = start;
 	nrsm->r_end = rsm->r_end;
 	nrsm->r_rtr_cnt = rsm->r_rtr_cnt;
+	nrsm->r_rtt_not_allowed = rsm->r_rtt_not_allowed;
 	nrsm->r_flags = rsm->r_flags;
 	/* We don't transfer forward the SYN flag */
 	nrsm->r_flags &= ~BBR_HAS_SYN;

@@ -6429,65 +6433,6 @@ tcp_bbr_xmit_timer_commit(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts)
 		bbr->r_ctl.bbr_smallest_srtt_this_state = rtt;
 	}

-static void
-bbr_earlier_retran(struct tcpcb *tp, struct tcp_bbr *bbr, struct bbr_sendmap *rsm,
-    uint32_t t, uint32_t cts, int ack_type)
-{
-	/*
-	 * For this RSM, we acknowledged the data from a previous
-	 * transmission, not the last one we made. This means we did a false
-	 * retransmit.
-	 */
-	if (rsm->r_flags & BBR_HAS_FIN) {
-		/*
-		 * The sending of the FIN often is multiple sent when we
-		 * have everything outstanding ack'd. We ignore this case
-		 * since it's over now.
-		 */
-		return;
-	}
-	if (rsm->r_flags & BBR_TLP) {
-		/*
-		 * We expect TLP's to have this occur often
-		 */
-		bbr->rc_tlp_rtx_out = 0;
-		return;
-	}
-	if (ack_type != BBR_CUM_ACKED) {
-		/*
-		 * If it was not a cum-ack we
-		 * don't really know for sure since
-		 * the timestamp could be from some
-		 * other transmission.
-		 */
-		return;
-	}
-
-	if (rsm->r_flags & BBR_WAS_SACKPASS) {
-		/*
-		 * We retransmitted based on a sack and the earlier
-		 * retransmission ack'd it - re-ordering is occurring.
-		 */
-		BBR_STAT_INC(bbr_reorder_seen);
-		bbr->r_ctl.rc_reorder_ts = cts;
-	}
-	/* Back down the loss count */
-	if (rsm->r_flags & BBR_MARKED_LOST) {
-		bbr->r_ctl.rc_lost -= rsm->r_end - rsm->r_start;
-		bbr->r_ctl.rc_lost_bytes -= rsm->r_end - rsm->r_start;
-		rsm->r_flags &= ~BBR_MARKED_LOST;
-		if (SEQ_GT(bbr->r_ctl.rc_lt_lost, bbr->r_ctl.rc_lost))
-			/* LT sampling also needs adjustment */
-			bbr->r_ctl.rc_lt_lost = bbr->r_ctl.rc_lost;
-	}
-	/***** RRS HERE ************************/
-	/* Do we need to do this??? */
-	/* bbr_reset_lt_bw_sampling(bbr, cts); */
-	/***** RRS HERE ************************/
-	BBR_STAT_INC(bbr_badfr);
-	BBR_STAT_ADD(bbr_badfr_bytes, (rsm->r_end - rsm->r_start));
-}
-
 static void
 bbr_set_reduced_rtt(struct tcp_bbr *bbr, uint32_t cts, uint32_t line)
 {

@@ -6869,6 +6814,10 @@ bbr_update_rtt(struct tcpcb *tp, struct tcp_bbr *bbr,
 		/* Already done */
 		return (0);
 	}
+	if (rsm->r_rtt_not_allowed) {
+		/* Not allowed */
+		return (0);
+	}
 	if (rsm->r_rtr_cnt == 1) {
 		/*
 		 * Only one transmit. Hopefully the normal case.

@@ -6926,7 +6875,7 @@ bbr_update_rtt(struct tcpcb *tp, struct tcp_bbr *bbr,
 			    rsm->r_tim_lastsent[i], ack_type, to);
 			if ((i + 1) < rsm->r_rtr_cnt) {
 				/* Likely */
-				bbr_earlier_retran(tp, bbr, rsm, t, cts, ack_type);
 				return (0);
 			} else if (rsm->r_flags & BBR_TLP) {
 				bbr->rc_tlp_rtx_out = 0;
 			}

@@ -6974,7 +6923,7 @@ bbr_update_rtt(struct tcpcb *tp, struct tcp_bbr *bbr,
 				t = 1;
 			bbr_update_bbr_info(bbr, rsm, t, cts, to->to_tsecr, uts, BBR_RTT_BY_EARLIER_RET,
 			    rsm->r_tim_lastsent[i], ack_type, to);
-			bbr_earlier_retran(tp, bbr, rsm, t, cts, ack_type);
 			return (0);
 		} else {
 			/*
 			 * Too many prior transmissions, just

@@ -10207,7 +10156,7 @@ bbr_init(struct tcpcb *tp)
 			tp->t_fb_ptr = NULL;
 			return (ENOMEM);
 		}
-		rsm->r_flags = BBR_OVERMAX;
+		rsm->r_rtt_not_allowed = 1;
 		rsm->r_tim_lastsent[0] = cts;
 		rsm->r_rtr_cnt = 1;
 		rsm->r_rtr_bytes = 0;

@@ -10320,6 +10269,10 @@ bbr_fini(struct tcpcb *tp, int32_t tcb_is_purged)
 		counter_u64_add(bbr_flows_whdwr_pacing, -1);
 	else
 		counter_u64_add(bbr_flows_nohdwr_pacing, -1);
+	if (bbr->r_ctl.crte != NULL) {
+		tcp_rel_pacing_rate(bbr->r_ctl.crte, tp);
+		bbr->r_ctl.crte = NULL;
+	}
 	rsm = TAILQ_FIRST(&bbr->r_ctl.rc_map);
 	while (rsm) {
 		TAILQ_REMOVE(&bbr->r_ctl.rc_map, rsm, r_next);

@@ -13463,15 +13416,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
 			th->th_seq = htonl(tp->snd_max);
 			bbr_seq = tp->snd_max;
 		}
-	} else if (flags & TH_RST) {
-		/*
-		 * For a Reset send the last cum ack in sequence
-		 * (this like any other choice may still generate a
-		 * challenge ack, if a ack-update packet is in
-		 * flight).
-		 */
-		th->th_seq = htonl(tp->snd_una);
-		bbr_seq = tp->snd_una;
 	} else {
 		/*
 		 * len == 0 and not persist we use snd_max, sending

@@ -14536,9 +14480,9 @@ bbr_set_sockopt(struct socket *so, struct sockopt *sopt,
 		} else {
 			bbr->bbr_hdw_pace_ena = 0;
 #ifdef RATELIMIT
 			if (bbr->bbr_hdrw_pacing) {
 				bbr->bbr_hdrw_pacing = 0;
-				in_pcbdetach_txrtlmt(bbr->rc_inp);
+				if (bbr->r_ctl.crte != NULL) {
+					tcp_rel_pacing_rate(bbr->r_ctl.crte, tp);
+					bbr->r_ctl.crte = NULL;
+				}
 			}
#endif
 		}
(File diff suppressed because it is too large.)
@@ -96,6 +96,7 @@ __FBSDID("$FreeBSD$");
 #include <netinet/tcp_var.h>
 #include <netinet/tcpip.h>
 #include <netinet/tcp_hpts.h>
+#include <netinet/tcp_lro.h>
 #include <netinet/cc/cc.h>
 #include <netinet/tcp_log_buf.h>
 #ifdef TCPDEBUG

@@ -161,6 +162,130 @@ ctf_get_opt_tls_size(struct socket *so, uint32_t rwnd)
}
#endif

static int
ctf_get_enet_type(struct ifnet *ifp, struct mbuf *m)
{
	struct ether_header *eh;
	struct tcphdr *th;
#ifdef INET6
	struct ip6_hdr *ip6 = NULL;	/* Keep compiler happy. */
#endif
#ifdef INET
	struct ip *ip = NULL;		/* Keep compiler happy. */
#endif
	int32_t tlen;
	uint16_t drop_hdrlen;
	uint16_t etype;
	uint8_t iptos;

	/* Is it the easy way? */
	if (m->m_flags & M_LRO_EHDRSTRP)
		return (m->m_pkthdr.lro_etype);
	/*
	 * Ok this is the old style call, the ethernet header is here.
	 * This also means no checksum or BPF were done. This
	 * can happen if the race to setup the inp fails and
	 * LRO sees no INP at packet input, but by the time
	 * we queue the packets an INP gets there. It's rare
	 * but it can occur so we will handle it. Note that
	 * this means duplicated work but with the rarity of it
	 * its not worth worrying about.
	 */
	/* Let the BPF see the packet */
	if (bpf_peers_present(ifp->if_bpf))
		ETHER_BPF_MTAP(ifp, m);
	/* Now the csum */
	eh = mtod(m, struct ether_header *);
	etype = ntohs(eh->ether_type);
	m_adj(m, sizeof(*eh));
	switch (etype) {
#ifdef INET6
	case ETHERTYPE_IPV6:
	{
		if (m->m_len < (sizeof(*ip6) + sizeof(*th))) {
			m = m_pullup(m, sizeof(*ip6) + sizeof(*th));
			if (m == NULL) {
				KMOD_TCPSTAT_INC(tcps_rcvshort);
				m_freem(m);
				return (-1);
			}
		}
		ip6 = (struct ip6_hdr *)(eh + 1);
		th = (struct tcphdr *)(ip6 + 1);
		drop_hdrlen = sizeof(*ip6);
		tlen = ntohs(ip6->ip6_plen);
		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) {
			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
				th->th_sum = m->m_pkthdr.csum_data;
			else
				th->th_sum = in6_cksum_pseudo(ip6, tlen,
				    IPPROTO_TCP,
				    m->m_pkthdr.csum_data);
			th->th_sum ^= 0xffff;
		} else
			th->th_sum = in6_cksum(m, IPPROTO_TCP, drop_hdrlen, tlen);
		if (th->th_sum) {
			KMOD_TCPSTAT_INC(tcps_rcvbadsum);
			m_freem(m);
			return (-1);
		}
		return (etype);
	}
#endif
#ifdef INET
	case ETHERTYPE_IP:
	{
		if (m->m_len < sizeof (struct tcpiphdr)) {
			m = m_pullup(m, sizeof (struct tcpiphdr));
			if (m == NULL) {
				KMOD_TCPSTAT_INC(tcps_rcvshort);
				m_freem(m);
				return (-1);
			}
		}
		ip = (struct ip *)(eh + 1);
		th = (struct tcphdr *)(ip + 1);
		drop_hdrlen = sizeof(*ip);
		iptos = ip->ip_tos;
		tlen = ntohs(ip->ip_len) - sizeof(struct ip);
		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
				th->th_sum = m->m_pkthdr.csum_data;
			else
				th->th_sum = in_pseudo(ip->ip_src.s_addr,
				    ip->ip_dst.s_addr,
				    htonl(m->m_pkthdr.csum_data + tlen + IPPROTO_TCP));
			th->th_sum ^= 0xffff;
		} else {
			int len;
			struct ipovly *ipov = (struct ipovly *)ip;
			/*
			 * Checksum extended TCP header and data.
			 */
			len = drop_hdrlen + tlen;
			bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
			ipov->ih_len = htons(tlen);
			th->th_sum = in_cksum(m, len);
			/* Reset length for SDT probes. */
			ip->ip_len = htons(len);
			/* Reset TOS bits */
			ip->ip_tos = iptos;
			/* Re-initialization for later version check */
			ip->ip_v = IPVERSION;
			ip->ip_hl = sizeof(*ip) >> 2;
		}
		if (th->th_sum) {
			KMOD_TCPSTAT_INC(tcps_rcvbadsum);
			m_freem(m);
			return (-1);
		}
		break;
	}
#endif
	};
	return (etype);
}

/*
 * The function ctf_process_inbound_raw() is used by
 * transport developers to do the steps needed to
 * support MBUF Queuing i.e. the flags in

@@ -170,6 +295,7 @@ ctf_get_opt_tls_size(struct socket *so, uint32_t rwnd)
 * - INP_SUPPORTS_MBUFQ
 * - INP_MBUF_QUEUE_READY
 * - INP_DONT_SACK_QUEUE
+ * - INP_MBUF_ACKCMP
 *
 * These flags help control how LRO will deliver
 * packets to the transport. You first set in inp_flags2

@@ -186,6 +312,18 @@ ctf_get_opt_tls_size(struct socket *so, uint32_t rwnd)
 * In some transport designs this is important since knowing
 * the actual time we got the packet is useful information.
 *
+ * A new special type of mbuf may also be supported by the transport
+ * if it has set the INP_MBUF_ACKCMP flag. If its set, LRO will
+ * possibly create a M_ACKCMP type mbuf. This is a mbuf with
+ * an array of "acks". One thing also to note is that when this
+ * occurs a subsequent LRO may find at the back of the untouched
+ * mbuf queue chain a M_ACKCMP and append on to it. This means
+ * that until the transport pulls in the mbuf chain queued
+ * for it more ack's may get on the mbufs that were already
+ * delivered. There currently is a limit of 6 acks condensed
+ * into 1 mbuf which means often when this is occurring, we
+ * don't get that effect but it does happen.
+ *
 * Now there are some interesting Caveats that the transport
 * designer needs to take into account when using this feature.
 *
@@ -247,7 +385,6 @@ ctf_process_inbound_raw(struct tcpcb *tp, struct socket *so, struct mbuf *m, int
 	 * shipped in, the tcb has been destroyed (or about to be destroyed).
 	 */
 	struct mbuf *m_save;
-	struct ether_header *eh;
 	struct tcphdr *th;
 #ifdef INET6
 	struct ip6_hdr *ip6 = NULL;	/* Keep compiler happy. */

@@ -257,20 +394,18 @@ ctf_process_inbound_raw(struct tcpcb *tp, struct socket *so, struct mbuf *m, int
 #endif
 	struct ifnet *ifp;
 	struct timeval tv;
 	struct inpcb *inp;
 	int32_t retval, nxt_pkt, tlen, off;
-	uint16_t etype;
+	int etype = 0;
 	uint16_t drop_hdrlen;
-	uint8_t iptos, no_vn=0, bpf_req=0;
+	uint8_t iptos, no_vn=0;

 	NET_EPOCH_ASSERT();

-	if (m && m->m_pkthdr.rcvif)
-		ifp = m->m_pkthdr.rcvif;
+	if (m)
+		ifp = m_rcvif(m);
 	else
 		ifp = NULL;
-	if (ifp) {
-		bpf_req = bpf_peers_present(ifp->if_bpf);
-	} else {
+	if (ifp == NULL) {
 		/*
 		 * We probably should not work around
 		 * but kassert, since LRO always sets rcvif.

@@ -280,147 +415,86 @@ ctf_process_inbound_raw(struct tcpcb *tp, struct socket *so, struct mbuf *m, int
 	}
 	CURVNET_SET(ifp->if_vnet);
skip_vnet:
 	tcp_get_usecs(&tv);
 	while (m) {
 		m_save = m->m_nextpkt;
 		m->m_nextpkt = NULL;
-		/* Now lets get the ether header */
-		eh = mtod(m, struct ether_header *);
-		etype = ntohs(eh->ether_type);
-		/* Let the BPF see the packet */
-		if (bpf_req && ifp)
-			ETHER_BPF_MTAP(ifp, m);
-		m_adj(m, sizeof(*eh));
-		/* Trim off the ethernet header */
-		switch (etype) {
+		if ((m->m_flags & M_ACKCMP) == 0) {
+			/* Now lets get the ether header */
+			etype = ctf_get_enet_type(ifp, m);
+			if (etype == -1) {
+				/* Skip this packet it was freed by checksum */
+				goto skipped_pkt;
+			}
+			KASSERT(((etype == ETHERTYPE_IPV6) || (etype == ETHERTYPE_IP)),
+			    ("tp:%p m:%p etype:0x%x -- not IP or IPv6", tp, m, etype));
+			/* Trim off the ethernet header */
+			switch (etype) {
 #ifdef INET6
-		case ETHERTYPE_IPV6:
-		{
-			if (m->m_len < (sizeof(*ip6) + sizeof(*th))) {
-				m = m_pullup(m, sizeof(*ip6) + sizeof(*th));
-				if (m == NULL) {
-					KMOD_TCPSTAT_INC(tcps_rcvshort);
-					m_freem(m);
-					goto skipped_pkt;
-				}
-			}
-			ip6 = (struct ip6_hdr *)(eh + 1);
-			th = (struct tcphdr *)(ip6 + 1);
-			tlen = ntohs(ip6->ip6_plen);
-			drop_hdrlen = sizeof(*ip6);
-			if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) {
-				if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
-					th->th_sum = m->m_pkthdr.csum_data;
-				else
-					th->th_sum = in6_cksum_pseudo(ip6, tlen,
-					    IPPROTO_TCP, m->m_pkthdr.csum_data);
-				th->th_sum ^= 0xffff;
-			} else
-				th->th_sum = in6_cksum(m, IPPROTO_TCP, drop_hdrlen, tlen);
-			if (th->th_sum) {
-				KMOD_TCPSTAT_INC(tcps_rcvbadsum);
-				m_freem(m);
-				goto skipped_pkt;
-			}
-			/*
-			 * Be proactive about unspecified IPv6 address in source.
-			 * As we use all-zero to indicate unbounded/unconnected pcb,
-			 * unspecified IPv6 address can be used to confuse us.
-			 *
-			 * Note that packets with unspecified IPv6 destination is
-			 * already dropped in ip6_input.
-			 */
-			if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
-				/* XXX stat */
-				m_freem(m);
-				goto skipped_pkt;
-			}
-			iptos = IPV6_TRAFFIC_CLASS(ip6);
-			break;
-		}
+			case ETHERTYPE_IPV6:
+				ip6 = mtod(m, struct ip6_hdr *);
+				th = (struct tcphdr *)(ip6 + 1);
+				tlen = ntohs(ip6->ip6_plen);
+				drop_hdrlen = sizeof(*ip6);
+				iptos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+				break;
 #endif
 #ifdef INET
-		case ETHERTYPE_IP:
-		{
-			if (m->m_len < sizeof (struct tcpiphdr)) {
-				if ((m = m_pullup(m, sizeof (struct tcpiphdr)))
-				    == NULL) {
-					KMOD_TCPSTAT_INC(tcps_rcvshort);
-					m_freem(m);
-					goto skipped_pkt;
-				}
-			}
-			ip = (struct ip *)(eh + 1);
-			th = (struct tcphdr *)(ip + 1);
-			drop_hdrlen = sizeof(*ip);
-			iptos = ip->ip_tos;
-			tlen = ntohs(ip->ip_len) - sizeof(struct ip);
-			if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
-				if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
-					th->th_sum = m->m_pkthdr.csum_data;
-				else
-					th->th_sum = in_pseudo(ip->ip_src.s_addr,
-					    ip->ip_dst.s_addr,
-					    htonl(m->m_pkthdr.csum_data + tlen +
-					    IPPROTO_TCP));
-				th->th_sum ^= 0xffff;
-			} else {
-				int len;
-				struct ipovly *ipov = (struct ipovly *)ip;
-				/*
-				 * Checksum extended TCP header and data.
-				 */
-				len = drop_hdrlen + tlen;
-				bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
-				ipov->ih_len = htons(tlen);
-				th->th_sum = in_cksum(m, len);
-				/* Reset length for SDT probes. */
-				ip->ip_len = htons(len);
-				/* Reset TOS bits */
-				ip->ip_tos = iptos;
-				/* Re-initialization for later version check */
-				ip->ip_v = IPVERSION;
-				ip->ip_hl = sizeof(*ip) >> 2;
-			}
-			if (th->th_sum) {
-				KMOD_TCPSTAT_INC(tcps_rcvbadsum);
-				m_freem(m);
-				goto skipped_pkt;
-			}
-			break;
-		}
+			case ETHERTYPE_IP:
+				ip = mtod(m, struct ip *);
+				th = (struct tcphdr *)(ip + 1);
+				drop_hdrlen = sizeof(*ip);
+				iptos = ip->ip_tos;
+				tlen = ntohs(ip->ip_len) - sizeof(struct ip);
+				break;
 #endif
-		}
-		/*
-		 * Convert TCP protocol specific fields to host format.
-		 */
-		tcp_fields_to_host(th);
-
-		off = th->th_off << 2;
-		if (off < sizeof (struct tcphdr) || off > tlen) {
-			KMOD_TCPSTAT_INC(tcps_rcvbadoff);
-		}
-		tlen -= off;
-		drop_hdrlen += off;
-		/*
-		 * Now lets setup the timeval to be when we should
-		 * have been called (if we can).
-		 */
-		m->m_pkthdr.lro_nsegs = 1;
-		if (m->m_flags & M_TSTMP_LRO) {
-			tv.tv_sec = m->m_pkthdr.rcv_tstmp /1000000000;
-			tv.tv_usec = (m->m_pkthdr.rcv_tstmp % 1000000000)/1000;
-		}
-		/* Now what about next packet? */
-		tcp_get_usecs(&tv);
+			} /* end switch */
+			/*
+			 * Convert TCP protocol specific fields to host format.
+			 */
+			tcp_fields_to_host(th);
+			off = th->th_off << 2;
+			if (off < sizeof (struct tcphdr) || off > tlen) {
+				printf("off:%d < hdrlen:%zu || > tlen:%u -- dump\n",
+				    off,
+				    sizeof(struct tcphdr),
+				    tlen);
+				KMOD_TCPSTAT_INC(tcps_rcvbadoff);
+				m_freem(m);
+				goto skipped_pkt;
+			}
+			tlen -= off;
+			drop_hdrlen += off;
+			/*
+			 * Now lets setup the timeval to be when we should
+			 * have been called (if we can).
+			 */
+			m->m_pkthdr.lro_nsegs = 1;
+			if (m->m_flags & M_TSTMP_LRO) {
+				tv.tv_sec = m->m_pkthdr.rcv_tstmp / 1000000000;
+				tv.tv_usec = (m->m_pkthdr.rcv_tstmp % 1000000000) / 1000;
+			} else {
+				/* Should not be should we kassert instead? */
+				tcp_get_usecs(&tv);
+			}
+		} else {
+			/*
+			 * This mbuf is an array of acks that have
+			 * been compressed. We assert the inp has
+			 * the flag set to enable this!
+			 */
+			KASSERT((tp->t_inpcb->inp_flags2 & INP_MBUF_ACKCMP),
+			    ("tp:%p inp:%p no INP_MBUF_ACKCMP flags?", tp, tp->t_inpcb));
+			tlen = 0;
+			drop_hdrlen = 0;
+			th = NULL;
+			iptos = 0;
+		}
 		/* Now what about next packet? */
 		if (m_save || has_pkt)
 			nxt_pkt = 1;
 		else
 			nxt_pkt = 0;
-		KMOD_TCPSTAT_INC(tcps_rcvtotal);
+		if ((m->m_flags & M_ACKCMP) == 0)
+			KMOD_TCPSTAT_INC(tcps_rcvtotal);
+		else
+			KMOD_TCPSTAT_ADD(tcps_rcvtotal, (m->m_len / sizeof(struct tcp_ackent)));
 		inp = tp->t_inpcb;
 		INP_WLOCK_ASSERT(inp);
 		retval = (*tp->t_fb->tfb_do_segment_nounlock)(m, th, so, tp, drop_hdrlen, tlen,
 		    iptos, nxt_pkt, &tv);
 		if (retval) {

@@ -434,6 +508,7 @@ ctf_process_inbound_raw(struct tcpcb *tp, struct socket *so, struct mbuf *m, int
 		}
 		if (no_vn == 0)
 			CURVNET_RESTORE();
+		INP_UNLOCK_ASSERT(inp);
 		return(retval);
 	}
skipped_pkt:
@@ -482,11 +557,6 @@ ctf_flight_size(struct tcpcb *tp, uint32_t rc_sacked)
 	if (rc_sacked <= ctf_outstanding(tp))
 		return(ctf_outstanding(tp) - rc_sacked);
 	else {
-		/* TSNH */
-#ifdef INVARIANTS
-		panic("tp:%p rc_sacked:%d > out:%d",
-		    tp, rc_sacked, ctf_outstanding(tp));
-#endif
 		return (0);
 	}
 }

@@ -502,6 +572,36 @@ ctf_do_dropwithreset(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th,
 	tcp_dropwithreset(m, th, NULL, tlen, rstreason);
}

void
ctf_ack_war_checks(struct tcpcb *tp, uint32_t *ts, uint32_t *cnt)
{
	if ((ts != NULL) && (cnt != NULL) &&
	    (tcp_ack_war_time_window > 0) &&
	    (tcp_ack_war_cnt > 0)) {
		/* We are possibly doing ack war prevention */
		uint32_t cts;

		/*
		 * We use a msec tick here which gives us
		 * roughly 49 days. We don't need the
		 * precision of a microsecond timestamp which
		 * would only give us hours.
		 */
		cts = tcp_ts_getticks();
		if (TSTMP_LT((*ts), cts)) {
			/* Timestamp is in the past */
			*cnt = 0;
			*ts = (cts + tcp_ack_war_time_window);
		}
		if (*cnt < tcp_ack_war_cnt) {
			*cnt = (*cnt + 1);
			tp->t_flags |= TF_ACKNOW;
		} else
			tp->t_flags &= ~TF_ACKNOW;
	} else
		tp->t_flags |= TF_ACKNOW;
}
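Read as a token window, the function above allows at most tcp_ack_war_cnt forced ACKs per tcp_ack_war_time_window millisecond ticks, and otherwise clears TF_ACKNOW so the would-be challenge ACK is suppressed. A userland-testable restatement of the same arithmetic (the two constants are assumptions for illustration; the real values come from sysctls not shown in this diff):

#include <stdint.h>

#define ACK_WAR_TIME_WINDOW	1000	/* assumed: window length in ms ticks */
#define ACK_WAR_CNT		5	/* assumed: forced acks allowed per window */

/* Returns 1 when an ACK may be forced now, mirroring ctf_ack_war_checks(). */
static int
ack_war_allow(uint32_t now_ms, uint32_t *ts, uint32_t *cnt)
{
	if (now_ms > *ts) {		/* window expired: open a fresh one */
		*cnt = 0;
		*ts = now_ms + ACK_WAR_TIME_WINDOW;
	}
	if (*cnt < ACK_WAR_CNT) {	/* budget remains in this window */
		(*cnt)++;
		return (1);		/* TF_ACKNOW would be set */
	}
	return (0);			/* throttled: TF_ACKNOW cleared */
}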
/*
 * ctf_drop_checks returns 1 for you should not proceed. It places
 * in ret_val what should be returned 1/0 by the caller. The 1 indicates

@@ -509,7 +609,10 @@ ctf_do_dropwithreset(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th,
 * TCB is still valid and locked.
 */
 int
-ctf_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t * tlenp, int32_t * thf, int32_t * drop_hdrlen, int32_t * ret_val)
+_ctf_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th,
+    struct tcpcb *tp, int32_t *tlenp,
+    int32_t *thf, int32_t *drop_hdrlen, int32_t *ret_val,
+    uint32_t *ts, uint32_t *cnt)
 {
 	int32_t todrop;
 	int32_t thflags;

@@ -543,7 +646,7 @@ ctf_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th, struct tcp
 		 * Send an ACK to resynchronize and drop any data.
 		 * But keep on processing for RST or ACK.
 		 */
-		tp->t_flags |= TF_ACKNOW;
+		ctf_ack_war_checks(tp, ts, cnt);
 		todrop = tlen;
 		KMOD_TCPSTAT_INC(tcps_rcvduppack);
 		KMOD_TCPSTAT_ADD(tcps_rcvdupbyte, todrop);

@@ -555,13 +658,14 @@ ctf_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th, struct tcp
 		 * DSACK - add SACK block for dropped range
 		 */
 		if ((todrop > 0) && (tp->t_flags & TF_SACK_PERMIT)) {
-			tcp_update_sack_list(tp, th->th_seq,
-			    th->th_seq + todrop);
 			/*
 			 * ACK now, as the next in-sequence segment
 			 * will clear the DSACK block again
 			 */
-			tp->t_flags |= TF_ACKNOW;
+			ctf_ack_war_checks(tp, ts, cnt);
+			if (tp->t_flags & TF_ACKNOW)
+				tcp_update_sack_list(tp, th->th_seq,
+				    th->th_seq + todrop);
 		}
 		*drop_hdrlen += todrop;	/* drop from the top afterwards */
 		th->th_seq += todrop;

@@ -590,10 +694,10 @@ ctf_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th, struct tcp
 		 * ack.
 		 */
 		if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) {
-			tp->t_flags |= TF_ACKNOW;
+			ctf_ack_war_checks(tp, ts, cnt);
 			KMOD_TCPSTAT_INC(tcps_rcvwinprobe);
 		} else {
-			ctf_do_dropafterack(m, tp, th, thflags, tlen, ret_val);
+			__ctf_do_dropafterack(m, tp, th, thflags, tlen, ret_val, ts, cnt);
 			return (1);
 		}
 	} else

@@ -614,7 +718,7 @@ ctf_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th, struct tcp
 * and valid.
 */
 void
-ctf_do_dropafterack(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, int32_t thflags, int32_t tlen, int32_t * ret_val)
+__ctf_do_dropafterack(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, int32_t thflags, int32_t tlen, int32_t *ret_val, uint32_t *ts, uint32_t *cnt)
 {
 	/*
 	 * Generate an ACK dropping incoming segment if it occupies sequence

@@ -638,7 +742,7 @@ ctf_do_dropafterack(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, int32_t
 		return;
 	} else
 		*ret_val = 0;
-	tp->t_flags |= TF_ACKNOW;
+	ctf_ack_war_checks(tp, ts, cnt);
 	if (m)
 		m_freem(m);
 }

@@ -671,7 +775,7 @@ ctf_process_rst(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcp
 	 */
 	int dropped = 0;

-	if ((SEQ_GEQ(th->th_seq, (tp->last_ack_sent - 1)) &&
+	if ((SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
 	    SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) ||
 	    (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) {
 		KASSERT(tp->t_state != TCPS_SYN_SENT,

@@ -680,8 +784,7 @@ ctf_process_rst(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcp
 		if (V_tcp_insecure_rst ||
 		    (tp->last_ack_sent == th->th_seq) ||
-		    (tp->rcv_nxt == th->th_seq) ||
-		    ((tp->last_ack_sent - 1) == th->th_seq)) {
+		    (tp->rcv_nxt == th->th_seq)) {
 			KMOD_TCPSTAT_INC(tcps_drops);
 			/* Drop the connection. */
 			switch (tp->t_state) {

@@ -748,7 +851,7 @@ ctf_challenge_ack(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t *
 }

 /*
- * bbr_ts_check returns 1 for you should not proceed, the state
+ * ctf_ts_check returns 1 for you should not proceed, the state
 * machine should return. It places in ret_val what should
 * be returned 1/0 by the caller (hpts_do_segment). The 1 indicates
 * that the TCB is unlocked and probably dropped. The 0 indicates the

@@ -786,6 +889,32 @@ ctf_ts_check(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp,
	return (0);
}

int
ctf_ts_check_ac(struct tcpcb *tp, int32_t thflags)
{

	if (tcp_ts_getticks() - tp->ts_recent_age > TCP_PAWS_IDLE) {
		/*
		 * Invalidate ts_recent. If this segment updates ts_recent,
		 * the age will be reset later and ts_recent will get a
		 * valid value. If it does not, setting ts_recent to zero
		 * will at least satisfy the requirement that zero be placed
		 * in the timestamp echo reply when ts_recent isn't valid.
		 * The age isn't reset until we get a valid ts_recent
		 * because we don't want out-of-order segments to be dropped
		 * when ts_recent is old.
		 */
		tp->ts_recent = 0;
	} else {
		KMOD_TCPSTAT_INC(tcps_rcvduppack);
		KMOD_TCPSTAT_INC(tcps_pawsdrop);
		return (1);
	}
	return (0);
}

void
ctf_calc_rwin(struct socket *so, struct tcpcb *tp)
{

@@ -817,45 +946,7 @@ ctf_do_dropwithreset_conn(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th,
 uint32_t
 ctf_fixed_maxseg(struct tcpcb *tp)
 {
-	int optlen;
-
-	if (tp->t_flags & TF_NOOPT)
-		return (tp->t_maxseg);
-
-	/*
-	 * Here we have a simplified code from tcp_addoptions(),
-	 * without a proper loop, and having most of paddings hardcoded.
-	 * We only consider fixed options that we would send every
-	 * time I.e. SACK is not considered.
-	 *
-	 */
-#define	PAD(len)	((((len) / 4) + !!((len) % 4)) * 4)
-	if (TCPS_HAVEESTABLISHED(tp->t_state)) {
-		if (tp->t_flags & TF_RCVD_TSTMP)
-			optlen = TCPOLEN_TSTAMP_APPA;
-		else
-			optlen = 0;
-#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
-		if (tp->t_flags & TF_SIGNATURE)
-			optlen += PAD(TCPOLEN_SIGNATURE);
-#endif
-	} else {
-		if (tp->t_flags & TF_REQ_TSTMP)
-			optlen = TCPOLEN_TSTAMP_APPA;
-		else
-			optlen = PAD(TCPOLEN_MAXSEG);
-		if (tp->t_flags & TF_REQ_SCALE)
-			optlen += PAD(TCPOLEN_WINDOW);
-#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
-		if (tp->t_flags & TF_SIGNATURE)
-			optlen += PAD(TCPOLEN_SIGNATURE);
-#endif
-		if (tp->t_flags & TF_SACK_PERMIT)
-			optlen += PAD(TCPOLEN_SACK_PERMITTED);
-	}
-#undef PAD
-	optlen = min(optlen, TCP_MAXOLEN);
-	return (tp->t_maxseg - optlen);
+	return (tcp_fixed_maxseg(tp));
 }

 void
@@ -98,12 +98,20 @@ ctf_do_queued_segments(struct socket *so, struct tcpcb *tp, int have_pkt);
 uint32_t ctf_outstanding(struct tcpcb *tp);
 uint32_t ctf_flight_size(struct tcpcb *tp, uint32_t rc_sacked);
 int
-ctf_drop_checks(struct tcpopt *to, struct mbuf *m,
-    struct tcphdr *th, struct tcpcb *tp, int32_t * tlenp, int32_t * thf,
-    int32_t * drop_hdrlen, int32_t * ret_val);
+_ctf_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th,
+    struct tcpcb *tp, int32_t *tlenp,
+    int32_t *thf, int32_t *drop_hdrlen, int32_t *ret_val,
+    uint32_t *ts, uint32_t *cnt);
+void ctf_ack_war_checks(struct tcpcb *tp, uint32_t *ts, uint32_t *cnt);
+#define ctf_drop_checks(a, b, c, d, e, f, g, h) _ctf_drop_checks(a, b, c, d, e, f, g, h, NULL, NULL)
+
 void
-ctf_do_dropafterack(struct mbuf *m, struct tcpcb *tp,
-    struct tcphdr *th, int32_t thflags, int32_t tlen, int32_t * ret_val);
+__ctf_do_dropafterack(struct mbuf *m, struct tcpcb *tp,
+    struct tcphdr *th, int32_t thflags, int32_t tlen,
+    int32_t *ret_val, uint32_t *ts, uint32_t *cnt);
+
+#define ctf_do_dropafterack(a, b, c, d, e, f) __ctf_do_dropafterack(a, b, c, d, e, f, NULL, NULL)
+
 void
 ctf_do_dropwithreset(struct mbuf *m, struct tcpcb *tp,
     struct tcphdr *th, int32_t rstreason, int32_t tlen);

@@ -122,6 +130,9 @@ int
 ctf_ts_check(struct mbuf *m, struct tcphdr *th,
     struct tcpcb *tp, int32_t tlen, int32_t thflags, int32_t * ret_val);

+int
+ctf_ts_check_ac(struct tcpcb *tp, int32_t thflags);
+
 void
 ctf_calc_rwin(struct socket *so, struct tcpcb *tp);
@@ -71,7 +71,7 @@ struct bbr_sendmap {
 	uint32_t r_del_time;	/* The time of the last delivery update */
 	uint8_t r_rtr_cnt:4,	/* Retran count, index this -1 to get time
 				 * sent */
-		unused_bit:1,
+		r_rtt_not_allowed:1,	/* No rtt measurement allowed */
 		r_is_drain:1,	/* In a draining cycle */
 		r_app_limited:1,/* We went app limited */
 		r_ts_valid:1;	/* Timestamp field is valid (r_del_ack_ts) */

@@ -588,9 +588,9 @@ struct bbr_control {
 	uint32_t rc_reorder_ts;	/* Last time we saw reordering Lock(a) */
 	uint32_t rc_init_rwnd;	/* Initial rwnd when we transitioned */
 	/*- ---
 	 * used only initial and close
 	 */
 	uint32_t rc_high_rwnd;	/* Highest rwnd seen */
 	uint32_t rc_lowest_rtt;	/* Smallest RTT we have seen */
@@ -29,7 +29,7 @@
 #define _NETINET_TCP_RACK_H_

 #define RACK_ACKED	    0x0001/* The remote endpoint acked this */
-#define RACK_TO_MIXED	    0x0002/* A timeout occurred that mixed the send order - not used */
+#define RACK_TO_REXT	    0x0002/* A timeout occurred on this sendmap entry */
 #define RACK_DEFERRED	    0x0004/* We can't use this for RTT calc - not used */
 #define RACK_OVERMAX	    0x0008/* We have more retran's then we can fit */
 #define RACK_SACK_PASSED    0x0010/* A sack was done above this block */

@@ -39,37 +39,94 @@
 #define RACK_RWND_COLLAPSED 0x0100/* The peer collapsed the rwnd on the segment */
 #define RACK_APP_LIMITED    0x0200/* We went app limited after this send */
 #define RACK_WAS_ACKED	    0x0400/* a RTO undid the ack, but it already had a rtt calc done */
-#define RACK_HAS_SIN	    0x0800/* SIN is on this guy */
+#define RACK_HAS_SYN	    0x0800/* SYN is on this guy */
+#define RACK_SENT_W_DSACK   0x1000/* Sent with a dsack */
+#define RACK_SENT_SP	    0x2000/* sent in slow path */
+#define RACK_SENT_FP	    0x4000/* sent in fast path */
+#define RACK_HAD_PUSH	    0x8000/* Push was sent on original send */
 #define RACK_NUM_OF_RETRANS 3

-#define RACK_INITIAL_RTO 1000 /* 1 second in milliseconds */
+#define RACK_INITIAL_RTO 1000000 /* 1 second in microseconds */

-#define RACK_REQ_AVG 4 /* Must be less than 256 */
+#define RACK_REQ_AVG 3 /* Must be less than 256 */

 struct rack_sendmap {
+	TAILQ_ENTRY(rack_sendmap) r_tnext;	/* Time of transmit based next */
 	uint32_t r_start;	/* Sequence number of the segment */
 	uint32_t r_end;		/* End seq, this is 1 beyond actually */
-	TAILQ_ENTRY(rack_sendmap) r_tnext;	/* Time of transmit based next */
-	RB_ENTRY(rack_sendmap) r_next;		/* RB Tree next */
 	uint32_t r_rtr_bytes;	/* How many bytes have been retransmitted */
 	uint16_t r_rtr_cnt;	/* Retran count, index this -1 to get time
 				 * sent */
 	uint16_t r_flags;	/* Flags as defined above */
-	uint32_t r_tim_lastsent[RACK_NUM_OF_RETRANS];
-	uint32_t usec_orig_send;	/* time of original send in useconds */
 	struct mbuf *m;
 	uint32_t soff;
 	uint32_t orig_m_len;
 	uint32_t r_nseq_appl;	/* If this one is app limited, this is the nxt seq limited */
-	uint32_t r_ack_arrival;	/* This is the time of ack-arrival (if SACK'd) */
 	uint8_t r_dupack;	/* Dup ack count */
 	uint8_t r_in_tmap;	/* Flag to see if its in the r_tnext array */
 	uint8_t r_limit_type;	/* is this entry counted against a limit? */
 	uint8_t r_just_ret : 1,	/* After sending, the next pkt was just returned, i.e. limited */
 		r_one_out_nr : 1,	/* Special case 1 outstanding and not in recovery */
-		r_avail : 6;
-	uint8_t r_resv[36];
+		r_no_rtt_allowed : 1,	/* No rtt measurement allowed */
+		r_avail : 5;
+	uint64_t r_tim_lastsent[RACK_NUM_OF_RETRANS];
+	uint64_t r_ack_arrival;	/* This is the time of ack-arrival (if SACK'd) */
+	RB_ENTRY(rack_sendmap) r_next;	/* RB Tree next */
 };

+struct deferred_opt_list {
+	TAILQ_ENTRY(deferred_opt_list) next;
+	int optname;
+	uint64_t optval;
+};
+
+/*
+ * Timestamps in the rack sendmap are now moving to be
+ * uint64_t's. This means that if you want a uint32_t
+ * usec timestamp (the old usecond timestamp) you simply have
+ * to cast it to uint32_t. The reason we do this is not for
+ * wrap, but we need to get back, at times, to the millisecond
+ * timestamp that is used in the TSTMP option. To do this we
+ * can use the rack_ts_to_msec() inline below which can take
+ * the 64bit ts and make into the correct timestamp millisecond
+ * wise. That's not possible with the 32bit usecond timestamp since
+ * the seconds wrap too quickly to cover all bases.
+ *
+ * There are quite a few places in rack where I simply cast
+ * back to uint32_t and then end up using the TSTMP_XX()
+ * macros. This is ok, but we could do simple compares if
+ * we ever decided to move all of those variables to 64 bits
+ * as well.
+ */
+
+inline uint64_t
+rack_to_usec_ts(struct timeval *tv)
+{
+	return ((tv->tv_sec * HPTS_USEC_IN_SEC) + tv->tv_usec);
+}
+
+inline uint32_t
+rack_ts_to_msec(uint64_t ts)
+{
+	return((uint32_t)(ts / HPTS_MSEC_IN_SEC));
+}
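A small worked example of the two helpers above, with HPTS_USEC_IN_SEC = 1000000 and HPTS_MSEC_IN_SEC = 1000 as defined in tcp_hpts.h:

#include <stdint.h>
#include <sys/time.h>

#define HPTS_USEC_IN_SEC 1000000	/* per tcp_hpts.h */
#define HPTS_MSEC_IN_SEC 1000		/* per tcp_hpts.h */

int
main(void)
{
	struct timeval tv = { .tv_sec = 100, .tv_usec = 250000 };
	/* 64-bit microsecond stamp, as rack_to_usec_ts() builds it. */
	uint64_t ts = ((uint64_t)tv.tv_sec * HPTS_USEC_IN_SEC) + tv.tv_usec;
	/* rack_ts_to_msec(): divide by 1000 to get the TSTMP-option msec value. */
	uint32_t msec = (uint32_t)(ts / HPTS_MSEC_IN_SEC);	/* 100250 */
	/* The old-style 32-bit usec stamp is just the truncating cast. */
	uint32_t usec32 = (uint32_t)ts;				/* 100250000 */

	return (!(ts == 100250000ULL && msec == 100250 && usec32 == 100250000U));
}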
RB_HEAD(rack_rb_tree_head, rack_sendmap);
|
||||
TAILQ_HEAD(rack_head, rack_sendmap);
|
||||
TAILQ_HEAD(def_opt_head, deferred_opt_list);
|
||||
|
||||
/* Map change logging */
|
||||
#define MAP_MERGE 0x01
|
||||
#define MAP_SPLIT 0x02
|
||||
#define MAP_NEW 0x03
|
||||
#define MAP_SACK_M1 0x04
|
||||
#define MAP_SACK_M2 0x05
|
||||
#define MAP_SACK_M3 0x06
|
||||
#define MAP_SACK_M4 0x07
|
||||
#define MAP_SACK_M5 0x08
|
||||
#define MAP_FREE 0x09
|
||||
#define MAP_TRIM_HEAD 0x0a
|
||||
|
||||
#define RACK_LIMIT_TYPE_SPLIT 1
|
||||
|
||||
@ -128,10 +185,7 @@ struct rack_log {
|
||||
#define RACK_TO_FRM_DELACK 6
|
||||
|
||||
struct rack_opts_stats {
|
||||
uint64_t tcp_rack_prop_rate;
|
||||
uint64_t tcp_rack_prop;
|
||||
uint64_t tcp_rack_tlp_reduce;
|
||||
uint64_t tcp_rack_early_recov;
|
||||
uint64_t tcp_rack_pace_always;
|
||||
uint64_t tcp_rack_pace_reduce;
|
||||
uint64_t tcp_rack_max_seg;
|
||||
@ -177,6 +231,20 @@ struct rack_opts_stats {
|
||||
uint64_t tcp_npush;
|
||||
uint64_t tcp_lscwnd;
|
||||
uint64_t tcp_profile;
|
||||
uint64_t tcp_hdwr_rate_cap;
|
||||
uint64_t tcp_pacing_rate_cap;
|
||||
uint64_t tcp_pacing_up_only;
|
||||
uint64_t tcp_use_cmp_acks;
|
||||
uint64_t tcp_rack_abc_val;
|
||||
uint64_t tcp_rec_abc_val;
|
||||
uint64_t tcp_rack_measure_cnt;
|
||||
uint64_t tcp_rack_delayed_ack;
|
||||
uint64_t tcp_rack_rtt_use;
|
||||
uint64_t tcp_data_after_close;
|
||||
uint64_t tcp_defer_opt;
|
||||
uint64_t tcp_rack_fastrsm_hack;
|
||||
uint64_t tcp_rack_beta;
|
||||
uint64_t tcp_rack_beta_ecn;
|
||||
};
|
||||
|
||||
/* RTT shrink reasons */
|
||||
@ -247,6 +315,23 @@ extern counter_u64_t rack_opts_arry[RACK_OPTS_SIZE];
|
||||
*/
|
||||
#define RACK_GP_HIST 4 /* How much goodput history do we maintain? */
|
||||
|
||||
#define RACK_NUM_FSB_DEBUG 16
|
||||
struct rack_fast_send_blk {
|
||||
uint32_t left_to_send;
|
||||
uint16_t tcp_ip_hdr_len;
|
||||
uint8_t tcp_flags;
|
||||
uint8_t hoplimit;
|
||||
uint8_t *tcp_ip_hdr;
|
||||
uint32_t recwin;
|
||||
uint32_t off;
|
||||
struct tcphdr *th;
|
||||
struct udphdr *udp;
|
||||
struct mbuf *m;
|
||||
uint32_t o_m_len;
|
||||
uint32_t rfo_apply_push : 1,
|
||||
unused : 31;
|
||||
};
|
||||
|
||||
struct rack_control {
	/* Second cache line 0x40 from tcp_rack */
	struct rack_rb_tree_head rc_mtree; /* Tree of all segments Lock(a) */
@ -255,6 +340,7 @@ struct rack_control {
					 * tlp_sending Lock(a) */
	struct rack_sendmap *rc_resend; /* something we have been asked to
					 * resend */
	struct rack_fast_send_blk fsb; /* The fast-send block */
	uint32_t input_pkt;
	uint32_t saved_input_pkt;
	uint32_t rc_hpts_flags;
@ -268,6 +354,9 @@ struct rack_control {

	/* Third Cache line 0x80 */
	struct rack_head rc_free; /* Allocation array */
	uint64_t last_hw_bw_req;
	uint64_t crte_prev_rate;
	uint64_t bw_rate_cap;
	uint32_t rc_time_last_sent; /* Time we last sent some data and
				     * logged it Lock(a). */
	uint32_t rc_reorder_ts; /* Last time we saw reordering Lock(a) */
@ -342,8 +431,8 @@ struct rack_control {
	uint32_t rc_agg_delayed;
	uint32_t rc_tlp_rxt_last_time;
	uint32_t rc_saved_cwnd;
	uint32_t rc_gp_output_ts;
	uint32_t rc_gp_cumack_ts;
	uint64_t rc_gp_output_ts; /* chg */
	uint64_t rc_gp_cumack_ts; /* chg */
	struct timeval act_rcv_time;
	struct timeval rc_last_time_decay; /* SAD time decay happened here */
	uint64_t gp_bw;
@ -354,6 +443,7 @@ struct rack_control {
	uint64_t last_gp_comp_bw;
	uint64_t last_max_bw; /* Our calculated max b/w last */
	struct time_filter_small rc_gp_min_rtt;
	struct def_opt_head opt_list;
	int32_t rc_rtt_diff; /* Timely style rtt diff of our gp_srtt */
	uint32_t rc_gp_srtt; /* Current GP srtt */
	uint32_t rc_prev_gp_srtt; /* Previous RTT */
@ -370,21 +460,40 @@ struct rack_control {
	uint32_t rc_time_of_last_probertt;
	uint32_t rc_target_probertt_flight;
	uint32_t rc_probertt_sndmax_atexit; /* Highest sent to in probe-rtt */
	uint32_t rc_cwnd_at_erec;
	uint32_t rc_ssthresh_at_erec;
	uint32_t dsack_byte_cnt;
	uint32_t retran_during_recovery;
	uint32_t rc_gp_lowrtt; /* Lowest rtt seen during GPUT measurement */
	uint32_t rc_gp_high_rwnd; /* Highest rwnd seen during GPUT measurement */
	uint32_t rc_snd_max_at_rto; /* For non-sack, what snd_max was when the RTO occurred */
	uint32_t rc_out_at_rto;
	int32_t rc_scw_index;
	uint32_t rc_tlp_threshold; /* Socket option value Lock(a) */
	uint32_t rc_last_timeout_snduna;
	uint32_t challenge_ack_ts;
	uint32_t challenge_ack_cnt;
	uint32_t rc_min_to; /* Socket option value Lock(a) */
	uint32_t rc_pkt_delay; /* Socket option value Lock(a) */
	struct newreno rc_saved_beta; /*
				       * For newreno cc:
				       * rc_saved_beta holds the values the user
				       * has set, while pacing is not happening
				       * (i.e. it's early and we have not turned
				       * pacing on yet, or it was turned off). The
				       * minute pacing is turned on we pull out the
				       * values currently being used by newreno,
				       * replace them with these values, and save
				       * the old values here; we also set the flag
				       * (if ecn_beta is set) to make newreno do
				       * less of a backoff for ecn (think ABE).
				       */
	uint16_t rc_early_recovery_segs; /* Socket option value Lock(a) */
	uint16_t rc_reorder_shift; /* Socket option value Lock(a) */
	uint16_t rc_pkt_delay; /* Socket option value Lock(a) */
	uint8_t rc_no_push_at_mrtt; /* No push when we exceed max rtt */
	uint8_t num_avg; /* average count before we go to normal decay */
	uint8_t rc_prop_rate; /* Socket option value Lock(a) */
	uint8_t rc_prop_reduce; /* Socket option value Lock(a) */
	uint8_t num_measurements; /* Number of measurements (up to 0xff; we freeze at 0xff) */
	uint8_t req_measurements; /* How many measurements are required? */
	uint8_t rc_tlp_cwnd_reduce; /* Socket option value Lock(a) */
	uint8_t rc_early_recovery; /* Socket option value Lock(a) */
	uint8_t rc_prr_sendalot; /* Socket option value Lock(a) */
	uint8_t rc_min_to; /* Socket option value Lock(a) */
	uint8_t rc_rate_sample_method;
	uint8_t rc_gp_hist_idx;
};
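
The save-and-swap the rc_saved_beta comment describes can be pictured with a small sketch. This is illustrative only, not the committed rack code; the struct and helper names below are hypothetical, with the 50/80 defaults taken from the newreno beta sysctls:

#include <stdint.h>

struct newreno_sketch {
	uint32_t beta;		/* loss backoff, percent of cwnd kept */
	uint32_t beta_ecn;	/* gentler backoff for ECN marks (ABE) */
};

static struct newreno_sketch cc_live = { 50, 80 };	/* what the cc module is using */
static struct newreno_sketch rc_saved_beta_sketch;	/* rack's stash */

/* Pacing turns on: stash the live values, install rack's preferred ones. */
static void
pacing_on(const struct newreno_sketch *rack_vals)
{
	rc_saved_beta_sketch = cc_live;
	cc_live = *rack_vals;
}

/* Pacing turns off: restore exactly what newreno had before. */
static void
pacing_off(void)
{
	cc_live = rc_saved_beta_sketch;
}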
@ -402,21 +511,57 @@ struct tcp_rack {
	    int32_t, int32_t, uint32_t, int, int, uint8_t); /* Lock(a) */
	struct tcpcb *rc_tp; /* The tcpcb Lock(a) */
	struct inpcb *rc_inp; /* The inpcb Lock(a) */
	uint32_t rc_free_cnt; /* Number of free entries on the rc_free list
	uint8_t rc_free_cnt; /* Number of free entries on the rc_free list
			      * Lock(a) */
	uint8_t client_bufferlvl; /* 0 - 5 normally; less than or at 2 means it's real low */
	uint8_t no_prr_addback : 1,
		gp_ready : 1,
		defer_options: 1,
		fast_rsm_hack: 1,
		rc_ack_can_sendout_data: 1, /*
					     * If set it will override pacing restrictions on not sending
					     * data when the pacing timer is running. I.e. you set this
					     * and an ACK will send data. Default is off and it is only used
					     * without pacing when we are doing 5G speedup for their
					     * ack filtering.
					     */
		rc_pacing_cc_set: 1, /*
				      * If we are pacing (pace_always=1) and we have reached the
				      * point where we start pacing (fixed or gp has reached its
				      * magic gp_ready state) this flag indicates we have set in
				      * values to effect CC's backoffs. If pacing is turned off
				      * then we must restore the values saved in rc_saved_beta;
				      * if it's going to gp_ready we need to copy the values into
				      * the CC module and set our flags.
				      *
				      * Note this only happens if the cc name is newreno (CCALGONAME_NEWRENO).
				      */
		avail :2;
	uint8_t avail_bytes;
	uint32_t rc_rack_rtt; /* RACK-RTT Lock(a) */
	uint16_t r_mbuf_queue : 1, /* Do we do mbuf queue for non-paced */
		rtt_limit_mul : 4, /* multiply this by low rtt */
		r_limit_scw : 1,
		r_avail_bits : 10; /* Available */
		r_must_retran : 1, /* For non-sack customers we hit an RTO and new data should be resent */
		r_use_cmp_ack: 1, /* Do we use compressed acks */
		r_ent_rec_ns: 1, /* We entered recovery and have not sent */
		r_might_revert: 1, /* Flag to find out if we might need to revert */
		r_fast_output: 1, /* Fast output is in progress, we can skip the bulk of rack_output */
		r_fsb_inited: 1,
		r_rack_hw_rate_caps: 1,
		r_up_only: 1,
		r_via_fill_cw : 1,
		r_fill_less_agg : 1;

	uint16_t rc_user_set_max_segs; /* Socket option value Lock(a) */
	uint8_t rc_user_set_max_segs; /* Socket option value Lock(a) */
	uint8_t rc_labc; /* Appropriate Byte Counting Value */
	uint16_t forced_ack : 1,
		rc_gp_incr : 1,
		rc_gp_bwred : 1,
		rc_gp_timely_inc_cnt : 3,
		rc_gp_timely_dec_cnt : 3,
		rc_not_backing_off: 1,
		r_use_labc_for_rec: 1,
		rc_highly_buffered: 1, /* The path is highly buffered */
		rc_dragged_bottom: 1,
		rc_dack_mode : 1, /* Mac O/S emulation of d-ack */
@ -435,7 +580,7 @@ struct tcp_rack {
		rc_always_pace : 1, /* Socket option value Lock(a) */
		rc_pace_to_cwnd : 1,
		rc_pace_fill_if_rttin_range : 1,
		xxx_avail_bits : 1;
		rc_srtt_measure_made : 1;
	uint8_t app_limited_needs_set : 1,
		use_fixed_rate : 1,
		rc_has_collapsed : 1,

@ -193,6 +193,16 @@ SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sad_low_pps,
    &tcp_sad_low_pps, 100,
    "What is the input pps below which we do not decay?");
#endif
uint32_t tcp_ack_war_time_window = 1000;
SYSCTL_UINT(_net_inet_tcp, OID_AUTO, ack_war_timewindow,
    CTLFLAG_RW,
    &tcp_ack_war_time_window, 1000,
    "If the TCP stack does ack-war prevention, how many milliseconds are in its time window?");
uint32_t tcp_ack_war_cnt = 5;
SYSCTL_UINT(_net_inet_tcp, OID_AUTO, ack_war_cnt,
    CTLFLAG_RW,
    &tcp_ack_war_cnt, 5,
    "If the TCP stack does ack-war prevention, how many ACKs can be sent in its time window?");
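
Together these two knobs bound how many ACKs a stack may emit per time window when a peer tries to provoke an ack war; the challenge_ack_ts/challenge_ack_cnt fields added to rack_control above suggest the per-connection state involved. The sketch below only illustrates the windowed counting these sysctls imply; it is not the committed logic, and the state struct and helper are hypothetical:

#include <stdint.h>

extern uint32_t tcp_ack_war_time_window;	/* ms per window */
extern uint32_t tcp_ack_war_cnt;		/* max ACKs per window */

struct ack_war_state {				/* hypothetical per-connection state */
	uint32_t window_start_ms;		/* when the current window opened */
	uint32_t acks_sent;			/* ACKs emitted in this window */
};

/* Return nonzero if another ACK may be sent at time now_ms. */
static int
ack_war_may_send(struct ack_war_state *st, uint32_t now_ms)
{
	if (now_ms - st->window_start_ms >= tcp_ack_war_time_window) {
		st->window_start_ms = now_ms;	/* open a fresh window */
		st->acks_sent = 0;
	}
	if (st->acks_sent >= tcp_ack_war_cnt)
		return (0);	/* suppress: window budget exhausted */
	st->acks_sent++;
	return (1);
}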

struct rwlock tcp_function_lock;

@ -268,6 +278,18 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, ts_offset_per_conn, CTLFLAG_VNET | CTLFLAG_R
    &VNET_NAME(tcp_ts_offset_per_conn), 0,
    "Initialize TCP timestamps per connection instead of per host pair");

/* How many connections are pacing */
static volatile uint32_t number_of_tcp_connections_pacing = 0;
static uint32_t shadow_num_connections = 0;

static int tcp_pacing_limit = 10000;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, pacing_limit, CTLFLAG_RW,
    &tcp_pacing_limit, 1000,
    "If the TCP stack does pacing, is there a limit (-1 = no, 0 = no pacing, N = number of connections)");

SYSCTL_UINT(_net_inet_tcp, OID_AUTO, pacing_count, CTLFLAG_RD,
    &shadow_num_connections, 0, "Number of TCP connections being paced");

static int tcp_log_debug = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_debug, CTLFLAG_RW,
    &tcp_log_debug, 0, "Log errors caused by incoming TCP segments");
@ -3511,6 +3533,54 @@ tcp_maxseg(const struct tcpcb *tp)
	return (tp->t_maxseg - optlen);
}


u_int
tcp_fixed_maxseg(const struct tcpcb *tp)
{
	int optlen;

	if (tp->t_flags & TF_NOOPT)
		return (tp->t_maxseg);

	/*
	 * Here we have simplified code from tcp_addoptions(),
	 * without a proper loop, and with most of the padding hardcoded.
	 * We only consider fixed options that we would send every
	 * time, i.e. SACK is not considered. This is important
	 * for cc modules to figure out what the modulo of the
	 * cwnd should be.
	 */
#define	PAD(len)	((((len) / 4) + !!((len) % 4)) * 4)
	if (TCPS_HAVEESTABLISHED(tp->t_state)) {
		if (tp->t_flags & TF_RCVD_TSTMP)
			optlen = TCPOLEN_TSTAMP_APPA;
		else
			optlen = 0;
#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
		if (tp->t_flags & TF_SIGNATURE)
			optlen += PAD(TCPOLEN_SIGNATURE);
#endif
	} else {
		if (tp->t_flags & TF_REQ_TSTMP)
			optlen = TCPOLEN_TSTAMP_APPA;
		else
			optlen = PAD(TCPOLEN_MAXSEG);
		if (tp->t_flags & TF_REQ_SCALE)
			optlen += PAD(TCPOLEN_WINDOW);
#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
		if (tp->t_flags & TF_SIGNATURE)
			optlen += PAD(TCPOLEN_SIGNATURE);
#endif
		if (tp->t_flags & TF_SACK_PERMIT)
			optlen += PAD(TCPOLEN_SACK_PERMITTED);
	}
#undef PAD
	optlen = min(optlen, TCP_MAXOLEN);
	return (tp->t_maxseg - optlen);
}
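
To make the option arithmetic concrete, here is a small worked check. It is illustrative only; the PAD macro is copied from above, and the constants used (TCPOLEN_TSTAMP_APPA = 12, TCPOLEN_SIGNATURE = 18, a 1460-byte t_maxseg from a 1500-byte IPv4 MTU) are the standard values:

#include <assert.h>

#define PAD(len) ((((len) / 4) + !!((len) % 4)) * 4)

int
main(void)
{
	/* PAD() rounds an option length up to a multiple of 4 (NOP padding). */
	assert(PAD(10) == 12);	/* raw timestamp option (10) pads to 12 */
	assert(PAD(12) == 12);	/* already aligned */
	assert(PAD(18) == 20);	/* MD5 signature option (18) pads to 20 */
	/*
	 * Established connection with timestamps on a 1500-byte IPv4 MTU:
	 * t_maxseg = 1460, optlen = TCPOLEN_TSTAMP_APPA = 12, so
	 * tcp_fixed_maxseg() would report 1448 usable payload bytes.
	 */
	assert(1460 - 12 == 1448);
	return (0);
}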

static int
sysctl_drop(SYSCTL_HANDLER_ARGS)
{
@ -3972,3 +4042,38 @@ tcp_log_end_status(struct tcpcb *tp, uint8_t status)
		}
	}
}

int
tcp_can_enable_pacing(void)
{

	if ((tcp_pacing_limit == -1) ||
	    (tcp_pacing_limit > number_of_tcp_connections_pacing)) {
		atomic_fetchadd_int(&number_of_tcp_connections_pacing, 1);
		shadow_num_connections = number_of_tcp_connections_pacing;
		return (1);
	} else {
		return (0);
	}
}

static uint8_t tcp_pacing_warning = 0;

void
tcp_decrement_paced_conn(void)
{
	uint32_t ret;

	ret = atomic_fetchadd_int(&number_of_tcp_connections_pacing, -1);
	shadow_num_connections = number_of_tcp_connections_pacing;
	KASSERT(ret != 0, ("tcp_paced_connection_exits -1 would cause wrap?"));
	if (ret == 0) {
		if (tcp_pacing_limit != -1) {
			printf("Warning: all pacing is now disabled, count decremented invalidly!\n");
			tcp_pacing_limit = 0;
		} else if (tcp_pacing_warning == 0) {
			printf("Warning: pacing count is invalid, invalid decrement\n");
			tcp_pacing_warning = 1;
		}
	}
}
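
A hedged sketch of the expected calling pattern for this pair follows; the per-connection struct and the callers are hypothetical (in-tree, the pacing-capable stacks are the consumers):

/*
 * Illustrative only. A stack that wants to pace a connection must first
 * ask tcp_can_enable_pacing(); if the net.inet.tcp.pacing_limit budget is
 * already spent it has to fall back to unpaced sending. Every successful
 * grab must later be paired with exactly one tcp_decrement_paced_conn(),
 * or the shared counter drifts and trips the warnings above.
 */
int	tcp_can_enable_pacing(void);
void	tcp_decrement_paced_conn(void);

struct conn_sketch {			/* hypothetical per-connection state */
	int paced;			/* 1 while we hold a pacing slot */
};

static void
conn_start_pacing(struct conn_sketch *c)
{
	c->paced = tcp_can_enable_pacing();	/* 0 means: send unpaced */
}

static void
conn_teardown(struct conn_sketch *c)
{
	if (c->paced) {
		tcp_decrement_paced_conn();	/* give the slot back once */
		c->paced = 0;
	}
}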

@ -258,6 +258,10 @@ struct tcpcb {
	tcp_seq gput_seq; /* Outbound measurement seq */
	tcp_seq gput_ack; /* Inbound measurement ack */
	int32_t t_stats_gput_prev; /* XXXLAS: Prev gput measurement */
	uint32_t t_maxpeakrate; /* max peak rate set by user, in bytes/s */
	uint32_t t_sndtlppack; /* tail loss probe packets sent */
	uint64_t t_sndtlpbyte; /* total tail loss probe bytes sent */

	uint8_t t_tfo_client_cookie_len; /* TCP Fast Open client cookie length */
	uint32_t t_end_info_status; /* Status flag of end info */
	unsigned int *t_tfo_pending; /* TCP Fast Open server pending counter */
@ -974,6 +978,7 @@ void cc_ack_received(struct tcpcb *tp, struct tcphdr *th,
void	cc_conn_init(struct tcpcb *tp);
void	cc_post_recovery(struct tcpcb *tp, struct tcphdr *th);
void	cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos);
void	cc_ecnpkt_handler_flags(struct tcpcb *tp, uint16_t flags, uint8_t iptos);
void	cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type);
#ifdef TCP_HHOOK
void	hhook_run_tcp_est_in(struct tcpcb *tp,
@ -1022,10 +1027,13 @@ extern int32_t tcp_sad_low_pps;
extern int32_t tcp_map_minimum;
extern int32_t tcp_attack_on_turns_on_logging;
#endif
extern uint32_t tcp_ack_war_time_window;
extern uint32_t tcp_ack_war_cnt;

uint32_t tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *);
uint32_t tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *);
u_int	tcp_maxseg(const struct tcpcb *);
u_int	tcp_fixed_maxseg(const struct tcpcb *);
void	tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *,
	    struct tcp_ifcap *);
void	tcp_mss(struct tcpcb *, int);
@ -1075,6 +1083,7 @@ uint32_t tcp_new_ts_offset(struct in_conninfo *);
tcp_seq	tcp_new_isn(struct in_conninfo *);

int	tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
int	tcp_dsack_block_exists(struct tcpcb *);
void	tcp_update_dsack_list(struct tcpcb *, tcp_seq, tcp_seq);
void	tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
void	tcp_clean_dsack_blocks(struct tcpcb *tp);
@ -1090,6 +1099,9 @@ uint32_t tcp_compute_initwnd(uint32_t);
void	tcp_sndbuf_autoscale(struct tcpcb *, struct socket *, uint32_t);
int	tcp_stats_sample_rollthedice(struct tcpcb *tp, void *seed_bytes,
	    size_t seed_len);
int	tcp_can_enable_pacing(void);
void	tcp_decrement_paced_conn(void);

struct mbuf *
	tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen,
	    int32_t seglimit, int32_t segsize, struct sockbuf *sb, bool hw_tls);