This commit is a bit of a rearrangement of deck chairs. It
gets both rack and bbr ready for the completion of the STATS framework in FreeBSD. For now, if you don't have both NETFLIX_STATS and STATS enabled, stats are disabled. As soon as the rest of the stats framework lands we can remove that restriction and just use STATS when it is defined. Sponsored by: Netflix Inc. Differential Revision: https://reviews.freebsd.org/D22479
This commit is contained in:
parent
c7b0edf219
commit
1cf55767b8
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=355859
@ -1,5 +1,5 @@
|
||||
/*-
|
||||
* Copyright (c) 2016-2019
|
||||
* Copyright (c) 2016-9
|
||||
* Netflix Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include "opt_ratelimit.h"
|
||||
#include "opt_kern_tls.h"
|
||||
#include <sys/param.h>
|
||||
#include <sys/arb.h>
|
||||
#include <sys/module.h>
|
||||
#include <sys/kernel.h>
|
||||
#ifdef TCP_HHOOK
|
||||
@ -57,9 +58,9 @@ __FBSDID("$FreeBSD$");
|
||||
#endif
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/systm.h>
|
||||
#ifdef STATS
|
||||
#include <sys/qmath.h>
|
||||
#include <sys/tree.h>
|
||||
#ifdef NETFLIX_STATS
|
||||
#include <sys/stats.h> /* Must come after qmath.h and tree.h */
|
||||
#endif
|
||||
#include <sys/refcount.h>
|
||||
@ -161,8 +162,7 @@ static int32_t bbr_num_pktepo_for_del_limit = BBR_NUM_RTTS_FOR_DEL_LIMIT;
|
||||
static int32_t bbr_hardware_pacing_limit = 8000;
|
||||
static int32_t bbr_quanta = 3; /* How much extra quanta do we get? */
|
||||
static int32_t bbr_no_retran = 0;
|
||||
static int32_t bbr_tcp_map_entries_limit = 1500;
|
||||
static int32_t bbr_tcp_map_split_limit = 256;
|
||||
|
||||
|
||||
static int32_t bbr_error_base_paceout = 10000; /* usec to pace */
|
||||
static int32_t bbr_max_net_error_cnt = 10;
|
||||
@ -3381,8 +3381,8 @@ bbr_alloc(struct tcp_bbr *bbr)
|
||||
static struct bbr_sendmap *
|
||||
bbr_alloc_full_limit(struct tcp_bbr *bbr)
|
||||
{
|
||||
if ((bbr_tcp_map_entries_limit > 0) &&
|
||||
(bbr->r_ctl.rc_num_maps_alloced >= bbr_tcp_map_entries_limit)) {
|
||||
if ((V_tcp_map_entries_limit > 0) &&
|
||||
(bbr->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
|
||||
BBR_STAT_INC(bbr_alloc_limited);
|
||||
if (!bbr->alloc_limit_reported) {
|
||||
bbr->alloc_limit_reported = 1;
|
||||
@ -3402,8 +3402,8 @@ bbr_alloc_limit(struct tcp_bbr *bbr, uint8_t limit_type)
|
||||
|
||||
if (limit_type) {
|
||||
/* currently there is only one limit type */
|
||||
if (bbr_tcp_map_split_limit > 0 &&
|
||||
bbr->r_ctl.rc_num_split_allocs >= bbr_tcp_map_split_limit) {
|
||||
if (V_tcp_map_split_limit > 0 &&
|
||||
bbr->r_ctl.rc_num_split_allocs >= V_tcp_map_split_limit) {
|
||||
BBR_STAT_INC(bbr_split_limited);
|
||||
if (!bbr->alloc_limit_reported) {
|
||||
bbr->alloc_limit_reported = 1;
|
||||
@ -3685,7 +3685,7 @@ bbr_ack_received(struct tcpcb *tp, struct tcp_bbr *bbr, struct tcphdr *th, uint3
|
||||
uint32_t cwnd, target_cwnd, saved_bytes, maxseg;
|
||||
int32_t meth;
|
||||
|
||||
#ifdef NETFLIX_STATS
|
||||
#ifdef STATS
|
||||
if ((tp->t_flags & TF_GPUTINPROG) &&
|
||||
SEQ_GEQ(th->th_ack, tp->gput_ack)) {
|
||||
/*
|
||||
@ -6510,7 +6510,7 @@ tcp_bbr_xmit_timer_commit(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts)
|
||||
}
|
||||
TCPSTAT_INC(tcps_rttupdated);
|
||||
tp->t_rttupdated++;
|
||||
#ifdef NETFLIX_STATS
|
||||
#ifdef STATS
|
||||
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, imax(0, rtt_ticks));
|
||||
#endif
|
||||
/*
|
||||
@ -8490,6 +8490,7 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
if (DELAY_ACK(tp, bbr, nsegs) || tfo_syn) {
|
||||
bbr->bbr_segs_rcvd += max(1, nsegs);
|
||||
@ -8698,6 +8699,7 @@ bbr_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
* reassembly queue and we have enough buffer space to take it.
|
||||
*/
|
||||
nsegs = max(1, m->m_pkthdr.lro_nsegs);
|
||||
|
||||
#ifdef NETFLIX_SB_LIMITS
|
||||
if (so->so_rcv.sb_shlim) {
|
||||
mcnt = m_memcnt(m);
|
||||
@ -8746,6 +8748,7 @@ bbr_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
newsize, so, NULL))
|
||||
so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
|
||||
m_adj(m, drop_hdrlen); /* delayed header drop */
|
||||
|
||||
#ifdef NETFLIX_SB_LIMITS
|
||||
appended =
|
||||
#endif
|
||||
@ -11561,7 +11564,7 @@ bbr_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
* the scale is zero.
|
||||
*/
|
||||
tiwin = th->th_win << tp->snd_scale;
|
||||
#ifdef NETFLIX_STATS
|
||||
#ifdef STATS
|
||||
stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_FRWIN, tiwin);
|
||||
#endif
|
||||
/*
|
||||
@ -11960,7 +11963,7 @@ bbr_do_send_accounting(struct tcpcb *tp, struct tcp_bbr *bbr, struct bbr_sendmap
|
||||
if ((tp->t_flags & TF_FORCEDATA) && len == 1) {
|
||||
/* Window probe */
|
||||
TCPSTAT_INC(tcps_sndprobe);
|
||||
#ifdef NETFLIX_STATS
|
||||
#ifdef STATS
|
||||
stats_voi_update_abs_u32(tp->t_stats,
|
||||
VOI_TCP_RETXPB, len);
|
||||
#endif
|
||||
@ -11981,7 +11984,7 @@ bbr_do_send_accounting(struct tcpcb *tp, struct tcp_bbr *bbr, struct bbr_sendmap
|
||||
tp->t_sndrexmitpack++;
|
||||
TCPSTAT_INC(tcps_sndrexmitpack);
|
||||
TCPSTAT_ADD(tcps_sndrexmitbyte, len);
|
||||
#ifdef NETFLIX_STATS
|
||||
#ifdef STATS
|
||||
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB,
|
||||
len);
|
||||
#endif
|
||||
@ -12017,7 +12020,7 @@ bbr_do_send_accounting(struct tcpcb *tp, struct tcp_bbr *bbr, struct bbr_sendmap
|
||||
/* Place in 17's the total sent */
|
||||
counter_u64_add(bbr_state_resend[17], len);
|
||||
counter_u64_add(bbr_state_lost[17], len);
|
||||
#ifdef NETFLIX_STATS
|
||||
#ifdef STATS
|
||||
stats_voi_update_abs_u64(tp->t_stats, VOI_TCP_TXPB,
|
||||
len);
|
||||
#endif
|
||||
@ -12517,8 +12520,8 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
|
||||
* as long as we are not retransmitting.
|
||||
*/
|
||||
if ((rsm == NULL) &&
|
||||
(bbr_tcp_map_entries_limit > 0) &&
|
||||
(bbr->r_ctl.rc_num_maps_alloced >= bbr_tcp_map_entries_limit)) {
|
||||
(V_tcp_map_entries_limit > 0) &&
|
||||
(bbr->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
|
||||
BBR_STAT_INC(bbr_alloc_limited);
|
||||
if (!bbr->alloc_limit_reported) {
|
||||
bbr->alloc_limit_reported = 1;
|
||||
@ -13256,7 +13259,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
|
||||
SOCKBUF_UNLOCK(&so->so_snd);
|
||||
return (EHOSTUNREACH);
|
||||
}
|
||||
|
||||
hdrlen += sizeof(struct udphdr);
|
||||
}
|
||||
#endif
|
||||
@ -14276,7 +14278,7 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
|
||||
bbr_start_hpts_timer(bbr, tp, cts, 11, slot, 0);
|
||||
return (error);
|
||||
}
|
||||
#ifdef NETFLIX_STATS
|
||||
#ifdef STATS
|
||||
} else if (((tp->t_flags & TF_GPUTINPROG) == 0) &&
|
||||
len &&
|
||||
(rsm == NULL) &&
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*-
|
||||
* Copyright (c) 2016-2019 Netflix, Inc.
|
||||
* Copyright (c) 2016-9 Netflix, Inc.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include "opt_ratelimit.h"
|
||||
#include "opt_kern_tls.h"
|
||||
#include <sys/param.h>
|
||||
#include <sys/arb.h>
|
||||
#include <sys/module.h>
|
||||
#include <sys/kernel.h>
|
||||
#ifdef TCP_HHOOK
|
||||
@ -52,7 +53,7 @@ __FBSDID("$FreeBSD$");
|
||||
#endif
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/systm.h>
|
||||
#ifdef NETFLIX_STATS
|
||||
#ifdef STATS
|
||||
#include <sys/qmath.h>
|
||||
#include <sys/tree.h>
|
||||
#include <sys/stats.h> /* Must come after qmath.h and tree.h */
|
||||
@ -187,21 +188,6 @@ static int32_t rack_persist_max = 1000; /* 1 Second */
|
||||
static int32_t rack_sack_not_required = 0; /* set to one to allow non-sack to use rack */
|
||||
static int32_t rack_hw_tls_max_seg = 0; /* 0 means use hw-tls single segment */
|
||||
|
||||
/* Sack attack detection thresholds and such */
|
||||
static int32_t tcp_force_detection = 0;
|
||||
|
||||
#ifdef NETFLIX_EXP_DETECTION
|
||||
static int32_t tcp_sack_to_ack_thresh = 700; /* 70 % */
|
||||
static int32_t tcp_sack_to_move_thresh = 600; /* 60 % */
|
||||
static int32_t tcp_restoral_thresh = 650; /* 65 % (sack:2:ack -5%) */
|
||||
static int32_t tcp_attack_on_turns_on_logging = 0;
|
||||
static int32_t tcp_map_minimum = 500;
|
||||
#endif
|
||||
static int32_t tcp_sad_decay_val = 800;
|
||||
static int32_t tcp_sad_pacing_interval = 2000;
|
||||
static int32_t tcp_sad_low_pps = 100;
|
||||
|
||||
|
||||
/*
|
||||
* Currently regular tcp has a rto_min of 30ms
|
||||
* the backoff goes 12 times so that ends up
|
||||
@ -226,9 +212,6 @@ static int32_t rack_always_send_oldest = 0;
|
||||
static int32_t rack_use_sack_filter = 1;
|
||||
static int32_t rack_tlp_threshold_use = TLP_USE_TWO_ONE;
|
||||
static int32_t rack_per_of_gp = 50;
|
||||
static int32_t rack_tcp_map_entries_limit = 1500;
|
||||
static int32_t rack_tcp_map_split_limit = 256;
|
||||
|
||||
|
||||
/* Rack specific counters */
|
||||
counter_u64_t rack_badfr;
|
||||
@ -1577,9 +1560,9 @@ rack_alloc(struct tcp_rack *rack)
|
||||
static struct rack_sendmap *
|
||||
rack_alloc_full_limit(struct tcp_rack *rack)
|
||||
{
|
||||
if ((rack_tcp_map_entries_limit > 0) &&
|
||||
if ((V_tcp_map_entries_limit > 0) &&
|
||||
(rack->do_detection == 0) &&
|
||||
(rack->r_ctl.rc_num_maps_alloced >= rack_tcp_map_entries_limit)) {
|
||||
(rack->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
|
||||
counter_u64_add(rack_to_alloc_limited, 1);
|
||||
if (!rack->alloc_limit_reported) {
|
||||
rack->alloc_limit_reported = 1;
|
||||
@ -1598,9 +1581,9 @@ rack_alloc_limit(struct tcp_rack *rack, uint8_t limit_type)
|
||||
|
||||
if (limit_type) {
|
||||
/* currently there is only one limit type */
|
||||
if (rack_tcp_map_split_limit > 0 &&
|
||||
if (V_tcp_map_split_limit > 0 &&
|
||||
(rack->do_detection == 0) &&
|
||||
rack->r_ctl.rc_num_split_allocs >= rack_tcp_map_split_limit) {
|
||||
rack->r_ctl.rc_num_split_allocs >= V_tcp_map_split_limit) {
|
||||
counter_u64_add(rack_split_limited, 1);
|
||||
if (!rack->alloc_limit_reported) {
|
||||
rack->alloc_limit_reported = 1;
|
||||
@ -1648,7 +1631,7 @@ static void
|
||||
rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack, struct tcphdr *th, uint16_t nsegs,
|
||||
uint16_t type, int32_t recovery)
|
||||
{
|
||||
#ifdef NETFLIX_STATS
|
||||
#ifdef STATS
|
||||
int32_t gput;
|
||||
#endif
|
||||
|
||||
@ -1671,7 +1654,7 @@ rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack, struct tcphdr *th, ui
|
||||
tp->ccv->flags &= ~CCF_CWND_LIMITED;
|
||||
|
||||
if (type == CC_ACK) {
|
||||
#ifdef NETFLIX_STATS
|
||||
#ifdef STATS
|
||||
stats_voi_update_abs_s32(tp->t_stats, VOI_TCP_CALCFRWINDIFF,
|
||||
((int32_t) tp->snd_cwnd) - tp->snd_wnd);
|
||||
if ((tp->t_flags & TF_GPUTINPROG) &&
|
||||
@ -1725,7 +1708,7 @@ rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack, struct tcphdr *th, ui
|
||||
tp->ccv->curack = th->th_ack;
|
||||
CC_ALGO(tp)->ack_received(tp->ccv, type);
|
||||
}
|
||||
#ifdef NETFLIX_STATS
|
||||
#ifdef STATS
|
||||
stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_LCWIN, tp->snd_cwnd);
|
||||
#endif
|
||||
if (rack->r_ctl.rc_rack_largest_cwnd < tp->snd_cwnd) {
|
||||
@ -2436,6 +2419,7 @@ rack_start_hpts_timer(struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,
|
||||
slot = 1;
|
||||
}
|
||||
hpts_timeout = rack_timer_start(tp, rack, cts, sup_rack);
|
||||
#ifdef NETFLIX_EXP_DETECTION
|
||||
if (rack->sack_attack_disable &&
|
||||
(slot < USEC_TO_MSEC(tcp_sad_pacing_interval))) {
|
||||
/*
|
||||
@ -2450,6 +2434,7 @@ rack_start_hpts_timer(struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,
|
||||
*/
|
||||
slot = USEC_TO_MSEC(tcp_sad_pacing_interval);
|
||||
}
|
||||
#endif
|
||||
if (tp->t_flags & TF_DELACK) {
|
||||
delayed_ack = TICKS_2_MSEC(tcp_delacktime);
|
||||
rack->r_ctl.rc_hpts_flags |= PACE_TMR_DELACK;
|
||||
@ -3776,7 +3761,8 @@ rack_log_output(struct tcpcb *tp, struct tcpopt *to, int32_t len,
|
||||
seq_out = rack_update_entry(tp, rack, nrsm, ts, &len);
|
||||
if (len == 0) {
|
||||
return;
|
||||
}
|
||||
} else if (len > 0)
|
||||
goto refind;
|
||||
}
|
||||
}
|
||||
/*
|
||||
@ -3912,7 +3898,7 @@ tcp_rack_xmit_timer_commit(struct tcp_rack *rack, struct tcpcb *tp)
|
||||
TCPSTAT_INC(tcps_rttupdated);
|
||||
rack_log_rtt_upd(tp, rack, rtt, o_srtt, o_var);
|
||||
tp->t_rttupdated++;
|
||||
#ifdef NETFLIX_STATS
|
||||
#ifdef STATS
|
||||
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, imax(0, rtt));
|
||||
#endif
|
||||
tp->t_rxtshift = 0;
|
||||
@ -4620,6 +4606,7 @@ rack_peer_reneges(struct tcp_rack *rack, struct rack_sendmap *rsm, tcp_seq th_ac
|
||||
static void
|
||||
rack_do_decay(struct tcp_rack *rack)
|
||||
{
|
||||
#ifdef NETFLIX_EXP_DETECTION
|
||||
struct timeval res;
|
||||
|
||||
#define timersub(tvp, uvp, vvp) \
|
||||
@ -4670,6 +4657,7 @@ rack_do_decay(struct tcp_rack *rack)
|
||||
rack->r_ctl.sack_noextra_move = ctf_decay_count(rack->r_ctl.sack_noextra_move,
|
||||
tcp_sad_decay_val);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
@ -7406,9 +7394,11 @@ rack_init(struct tcpcb *tp)
|
||||
rack->r_ctl.rc_last_time_decay = rack->r_ctl.rc_last_ack;
|
||||
rack->r_ctl.rc_tlp_rxt_last_time = tcp_ts_getticks();
|
||||
/* Do we force on detection? */
|
||||
#ifdef NETFLIX_EXP_DETECTION
|
||||
if (tcp_force_detection)
|
||||
rack->do_detection = 1;
|
||||
else
|
||||
#endif
|
||||
rack->do_detection = 0;
|
||||
if (tp->snd_una != tp->snd_max) {
|
||||
/* Create a send map for the current outstanding data */
|
||||
@ -7701,7 +7691,7 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
* the scale is zero.
|
||||
*/
|
||||
tiwin = th->th_win << tp->snd_scale;
|
||||
#ifdef NETFLIX_STATS
|
||||
#ifdef STATS
|
||||
stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_FRWIN, tiwin);
|
||||
#endif
|
||||
if (tiwin > rack->r_ctl.rc_high_rwnd)
|
||||
@ -8390,8 +8380,8 @@ rack_output(struct tcpcb *tp)
|
||||
*/
|
||||
if ((rsm == NULL) &&
|
||||
(rack->do_detection == 0) &&
|
||||
(rack_tcp_map_entries_limit > 0) &&
|
||||
(rack->r_ctl.rc_num_maps_alloced >= rack_tcp_map_entries_limit)) {
|
||||
(V_tcp_map_entries_limit > 0) &&
|
||||
(rack->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
|
||||
counter_u64_add(rack_to_alloc_limited, 1);
|
||||
if (!rack->alloc_limit_reported) {
|
||||
rack->alloc_limit_reported = 1;
|
||||
@ -9318,7 +9308,7 @@ rack_output(struct tcpcb *tp)
|
||||
}
|
||||
if ((tp->t_flags & TF_FORCEDATA) && len == 1) {
|
||||
TCPSTAT_INC(tcps_sndprobe);
|
||||
#ifdef NETFLIX_STATS
|
||||
#ifdef STATS
|
||||
if (SEQ_LT(tp->snd_nxt, tp->snd_max))
|
||||
stats_voi_update_abs_u32(tp->t_stats,
|
||||
VOI_TCP_RETXPB, len);
|
||||
@ -9339,14 +9329,14 @@ rack_output(struct tcpcb *tp)
|
||||
TCPSTAT_INC(tcps_sndrexmitpack);
|
||||
TCPSTAT_ADD(tcps_sndrexmitbyte, len);
|
||||
}
|
||||
#ifdef NETFLIX_STATS
|
||||
#ifdef STATS
|
||||
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB,
|
||||
len);
|
||||
#endif
|
||||
} else {
|
||||
TCPSTAT_INC(tcps_sndpack);
|
||||
TCPSTAT_ADD(tcps_sndbyte, len);
|
||||
#ifdef NETFLIX_STATS
|
||||
#ifdef STATS
|
||||
stats_voi_update_abs_u64(tp->t_stats, VOI_TCP_TXPB,
|
||||
len);
|
||||
#endif
|
||||
@ -9929,7 +9919,7 @@ rack_output(struct tcpcb *tp)
|
||||
tp->t_rtseq = startseq;
|
||||
TCPSTAT_INC(tcps_segstimed);
|
||||
}
|
||||
#ifdef NETFLIX_STATS
|
||||
#ifdef STATS
|
||||
if (!(tp->t_flags & TF_GPUTINPROG) && len) {
|
||||
tp->t_flags |= TF_GPUTINPROG;
|
||||
tp->gput_seq = startseq;
|
||||
@ -10142,7 +10132,7 @@ rack_set_sockopt(struct socket *so, struct sockopt *sopt,
|
||||
rack = (struct tcp_rack *)tp->t_fb_ptr;
|
||||
switch (sopt->sopt_name) {
|
||||
case TCP_RACK_DO_DETECTION:
|
||||
RACK_OPTS_INC(tcp_rack_no_sack);
|
||||
RACK_OPTS_INC(tcp_rack_do_detection);
|
||||
if (optval == 0)
|
||||
rack->do_detection = 0;
|
||||
else
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*-
|
||||
* Copyright (c) 2016-2018
|
||||
* Copyright (c) 2016-9
|
||||
* Netflix Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include "opt_ratelimit.h"
|
||||
#include "opt_kern_tls.h"
|
||||
#include <sys/param.h>
|
||||
#include <sys/arb.h>
|
||||
#include <sys/module.h>
|
||||
#include <sys/kernel.h>
|
||||
#ifdef TCP_HHOOK
|
||||
@ -133,8 +134,6 @@ __FBSDID("$FreeBSD$");
|
||||
* Common TCP Functions - These are shared by both
|
||||
* rack and BBR.
|
||||
*/
|
||||
|
||||
|
||||
#ifdef KERN_TLS
|
||||
uint32_t
|
||||
ctf_get_opt_tls_size(struct socket *so, uint32_t rwnd)
|
||||
|
@ -1,5 +1,5 @@
|
||||
#ifndef __pacer_timer_h__
|
||||
#define __pacer_timer_h__
|
||||
#ifndef __rack_bbr_common_h__
|
||||
#define __rack_bbr_common_h__
|
||||
/*-
|
||||
* Copyright (c) 2017-9 Netflix, Inc.
|
||||
*
|
||||
@ -26,6 +26,12 @@
|
||||
*
|
||||
* __FBSDID("$FreeBSD$");
|
||||
*/
|
||||
|
||||
/* XXXLAS: Couple STATS to NETFLIX_STATS until stats(3) is fully upstreamed. */
|
||||
#ifndef NETFLIX_STATS
|
||||
#undef STATS
|
||||
#endif
|
||||
|
||||
/* Common defines and such used by both RACK and BBR */
|
||||
/* Special values for mss accounting array */
|
||||
#define TCP_MSS_ACCT_JUSTRET 0
|
||||
@ -46,6 +52,23 @@
|
||||
#define PROGRESS_CLEAR 3
|
||||
#define PROGRESS_START 4
|
||||
|
||||
/* codes for just-return */
|
||||
#define CTF_JR_SENT_DATA 0
|
||||
#define CTF_JR_CWND_LIMITED 1
|
||||
#define CTF_JR_RWND_LIMITED 2
|
||||
#define CTF_JR_APP_LIMITED 3
|
||||
#define CTF_JR_ASSESSING 4
|
||||
#define CTF_JR_PERSISTS 5
|
||||
#define CTF_JR_PRR 6
|
||||
|
||||
/* Compat. */
|
||||
#define BBR_JR_SENT_DATA CTF_JR_SENT_DATA
|
||||
#define BBR_JR_CWND_LIMITED CTF_JR_CWND_LIMITED
|
||||
#define BBR_JR_RWND_LIMITED CTF_JR_RWND_LIMITED
|
||||
#define BBR_JR_APP_LIMITED CTF_JR_APP_LIMITED
|
||||
#define BBR_JR_ASSESSING CTF_JR_ASSESSING
|
||||
#define BBR_JR_PERSISTS CTF_JR_PERSISTS
|
||||
#define BBR_JR_PRR CTF_JR_PRR
|
||||
|
||||
/* RTT sample methods */
|
||||
#define USE_RTT_HIGH 0
|
||||
@ -59,6 +82,13 @@
|
||||
/*
 * Divide x by MS_IN_USEC (defined elsewhere).  The argument is
 * parenthesized so that an expression such as USEC_TO_MSEC(a + b)
 * divides the whole sum rather than only its last operand.
 */
#define USEC_TO_MSEC(x) ((x) / MS_IN_USEC)
|
||||
#define TCP_TS_OVERHEAD 12 /* Overhead of having Timestamps on */
|
||||
|
||||
/* Bits per second in bytes per second */
|
||||
#define FORTY_EIGHT_MBPS 6000000 /* 48 megabits in bytes */
|
||||
#define THIRTY_MBPS 3750000 /* 30 megabits in bytes */
|
||||
#define TWENTY_THREE_MBPS 2896000
|
||||
#define FIVETWELVE_MBPS 64000000 /* 512 megabits in bytes */
|
||||
#define ONE_POINT_TWO_MEG 150000 /* 1.2 megabits in bytes */
|
||||
|
||||
#ifdef _KERNEL
|
||||
/* We have only 7 bits in rack, so assert that it's true */
|
||||
CTASSERT((PACE_TMR_MASK & 0x80) == 0);
|
||||
|
@ -25,11 +25,16 @@
|
||||
*/
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
#ifndef _KERNEL
|
||||
#define _WANT_TCPCB 1
|
||||
#endif
|
||||
#include <sys/types.h>
|
||||
#include <sys/queue.h>
|
||||
#include <sys/socket.h>
|
||||
#ifdef _KERNEL
|
||||
#include <sys/mbuf.h>
|
||||
#include <sys/sockopt.h>
|
||||
#endif
|
||||
#include <netinet/tcp.h>
|
||||
#include <netinet/tcp_var.h>
|
||||
#include <netinet/tcp_seq.h>
|
||||
|
@ -128,12 +128,6 @@ TAILQ_HEAD(bbr_head, bbr_sendmap);
|
||||
* an clear to start measuring */
|
||||
#define BBR_RED_BW_PE_NOEARLY_OUT 7 /* Set pkt epoch judged that we do not
|
||||
* get out of jail early */
|
||||
/* codes for just-return */
|
||||
#define BBR_JR_SENT_DATA 0
|
||||
#define BBR_JR_CWND_LIMITED 1
|
||||
#define BBR_JR_RWND_LIMITED 2
|
||||
#define BBR_JR_APP_LIMITED 3
|
||||
#define BBR_JR_ASSESSING 4
|
||||
/* For calculating a rate */
|
||||
#define BBR_CALC_BW 1
|
||||
#define BBR_CALC_LOSS 2
|
||||
@ -385,13 +379,6 @@ struct bbr_log_sysctl_out {
|
||||
|
||||
#define BBR_BIG_LOG_SIZE 300000
|
||||
|
||||
/* Bits per second in bytes per second */
|
||||
#define FORTY_EIGHT_MBPS 6000000 /* 48 megabits in bytes */
|
||||
#define THIRTY_MBPS 3750000 /* 30 megabits in bytes */
|
||||
#define TWENTY_THREE_MBPS 2896000
|
||||
#define FIVETWELVE_MBPS 64000000 /* 512 megabits in bytes */
|
||||
#define ONE_POINT_TWO_MEG 150000 /* 1.2 megabits in bytes */
|
||||
|
||||
struct bbr_stats {
|
||||
uint64_t bbr_badfr; /* 0 */
|
||||
uint64_t bbr_badfr_bytes; /* 1 */
|
||||
|
@ -137,7 +137,7 @@ struct rack_opts_stats {
|
||||
uint64_t tcp_rack_min_pace_seg;
|
||||
uint64_t tcp_rack_min_pace;
|
||||
uint64_t tcp_rack_cheat;
|
||||
uint64_t tcp_rack_no_sack;
|
||||
uint64_t tcp_rack_do_detection;
|
||||
};
|
||||
|
||||
#define TLP_USE_ID 1 /* Internet draft behavior */
|
||||
|
@ -138,6 +138,58 @@ VNET_DEFINE(int, tcp_mssdflt) = TCP_MSS;
|
||||
VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS;
|
||||
#endif
|
||||
|
||||
#ifdef NETFLIX_EXP_DETECTION
/*
 * Sack attack detection thresholds and such.
 *
 * These tunables are exported under net.inet.tcp.sack_attack.  The
 * threshold and decay values are expressed in tenths of a percent
 * (10.1 percent is 101), as stated in each description string.
 *
 * Per sysctl(9) the "val" argument of SYSCTL_INT() is not used when a
 * non-NULL pointer is supplied; it is kept equal to the variable's
 * initializer here so the two can never be read as disagreeing.
 */
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack_attack, CTLFLAG_RW, 0,
    "Sack Attack detection thresholds");

/* Non-zero forces detection on regardless of the per-connection setting. */
int32_t tcp_force_detection = 0;
SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, force_detection,
    CTLFLAG_RW,
    &tcp_force_detection, 0,
    "Do we force detection even if the INP has it off?");

int32_t tcp_sack_to_ack_thresh = 700;	/* 70 % */
SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sack_to_ack_thresh,
    CTLFLAG_RW,
    &tcp_sack_to_ack_thresh, 700,
    "Percentage of sacks to acks we must see above (10.1 percent is 101)?");

int32_t tcp_sack_to_move_thresh = 600;	/* 60 % */
SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, move_thresh,
    CTLFLAG_RW,
    &tcp_sack_to_move_thresh, 600,
    "Percentage of sack moves we must see above (10.1 percent is 101)");

/*
 * The val argument previously read 550 while the variable is
 * initialized to 650; it now matches the initializer.
 */
int32_t tcp_restoral_thresh = 650;	/* 65 % (sack:2:ack -5%) */
SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, restore_thresh,
    CTLFLAG_RW,
    &tcp_restoral_thresh, 650,
    "Percentage of sack to ack percentage we must see below to restore(10.1 percent is 101)");

int32_t tcp_sad_decay_val = 800;
SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, decay_per,
    CTLFLAG_RW,
    &tcp_sad_decay_val, 800,
    "The decay percentage (10.1 percent equals 101 )");

int32_t tcp_map_minimum = 500;
SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, nummaps,
    CTLFLAG_RW,
    &tcp_map_minimum, 500,
    "Number of Map enteries before we start detection");

int32_t tcp_attack_on_turns_on_logging = 0;
SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, attacks_logged,
    CTLFLAG_RW,
    &tcp_attack_on_turns_on_logging, 0,
    "When we have a positive hit on attack, do we turn on logging?");

int32_t tcp_sad_pacing_interval = 2000;
SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sad_pacing_int,
    CTLFLAG_RW,
    &tcp_sad_pacing_interval, 2000,
    "What is the minimum pacing interval for a classified attacker?");

int32_t tcp_sad_low_pps = 100;
SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sad_low_pps,
    CTLFLAG_RW,
    &tcp_sad_low_pps, 100,
    "What is the input pps that below which we do not decay?");
#endif
|
||||
|
||||
struct rwlock tcp_function_lock;
|
||||
|
||||
static int
|
||||
@ -240,6 +292,34 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, soreceive_stream, CTLFLAG_RDTUN,
|
||||
|
||||
VNET_DEFINE(uma_zone_t, sack_hole_zone);
|
||||
#define V_sack_hole_zone VNET(sack_hole_zone)
|
||||
VNET_DEFINE(uint32_t, tcp_map_entries_limit) = 0; /* unlimited */
|
||||
static int
|
||||
sysctl_net_inet_tcp_map_limit_check(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
int error;
|
||||
uint32_t new;
|
||||
|
||||
new = V_tcp_map_entries_limit;
|
||||
error = sysctl_handle_int(oidp, &new, 0, req);
|
||||
if (error == 0 && req->newptr) {
|
||||
/* only allow "0" and value > minimum */
|
||||
if (new > 0 && new < TCP_MIN_MAP_ENTRIES_LIMIT)
|
||||
error = EINVAL;
|
||||
else
|
||||
V_tcp_map_entries_limit = new;
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, map_limit,
|
||||
CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
|
||||
&VNET_NAME(tcp_map_entries_limit), 0,
|
||||
&sysctl_net_inet_tcp_map_limit_check, "IU",
|
||||
"Total sendmap entries limit");
|
||||
|
||||
VNET_DEFINE(uint32_t, tcp_map_split_limit) = 0; /* unlimited */
|
||||
SYSCTL_UINT(_net_inet_tcp, OID_AUTO, split_limit, CTLFLAG_VNET | CTLFLAG_RW,
|
||||
&VNET_NAME(tcp_map_split_limit), 0,
|
||||
"Total sendmap split entries limit");
|
||||
|
||||
#ifdef TCP_HHOOK
|
||||
VNET_DEFINE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST+1]);
|
||||
|
@ -235,6 +235,9 @@ struct tcptemp {
|
||||
struct tcphdr tt_t;
|
||||
};
|
||||
|
||||
/* Minimum map entries limit value, if set */
|
||||
#define TCP_MIN_MAP_ENTRIES_LIMIT 128
|
||||
|
||||
/*
|
||||
* TODO: We yet need to brave plowing in
|
||||
* to tcp_input() and the pru_usrreq() block.
|
||||
@ -790,6 +793,8 @@ VNET_DECLARE(int, tcp_ecn_maxretries);
|
||||
VNET_DECLARE(int, tcp_initcwnd_segments);
|
||||
VNET_DECLARE(int, tcp_insecure_rst);
|
||||
VNET_DECLARE(int, tcp_insecure_syn);
|
||||
VNET_DECLARE(uint32_t, tcp_map_entries_limit);
|
||||
VNET_DECLARE(uint32_t, tcp_map_split_limit);
|
||||
VNET_DECLARE(int, tcp_minmss);
|
||||
VNET_DECLARE(int, tcp_mssdflt);
|
||||
#ifdef STATS
|
||||
@ -830,6 +835,8 @@ VNET_DECLARE(struct inpcbinfo, tcbinfo);
|
||||
#define V_tcp_initcwnd_segments VNET(tcp_initcwnd_segments)
|
||||
#define V_tcp_insecure_rst VNET(tcp_insecure_rst)
|
||||
#define V_tcp_insecure_syn VNET(tcp_insecure_syn)
|
||||
#define V_tcp_map_entries_limit VNET(tcp_map_entries_limit)
|
||||
#define V_tcp_map_split_limit VNET(tcp_map_split_limit)
|
||||
#define V_tcp_minmss VNET(tcp_minmss)
|
||||
#define V_tcp_mssdflt VNET(tcp_mssdflt)
|
||||
#ifdef STATS
|
||||
@ -845,7 +852,6 @@ VNET_DECLARE(struct inpcbinfo, tcbinfo);
|
||||
#define V_tcp_udp_tunneling_overhead VNET(tcp_udp_tunneling_overhead)
|
||||
#define V_tcp_udp_tunneling_port VNET(tcp_udp_tunneling_port)
|
||||
|
||||
|
||||
#ifdef TCP_HHOOK
|
||||
VNET_DECLARE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST + 1]);
|
||||
#define V_tcp_hhh VNET(tcp_hhh)
|
||||
@ -915,6 +921,19 @@ extern counter_u64_t tcp_inp_lro_single_push;
|
||||
extern counter_u64_t tcp_inp_lro_locks_taken;
|
||||
extern counter_u64_t tcp_inp_lro_sack_wake;
|
||||
|
||||
#ifdef NETFLIX_EXP_DETECTION
|
||||
/* Various SACK attack thresholds */
|
||||
extern int32_t tcp_force_detection;
|
||||
extern int32_t tcp_sack_to_ack_thresh;
|
||||
extern int32_t tcp_sack_to_move_thresh;
|
||||
extern int32_t tcp_restoral_thresh;
|
||||
extern int32_t tcp_sad_decay_val;
|
||||
extern int32_t tcp_sad_pacing_interval;
|
||||
extern int32_t tcp_sad_low_pps;
|
||||
extern int32_t tcp_map_minimum;
|
||||
extern int32_t tcp_attack_on_turns_on_logging;
|
||||
#endif
|
||||
|
||||
uint32_t tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *);
|
||||
uint32_t tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *);
|
||||
u_int tcp_maxseg(const struct tcpcb *);
|
||||
|
Loading…
Reference in New Issue
Block a user