This commit is a bit of a rearranging of deck chairs. It gets both
rack and bbr ready for the completion of the STATS framework in
FreeBSD. For now, stats support is disabled unless both NETFLIX_STATS
and STATS are defined. As soon as the rest of the stats framework
lands, we can remove that restriction and simply use STATS wherever it
is defined.

Sponsored by:	Netflix Inc.
Differential Revision:	https://reviews.freebsd.org/D22479
Randall Stewart 2019-12-17 16:08:07 +00:00
parent c7b0edf219
commit 1cf55767b8
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=355859
9 changed files with 187 additions and 75 deletions
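
The gating is implemented in the common header (fourth file below):
until stats(3) is fully upstreamed, STATS is honored only when
NETFLIX_STATS is also defined. A minimal sketch of the coupling and of
how a stats consumer ends up guarded, assembled from lines in the diff
(the rtt variable is illustrative):

/* In the shared header: couple STATS to NETFLIX_STATS. */
#ifndef NETFLIX_STATS
#undef STATS
#endif

/* Include order matters: stats.h must come after qmath.h and tree.h. */
#ifdef STATS
#include <sys/qmath.h>
#include <sys/tree.h>
#include <sys/stats.h>
#endif

/* Call sites then key off STATS instead of NETFLIX_STATS, e.g.: */
#ifdef STATS
	stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, imax(0, rtt));
#endif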

View File

@ -1,5 +1,5 @@
/*-
* Copyright (c) 2016-2019
* Copyright (c) 2016-9
* Netflix Inc.
* All rights reserved.
*
@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include "opt_ratelimit.h"
#include "opt_kern_tls.h"
#include <sys/param.h>
#include <sys/arb.h>
#include <sys/module.h>
#include <sys/kernel.h>
#ifdef TCP_HHOOK
@ -57,9 +58,9 @@ __FBSDID("$FreeBSD$");
#endif
#include <sys/sysctl.h>
#include <sys/systm.h>
#ifdef STATS
#include <sys/qmath.h>
#include <sys/tree.h>
#ifdef NETFLIX_STATS
#include <sys/stats.h> /* Must come after qmath.h and tree.h */
#endif
#include <sys/refcount.h>
@ -161,8 +162,7 @@ static int32_t bbr_num_pktepo_for_del_limit = BBR_NUM_RTTS_FOR_DEL_LIMIT;
static int32_t bbr_hardware_pacing_limit = 8000;
static int32_t bbr_quanta = 3; /* How much extra quanta do we get? */
static int32_t bbr_no_retran = 0;
static int32_t bbr_tcp_map_entries_limit = 1500;
static int32_t bbr_tcp_map_split_limit = 256;
static int32_t bbr_error_base_paceout = 10000; /* usec to pace */
static int32_t bbr_max_net_error_cnt = 10;
@ -3381,8 +3381,8 @@ bbr_alloc(struct tcp_bbr *bbr)
static struct bbr_sendmap *
bbr_alloc_full_limit(struct tcp_bbr *bbr)
{
if ((bbr_tcp_map_entries_limit > 0) &&
(bbr->r_ctl.rc_num_maps_alloced >= bbr_tcp_map_entries_limit)) {
if ((V_tcp_map_entries_limit > 0) &&
(bbr->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
BBR_STAT_INC(bbr_alloc_limited);
if (!bbr->alloc_limit_reported) {
bbr->alloc_limit_reported = 1;
@ -3402,8 +3402,8 @@ bbr_alloc_limit(struct tcp_bbr *bbr, uint8_t limit_type)
if (limit_type) {
/* currently there is only one limit type */
if (bbr_tcp_map_split_limit > 0 &&
bbr->r_ctl.rc_num_split_allocs >= bbr_tcp_map_split_limit) {
if (V_tcp_map_split_limit > 0 &&
bbr->r_ctl.rc_num_split_allocs >= V_tcp_map_split_limit) {
BBR_STAT_INC(bbr_split_limited);
if (!bbr->alloc_limit_reported) {
bbr->alloc_limit_reported = 1;
@ -3685,7 +3685,7 @@ bbr_ack_received(struct tcpcb *tp, struct tcp_bbr *bbr, struct tcphdr *th, uint3
uint32_t cwnd, target_cwnd, saved_bytes, maxseg;
int32_t meth;
#ifdef NETFLIX_STATS
#ifdef STATS
if ((tp->t_flags & TF_GPUTINPROG) &&
SEQ_GEQ(th->th_ack, tp->gput_ack)) {
/*
@ -6510,7 +6510,7 @@ tcp_bbr_xmit_timer_commit(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts)
}
TCPSTAT_INC(tcps_rttupdated);
tp->t_rttupdated++;
#ifdef NETFLIX_STATS
#ifdef STATS
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, imax(0, rtt_ticks));
#endif
/*
@ -8490,6 +8490,7 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
return (0);
}
}
#endif
if (DELAY_ACK(tp, bbr, nsegs) || tfo_syn) {
bbr->bbr_segs_rcvd += max(1, nsegs);
@ -8698,6 +8699,7 @@ bbr_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so,
* reassembly queue and we have enough buffer space to take it.
*/
nsegs = max(1, m->m_pkthdr.lro_nsegs);
#ifdef NETFLIX_SB_LIMITS
if (so->so_rcv.sb_shlim) {
mcnt = m_memcnt(m);
@ -8746,6 +8748,7 @@ bbr_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so,
newsize, so, NULL))
so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
m_adj(m, drop_hdrlen); /* delayed header drop */
#ifdef NETFLIX_SB_LIMITS
appended =
#endif
@ -11561,7 +11564,7 @@ bbr_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
* the scale is zero.
*/
tiwin = th->th_win << tp->snd_scale;
#ifdef NETFLIX_STATS
#ifdef STATS
stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_FRWIN, tiwin);
#endif
/*
@ -11960,7 +11963,7 @@ bbr_do_send_accounting(struct tcpcb *tp, struct tcp_bbr *bbr, struct bbr_sendmap
if ((tp->t_flags & TF_FORCEDATA) && len == 1) {
/* Window probe */
TCPSTAT_INC(tcps_sndprobe);
#ifdef NETFLIX_STATS
#ifdef STATS
stats_voi_update_abs_u32(tp->t_stats,
VOI_TCP_RETXPB, len);
#endif
@ -11981,7 +11984,7 @@ bbr_do_send_accounting(struct tcpcb *tp, struct tcp_bbr *bbr, struct bbr_sendmap
tp->t_sndrexmitpack++;
TCPSTAT_INC(tcps_sndrexmitpack);
TCPSTAT_ADD(tcps_sndrexmitbyte, len);
#ifdef NETFLIX_STATS
#ifdef STATS
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB,
len);
#endif
@ -12017,7 +12020,7 @@ bbr_do_send_accounting(struct tcpcb *tp, struct tcp_bbr *bbr, struct bbr_sendmap
/* Place in 17's the total sent */
counter_u64_add(bbr_state_resend[17], len);
counter_u64_add(bbr_state_lost[17], len);
#ifdef NETFLIX_STATS
#ifdef STATS
stats_voi_update_abs_u64(tp->t_stats, VOI_TCP_TXPB,
len);
#endif
@ -12517,8 +12520,8 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
* as long as we are not retransmitting.
*/
if ((rsm == NULL) &&
(bbr_tcp_map_entries_limit > 0) &&
(bbr->r_ctl.rc_num_maps_alloced >= bbr_tcp_map_entries_limit)) {
(V_tcp_map_entries_limit > 0) &&
(bbr->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
BBR_STAT_INC(bbr_alloc_limited);
if (!bbr->alloc_limit_reported) {
bbr->alloc_limit_reported = 1;
@ -13256,7 +13259,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
SOCKBUF_UNLOCK(&so->so_snd);
return (EHOSTUNREACH);
}
hdrlen += sizeof(struct udphdr);
}
#endif
@ -14276,7 +14278,7 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
bbr_start_hpts_timer(bbr, tp, cts, 11, slot, 0);
return (error);
}
#ifdef NETFLIX_STATS
#ifdef STATS
} else if (((tp->t_flags & TF_GPUTINPROG) == 0) &&
len &&
(rsm == NULL) &&

View File

@ -1,5 +1,5 @@
/*-
* Copyright (c) 2016-2019 Netflix, Inc.
* Copyright (c) 2016-9 Netflix, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
#include "opt_ratelimit.h"
#include "opt_kern_tls.h"
#include <sys/param.h>
#include <sys/arb.h>
#include <sys/module.h>
#include <sys/kernel.h>
#ifdef TCP_HHOOK
@ -52,7 +53,7 @@ __FBSDID("$FreeBSD$");
#endif
#include <sys/sysctl.h>
#include <sys/systm.h>
#ifdef NETFLIX_STATS
#ifdef STATS
#include <sys/qmath.h>
#include <sys/tree.h>
#include <sys/stats.h> /* Must come after qmath.h and tree.h */
@ -187,21 +188,6 @@ static int32_t rack_persist_max = 1000; /* 1 Second */
static int32_t rack_sack_not_required = 0; /* set to one to allow non-sack to use rack */
static int32_t rack_hw_tls_max_seg = 0; /* 0 means use hw-tls single segment */
/* Sack attack detection thresholds and such */
static int32_t tcp_force_detection = 0;
#ifdef NETFLIX_EXP_DETECTION
static int32_t tcp_sack_to_ack_thresh = 700; /* 70 % */
static int32_t tcp_sack_to_move_thresh = 600; /* 60 % */
static int32_t tcp_restoral_thresh = 650; /* 65 % (sack:2:ack -5%) */
static int32_t tcp_attack_on_turns_on_logging = 0;
static int32_t tcp_map_minimum = 500;
#endif
static int32_t tcp_sad_decay_val = 800;
static int32_t tcp_sad_pacing_interval = 2000;
static int32_t tcp_sad_low_pps = 100;
/*
* Currently regular tcp has a rto_min of 30ms
* the backoff goes 12 times so that ends up
@ -226,9 +212,6 @@ static int32_t rack_always_send_oldest = 0;
static int32_t rack_use_sack_filter = 1;
static int32_t rack_tlp_threshold_use = TLP_USE_TWO_ONE;
static int32_t rack_per_of_gp = 50;
static int32_t rack_tcp_map_entries_limit = 1500;
static int32_t rack_tcp_map_split_limit = 256;
/* Rack specific counters */
counter_u64_t rack_badfr;
@ -1577,9 +1560,9 @@ rack_alloc(struct tcp_rack *rack)
static struct rack_sendmap *
rack_alloc_full_limit(struct tcp_rack *rack)
{
if ((rack_tcp_map_entries_limit > 0) &&
if ((V_tcp_map_entries_limit > 0) &&
(rack->do_detection == 0) &&
(rack->r_ctl.rc_num_maps_alloced >= rack_tcp_map_entries_limit)) {
(rack->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
counter_u64_add(rack_to_alloc_limited, 1);
if (!rack->alloc_limit_reported) {
rack->alloc_limit_reported = 1;
@ -1598,9 +1581,9 @@ rack_alloc_limit(struct tcp_rack *rack, uint8_t limit_type)
if (limit_type) {
/* currently there is only one limit type */
if (rack_tcp_map_split_limit > 0 &&
if (V_tcp_map_split_limit > 0 &&
(rack->do_detection == 0) &&
rack->r_ctl.rc_num_split_allocs >= rack_tcp_map_split_limit) {
rack->r_ctl.rc_num_split_allocs >= V_tcp_map_split_limit) {
counter_u64_add(rack_split_limited, 1);
if (!rack->alloc_limit_reported) {
rack->alloc_limit_reported = 1;
@ -1648,7 +1631,7 @@ static void
rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack, struct tcphdr *th, uint16_t nsegs,
uint16_t type, int32_t recovery)
{
#ifdef NETFLIX_STATS
#ifdef STATS
int32_t gput;
#endif
@ -1671,7 +1654,7 @@ rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack, struct tcphdr *th, ui
tp->ccv->flags &= ~CCF_CWND_LIMITED;
if (type == CC_ACK) {
#ifdef NETFLIX_STATS
#ifdef STATS
stats_voi_update_abs_s32(tp->t_stats, VOI_TCP_CALCFRWINDIFF,
((int32_t) tp->snd_cwnd) - tp->snd_wnd);
if ((tp->t_flags & TF_GPUTINPROG) &&
@ -1725,7 +1708,7 @@ rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack, struct tcphdr *th, ui
tp->ccv->curack = th->th_ack;
CC_ALGO(tp)->ack_received(tp->ccv, type);
}
#ifdef NETFLIX_STATS
#ifdef STATS
stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_LCWIN, tp->snd_cwnd);
#endif
if (rack->r_ctl.rc_rack_largest_cwnd < tp->snd_cwnd) {
@ -2436,6 +2419,7 @@ rack_start_hpts_timer(struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,
slot = 1;
}
hpts_timeout = rack_timer_start(tp, rack, cts, sup_rack);
#ifdef NETFLIX_EXP_DETECTION
if (rack->sack_attack_disable &&
(slot < USEC_TO_MSEC(tcp_sad_pacing_interval))) {
/*
@ -2450,6 +2434,7 @@ rack_start_hpts_timer(struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,
*/
slot = USEC_TO_MSEC(tcp_sad_pacing_interval);
}
#endif
if (tp->t_flags & TF_DELACK) {
delayed_ack = TICKS_2_MSEC(tcp_delacktime);
rack->r_ctl.rc_hpts_flags |= PACE_TMR_DELACK;
@ -3776,7 +3761,8 @@ rack_log_output(struct tcpcb *tp, struct tcpopt *to, int32_t len,
seq_out = rack_update_entry(tp, rack, nrsm, ts, &len);
if (len == 0) {
return;
}
} else if (len > 0)
goto refind;
}
}
/*
@ -3912,7 +3898,7 @@ tcp_rack_xmit_timer_commit(struct tcp_rack *rack, struct tcpcb *tp)
TCPSTAT_INC(tcps_rttupdated);
rack_log_rtt_upd(tp, rack, rtt, o_srtt, o_var);
tp->t_rttupdated++;
#ifdef NETFLIX_STATS
#ifdef STATS
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, imax(0, rtt));
#endif
tp->t_rxtshift = 0;
@ -4620,6 +4606,7 @@ rack_peer_reneges(struct tcp_rack *rack, struct rack_sendmap *rsm, tcp_seq th_ac
static void
rack_do_decay(struct tcp_rack *rack)
{
#ifdef NETFLIX_EXP_DETECTION
struct timeval res;
#define timersub(tvp, uvp, vvp) \
@ -4670,6 +4657,7 @@ rack_do_decay(struct tcp_rack *rack)
rack->r_ctl.sack_noextra_move = ctf_decay_count(rack->r_ctl.sack_noextra_move,
tcp_sad_decay_val);
}
#endif
}
static void
@ -7406,9 +7394,11 @@ rack_init(struct tcpcb *tp)
rack->r_ctl.rc_last_time_decay = rack->r_ctl.rc_last_ack;
rack->r_ctl.rc_tlp_rxt_last_time = tcp_ts_getticks();
/* Do we force on detection? */
#ifdef NETFLIX_EXP_DETECTION
if (tcp_force_detection)
rack->do_detection = 1;
else
#endif
rack->do_detection = 0;
if (tp->snd_una != tp->snd_max) {
/* Create a send map for the current outstanding data */
@ -7701,7 +7691,7 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
* the scale is zero.
*/
tiwin = th->th_win << tp->snd_scale;
#ifdef NETFLIX_STATS
#ifdef STATS
stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_FRWIN, tiwin);
#endif
if (tiwin > rack->r_ctl.rc_high_rwnd)
@ -8390,8 +8380,8 @@ rack_output(struct tcpcb *tp)
*/
if ((rsm == NULL) &&
(rack->do_detection == 0) &&
(rack_tcp_map_entries_limit > 0) &&
(rack->r_ctl.rc_num_maps_alloced >= rack_tcp_map_entries_limit)) {
(V_tcp_map_entries_limit > 0) &&
(rack->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
counter_u64_add(rack_to_alloc_limited, 1);
if (!rack->alloc_limit_reported) {
rack->alloc_limit_reported = 1;
@ -9318,7 +9308,7 @@ rack_output(struct tcpcb *tp)
}
if ((tp->t_flags & TF_FORCEDATA) && len == 1) {
TCPSTAT_INC(tcps_sndprobe);
#ifdef NETFLIX_STATS
#ifdef STATS
if (SEQ_LT(tp->snd_nxt, tp->snd_max))
stats_voi_update_abs_u32(tp->t_stats,
VOI_TCP_RETXPB, len);
@ -9339,14 +9329,14 @@ rack_output(struct tcpcb *tp)
TCPSTAT_INC(tcps_sndrexmitpack);
TCPSTAT_ADD(tcps_sndrexmitbyte, len);
}
#ifdef NETFLIX_STATS
#ifdef STATS
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB,
len);
#endif
} else {
TCPSTAT_INC(tcps_sndpack);
TCPSTAT_ADD(tcps_sndbyte, len);
#ifdef NETFLIX_STATS
#ifdef STATS
stats_voi_update_abs_u64(tp->t_stats, VOI_TCP_TXPB,
len);
#endif
@ -9929,7 +9919,7 @@ rack_output(struct tcpcb *tp)
tp->t_rtseq = startseq;
TCPSTAT_INC(tcps_segstimed);
}
#ifdef NETFLIX_STATS
#ifdef STATS
if (!(tp->t_flags & TF_GPUTINPROG) && len) {
tp->t_flags |= TF_GPUTINPROG;
tp->gput_seq = startseq;
@ -10142,7 +10132,7 @@ rack_set_sockopt(struct socket *so, struct sockopt *sopt,
rack = (struct tcp_rack *)tp->t_fb_ptr;
switch (sopt->sopt_name) {
case TCP_RACK_DO_DETECTION:
RACK_OPTS_INC(tcp_rack_no_sack);
RACK_OPTS_INC(tcp_rack_do_detection);
if (optval == 0)
rack->do_detection = 0;
else

View File

@ -1,5 +1,5 @@
/*-
* Copyright (c) 2016-2018
* Copyright (c) 2016-9
* Netflix Inc.
* All rights reserved.
*
@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include "opt_ratelimit.h"
#include "opt_kern_tls.h"
#include <sys/param.h>
#include <sys/arb.h>
#include <sys/module.h>
#include <sys/kernel.h>
#ifdef TCP_HHOOK
@ -133,8 +134,6 @@ __FBSDID("$FreeBSD$");
* Common TCP Functions - These are shared by both
* rack and BBR.
*/
#ifdef KERN_TLS
uint32_t
ctf_get_opt_tls_size(struct socket *so, uint32_t rwnd)

View File

@ -1,5 +1,5 @@
#ifndef __pacer_timer_h__
#define __pacer_timer_h__
#ifndef __rack_bbr_common_h__
#define __rack_bbr_common_h__
/*-
* Copyright (c) 2017-9 Netflix, Inc.
*
@ -26,6 +26,12 @@
*
* __FBSDID("$FreeBSD$");
*/
/* XXXLAS: Couple STATS to NETFLIX_STATS until stats(3) is fully upstreamed. */
#ifndef NETFLIX_STATS
#undef STATS
#endif
/* Common defines and such used by both RACK and BBR */
/* Special values for mss accounting array */
#define TCP_MSS_ACCT_JUSTRET 0
@ -46,6 +52,23 @@
#define PROGRESS_CLEAR 3
#define PROGRESS_START 4
/* codes for just-return */
#define CTF_JR_SENT_DATA 0
#define CTF_JR_CWND_LIMITED 1
#define CTF_JR_RWND_LIMITED 2
#define CTF_JR_APP_LIMITED 3
#define CTF_JR_ASSESSING 4
#define CTF_JR_PERSISTS 5
#define CTF_JR_PRR 6
/* Compat. */
#define BBR_JR_SENT_DATA CTF_JR_SENT_DATA
#define BBR_JR_CWND_LIMITED CTF_JR_CWND_LIMITED
#define BBR_JR_RWND_LIMITED CTF_JR_RWND_LIMITED
#define BBR_JR_APP_LIMITED CTF_JR_APP_LIMITED
#define BBR_JR_ASSESSING CTF_JR_ASSESSING
#define BBR_JR_PERSISTS CTF_JR_PERSISTS
#define BBR_JR_PRR CTF_JR_PRR
/* RTT sample methods */
#define USE_RTT_HIGH 0
@ -59,6 +82,13 @@
#define USEC_TO_MSEC(x) (x / MS_IN_USEC)
#define TCP_TS_OVERHEAD 12 /* Overhead of having Timestamps on */
/* Bits per second in bytes per second */
#define FORTY_EIGHT_MBPS 6000000 /* 48 megabits in bytes */
#define THIRTY_MBPS 3750000 /* 30 megabits in bytes */
#define TWENTY_THREE_MBPS 2896000
#define FIVETWELVE_MBPS 64000000 /* 512 megabits in bytes */
#define ONE_POINT_TWO_MEG 150000 /* 1.2 megabits in bytes */
#ifdef _KERNEL
/* We have only 7 bits in rack so assert its true */
CTASSERT((PACE_TMR_MASK & 0x80) == 0);

View File

@ -25,11 +25,16 @@
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#ifndef _KERNEL
#define _WANT_TCPCB 1
#endif
#include <sys/types.h>
#include <sys/queue.h>
#include <sys/socket.h>
#ifdef _KERNEL
#include <sys/mbuf.h>
#include <sys/sockopt.h>
#endif
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_seq.h>

View File

@ -128,12 +128,6 @@ TAILQ_HEAD(bbr_head, bbr_sendmap);
* and clear to start measuring */
#define BBR_RED_BW_PE_NOEARLY_OUT 7 /* Set pkt epoch judged that we do not
* get out of jail early */
/* codes for just-return */
#define BBR_JR_SENT_DATA 0
#define BBR_JR_CWND_LIMITED 1
#define BBR_JR_RWND_LIMITED 2
#define BBR_JR_APP_LIMITED 3
#define BBR_JR_ASSESSING 4
/* For calculating a rate */
#define BBR_CALC_BW 1
#define BBR_CALC_LOSS 2
@ -385,13 +379,6 @@ struct bbr_log_sysctl_out {
#define BBR_BIG_LOG_SIZE 300000
/* Bits per second in bytes per second */
#define FORTY_EIGHT_MBPS 6000000 /* 48 megabits in bytes */
#define THIRTY_MBPS 3750000 /* 30 megabits in bytes */
#define TWENTY_THREE_MBPS 2896000
#define FIVETWELVE_MBPS 64000000 /* 512 megabits in bytes */
#define ONE_POINT_TWO_MEG 150000 /* 1.2 megabits in bytes */
struct bbr_stats {
uint64_t bbr_badfr; /* 0 */
uint64_t bbr_badfr_bytes; /* 1 */

View File

@ -137,7 +137,7 @@ struct rack_opts_stats {
uint64_t tcp_rack_min_pace_seg;
uint64_t tcp_rack_min_pace;
uint64_t tcp_rack_cheat;
uint64_t tcp_rack_no_sack;
uint64_t tcp_rack_do_detection;
};
#define TLP_USE_ID 1 /* Internet draft behavior */

View File

@ -138,6 +138,58 @@ VNET_DEFINE(int, tcp_mssdflt) = TCP_MSS;
VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS;
#endif
#ifdef NETFLIX_EXP_DETECTION
/* Sack attack detection thresholds and such */
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack_attack, CTLFLAG_RW, 0,
"Sack Attack detection thresholds");
int32_t tcp_force_detection = 0;
SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, force_detection,
CTLFLAG_RW,
&tcp_force_detection, 0,
"Do we force detection even if the INP has it off?");
int32_t tcp_sack_to_ack_thresh = 700; /* 70 % */
SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sack_to_ack_thresh,
CTLFLAG_RW,
&tcp_sack_to_ack_thresh, 700,
"Percentage of sacks to acks we must see above (10.1 percent is 101)?");
int32_t tcp_sack_to_move_thresh = 600; /* 60 % */
SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, move_thresh,
CTLFLAG_RW,
&tcp_sack_to_move_thresh, 600,
"Percentage of sack moves we must see above (10.1 percent is 101)");
int32_t tcp_restoral_thresh = 650; /* 65 % (sack:2:ack -5%) */
SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, restore_thresh,
CTLFLAG_RW,
&tcp_restoral_thresh, 550,
"Percentage of sack to ack percentage we must see below to restore(10.1 percent is 101)");
int32_t tcp_sad_decay_val = 800;
SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, decay_per,
CTLFLAG_RW,
&tcp_sad_decay_val, 800,
"The decay percentage (10.1 percent equals 101 )");
int32_t tcp_map_minimum = 500;
SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, nummaps,
CTLFLAG_RW,
&tcp_map_minimum, 500,
"Number of Map enteries before we start detection");
int32_t tcp_attack_on_turns_on_logging = 0;
SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, attacks_logged,
CTLFLAG_RW,
&tcp_attack_on_turns_on_logging, 0,
"When we have a positive hit on attack, do we turn on logging?");
int32_t tcp_sad_pacing_interval = 2000;
SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sad_pacing_int,
CTLFLAG_RW,
&tcp_sad_pacing_interval, 2000,
"What is the minimum pacing interval for a classified attacker?");
int32_t tcp_sad_low_pps = 100;
SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sad_low_pps,
CTLFLAG_RW,
&tcp_sad_low_pps, 100,
"What is the input pps that below which we do not decay?");
#endif
struct rwlock tcp_function_lock;
static int
@ -240,6 +292,34 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, soreceive_stream, CTLFLAG_RDTUN,
VNET_DEFINE(uma_zone_t, sack_hole_zone);
#define V_sack_hole_zone VNET(sack_hole_zone)
VNET_DEFINE(uint32_t, tcp_map_entries_limit) = 0; /* unlimited */
static int
sysctl_net_inet_tcp_map_limit_check(SYSCTL_HANDLER_ARGS)
{
int error;
uint32_t new;
new = V_tcp_map_entries_limit;
error = sysctl_handle_int(oidp, &new, 0, req);
if (error == 0 && req->newptr) {
/* only allow "0" and value > minimum */
if (new > 0 && new < TCP_MIN_MAP_ENTRIES_LIMIT)
error = EINVAL;
else
V_tcp_map_entries_limit = new;
}
return (error);
}
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, map_limit,
CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
&VNET_NAME(tcp_map_entries_limit), 0,
&sysctl_net_inet_tcp_map_limit_check, "IU",
"Total sendmap entries limit");
VNET_DEFINE(uint32_t, tcp_map_split_limit) = 0; /* unlimited */
SYSCTL_UINT(_net_inet_tcp, OID_AUTO, split_limit, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_map_split_limit), 0,
"Total sendmap split entries limit");
#ifdef TCP_HHOOK
VNET_DEFINE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST+1]);

View File

@ -235,6 +235,9 @@ struct tcptemp {
struct tcphdr tt_t;
};
/* Minimum map entries limit value, if set */
#define TCP_MIN_MAP_ENTRIES_LIMIT 128
/*
* TODO: We yet need to brave plowing in
* to tcp_input() and the pru_usrreq() block.
@ -790,6 +793,8 @@ VNET_DECLARE(int, tcp_ecn_maxretries);
VNET_DECLARE(int, tcp_initcwnd_segments);
VNET_DECLARE(int, tcp_insecure_rst);
VNET_DECLARE(int, tcp_insecure_syn);
VNET_DECLARE(uint32_t, tcp_map_entries_limit);
VNET_DECLARE(uint32_t, tcp_map_split_limit);
VNET_DECLARE(int, tcp_minmss);
VNET_DECLARE(int, tcp_mssdflt);
#ifdef STATS
@ -830,6 +835,8 @@ VNET_DECLARE(struct inpcbinfo, tcbinfo);
#define V_tcp_initcwnd_segments VNET(tcp_initcwnd_segments)
#define V_tcp_insecure_rst VNET(tcp_insecure_rst)
#define V_tcp_insecure_syn VNET(tcp_insecure_syn)
#define V_tcp_map_entries_limit VNET(tcp_map_entries_limit)
#define V_tcp_map_split_limit VNET(tcp_map_split_limit)
#define V_tcp_minmss VNET(tcp_minmss)
#define V_tcp_mssdflt VNET(tcp_mssdflt)
#ifdef STATS
@ -845,7 +852,6 @@ VNET_DECLARE(struct inpcbinfo, tcbinfo);
#define V_tcp_udp_tunneling_overhead VNET(tcp_udp_tunneling_overhead)
#define V_tcp_udp_tunneling_port VNET(tcp_udp_tunneling_port)
#ifdef TCP_HHOOK
VNET_DECLARE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST + 1]);
#define V_tcp_hhh VNET(tcp_hhh)
@ -915,6 +921,19 @@ extern counter_u64_t tcp_inp_lro_single_push;
extern counter_u64_t tcp_inp_lro_locks_taken;
extern counter_u64_t tcp_inp_lro_sack_wake;
#ifdef NETFLIX_EXP_DETECTION
/* Various SACK attack thresholds */
extern int32_t tcp_force_detection;
extern int32_t tcp_sack_to_ack_thresh;
extern int32_t tcp_sack_to_move_thresh;
extern int32_t tcp_restoral_thresh;
extern int32_t tcp_sad_decay_val;
extern int32_t tcp_sad_pacing_interval;
extern int32_t tcp_sad_low_pps;
extern int32_t tcp_map_minimum;
extern int32_t tcp_attack_on_turns_on_logging;
#endif
uint32_t tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *);
uint32_t tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *);
u_int tcp_maxseg(const struct tcpcb *);
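
With the per-stack statics removed, both map limits are vnet sysctls
that can be tuned at runtime; 0 means unlimited. Per the handler in
tcp_subr.c above, a nonzero map_limit below TCP_MIN_MAP_ENTRIES_LIMIT
(128) is rejected with EINVAL. A hypothetical tuning session restoring
the old built-in defaults (1500 map entries, 256 splits):

# sysctl net.inet.tcp.map_limit=1500
# sysctl net.inet.tcp.split_limit=256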