Make use of the stats(3) framework in the TCP stack.
This makes it possible to retrieve per-connection statistical information such as the receive window size, RTT, or goodput, using a newly added TCP_STATS getsockopt(2) option, and extract them using the stats_voistat_fetch(3) API. See the net/tcprtt port for an example consumer of this API. Compared to the existing TCP_INFO system, the main differences are that this mechanism is easy to extend without breaking ABI, and provides statistical information instead of raw "snapshots" of values at a given point in time. stats(3) is more generic and can be used in both userland and the kernel. Reviewed by: thj Tested by: thj Obtained from: Netflix Relnotes: yes Sponsored by: Klara Inc, Netflix Differential Revision: https://reviews.freebsd.org/D20655
This commit is contained in:
parent
79c1428ed6
commit
adc56f5a38
@ -3,12 +3,12 @@
|
||||
LIB= stats
|
||||
SHLIBDIR?= /lib
|
||||
SHLIB_MAJOR= 0
|
||||
SRCS= subr_stats.c
|
||||
SRCS= subr_stats.c tcp_stats.c
|
||||
|
||||
# To debug, comment WITHOUT_ASSERT_DEBUG= and uncomment CFLAGS:=
|
||||
WITHOUT_ASSERT_DEBUG=
|
||||
#CFLAGS:=${CFLAGS:C/-O[0-9]/-O0 -g3/} -DDIAGNOSTIC
|
||||
|
||||
.PATH: ${.CURDIR}/../../sys/kern
|
||||
.PATH: ${.CURDIR}/../../sys/kern ${.CURDIR}/../../sys/netinet
|
||||
|
||||
.include <bsd.lib.mk>
|
||||
|
@ -34,7 +34,7 @@
|
||||
.\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd December 1, 2019
|
||||
.Dd December 2, 2019
|
||||
.Dt TCP 4
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -291,6 +291,10 @@ This entry can only be specified on a per-host basis at this time.
|
||||
.Pp
|
||||
If an SADB entry cannot be found for the destination,
|
||||
the system does not send any outgoing segments and drops any inbound segments.
|
||||
.It Dv TCP_STATS
|
||||
Manage collection of connection level statistics using the
|
||||
.Xr stats 3
|
||||
framework.
|
||||
.Pp
|
||||
Each dropped segment is taken into account in the TCP protocol statistics.
|
||||
.It Dv TCP_TXTLS_ENABLE
|
||||
@ -664,6 +668,17 @@ Default is false.
|
||||
When initializing the TCP timestamps, use a per connection offset instead of a
|
||||
per host pair offset.
|
||||
Default is to use per connection offsets as recommended in RFC 7323.
|
||||
.It Va perconn_stats_enable
|
||||
Controls the default collection of statistics for all connections using the
|
||||
.Xr stats 3
|
||||
framework.
|
||||
0 disables, 1 enables, 2 enables random sampling across log id connection
|
||||
groups with all connections in a group receiving the same setting.
|
||||
.It Va perconn_stats_sample_rates
|
||||
A CSV list of template_spec=percent key-value pairs which controls the per
|
||||
template sampling rates when
|
||||
.Xr stats 3
|
||||
sampling is enabled.
|
||||
.El
|
||||
.Sh ERRORS
|
||||
A socket operation may fail with one of the following errors returned:
|
||||
@ -703,6 +718,7 @@ when trying to use a TCP function block that is not available;
|
||||
.Sh SEE ALSO
|
||||
.Xr getsockopt 2 ,
|
||||
.Xr socket 2 ,
|
||||
.Xr stats 3 ,
|
||||
.Xr sysctl 3 ,
|
||||
.Xr blackhole 4 ,
|
||||
.Xr inet 4 ,
|
||||
|
@ -4295,6 +4295,7 @@ netinet/tcp_pcap.c optional inet tcppcap | inet6 tcppcap \
|
||||
compile-with "${NORMAL_C} ${NO_WNONNULL}"
|
||||
netinet/tcp_reass.c optional inet | inet6
|
||||
netinet/tcp_sack.c optional inet | inet6
|
||||
netinet/tcp_stats.c optional stats inet | stats inet6
|
||||
netinet/tcp_subr.c optional inet | inet6
|
||||
netinet/tcp_syncache.c optional inet | inet6
|
||||
netinet/tcp_timer.c optional inet | inet6
|
||||
|
@ -51,9 +51,7 @@
|
||||
#ifndef _NETINET_CC_CC_H_
|
||||
#define _NETINET_CC_CC_H_
|
||||
|
||||
#if !defined(_KERNEL)
|
||||
#error "no user-serviceable parts inside"
|
||||
#endif
|
||||
#ifdef _KERNEL
|
||||
|
||||
/* Global CC vars. */
|
||||
extern STAILQ_HEAD(cc_head, cc_algo) cc_list;
|
||||
@ -108,6 +106,7 @@ struct cc_var {
|
||||
#define CC_DUPACK 0x0002 /* Duplicate ACK. */
|
||||
#define CC_PARTIALACK 0x0004 /* Not yet. */
|
||||
#define CC_SACK 0x0008 /* Not yet. */
|
||||
#endif /* _KERNEL */
|
||||
|
||||
/*
|
||||
* Congestion signal types passed to the cong_signal() hook. The highest order 8
|
||||
@ -121,6 +120,7 @@ struct cc_var {
|
||||
|
||||
#define CC_SIGPRIVMASK 0xFF000000 /* Mask to check if sig is private. */
|
||||
|
||||
#ifdef _KERNEL
|
||||
/*
|
||||
* Structure to hold data and function pointers that together represent a
|
||||
* congestion control algorithm.
|
||||
@ -184,4 +184,5 @@ extern struct rwlock cc_list_lock;
|
||||
|
||||
#define CC_ALGOOPT_LIMIT 2048
|
||||
|
||||
#endif /* _KERNEL */
|
||||
#endif /* _NETINET_CC_CC_H_ */
|
||||
|
@ -168,6 +168,7 @@ struct tcphdr {
|
||||
#define TCP_NOOPT 8 /* don't use TCP options */
|
||||
#define TCP_MD5SIG 16 /* use MD5 digests (RFC2385) */
|
||||
#define TCP_INFO 32 /* retrieve tcp_info structure */
|
||||
#define TCP_STATS 33 /* retrieve stats blob structure */
|
||||
#define TCP_LOG 34 /* configure event logging for connection */
|
||||
#define TCP_LOGBUF 35 /* retrieve event log for connection */
|
||||
#define TCP_LOGID 36 /* configure log ID to correlate connections */
|
||||
@ -364,4 +365,18 @@ struct tcp_function_set {
|
||||
*/
|
||||
#define TLS_SET_RECORD_TYPE 1
|
||||
|
||||
/*
|
||||
* TCP specific variables of interest for tp->t_stats stats(9) accounting.
|
||||
*/
|
||||
#define VOI_TCP_TXPB 0 /* Transmit payload bytes */
|
||||
#define VOI_TCP_RETXPB 1 /* Retransmit payload bytes */
|
||||
#define VOI_TCP_FRWIN 2 /* Foreign receive window */
|
||||
#define VOI_TCP_LCWIN 3 /* Local congestion window */
|
||||
#define VOI_TCP_RTT 4 /* Round trip time */
|
||||
#define VOI_TCP_CSIG 5 /* Congestion signal */
|
||||
#define VOI_TCP_GPUT 6 /* Goodput */
|
||||
#define VOI_TCP_CALCFRWINDIFF 7 /* Congestion avoidance LCWIN - FRWIN */
|
||||
#define VOI_TCP_GPUT_ND 8 /* Goodput normalised delta */
|
||||
#define VOI_TCP_ACKLEN 9 /* Average ACKed bytes per ACK */
|
||||
|
||||
#endif /* !_NETINET_TCP_H_ */
|
||||
|
@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include "opt_tcpdebug.h"
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/arb.h>
|
||||
#include <sys/kernel.h>
|
||||
#ifdef TCP_HHOOK
|
||||
#include <sys/hhook.h>
|
||||
@ -66,6 +67,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/mbuf.h>
|
||||
#include <sys/proc.h> /* for proc0 declaration */
|
||||
#include <sys/protosw.h>
|
||||
#include <sys/qmath.h>
|
||||
#include <sys/sdt.h>
|
||||
#include <sys/signalvar.h>
|
||||
#include <sys/socket.h>
|
||||
@ -73,6 +75,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/syslog.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/stats.h>
|
||||
|
||||
#include <machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */
|
||||
|
||||
@ -298,6 +301,10 @@ void
|
||||
cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t nsegs,
|
||||
uint16_t type)
|
||||
{
|
||||
#ifdef STATS
|
||||
int32_t gput;
|
||||
#endif
|
||||
|
||||
INP_WLOCK_ASSERT(tp->t_inpcb);
|
||||
|
||||
tp->ccv->nsegs = nsegs;
|
||||
@ -310,6 +317,35 @@ cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t nsegs,
|
||||
tp->ccv->flags &= ~CCF_CWND_LIMITED;
|
||||
|
||||
if (type == CC_ACK) {
|
||||
#ifdef STATS
|
||||
stats_voi_update_abs_s32(tp->t_stats, VOI_TCP_CALCFRWINDIFF,
|
||||
((int32_t)tp->snd_cwnd) - tp->snd_wnd);
|
||||
if (!IN_RECOVERY(tp->t_flags))
|
||||
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_ACKLEN,
|
||||
tp->ccv->bytes_this_ack / (tcp_maxseg(tp) * nsegs));
|
||||
if ((tp->t_flags & TF_GPUTINPROG) &&
|
||||
SEQ_GEQ(th->th_ack, tp->gput_ack)) {
|
||||
/*
|
||||
* Compute goodput in bits per millisecond.
|
||||
*/
|
||||
gput = (((int64_t)(th->th_ack - tp->gput_seq)) << 3) /
|
||||
max(1, tcp_ts_getticks() - tp->gput_ts);
|
||||
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_GPUT,
|
||||
gput);
|
||||
/*
|
||||
* XXXLAS: This is a temporary hack, and should be
|
||||
* chained off VOI_TCP_GPUT when stats(9) grows an API
|
||||
* to deal with chained VOIs.
|
||||
*/
|
||||
if (tp->t_stats_gput_prev > 0)
|
||||
stats_voi_update_abs_s32(tp->t_stats,
|
||||
VOI_TCP_GPUT_ND,
|
||||
((gput - tp->t_stats_gput_prev) * 100) /
|
||||
tp->t_stats_gput_prev);
|
||||
tp->t_flags &= ~TF_GPUTINPROG;
|
||||
tp->t_stats_gput_prev = gput;
|
||||
}
|
||||
#endif /* STATS */
|
||||
if (tp->snd_cwnd > tp->snd_ssthresh) {
|
||||
tp->t_bytes_acked += min(tp->ccv->bytes_this_ack,
|
||||
nsegs * V_tcp_abc_l_var * tcp_maxseg(tp));
|
||||
@ -328,6 +364,9 @@ cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t nsegs,
|
||||
tp->ccv->curack = th->th_ack;
|
||||
CC_ALGO(tp)->ack_received(tp->ccv, type);
|
||||
}
|
||||
#ifdef STATS
|
||||
stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_LCWIN, tp->snd_cwnd);
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
@ -393,6 +432,10 @@ cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type)
|
||||
|
||||
INP_WLOCK_ASSERT(tp->t_inpcb);
|
||||
|
||||
#ifdef STATS
|
||||
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_CSIG, type);
|
||||
#endif
|
||||
|
||||
switch(type) {
|
||||
case CC_NDUPACK:
|
||||
if (!IN_FASTRECOVERY(tp->t_flags)) {
|
||||
@ -1496,6 +1539,9 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
* For the SYN_SENT state the scale is zero.
|
||||
*/
|
||||
tiwin = th->th_win << tp->snd_scale;
|
||||
#ifdef STATS
|
||||
stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_FRWIN, tiwin);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* TCP ECN processing.
|
||||
@ -3359,6 +3405,10 @@ tcp_xmit_timer(struct tcpcb *tp, int rtt)
|
||||
|
||||
TCPSTAT_INC(tcps_rttupdated);
|
||||
tp->t_rttupdated++;
|
||||
#ifdef STATS
|
||||
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT,
|
||||
imax(0, rtt * 1000 / hz));
|
||||
#endif
|
||||
if ((tp->t_srtt != 0) && (tp->t_rxtshift <= TCP_RTT_INVALIDATE)) {
|
||||
/*
|
||||
* srtt is stored as fixed point with 5 bits after the
|
||||
|
@ -30,10 +30,12 @@
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/arb.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/qmath.h>
|
||||
#include <sys/queue.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/rwlock.h>
|
||||
@ -41,6 +43,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/socketvar.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/tree.h>
|
||||
#include <sys/stats.h>
|
||||
#include <sys/counter.h>
|
||||
|
||||
#include <dev/tcp_log/tcp_log_dev.h>
|
||||
@ -475,7 +478,7 @@ tcp_log_grow_tlb(char *tlb_id, struct tcpcb *tp)
|
||||
|
||||
INP_WLOCK_ASSERT(tp->t_inpcb);
|
||||
|
||||
#ifdef NETFLIX
|
||||
#ifdef STATS
|
||||
if (V_tcp_perconn_stats_enable == 2 && tp->t_stats == NULL)
|
||||
(void)tcp_stats_sample_rollthedice(tp, tlb_id, strlen(tlb_id));
|
||||
#endif
|
||||
|
@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/arb.h>
|
||||
#include <sys/domain.h>
|
||||
#ifdef TCP_HHOOK
|
||||
#include <sys/hhook.h>
|
||||
@ -54,10 +55,12 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/mbuf.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/protosw.h>
|
||||
#include <sys/qmath.h>
|
||||
#include <sys/sdt.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/socketvar.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/stats.h>
|
||||
|
||||
#include <net/if.h>
|
||||
#include <net/route.h>
|
||||
@ -991,15 +994,31 @@ tcp_output(struct tcpcb *tp)
|
||||
struct sockbuf *msb;
|
||||
u_int moff;
|
||||
|
||||
if ((tp->t_flags & TF_FORCEDATA) && len == 1)
|
||||
if ((tp->t_flags & TF_FORCEDATA) && len == 1) {
|
||||
TCPSTAT_INC(tcps_sndprobe);
|
||||
else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) {
|
||||
#ifdef STATS
|
||||
if (SEQ_LT(tp->snd_nxt, tp->snd_max))
|
||||
stats_voi_update_abs_u32(tp->t_stats,
|
||||
VOI_TCP_RETXPB, len);
|
||||
else
|
||||
stats_voi_update_abs_u64(tp->t_stats,
|
||||
VOI_TCP_TXPB, len);
|
||||
#endif /* STATS */
|
||||
} else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) {
|
||||
tp->t_sndrexmitpack++;
|
||||
TCPSTAT_INC(tcps_sndrexmitpack);
|
||||
TCPSTAT_ADD(tcps_sndrexmitbyte, len);
|
||||
#ifdef STATS
|
||||
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB,
|
||||
len);
|
||||
#endif /* STATS */
|
||||
} else {
|
||||
TCPSTAT_INC(tcps_sndpack);
|
||||
TCPSTAT_ADD(tcps_sndbyte, len);
|
||||
#ifdef STATS
|
||||
stats_voi_update_abs_u64(tp->t_stats, VOI_TCP_TXPB,
|
||||
len);
|
||||
#endif /* STATS */
|
||||
}
|
||||
#ifdef INET6
|
||||
if (MHLEN < hdrlen + max_linkhdr)
|
||||
@ -1472,6 +1491,15 @@ tcp_output(struct tcpcb *tp)
|
||||
tp->t_rtseq = startseq;
|
||||
TCPSTAT_INC(tcps_segstimed);
|
||||
}
|
||||
#ifdef STATS
|
||||
if (!(tp->t_flags & TF_GPUTINPROG) && len) {
|
||||
tp->t_flags |= TF_GPUTINPROG;
|
||||
tp->gput_seq = startseq;
|
||||
tp->gput_ack = startseq +
|
||||
ulmin(sbavail(&so->so_snd) - off, sendwin);
|
||||
tp->gput_ts = tcp_ts_getticks();
|
||||
}
|
||||
#endif /* STATS */
|
||||
}
|
||||
|
||||
/*
|
||||
|
274
sys/netinet/tcp_stats.c
Normal file
274
sys/netinet/tcp_stats.c
Normal file
@ -0,0 +1,274 @@
|
||||
/*-
|
||||
* Copyright (c) 2016-2018 Netflix, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Author: Lawrence Stewart <lstewart@netflix.com>
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/arb.h>
|
||||
#include <sys/errno.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/qmath.h>
|
||||
#include <sys/queue.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/socketvar.h>
|
||||
#include <sys/sysctl.h>
|
||||
#ifdef _KERNEL
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/rmlock.h>
|
||||
#include <sys/systm.h>
|
||||
#endif
|
||||
#include <sys/stats.h>
|
||||
|
||||
#include <net/vnet.h>
|
||||
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/in_pcb.h>
|
||||
#include <netinet/tcp.h>
|
||||
#include <netinet/tcp_var.h>
|
||||
|
||||
#include <netinet/cc/cc.h>
|
||||
|
||||
VNET_DEFINE(int, tcp_perconn_stats_dflt_tpl) = -1;
|
||||
|
||||
#ifndef _KERNEL
|
||||
#define V_tcp_perconn_stats_enable VNET(tcp_perconn_stats_enable)
|
||||
#define V_tcp_perconn_stats_dflt_tpl VNET(tcp_perconn_stats_dflt_tpl)
|
||||
#else /* _KERNEL */
|
||||
|
||||
VNET_DEFINE(int, tcp_perconn_stats_enable) = 2;
|
||||
VNET_DEFINE_STATIC(struct stats_tpl_sample_rate *, tcp_perconn_stats_sample_rates);
|
||||
VNET_DEFINE_STATIC(int, tcp_stats_nrates) = 0;
|
||||
#define V_tcp_perconn_stats_sample_rates VNET(tcp_perconn_stats_sample_rates)
|
||||
#define V_tcp_stats_nrates VNET(tcp_stats_nrates)
|
||||
|
||||
static struct rmlock tcp_stats_tpl_sampling_lock;
|
||||
static int tcp_stats_tpl_sr_cb(enum stats_tpl_sr_cb_action action,
|
||||
struct stats_tpl_sample_rate **rates, int *nrates, void *ctx);
|
||||
|
||||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, perconn_stats_enable,
|
||||
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_perconn_stats_enable), 0,
|
||||
"Enable per-connection TCP stats gathering; 1 enables for all connections, "
|
||||
"2 enables random sampling across log id connection groups");
|
||||
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, perconn_stats_sample_rates,
|
||||
CTLTYPE_STRING | CTLFLAG_RW, tcp_stats_tpl_sr_cb,
|
||||
sizeof(struct rm_priotracker), stats_tpl_sample_rates, "A",
|
||||
"TCP stats per template random sampling rates, in CSV tpl_spec=percent "
|
||||
"key-value pairs (see stats(9) for template spec details)");
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#ifdef _KERNEL
|
||||
int
|
||||
#else
|
||||
static int
|
||||
/* Ensure all templates are also added to the userland template list. */
|
||||
__attribute__ ((constructor))
|
||||
#endif
|
||||
tcp_stats_init()
|
||||
{
|
||||
int err, lasterr;
|
||||
|
||||
err = lasterr = 0;
|
||||
|
||||
V_tcp_perconn_stats_dflt_tpl = stats_tpl_alloc("TCP_DEFAULT", 0);
|
||||
if (V_tcp_perconn_stats_dflt_tpl < 0)
|
||||
return (-V_tcp_perconn_stats_dflt_tpl);
|
||||
|
||||
struct voistatspec vss_sum[] = {
|
||||
STATS_VSS_SUM(),
|
||||
};
|
||||
err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
|
||||
VOI_TCP_TXPB, "TCP_TXPB", VSD_DTYPE_INT_U64,
|
||||
NVSS(vss_sum), vss_sum, 0);
|
||||
lasterr = err ? err : lasterr;
|
||||
err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
|
||||
VOI_TCP_RETXPB, "TCP_RETXPB", VSD_DTYPE_INT_U32,
|
||||
NVSS(vss_sum), vss_sum, 0);
|
||||
lasterr = err ? err : lasterr;
|
||||
|
||||
struct voistatspec vss_max[] = {
|
||||
STATS_VSS_MAX(),
|
||||
};
|
||||
err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
|
||||
VOI_TCP_FRWIN, "TCP_FRWIN", VSD_DTYPE_INT_ULONG,
|
||||
NVSS(vss_max), vss_max, 0);
|
||||
lasterr = err ? err : lasterr;
|
||||
err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
|
||||
VOI_TCP_LCWIN, "TCP_LCWIN", VSD_DTYPE_INT_ULONG,
|
||||
NVSS(vss_max), vss_max, 0);
|
||||
lasterr = err ? err : lasterr;
|
||||
|
||||
struct voistatspec vss_rtt[] = {
|
||||
STATS_VSS_MAX(),
|
||||
STATS_VSS_MIN(),
|
||||
STATS_VSS_TDGSTCLUST32(20, 4),
|
||||
};
|
||||
err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
|
||||
VOI_TCP_RTT, "TCP_RTT", VSD_DTYPE_INT_U32,
|
||||
NVSS(vss_rtt), vss_rtt, 0);
|
||||
lasterr = err ? err : lasterr;
|
||||
|
||||
struct voistatspec vss_congsig[] = {
|
||||
STATS_VSS_DVHIST32_USR(HBKTS(DVBKT(CC_ECN), DVBKT(CC_RTO),
|
||||
DVBKT(CC_RTO_ERR), DVBKT(CC_NDUPACK)), 0)
|
||||
};
|
||||
err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
|
||||
VOI_TCP_CSIG, "TCP_CSIG", VSD_DTYPE_INT_U32,
|
||||
NVSS(vss_congsig), vss_congsig, 0);
|
||||
lasterr = err ? err : lasterr;
|
||||
|
||||
struct voistatspec vss_gput[] = {
|
||||
STATS_VSS_MAX(),
|
||||
STATS_VSS_TDGSTCLUST32(20, 4),
|
||||
};
|
||||
err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
|
||||
VOI_TCP_GPUT, "TCP_GPUT", VSD_DTYPE_INT_U32,
|
||||
NVSS(vss_gput), vss_gput, 0);
|
||||
lasterr = err ? err : lasterr;
|
||||
|
||||
struct voistatspec vss_gput_nd[] = {
|
||||
STATS_VSS_TDGSTCLUST32(10, 4),
|
||||
};
|
||||
err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
|
||||
VOI_TCP_GPUT_ND, "TCP_GPUT_ND", VSD_DTYPE_INT_S32,
|
||||
NVSS(vss_gput_nd), vss_gput_nd, 0);
|
||||
lasterr = err ? err : lasterr;
|
||||
|
||||
struct voistatspec vss_windiff[] = {
|
||||
STATS_VSS_CRHIST32_USR(HBKTS(CRBKT(0)), VSD_HIST_LBOUND_INF)
|
||||
};
|
||||
err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
|
||||
VOI_TCP_CALCFRWINDIFF, "TCP_CALCFRWINDIFF", VSD_DTYPE_INT_S32,
|
||||
NVSS(vss_windiff), vss_windiff, 0);
|
||||
lasterr = err ? err : lasterr;
|
||||
|
||||
struct voistatspec vss_acklen[] = {
|
||||
STATS_VSS_MAX(),
|
||||
STATS_VSS_CRHIST32_LIN(0, 9, 1, VSD_HIST_UBOUND_INF)
|
||||
};
|
||||
err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
|
||||
VOI_TCP_ACKLEN, "TCP_ACKLEN", VSD_DTYPE_INT_U32,
|
||||
NVSS(vss_acklen), vss_acklen, 0);
|
||||
lasterr = err ? err : lasterr;
|
||||
|
||||
return (lasterr);
|
||||
}
|
||||
|
||||
#ifdef _KERNEL
|
||||
int
|
||||
tcp_stats_sample_rollthedice(struct tcpcb *tp, void *seed_bytes,
|
||||
size_t seed_len)
|
||||
{
|
||||
struct rm_priotracker tracker;
|
||||
int tpl;
|
||||
|
||||
tpl = -1;
|
||||
|
||||
if (V_tcp_stats_nrates > 0) {
|
||||
rm_rlock(&tcp_stats_tpl_sampling_lock, &tracker);
|
||||
tpl = stats_tpl_sample_rollthedice(V_tcp_perconn_stats_sample_rates,
|
||||
V_tcp_stats_nrates, seed_bytes, seed_len);
|
||||
rm_runlock(&tcp_stats_tpl_sampling_lock, &tracker);
|
||||
|
||||
if (tpl >= 0) {
|
||||
INP_WLOCK_ASSERT(tp->t_inpcb);
|
||||
if (tp->t_stats != NULL)
|
||||
stats_blob_destroy(tp->t_stats);
|
||||
tp->t_stats = stats_blob_alloc(tpl, 0);
|
||||
if (tp->t_stats == NULL)
|
||||
tpl = -ENOMEM;
|
||||
}
|
||||
}
|
||||
|
||||
return (tpl);
|
||||
}
|
||||
|
||||
/*
|
||||
* Callback function for stats_tpl_sample_rates() to interact with the TCP
|
||||
* subsystem's stats template sample rates list.
|
||||
*/
|
||||
int
|
||||
tcp_stats_tpl_sr_cb(enum stats_tpl_sr_cb_action action,
|
||||
struct stats_tpl_sample_rate **rates, int *nrates, void *ctx)
|
||||
{
|
||||
struct stats_tpl_sample_rate *old_rates;
|
||||
int old_nrates;
|
||||
|
||||
if (ctx == NULL)
|
||||
return (ENOMEM);
|
||||
|
||||
switch (action) {
|
||||
case TPL_SR_RLOCKED_GET:
|
||||
/*
|
||||
* Return with rlock held i.e. this call must be paired with a
|
||||
* "action == TPL_SR_RUNLOCK" call.
|
||||
*/
|
||||
rm_assert(&tcp_stats_tpl_sampling_lock, RA_UNLOCKED);
|
||||
rm_rlock(&tcp_stats_tpl_sampling_lock,
|
||||
(struct rm_priotracker *)ctx);
|
||||
/* FALLTHROUGH */
|
||||
case TPL_SR_UNLOCKED_GET:
|
||||
if (rates != NULL)
|
||||
*rates = V_tcp_perconn_stats_sample_rates;
|
||||
if (nrates != NULL)
|
||||
*nrates = V_tcp_stats_nrates;
|
||||
break;
|
||||
case TPL_SR_RUNLOCK:
|
||||
rm_assert(&tcp_stats_tpl_sampling_lock, RA_RLOCKED);
|
||||
rm_runlock(&tcp_stats_tpl_sampling_lock,
|
||||
(struct rm_priotracker *)ctx);
|
||||
break;
|
||||
case TPL_SR_PUT:
|
||||
KASSERT(rates != NULL && nrates != NULL,
|
||||
("%s: PUT without new rates", __func__));
|
||||
rm_assert(&tcp_stats_tpl_sampling_lock, RA_UNLOCKED);
|
||||
if (rates == NULL || nrates == NULL)
|
||||
return (EINVAL);
|
||||
rm_wlock(&tcp_stats_tpl_sampling_lock);
|
||||
old_rates = V_tcp_perconn_stats_sample_rates;
|
||||
old_nrates = V_tcp_stats_nrates;
|
||||
V_tcp_perconn_stats_sample_rates = *rates;
|
||||
V_tcp_stats_nrates = *nrates;
|
||||
rm_wunlock(&tcp_stats_tpl_sampling_lock);
|
||||
*rates = old_rates;
|
||||
*nrates = old_nrates;
|
||||
break;
|
||||
default:
|
||||
return (EINVAL);
|
||||
break;
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
RM_SYSINIT(tcp_stats_tpl_sampling_lock, &tcp_stats_tpl_sampling_lock,
|
||||
"tcp_stats_tpl_sampling_lock");
|
||||
#endif /* _KERNEL */
|
@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/arb.h>
|
||||
#include <sys/callout.h>
|
||||
#include <sys/eventhandler.h>
|
||||
#ifdef TCP_HHOOK
|
||||
@ -54,6 +55,8 @@ __FBSDID("$FreeBSD$");
|
||||
#ifdef KERN_TLS
|
||||
#include <sys/ktls.h>
|
||||
#endif
|
||||
#include <sys/qmath.h>
|
||||
#include <sys/stats.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/jail.h>
|
||||
#include <sys/malloc.h>
|
||||
@ -1004,6 +1007,11 @@ tcp_init(void)
|
||||
if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT,
|
||||
&V_tcp_hhh[HHOOK_TCP_EST_OUT], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
|
||||
printf("%s: WARNING: unable to register helper hook\n", __func__);
|
||||
#endif
|
||||
#ifdef STATS
|
||||
if (tcp_stats_init())
|
||||
printf("%s: WARNING: unable to initialise TCP stats\n",
|
||||
__func__);
|
||||
#endif
|
||||
hashsize = TCBHASHSIZE;
|
||||
TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize);
|
||||
@ -1694,6 +1702,10 @@ tcp_newtcpcb(struct inpcb *inp)
|
||||
if (tp->t_fb->tfb_tcp_fb_init) {
|
||||
(*tp->t_fb->tfb_tcp_fb_init)(tp);
|
||||
}
|
||||
#ifdef STATS
|
||||
if (V_tcp_perconn_stats_enable == 1)
|
||||
tp->t_stats = stats_blob_alloc(V_tcp_perconn_stats_dflt_tpl, 0);
|
||||
#endif
|
||||
return (tp); /* XXX */
|
||||
}
|
||||
|
||||
@ -1912,6 +1924,9 @@ tcp_discardcb(struct tcpcb *tp)
|
||||
#ifdef TCP_HHOOK
|
||||
khelp_destroy_osd(tp->osd);
|
||||
#endif
|
||||
#ifdef STATS
|
||||
stats_blob_destroy(tp->t_stats);
|
||||
#endif
|
||||
|
||||
CC_ALGO(tp) = NULL;
|
||||
inp->inp_ppcb = NULL;
|
||||
|
@ -49,11 +49,13 @@ __FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/arb.h>
|
||||
#include <sys/limits.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/ktls.h>
|
||||
#include <sys/qmath.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/mbuf.h>
|
||||
#ifdef INET6
|
||||
@ -65,6 +67,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/proc.h>
|
||||
#include <sys/jail.h>
|
||||
#include <sys/syslog.h>
|
||||
#include <sys/stats.h>
|
||||
|
||||
#ifdef DDB
|
||||
#include <ddb/ddb.h>
|
||||
@ -108,6 +111,13 @@ __FBSDID("$FreeBSD$");
|
||||
#endif
|
||||
#include <netipsec/ipsec_support.h>
|
||||
|
||||
#include <vm/vm.h>
|
||||
#include <vm/vm_param.h>
|
||||
#include <vm/pmap.h>
|
||||
#include <vm/vm_extern.h>
|
||||
#include <vm/vm_map.h>
|
||||
#include <vm/vm_page.h>
|
||||
|
||||
/*
|
||||
* TCP protocol interface to socket abstraction.
|
||||
*/
|
||||
@ -1816,6 +1826,9 @@ tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp
|
||||
#endif
|
||||
struct cc_algo *algo;
|
||||
char *pbuf, buf[TCP_LOG_ID_LEN];
|
||||
#ifdef STATS
|
||||
struct statsblob *sbp;
|
||||
#endif
|
||||
size_t len;
|
||||
|
||||
/*
|
||||
@ -1933,6 +1946,35 @@ tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp
|
||||
error = EINVAL;
|
||||
break;
|
||||
|
||||
case TCP_STATS:
|
||||
INP_WUNLOCK(inp);
|
||||
#ifdef STATS
|
||||
error = sooptcopyin(sopt, &optval, sizeof optval,
|
||||
sizeof optval);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
if (optval > 0)
|
||||
sbp = stats_blob_alloc(
|
||||
V_tcp_perconn_stats_dflt_tpl, 0);
|
||||
else
|
||||
sbp = NULL;
|
||||
|
||||
INP_WLOCK_RECHECK(inp);
|
||||
if ((tp->t_stats != NULL && sbp == NULL) ||
|
||||
(tp->t_stats == NULL && sbp != NULL)) {
|
||||
struct statsblob *t = tp->t_stats;
|
||||
tp->t_stats = sbp;
|
||||
sbp = t;
|
||||
}
|
||||
INP_WUNLOCK(inp);
|
||||
|
||||
stats_blob_destroy(sbp);
|
||||
#else
|
||||
return (EOPNOTSUPP);
|
||||
#endif /* !STATS */
|
||||
break;
|
||||
|
||||
case TCP_CONGESTION:
|
||||
INP_WUNLOCK(inp);
|
||||
error = sooptcopyin(sopt, buf, TCP_CA_NAME_MAX - 1, 1);
|
||||
@ -2217,6 +2259,55 @@ tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp
|
||||
INP_WUNLOCK(inp);
|
||||
error = sooptcopyout(sopt, &ti, sizeof ti);
|
||||
break;
|
||||
case TCP_STATS:
|
||||
{
|
||||
#ifdef STATS
|
||||
int nheld;
|
||||
TYPEOF_MEMBER(struct statsblob, flags) sbflags = 0;
|
||||
|
||||
error = 0;
|
||||
socklen_t outsbsz = sopt->sopt_valsize;
|
||||
if (tp->t_stats == NULL)
|
||||
error = ENOENT;
|
||||
else if (outsbsz >= tp->t_stats->cursz)
|
||||
outsbsz = tp->t_stats->cursz;
|
||||
else if (outsbsz >= sizeof(struct statsblob))
|
||||
outsbsz = sizeof(struct statsblob);
|
||||
else
|
||||
error = EINVAL;
|
||||
INP_WUNLOCK(inp);
|
||||
if (error)
|
||||
break;
|
||||
|
||||
sbp = sopt->sopt_val;
|
||||
nheld = atop(round_page(((vm_offset_t)sbp) +
|
||||
(vm_size_t)outsbsz) - trunc_page((vm_offset_t)sbp));
|
||||
vm_page_t ma[nheld];
|
||||
if (vm_fault_quick_hold_pages(
|
||||
&curproc->p_vmspace->vm_map, (vm_offset_t)sbp,
|
||||
outsbsz, VM_PROT_READ | VM_PROT_WRITE, ma,
|
||||
nheld) < 0) {
|
||||
error = EFAULT;
|
||||
break;
|
||||
}
|
||||
|
||||
if ((error = copyin_nofault(&(sbp->flags), &sbflags,
|
||||
SIZEOF_MEMBER(struct statsblob, flags))))
|
||||
goto unhold;
|
||||
|
||||
INP_WLOCK_RECHECK(inp);
|
||||
error = stats_blob_snapshot(&sbp, outsbsz, tp->t_stats,
|
||||
sbflags | SB_CLONE_USRDSTNOFAULT);
|
||||
INP_WUNLOCK(inp);
|
||||
sopt->sopt_valsize = outsbsz;
|
||||
unhold:
|
||||
vm_page_unhold_pages(ma, nheld);
|
||||
#else
|
||||
INP_WUNLOCK(inp);
|
||||
error = EOPNOTSUPP;
|
||||
#endif /* !STATS */
|
||||
break;
|
||||
}
|
||||
case TCP_CONGESTION:
|
||||
len = strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX);
|
||||
INP_WUNLOCK(inp);
|
||||
|
@ -210,7 +210,12 @@ struct tcpcb {
|
||||
struct tcp_log_id_node *t_lin;
|
||||
struct tcp_log_id_bucket *t_lib;
|
||||
const char *t_output_caller; /* Function that called tcp_output */
|
||||
struct statsblob *t_stats; /* Per-connection stats */
|
||||
uint32_t t_logsn; /* Log "serial number" */
|
||||
uint32_t gput_ts; /* Time goodput measurement started */
|
||||
tcp_seq gput_seq; /* Outbound measurement seq */
|
||||
tcp_seq gput_ack; /* Inbound measurement ack */
|
||||
int32_t t_stats_gput_prev; /* XXXLAS: Prev gput measurement */
|
||||
uint8_t t_tfo_client_cookie_len; /* TCP Fast Open client cookie length */
|
||||
unsigned int *t_tfo_pending; /* TCP Fast Open server pending counter */
|
||||
union {
|
||||
@ -327,7 +332,7 @@ TAILQ_HEAD(tcp_funchead, tcp_function);
|
||||
#define TF_NOPUSH 0x00001000 /* don't push */
|
||||
#define TF_PREVVALID 0x00002000 /* saved values for bad rxmit valid */
|
||||
#define TF_UNUSED1 0x00004000 /* unused */
|
||||
#define TF_UNUSED2 0x00008000 /* unused */
|
||||
#define TF_GPUTINPROG 0x00008000 /* Goodput measurement in progress */
|
||||
#define TF_MORETOCOME 0x00010000 /* More data to be appended to sock */
|
||||
#define TF_LQ_OVERFLOW 0x00020000 /* listen queue overflow */
|
||||
#define TF_LASTIDLE 0x00040000 /* connection was previously idle */
|
||||
@ -787,6 +792,10 @@ VNET_DECLARE(int, tcp_insecure_rst);
|
||||
VNET_DECLARE(int, tcp_insecure_syn);
|
||||
VNET_DECLARE(int, tcp_minmss);
|
||||
VNET_DECLARE(int, tcp_mssdflt);
|
||||
#ifdef STATS
|
||||
VNET_DECLARE(int, tcp_perconn_stats_dflt_tpl);
|
||||
VNET_DECLARE(int, tcp_perconn_stats_enable);
|
||||
#endif /* STATS */
|
||||
VNET_DECLARE(int, tcp_recvspace);
|
||||
VNET_DECLARE(int, tcp_sack_globalholes);
|
||||
VNET_DECLARE(int, tcp_sack_globalmaxholes);
|
||||
@ -823,6 +832,10 @@ VNET_DECLARE(struct inpcbinfo, tcbinfo);
|
||||
#define V_tcp_insecure_syn VNET(tcp_insecure_syn)
|
||||
#define V_tcp_minmss VNET(tcp_minmss)
|
||||
#define V_tcp_mssdflt VNET(tcp_mssdflt)
|
||||
#ifdef STATS
|
||||
#define V_tcp_perconn_stats_dflt_tpl VNET(tcp_perconn_stats_dflt_tpl)
|
||||
#define V_tcp_perconn_stats_enable VNET(tcp_perconn_stats_enable)
|
||||
#endif /* STATS */
|
||||
#define V_tcp_recvspace VNET(tcp_recvspace)
|
||||
#define V_tcp_sack_globalholes VNET(tcp_sack_globalholes)
|
||||
#define V_tcp_sack_globalmaxholes VNET(tcp_sack_globalmaxholes)
|
||||
@ -966,10 +979,13 @@ int tcp_newreno(struct tcpcb *, struct tcphdr *);
|
||||
int tcp_compute_pipe(struct tcpcb *);
|
||||
uint32_t tcp_compute_initwnd(uint32_t);
|
||||
void tcp_sndbuf_autoscale(struct tcpcb *, struct socket *, uint32_t);
|
||||
int tcp_stats_sample_rollthedice(struct tcpcb *tp, void *seed_bytes,
|
||||
size_t seed_len);
|
||||
struct mbuf *
|
||||
tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen,
|
||||
int32_t seglimit, int32_t segsize, struct sockbuf *sb, bool hw_tls);
|
||||
|
||||
int tcp_stats_init(void);
|
||||
|
||||
static inline void
|
||||
tcp_fields_to_host(struct tcphdr *th)
|
||||
|
@ -58,6 +58,9 @@
|
||||
#define _SYS_STATS_H_
|
||||
|
||||
#include <sys/limits.h>
|
||||
#ifdef DIAGNOSTIC
|
||||
#include <sys/tree.h>
|
||||
#endif
|
||||
|
||||
#ifndef _KERNEL
|
||||
/*
|
||||
|
Loading…
Reference in New Issue
Block a user