Make use of the stats(3) framework in the TCP stack.

This makes it possible to retrieve per-connection statistical
information such as the receive window size, RTT, or goodput,
using a newly added TCP_STATS getsockopt(2) option, and to extract
it using the stats_voistat_fetch(3) API.

See the net/tcprtt port for an example consumer of this API.

Compared to the existing TCP_INFO system, the main differences
are that this mechanism is easy to extend without breaking ABI,
and provides statistical information instead of raw "snapshots"
of values at a given point in time.  stats(3) is more generic
and can be used in both userland and the kernel.

Reviewed by:	thj
Tested by:	thj
Obtained from:	Netflix
Relnotes:	yes
Sponsored by:	Klara Inc, Netflix
Differential Revision:	https://reviews.freebsd.org/D20655
This commit is contained in:
Edward Tomasz Napierala 2019-12-02 20:58:04 +00:00
parent 79c1428ed6
commit adc56f5a38
13 changed files with 523 additions and 10 deletions

View File

@ -3,12 +3,12 @@
LIB= stats
SHLIBDIR?= /lib
SHLIB_MAJOR= 0
SRCS= subr_stats.c
SRCS= subr_stats.c tcp_stats.c
# To debug, comment WITHOUT_ASSERT_DEBUG= and uncomment CFLAGS:=
WITHOUT_ASSERT_DEBUG=
#CFLAGS:=${CFLAGS:C/-O[0-9]/-O0 -g3/} -DDIAGNOSTIC
.PATH: ${.CURDIR}/../../sys/kern
.PATH: ${.CURDIR}/../../sys/kern ${.CURDIR}/../../sys/netinet
.include <bsd.lib.mk>

View File

@ -34,7 +34,7 @@
.\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93
.\" $FreeBSD$
.\"
.Dd December 1, 2019
.Dd December 2, 2019
.Dt TCP 4
.Os
.Sh NAME
@ -291,6 +291,10 @@ This entry can only be specified on a per-host basis at this time.
.Pp
If an SADB entry cannot be found for the destination,
the system does not send any outgoing segments and drops any inbound segments.
.Pp
Each dropped segment is taken into account in the TCP protocol statistics.
.It Dv TCP_STATS
Manage collection of connection level statistics using the
.Xr stats 3
framework.
.It Dv TCP_TXTLS_ENABLE
@ -664,6 +668,17 @@ Default is false.
When initializing the TCP timestamps, use a per connection offset instead of a
per host pair offset.
Default is to use per connection offsets as recommended in RFC 7323.
.It Va perconn_stats_enable
Controls the default collection of statistics for all connections using the
.Xr stats 3
framework.
0 disables, 1 enables, 2 enables random sampling across log id connection
groups with all connections in a group receiving the same setting.
.It Va perconn_stats_sample_rates
A CSV list of template_spec=percent key-value pairs which controls the per
template sampling rates when
.Xr stats 3
sampling is enabled.
.El
.Sh ERRORS
A socket operation may fail with one of the following errors returned:
@ -703,6 +718,7 @@ when trying to use a TCP function block that is not available;
.Sh SEE ALSO
.Xr getsockopt 2 ,
.Xr socket 2 ,
.Xr stats 3 ,
.Xr sysctl 3 ,
.Xr blackhole 4 ,
.Xr inet 4 ,

View File

@ -4295,6 +4295,7 @@ netinet/tcp_pcap.c optional inet tcppcap | inet6 tcppcap \
compile-with "${NORMAL_C} ${NO_WNONNULL}"
netinet/tcp_reass.c optional inet | inet6
netinet/tcp_sack.c optional inet | inet6
netinet/tcp_stats.c optional stats inet | stats inet6
netinet/tcp_subr.c optional inet | inet6
netinet/tcp_syncache.c optional inet | inet6
netinet/tcp_timer.c optional inet | inet6

View File

@ -51,9 +51,7 @@
#ifndef _NETINET_CC_CC_H_
#define _NETINET_CC_CC_H_
#if !defined(_KERNEL)
#error "no user-serviceable parts inside"
#endif
#ifdef _KERNEL
/* Global CC vars. */
extern STAILQ_HEAD(cc_head, cc_algo) cc_list;
@ -108,6 +106,7 @@ struct cc_var {
#define CC_DUPACK 0x0002 /* Duplicate ACK. */
#define CC_PARTIALACK 0x0004 /* Not yet. */
#define CC_SACK 0x0008 /* Not yet. */
#endif /* _KERNEL */
/*
* Congestion signal types passed to the cong_signal() hook. The highest order 8
@ -121,6 +120,7 @@ struct cc_var {
#define CC_SIGPRIVMASK 0xFF000000 /* Mask to check if sig is private. */
#ifdef _KERNEL
/*
* Structure to hold data and function pointers that together represent a
* congestion control algorithm.
@ -184,4 +184,5 @@ extern struct rwlock cc_list_lock;
#define CC_ALGOOPT_LIMIT 2048
#endif /* _KERNEL */
#endif /* _NETINET_CC_CC_H_ */

View File

@ -168,6 +168,7 @@ struct tcphdr {
#define TCP_NOOPT 8 /* don't use TCP options */
#define TCP_MD5SIG 16 /* use MD5 digests (RFC2385) */
#define TCP_INFO 32 /* retrieve tcp_info structure */
#define TCP_STATS 33 /* retrieve stats blob structure */
#define TCP_LOG 34 /* configure event logging for connection */
#define TCP_LOGBUF 35 /* retrieve event log for connection */
#define TCP_LOGID 36 /* configure log ID to correlate connections */
@ -364,4 +365,18 @@ struct tcp_function_set {
*/
#define TLS_SET_RECORD_TYPE 1
/*
* TCP specific variables of interest (VOIs) for tp->t_stats stats(9)
* accounting.  Each value is the VOI id passed to the stats_voi_update_*()
* calls made by the TCP stack.
*/
#define VOI_TCP_TXPB 0 /* Transmit payload bytes */
#define VOI_TCP_RETXPB 1 /* Retransmit payload bytes */
#define VOI_TCP_FRWIN 2 /* Foreign receive window */
#define VOI_TCP_LCWIN 3 /* Local congestion window */
#define VOI_TCP_RTT 4 /* Round trip time */
#define VOI_TCP_CSIG 5 /* Congestion signal */
#define VOI_TCP_GPUT 6 /* Goodput */
#define VOI_TCP_CALCFRWINDIFF 7 /* Congestion avoidance LCWIN - FRWIN */
#define VOI_TCP_GPUT_ND 8 /* Goodput normalised delta */
#define VOI_TCP_ACKLEN 9 /* Average ACKed bytes per ACK */
#endif /* !_NETINET_TCP_H_ */

View File

@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$");
#include "opt_tcpdebug.h"
#include <sys/param.h>
#include <sys/arb.h>
#include <sys/kernel.h>
#ifdef TCP_HHOOK
#include <sys/hhook.h>
@ -66,6 +67,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mbuf.h>
#include <sys/proc.h> /* for proc0 declaration */
#include <sys/protosw.h>
#include <sys/qmath.h>
#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
@ -73,6 +75,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/stats.h>
#include <machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */
@ -298,6 +301,10 @@ void
cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t nsegs,
uint16_t type)
{
#ifdef STATS
int32_t gput;
#endif
INP_WLOCK_ASSERT(tp->t_inpcb);
tp->ccv->nsegs = nsegs;
@ -310,6 +317,35 @@ cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t nsegs,
tp->ccv->flags &= ~CCF_CWND_LIMITED;
if (type == CC_ACK) {
#ifdef STATS
stats_voi_update_abs_s32(tp->t_stats, VOI_TCP_CALCFRWINDIFF,
((int32_t)tp->snd_cwnd) - tp->snd_wnd);
if (!IN_RECOVERY(tp->t_flags))
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_ACKLEN,
tp->ccv->bytes_this_ack / (tcp_maxseg(tp) * nsegs));
if ((tp->t_flags & TF_GPUTINPROG) &&
SEQ_GEQ(th->th_ack, tp->gput_ack)) {
/*
* Compute goodput in bits per millisecond.
*/
gput = (((int64_t)(th->th_ack - tp->gput_seq)) << 3) /
max(1, tcp_ts_getticks() - tp->gput_ts);
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_GPUT,
gput);
/*
* XXXLAS: This is a temporary hack, and should be
* chained off VOI_TCP_GPUT when stats(9) grows an API
* to deal with chained VOIs.
*/
if (tp->t_stats_gput_prev > 0)
stats_voi_update_abs_s32(tp->t_stats,
VOI_TCP_GPUT_ND,
((gput - tp->t_stats_gput_prev) * 100) /
tp->t_stats_gput_prev);
tp->t_flags &= ~TF_GPUTINPROG;
tp->t_stats_gput_prev = gput;
}
#endif /* STATS */
if (tp->snd_cwnd > tp->snd_ssthresh) {
tp->t_bytes_acked += min(tp->ccv->bytes_this_ack,
nsegs * V_tcp_abc_l_var * tcp_maxseg(tp));
@ -328,6 +364,9 @@ cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t nsegs,
tp->ccv->curack = th->th_ack;
CC_ALGO(tp)->ack_received(tp->ccv, type);
}
#ifdef STATS
stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_LCWIN, tp->snd_cwnd);
#endif
}
void
@ -393,6 +432,10 @@ cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type)
INP_WLOCK_ASSERT(tp->t_inpcb);
#ifdef STATS
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_CSIG, type);
#endif
switch(type) {
case CC_NDUPACK:
if (!IN_FASTRECOVERY(tp->t_flags)) {
@ -1496,6 +1539,9 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* For the SYN_SENT state the scale is zero.
*/
tiwin = th->th_win << tp->snd_scale;
#ifdef STATS
stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_FRWIN, tiwin);
#endif
/*
* TCP ECN processing.
@ -3359,6 +3405,10 @@ tcp_xmit_timer(struct tcpcb *tp, int rtt)
TCPSTAT_INC(tcps_rttupdated);
tp->t_rttupdated++;
#ifdef STATS
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT,
imax(0, rtt * 1000 / hz));
#endif
if ((tp->t_srtt != 0) && (tp->t_rxtshift <= TCP_RTT_INVALIDATE)) {
/*
* srtt is stored as fixed point with 5 bits after the

View File

@ -30,10 +30,12 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/arb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/qmath.h>
#include <sys/queue.h>
#include <sys/refcount.h>
#include <sys/rwlock.h>
@ -41,6 +43,7 @@ __FBSDID("$FreeBSD$");
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
#include <sys/stats.h>
#include <sys/counter.h>
#include <dev/tcp_log/tcp_log_dev.h>
@ -475,7 +478,7 @@ tcp_log_grow_tlb(char *tlb_id, struct tcpcb *tp)
INP_WLOCK_ASSERT(tp->t_inpcb);
#ifdef NETFLIX
#ifdef STATS
if (V_tcp_perconn_stats_enable == 2 && tp->t_stats == NULL)
(void)tcp_stats_sample_rollthedice(tp, tlb_id, strlen(tlb_id));
#endif

View File

@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/arb.h>
#include <sys/domain.h>
#ifdef TCP_HHOOK
#include <sys/hhook.h>
@ -54,10 +55,12 @@ __FBSDID("$FreeBSD$");
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/protosw.h>
#include <sys/qmath.h>
#include <sys/sdt.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/stats.h>
#include <net/if.h>
#include <net/route.h>
@ -991,15 +994,31 @@ tcp_output(struct tcpcb *tp)
struct sockbuf *msb;
u_int moff;
if ((tp->t_flags & TF_FORCEDATA) && len == 1)
if ((tp->t_flags & TF_FORCEDATA) && len == 1) {
TCPSTAT_INC(tcps_sndprobe);
else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) {
#ifdef STATS
if (SEQ_LT(tp->snd_nxt, tp->snd_max))
stats_voi_update_abs_u32(tp->t_stats,
VOI_TCP_RETXPB, len);
else
stats_voi_update_abs_u64(tp->t_stats,
VOI_TCP_TXPB, len);
#endif /* STATS */
} else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) {
tp->t_sndrexmitpack++;
TCPSTAT_INC(tcps_sndrexmitpack);
TCPSTAT_ADD(tcps_sndrexmitbyte, len);
#ifdef STATS
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB,
len);
#endif /* STATS */
} else {
TCPSTAT_INC(tcps_sndpack);
TCPSTAT_ADD(tcps_sndbyte, len);
#ifdef STATS
stats_voi_update_abs_u64(tp->t_stats, VOI_TCP_TXPB,
len);
#endif /* STATS */
}
#ifdef INET6
if (MHLEN < hdrlen + max_linkhdr)
@ -1472,6 +1491,15 @@ tcp_output(struct tcpcb *tp)
tp->t_rtseq = startseq;
TCPSTAT_INC(tcps_segstimed);
}
#ifdef STATS
if (!(tp->t_flags & TF_GPUTINPROG) && len) {
tp->t_flags |= TF_GPUTINPROG;
tp->gput_seq = startseq;
tp->gput_ack = startseq +
ulmin(sbavail(&so->so_snd) - off, sendwin);
tp->gput_ts = tcp_ts_getticks();
}
#endif /* STATS */
}
/*

274
sys/netinet/tcp_stats.c Normal file
View File

@ -0,0 +1,274 @@
/*-
* Copyright (c) 2016-2018 Netflix, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Author: Lawrence Stewart <lstewart@netflix.com>
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/arb.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/qmath.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#ifdef _KERNEL
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/rmlock.h>
#include <sys/systm.h>
#endif
#include <sys/stats.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/cc/cc.h>
/*
* Id of the default per-connection stats template.  Starts at -1 and is set
* by tcp_stats_init() from the value returned by stats_tpl_alloc().
*/
VNET_DEFINE(int, tcp_perconn_stats_dflt_tpl) = -1;
#ifndef _KERNEL
/* Userland build: define the V_ accessor macros locally. */
#define V_tcp_perconn_stats_enable VNET(tcp_perconn_stats_enable)
#define V_tcp_perconn_stats_dflt_tpl VNET(tcp_perconn_stats_dflt_tpl)
#else /* _KERNEL */
/*
* Per-connection stats mode, exported below as the perconn_stats_enable
* sysctl (see its description string for the meaning of 1 and 2).  The
* compiled-in default is 2.
*/
VNET_DEFINE(int, tcp_perconn_stats_enable) = 2;
/*
* Template sample rate list and its length.  Within this file both are read
* under a read lock, and replaced under the write lock, of
* tcp_stats_tpl_sampling_lock.
*/
VNET_DEFINE_STATIC(struct stats_tpl_sample_rate *, tcp_perconn_stats_sample_rates);
VNET_DEFINE_STATIC(int, tcp_stats_nrates) = 0;
#define V_tcp_perconn_stats_sample_rates VNET(tcp_perconn_stats_sample_rates)
#define V_tcp_stats_nrates VNET(tcp_stats_nrates)
static struct rmlock tcp_stats_tpl_sampling_lock;
/* Forward declaration: referenced by the SYSCTL_PROC below. */
static int tcp_stats_tpl_sr_cb(enum stats_tpl_sr_cb_action action,
struct stats_tpl_sample_rate **rates, int *nrates, void *ctx);
SYSCTL_INT(_net_inet_tcp, OID_AUTO, perconn_stats_enable,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_perconn_stats_enable), 0,
"Enable per-connection TCP stats gathering; 1 enables for all connections, "
"2 enables random sampling across log id connection groups");
/*
* The sysctl handler is stats_tpl_sample_rates(); it is given the callback
* above as arg1 and sizeof(struct rm_priotracker) as arg2.
* NOTE(review): arg2 is presumably the size of the context buffer the handler
* hands back to the callback as ctx -- confirm against stats(9).
*/
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, perconn_stats_sample_rates,
CTLTYPE_STRING | CTLFLAG_RW, tcp_stats_tpl_sr_cb,
sizeof(struct rm_priotracker), stats_tpl_sample_rates, "A",
"TCP stats per template random sampling rates, in CSV tpl_spec=percent "
"key-value pairs (see stats(9) for template spec details)");
#endif /* _KERNEL */
#ifdef _KERNEL
int
#else
static int
/* Ensure all templates are also added to the userland template list. */
__attribute__ ((constructor))
#endif
tcp_stats_init()
{
int err, lasterr;
err = lasterr = 0;
V_tcp_perconn_stats_dflt_tpl = stats_tpl_alloc("TCP_DEFAULT", 0);
if (V_tcp_perconn_stats_dflt_tpl < 0)
return (-V_tcp_perconn_stats_dflt_tpl);
struct voistatspec vss_sum[] = {
STATS_VSS_SUM(),
};
err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
VOI_TCP_TXPB, "TCP_TXPB", VSD_DTYPE_INT_U64,
NVSS(vss_sum), vss_sum, 0);
lasterr = err ? err : lasterr;
err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
VOI_TCP_RETXPB, "TCP_RETXPB", VSD_DTYPE_INT_U32,
NVSS(vss_sum), vss_sum, 0);
lasterr = err ? err : lasterr;
struct voistatspec vss_max[] = {
STATS_VSS_MAX(),
};
err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
VOI_TCP_FRWIN, "TCP_FRWIN", VSD_DTYPE_INT_ULONG,
NVSS(vss_max), vss_max, 0);
lasterr = err ? err : lasterr;
err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
VOI_TCP_LCWIN, "TCP_LCWIN", VSD_DTYPE_INT_ULONG,
NVSS(vss_max), vss_max, 0);
lasterr = err ? err : lasterr;
struct voistatspec vss_rtt[] = {
STATS_VSS_MAX(),
STATS_VSS_MIN(),
STATS_VSS_TDGSTCLUST32(20, 4),
};
err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
VOI_TCP_RTT, "TCP_RTT", VSD_DTYPE_INT_U32,
NVSS(vss_rtt), vss_rtt, 0);
lasterr = err ? err : lasterr;
struct voistatspec vss_congsig[] = {
STATS_VSS_DVHIST32_USR(HBKTS(DVBKT(CC_ECN), DVBKT(CC_RTO),
DVBKT(CC_RTO_ERR), DVBKT(CC_NDUPACK)), 0)
};
err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
VOI_TCP_CSIG, "TCP_CSIG", VSD_DTYPE_INT_U32,
NVSS(vss_congsig), vss_congsig, 0);
lasterr = err ? err : lasterr;
struct voistatspec vss_gput[] = {
STATS_VSS_MAX(),
STATS_VSS_TDGSTCLUST32(20, 4),
};
err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
VOI_TCP_GPUT, "TCP_GPUT", VSD_DTYPE_INT_U32,
NVSS(vss_gput), vss_gput, 0);
lasterr = err ? err : lasterr;
struct voistatspec vss_gput_nd[] = {
STATS_VSS_TDGSTCLUST32(10, 4),
};
err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
VOI_TCP_GPUT_ND, "TCP_GPUT_ND", VSD_DTYPE_INT_S32,
NVSS(vss_gput_nd), vss_gput_nd, 0);
lasterr = err ? err : lasterr;
struct voistatspec vss_windiff[] = {
STATS_VSS_CRHIST32_USR(HBKTS(CRBKT(0)), VSD_HIST_LBOUND_INF)
};
err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
VOI_TCP_CALCFRWINDIFF, "TCP_CALCFRWINDIFF", VSD_DTYPE_INT_S32,
NVSS(vss_windiff), vss_windiff, 0);
lasterr = err ? err : lasterr;
struct voistatspec vss_acklen[] = {
STATS_VSS_MAX(),
STATS_VSS_CRHIST32_LIN(0, 9, 1, VSD_HIST_UBOUND_INF)
};
err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
VOI_TCP_ACKLEN, "TCP_ACKLEN", VSD_DTYPE_INT_U32,
NVSS(vss_acklen), vss_acklen, 0);
lasterr = err ? err : lasterr;
return (lasterr);
}
#ifdef _KERNEL
int
tcp_stats_sample_rollthedice(struct tcpcb *tp, void *seed_bytes,
size_t seed_len)
{
struct rm_priotracker tracker;
int tpl;
tpl = -1;
if (V_tcp_stats_nrates > 0) {
rm_rlock(&tcp_stats_tpl_sampling_lock, &tracker);
tpl = stats_tpl_sample_rollthedice(V_tcp_perconn_stats_sample_rates,
V_tcp_stats_nrates, seed_bytes, seed_len);
rm_runlock(&tcp_stats_tpl_sampling_lock, &tracker);
if (tpl >= 0) {
INP_WLOCK_ASSERT(tp->t_inpcb);
if (tp->t_stats != NULL)
stats_blob_destroy(tp->t_stats);
tp->t_stats = stats_blob_alloc(tpl, 0);
if (tp->t_stats == NULL)
tpl = -ENOMEM;
}
}
return (tpl);
}
/*
* Callback function for stats_tpl_sample_rates() to interact with the TCP
* subsystem's stats template sample rates list.
*
* action selects the operation:
*   TPL_SR_UNLOCKED_GET  copy the current list pointer and count to *rates and
*                        *nrates (each only if non-NULL) without locking.
*   TPL_SR_RLOCKED_GET   as above, but first take the sampling read lock using
*                        ctx as the struct rm_priotracker; the lock is still
*                        held on return.
*   TPL_SR_RUNLOCK       drop the read lock taken by TPL_SR_RLOCKED_GET, using
*                        the same ctx.
*   TPL_SR_PUT           install *rates / *nrates as the new list under the
*                        write lock and return the previous list and count
*                        through the same pointers.
*
* Returns 0 on success, ENOMEM if ctx is NULL (checked for every action), or
* EINVAL for an unknown action or a TPL_SR_PUT without rates/nrates.
*
* The definition omits "static", but the earlier file-scope prototype declares
* the function static, so it has internal linkage.
*/
int
tcp_stats_tpl_sr_cb(enum stats_tpl_sr_cb_action action,
struct stats_tpl_sample_rate **rates, int *nrates, void *ctx)
{
struct stats_tpl_sample_rate *old_rates;
int old_nrates;
/*
* NOTE(review): ENOMEM presumably reflects the caller having failed to
* allocate the ctx buffer -- confirm against stats_tpl_sample_rates().
*/
if (ctx == NULL)
return (ENOMEM);
switch (action) {
case TPL_SR_RLOCKED_GET:
/*
* Return with rlock held i.e. this call must be paired with a
* "action == TPL_SR_RUNLOCK" call.
*/
rm_assert(&tcp_stats_tpl_sampling_lock, RA_UNLOCKED);
rm_rlock(&tcp_stats_tpl_sampling_lock,
(struct rm_priotracker *)ctx);
/* FALLTHROUGH */
case TPL_SR_UNLOCKED_GET:
/* Either output pointer may be NULL if the caller does not want it. */
if (rates != NULL)
*rates = V_tcp_perconn_stats_sample_rates;
if (nrates != NULL)
*nrates = V_tcp_stats_nrates;
break;
case TPL_SR_RUNLOCK:
rm_assert(&tcp_stats_tpl_sampling_lock, RA_RLOCKED);
rm_runlock(&tcp_stats_tpl_sampling_lock,
(struct rm_priotracker *)ctx);
break;
case TPL_SR_PUT:
/*
* The KASSERT catches a bad caller on kernels where assertions are
* compiled in; the explicit check below still rejects it otherwise.
*/
KASSERT(rates != NULL && nrates != NULL,
("%s: PUT without new rates", __func__));
rm_assert(&tcp_stats_tpl_sampling_lock, RA_UNLOCKED);
if (rates == NULL || nrates == NULL)
return (EINVAL);
/* Swap the list and count together so readers see a matched pair. */
rm_wlock(&tcp_stats_tpl_sampling_lock);
old_rates = V_tcp_perconn_stats_sample_rates;
old_nrates = V_tcp_stats_nrates;
V_tcp_perconn_stats_sample_rates = *rates;
V_tcp_stats_nrates = *nrates;
rm_wunlock(&tcp_stats_tpl_sampling_lock);
/* Hand the previous list back; presumably the caller frees it. */
*rates = old_rates;
*nrates = old_nrates;
break;
default:
return (EINVAL);
break;
}
return (0);
}
/*
* Set up tcp_stats_tpl_sampling_lock, the rmlock protecting the template
* sample rates list, via the RM_SYSINIT mechanism.
*/
RM_SYSINIT(tcp_stats_tpl_sampling_lock, &tcp_stats_tpl_sampling_lock,
"tcp_stats_tpl_sampling_lock");
#endif /* _KERNEL */

View File

@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/arb.h>
#include <sys/callout.h>
#include <sys/eventhandler.h>
#ifdef TCP_HHOOK
@ -54,6 +55,8 @@ __FBSDID("$FreeBSD$");
#ifdef KERN_TLS
#include <sys/ktls.h>
#endif
#include <sys/qmath.h>
#include <sys/stats.h>
#include <sys/sysctl.h>
#include <sys/jail.h>
#include <sys/malloc.h>
@ -1004,6 +1007,11 @@ tcp_init(void)
if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT,
&V_tcp_hhh[HHOOK_TCP_EST_OUT], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
printf("%s: WARNING: unable to register helper hook\n", __func__);
#endif
#ifdef STATS
if (tcp_stats_init())
printf("%s: WARNING: unable to initialise TCP stats\n",
__func__);
#endif
hashsize = TCBHASHSIZE;
TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize);
@ -1694,6 +1702,10 @@ tcp_newtcpcb(struct inpcb *inp)
if (tp->t_fb->tfb_tcp_fb_init) {
(*tp->t_fb->tfb_tcp_fb_init)(tp);
}
#ifdef STATS
if (V_tcp_perconn_stats_enable == 1)
tp->t_stats = stats_blob_alloc(V_tcp_perconn_stats_dflt_tpl, 0);
#endif
return (tp); /* XXX */
}
@ -1912,6 +1924,9 @@ tcp_discardcb(struct tcpcb *tp)
#ifdef TCP_HHOOK
khelp_destroy_osd(tp->osd);
#endif
#ifdef STATS
stats_blob_destroy(tp->t_stats);
#endif
CC_ALGO(tp) = NULL;
inp->inp_ppcb = NULL;

View File

@ -49,11 +49,13 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/arb.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/refcount.h>
#include <sys/kernel.h>
#include <sys/ktls.h>
#include <sys/qmath.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>
#ifdef INET6
@ -65,6 +67,7 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h>
#include <sys/jail.h>
#include <sys/syslog.h>
#include <sys/stats.h>
#ifdef DDB
#include <ddb/ddb.h>
@ -108,6 +111,13 @@ __FBSDID("$FreeBSD$");
#endif
#include <netipsec/ipsec_support.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
/*
* TCP protocol interface to socket abstraction.
*/
@ -1816,6 +1826,9 @@ tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp
#endif
struct cc_algo *algo;
char *pbuf, buf[TCP_LOG_ID_LEN];
#ifdef STATS
struct statsblob *sbp;
#endif
size_t len;
/*
@ -1933,6 +1946,35 @@ tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp
error = EINVAL;
break;
case TCP_STATS:
INP_WUNLOCK(inp);
#ifdef STATS
error = sooptcopyin(sopt, &optval, sizeof optval,
sizeof optval);
if (error)
return (error);
if (optval > 0)
sbp = stats_blob_alloc(
V_tcp_perconn_stats_dflt_tpl, 0);
else
sbp = NULL;
INP_WLOCK_RECHECK(inp);
if ((tp->t_stats != NULL && sbp == NULL) ||
(tp->t_stats == NULL && sbp != NULL)) {
struct statsblob *t = tp->t_stats;
tp->t_stats = sbp;
sbp = t;
}
INP_WUNLOCK(inp);
stats_blob_destroy(sbp);
#else
return (EOPNOTSUPP);
#endif /* !STATS */
break;
case TCP_CONGESTION:
INP_WUNLOCK(inp);
error = sooptcopyin(sopt, buf, TCP_CA_NAME_MAX - 1, 1);
@ -2217,6 +2259,55 @@ tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp
INP_WUNLOCK(inp);
error = sooptcopyout(sopt, &ti, sizeof ti);
break;
case TCP_STATS:
{
#ifdef STATS
int nheld;
TYPEOF_MEMBER(struct statsblob, flags) sbflags = 0;
error = 0;
socklen_t outsbsz = sopt->sopt_valsize;
if (tp->t_stats == NULL)
error = ENOENT;
else if (outsbsz >= tp->t_stats->cursz)
outsbsz = tp->t_stats->cursz;
else if (outsbsz >= sizeof(struct statsblob))
outsbsz = sizeof(struct statsblob);
else
error = EINVAL;
INP_WUNLOCK(inp);
if (error)
break;
sbp = sopt->sopt_val;
nheld = atop(round_page(((vm_offset_t)sbp) +
(vm_size_t)outsbsz) - trunc_page((vm_offset_t)sbp));
vm_page_t ma[nheld];
if (vm_fault_quick_hold_pages(
&curproc->p_vmspace->vm_map, (vm_offset_t)sbp,
outsbsz, VM_PROT_READ | VM_PROT_WRITE, ma,
nheld) < 0) {
error = EFAULT;
break;
}
if ((error = copyin_nofault(&(sbp->flags), &sbflags,
SIZEOF_MEMBER(struct statsblob, flags))))
goto unhold;
INP_WLOCK_RECHECK(inp);
error = stats_blob_snapshot(&sbp, outsbsz, tp->t_stats,
sbflags | SB_CLONE_USRDSTNOFAULT);
INP_WUNLOCK(inp);
sopt->sopt_valsize = outsbsz;
unhold:
vm_page_unhold_pages(ma, nheld);
#else
INP_WUNLOCK(inp);
error = EOPNOTSUPP;
#endif /* !STATS */
break;
}
case TCP_CONGESTION:
len = strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX);
INP_WUNLOCK(inp);

View File

@ -210,7 +210,12 @@ struct tcpcb {
struct tcp_log_id_node *t_lin;
struct tcp_log_id_bucket *t_lib;
const char *t_output_caller; /* Function that called tcp_output */
struct statsblob *t_stats; /* Per-connection stats */
uint32_t t_logsn; /* Log "serial number" */
uint32_t gput_ts; /* Time goodput measurement started */
tcp_seq gput_seq; /* Outbound measurement seq */
tcp_seq gput_ack; /* Inbound measurement ack */
int32_t t_stats_gput_prev; /* XXXLAS: Prev gput measurement */
uint8_t t_tfo_client_cookie_len; /* TCP Fast Open client cookie length */
unsigned int *t_tfo_pending; /* TCP Fast Open server pending counter */
union {
@ -327,7 +332,7 @@ TAILQ_HEAD(tcp_funchead, tcp_function);
#define TF_NOPUSH 0x00001000 /* don't push */
#define TF_PREVVALID 0x00002000 /* saved values for bad rxmit valid */
#define TF_UNUSED1 0x00004000 /* unused */
#define TF_UNUSED2 0x00008000 /* unused */
#define TF_GPUTINPROG 0x00008000 /* Goodput measurement in progress */
#define TF_MORETOCOME 0x00010000 /* More data to be appended to sock */
#define TF_LQ_OVERFLOW 0x00020000 /* listen queue overflow */
#define TF_LASTIDLE 0x00040000 /* connection was previously idle */
@ -787,6 +792,10 @@ VNET_DECLARE(int, tcp_insecure_rst);
VNET_DECLARE(int, tcp_insecure_syn);
VNET_DECLARE(int, tcp_minmss);
VNET_DECLARE(int, tcp_mssdflt);
#ifdef STATS
VNET_DECLARE(int, tcp_perconn_stats_dflt_tpl);
VNET_DECLARE(int, tcp_perconn_stats_enable);
#endif /* STATS */
VNET_DECLARE(int, tcp_recvspace);
VNET_DECLARE(int, tcp_sack_globalholes);
VNET_DECLARE(int, tcp_sack_globalmaxholes);
@ -823,6 +832,10 @@ VNET_DECLARE(struct inpcbinfo, tcbinfo);
#define V_tcp_insecure_syn VNET(tcp_insecure_syn)
#define V_tcp_minmss VNET(tcp_minmss)
#define V_tcp_mssdflt VNET(tcp_mssdflt)
#ifdef STATS
#define V_tcp_perconn_stats_dflt_tpl VNET(tcp_perconn_stats_dflt_tpl)
#define V_tcp_perconn_stats_enable VNET(tcp_perconn_stats_enable)
#endif /* STATS */
#define V_tcp_recvspace VNET(tcp_recvspace)
#define V_tcp_sack_globalholes VNET(tcp_sack_globalholes)
#define V_tcp_sack_globalmaxholes VNET(tcp_sack_globalmaxholes)
@ -966,10 +979,13 @@ int tcp_newreno(struct tcpcb *, struct tcphdr *);
int tcp_compute_pipe(struct tcpcb *);
uint32_t tcp_compute_initwnd(uint32_t);
void tcp_sndbuf_autoscale(struct tcpcb *, struct socket *, uint32_t);
int tcp_stats_sample_rollthedice(struct tcpcb *tp, void *seed_bytes,
size_t seed_len);
struct mbuf *
tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen,
int32_t seglimit, int32_t segsize, struct sockbuf *sb, bool hw_tls);
int tcp_stats_init(void);
static inline void
tcp_fields_to_host(struct tcphdr *th)

View File

@ -58,6 +58,9 @@
#define _SYS_STATS_H_
#include <sys/limits.h>
#ifdef DIAGNOSTIC
#include <sys/tree.h>
#endif
#ifndef _KERNEL
/*