Provide new socket option TCP_CCALGOOPT, which stands for TCP congestion
control algorithm options.  The argument is variable length and is opaque
to TCP, forwarded directly to the algorithm's ctl_output method.

Provide new includes directory netinet/cc, where algorithm specific
headers can be installed.

The new API doesn't yet have any in tree consumers.

The original code was written by lstewart.
Reviewed by:	rrs, emax
Sponsored by:	Netflix
Differential Revision:	https://reviews.freebsd.org/D711
This commit is contained in:
Gleb Smirnoff 2016-01-22 02:07:48 +00:00
parent 5e766348d2
commit d519cedbad
8 changed files with 70 additions and 6 deletions

View File

@ -270,6 +270,8 @@
..
..
netinet
cc
..
..
netinet6
..

View File

@ -53,6 +53,7 @@ LSUBDIRS= cam/ata cam/scsi \
geom/raid geom/raid3 geom/shsec geom/stripe geom/virstor \
net/altq \
netgraph/atm netgraph/netflow \
netinet/cc \
security/audit \
security/mac_biba security/mac_bsdextended security/mac_lomac \
security/mac_mls security/mac_partition \

View File

@ -30,7 +30,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd January 12, 2015
.Dd January 21, 2016
.Dt MOD_CC 4
.Os
.Sh NAME
@ -49,7 +49,9 @@ using the
facility.
.Pp
The default algorithm is NewReno, and all connections use the default unless
explicitly overridden using the TCP_CONGESTION socket option (see
explicitly overridden using the
.Dv TCP_CONGESTION
socket option (see
.Xr tcp 4
for details).
The default can be changed using a
@ -57,6 +59,14 @@ The default can be changed using a
MIB variable detailed in the
.Sx MIB Variables
section below.
.Pp
Algorithm specific parameters can be set or queried using the
.Dv TCP_CCALGOOPT
socket option (see
.Xr tcp 4
for details).
Callers must pass a pointer to algorithm specific data, and specify
its size.
.Sh MIB Variables
The framework exposes the following variables in the
.Va net.inet.tcp.cc

View File

@ -34,7 +34,7 @@
.\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93
.\" $FreeBSD$
.\"
.Dd October 27, 2015
.Dd January 21, 2016
.Dt TCP 4
.Os
.Sh NAME
@ -137,6 +137,11 @@ send window size,
receive window size,
and
bandwidth-controlled window space.
.It Dv TCP_CCALGOOPT
Set or query congestion control algorithm specific parameters.
See
.Xr mod_cc 4
for details.
.It Dv TCP_CONGESTION
Select or query the congestion control algorithm that TCP will use for the
connection.

View File

@ -31,7 +31,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd December 26, 2014
.Dd January 21, 2016
.Dt MOD_CC 9
.Os
.Sh NAME
@ -40,7 +40,8 @@
.Nm CCV
.Nd Modular Congestion Control
.Sh SYNOPSIS
.In netinet/cc.h
.In netinet/tcp.h
.In netinet/tcp_cc.h
.In netinet/cc/cc_module.h
.Fn DECLARE_CC_MODULE "ccname" "ccalgo"
.Fn CCV "ccv" "what"
@ -74,6 +75,7 @@ struct cc_algo {
void (*cong_signal) (struct cc_var *ccv, uint32_t type);
void (*post_recovery) (struct cc_var *ccv);
void (*after_idle) (struct cc_var *ccv);
int (*ctl_output)(struct cc_var *, struct sockopt *, void *);
};
.Ed
.Pp
@ -166,6 +168,20 @@ function is called when data transfer resumes after an idle period.
It should be implemented to adjust state as required.
.Pp
The
.Va ctl_output
function is called when
.Xr getsockopt 2
or
.Xr setsockopt 2
is called on a
.Xr tcp 4
socket with the
.Va struct sockopt
pointer forwarded unmodified from the TCP control, and a
.Va void *
pointer to an algorithm specific argument.
.Pp
The
.Fn DECLARE_CC_MODULE
macro provides a convenient wrapper around the
.Xr DECLARE_MODULE 9

View File

@ -165,6 +165,7 @@ struct tcphdr {
#define TCP_MD5SIG 16 /* use MD5 digests (RFC2385) */
#define TCP_INFO 32 /* retrieve tcp_info structure */
#define TCP_CONGESTION 64 /* get/set congestion control algorithm */
#define TCP_CCALGOOPT 65 /* get/set cc algorithm specific options */
#define TCP_KEEPINIT 128 /* N, time to establish connection */
#define TCP_KEEPIDLE 256 /* L,N,X start keepalives after this period */
#define TCP_KEEPINTVL 512 /* L,N interval between keepalives */

View File

@ -151,6 +151,9 @@ struct cc_algo {
/* Called for an additional ECN processing apart from RFC3168. */
void (*ecnpkt_handler)(struct cc_var *ccv);
/* Called for {get|set}sockopt() on a TCP socket with TCP_CCALGOOPT. */
int (*ctl_output)(struct cc_var *, struct sockopt *, void *);
STAILQ_ENTRY (cc_algo) entries;
};

View File

@ -1480,7 +1480,33 @@ tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp
struct tcp_info ti;
struct cc_algo *algo;
char *buf;
/*
* For TCP_CCALGOOPT forward the control to CC module, for both
* SOPT_SET and SOPT_GET.
*/
switch (sopt->sopt_name) {
case TCP_CCALGOOPT:
INP_WUNLOCK(inp);
buf = malloc(sopt->sopt_valsize, M_TEMP, M_WAITOK | M_ZERO);
error = sooptcopyin(sopt, buf, sopt->sopt_valsize,
sopt->sopt_valsize);
if (error) {
free(buf, M_TEMP);
return (error);
}
INP_WLOCK_RECHECK(inp);
if (CC_ALGO(tp)->ctl_output != NULL)
error = CC_ALGO(tp)->ctl_output(tp->ccv, sopt, buf);
else
error = ENOENT;
INP_WUNLOCK(inp);
if (error == 0 && sopt->sopt_dir == SOPT_GET)
error = sooptcopyout(sopt, buf, sopt->sopt_valsize);
free(buf, M_TEMP);
return (error);
}
switch (sopt->sopt_dir) {
case SOPT_SET:
switch (sopt->sopt_name) {