Provide new socket option TCP_CCALGOOPT, which stands for TCP congestion
control algorithm options. The argument is variable length and is opaque to TCP, forwarded directly to the algorithm's ctl_output method. Provide new includes directory netinet/cc, where algorithm specific headers can be installed. The new API doesn't yet have any in tree consumers. The original code was written by lstewart. Reviewed by: rrs, emax Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D711
This commit is contained in:
parent
5e766348d2
commit
d519cedbad
@ -270,6 +270,8 @@
|
||||
..
|
||||
..
|
||||
netinet
|
||||
cc
|
||||
..
|
||||
..
|
||||
netinet6
|
||||
..
|
||||
|
@ -53,6 +53,7 @@ LSUBDIRS= cam/ata cam/scsi \
|
||||
geom/raid geom/raid3 geom/shsec geom/stripe geom/virstor \
|
||||
net/altq \
|
||||
netgraph/atm netgraph/netflow \
|
||||
netinet/cc \
|
||||
security/audit \
|
||||
security/mac_biba security/mac_bsdextended security/mac_lomac \
|
||||
security/mac_mls security/mac_partition \
|
||||
|
@ -30,7 +30,7 @@
|
||||
.\"
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd January 12, 2015
|
||||
.Dd January 21, 2016
|
||||
.Dt MOD_CC 4
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -49,7 +49,9 @@ using the
|
||||
facility.
|
||||
.Pp
|
||||
The default algorithm is NewReno, and all connections use the default unless
|
||||
explicitly overridden using the TCP_CONGESTION socket option (see
|
||||
explicitly overridden using the
|
||||
.Dv TCP_CONGESTION
|
||||
socket option (see
|
||||
.Xr tcp 4
|
||||
for details).
|
||||
The default can be changed using a
|
||||
@ -57,6 +59,14 @@ The default can be changed using a
|
||||
MIB variable detailed in the
|
||||
.Sx MIB Variables
|
||||
section below.
|
||||
.Pp
|
||||
Algorithm specific parameters can be set or queried using the
|
||||
.Dv TCP_CCALGOOPT
|
||||
socket option (see
|
||||
.Xr tcp 4
|
||||
for details).
|
||||
Callers must pass a pointer to algorithm specific data, and specify
|
||||
its size.
|
||||
.Sh MIB Variables
|
||||
The framework exposes the following variables in the
|
||||
.Va net.inet.tcp.cc
|
||||
|
@ -34,7 +34,7 @@
|
||||
.\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd October 27, 2015
|
||||
.Dd January 21, 2016
|
||||
.Dt TCP 4
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -137,6 +137,11 @@ send window size,
|
||||
receive window size,
|
||||
and
|
||||
bandwidth-controlled window space.
|
||||
.It Dv TCP_CCALGOOPT
|
||||
Set or query congestion control algorithm specific parameters.
|
||||
See
|
||||
.Xr mod_cc 4
|
||||
for details.
|
||||
.It Dv TCP_CONGESTION
|
||||
Select or query the congestion control algorithm that TCP will use for the
|
||||
connection.
|
||||
|
@ -31,7 +31,7 @@
|
||||
.\"
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd December 26, 2014
|
||||
.Dd January 21, 2016
|
||||
.Dt MOD_CC 9
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -40,7 +40,8 @@
|
||||
.Nm CCV
|
||||
.Nd Modular Congestion Control
|
||||
.Sh SYNOPSIS
|
||||
.In netinet/cc.h
|
||||
.In netinet/tcp.h
|
||||
.In netinet/tcp_cc.h
|
||||
.In netinet/cc/cc_module.h
|
||||
.Fn DECLARE_CC_MODULE "ccname" "ccalgo"
|
||||
.Fn CCV "ccv" "what"
|
||||
@ -74,6 +75,7 @@ struct cc_algo {
|
||||
void (*cong_signal) (struct cc_var *ccv, uint32_t type);
|
||||
void (*post_recovery) (struct cc_var *ccv);
|
||||
void (*after_idle) (struct cc_var *ccv);
|
||||
int (*ctl_output)(struct cc_var *, struct sockopt *, void *);
|
||||
};
|
||||
.Ed
|
||||
.Pp
|
||||
@ -166,6 +168,20 @@ function is called when data transfer resumes after an idle period.
|
||||
It should be implemented to adjust state as required.
|
||||
.Pp
|
||||
The
|
||||
.Va ctl_output
|
||||
function is called when
|
||||
.Xr getsockopt 2
|
||||
or
|
||||
.Xr setsockopt 2
|
||||
is called on a
|
||||
.Xr tcp 4
|
||||
socket with the
|
||||
.Va struct sockopt
|
||||
pointer forwarded unmodified from the TCP control, and a
|
||||
.Va void *
|
||||
pointer to an algorithm specific argument.
|
||||
.Pp
|
||||
The
|
||||
.Fn DECLARE_CC_MODULE
|
||||
macro provides a convenient wrapper around the
|
||||
.Xr DECLARE_MODULE 9
|
||||
|
@ -165,6 +165,7 @@ struct tcphdr {
|
||||
#define TCP_MD5SIG 16 /* use MD5 digests (RFC2385) */
|
||||
#define TCP_INFO 32 /* retrieve tcp_info structure */
|
||||
#define TCP_CONGESTION 64 /* get/set congestion control algorithm */
|
||||
#define TCP_CCALGOOPT 65 /* get/set cc algorithm specific options */
|
||||
#define TCP_KEEPINIT 128 /* N, time to establish connection */
|
||||
#define TCP_KEEPIDLE 256 /* L,N,X start keepalives after this period */
|
||||
#define TCP_KEEPINTVL 512 /* L,N interval between keepalives */
|
||||
|
@ -151,6 +151,9 @@ struct cc_algo {
|
||||
/* Called for an additional ECN processing apart from RFC3168. */
|
||||
void (*ecnpkt_handler)(struct cc_var *ccv);
|
||||
|
||||
/* Called for {get|set}sockopt() on a TCP socket with TCP_CCALGOOPT. */
|
||||
int (*ctl_output)(struct cc_var *, struct sockopt *, void *);
|
||||
|
||||
STAILQ_ENTRY (cc_algo) entries;
|
||||
};
|
||||
|
||||
|
@ -1480,7 +1480,33 @@ tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp
|
||||
struct tcp_info ti;
|
||||
struct cc_algo *algo;
|
||||
char *buf;
|
||||
|
||||
|
||||
/*
|
||||
* For TCP_CCALGOOPT forward the control to CC module, for both
|
||||
* SOPT_SET and SOPT_GET.
|
||||
*/
|
||||
switch (sopt->sopt_name) {
|
||||
case TCP_CCALGOOPT:
|
||||
INP_WUNLOCK(inp);
|
||||
buf = malloc(sopt->sopt_valsize, M_TEMP, M_WAITOK | M_ZERO);
|
||||
error = sooptcopyin(sopt, buf, sopt->sopt_valsize,
|
||||
sopt->sopt_valsize);
|
||||
if (error) {
|
||||
free(buf, M_TEMP);
|
||||
return (error);
|
||||
}
|
||||
INP_WLOCK_RECHECK(inp);
|
||||
if (CC_ALGO(tp)->ctl_output != NULL)
|
||||
error = CC_ALGO(tp)->ctl_output(tp->ccv, sopt, buf);
|
||||
else
|
||||
error = ENOENT;
|
||||
INP_WUNLOCK(inp);
|
||||
if (error == 0 && sopt->sopt_dir == SOPT_GET)
|
||||
error = sooptcopyout(sopt, buf, sopt->sopt_valsize);
|
||||
free(buf, M_TEMP);
|
||||
return (error);
|
||||
}
|
||||
|
||||
switch (sopt->sopt_dir) {
|
||||
case SOPT_SET:
|
||||
switch (sopt->sopt_name) {
|
||||
|
Loading…
Reference in New Issue
Block a user