Implement Resource Pooling V2 and an MPTCP-like congestion control.
Based on a patch received from Martin Becke.

MFC after: 2 weeks.
This commit is contained in:
tuexen 2011-05-04 21:27:05 +00:00
parent 416b5943ad
commit 0cc27c3d6b
4 changed files with 217 additions and 30 deletions

View File

@ -265,6 +265,13 @@ struct sctp_paramhdr {
#define SCTP_CC_OPT_USE_DCCC_ECN 0x00002001
#define SCTP_CC_OPT_STEADY_STEP 0x00002002
#define SCTP_CMT_OFF 0
#define SCTP_CMT_BASE 1
#define SCTP_CMT_RPV1 2
#define SCTP_CMT_RPV2 3
#define SCTP_CMT_MPTCP 4
#define SCTP_CMT_MAX SCTP_CMT_MPTCP
/* RS - Supported stream scheduling modules for pluggable
* stream scheduling
*/

View File

@ -47,6 +47,10 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
/*
 * Fixed-point scaling factors (bit-shift counts) for the MPTCP-like
 * congestion control in sctp_cwnd_update_after_sack_common():
 *
 * - SHIFT_MPTCP_MULTI_N: left shift applied to cwnd in the per-path term
 *   cwnd / (mtu * srtt * srtt); the maximum over all paths is the
 *   numerator of mptcp_like_alpha.
 * - SHIFT_MPTCP_MULTI_Z: left shift applied to cwnd in the per-path term
 *   cwnd / (mtu * srtt); the sum over all paths is squared to form the
 *   denominator of mptcp_like_alpha.
 * - SHIFT_MPTCP_MULTI: right shift applied to products with
 *   mptcp_like_alpha when computing the cwnd increment.
 *
 * Note that N - 2 * Z == MULTI (40 - 2 * 16 == 8), so the final right
 * shift removes the scale that remains in mptcp_like_alpha.
 */
#define SHIFT_MPTCP_MULTI_N 40
#define SHIFT_MPTCP_MULTI_Z 16
#define SHIFT_MPTCP_MULTI 8
static void
sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
{
@ -67,7 +71,8 @@ sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
cwnd_in_mtu = assoc->max_burst;
net->cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu;
}
if (stcb->asoc.sctp_cmt_on_off == 2) {
if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) ||
(stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2)) {
/* In case of resource pooling initialize appropriately */
net->cwnd /= assoc->numnets;
if (net->cwnd < (net->mtu - sizeof(struct sctphdr))) {
@ -91,14 +96,23 @@ sctp_cwnd_update_after_fr(struct sctp_tcb *stcb,
{
struct sctp_nets *net;
uint32_t t_ssthresh, t_cwnd;
uint64_t t_ucwnd_sbw;
/* MT FIXME: Don't compute this over and over again */
t_ssthresh = 0;
t_cwnd = 0;
if (asoc->sctp_cmt_on_off == 2) {
t_ucwnd_sbw = 0;
if ((asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) ||
(asoc->sctp_cmt_on_off == SCTP_CMT_RPV2)) {
TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
t_ssthresh += net->ssthresh;
t_cwnd += net->cwnd;
if (net->lastsa > 0) {
t_ucwnd_sbw += (uint64_t) net->cwnd / (uint64_t) net->lastsa;
}
}
if (t_ucwnd_sbw == 0) {
t_ucwnd_sbw = 1;
}
}
/*-
@ -119,11 +133,37 @@ sctp_cwnd_update_after_fr(struct sctp_tcb *stcb,
struct sctp_tmit_chunk *lchk;
int old_cwnd = net->cwnd;
if (asoc->sctp_cmt_on_off == 2) {
net->ssthresh = (uint32_t) (((uint64_t) 4 *
(uint64_t) net->mtu *
(uint64_t) net->ssthresh) /
(uint64_t) t_ssthresh);
if ((asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) ||
(asoc->sctp_cmt_on_off == SCTP_CMT_RPV2)) {
if (asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) {
net->ssthresh = (uint32_t) (((uint64_t) 4 *
(uint64_t) net->mtu *
(uint64_t) net->ssthresh) /
(uint64_t) t_ssthresh);
}
if (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2) {
uint32_t srtt;
srtt = net->lastsa;
/*
* lastsa>>3; we don't need
* to divide ...
*/
if (srtt == 0) {
srtt = 1;
}
/*
* Short Version => Equal to
* Contel Version MBe
*/
net->ssthresh = (uint32_t) (((uint64_t) 4 *
(uint64_t) net->mtu *
(uint64_t) net->cwnd) /
((uint64_t) srtt *
t_ucwnd_sbw));
/* INCREASE FACTOR */ ;
}
if ((net->cwnd > t_cwnd / 2) &&
(net->ssthresh < net->cwnd - t_cwnd / 2)) {
net->ssthresh = net->cwnd - t_cwnd / 2;
@ -629,14 +669,47 @@ sctp_cwnd_update_after_sack_common(struct sctp_tcb *stcb,
struct sctp_nets *net;
int old_cwnd;
uint32_t t_ssthresh, t_cwnd, incr;
uint64_t t_ucwnd_sbw;
uint64_t t_path_mptcp;
uint64_t mptcp_like_alpha;
uint32_t srtt;
uint64_t max_path;
/* MT FIXME: Don't compute this over and over again */
t_ssthresh = 0;
t_cwnd = 0;
if (stcb->asoc.sctp_cmt_on_off == 2) {
t_ucwnd_sbw = 0;
t_path_mptcp = 0;
mptcp_like_alpha = 1;
if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) ||
(stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2) ||
(stcb->asoc.sctp_cmt_on_off == SCTP_CMT_MPTCP)) {
max_path = 0;
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
t_ssthresh += net->ssthresh;
t_cwnd += net->cwnd;
/* lastsa>>3; we don't need to divide ... */
srtt = net->lastsa;
if (srtt > 0) {
uint64_t tmp;
t_ucwnd_sbw += (uint64_t) net->cwnd / (uint64_t) srtt;
t_path_mptcp += (((uint64_t) net->cwnd) << SHIFT_MPTCP_MULTI_Z) /
(((uint64_t) net->mtu) * (uint64_t) srtt);
tmp = (((uint64_t) net->cwnd) << SHIFT_MPTCP_MULTI_N) /
((uint64_t) net->mtu * (uint64_t) (srtt * srtt));
if (tmp > max_path) {
max_path = tmp;
}
}
}
if (t_ucwnd_sbw == 0) {
t_ucwnd_sbw = 1;
}
if (t_path_mptcp > 0) {
mptcp_like_alpha = max_path / (t_path_mptcp * t_path_mptcp);
} else {
mptcp_like_alpha = 1;
}
}
/******************************/
@ -818,10 +891,11 @@ sctp_cwnd_update_after_sack_common(struct sctp_tcb *stcb,
if (net->cwnd <= net->ssthresh) {
/* We are in slow start */
if (net->flight_size + net->net_ack >= net->cwnd) {
old_cwnd = net->cwnd;
if (stcb->asoc.sctp_cmt_on_off == 2) {
uint32_t limit;
uint32_t limit;
old_cwnd = net->cwnd;
switch (asoc->sctp_cmt_on_off) {
case SCTP_CMT_RPV1:
limit = (uint32_t) (((uint64_t) net->mtu *
(uint64_t) SCTP_BASE_SYSCTL(sctp_L2_abc_variable) *
(uint64_t) net->ssthresh) /
@ -835,11 +909,56 @@ sctp_cwnd_update_after_sack_common(struct sctp_tcb *stcb,
if (incr == 0) {
incr = 1;
}
} else {
break;
case SCTP_CMT_RPV2:
/*
* lastsa>>3; we don't need
* to divide ...
*/
srtt = net->lastsa;
if (srtt == 0) {
srtt = 1;
}
limit = (uint32_t) (((uint64_t) net->mtu *
(uint64_t) SCTP_BASE_SYSCTL(sctp_L2_abc_variable) *
(uint64_t) net->cwnd) /
((uint64_t) srtt * t_ucwnd_sbw));
/* INCREASE FACTOR */
incr = (uint32_t) (((uint64_t) net->net_ack *
(uint64_t) net->cwnd) /
((uint64_t) srtt * t_ucwnd_sbw));
/* INCREASE FACTOR */
if (incr > limit) {
incr = limit;
}
if (incr == 0) {
incr = 1;
}
break;
case SCTP_CMT_MPTCP:
limit = (uint32_t) (((uint64_t) net->mtu *
mptcp_like_alpha *
(uint64_t) SCTP_BASE_SYSCTL(sctp_L2_abc_variable)) >>
SHIFT_MPTCP_MULTI);
incr = (uint32_t) (((uint64_t) net->net_ack *
mptcp_like_alpha) >>
SHIFT_MPTCP_MULTI);
if (incr > limit) {
incr = limit;
}
if (incr > net->net_ack) {
incr = net->net_ack;
}
if (incr > net->mtu) {
incr = net->mtu;
}
break;
default:
incr = net->net_ack;
if (incr > net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable)) {
incr = net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable);
}
break;
}
net->cwnd += incr;
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
@ -868,15 +987,44 @@ sctp_cwnd_update_after_sack_common(struct sctp_tcb *stcb,
(net->partial_bytes_acked >= net->cwnd)) {
net->partial_bytes_acked -= net->cwnd;
old_cwnd = net->cwnd;
if (asoc->sctp_cmt_on_off == 2) {
switch (asoc->sctp_cmt_on_off) {
case SCTP_CMT_RPV1:
incr = (uint32_t) (((uint64_t) net->mtu *
(uint64_t) net->ssthresh) /
(uint64_t) t_ssthresh);
if (incr == 0) {
incr = 1;
}
} else {
break;
case SCTP_CMT_RPV2:
/*
* lastsa>>3; we don't need
* to divide ...
*/
srtt = net->lastsa;
if (srtt == 0) {
srtt = 1;
}
incr = (uint32_t) ((uint64_t) net->mtu *
(uint64_t) net->cwnd /
((uint64_t) srtt *
t_ucwnd_sbw));
/* INCREASE FACTOR */
if (incr == 0) {
incr = 1;
}
break;
case SCTP_CMT_MPTCP:
incr = (uint32_t) ((mptcp_like_alpha *
(uint64_t) net->cwnd) >>
SHIFT_MPTCP_MULTI);
if (incr > net->mtu) {
incr = net->mtu;
}
break;
default:
incr = net->mtu;
break;
}
net->cwnd += incr;
SDT_PROBE(sctp, cwnd, net, ack,
@ -926,21 +1074,49 @@ sctp_cwnd_update_after_timeout(struct sctp_tcb *stcb, struct sctp_nets *net)
{
int old_cwnd = net->cwnd;
uint32_t t_ssthresh, t_cwnd;
uint64_t t_ucwnd_sbw;
/* MT FIXME: Don't compute this over and over again */
t_ssthresh = 0;
t_cwnd = 0;
if (stcb->asoc.sctp_cmt_on_off == 2) {
if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) ||
(stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2)) {
struct sctp_nets *lnet;
uint32_t srtt;
t_ucwnd_sbw = 0;
TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
t_ssthresh += lnet->ssthresh;
t_cwnd += lnet->cwnd;
srtt = lnet->lastsa;
/* lastsa>>3; we don't need to divide ... */
if (srtt > 0) {
t_ucwnd_sbw += (uint64_t) lnet->cwnd / (uint64_t) srtt;
}
}
if (t_ucwnd_sbw < 1) {
t_ucwnd_sbw = 1;
}
if (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) {
net->ssthresh = (uint32_t) (((uint64_t) 4 *
(uint64_t) net->mtu *
(uint64_t) net->ssthresh) /
(uint64_t) t_ssthresh);
} else {
uint64_t cc_delta;
srtt = net->lastsa;
/* lastsa>>3; we don't need to divide ... */
if (srtt == 0) {
srtt = 1;
}
cc_delta = t_ucwnd_sbw * (uint64_t) srtt / 2;
if (cc_delta < t_cwnd) {
net->ssthresh = (uint32_t) ((uint64_t) t_cwnd - cc_delta);
} else {
net->ssthresh = net->mtu;
}
}
net->ssthresh = (uint32_t) (((uint64_t) 4 *
(uint64_t) net->mtu *
(uint64_t) net->ssthresh) /
(uint64_t) t_ssthresh);
if ((net->cwnd > t_cwnd / 2) &&
(net->ssthresh < net->cwnd - t_cwnd / 2)) {
net->ssthresh = net->cwnd - t_cwnd / 2;

View File

@ -336,9 +336,9 @@ struct sctp_sysctl {
/* cmt_on_off: CMT on/off flag */
#define SCTPCTL_CMT_ON_OFF_DESC "CMT settings"
#define SCTPCTL_CMT_ON_OFF_MIN 0
#define SCTPCTL_CMT_ON_OFF_MAX 2
#define SCTPCTL_CMT_ON_OFF_DEFAULT 0
#define SCTPCTL_CMT_ON_OFF_MIN SCTP_CMT_OFF
#define SCTPCTL_CMT_ON_OFF_MAX SCTP_CMT_MAX
#define SCTPCTL_CMT_ON_OFF_DEFAULT SCTP_CMT_OFF
/* EY - nr_sack_on_off: NR_SACK on/off flag */
#define SCTPCTL_NR_SACK_ON_OFF_DESC "NR_SACK on/off flag"

View File

@ -2992,18 +2992,22 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
SCTP_FIND_STCB(inp, stcb, av->assoc_id);
if (stcb) {
stcb->asoc.sctp_cmt_on_off = av->assoc_value;
if (stcb->asoc.sctp_cmt_on_off > 2) {
stcb->asoc.sctp_cmt_on_off = 2;
if (av->assoc_value > SCTP_CMT_MAX) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
} else {
stcb->asoc.sctp_cmt_on_off = av->assoc_value;
}
SCTP_TCB_UNLOCK(stcb);
} else {
SCTP_INP_WLOCK(inp);
inp->sctp_cmt_on_off = av->assoc_value;
if (inp->sctp_cmt_on_off > 2) {
inp->sctp_cmt_on_off = 2;
if (av->assoc_value > SCTP_CMT_MAX) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
} else {
SCTP_INP_WLOCK(inp);
inp->sctp_cmt_on_off = av->assoc_value;
SCTP_INP_WUNLOCK(inp);
}
SCTP_INP_WUNLOCK(inp);
}
} else {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);