ECN marking implementation for dummynet.

Changes include both DCTCP and RFC 3168 ECN marking methodology.

DCTCP draft: http://tools.ietf.org/html/draft-bensley-tcpm-dctcp-00

Submitted by:	Midori Kato (aoimidori27@gmail.com)
Worked with:	Lars Eggert (lars@netapp.com)
Reviewed by:	luigi, hiren
This commit is contained in:
Hiren Panchasara 2014-06-01 07:28:24 +00:00
parent 80ce0850f4
commit fc5e1956d9
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=266941
7 changed files with 108 additions and 15 deletions

View File

@ -56,6 +56,7 @@ static struct _s_x dummynet_params[] = {
{ "sched_mask", TOK_SCHED_MASK },
{ "flow_mask", TOK_FLOW_MASK },
{ "droptail", TOK_DROPTAIL },
{ "ecn", TOK_ECN },
{ "red", TOK_RED },
{ "gred", TOK_GRED },
{ "bw", TOK_BW },
@ -239,7 +240,7 @@ print_flowset_parms(struct dn_fs *fs, char *prefix)
else
plr[0] = '\0';
if (fs->flags & DN_IS_RED) /* RED parameters */
if (fs->flags & DN_IS_RED) { /* RED parameters */
sprintf(red,
"\n\t %cRED w_q %f min_th %d max_th %d max_p %f",
(fs->flags & DN_IS_GENTLE_RED) ? 'G' : ' ',
@ -247,7 +248,9 @@ print_flowset_parms(struct dn_fs *fs, char *prefix)
fs->min_th,
fs->max_th,
1.0 * fs->max_p / (double)(1 << SCALE_RED));
else
if (fs->flags & DN_IS_ECN)
strncat(red, " (ecn)", 6);
} else
sprintf(red, "droptail");
if (prefix[0]) {
@ -1046,13 +1049,17 @@ ipfw_config_pipe(int ac, char **av)
}
if ((end = strsep(&av[0], "/"))) {
double max_p = strtod(end, NULL);
if (max_p > 1 || max_p <= 0)
errx(EX_DATAERR, "0 < max_p <= 1");
if (max_p > 1 || max_p < 0)
errx(EX_DATAERR, "0 <= max_p <= 1");
fs->max_p = (int)(max_p * (1 << SCALE_RED));
}
ac--; av++;
break;
case TOK_ECN:
fs->flags |= DN_IS_ECN;
break;
case TOK_DROPTAIL:
NEED(fs, "droptail is only for flowsets");
fs->flags &= ~(DN_IS_RED|DN_IS_GENTLE_RED);
@ -1175,13 +1182,20 @@ ipfw_config_pipe(int ac, char **av)
errx(EX_DATAERR, "2 <= queue size <= %ld", limit);
}
if ((fs->flags & DN_IS_ECN) && !(fs->flags & DN_IS_RED))
errx(EX_USAGE, "enable red/gred for ECN");
if (fs->flags & DN_IS_RED) {
size_t len;
int lookup_depth, avg_pkt_size;
if (fs->min_th >= fs->max_th)
if (!(fs->flags & DN_IS_ECN) && (fs->min_th >= fs->max_th))
errx(EX_DATAERR, "min_th %d must be < than max_th %d",
fs->min_th, fs->max_th);
else if ((fs->flags & DN_IS_ECN) && (fs->min_th > fs->max_th))
errx(EX_DATAERR, "min_th %d must be =< than max_th %d",
fs->min_th, fs->max_th);
if (fs->max_th == 0)
errx(EX_DATAERR, "max_th must be > 0");

View File

@ -1,7 +1,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd October 25, 2012
.Dd May 31, 2014
.Dt IPFW 8
.Os
.Sh NAME
@ -2441,22 +2441,23 @@ and
control the maximum lengths that can be specified.
.Pp
.It Cm red | gred Ar w_q Ns / Ns Ar min_th Ns / Ns Ar max_th Ns / Ns Ar max_p
[ecn]
Make use of the RED (Random Early Detection) queue management algorithm.
.Ar w_q
and
.Ar max_p
are floating
point numbers between 0 and 1 (0 not included), while
point numbers between 0 and 1 (inclusive), while
.Ar min_th
and
.Ar max_th
are integer numbers specifying thresholds for queue management
(thresholds are computed in bytes if the queue has been defined
in bytes, in slots otherwise).
The
The two thresholds may also be set to the same value if needed. The
.Nm dummynet
also supports the gentle RED variant (gred).
Three
also supports the gentle RED variant (gred) and, optionally, ECN (Explicit
Congestion Notification). Three
.Xr sysctl 8
variables can be used to control the RED behaviour:
.Bl -tag -width indent

View File

@ -165,6 +165,7 @@ enum tokens {
TOK_BURST,
TOK_RED,
TOK_GRED,
TOK_ECN,
TOK_DROPTAIL,
TOK_PROTO,
/* dummynet tokens */

View File

@ -104,6 +104,7 @@ enum { /* user flags */
DN_HAS_PROFILE = 0x0010, /* a link has a profile */
DN_IS_RED = 0x0020,
DN_IS_GENTLE_RED= 0x0040,
DN_IS_ECN = 0x0080,
DN_PIPE_CMD = 0x1000, /* pipe config... */
};

View File

@ -83,6 +83,7 @@ struct dn_flow_set {
#define DNOLD_QSIZE_IS_BYTES 0x0008 /* queue size is measured in bytes */
#define DNOLD_NOERROR 0x0010 /* do not report ENOBUFS on drops */
#define DNOLD_HAS_PROFILE 0x0020 /* the pipe has a delay profile. */
#define DNOLD_IS_ECN 0x0040
#define DNOLD_IS_PIPE 0x4000
#define DNOLD_IS_QUEUE 0x8000
@ -338,6 +339,8 @@ convertflags2new(int src)
dst |= DN_IS_RED;
if (src & DNOLD_IS_GENTLE_RED)
dst |= DN_IS_GENTLE_RED;
if (src & DNOLD_IS_ECN)
dst |= DN_IS_ECN;
if (src & DNOLD_HAS_PROFILE)
dst |= DN_HAS_PROFILE;

View File

@ -367,6 +367,8 @@ red_drops (struct dn_queue *q, int len)
return (0); /* accept packet */
}
if (q->avg >= fs->max_th) { /* average queue >= max threshold */
if (fs->fs.flags & DN_IS_ECN)
return (1);
if (fs->fs.flags & DN_IS_GENTLE_RED) {
/*
* According to Gentle-RED, if avg is greater than
@ -382,6 +384,8 @@ red_drops (struct dn_queue *q, int len)
return (1);
}
} else if (q->avg > fs->min_th) {
if (fs->fs.flags & DN_IS_ECN)
return (1);
/*
* We compute p_b using the linear dropping function
* p_b = c_1 * avg - c_2
@ -413,6 +417,70 @@ red_drops (struct dn_queue *q, int len)
}
/*
 * ECN/ECT processing (partially adopted from altq).
 *
 * Try to set the Congestion Experienced (CE) codepoint in the IP header
 * found at the start of mbuf 'm'.
 *
 * Returns 1 when the header carries CE on return (it was already marked,
 * or it was ECN-capable and has just been marked), and 0 when the packet
 * could not be marked: the ECN field is not-ECT, or the IP version is not
 * one handled here.
 */
static int
ecn_mark(struct mbuf* m)
{
	struct ip *hdr = mtod(m, struct ip *);

	if (hdr->ip_v == IPVERSION) {
		const u_int8_t old_tos = hdr->ip_tos;
		const int ecn_bits = old_tos & IPTOS_ECN_MASK;
		int acc;

		if (ecn_bits == IPTOS_ECN_NOTECT)
			return (0);	/* sender is not ECN-capable */
		if (ecn_bits == IPTOS_ECN_CE)
			return (1);	/* CE already set, nothing to do */

		/* ECN-capable and unmarked: set CE in the TOS byte. */
		hdr->ip_tos = old_tos | IPTOS_ECN_CE;

		/*
		 * Patch the header checksum incrementally instead of
		 * recomputing it (RFC1624): HC' = ~(~HC + ~m + m'),
		 * where m is the old TOS value and m' the new one.
		 */
		acc = ~ntohs(hdr->ip_sum) & 0xffff;
		acc += ~old_tos & 0xffff;
		acc += hdr->ip_tos;
		acc = (acc & 0xffff) + (acc >> 16);
		acc += acc >> 16;		/* fold in the last carry */
		hdr->ip_sum = htons(~acc & 0xffff);
		return (1);
	}
#ifdef INET6
	if (hdr->ip_v == (IPV6_VERSION >> 4)) {
		struct ip6_hdr *hdr6 = mtod(m, struct ip6_hdr *);
		u_int32_t flow_word = ntohl(hdr6->ip6_flow);
		u_int32_t ecn_field;

		/* The top nibble of the host-order word must be version 6. */
		if ((flow_word >> 28) != 6)
			return (0);	/* version mismatch! */

		/* The ECN bits are tested at offset 20 of that word. */
		ecn_field = flow_word & (IPTOS_ECN_MASK << 20);
		if (ecn_field == (IPTOS_ECN_NOTECT << 20))
			return (0);	/* sender is not ECN-capable */
		if (ecn_field == (IPTOS_ECN_CE << 20))
			return (1);	/* CE already set, nothing to do */

		/* ECN-capable and unmarked: set CE and store the word back. */
		flow_word |= (IPTOS_ECN_CE << 20);
		hdr6->ip6_flow = htonl(flow_word);
		return (1);
	}
#endif
	/* Neither IPv4 nor (when compiled in) IPv6: cannot mark. */
	return (0);
}
/*
* Enqueue a packet in q, subject to space and queue management policy
* (whose parameters are in q->fs).
@ -444,8 +512,10 @@ dn_enqueue(struct dn_queue *q, struct mbuf* m, int drop)
goto drop;
if (f->plr && random() < f->plr)
goto drop;
if (f->flags & DN_IS_RED && red_drops(q, m->m_pkthdr.len))
goto drop;
if (f->flags & DN_IS_RED && red_drops(q, m->m_pkthdr.len)) {
if (!(f->flags & DN_IS_ECN) || !ecn_mark(m))
goto drop;
}
if (f->flags & DN_QSIZE_BYTES) {
if (q->ni.len_bytes > f->qsize)
goto drop;
@ -457,14 +527,14 @@ dn_enqueue(struct dn_queue *q, struct mbuf* m, int drop)
q->ni.len_bytes += len;
ni->length++;
ni->len_bytes += len;
return 0;
return (0);
drop:
io_pkt_drop++;
q->ni.drops++;
ni->drops++;
FREE_PKT(m);
return 1;
return (1);
}
/*

View File

@ -1071,7 +1071,10 @@ config_red(struct dn_fsk *fs)
fs->min_th = SCALE(fs->fs.min_th);
fs->max_th = SCALE(fs->fs.max_th);
fs->c_1 = fs->max_p / (fs->fs.max_th - fs->fs.min_th);
if (fs->fs.max_th == fs->fs.min_th)
fs->c_1 = fs->max_p;
else
fs->c_1 = SCALE((int64_t)(fs->max_p)) / (fs->fs.max_th - fs->fs.min_th);
fs->c_2 = SCALE_MUL(fs->c_1, SCALE(fs->fs.min_th));
if (fs->fs.flags & DN_IS_GENTLE_RED) {