freebsd-dev/sys/netinet/ip_dummynet.h
Hiren Panchasara fc5e1956d9 ECN marking implementation for dummynet.
Changes include both DCTCP and RFC 3168 ECN marking methodology.

DCTCP draft: http://tools.ietf.org/html/draft-bensley-tcpm-dctcp-00

Submitted by:	Midori Kato (aoimidori27@gmail.com)
Worked with:	Lars Eggert (lars@netapp.com)
Reviewed by:	luigi, hiren
2014-06-01 07:28:24 +00:00

265 lines
8.3 KiB
C

/*-
* Copyright (c) 1998-2010 Luigi Rizzo, Universita` di Pisa
* Portions Copyright (c) 2000 Akamba Corp.
* All rights reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _IP_DUMMYNET_H
#define _IP_DUMMYNET_H
/*
* Definition of the kernel-userland API for dummynet.
*
* Setsockopt() and getsockopt() pass a batch of objects, each
* of them starting with a "struct dn_id" which should fully identify
* the object and its relation with others in the sequence.
* The first object in each request should have
* type= DN_CMD_*, id = DN_API_VERSION.
* For other objects, type and subtype specify the object, len indicates
* the total length including the header, and 'id' identifies the specific
* object.
*
* Most objects are numbered with an identifier in the range 1..65535.
* DN_MAX_ID indicates the first value outside the range.
*/
/* API version; carried in the 'id' field of the first object of a request. */
#define DN_API_VERSION 12500000
/* First value outside the valid object identifier range 1..65535. */
#define DN_MAX_ID 0x10000
/*
 * Header found at the start of every object passed through the
 * dummynet setsockopt()/getsockopt() API. It identifies the object
 * and gives its total length.
 */
struct dn_id {
uint16_t len; /* total object length, including this header */
uint8_t type; /* one of the DN_* type values (object kind or DN_CMD_* command) */
uint8_t subtype; /* refines type, e.g. a DN_SCHED_* scheduler subtype */
uint32_t id; /* generic id; DN_API_VERSION in the first object of a request */
};
/*
* These values are in the type field of struct dn_id.
* To preserve the ABI, never rearrange the list or delete
* entries with the exception of DN_LAST
*/
enum {
	/*
	 * Values stored in the type field of struct dn_id. Every value
	 * is spelled out because it is part of the ABI: existing entries
	 * must keep their numbers and must not be removed.
	 */
	DN_NONE		= 0,
	DN_LINK		= 1,
	DN_FS		= 2,
	DN_SCH		= 3,
	DN_SCH_I	= 4,
	DN_QUEUE	= 5,
	DN_DELAY_LINE	= 6,
	DN_PROFILE	= 7,
	DN_FLOW		= 8,	/* struct dn_flow */
	DN_TEXT		= 9,	/* opaque text is the object */

	DN_CMD_CONFIG	= 0x80,	/* objects follow */
	DN_CMD_DELETE	= 0x81,	/* subtype + list of entries */
	DN_CMD_GET	= 0x82,	/* subtype + list of entries */
	DN_CMD_FLUSH	= 0x83,

	/* for compatibility with FreeBSD 7.2/8 */
	DN_COMPAT_PIPE	= 0x84,
	DN_COMPAT_QUEUE	= 0x85,
	DN_GET_COMPAT	= 0x86,

	/* special commands for emulation of sysctl variables */
	DN_SYSCTL_GET	= 0x87,
	DN_SYSCTL_SET	= 0x88,

	/*
	 * Left implicit on purpose: DN_LAST is the one entry allowed to
	 * move, so it always follows the last real entry.
	 */
	DN_LAST,
};
/*
 * Subtype values for schedulers, flowsets and the like. Only the
 * subtypes listed here are defined in this header.
 */
enum { /* subtype for schedulers, flowset and the like */
DN_SCHED_UNKNOWN = 0,
DN_SCHED_FIFO = 1,
DN_SCHED_WF2QP = 2, /* NOTE(review): presumably WF2Q+ -- confirm */
/* others are in individual modules */
};
enum { /* user flags, one bit each */
	DN_HAVE_MASK	 = 1 << 0,	/* fs or sched has a mask */
	DN_NOERROR	 = 1 << 1,	/* do not report errors */
	DN_QHT_HASH	 = 1 << 2,	/* qht is a hash table */
	DN_QSIZE_BYTES	 = 1 << 3,	/* queue size is in bytes */
	DN_HAS_PROFILE	 = 1 << 4,	/* a link has a profile */
	/*
	 * NOTE(review): the three flags below presumably select RED,
	 * gentle RED and ECN marking for a flowset -- confirm against
	 * the code that tests them.
	 */
	DN_IS_RED	 = 1 << 5,
	DN_IS_GENTLE_RED = 1 << 6,
	DN_IS_ECN	 = 1 << 7,
	/* bits 8..11 are not assigned in this enum */
	DN_PIPE_CMD	 = 1 << 12,	/* pipe config... */
};
/*
 * Link template: carries the number, bandwidth, delay and burst
 * configured for a link.
 */
struct dn_link {
struct dn_id oid; /* common object header */
/*
 * Userland sets bandwidth and delay in bits/s and milliseconds.
 * The kernel converts them back and forth to bits/tick and ticks.
 * XXX what about burst ?
 */
int32_t link_nr; /* NOTE(review): presumably the number of this link -- confirm */
int bandwidth; /* bits/s (userland) or bits/tick (kernel) */
int delay; /* ms (userland) or ticks (kernel) */
uint64_t burst; /* scaled. bits*Hz XXX */
};
/*
 * A flowset, which is a template for flows. Contains parameters
 * from the command line: id, target scheduler, queue sizes, plr,
 * flow masks, buckets for the flow hash, and possibly scheduler-
 * specific parameters (weight, quantum and so on).
 */
struct dn_fs {
struct dn_id oid; /* common object header */
uint32_t fs_nr; /* the flowset number */
uint32_t flags; /* userland flags (presumably the DN_* user flag bits -- confirm) */
int qsize; /* queue size in slots or bytes */
int32_t plr; /* PLR, pkt loss rate (2^31-1 means 100%) */
uint32_t buckets; /* buckets used for the queue hash table */
struct ipfw_flow_id flow_mask; /* mask used to group packets into flows */
uint32_t sched_nr; /* the scheduler we attach to */
/*
 * Generic scheduler parameters. Leave them at -1 if unset.
 * Currently used slots: 0: weight, 1: lmax, 2: priority.
 */
int par[4];
/*
 * RED/GRED parameters.
 * Weight and probabilities are in the range 0..1, represented
 * in fixed point arithmetic with SCALE_RED fractional bits.
 *
 * The macros below are ordinary preprocessor definitions: although
 * they appear inside the struct, they are visible to the rest of
 * the file after this point.
 */
/* number of fractional bits in the fixed point representation */
#define SCALE_RED 16
/* convert an integer to fixed point */
#define SCALE(x) ( (x) << SCALE_RED )
/* convert fixed point back to an integer, discarding the fraction bits */
#define SCALE_VAL(x) ( (x) >> SCALE_RED )
/* multiply two fixed point values and rescale the product */
#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED )
int w_q ; /* queue weight (scaled) */
int max_th ; /* maximum threshold for queue (scaled) */
int min_th ; /* minimum threshold for queue (scaled) */
int max_p ; /* maximum value for p_b (scaled) */
};
/*
 * dn_flow collects the flow_id and statistics for queues and
 * scheduler instances, and is used to pass this information to
 * userland. oid.type/oid.subtype describe the object; oid.id is the
 * number of the parent object.
 */
struct dn_flow {
struct dn_id oid; /* header; oid.id is the number of the parent object */
struct ipfw_flow_id fid; /* flow id this record refers to */
uint64_t tot_pkts; /* statistics counters */
uint64_t tot_bytes;
uint32_t length; /* Queue length, in packets */
uint32_t len_bytes; /* Queue length, in bytes */
uint32_t drops; /* NOTE(review): presumably the number of dropped packets -- confirm */
};
/*
 * Scheduler template, mostly indicating the name, number,
 * sched_mask and buckets.
 */
struct dn_sch {
struct dn_id oid; /* common object header */
uint32_t sched_nr; /* N, scheduler number */
uint32_t buckets; /* number of buckets for the instances */
uint32_t flags; /* have_mask, ... */
char name[16]; /* null terminated, so at most 15 characters of name */
/* mask to select the appropriate scheduler instance */
struct ipfw_flow_id sched_mask; /* M */
};
/*
 * A delay profile is attached to a link.
 * Note that a profile, like any other object, must have a total
 * length that fits in the uint16_t len field of struct dn_id, i.e.
 * it must be shorter than 2^16.
 */
/* capacity of the samples[] array in struct dn_profile */
#define ED_MAX_SAMPLES_NO 1024
struct dn_profile {
struct dn_id oid; /* common object header */
/* fields to simulate a delay profile */
/* size of the name[] buffer */
#define ED_MAX_NAME_LEN 32
char name[ED_MAX_NAME_LEN];
int link_nr; /* NOTE(review): presumably the link this profile is attached to -- confirm */
int loss_level;
int bandwidth; /* XXX use link bandwidth? */
int samples_no; /* actual len of samples[] */
int samples[ED_MAX_SAMPLES_NO]; /* may be shorter */
};
/*
* Overall structure of dummynet
In dummynet, packets are selected with the firewall rules, and passed
to two different objects: PIPE or QUEUE (bad name).
A QUEUE defines a classifier, which groups packets into flows
according to a 'mask', puts them into independent queues (one
per flow) with configurable size and queue management policy,
and passes flows to a scheduler:
                 (flow_mask|sched_mask)  sched_mask
         +---------+   weight Wx  +-------------+
         |         |->-[flow]-->--|             |-+
    -->--| QUEUE x |   ...        |             | |
         |         |->-[flow]-->--| SCHEDuler N | |
         +---------+              |             | |
             ...                  |             +--[LINK N]-->--
         +---------+   weight Wy  |             | +--[LINK N]-->--
         |         |->-[flow]-->--|             | |
    -->--| QUEUE y |   ...        |             | |
         |         |->-[flow]-->--|             | |
         +---------+              +-------------+ |
                                    +-------------+
Many QUEUE objects can connect to the same scheduler, each
QUEUE object can have its own set of parameters.
In turn, the SCHEDuler 'forks' multiple instances according
to a 'sched_mask', each instance manages its own set of queues
and transmits on a private instance of a configurable LINK.
A PIPE is a simplified version of the above, where there
is no flow_mask, and each scheduler instance handles a single queue.
The following data structures (visible from userland) describe
the objects used by dummynet:
+ dn_link, contains the main configuration parameters related
to delay and bandwidth;
+ dn_profile describes a delay profile;
+ dn_flow describes the flow status (flow id, statistics)
+ dn_sch describes a scheduler
+ dn_fs describes a flowset (mask, weight, queue parameters)
*
*/
#endif /* _IP_DUMMYNET_H */