988790bfd9
You can now dynamically create rate-limited queues for different flows using masks on dst/src IP, port and protocol. Read the ipfw(8) manpage for details and examples. The internals of the traffic shaper have been restructured to use heaps, so that it efficiently manages large numbers of queues. Also fixed a bug, present in previous versions, which under certain infrequent conditions could cause very large bursts of traffic to be sent out. All in all, this new code is much cleaner than the previous one and should also perform better. Work supported by Akamba Corp.
961 lines
27 KiB
C
/*
 * Copyright (c) 1998-2000 Luigi Rizzo, Universita` di Pisa
 * Portions Copyright (c) 2000 Akamba Corp.
 * All rights reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#define DEB(x)
#define DDB(x)	x

/*
 * This module implements IP dummynet, a bandwidth limiter/delay emulator
 * used in conjunction with the ipfw package.
 *
 * Most important Changes:
 *
 * 000106: large rewrite, use heaps to handle very many pipes.
 * 980513: initial release
 *
 * include files marked with XXX are probably not needed
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/queue.h>			/* XXX */
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/time.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_fw.h>
#include <netinet/ip_dummynet.h>
#include <netinet/ip_var.h>

#include "opt_bdg.h"
#ifdef BRIDGE
#include <netinet/if_ether.h> /* for struct arpcom */
#include <net/bridge.h>
#endif

/*
 * the addresses/ports of the last pkt matched by the firewall are
 * in this structure. This is so that we can easily find them without
 * navigating through the mbuf.
 */
struct dn_flow_id dn_last_pkt ;

/*
 * we keep a private variable for the simulation time, but probably
 * it would be better to use the already existing one "softticks"
 * (in sys/kern/kern_timer.c)
 */
static dn_key curr_time = 0 ; /* current simulation time */

static int dn_hash_size = 64 ;	/* default hash size */

/* statistics on number of queue searches and search steps */
static int searches, search_steps ;

static struct dn_heap ready_heap, extract_heap ;
static int heap_init(struct dn_heap *h, int size) ;
static int heap_insert (struct dn_heap *h, dn_key key1, void *p);
static void heap_extract(struct dn_heap *h);
static void transmit_event(struct dn_pipe *pipe);
static void ready_event(struct dn_flow_queue *q);

static struct dn_pipe *all_pipes = NULL ;	/* list of all pipes */

#ifdef SYSCTL_NODE
SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet,
	    CTLFLAG_RW, 0, "Dummynet");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size,
	    CTLFLAG_RD, &dn_hash_size, 0, "Default hash table size");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, curr_time,
	    CTLFLAG_RD, &curr_time, 0, "Current tick");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap,
	    CTLFLAG_RD, &ready_heap.size, 0, "Size of ready heap");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap,
	    CTLFLAG_RD, &extract_heap.size, 0, "Size of extract heap");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, searches,
	    CTLFLAG_RD, &searches, 0, "Number of queue searches");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, search_steps,
	    CTLFLAG_RD, &search_steps, 0, "Number of queue search steps");
#endif

static int ip_dn_ctl(struct sockopt *sopt);

static void rt_unref(struct rtentry *);
static void dummynet(void *);
static void dummynet_flush(void);

/*
 * ip_fw_chain is used when deleting a pipe, because ipfw rules can
 * hold references to the pipe.
 */
extern LIST_HEAD (ip_fw_head, ip_fw_chain) ip_fw_chain;

static void
rt_unref(struct rtentry *rt)
{
    if (rt == NULL)
	return ;
    if (rt->rt_refcnt <= 0)
	printf("-- warning, refcnt now %ld, decreasing\n", rt->rt_refcnt);
    RTFREE(rt);
}

/*
 * Heap management functions.
 *
 * In the heap, first node is element 0. Children of i are 2i+1 and 2i+2.
 * Some macros help finding parent/children so we can optimize them.
 *
 * heap_init() is called to expand the heap when needed.
 * Increment size in blocks of 256 entries (which make one 4KB page)
 * XXX failure to allocate a new element is a pretty bad failure
 * as we basically stall a whole queue forever!!
 * Returns 1 on error, 0 on success
 */
#define HEAP_FATHER(x)		( ( (x) - 1 ) / 2 )
#define HEAP_LEFT(x)		( 2*(x) + 1 )
#define HEAP_IS_LEFT(x)		( (x) & 1 )
#define HEAP_RIGHT(x)		( 2*(x) + 2 )
#define HEAP_SWAP(a, b, buffer)	{ buffer = a ; a = b ; b = buffer ; }
#define HEAP_INCREMENT		255
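/*
 * Worked example of the index arithmetic above (illustrative only):
 * for node x = 3, HEAP_FATHER(3) = 1, HEAP_LEFT(3) = 7, HEAP_RIGHT(3) = 8,
 * and HEAP_IS_LEFT(3) is true, since left children sit at odd positions.
 */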

static int
heap_init(struct dn_heap *h, int new_size)
{
    struct dn_heap_entry *p;

    if (h->size >= new_size ) {
	printf("heap_init, Bogus call, have %d want %d\n",
		h->size, new_size);
	return 0 ;
    }
    new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT ;
    p = malloc(new_size * sizeof(*p), M_IPFW, M_DONTWAIT );
    if (p == NULL) {
	printf(" heap_init, resize %d failed\n", new_size );
	return 1 ; /* error */
    }
    if (h->size > 0) {
	bcopy(h->p, p, h->size * sizeof(*p) );
	free(h->p, M_IPFW);
    }
    h->p = p ;
    h->size = new_size ;
    return 0 ;
}

/*
 * Insert element in heap. Normally, p != NULL, we insert p in
 * a new position and bubble up. If p == NULL, then the element is
 * already in place, and key is the position where to start the
 * bubble-up.
 * Returns 1 on failure (cannot allocate new heap entry)
 */
static int
heap_insert(struct dn_heap *h, dn_key key1, void *p)
{
    int son = h->elements ;

    if (p == NULL)	/* data already there, set starting point */
	son = key1 ;
    else {		/* insert new element at the end, possibly resize */
	son = h->elements ;
	if (son == h->size) /* need resize... */
	    if (heap_init(h, h->elements+1) )
		return 1 ; /* failure... */
	h->p[son].object = p ;
	h->p[son].key = key1 ;
	h->elements++ ;
    }
    while (son > 0) {			/* bubble up */
	int father = HEAP_FATHER(son) ;
	struct dn_heap_entry tmp ;

	if (DN_KEY_LT( h->p[father].key, h->p[son].key ) )
	    break ; /* found right position */
	/* son smaller than father, swap and try again */
	HEAP_SWAP(h->p[son], h->p[father], tmp) ;
	son = father ;
    }
    return 0 ;
}

/*
 * remove top element from heap
 */
static void
heap_extract(struct dn_heap *h)
{
    int child, father, max = h->elements - 1 ;
    if (max < 0)
	return ;

    /* move up smallest child */
    father = 0 ;
    child = HEAP_LEFT(father) ;		/* left child */
    while (child <= max) {		/* valid entry */
	if (child != max && DN_KEY_LT(h->p[child+1].key, h->p[child].key) )
	    child = child+1 ;		/* take right child, otherwise left */
	h->p[father] = h->p[child] ;
	father = child ;
	child = HEAP_LEFT(child) ;	/* left child for next loop */
    }
    h->elements-- ;
    if (father != max) {
	/*
	 * Fill hole with last entry and bubble up, reusing the insert code
	 */
	h->p[father] = h->p[max] ;
	heap_insert(h, father, NULL); /* this one cannot fail */
    }
}

/*
 * heapify() will reorganize data inside an array to maintain the
 * heap property. It is needed when we delete a bunch of entries.
 */
static void
heapify(struct dn_heap *h)
{
    int father, i ;
    struct dn_heap_entry tmp ;

    for (i = h->elements - 1 ; i > 0 ; i-- ) {
	father = HEAP_FATHER(i) ;
	if ( DN_KEY_LT(h->p[i].key, h->p[father].key) )
	    HEAP_SWAP(h->p[father], h->p[i], tmp) ;
    }
}
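
#if 0
/*
 * Minimal usage sketch for the heap routines above (illustrative only,
 * never compiled in): keys inserted in arbitrary order come out
 * smallest-first, which is exactly how the scheduler consumes events.
 */
static void
heap_demo(void)
{
    struct dn_heap h = { 0 } ;		/* size = elements = 0, p = NULL */

    heap_insert(&h, 30, (void *)3) ;	/* payloads are opaque cookies */
    heap_insert(&h, 10, (void *)1) ;
    heap_insert(&h, 20, (void *)2) ;
    while (h.elements > 0) {
	printf("key %d object %p\n", (int)h.p[0].key, h.p[0].object) ;
	heap_extract(&h) ;		/* prints keys 10, 20, 30 in order */
    }
    if (h.size > 0)
	free(h.p, M_IPFW) ;		/* release the backing array */
}
#endif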
/*
 * --- end of heap management functions ---
 */

/*
 * Scheduler functions -- transmit_event(), ready_event()
 *
 * transmit_event() is called when the delay-line needs to enter
 * the scheduler, either because of existing pkts getting ready,
 * or new packets entering the queue. The event handled is the delivery
 * time of the packet.
 *
 * ready_event() does something similar with flow queues, and the
 * event handled is the finish time of the head pkt.
 *
 * In both cases, we make sure that the data structures are consistent
 * before passing pkts out, because this might trigger recursive
 * invocations of the procedures.
 */
static void
transmit_event(struct dn_pipe *pipe)
{
    struct dn_pkt *pkt ;

    while ( (pkt = pipe->p.head) && DN_KEY_LEQ(pkt->output_time, curr_time) ) {
	/*
	 * first unlink, then call procedures, since ip_input() can invoke
	 * ip_output() and vice versa, thus causing nested calls
	 */
	pipe->p.head = DN_NEXT(pkt) ;

	/*
	 * The actual mbuf is preceded by a struct dn_pkt, resembling an mbuf
	 * (NOT A REAL one, just a small block of malloc'ed memory) with
	 *     m_type  = MT_DUMMYNET
	 *     m_next  = actual mbuf to be processed by ip_input/output
	 *     m_data  = the matching rule
	 * and some other fields.
	 * The block IS FREED HERE because it contains parameters passed
	 * to the called routine.
	 */
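	/*
	 * Note (illustrative): the (struct mbuf *)pkt casts below rely on
	 * struct dn_pkt beginning with a struct m_hdr, so the called
	 * routine reads m_type/m_next/m_data exactly as set up above.
	 */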
	switch (pkt->dn_dir) {
	case DN_TO_IP_OUT:
	    (void)ip_output((struct mbuf *)pkt, NULL, NULL, 0, NULL);
	    rt_unref (pkt->ro.ro_rt) ;
	    break ;

	case DN_TO_IP_IN :
	    ip_input((struct mbuf *)pkt) ;
	    break ;

#ifdef BRIDGE
	case DN_TO_BDG_FWD : {
	    struct mbuf *m = (struct mbuf *)pkt ;
	    bdg_forward(&m, pkt->ifp);
	    if (m)
		m_freem(m);
	    }
	    break ;
#endif

	default:
	    printf("dummynet: bad switch %d!\n", pkt->dn_dir);
	    m_freem(pkt->dn_m);
	    break ;
	}
	FREE(pkt, M_IPFW);
    }
    /* if there are leftover packets, put into the heap for next event */
    if ( (pkt = pipe->p.head) )
	heap_insert(&extract_heap, pkt->output_time, pipe ) ;
    /* XXX should check errors on heap_insert, by draining the
     * whole pipe p and hoping in the future we are more successful
     */
}

/*
 * ready_event() is invoked every time the queue must enter the
 * scheduler, either because the first packet arrives, or because
 * a previously scheduled event fired.
 * On invocation, drain as many pkts as possible (could be 0) and then
 * if there are leftover packets reinsert the pkt in the scheduler.
 */
static void
ready_event(struct dn_flow_queue *q)
{
    struct dn_pkt *pkt;
    struct dn_pipe *p = q->p ;
    int p_was_empty = (p->p.head == NULL) ;

    while ( (pkt = q->r.head) != NULL ) {
	int len = pkt->dn_m->m_pkthdr.len;
	int len_scaled = p->bandwidth ? len*8*hz : 0 ;
	/*
	 * bandwidth==0 (no limit) means we can drain as many pkts as
	 * needed from the queue. Setting len_scaled = 0 does the job.
	 */
	if (len_scaled > q->numbytes )
	    break ;
	/*
	 * extract pkt from queue, compute output time (could be now)
	 * and put into delay line (p_queue)
	 */
	q->numbytes -= len_scaled ;
	q->r.head = DN_NEXT(pkt) ;
	q->len-- ;
	q->len_bytes -= len ;

	pkt->output_time = curr_time + p->delay ;

	if (p->p.head == NULL)
	    p->p.head = pkt;
	else
	    DN_NEXT(p->p.tail) = pkt;
	p->p.tail = pkt;
	DN_NEXT(p->p.tail) = NULL;
    }
    /*
     * If the delay line was empty call transmit_event(p) now.
     * Otherwise, the scheduler will take care of it.
     */
    if (p_was_empty)
	transmit_event(p);
    /*
     * If we have more packets queued, schedule next ready event
     * (can only occur when bandwidth != 0, otherwise we would have
     * flushed the whole queue in the previous loop).
     * To this purpose compute how many ticks to go for the next
     * event, accounting for packet size and residual credit. This means
     * we compute the finish time of the packet.
     */
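    /*
     * Worked example (illustrative): with hz = 1000, a 1500-byte packet
     * needs len*8*hz = 12,000,000 "scaled" units; on a pipe with
     * bandwidth = 1,000,000 bit/s and numbytes = 0, the ceiling division
     * below gives t = 12 ticks, i.e. the 12 ms such a packet takes on
     * the wire at 1 Mbit/s.
     */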
    if ( (pkt = q->r.head) != NULL ) { /* this implies bandwidth != 0 */
	dn_key t ;
	t = (pkt->dn_m->m_pkthdr.len*8*hz - q->numbytes + p->bandwidth - 1 ) /
		p->bandwidth ;
	q->numbytes += t * p->bandwidth ;
	heap_insert(&ready_heap, curr_time + t, (void *)q );
	/* XXX should check errors on heap_insert, and drain the whole
	 * queue on error hoping next time we are luckier.
	 */
    }
}

/*
 * this is called once per tick, or HZ times per second. It is used to
 * increment the current tick counter and schedule expired events.
 */
static void
dummynet(void * __unused unused)
{
    void *p ; /* generic parameter to handler */
    struct dn_heap *h ;
    int s ;

    s = splnet(); /* avoid network interrupts... */
    curr_time++ ;
    h = &ready_heap ;
    while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time) ) {
	/*
	 * XXX if the event is late, we should probably credit the queue
	 * by q->p->bandwidth * (delta_ticks). On the other hand, I don't
	 * think this can ever occur with this code (i.e. curr_time will
	 * still be incremented by one at each tick). Things might be
	 * different if we were using the counter from the high priority
	 * timer.
	 */
	if (h->p[0].key != curr_time)
	    printf("-- dummynet: warning, event is %d ticks late\n",
		(int)(curr_time - h->p[0].key));
	p = h->p[0].object ;
	heap_extract(h); /* need to extract before processing */
	ready_event(p) ;
    }
    h = &extract_heap ;
    while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time) ) {
	if (h->p[0].key != curr_time)	/* XXX same as above */
	    printf("-- dummynet: warning, event is %d ticks late\n",
		(int)(curr_time - h->p[0].key));
	p = h->p[0].object ;
	heap_extract(&extract_heap);
	transmit_event(p);
    }
    splx(s);
    timeout(dummynet, NULL, 1);
}

/*
 * Given a pipe and a pkt in dn_last_pkt, find a matching queue
 * after appropriate masking. The queue is moved to front
 * so that further searches take less time.
 * XXX if the queue is longer than some threshold should consider
 * purging old unused entries. They will get in the way every time
 * we have a new flow.
 */
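/*
 * Example (illustrative): with flow_mask.dst_ip = 0x000000ff and all
 * other mask fields zero, only the low byte of the destination address
 * survives the masking, so packets whose destinations differ elsewhere
 * collapse to the same flow-id and share one queue, while a different
 * low byte yields a different flow-id and gets a queue of its own.
 */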
static struct dn_flow_queue *
find_queue(struct dn_pipe *pipe)
{
    int i = 0 ; /* we need i and q for new allocations */
    struct dn_flow_queue *q, *prev;

    if ( !(pipe->flags & DN_HAVE_FLOW_MASK) )
	q = pipe->rq[0] ;
    else {
	/* first, do the masking */
	dn_last_pkt.dst_ip &= pipe->flow_mask.dst_ip ;
	dn_last_pkt.src_ip &= pipe->flow_mask.src_ip ;
	dn_last_pkt.dst_port &= pipe->flow_mask.dst_port ;
	dn_last_pkt.src_port &= pipe->flow_mask.src_port ;
	dn_last_pkt.proto &= pipe->flow_mask.proto ;
	/* then, hash function */
	i = ( (dn_last_pkt.dst_ip) & 0xffff ) ^
	    ( (dn_last_pkt.dst_ip >> 15) & 0xffff ) ^
	    ( (dn_last_pkt.src_ip << 1) & 0xffff ) ^
	    ( (dn_last_pkt.src_ip >> 16 ) & 0xffff ) ^
	    (dn_last_pkt.dst_port << 1) ^ (dn_last_pkt.src_port) ^
	    (dn_last_pkt.proto );
	i = i % pipe->rq_size ;
	/* finally, scan the current list for a match */
	searches++ ;
	for (prev=NULL, q = pipe->rq[i] ; q ; prev = q , q = q->next ) {
	    search_steps++;
	    if (bcmp(&dn_last_pkt, &(q->id), sizeof(q->id) ) == 0)
		break ; /* found */
	}
	if (q && prev != NULL) { /* found and not in front */
	    prev->next = q->next ;
	    q->next = pipe->rq[i] ;
	    pipe->rq[i] = q ;
	}
    }
    if (q == NULL) { /* no match, need to allocate a new entry */
	q = malloc(sizeof(*q), M_IPFW, M_DONTWAIT) ;
	if (q == NULL) {
	    printf("sorry, cannot allocate new flow\n");
	    return NULL ;
	}
	bzero(q, sizeof(*q) );	/* needed */
	q->id = dn_last_pkt ;
	q->p = pipe ;
	q->hash_slot = i ;
	q->next = pipe->rq[i] ;
	pipe->rq[i] = q ;
	pipe->rq_elements++ ;
	DEB(printf("++ new queue (%d) for 0x%08x/0x%04x -> 0x%08x/0x%04x\n",
		pipe->rq_elements,
		dn_last_pkt.src_ip, dn_last_pkt.src_port,
		dn_last_pkt.dst_ip, dn_last_pkt.dst_port); )
    }
    return q ;
}

/*
 * dummynet hook for packets.
 */
int
dummynet_io(int pipe_nr, int dir,
	struct mbuf *m, struct ifnet *ifp, struct route *ro,
	struct sockaddr_in *dst,
	struct ip_fw_chain *rule, int flags)
{
    struct dn_pkt *pkt;
    struct dn_pipe *p;
    int len = m->m_pkthdr.len ;
    struct dn_flow_queue *q = NULL ;
    int s ;

    s = splimp();
    /* XXX check the spl protection. It might be unnecessary since we
     * run this at splnet() already.
     */

    DEB(printf("-- last_pkt dst 0x%08x/0x%04x src 0x%08x/0x%04x\n",
	dn_last_pkt.dst_ip, dn_last_pkt.dst_port,
	dn_last_pkt.src_ip, dn_last_pkt.src_port);)

    pipe_nr &= 0xffff ;
    /*
     * locate pipe. First time is expensive, next have direct access.
     */
    if ( (p = rule->rule->pipe_ptr) == NULL ) {
	for (p = all_pipes; p && p->pipe_nr != pipe_nr; p = p->next)
	    ;
	if (p == NULL)
	    goto dropit ;	/* this pipe does not exist! */
	rule->rule->pipe_ptr = p ; /* record pipe ptr for the future */
    }
    q = find_queue(p);
    /*
     * update statistics, then do various checks on reasons to drop pkt
     */
    if ( q == NULL )
	goto dropit ;		/* cannot allocate queue */
    q->tot_bytes += len ;
    q->tot_pkts++ ;
    if ( p->plr && random() < p->plr )
	goto dropit ;		/* random pkt drop */
    if ( p->queue_size && q->len >= p->queue_size)
	goto dropit ;		/* queue count overflow */
    if ( p->queue_size_bytes && len + q->len_bytes > p->queue_size_bytes)
	goto dropit ;		/* queue size overflow */
    /*
     * can implement RED drops here if needed.
     */

    pkt = (struct dn_pkt *)malloc(sizeof (*pkt), M_IPFW, M_NOWAIT) ;
    if ( pkt == NULL )
	goto dropit ;		/* cannot allocate packet header */
    /* ok, i can handle the pkt now... */
    bzero(pkt, sizeof(*pkt) ); /* XXX expensive, see if we can remove it */
    /* build and enqueue packet + parameters */
    pkt->hdr.mh_type = MT_DUMMYNET ;
    pkt->hdr.mh_data = (caddr_t)rule ;
    DN_NEXT(pkt) = NULL;
    pkt->dn_m = m;
    pkt->dn_dir = dir ;

    pkt->ifp = ifp;
    if (dir == DN_TO_IP_OUT) {
	/*
	 * We need to copy *ro because for ICMP pkts (and maybe others)
	 * the caller passed a pointer into the stack; and, dst might
	 * also be a pointer into *ro so it needs to be updated.
	 */
	pkt->ro = *ro;
	if (ro->ro_rt)
	    ro->ro_rt->rt_refcnt++ ; /* XXX */
	if (dst == (struct sockaddr_in *)&ro->ro_dst) /* dst points into ro */
	    dst = (struct sockaddr_in *)&(pkt->ro.ro_dst) ;

	pkt->dn_dst = dst;
	pkt->flags = flags ;
    }
    if (q->r.head == NULL)
	q->r.head = pkt;
    else
	DN_NEXT(q->r.tail) = pkt;
    q->r.tail = pkt;
    q->len++;
    q->len_bytes += len ;

    /*
     * If queue was empty (this is first pkt) then call ready_event()
     * now to make the pkt go out at the right time. Otherwise we are done,
     * as there must be a ready event already scheduled.
     */
    if (q->r.head == pkt)	/* r_queue was empty */
	ready_event( q );
    splx(s);
    return 0;

dropit:
    splx(s);
    if (q)
	q->drops++ ;
    m_freem(m);
    return 0 ; /* XXX should I return an error ? */
}

/*
 * below, the rt_unref is only needed when (pkt->dn_dir == DN_TO_IP_OUT)
 * Doing this would probably save us the initial bzero of dn_pkt
 */
#define DN_FREE_PKT(pkt)	{		\
	struct dn_pkt *n = pkt ;		\
	rt_unref ( n->ro.ro_rt ) ;		\
	m_freem(n->dn_m);			\
	pkt = DN_NEXT(n) ;			\
	free(n, M_IPFW) ;	}
/*
 * dispose all packets queued on a pipe
 */
static void
purge_pipe(struct dn_pipe *pipe)
{
    struct dn_pkt *pkt ;
    struct dn_flow_queue *q, *qn ;
    int i ;

    for (i = 0 ; i < pipe->rq_size ; i++ )
	for (q = pipe->rq[i] ; q ; q = qn ) {
	    for (pkt = q->r.head ; pkt ; )
		DN_FREE_PKT(pkt) ;
	    qn = q->next ;
	    free(q, M_IPFW);
	}
    for (pkt = pipe->p.head ; pkt ; )
	DN_FREE_PKT(pkt) ;
}

/*
 * Delete all pipes and heaps returning memory. Must also
 * remove references from all ipfw rules to all pipes.
 */
static void
dummynet_flush()
{
    struct dn_pipe *curr_p, *p ;
    struct ip_fw_chain *chain ;
    int s ;

    s = splnet() ;

    /* remove all references to pipes ...*/
    for (chain= ip_fw_chain.lh_first ; chain; chain = chain->chain.le_next)
	chain->rule->pipe_ptr = NULL ;
    /* prevent future matches... */
    p = all_pipes ;
    all_pipes = NULL ;
    /* and free heaps so we don't have unwanted events */
    if (ready_heap.size > 0 )
	free(ready_heap.p, M_IPFW);
    ready_heap.elements = ready_heap.size = 0 ;
    if (extract_heap.size > 0 )
	free(extract_heap.p, M_IPFW);
    extract_heap.elements = extract_heap.size = 0 ;
    splx(s) ;
    /*
     * Now purge all queued pkts and delete all pipes
     */
    for ( ; p ; ) {
	purge_pipe(p);
	curr_p = p ;
	p = p->next ;
	free(curr_p->rq, M_IPFW);
	free(curr_p, M_IPFW);
    }
}

extern struct ip_fw_chain *ip_fw_default_rule ;
/*
 * when a firewall rule is deleted, scan all queues and remove the flow-id
 * from packets matching this rule.
 */
void
dn_rule_delete(void *r)
{
    struct dn_pipe *p ;
    struct dn_flow_queue *q ;
    struct dn_pkt *pkt ;
    int i ;

    for ( p = all_pipes ; p ; p = p->next ) {
	for (i = 0 ; i < p->rq_size ; i++)
	    for (q = p->rq[i] ; q ; q = q->next )
		for (pkt = q->r.head ; pkt ; pkt = DN_NEXT(pkt) )
		    if (pkt->hdr.mh_data == r)
			pkt->hdr.mh_data = (void *)ip_fw_default_rule ;
	for (pkt = p->p.head ; pkt ; pkt = DN_NEXT(pkt) )
	    if (pkt->hdr.mh_data == r)
		pkt->hdr.mh_data = (void *)ip_fw_default_rule ;
    }
}

/*
 * handler for the various dummynet socket options
 * (get, flush, config, del)
 */
static int
ip_dn_ctl(struct sockopt *sopt)
{
    int error = 0 ;
    size_t size ;
    char *buf, *bp ; /* bp is the "copy-pointer" */
    struct dn_pipe *p, tmp_pipe ;

    struct dn_pipe *x, *a, *b ;

    /* Disallow sets in really-really secure mode. */
    if (sopt->sopt_dir == SOPT_SET && securelevel >= 3)
	return (EPERM);

    switch (sopt->sopt_name) {
    default :
	panic("ip_dn_ctl -- unknown option");

    case IP_DUMMYNET_GET :
	for (p = all_pipes, size = 0 ; p ; p = p->next )
	    size += sizeof( *p ) +
		p->rq_elements * sizeof(struct dn_flow_queue);
	buf = malloc(size, M_TEMP, M_WAITOK);
	if (buf == 0) {
	    error = ENOBUFS ;
	    break ;
	}
	for (p = all_pipes, bp = buf ; p ; p = p->next ) {
	    int i ;
	    struct dn_pipe *pipe_bp = (struct dn_pipe *)bp ;
	    struct dn_flow_queue *q;

	    /*
	     * copy the pipe descriptor into *bp, convert delay back to ms,
	     * then copy the queue descriptor(s) one at a time.
	     */
	    bcopy(p, bp, sizeof( *p ) );
	    pipe_bp->delay = (pipe_bp->delay * 1000) / hz ;
	    bp += sizeof( *p ) ;
	    for (i = 0 ; i < p->rq_size ; i++)
		for (q = p->rq[i] ; q ; q = q->next, bp += sizeof(*q) )
		    bcopy(q, bp, sizeof( *q ) );
	}
	error = sooptcopyout(sopt, buf, size);
	FREE(buf, M_TEMP);
	break ;

    case IP_DUMMYNET_FLUSH :
	dummynet_flush() ;
	break ;

    case IP_DUMMYNET_CONFIGURE :
	p = &tmp_pipe ;
	error = sooptcopyin(sopt, p, sizeof *p, sizeof *p);
	if (error)
	    break ;
	/*
	 * The config program passes parameters as follows:
	 * bandwidth = bits/second (0 means no limits);
	 * delay = millisec., must be translated into ticks.
	 * queue_size = slots (0 means no limit)
	 * queue_size_bytes = bytes (0 means no limit)
	 * only one can be set, must be bound-checked
	 */
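	/*
	 * Userland sketch (illustrative; the field names are the ones
	 * checked below, while the socket setup is an assumption about
	 * how a config program would reach this handler):
	 *
	 *	struct dn_pipe pipe;
	 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
	 *	bzero(&pipe, sizeof(pipe));
	 *	pipe.pipe_nr = 1;
	 *	pipe.bandwidth = 1000000;	(1 Mbit/s)
	 *	pipe.delay = 100;		(milliseconds)
	 *	setsockopt(s, IPPROTO_IP, IP_DUMMYNET_CONFIGURE,
	 *		&pipe, sizeof(pipe));
	 */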
	p->delay = ( p->delay * hz ) / 1000 ;
	if (p->queue_size == 0 && p->queue_size_bytes == 0)
	    p->queue_size = 50 ;
	if (p->queue_size != 0 )	/* buffers are prevailing */
	    p->queue_size_bytes = 0 ;
	if (p->queue_size > 100)
	    p->queue_size = 50 ;
	if (p->queue_size_bytes > 1024*1024)
	    p->queue_size_bytes = 1024*1024 ;
	for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ;
		 a = b , b = b->next) ;
	if (b && b->pipe_nr == p->pipe_nr) {
	    b->bandwidth = p->bandwidth ;
	    b->delay = p->delay ;
	    b->queue_size = p->queue_size ;
	    b->queue_size_bytes = p->queue_size_bytes ;
	    b->plr = p->plr ;
	    b->flow_mask = p->flow_mask ;
	    b->flags = p->flags ;
	} else { /* completely new pipe */
	    int s ;
	    x = malloc(sizeof(struct dn_pipe), M_IPFW, M_DONTWAIT) ;
	    if (x == NULL) {
		printf("ip_dummynet.c: no memory for new pipe\n");
		error = ENOSPC ;
		break ;
	    }
	    bzero(x, sizeof(*x) );
	    x->bandwidth = p->bandwidth ;
	    x->delay = p->delay ;
	    x->pipe_nr = p->pipe_nr ;
	    x->queue_size = p->queue_size ;
	    x->queue_size_bytes = p->queue_size_bytes ;
	    x->plr = p->plr ;
	    x->flow_mask = p->flow_mask ;
	    x->flags = p->flags ;
	    if (x->flags & DN_HAVE_FLOW_MASK) { /* allocate some slots */
		int l = p->rq_size ;
		if (l == 0)
		    l = dn_hash_size ;
		if (l < 4)
		    l = 4 ;
		else if (l > 1024)
		    l = 1024 ;
		x->rq_size = l ;
	    } else /* one is enough for null mask */
		x->rq_size = 1 ;
	    x->rq = malloc(x->rq_size * sizeof(struct dn_flow_queue *),
		    M_IPFW, M_DONTWAIT) ;
	    if (x->rq == NULL ) {
		printf("sorry, cannot allocate queue\n");
		free(x, M_IPFW);
		error = ENOSPC ;
		break ;
	    }
	    bzero(x->rq, x->rq_size * sizeof(struct dn_flow_queue *) );
	    x->rq_elements = 0 ;

	    s = splnet() ;
	    x->next = b ;
	    if (a == NULL)
		all_pipes = x ;
	    else
		a->next = x ;
	    splx(s);
	}
	break ;

    case IP_DUMMYNET_DEL :
	p = &tmp_pipe ;
	error = sooptcopyin(sopt, p, sizeof *p, sizeof *p);
	if (error)
	    break ;

	for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ;
		 a = b , b = b->next) ;
	if (b && b->pipe_nr == p->pipe_nr) {	/* found pipe */
	    int s ;
	    struct ip_fw_chain *chain ;

	    s = splnet() ;
	    chain = ip_fw_chain.lh_first;

	    if (a == NULL)
		all_pipes = b->next ;
	    else
		a->next = b->next ;
	    /*
	     * remove references to this pipe from the ip_fw rules.
	     */
	    for (; chain; chain = chain->chain.le_next)
		if (chain->rule->pipe_ptr == b)
		    chain->rule->pipe_ptr = NULL ;
	    /* remove all references to b from heaps */
	    if (ready_heap.elements > 0) {
		struct dn_heap *h = &ready_heap ;
		int i = 0, found = 0 ;
		while ( i < h->elements ) {
		    if (((struct dn_flow_queue *)(h->p[i].object))->p == b) {
			/* found one */
			h->elements-- ;
			h->p[i] = h->p[h->elements] ;
			found++ ;
		    } else
			i++ ;
		}
		if (found)
		    heapify(h);
	    }
	    if (extract_heap.elements > 0) {
		struct dn_heap *h = &extract_heap ;
		int i = 0, found = 0 ;
		while ( i < h->elements ) {
		    if (h->p[i].object == b) { /* found one */
			h->elements-- ;
			h->p[i] = h->p[h->elements] ;
			found++ ;
		    } else
			i++ ;
		}
		if (found)
		    heapify(h);
	    }
	    splx(s);
	    purge_pipe(b);	/* remove pkts from here */
	    free(b->rq, M_IPFW);
	    free(b, M_IPFW);
	}
	break ;
    }
    return error ;
}

static void
ip_dn_init(void)
{
    printf("DUMMYNET initialized (000106)\n");
    all_pipes = NULL ;
    ready_heap.size = ready_heap.elements = 0 ;
    extract_heap.size = extract_heap.elements = 0 ;
    ip_dn_ctl_ptr = ip_dn_ctl;
    timeout(dummynet, NULL, 1);
}

static ip_dn_ctl_t *old_dn_ctl_ptr ;

static int
dummynet_modevent(module_t mod, int type, void *data)
{
    int s ;
    switch (type) {
    case MOD_LOAD:
	s = splnet();
	old_dn_ctl_ptr = ip_dn_ctl_ptr;
	ip_dn_init();
	splx(s);
	break;
    case MOD_UNLOAD:
	s = splnet();
	ip_dn_ctl_ptr = old_dn_ctl_ptr;
	splx(s);
	dummynet_flush();
	break ;
    default:
	break ;
    }
    return 0 ;
}

static moduledata_t dummynet_mod = {
    "dummynet",
    dummynet_modevent,
    NULL
} ;
DECLARE_MODULE(dummynet, dummynet_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);