/* * Copyright (c) 1998-2000 Luigi Rizzo, Universita` di Pisa * Portions Copyright (c) 2000 Akamba Corp. * All rights reserved * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #define DEB(x) #define DDB(x) x /* * This module implements IP dummynet, a bandwidth limiter/delay emulator * used in conjunction with the ipfw package. * * Most important Changes: * * 000106: large rewrite, use heaps to handle very many pipes. * 980513: initial release * * include files marked with XXX are probably not needed */ #include #include #include #include #include /* XXX */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_bdg.h" #ifdef BRIDGE #include /* for struct arpcom */ #include #endif /* * we keep a private variable for the simulation time, but probably * it would be better to use the already existing one "softticks" * (in sys/kern/kern_timer.c) */ static dn_key curr_time = 0 ; /* current simulation time */ static int dn_hash_size = 64 ; /* default hash size */ /* statistics on number of queue searches and search steps */ static int searches, search_steps ; static int pipe_expire = 1 ; /* expire queue if empty */ static struct dn_heap ready_heap, extract_heap ; static int heap_init(struct dn_heap *h, int size) ; static int heap_insert (struct dn_heap *h, dn_key key1, void *p); static void heap_extract(struct dn_heap *h); static void transmit_event(struct dn_pipe *pipe); static void ready_event(struct dn_flow_queue *q); static struct dn_pipe *all_pipes = NULL ; /* list of all pipes */ #ifdef SYSCTL_NODE SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size, CTLFLAG_RW, &dn_hash_size, 0, "Default hash table size"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, curr_time, CTLFLAG_RD, &curr_time, 0, "Current tick"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap, CTLFLAG_RD, &ready_heap.size, 0, "Size of ready heap"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap, CTLFLAG_RD, &extract_heap.size, 0, "Size of extract heap"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, searches, CTLFLAG_RD, &searches, 0, "Number of queue searches"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, search_steps, CTLFLAG_RD, &search_steps, 0, "Number of queue search steps"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire, CTLFLAG_RW, &pipe_expire, 0, "Expire queue if empty"); #endif static int ip_dn_ctl(struct sockopt *sopt); static void rt_unref(struct rtentry *); static void dummynet(void *); static void dummynet_flush(void); /* * ip_fw_chain is used when deleting a pipe, because ipfw rules can * hold references to the pipe. */ extern LIST_HEAD (ip_fw_head, ip_fw_chain) ip_fw_chain; static void rt_unref(struct rtentry *rt) { if (rt == NULL) return ; if (rt->rt_refcnt <= 0) printf("-- warning, refcnt now %ld, decreasing\n", rt->rt_refcnt); RTFREE(rt); } /* * Heap management functions. * * In the heap, first node is element 0. Children of i are 2i+1 and 2i+2. * Some macros help finding parent/children so we can optimize them. * * heap_init() is called to expand the heap when needed. * Increment size in blocks of 256 entries (which make one 4KB page) * XXX failure to allocate a new element is a pretty bad failure * as we basically stall a whole queue forever!! * Returns 1 on error, 0 on success */ #define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 ) #define HEAP_LEFT(x) ( 2*(x) + 1 ) #define HEAP_IS_LEFT(x) ( (x) & 1 ) #define HEAP_RIGHT(x) ( 2*(x) + 1 ) #define HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; } #define HEAP_INCREMENT 255 static int heap_init(struct dn_heap *h, int new_size) { struct dn_heap_entry *p; if (h->size >= new_size ) { printf("heap_init, Bogus call, have %d want %d\n", h->size, new_size); return 0 ; } new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT ; p = malloc(new_size * sizeof(*p), M_IPFW, M_DONTWAIT ); if (p == NULL) { printf(" heap_init, resize %d failed\n", new_size ); return 1 ; /* error */ } if (h->size > 0) { bcopy(h->p, p, h->size * sizeof(*p) ); free(h->p, M_IPFW); } h->p = p ; h->size = new_size ; return 0 ; } /* * Insert element in heap. Normally, p != NULL, we insert p in * a new position and bubble up. If p == NULL, then the element is * already in place, and key is the position where to start the * bubble-up. * Returns 1 on failure (cannot allocate new heap entry) */ static int heap_insert(struct dn_heap *h, dn_key key1, void *p) { int son = h->elements ; if (p == NULL) /* data already there, set starting point */ son = key1 ; else { /* insert new element at the end, possibly resize */ son = h->elements ; if (son == h->size) /* need resize... */ if (heap_init(h, h->elements+1) ) return 1 ; /* failure... */ h->p[son].object = p ; h->p[son].key = key1 ; h->elements++ ; } while (son > 0) { /* bubble up */ int father = HEAP_FATHER(son) ; struct dn_heap_entry tmp ; if (DN_KEY_LT( h->p[father].key, h->p[son].key ) ) break ; /* found right position */ /* son smaller than father, swap and try again */ HEAP_SWAP(h->p[son], h->p[father], tmp) ; son = father ; } return 0 ; } /* * remove top element from heap */ static void heap_extract(struct dn_heap *h) { int child, father, max = h->elements - 1 ; if (max < 0) return ; /* move up smallest child */ father = 0 ; child = HEAP_LEFT(father) ; /* left child */ while (child <= max) { /* valid entry */ if (child != max && DN_KEY_LT(h->p[child+1].key, h->p[child].key) ) child = child+1 ; /* take right child, otherwise left */ h->p[father] = h->p[child] ; father = child ; child = HEAP_LEFT(child) ; /* left child for next loop */ } h->elements-- ; if (father != max) { /* * Fill hole with last entry and bubble up, reusing the insert code */ h->p[father] = h->p[max] ; heap_insert(h, father, NULL); /* this one cannot fail */ } } /* * heapify() will reorganize data inside an array to maintain the * heap property. It is needed when we delete a bunch of entries. */ static void heapify(struct dn_heap *h) { int father, i ; struct dn_heap_entry tmp ; for (i = h->elements - 1 ; i > 0 ; i-- ) { father = HEAP_FATHER(i) ; if ( DN_KEY_LT(h->p[i].key, h->p[father].key) ) HEAP_SWAP(h->p[father], h->p[i], tmp) ; } } /* * --- end of heap management functions --- */ /* * Scheduler functions -- transmit_event(), ready_event() * * transmit_event() is called when the delay-line needs to enter * the scheduler, either because of existing pkts getting ready, * or new packets entering the queue. The event handled is the delivery * time of the packet. * * ready_event() does something similar with flow queues, and the * event handled is the finish time of the head pkt. * * In both cases, we make sure that the data structures are consistent * before passing pkts out, because this might trigger recursive * invocations of the procedures. */ static void transmit_event(struct dn_pipe *pipe) { struct dn_pkt *pkt ; while ( (pkt = pipe->p.head) && DN_KEY_LEQ(pkt->output_time, curr_time) ) { /* * first unlink, then call procedures, since ip_input() can invoke * ip_output() and viceversa, thus causing nested calls */ pipe->p.head = DN_NEXT(pkt) ; /* * The actual mbuf is preceded by a struct dn_pkt, resembling an mbuf * (NOT A REAL one, just a small block of malloc'ed memory) with * m_type = MT_DUMMYNET * m_next = actual mbuf to be processed by ip_input/output * m_data = the matching rule * and some other fields. * The block IS FREED HERE because it contains parameters passed * to the called routine. */ switch (pkt->dn_dir) { case DN_TO_IP_OUT: (void)ip_output((struct mbuf *)pkt, NULL, NULL, 0, NULL); rt_unref (pkt->ro.ro_rt) ; break ; case DN_TO_IP_IN : ip_input((struct mbuf *)pkt) ; break ; #ifdef BRIDGE case DN_TO_BDG_FWD : { struct mbuf *m = (struct mbuf *)pkt ; struct ether_header hdr; if (m->m_len < ETHER_HDR_LEN && (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) { m_freem(m); break; } bcopy(mtod(m, struct ether_header *), &hdr, ETHER_HDR_LEN); m_adj(m, ETHER_HDR_LEN); bdg_forward(&m, &hdr, pkt->ifp); if (m) m_freem(m); } break ; #endif default: printf("dummynet: bad switch %d!\n", pkt->dn_dir); m_freem(pkt->dn_m); break ; } FREE(pkt, M_IPFW); } /* if there are leftover packets, put into the heap for next event */ if ( (pkt = pipe->p.head) ) heap_insert(&extract_heap, pkt->output_time, pipe ) ; /* XXX should check errors on heap_insert, by draining the * whole pipe p and hoping in the future we are more successful */ } /* * ready_event() is invoked every time the queue must enter the * scheduler, either because the first packet arrives, or because * a previously scheduled event fired. * On invokation, drain as many pkts as possible (could be 0) and then * if there are leftover packets reinsert the pkt in the scheduler. */ static void ready_event(struct dn_flow_queue *q) { struct dn_pkt *pkt; struct dn_pipe *p = q->p ; int p_was_empty = (p->p.head == NULL) ; while ( (pkt = q->r.head) != NULL ) { int len = pkt->dn_m->m_pkthdr.len; int len_scaled = p->bandwidth ? len*8*hz : 0 ; /* * bandwidth==0 (no limit) means we can drain as many pkts as * needed from the queue. Setting len_scaled = 0 does the job. */ if (len_scaled > q->numbytes ) break ; /* * extract pkt from queue, compute output time (could be now) * and put into delay line (p_queue) */ q->numbytes -= len_scaled ; q->r.head = DN_NEXT(pkt) ; q->len-- ; q->len_bytes -= len ; pkt->output_time = curr_time + p->delay ; if (p->p.head == NULL) p->p.head = pkt; else DN_NEXT(p->p.tail) = pkt; p->p.tail = pkt; DN_NEXT(p->p.tail) = NULL; } /* * If we have more packets queued, schedule next ready event * (can only occur when bandwidth != 0, otherwise we would have * flushed the whole queue in the previous loop). * To this purpose compute how many ticks to go for the next * event, accounting for packet size and residual credit. This means * we compute the finish time of the packet. */ if ( (pkt = q->r.head) != NULL ) { /* this implies bandwidth != 0 */ dn_key t ; t = (pkt->dn_m->m_pkthdr.len*8*hz - q->numbytes + p->bandwidth - 1 ) / p->bandwidth ; q->numbytes += t * p->bandwidth ; heap_insert(&ready_heap, curr_time + t, (void *)q ); /* XXX should check errors on heap_insert, and drain the whole * queue on error hoping next time we are luckier. */ } /* * If the delay line was empty call transmit_event(p) now. * Otherwise, the scheduler will take care of it. */ if (p_was_empty) transmit_event(p); } /* * this is called once per tick, or HZ times per second. It is used to * increment the current tick counter and schedule expired events. */ static void dummynet(void * __unused unused) { void *p ; /* generic parameter to handler */ struct dn_heap *h ; int s ; s = splnet(); /* avoid network interrupts... */ curr_time++ ; h = &ready_heap ; while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time) ) { /* * XXX if the event is late, we should probably credit the queue * by q->p->bandwidth * (delta_ticks). On the other hand, i dont * think this can ever occur with this code (i.e. curr_time will * still be incremented by one at each tick. Things might be * different if we were using the counter from the high priority * timer. */ if (h->p[0].key != curr_time) printf("-- dummynet: warning, event is %d ticks late\n", curr_time - h->p[0].key); p = h->p[0].object ; heap_extract(h); /* need to extract before processing */ ready_event(p) ; } h = &extract_heap ; while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time) ) { if (h->p[0].key != curr_time) /* XXX same as above */ printf("-- dummynet: warning, event is %d ticks late\n", curr_time - h->p[0].key); p = h->p[0].object ; heap_extract(&extract_heap); transmit_event(p); } splx(s); timeout(dummynet, NULL, 1); } /* * Given a pipe and a pkt in last_pkt, find a matching queue * after appropriate masking. The queue is moved to front * so that further searches take less time. * XXX if the queue is longer than some threshold should consider * purging old unused entries. They will get in the way every time * we have a new flow. */ static struct dn_flow_queue * find_queue(struct dn_pipe *pipe) { int i = 0 ; /* we need i and q for new allocations */ struct dn_flow_queue *q, *prev; if ( !(pipe->flags & DN_HAVE_FLOW_MASK) ) q = pipe->rq[0] ; else { /* first, do the masking */ last_pkt.dst_ip &= pipe->flow_mask.dst_ip ; last_pkt.src_ip &= pipe->flow_mask.src_ip ; last_pkt.dst_port &= pipe->flow_mask.dst_port ; last_pkt.src_port &= pipe->flow_mask.src_port ; last_pkt.proto &= pipe->flow_mask.proto ; last_pkt.flags = 0 ; /* we don't care about this one */ /* then, hash function */ i = ( (last_pkt.dst_ip) & 0xffff ) ^ ( (last_pkt.dst_ip >> 15) & 0xffff ) ^ ( (last_pkt.src_ip << 1) & 0xffff ) ^ ( (last_pkt.src_ip >> 16 ) & 0xffff ) ^ (last_pkt.dst_port << 1) ^ (last_pkt.src_port) ^ (last_pkt.proto ); i = i % pipe->rq_size ; /* finally, scan the current list for a match */ searches++ ; for (prev=NULL, q = pipe->rq[i] ; q ; ) { search_steps++; if (bcmp(&last_pkt, &(q->id), sizeof(q->id) ) == 0) break ; /* found */ else if (pipe_expire && q->r.head == NULL) { /* entry is idle, expire it */ struct dn_flow_queue *old_q = q ; if (prev != NULL) prev->next = q = q->next ; else pipe->rq[i] = q = q->next ; pipe->rq_elements-- ; free(old_q, M_IPFW); continue ; } prev = q ; q = q->next ; } if (q && prev != NULL) { /* found and not in front */ prev->next = q->next ; q->next = pipe->rq[i] ; pipe->rq[i] = q ; } } if (q == NULL) { /* no match, need to allocate a new entry */ q = malloc(sizeof(*q), M_IPFW, M_DONTWAIT) ; if (q == NULL) { printf("sorry, cannot allocate new flow\n"); return NULL ; } bzero(q, sizeof(*q) ); /* needed */ q->id = last_pkt ; q->p = pipe ; q->hash_slot = i ; q->next = pipe->rq[i] ; pipe->rq[i] = q ; pipe->rq_elements++ ; DEB(printf("++ new queue (%d) for 0x%08x/0x%04x -> 0x%08x/0x%04x\n", pipe->rq_elements, last_pkt.src_ip, last_pkt.src_port, last_pkt.dst_ip, last_pkt.dst_port); ) } return q ; } /* * dummynet hook for packets. */ int dummynet_io(int pipe_nr, int dir, struct mbuf *m, struct ifnet *ifp, struct route *ro, struct sockaddr_in *dst, struct ip_fw_chain *rule, int flags) { struct dn_pkt *pkt; struct dn_pipe *p; int len = m->m_pkthdr.len ; struct dn_flow_queue *q = NULL ; int s ; s = splimp(); /* XXX check the spl protection. It might be unnecessary since we * run this at splnet() already. */ DEB(printf("-- last_pkt dst 0x%08x/0x%04x src 0x%08x/0x%04x\n", last_pkt.dst_ip, last_pkt.dst_port, last_pkt.src_ip, last_pkt.src_port);) pipe_nr &= 0xffff ; /* * locate pipe. First time is expensive, next have direct access. */ if ( (p = rule->rule->pipe_ptr) == NULL ) { for (p = all_pipes; p && p->pipe_nr != pipe_nr; p = p->next) ; if (p == NULL) goto dropit ; /* this pipe does not exist! */ rule->rule->pipe_ptr = p ; /* record pipe ptr for the future */ } q = find_queue(p); /* * update statistics, then do various check on reasons to drop pkt */ if ( q == NULL ) goto dropit ; /* cannot allocate queue */ q->tot_bytes += len ; q->tot_pkts++ ; if ( p->plr && random() < p->plr ) goto dropit ; /* random pkt drop */ if ( p->queue_size && q->len >= p->queue_size) goto dropit ; /* queue count overflow */ if ( p->queue_size_bytes && len + q->len_bytes > p->queue_size_bytes) goto dropit ; /* queue size overflow */ /* * can implement RED drops here if needed. */ pkt = (struct dn_pkt *)malloc(sizeof (*pkt), M_IPFW, M_NOWAIT) ; if ( pkt == NULL ) goto dropit ; /* cannot allocate packet header */ /* ok, i can handle the pkt now... */ bzero(pkt, sizeof(*pkt) ); /* XXX expensive, see if we can remove it*/ /* build and enqueue packet + parameters */ pkt->hdr.mh_type = MT_DUMMYNET ; (struct ip_fw_chain *)pkt->hdr.mh_data = rule ; DN_NEXT(pkt) = NULL; pkt->dn_m = m; pkt->dn_dir = dir ; pkt->ifp = ifp; if (dir == DN_TO_IP_OUT) { /* * We need to copy *ro because for ICMP pkts (and maybe others) * the caller passed a pointer into the stack; and, dst might * also be a pointer into *ro so it needs to be updated. */ pkt->ro = *ro; if (ro->ro_rt) ro->ro_rt->rt_refcnt++ ; /* XXX */ if (dst == (struct sockaddr_in *)&ro->ro_dst) /* dst points into ro */ dst = (struct sockaddr_in *)&(pkt->ro.ro_dst) ; pkt->dn_dst = dst; pkt->flags = flags ; } if (q->r.head == NULL) q->r.head = pkt; else DN_NEXT(q->r.tail) = pkt; q->r.tail = pkt; q->len++; q->len_bytes += len ; /* * If queue was empty (this is first pkt) then call ready_event() * now to make the pkt go out at the right time. Otherwise we are done, * as there must be a ready event already scheduled. */ if (q->r.head == pkt) /* r_queue was empty */ ready_event( q ); splx(s); return 0; dropit: splx(s); if (q) q->drops++ ; m_freem(m); return 0 ; /* XXX should I return an error ? */ } /* * below, the rt_unref is only needed when (pkt->dn_dir == DN_TO_IP_OUT) * Doing this would probably save us the initial bzero of dn_pkt */ #define DN_FREE_PKT(pkt) { \ struct dn_pkt *n = pkt ; \ rt_unref ( n->ro.ro_rt ) ; \ m_freem(n->dn_m); \ pkt = DN_NEXT(n) ; \ free(n, M_IPFW) ; } /* * dispose all packets queued on a pipe */ static void purge_pipe(struct dn_pipe *pipe) { struct dn_pkt *pkt ; struct dn_flow_queue *q, *qn ; int i ; for (i = 0 ; i < pipe->rq_size ; i++ ) for (q = pipe->rq[i] ; q ; q = qn ) { for (pkt = q->r.head ; pkt ; ) DN_FREE_PKT(pkt) ; qn = q->next ; free(q, M_IPFW); } for (pkt = pipe->p.head ; pkt ; ) DN_FREE_PKT(pkt) ; } /* * Delete all pipes and heaps returning memory. Must also * remove references from all ipfw rules to all pipes. */ static void dummynet_flush() { struct dn_pipe *curr_p, *p ; struct ip_fw_chain *chain ; int s ; s = splnet() ; /* remove all references to pipes ...*/ for (chain= ip_fw_chain.lh_first ; chain; chain = chain->chain.le_next) chain->rule->pipe_ptr = NULL ; /* prevent future matches... */ p = all_pipes ; all_pipes = NULL ; /* and free heaps so we don't have unwanted events */ if (ready_heap.size >0 ) free(ready_heap.p, M_IPFW); ready_heap.elements = ready_heap.size = 0 ; if (extract_heap.size >0 ) free(extract_heap.p, M_IPFW); extract_heap.elements = extract_heap.size = 0 ; splx(s) ; /* * Now purge all queued pkts and delete all pipes */ for ( ; p ; ) { purge_pipe(p); curr_p = p ; p = p->next ; free(curr_p->rq, M_IPFW); free(curr_p, M_IPFW); } } extern struct ip_fw_chain *ip_fw_default_rule ; /* * when a firewall rule is deleted, scan all queues and remove the flow-id * from packets matching this rule. */ void dn_rule_delete(void *r) { struct dn_pipe *p ; struct dn_flow_queue *q ; struct dn_pkt *pkt ; int i ; for ( p = all_pipes ; p ; p = p->next ) { for (i = 0 ; i < p->rq_size ; i++) for (q = p->rq[i] ; q ; q = q->next ) for (pkt = q->r.head ; pkt ; pkt = DN_NEXT(pkt) ) if (pkt->hdr.mh_data == r) pkt->hdr.mh_data = (void *)ip_fw_default_rule ; for (pkt = p->p.head ; pkt ; pkt = DN_NEXT(pkt) ) if (pkt->hdr.mh_data == r) pkt->hdr.mh_data = (void *)ip_fw_default_rule ; } } /* * handler for the various dummynet socket options * (get, flush, config, del) */ static int ip_dn_ctl(struct sockopt *sopt) { int error = 0 ; size_t size ; char *buf, *bp ; /* bp is the "copy-pointer" */ struct dn_pipe *p, tmp_pipe ; struct dn_pipe *x, *a, *b ; /* Disallow sets in really-really secure mode. */ if (sopt->sopt_dir == SOPT_SET && securelevel >= 3) return (EPERM); switch (sopt->sopt_name) { default : panic("ip_dn_ctl -- unknown option"); case IP_DUMMYNET_GET : for (p = all_pipes, size = 0 ; p ; p = p->next ) size += sizeof( *p ) + p->rq_elements * sizeof(struct dn_flow_queue); buf = malloc(size, M_TEMP, M_WAITOK); if (buf == 0) { error = ENOBUFS ; break ; } for (p = all_pipes, bp = buf ; p ; p = p->next ) { int i ; struct dn_pipe *pipe_bp = (struct dn_pipe *)bp ; struct dn_flow_queue *q; /* * copy the pipe descriptor into *bp, convert delay back to ms, * then copy the queue descriptor(s) one at a time. */ bcopy(p, bp, sizeof( *p ) ); pipe_bp->delay = (pipe_bp->delay * 1000) / hz ; bp += sizeof( *p ) ; for (i = 0 ; i < p->rq_size ; i++) for (q = p->rq[i] ; q ; q = q->next, bp += sizeof(*q) ) bcopy(q, bp, sizeof( *q ) ); } error = sooptcopyout(sopt, buf, size); FREE(buf, M_TEMP); break ; case IP_DUMMYNET_FLUSH : dummynet_flush() ; break ; case IP_DUMMYNET_CONFIGURE : p = &tmp_pipe ; error = sooptcopyin(sopt, p, sizeof *p, sizeof *p); if (error) break ; /* * The config program passes parameters as follows: * bandwidth = bits/second (0 means no limits); * delay = millisec., must be translated into ticks. * queue_size = slots (0 means no limit) * queue_size_bytes = bytes (0 means no limit) * only one can be set, must be bound-checked */ p->delay = ( p->delay * hz ) / 1000 ; if (p->queue_size == 0 && p->queue_size_bytes == 0) p->queue_size = 50 ; if (p->queue_size != 0 ) /* buffers are prevailing */ p->queue_size_bytes = 0 ; if (p->queue_size > 100) p->queue_size = 50 ; if (p->queue_size_bytes > 1024*1024) p->queue_size_bytes = 1024*1024 ; for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ; a = b , b = b->next) ; if (b && b->pipe_nr == p->pipe_nr) { b->bandwidth = p->bandwidth ; b->delay = p->delay ; b->queue_size = p->queue_size ; b->queue_size_bytes = p->queue_size_bytes ; b->plr = p->plr ; b->flow_mask = p->flow_mask ; b->flags = p->flags ; } else { /* completely new pipe */ int s ; x = malloc(sizeof(struct dn_pipe), M_IPFW, M_DONTWAIT) ; if (x == NULL) { printf("ip_dummynet.c: no memory for new pipe\n"); error = ENOSPC ; break ; } bzero(x, sizeof(*x) ); x->bandwidth = p->bandwidth ; x->delay = p->delay ; x->pipe_nr = p->pipe_nr ; x->queue_size = p->queue_size ; x->queue_size_bytes = p->queue_size_bytes ; x->plr = p->plr ; x->flow_mask = p->flow_mask ; x->flags = p->flags ; if (x->flags & DN_HAVE_FLOW_MASK) {/* allocate some slots */ int l = p->rq_size ; if (l == 0) l = dn_hash_size ; if (l < 4) l = 4 ; else if (l > 1024) l = 1024 ; x->rq_size = l ; } else /* one is enough for null mask */ x->rq_size = 1 ; x->rq = malloc(x->rq_size * sizeof(struct dn_flow_queue *), M_IPFW, M_DONTWAIT) ; if (x->rq == NULL ) { printf("sorry, cannot allocate queue\n"); free(x, M_IPFW); error = ENOSPC ; break ; } bzero(x->rq, x->rq_size * sizeof(struct dn_flow_queue *) ); x->rq_elements = 0 ; s = splnet() ; x->next = b ; if (a == NULL) all_pipes = x ; else a->next = x ; splx(s); } break ; case IP_DUMMYNET_DEL : p = &tmp_pipe ; error = sooptcopyin(sopt, p, sizeof *p, sizeof *p); if (error) break ; for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ; a = b , b = b->next) ; if (b && b->pipe_nr == p->pipe_nr) { /* found pipe */ int s ; struct ip_fw_chain *chain ; s = splnet() ; chain = ip_fw_chain.lh_first; if (a == NULL) all_pipes = b->next ; else a->next = b->next ; /* * remove references to this pipe from the ip_fw rules. */ for (; chain; chain = chain->chain.le_next) if (chain->rule->pipe_ptr == b) chain->rule->pipe_ptr = NULL ; /* remove all references to b from heaps */ if (ready_heap.elements > 0) { struct dn_heap *h = &ready_heap ; int i = 0, found = 0 ; while ( i < h->elements ) { if (((struct dn_flow_queue *)(h->p[i].object))->p == b) { /* found one */ h->elements-- ; h->p[i] = h->p[h->elements] ; found++ ; } else i++ ; } if (found) heapify(h); } if (extract_heap.elements > 0) { struct dn_heap *h = &extract_heap ; int i = 0, found = 0 ; while ( i < h->elements ) { if (h->p[i].object == b) { /* found one */ h->elements-- ; h->p[i] = h->p[h->elements] ; found++ ; } else i++ ; } if (found) heapify(h); } splx(s); purge_pipe(b); /* remove pkts from here */ free(b->rq, M_IPFW); free(b, M_IPFW); } break ; } return error ; } static void ip_dn_init(void) { printf("DUMMYNET initialized (000106)\n"); all_pipes = NULL ; ready_heap.size = ready_heap.elements = 0 ; extract_heap.size = extract_heap.elements = 0 ; ip_dn_ctl_ptr = ip_dn_ctl; timeout(dummynet, NULL, 1); } static ip_dn_ctl_t *old_dn_ctl_ptr ; static int dummynet_modevent(module_t mod, int type, void *data) { int s ; switch (type) { case MOD_LOAD: s = splnet(); old_dn_ctl_ptr = ip_dn_ctl_ptr; ip_dn_init(); splx(s); break; case MOD_UNLOAD: s = splnet(); ip_dn_ctl_ptr = old_dn_ctl_ptr; splx(s); dummynet_flush(); break ; default: break ; } return 0 ; } static moduledata_t dummynet_mod = { "dummynet", dummynet_modevent, NULL } ; DECLARE_MODULE(dummynet, dummynet_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);