From 36e8826ffbfb16b1970dec46e3dd8487c014788a Mon Sep 17 00:00:00 2001 From: Max Laier Date: Wed, 18 Feb 2004 00:04:52 +0000 Subject: [PATCH] Backout MT_TAG removal (i.e. bring back MT_TAGs) for now, as dummynet is not working properly with the patch in place. Approved by: bms(mentor) --- sys/net/bridge.c | 14 ++- sys/net/if_ethersubr.c | 17 ++- sys/netinet/in_proto.c | 1 - sys/netinet/ip_divert.c | 69 ++++-------- sys/netinet/ip_divert.h | 83 --------------- sys/netinet/ip_dummynet.c | 218 ++++++++++++++++---------------------- sys/netinet/ip_dummynet.h | 35 +++--- sys/netinet/ip_fastfwd.c | 85 +++++++++++---- sys/netinet/ip_fw.h | 1 + sys/netinet/ip_fw2.c | 30 +----- sys/netinet/ip_input.c | 182 ++++++++++++++++++++----------- sys/netinet/ip_output.c | 113 +++++++++----------- sys/netinet/ip_var.h | 25 ++--- sys/netinet/tcp_debug.c | 1 - sys/netinet/tcp_input.c | 8 +- sys/netinet/tcp_reass.c | 8 +- sys/sys/mbuf.h | 19 +++- 17 files changed, 443 insertions(+), 466 deletions(-) delete mode 100644 sys/netinet/ip_divert.h diff --git a/sys/net/bridge.c b/sys/net/bridge.c index 9ca1dd88ceea..a444f1c89bbe 100644 --- a/sys/net/bridge.c +++ b/sys/net/bridge.c @@ -920,11 +920,14 @@ bdg_forward(struct mbuf *m0, struct ifnet *dst) DDB(quad_t ticks; ticks = rdtsc();) - /* did we match a firewall rule ? */ - args.rule = ip_dn_find_rule(m0); - if (args.rule) - shared = 0; /* For sure this is our own mbuf. */ - else + args.rule = NULL; /* did we match a firewall rule ? */ + /* Fetch state from dummynet tag, ignore others */ + for (;m0->m_type == MT_TAG; m0 = m0->m_next) + if (m0->_m_tag_id == PACKET_TAG_DUMMYNET) { + args.rule = ((struct dn_pkt *)m0)->rule; + shared = 0; /* For sure this is our own mbuf. */ + } + if (args.rule == NULL) bdg_thru++; /* count 1st time through bdg_forward */ /* @@ -1043,6 +1046,7 @@ bdg_forward(struct mbuf *m0, struct ifnet *dst) args.m = m0; /* the packet we are looking at */ args.oif = NULL; /* this is an input packet */ + args.divert_rule = 0; /* we do not support divert yet */ args.next_hop = NULL; /* we do not support forward yet */ args.eh = &save_eh; /* MAC header for bridged/MAC packets */ i = ip_fw_chk_ptr(&args); diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 835f5dcca476..567b0b77cde7 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -322,7 +322,12 @@ bad: if (m != NULL) int ether_output_frame(struct ifnet *ifp, struct mbuf *m) { - struct ip_fw *rule = ip_dn_find_rule(m); + struct ip_fw *rule = NULL; + + /* Extract info from dummynet tag, ignore others */ + for (; m->m_type == MT_TAG; m = m->m_next) + if (m->m_flags == PACKET_TAG_DUMMYNET) + rule = ((struct dn_pkt *)m)->rule; if (rule == NULL && BDG_ACTIVE(ifp)) { /* @@ -392,6 +397,7 @@ ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, args.m = m; /* the packet we are looking at */ args.oif = dst; /* destination, if any */ + args.divert_rule = 0; /* we do not support divert yet */ args.rule = *rule; /* matching rule to restart */ args.next_hop = NULL; /* we do not support forward yet */ args.eh = &save_eh; /* MAC header for bridged/MAC packets */ @@ -605,7 +611,14 @@ ether_demux(struct ifnet *ifp, struct mbuf *m) #if defined(NETATALK) struct llc *l; #endif - struct ip_fw *rule = ip_dn_find_rule(m); + struct ip_fw *rule = NULL; + + /* Extract info from dummynet tag, ignore others */ + for (;m->m_type == MT_TAG; m = m->m_next) + if (m->m_flags == PACKET_TAG_DUMMYNET) { + rule = ((struct dn_pkt *)m)->rule; + ifp = m->m_next->m_pkthdr.rcvif; + } KASSERT(ifp != NULL, ("ether_demux: NULL interface pointer")); diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c index 6f2e8dac80e5..c57f878d89b7 100644 --- a/sys/netinet/in_proto.c +++ b/sys/netinet/in_proto.c @@ -57,7 +57,6 @@ #include #include #include -#include #include #ifdef PIM #include diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c index ee1f97cc4634..fe560a0871bf 100644 --- a/sys/netinet/ip_divert.c +++ b/sys/netinet/ip_divert.c @@ -68,7 +68,6 @@ #include #include #include -#include #include /* @@ -151,21 +150,17 @@ div_input(struct mbuf *m, int off) * then pass them along with mbuf chain. */ void -divert_packet(struct mbuf *m, int incoming) +divert_packet(struct mbuf *m, int incoming, int port, int rule) { struct ip *ip; struct inpcb *inp; struct socket *sa; u_int16_t nport; struct sockaddr_in divsrc; - struct m_tag *mtag; - mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL); - if (mtag == NULL) { - printf("%s: no divert tag\n", __func__); - m_freem(m); - return; - } + /* Sanity check */ + KASSERT(port != 0, ("%s: port=0", __func__)); + /* Assure header */ if (m->m_len < sizeof(struct ip) && (m = m_pullup(m, sizeof(struct ip))) == 0) @@ -179,7 +174,7 @@ divert_packet(struct mbuf *m, int incoming) bzero(&divsrc, sizeof(divsrc)); divsrc.sin_len = sizeof(divsrc); divsrc.sin_family = AF_INET; - divsrc.sin_port = divert_cookie(mtag); /* record matching rule */ + divsrc.sin_port = rule; /* record matching rule */ if (incoming) { struct ifaddr *ifa; @@ -239,7 +234,7 @@ divert_packet(struct mbuf *m, int incoming) mtx_lock(&Giant); /* Put packet on socket queue, if any */ sa = NULL; - nport = htons((u_int16_t)divert_info(mtag)); + nport = htons((u_int16_t)port); INP_INFO_RLOCK(&divcbinfo); LIST_FOREACH(inp, &divcb, inp_list) { INP_LOCK(inp); @@ -278,8 +273,19 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin, struct mbuf *control) { int error = 0; + struct m_hdr divert_tag; - KASSERT(m->m_pkthdr.rcvif == NULL, ("rcvif not null")); + /* + * Prepare the tag for divert info. Note that a packet + * with a 0 tag in mh_data is effectively untagged, + * so we could optimize that case. + */ + divert_tag.mh_type = MT_TAG; + divert_tag.mh_flags = PACKET_TAG_DIVERT; + divert_tag.mh_next = m; + divert_tag.mh_data = 0; /* the matching rule # */ + divert_tag.mh_nextpkt = NULL; + m->m_pkthdr.rcvif = NULL; /* XXX is it necessary ? */ #ifdef MAC mac_create_mbuf_from_socket(so, m); @@ -290,21 +296,9 @@ div_output(struct socket *so, struct mbuf *m, /* Loopback avoidance and state recovery */ if (sin) { - struct m_tag *mtag; - struct divert_tag *dt; int i; - mtag = m_tag_get(PACKET_TAG_DIVERT, - sizeof(struct divert_tag), M_NOWAIT); - if (mtag == NULL) { - error = ENOBUFS; - goto cantsend; - } - dt = (struct divert_tag *)(mtag+1); - dt->info = 0; - dt->cookie = sin->sin_port; - m_tag_prepend(m, mtag); - + divert_tag.mh_data = (caddr_t)(uintptr_t)sin->sin_port; /* * Find receive interface with the given name, stuffed * (if it exists) in the sin_zero[] field. @@ -341,7 +335,7 @@ div_output(struct socket *so, struct mbuf *m, /* Send packet to output processing */ ipstat.ips_rawout++; /* XXX */ - error = ip_output(m, + error = ip_output((struct mbuf *)&divert_tag, inp->inp_options, NULL, (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST | IP_RAWOUTPUT, @@ -368,7 +362,7 @@ div_output(struct socket *so, struct mbuf *m, m->m_pkthdr.rcvif = ifa->ifa_ifp; } /* Send packet to input processing */ - ip_input(m); + ip_input((struct mbuf *)&divert_tag); } return error; @@ -378,27 +372,6 @@ div_output(struct socket *so, struct mbuf *m, return error; } -/* - * Return a copy of the specified packet, but without - * the divert tag. This is used when packets are ``tee'd'' - * and we want the cloned copy to not have divert processing. - */ -struct mbuf * -divert_clone(struct mbuf *m) -{ - struct mbuf *clone; - struct m_tag *mtag; - - clone = m_dup(m, M_DONTWAIT); - if (clone != NULL) { - /* strip divert tag from copy */ - mtag = m_tag_find(clone, PACKET_TAG_DIVERT, NULL); - if (mtag != NULL) - m_tag_delete(clone, mtag); - } - return clone; -} - static int div_attach(struct socket *so, int proto, struct thread *td) { diff --git a/sys/netinet/ip_divert.h b/sys/netinet/ip_divert.h deleted file mode 100644 index d925918cca35..000000000000 --- a/sys/netinet/ip_divert.h +++ /dev/null @@ -1,83 +0,0 @@ -/*- - * Copyright (c) 2003 Sam Leffler, Errno Consulting - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer, - * without modification. - * 2. Redistributions in binary form must reproduce at minimum a disclaimer - * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any - * redistribution must be conditioned upon including a substantially - * similar Disclaimer requirement for further binary redistribution. - * 3. Neither the names of the above-listed copyright holders nor the names - * of any contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * NO WARRANTY - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, - * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGES. - * - * $FreeBSD$ - */ - -#ifndef _NETINET_IP_DIVERT_H_ -#define _NETINET_IP_DIVERT_H_ - -/* - * Divert socket definitions. - */ - -struct divert_tag { - u_int32_t info; /* port & flags */ - u_int16_t cookie; /* ipfw rule number */ -}; - -/* - * Return the divert cookie associated with the mbuf; if any. - */ -static __inline u_int16_t -divert_cookie(struct m_tag *mtag) -{ - return ((struct divert_tag *)(mtag+1))->cookie; -} -static __inline u_int16_t -divert_find_cookie(struct mbuf *m) -{ - struct m_tag *mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL); - return mtag ? divert_cookie(mtag) : 0; -} - -/* - * Return the divert info associated with the mbuf; if any. - */ -static __inline u_int32_t -divert_info(struct m_tag *mtag) -{ - return ((struct divert_tag *)(mtag+1))->info; -} -static __inline u_int32_t -divert_find_info(struct mbuf *m) -{ - struct m_tag *mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL); - return mtag ? divert_info(mtag) : 0; -} - -extern void div_init(void); -extern void div_input(struct mbuf *, int); -extern void div_ctlinput(int, struct sockaddr *, void *); -extern void divert_packet(struct mbuf *m, int incoming); -extern struct mbuf *divert_clone(struct mbuf *); -extern struct pr_usrreqs div_usrreqs; -#endif /* _NETINET_IP_DIVERT_H_ */ diff --git a/sys/netinet/ip_dummynet.c b/sys/netinet/ip_dummynet.c index 114ed44c72f1..1e9ecb8e7994 100644 --- a/sys/netinet/ip_dummynet.c +++ b/sys/netinet/ip_dummynet.c @@ -404,22 +404,6 @@ heap_free(struct dn_heap *h) * --- end of heap management functions --- */ -/* - * Return the mbuf tag holding the dummynet state. As an optimization - * this is assumed to be the first tag on the list. If this turns out - * wrong we'll need to search the list. - */ -static struct dn_pkt_tag * -dn_tag_get(struct mbuf *m) -{ - struct m_tag *mtag = m_tag_first(m); - KASSERT(mtag != NULL && - mtag->m_tag_cookie == MTAG_ABI_COMPAT && - mtag->m_tag_id == PACKET_TAG_DUMMYNET, - ("packet on dummynet queue w/o dummynet tag!")); - return (struct dn_pkt_tag *)(mtag+1); -} - /* * Scheduler functions: * @@ -441,85 +425,87 @@ dn_tag_get(struct mbuf *m) static void transmit_event(struct dn_pipe *pipe) { - struct mbuf *m ; - struct dn_pkt_tag *pkt ; + struct dn_pkt *pkt ; DUMMYNET_LOCK_ASSERT(); - while ( (m = pipe->head) ) { - pkt = dn_tag_get(m); - if ( !DN_KEY_LEQ(pkt->output_time, curr_time) ) - break; + while ( (pkt = pipe->head) && DN_KEY_LEQ(pkt->output_time, curr_time) ) { /* * first unlink, then call procedures, since ip_input() can invoke * ip_output() and viceversa, thus causing nested calls */ - pipe->head = m->m_nextpkt ; + pipe->head = DN_NEXT(pkt) ; /* XXX: drop the lock for now to avoid LOR's */ DUMMYNET_UNLOCK(); + /* + * The actual mbuf is preceded by a struct dn_pkt, resembling an mbuf + * (NOT A REAL one, just a small block of malloc'ed memory) with + * m_type = MT_TAG, m_flags = PACKET_TAG_DUMMYNET + * dn_m (m_next) = actual mbuf to be processed by ip_input/output + * and some other fields. + * The block IS FREED HERE because it contains parameters passed + * to the called routine. + */ switch (pkt->dn_dir) { case DN_TO_IP_OUT: - (void)ip_output(m, NULL, NULL, pkt->flags, NULL, NULL); + (void)ip_output((struct mbuf *)pkt, NULL, NULL, 0, NULL, NULL); + rt_unref (pkt->ro.ro_rt, __func__) ; break ; case DN_TO_IP_IN : - ip_input(m) ; + ip_input((struct mbuf *)pkt) ; break ; case DN_TO_BDG_FWD : - /* - * The bridge requires/assumes the Ethernet header is - * contiguous in the first mbuf header. Insure this is true. - */ - if (BDG_LOADED) { - if (m->m_len < ETHER_HDR_LEN && - (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) { - printf("dummynet/bridge: pullup fail, dropping pkt\n"); - break; - } - m = bdg_forward_ptr(m, pkt->ifp); - } else { + if (!BDG_LOADED) { /* somebody unloaded the bridge module. Drop pkt */ /* XXX rate limit */ printf("dummynet: dropping bridged packet trapped in pipe\n"); - } - if (m) - m_freem(m); - break; - - case DN_TO_ETH_DEMUX: - /* - * The Ethernet code assumes the Ethernet header is - * contiguous in the first mbuf header. Insure this is true. - */ - if (m->m_len < ETHER_HDR_LEN && - (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) { - printf("dummynet/ether: pullup fail, dropping pkt\n"); + m_freem(pkt->dn_m); break; - } - ether_demux(m->m_pkthdr.rcvif, m); /* which consumes the mbuf */ - break ; + } /* fallthrough */ + case DN_TO_ETH_DEMUX: + { + struct mbuf *m = (struct mbuf *)pkt ; + if (pkt->dn_m->m_len < ETHER_HDR_LEN && + (pkt->dn_m = m_pullup(pkt->dn_m, ETHER_HDR_LEN)) == NULL) { + printf("dummynet/bridge: pullup fail, dropping pkt\n"); + break; + } + /* + * bdg_forward() wants a pointer to the pseudo-mbuf-header, but + * on return it will supply the pointer to the actual packet + * (originally pkt->dn_m, but could be something else now) if + * it has not consumed it. + */ + if (pkt->dn_dir == DN_TO_BDG_FWD) { + m = bdg_forward_ptr(m, pkt->ifp); + if (m) + m_freem(m); + } else + ether_demux(NULL, m); /* which consumes the mbuf */ + } + break ; case DN_TO_ETH_OUT: - ether_output_frame(pkt->ifp, m); + ether_output_frame(pkt->ifp, (struct mbuf *)pkt); break; default: printf("dummynet: bad switch %d!\n", pkt->dn_dir); - m_freem(m); + m_freem(pkt->dn_m); break ; } + free(pkt, M_DUMMYNET); DUMMYNET_LOCK(); } /* if there are leftover packets, put into the heap for next event */ - if ( (m = pipe->head) ) { - pkt = dn_tag_get(m) ; - /* XXX should check errors on heap_insert, by draining the - * whole pipe p and hoping in the future we are more successful - */ - heap_insert(&extract_heap, pkt->output_time, pipe ) ; - } + if ( (pkt = pipe->head) ) + heap_insert(&extract_heap, pkt->output_time, pipe ) ; + /* XXX should check errors on heap_insert, by draining the + * whole pipe p and hoping in the future we are more successful + */ } /* @@ -527,8 +513,8 @@ transmit_event(struct dn_pipe *pipe) * before being able to transmit a packet. The credit is taken from * either a pipe (WF2Q) or a flow_queue (per-flow queueing) */ -#define SET_TICKS(_m, q, p) \ - ((_m)->m_pkthdr.len*8*hz - (q)->numbytes + p->bandwidth - 1 ) / \ +#define SET_TICKS(pkt, q, p) \ + (pkt->dn_m->m_pkthdr.len*8*hz - (q)->numbytes + p->bandwidth - 1 ) / \ p->bandwidth ; /* @@ -536,23 +522,21 @@ transmit_event(struct dn_pipe *pipe) * and put into delay line (p_queue) */ static void -move_pkt(struct mbuf *pkt, struct dn_flow_queue *q, +move_pkt(struct dn_pkt *pkt, struct dn_flow_queue *q, struct dn_pipe *p, int len) { - struct dn_pkt_tag *dt = dn_tag_get(pkt); - - q->head = pkt->m_nextpkt ; + q->head = DN_NEXT(pkt) ; q->len-- ; q->len_bytes -= len ; - dt->output_time = curr_time + p->delay ; + pkt->output_time = curr_time + p->delay ; if (p->head == NULL) p->head = pkt; else - p->tail->m_nextpkt = pkt; + DN_NEXT(p->tail) = pkt; p->tail = pkt; - p->tail->m_nextpkt = NULL; + DN_NEXT(p->tail) = NULL; } /* @@ -565,7 +549,7 @@ move_pkt(struct mbuf *pkt, struct dn_flow_queue *q, static void ready_event(struct dn_flow_queue *q) { - struct mbuf *pkt; + struct dn_pkt *pkt; struct dn_pipe *p = q->fs->pipe ; int p_was_empty ; @@ -587,7 +571,7 @@ ready_event(struct dn_flow_queue *q) */ q->numbytes += ( curr_time - q->sched_time ) * p->bandwidth; while ( (pkt = q->head) != NULL ) { - int len = pkt->m_pkthdr.len; + int len = pkt->dn_m->m_pkthdr.len; int len_scaled = p->bandwidth ? len*8*hz : 0 ; if (len_scaled > q->numbytes ) break ; @@ -655,9 +639,9 @@ ready_event_wfq(struct dn_pipe *p) while ( p->numbytes >=0 && (sch->elements>0 || neh->elements >0) ) { if (sch->elements > 0) { /* have some eligible pkts to send out */ struct dn_flow_queue *q = sch->p[0].object ; - struct mbuf *pkt = q->head; + struct dn_pkt *pkt = q->head; struct dn_flow_set *fs = q->fs; - u_int64_t len = pkt->m_pkthdr.len; + u_int64_t len = pkt->dn_m->m_pkthdr.len; int len_scaled = p->bandwidth ? len*8*hz : 0 ; heap_extract(sch, NULL); /* remove queue from heap */ @@ -674,7 +658,7 @@ ready_event_wfq(struct dn_pipe *p) * update F and position in backlogged queue, then * put flow in not_eligible_heap (we will fix this later). */ - len = (q->head)->m_pkthdr.len; + len = (q->head)->dn_m->m_pkthdr.len; q->F += (len<weight ; if (DN_KEY_LEQ(q->S, p->V)) heap_insert(neh, q->S, q); @@ -729,7 +713,7 @@ ready_event_wfq(struct dn_pipe *p) if (p->bandwidth > 0) t = ( p->bandwidth -1 - p->numbytes) / p->bandwidth ; - dn_tag_get(p->tail)->output_time += t ; + p->tail->output_time += t ; p->sched_time = curr_time ; heap_insert(&wfq_ready_heap, curr_time + t, (void *)p); /* XXX should check errors on heap_insert, and drain the whole @@ -1132,8 +1116,7 @@ locate_flowset(int pipe_nr, struct ip_fw *rule) static int dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) { - struct dn_pkt_tag *pkt; - struct m_tag *mtag; + struct dn_pkt *pkt; struct dn_flow_set *fs; struct dn_pipe *pipe ; u_int64_t len = m->m_pkthdr.len ; @@ -1141,9 +1124,7 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) int is_pipe; #if IPFW2 ipfw_insn *cmd = fwa->rule->cmd + fwa->rule->act_ofs; -#endif -#if IPFW2 if (cmd->opcode == O_LOG) cmd += F_LEN(cmd); is_pipe = (cmd->opcode == O_PIPE); @@ -1194,17 +1175,16 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) goto dropit ; /* XXX expensive to zero, see if we can remove it*/ - mtag = m_tag_get(PACKET_TAG_DUMMYNET, - sizeof(struct dn_pkt_tag), M_NOWAIT|M_ZERO); - if ( mtag == NULL ) + pkt = (struct dn_pkt *)malloc(sizeof (*pkt), M_DUMMYNET, M_NOWAIT|M_ZERO); + if ( pkt == NULL ) goto dropit ; /* cannot allocate packet header */ - m_tag_prepend(m, mtag); /* attach to mbuf chain */ - m->m_nextpkt = NULL; - - pkt = (struct dn_pkt_tag *)(mtag+1); /* ok, i can handle the pkt now... */ /* build and enqueue packet + parameters */ + pkt->hdr.mh_type = MT_TAG; + pkt->hdr.mh_flags = PACKET_TAG_DUMMYNET; pkt->rule = fwa->rule ; + DN_NEXT(pkt) = NULL; + pkt->dn_m = m; pkt->dn_dir = dir ; pkt->ifp = fwa->oif; @@ -1226,14 +1206,14 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) pkt->flags = fwa->flags; } if (q->head == NULL) - q->head = m; + q->head = pkt; else - q->tail->m_nextpkt = m; - q->tail = m; + DN_NEXT(q->tail) = pkt; + q->tail = pkt; q->len++; q->len_bytes += len ; - if ( q->head != m ) /* flow was not idle, we are done */ + if ( q->head != pkt ) /* flow was not idle, we are done */ goto done; /* * If we reach this point the flow was previously idle, so we need @@ -1246,7 +1226,7 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) */ dn_key t = 0 ; if (pipe->bandwidth) - t = SET_TICKS(m, q, pipe); + t = SET_TICKS(pkt, q, pipe); q->sched_time = curr_time ; if (t == 0) /* must process it now */ ready_event( q ); @@ -1320,10 +1300,12 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) * Below, the rt_unref is only needed when (pkt->dn_dir == DN_TO_IP_OUT) * Doing this would probably save us the initial bzero of dn_pkt */ -#define DN_FREE_PKT(_m) do { \ - rt_unref(dn_tag_get(_m)->ro.ro_rt, __func__); \ - m_freem(_m); \ -} while (0) +#define DN_FREE_PKT(pkt) { \ + struct dn_pkt *n = pkt ; \ + rt_unref ( n->ro.ro_rt, __func__ ) ; \ + m_freem(n->dn_m); \ + pkt = DN_NEXT(n) ; \ + free(n, M_DUMMYNET) ; } /* * Dispose all packets and flow_queues on a flow_set. @@ -1334,6 +1316,7 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) static void purge_flow_set(struct dn_flow_set *fs, int all) { + struct dn_pkt *pkt ; struct dn_flow_queue *q, *qn ; int i ; @@ -1341,13 +1324,8 @@ purge_flow_set(struct dn_flow_set *fs, int all) for (i = 0 ; i <= fs->rq_size ; i++ ) { for (q = fs->rq[i] ; q ; q = qn ) { - struct mbuf *m, *mnext; - - mnext = q->head; - while ((m = mnext) != NULL) { - mnext = m->m_nextpkt; - DN_FREE_PKT(m); - } + for (pkt = q->head ; pkt ; ) + DN_FREE_PKT(pkt) ; qn = q->next ; free(q, M_DUMMYNET); } @@ -1374,15 +1352,12 @@ purge_flow_set(struct dn_flow_set *fs, int all) static void purge_pipe(struct dn_pipe *pipe) { - struct mbuf *m, *mnext; + struct dn_pkt *pkt ; purge_flow_set( &(pipe->fs), 1 ); - mnext = pipe->head; - while ((m = mnext) != NULL) { - mnext = m->m_nextpkt; - DN_FREE_PKT(m); - } + for (pkt = pipe->head ; pkt ; ) + DN_FREE_PKT(pkt) ; heap_free( &(pipe->scheduler_heap) ); heap_free( &(pipe->not_eligible_heap) ); @@ -1437,15 +1412,13 @@ dn_rule_delete_fs(struct dn_flow_set *fs, void *r) { int i ; struct dn_flow_queue *q ; - struct mbuf *m ; + struct dn_pkt *pkt ; for (i = 0 ; i <= fs->rq_size ; i++) /* last one is ovflow */ for (q = fs->rq[i] ; q ; q = q->next ) - for (m = q->head ; m ; m = m->m_nextpkt ) { - struct dn_pkt_tag *pkt = dn_tag_get(m) ; + for (pkt = q->head ; pkt ; pkt = DN_NEXT(pkt) ) if (pkt->rule == r) pkt->rule = ip_fw_default_rule ; - } } /* * when a firewall rule is deleted, scan all queues and remove the flow-id @@ -1455,9 +1428,8 @@ void dn_rule_delete(void *r) { struct dn_pipe *p ; + struct dn_pkt *pkt ; struct dn_flow_set *fs ; - struct dn_pkt_tag *pkt ; - struct mbuf *m ; DUMMYNET_LOCK(); /* @@ -1470,11 +1442,9 @@ dn_rule_delete(void *r) for ( p = all_pipes ; p ; p = p->next ) { fs = &(p->fs) ; dn_rule_delete_fs(fs, r); - for (m = p->head ; m ; m = m->m_nextpkt ) { - pkt = dn_tag_get(m) ; + for (pkt = p->head ; pkt ; pkt = DN_NEXT(pkt) ) if (pkt->rule == r) pkt->rule = ip_fw_default_rule ; - } } DUMMYNET_UNLOCK(); } @@ -1748,7 +1718,7 @@ dummynet_drain() { struct dn_flow_set *fs; struct dn_pipe *p; - struct mbuf *m, *mnext; + struct dn_pkt *pkt; DUMMYNET_LOCK_ASSERT(); @@ -1761,12 +1731,8 @@ dummynet_drain() for (p = all_pipes; p; p= p->next ) { purge_flow_set(&(p->fs), 0); - - mnext = p->head; - while ((m = mnext) != NULL) { - mnext = m->m_nextpkt; - DN_FREE_PKT(m); - } + for (pkt = p->head ; pkt ; ) + DN_FREE_PKT(pkt) ; p->head = p->tail = NULL ; } } diff --git a/sys/netinet/ip_dummynet.h b/sys/netinet/ip_dummynet.h index 81c27b20538e..bc506afc267f 100644 --- a/sys/netinet/ip_dummynet.h +++ b/sys/netinet/ip_dummynet.h @@ -111,12 +111,24 @@ struct dn_heap { #ifdef _KERNEL /* - * Packets processed by dummynet have an mbuf tag associated with - * them that carries their dummynet state. This is used within - * the dummynet code as well as outside when checking for special - * processing requirements. + * struct dn_pkt identifies a packet in the dummynet queue, but + * is also used to tag packets passed back to the various destinations + * (ip_input(), ip_output(), bdg_forward() and so on). + * As such the first part of the structure must be a struct m_hdr, + * followed by dummynet-specific parameters. The m_hdr must be + * initialized with + * mh_type = MT_TAG; + * mh_flags = PACKET_TYPE_DUMMYNET; + * mh_next = + * + * mh_nextpkt, mh_data are free for dummynet use (mh_nextpkt is used to + * build a linked list of packets in a dummynet queue). */ -struct dn_pkt_tag { +struct dn_pkt { + struct m_hdr hdr ; +#define DN_NEXT(x) (struct dn_pkt *)(x)->hdr.mh_nextpkt +#define dn_m hdr.mh_next /* packet to be forwarded */ + struct ip_fw *rule; /* matching rule */ int dn_dir; /* action when packet comes out. */ #define DN_TO_IP_OUT 1 @@ -205,7 +217,7 @@ struct dn_flow_queue { struct dn_flow_queue *next ; struct ipfw_flow_id id ; - struct mbuf *head, *tail ; /* queue of packets */ + struct dn_pkt *head, *tail ; /* queue of packets */ u_int len ; u_int len_bytes ; u_long numbytes ; /* credit for transmission (dynamic queues) */ @@ -318,7 +330,7 @@ struct dn_pipe { /* a pipe */ int bandwidth; /* really, bytes/tick. */ int delay ; /* really, ticks */ - struct mbuf *head, *tail ; /* packets in delay line */ + struct dn_pkt *head, *tail ; /* packets in delay line */ /* WF2Q+ */ struct dn_heap scheduler_heap ; /* top extract - key Finish time*/ @@ -352,14 +364,5 @@ extern ip_dn_ruledel_t *ip_dn_ruledel_ptr; extern ip_dn_io_t *ip_dn_io_ptr; #define DUMMYNET_LOADED (ip_dn_io_ptr != NULL) -/* - * Return the IPFW rule associated with the dummynet tag; if any. - */ -static __inline struct ip_fw * -ip_dn_find_rule(struct mbuf *m) -{ - struct m_tag *mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); - return mtag ? ((struct dn_pkt_tag *)(mtag+1))->rule : NULL; -} #endif #endif /* _IP_DUMMYNET_H */ diff --git a/sys/netinet/ip_fastfwd.c b/sys/netinet/ip_fastfwd.c index 3db615dc8e33..404171252b93 100644 --- a/sys/netinet/ip_fastfwd.c +++ b/sys/netinet/ip_fastfwd.c @@ -110,7 +110,6 @@ #include #include -#include #include static int ipfastforward_active = 0; @@ -133,7 +132,7 @@ ip_fastforward(struct mbuf *m) struct ip *tip; struct mbuf *teem = NULL; #endif - struct m_tag *mtag; + struct mbuf *tag = NULL; struct route ro; struct sockaddr_in *dst = NULL; struct in_ifaddr *ia = NULL; @@ -151,6 +150,16 @@ ip_fastforward(struct mbuf *m) if (!ipfastforward_active || !ipforwarding) return 0; + /* + * If there is any MT_TAG we fall back to ip_input because we can't + * handle TAGs here. Should never happen as we get directly called + * from the if_output routines. + */ + if (m->m_type == MT_TAG) { + KASSERT(0, ("%s: packet with MT_TAG not expected", __func__)); + return 0; + } + M_ASSERTVALID(m); M_ASSERTPKTHDR(m); @@ -364,13 +373,25 @@ ip_fastforward(struct mbuf *m) /* * See if this is a fragment */ - if (ip->ip_off & (IP_MF | IP_OFFMASK)) + if (ip->ip_off & (IP_MF | IP_OFFMASK)) { + MGETHDR(tag, M_DONTWAIT, MT_TAG); + if (tag == NULL) + goto drop; + tag->m_flags = PACKET_TAG_DIVERT; + tag->m_data = (caddr_t)(intptr_t)args.divert_rule; + tag->m_next = m; + /* XXX: really bloody hack, see ip_input */ + tag->m_nextpkt = (struct mbuf *)1; + m = tag; + tag = NULL; + goto droptoours; + } /* * Tee packet */ if ((ipfw & IP_FW_PORT_TEE_FLAG) != 0) - teem = divert_clone(m); + teem = m_dup(m, M_DONTWAIT); else teem = m; if (teem == NULL) @@ -392,7 +413,7 @@ ip_fastforward(struct mbuf *m) /* * Deliver packet to divert input routine */ - divert_packet(teem, 0); + divert_packet(teem, 0, ipfw & 0xffff, args.divert_rule); /* * If this was not tee, we are done */ @@ -539,13 +560,27 @@ ip_fastforward(struct mbuf *m) /* * See if this is a fragment */ - if (ip->ip_off & (IP_MF | IP_OFFMASK)) + if (ip->ip_off & (IP_MF | IP_OFFMASK)) { + MGETHDR(tag, M_DONTWAIT, MT_TAG); + if (tag == NULL) { + RTFREE(ro.ro_rt); + goto drop; + } + tag->m_flags = PACKET_TAG_DIVERT; + tag->m_data = (caddr_t)(intptr_t)args.divert_rule; + tag->m_next = m; + /* XXX: really bloody hack, see ip_input */ + tag->m_nextpkt = (struct mbuf *)1; + m = tag; + tag = NULL; + goto droptoours; + } /* * Tee packet */ if ((ipfw & IP_FW_PORT_TEE_FLAG) != 0) - teem = divert_clone(m); + teem = m_dup(m, M_DONTWAIT); else teem = m; if (teem == NULL) @@ -567,7 +602,7 @@ ip_fastforward(struct mbuf *m) /* * Deliver packet to divert input routine */ - divert_packet(teem, 0); + divert_packet(teem, 0, ipfw & 0xffff, args.divert_rule); /* * If this was not tee, we are done */ @@ -603,24 +638,38 @@ ip_fastforward(struct mbuf *m) if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr) { forwardlocal: if (args.next_hop) { - mtag = m_tag_get(PACKET_TAG_IPFORWARD, - sizeof(struct sockaddr_in *), - M_NOWAIT); - if (mtag == NULL) { - /* XXX statistic */ + /* XXX leak */ + MGETHDR(tag, M_DONTWAIT, MT_TAG); + if (tag == NULL) { if (ro.ro_rt) RTFREE(ro.ro_rt); goto drop; } - *(struct sockaddr_in **)(mtag+1) = - args.next_hop; - m_tag_prepend(m, mtag); + tag->m_flags = PACKET_TAG_IPFORWARD; + tag->m_data = (caddr_t)args.next_hop; + tag->m_next = m; + /* XXX: really bloody hack, + * see ip_input */ + tag->m_nextpkt = (struct mbuf *)1; + m = tag; + tag = NULL; } #ifdef IPDIVERT droptoours: /* Used for DIVERT */ #endif - /* NB: ip_input understands this */ - m->m_flags |= M_FASTFWD_OURS; + MGETHDR(tag, M_DONTWAIT, MT_TAG); + if (tag == NULL) { + if (ro.ro_rt) + RTFREE(ro.ro_rt); + goto drop; + } + tag->m_flags = PACKET_TAG_IPFASTFWD_OURS; + tag->m_data = NULL; + tag->m_next = m; + /* XXX: really bloody hack, see ip_input */ + tag->m_nextpkt = (struct mbuf *)1; + m = tag; + tag = NULL; /* ip still points to the real packet */ ip->ip_len = htons(ip->ip_len); diff --git a/sys/netinet/ip_fw.h b/sys/netinet/ip_fw.h index 8e3047de183f..e6eae2d4aaec 100644 --- a/sys/netinet/ip_fw.h +++ b/sys/netinet/ip_fw.h @@ -400,6 +400,7 @@ struct ip_fw_args { int flags; /* for dummynet */ struct ipfw_flow_id f_id; /* grabbed from IP header */ + u_int16_t divert_rule; /* divert cookie */ u_int32_t retval; }; diff --git a/sys/netinet/ip_fw2.c b/sys/netinet/ip_fw2.c index 7097b20f7abe..836be9d61997 100644 --- a/sys/netinet/ip_fw2.c +++ b/sys/netinet/ip_fw2.c @@ -66,7 +66,6 @@ #include #include #include -#include #include #include #include @@ -1458,7 +1457,6 @@ ipfw_chk(struct ip_fw_args *args) int dyn_dir = MATCH_UNKNOWN; ipfw_dyn_rule *q = NULL; struct ip_fw_chain *chain = &layer3_chain; - struct m_tag *mtag; if (m->m_flags & M_SKIP_FIREWALL) return 0; /* accept */ @@ -1547,7 +1545,6 @@ ipfw_chk(struct ip_fw_args *args) after_ip_checks: IPFW_LOCK(chain); /* XXX expensive? can we run lock free? */ - mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL); if (args->rule) { /* * Packet has already been tagged. Look for the next rule @@ -1570,7 +1567,7 @@ ipfw_chk(struct ip_fw_args *args) * Find the starting rule. It can be either the first * one, or the one after divert_rule if asked so. */ - int skipto = mtag ? divert_cookie(mtag) : 0; + int skipto = args->divert_rule; f = chain->rules; if (args->eh == NULL && skipto != 0) { @@ -1586,9 +1583,7 @@ ipfw_chk(struct ip_fw_args *args) } } } - /* reset divert rule to avoid confusion later */ - if (mtag) - m_tag_delete(m, mtag); + args->divert_rule = 0; /* reset to avoid confusion later */ /* * Now scan the rules, and parse microinstructions for each rule. @@ -2023,29 +2018,14 @@ ipfw_chk(struct ip_fw_args *args) goto done; case O_DIVERT: - case O_TEE: { - struct divert_tag *dt; - + case O_TEE: if (args->eh) /* not on layer 2 */ break; - mtag = m_tag_get(PACKET_TAG_DIVERT, - sizeof(struct divert_tag), - M_NOWAIT); - if (mtag == NULL) { - /* XXX statistic */ - /* drop packet */ - IPFW_UNLOCK(chain); - return IP_FW_PORT_DENY_FLAG; - } - dt = (struct divert_tag *)(mtag+1); - dt->cookie = f->rulenum; - dt->info = (cmd->opcode == O_DIVERT) ? + args->divert_rule = f->rulenum; + retval = (cmd->opcode == O_DIVERT) ? cmd->arg1 : cmd->arg1 | IP_FW_PORT_TEE_FLAG; - m_tag_prepend(m, mtag); - retval = dt->info; goto done; - } case O_COUNT: case O_SKIPTO: diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 68a69bbce6b3..e82b1add8b88 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -78,7 +78,6 @@ #include #include -#include #include #ifdef IPSEC @@ -240,7 +239,8 @@ static int ip_dooptions(struct mbuf *m, int, static void ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop); static void ip_freef(struct ipqhead *, struct ipq *); -static struct mbuf *ip_reass(struct mbuf *, struct ipqhead *, struct ipq *); +static struct mbuf *ip_reass(struct mbuf *, struct ipqhead *, + struct ipq *, u_int32_t *, u_int16_t *); /* * IP initialization: fill in IP protocol switch table. @@ -300,18 +300,17 @@ ip_input(struct mbuf *m) struct in_ifaddr *ia = NULL; struct ifaddr *ifa; int i, checkif, hlen = 0; + int ours = 0; u_short sum; struct in_addr pkt_dst; - struct m_tag *mtag; -#ifdef IPDIVERT - u_int32_t divert_info; /* packet divert/tee info */ -#endif + u_int32_t divert_info = 0; /* packet divert/tee info */ struct ip_fw_args args; int dchg = 0; /* dest changed after fw */ #ifdef PFIL_HOOKS struct in_addr odst; /* original dst address */ #endif #ifdef FAST_IPSEC + struct m_tag *mtag; struct tdb_ident *tdbi; struct secpolicy *sp; int s, error; @@ -319,19 +318,60 @@ ip_input(struct mbuf *m) args.eh = NULL; args.oif = NULL; - args.next_hop = ip_claim_next_hop(m); - args.rule = ip_dn_find_rule(m); + args.rule = NULL; + args.divert_rule = 0; /* divert cookie */ + args.next_hop = NULL; - M_ASSERTPKTHDR(m); + /* + * Grab info from MT_TAG mbufs prepended to the chain. + * + * XXX: This is ugly. These pseudo mbuf prepend tags should really + * be real m_tags. Before these have always been allocated on the + * callers stack, so we didn't have to free them. Now with + * ip_fastforward they are true mbufs and we have to free them + * otherwise we have a leak. Must rewrite ipfw to use m_tags. + */ + for (; m && m->m_type == MT_TAG;) { + struct mbuf *m0; - if (m->m_flags & M_FASTFWD_OURS) { - /* ip_fastforward firewall changed dest to local */ - m->m_flags &= ~M_FASTFWD_OURS; /* just in case... */ - goto ours; + switch(m->_m_tag_id) { + default: + printf("ip_input: unrecognised MT_TAG tag %d\n", + m->_m_tag_id); + break; + + case PACKET_TAG_DUMMYNET: + args.rule = ((struct dn_pkt *)m)->rule; + break; + + case PACKET_TAG_DIVERT: + args.divert_rule = (intptr_t)m->m_hdr.mh_data & 0xffff; + break; + + case PACKET_TAG_IPFORWARD: + args.next_hop = (struct sockaddr_in *)m->m_hdr.mh_data; + break; + + case PACKET_TAG_IPFASTFWD_OURS: + ours = 1; + break; + } + + m0 = m; + m = m->m_next; + /* XXX: This is set by ip_fastforward */ + if (m0->m_nextpkt == (struct mbuf *)1) + m_free(m0); } - if (args.rule) { /* dummynet already filtered us */ - ip = mtod(m, struct ip *); - hlen = ip->ip_hl << 2; + + M_ASSERTPKTHDR(m); + + if (ours) /* ip_fastforward firewall changed dest to local */ + goto ours; + + if (args.rule) { /* dummynet already filtered us */ + ip = mtod(m, struct ip *); + hlen = ip->ip_hl << 2; goto iphack ; } @@ -491,6 +531,7 @@ ip_input(struct mbuf *m) #ifdef IPDIVERT if (i != 0 && (i & IP_FW_PORT_DYNT_FLAG) == 0) { /* Divert or tee packet */ + divert_info = i; goto ours; } #endif @@ -798,11 +839,13 @@ ip_input(struct mbuf *m) /* * Attempt reassembly; if it succeeds, proceed. - * ip_reass() will return a different mbuf. + * ip_reass() will return a different mbuf, and update + * the divert info in divert_info and args.divert_rule. */ ipstat.ips_fragments++; m->m_pkthdr.header = ip; - m = ip_reass(m, &ipq[sum], fp); + m = ip_reass(m, + &ipq[sum], fp, &divert_info, &args.divert_rule); IPQ_UNLOCK(); if (m == 0) return; @@ -812,7 +855,7 @@ ip_input(struct mbuf *m) hlen = ip->ip_hl << 2; #ifdef IPDIVERT /* Restore original checksum before diverting packet */ - if (divert_find_info(m) != 0) { + if (divert_info != 0) { ip->ip_len += hlen; ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); @@ -833,15 +876,12 @@ ip_input(struct mbuf *m) /* * Divert or tee packet to the divert protocol if required. */ - divert_info = divert_find_info(m); if (divert_info != 0) { - struct mbuf *clone; + struct mbuf *clone = NULL; /* Clone packet if we're doing a 'tee' */ if ((divert_info & IP_FW_PORT_TEE_FLAG) != 0) - clone = divert_clone(m); - else - clone = NULL; + clone = m_dup(m, M_DONTWAIT); /* Restore packet header fields to original values */ ip->ip_len += hlen; @@ -849,7 +889,7 @@ ip_input(struct mbuf *m) ip->ip_off = htons(ip->ip_off); /* Deliver packet to divert input routine */ - divert_packet(m, 1); + divert_packet(m, 1, divert_info & 0xffff, args.divert_rule); ipstat.ips_delivered++; /* If 'tee', continue with original packet */ @@ -860,11 +900,12 @@ ip_input(struct mbuf *m) ip->ip_len += hlen; /* * Jump backwards to complete processing of the - * packet. We do not need to clear args.next_hop - * as that will not be used again and the cloned packet - * doesn't contain a divert packet tag so we won't - * re-entry this block. + * packet. But first clear divert_info to avoid + * entering this block again. + * We do not need to clear args.divert_rule + * or args.next_hop as they will not be used. */ + divert_info = 0; goto pass; } #endif @@ -927,18 +968,19 @@ DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/ ipstat.ips_delivered++; NET_PICKUP_GIANT(); if (args.next_hop && ip->ip_p == IPPROTO_TCP) { - /* attach next hop info for TCP */ - mtag = m_tag_get(PACKET_TAG_IPFORWARD, - sizeof(struct sockaddr_in *), M_NOWAIT); - if (mtag == NULL) { - /* XXX statistic */ - NET_DROP_GIANT(); - goto bad; - } - *(struct sockaddr_in **)(mtag+1) = args.next_hop; - m_tag_prepend(m, mtag); - } - (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen); + /* TCP needs IPFORWARD info if available */ + struct m_hdr tag; + + tag.mh_type = MT_TAG; + tag.mh_flags = PACKET_TAG_IPFORWARD; + tag.mh_data = (caddr_t)args.next_hop; + tag.mh_next = m; + tag.mh_nextpkt = NULL; + + (*inetsw[ip_protox[ip->ip_p]].pr_input)( + (struct mbuf *)&tag, hlen); + } else + (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen); NET_DROP_GIANT(); return; bad: @@ -957,7 +999,8 @@ DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/ */ static struct mbuf * -ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp) +ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp, + u_int32_t *divinfo, u_int16_t *divert_rule) { struct ip *ip = mtod(m, struct ip *); register struct mbuf *p, *q, *nq; @@ -999,6 +1042,10 @@ ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp) fp->ipq_dst = ip->ip_dst; fp->ipq_frags = m; m->m_nextpkt = NULL; +#ifdef IPDIVERT + fp->ipq_div_info = 0; + fp->ipq_div_cookie = 0; +#endif goto inserted; } else { fp->ipq_nfrags++; @@ -1082,15 +1129,16 @@ ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp) inserted: #ifdef IPDIVERT - if (ip->ip_off != 0) { - /* - * Strip any divert information; only the info - * on the first fragment is used/kept. - */ - struct m_tag *mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL); - if (mtag) - m_tag_delete(m, mtag); + /* + * Transfer firewall instructions to the fragment structure. + * Only trust info in the fragment at offset 0. + */ + if (ip->ip_off == 0) { + fp->ipq_div_info = *divinfo; + fp->ipq_div_cookie = *divert_rule; } + *divinfo = 0; + *divert_rule = 0; #endif /* @@ -1156,6 +1204,14 @@ ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp) mac_destroy_ipq(fp); #endif +#ifdef IPDIVERT + /* + * Extract firewall instructions from the fragment structure. + */ + *divinfo = fp->ipq_div_info; + *divert_rule = fp->ipq_div_cookie; +#endif + /* * Create header for new ip packet by * modifying header of first packet; @@ -1176,6 +1232,10 @@ ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp) return (m); dropfrag: +#ifdef IPDIVERT + *divinfo = 0; + *divert_rule = 0; +#endif ipstat.ips_fragdropped++; if (fp != NULL) fp->ipq_nfrags--; @@ -1721,7 +1781,6 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop) struct in_ifaddr *ia; int error, type = 0, code = 0; struct mbuf *mcopy; - struct m_tag *mtag; n_long dest; struct in_addr pkt_dst; struct ifnet *destifp; @@ -1854,18 +1913,21 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop) RTFREE(rt); } + { + struct m_hdr tag; + if (next_hop) { - mtag = m_tag_get(PACKET_TAG_IPFORWARD, - sizeof(struct sockaddr_in *), M_NOWAIT); - if (mtag == NULL) { - /* XXX statistic */ - m_freem(m); - return; - } - *(struct sockaddr_in **)(mtag+1) = next_hop; - m_tag_prepend(m, mtag); + /* Pass IPFORWARD info if available */ + + tag.mh_type = MT_TAG; + tag.mh_flags = PACKET_TAG_IPFORWARD; + tag.mh_data = (caddr_t)next_hop; + tag.mh_next = m; + tag.mh_nextpkt = NULL; + m = (struct mbuf *)&tag; } error = ip_output(m, (struct mbuf *)0, NULL, IP_FORWARDING, 0, NULL); + } if (error) ipstat.ips_cantforward++; else { diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 4ab15280bd5c..368d3c550be4 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -90,7 +90,6 @@ static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); #endif /*FAST_IPSEC*/ #include -#include #include #define print_ip(x, a, y) printf("%s %d.%d.%d.%d%s",\ @@ -131,11 +130,12 @@ extern struct protosw inetsw[]; * inserted, so must have a NULL opt pointer. */ int -ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, +ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags, struct ip_moptions *imo, struct inpcb *inp) { struct ip *ip; struct ifnet *ifp = NULL; /* keep compiler happy */ + struct mbuf *m; int hlen = sizeof (struct ip); int len, off, error = 0; struct sockaddr_in *dst = NULL; /* keep compiler happy */ @@ -143,13 +143,11 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int isbroadcast, sw_csum; struct in_addr pkt_dst; struct route iproute; - struct m_tag *dummytag; /* dummynet packet tag */ - struct m_tag *mtag; - struct mbuf *m0; /* XXX */ #ifdef IPSEC struct secpolicy *sp = NULL; #endif #ifdef FAST_IPSEC + struct m_tag *mtag; struct secpolicy *sp = NULL; struct tdb_ident *tdbi; int s; @@ -159,7 +157,42 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, args.eh = NULL; args.rule = NULL; - args.next_hop = ip_claim_next_hop(m); + args.next_hop = NULL; + args.divert_rule = 0; /* divert cookie */ + + /* Grab info from MT_TAG mbufs prepended to the chain. */ + for (; m0 && m0->m_type == MT_TAG; m0 = m0->m_next) { + switch(m0->_m_tag_id) { + default: + printf("ip_output: unrecognised MT_TAG tag %d\n", + m0->_m_tag_id); + break; + + case PACKET_TAG_DUMMYNET: + /* + * the packet was already tagged, so part of the + * processing was already done, and we need to go down. + * Get parameters from the header. + */ + args.rule = ((struct dn_pkt *)m0)->rule; + opt = NULL ; + ro = & ( ((struct dn_pkt *)m0)->ro ) ; + imo = NULL ; + dst = ((struct dn_pkt *)m0)->dn_dst ; + ifp = ((struct dn_pkt *)m0)->ifp ; + flags = ((struct dn_pkt *)m0)->flags ; + break; + + case PACKET_TAG_DIVERT: + args.divert_rule = (intptr_t)m0->m_data & 0xffff; + break; + + case PACKET_TAG_IPFORWARD: + args.next_hop = (struct sockaddr_in *)m0->m_data; + break; + } + } + m = m0; M_ASSERTPKTHDR(m); @@ -171,34 +204,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, if (inp != NULL) INP_LOCK_ASSERT(inp); - /* - * When packet comes from dummynet restore state from - * previous processing instead of the header. Yech! - * - * XXX add conditional compilation? - */ - dummytag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); - if (dummytag != NULL) { - struct dn_pkt_tag *dt = (struct dn_pkt_tag *)(dummytag+1); - - /* - * NB: the route in the tag is known to have a - * reference that must be free'd, but doing this - * before the storage is reclaimed is painful due - * to some of the contorted code in this routine. - * So instead unlink the tag from the mbuf so it - * doesn't get reclaimed and do the cleanup explicitly - * below. We should be able to do this automatically - * using a uma dtor method when m_tag's can be - * allocated from zones. - */ - m_tag_unlink(m, dummytag); - - args.rule = dt->rule; - ro = &dt->ro; - dst = dt->dn_dst; - ifp = dt->ifp; - + if (args.rule != NULL) { /* dummynet already saw us */ ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2 ; if (ro->ro_rt) @@ -551,7 +557,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, dst = (struct sockaddr_in *)state.dst; if (error) { /* mbuf is already reclaimed in ipsec4_output. */ - m = NULL; + m0 = NULL; switch (error) { case EHOSTUNREACH: case ENETUNREACH: @@ -791,13 +797,11 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, } #ifdef IPDIVERT if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) { - struct mbuf *clone; + struct mbuf *clone = NULL; /* Clone packet if we're doing a 'tee' */ if ((off & IP_FW_PORT_TEE_FLAG) != 0) - clone = divert_clone(m); - else - clone = NULL; + clone = m_dup(m, M_DONTWAIT); /* * XXX @@ -814,7 +818,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, ip->ip_off = htons(ip->ip_off); /* Deliver packet to divert input routine */ - divert_packet(m, 0); + divert_packet(m, 0, off & 0xffff, args.divert_rule); /* If 'tee', continue with original packet */ if (clone != NULL) { @@ -892,31 +896,26 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, break; } if (ia) { /* tell ip_input "dont filter" */ - mtag = m_tag_get(PACKET_TAG_IPFORWARD, - sizeof(struct sockaddr_in *), - M_NOWAIT); - if (mtag == NULL) { - /* XXX statistic */ - error = ENOBUFS; /* XXX */ - goto bad; - } - *(struct sockaddr_in **)(mtag+1) = - args.next_hop; - m_tag_prepend(m, mtag); + struct m_hdr tag; + + tag.mh_type = MT_TAG; + tag.mh_flags = PACKET_TAG_IPFORWARD; + tag.mh_data = (caddr_t)args.next_hop; + tag.mh_next = m; + tag.mh_nextpkt = NULL; if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = ifunit("lo0"); if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; - m->m_pkthdr.csum_data = 0xffff; + m0->m_pkthdr.csum_data = 0xffff; } m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID; ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); - /* XXX netisr_queue(NETISR_IP, m); */ - ip_input(m); + ip_input((struct mbuf *)&tag); goto done; } /* @@ -1073,12 +1072,6 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, RTFREE(ro->ro_rt); ro->ro_rt = NULL; } - if (dummytag) { - struct dn_pkt_tag *dt = (struct dn_pkt_tag *)(dummytag+1); - if (dt->ro.ro_rt) - RTFREE(dt->ro.ro_rt); - m_tag_free(dummytag); - } #ifdef IPSEC if (sp != NULL) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index 23d1d00b3e08..4bd600d7f6fe 100644 --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -65,6 +65,8 @@ struct ipq { struct mbuf *ipq_frags; /* to ip headers of fragments */ struct in_addr ipq_src,ipq_dst; u_char ipq_nfrags; /* # frags in this packet */ + u_int32_t ipq_div_info; /* ipfw divert port & flags */ + u_int16_t ipq_div_cookie; /* ipfw divert cookie */ struct label *ipq_label; /* MAC label */ }; #endif /* _KERNEL */ @@ -195,23 +197,14 @@ extern int (*ip_rsvp_vif)(struct socket *, struct sockopt *); extern void (*ip_rsvp_force_done)(struct socket *); extern void (*rsvp_input_p)(struct mbuf *m, int off); -#define M_FASTFWD_OURS M_PROTO1 /* sent by ip_fastforward to ip_input */ -/* - * Return the next hop address associated with the mbuf; if any. - * If a tag is present it is also removed. - */ -static __inline struct sockaddr_in * -ip_claim_next_hop(struct mbuf *m) -{ - struct m_tag *mtag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); - if (mtag) { - struct sockaddr_in *sin = *(struct sockaddr_in **)(mtag+1); - m_tag_delete(m, mtag); - return sin; - } else - return NULL; -} +#ifdef IPDIVERT +void div_init(void); +void div_input(struct mbuf *, int); +void div_ctlinput(int, struct sockaddr *, void *); +void divert_packet(struct mbuf *m, int incoming, int port, int rule); +extern struct pr_usrreqs div_usrreqs; +#endif #ifdef PFIL_HOOKS extern struct pfil_head inet_pfil_hook; diff --git a/sys/netinet/tcp_debug.c b/sys/netinet/tcp_debug.c index 421ecbeedeef..89e9d7cbc94a 100644 --- a/sys/netinet/tcp_debug.c +++ b/sys/netinet/tcp_debug.c @@ -54,7 +54,6 @@ #include #include #include -#include #include #include diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 01033ffb1f7c..39d2dc199d24 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -360,7 +360,7 @@ tcp_input(m, off0) struct tcpopt to; /* options in this segment */ struct rmxp_tao tao; /* our TAO cache entry */ int headlocked = 0; - struct sockaddr_in *next_hop; + struct sockaddr_in *next_hop = NULL; int rstreason; /* For badport_bandlim accounting purposes */ struct ip6_hdr *ip6 = NULL; @@ -380,7 +380,11 @@ tcp_input(m, off0) short ostate = 0; #endif - next_hop = ip_claim_next_hop(m); + /* Grab info from MT_TAG mbufs prepended to the chain. */ + for (;m && m->m_type == MT_TAG; m = m->m_next) { + if (m->_m_tag_id == PACKET_TAG_IPFORWARD) + next_hop = (struct sockaddr_in *)m->m_hdr.mh_data; + } #ifdef INET6 isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0; #endif diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c index 01033ffb1f7c..39d2dc199d24 100644 --- a/sys/netinet/tcp_reass.c +++ b/sys/netinet/tcp_reass.c @@ -360,7 +360,7 @@ tcp_input(m, off0) struct tcpopt to; /* options in this segment */ struct rmxp_tao tao; /* our TAO cache entry */ int headlocked = 0; - struct sockaddr_in *next_hop; + struct sockaddr_in *next_hop = NULL; int rstreason; /* For badport_bandlim accounting purposes */ struct ip6_hdr *ip6 = NULL; @@ -380,7 +380,11 @@ tcp_input(m, off0) short ostate = 0; #endif - next_hop = ip_claim_next_hop(m); + /* Grab info from MT_TAG mbufs prepended to the chain. */ + for (;m && m->m_type == MT_TAG; m = m->m_next) { + if (m->_m_tag_id == PACKET_TAG_IPFORWARD) + next_hop = (struct sockaddr_in *)m->m_hdr.mh_data; + } #ifdef INET6 isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0; #endif diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index fa2cc40bcf2e..6ce9922a03c9 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -221,8 +221,8 @@ struct mbuf { #if 0 #define MT_RIGHTS 12 /* access rights */ #define MT_IFADDR 13 /* interface address */ -#define MT_TAG 13 /* deprecated: use m_tag's instead */ #endif +#define MT_TAG 13 /* volatile metadata associated to pkts */ #define MT_CONTROL 14 /* extra-data protocol message */ #define MT_OOBDATA 15 /* expedited data */ #define MT_NTYPES 16 /* number of mbuf types for mbtypes[] */ @@ -549,11 +549,28 @@ struct mbuf * #define PACKET_TAG_IPSEC_SOCKET 12 /* IPSEC socket ref */ #define PACKET_TAG_IPSEC_HISTORY 13 /* IPSEC history */ #define PACKET_TAG_IPV6_INPUT 14 /* IPV6 input processing */ + +/* + * As a temporary and low impact solution to replace the even uglier + * approach used so far in some parts of the network stack (which relies + * on global variables), packet tag-like annotations are stored in MT_TAG + * mbufs (or lookalikes) prepended to the actual mbuf chain. + * + * m_type = MT_TAG + * m_flags = m_tag_id + * m_next = next buffer in chain. + * + * BE VERY CAREFUL not to pass these blocks to the mbuf handling routines. + */ +#define _m_tag_id m_hdr.mh_flags + +/* Packet tags used in the FreeBSD network stack. */ #define PACKET_TAG_DUMMYNET 15 /* dummynet info */ #define PACKET_TAG_IPFW 16 /* ipfw classification */ #define PACKET_TAG_DIVERT 17 /* divert info */ #define PACKET_TAG_IPFORWARD 18 /* ipforward info */ #define PACKET_TAG_MACLABEL (19 | MTAG_PERSISTENT) /* MAC label */ +#define PACKET_TAG_IPFASTFWD_OURS 20 /* IP fastforward dropback */ /* Packet tag routines. */ struct m_tag *m_tag_alloc(u_int32_t, int, int, int);