freebsd-skq/sys/net/bridge.c

1165 lines
32 KiB
C
Raw Normal View History

1998-09-12 22:07:47 +00:00
/*
* Copyright (c) 1998-2002 Luigi Rizzo
*
* Work partly supported by: Cisco Systems, Inc. - NSITE lab, RTP, NC
1998-09-12 22:07:47 +00:00
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
1998-09-12 22:07:47 +00:00
*/
/*
* This code implements bridging in FreeBSD. It only acts on ethernet
* interfaces, including VLANs (others are still usable for routing).
* A FreeBSD host can implement multiple logical bridges, called
* "clusters". Each cluster is made of a set of interfaces, and
* identified by a "cluster-id" which is a number in the range 1..2^16-1.
*
* Bridging is enabled by the sysctl variable
* net.link.ether.bridge
* the grouping of interfaces into clusters is done with
* net.link.ether.bridge_cfg
* containing a list of interfaces each optionally followed by
* a colon and the cluster it belongs to (1 is the default).
* Separators can be * spaces, commas or tabs, e.g.
* net.link.ether.bridge_cfg="fxp0:2 fxp1:2 dc0 dc1:1"
* Optionally bridged packets can be passed through the firewall,
* this is controlled by the variable
* net.link.ether.bridge_ipfw
*
* For each cluster there is a descriptor (cluster_softc) storing
* the following data structures:
* - a hash table with the MAC address and destination interface for each
* known node. The table is indexed using a hash of the source address.
* - an array with the MAC addresses of the interfaces used in the cluster.
1998-09-12 22:07:47 +00:00
*
* Input packets are tapped near the beginning of ether_input(), and
* analysed by bridge_in(). Depending on the result, the packet
1998-09-12 22:07:47 +00:00
* can be forwarded to one or more output interfaces using bdg_forward(),
* and/or sent to the upper layer (e.g. in case of multicast).
*
* Output packets are intercepted near the end of ether_output().
* The correct destination is selected by bridge_dst_lookup(),
* and then forwarding is done by bdg_forward().
1998-09-12 22:07:47 +00:00
*
* The arp code is also modified to let a machine answer to requests
* irrespective of the port the request came from.
*
* In case of loops in the bridging topology, the bridge detects this
* event and temporarily mutes output bridging on one of the ports.
* Periodically, interfaces are unmuted by bdg_timeout().
* Muting is only implemented as a safety measure, and also as
1998-09-12 22:07:47 +00:00
* a mechanism to support a user-space implementation of the spanning
* tree algorithm.
1998-09-12 22:07:47 +00:00
*
* To build a bridging kernel, use the following option
* option BRIDGE
* and then at runtime set the sysctl variable to enable bridging.
*
* Only one interface per cluster is supposed to have addresses set (but
* there are no substantial problems if you set addresses for none or
* for more than one interface).
1998-09-12 22:07:47 +00:00
* Bridging will act before routing, but nothing prevents a machine
* from doing both (modulo bugs in the implementation...).
*
* THINGS TO REMEMBER
* - bridging is incompatible with multicast routing on the same
* machine. There is not an easy fix to this.
* - be very careful when bridging VLANs
1998-09-12 22:07:47 +00:00
* - loop detection is still not very robust.
*/
#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/protosw.h>
1998-09-12 22:07:47 +00:00
#include <sys/systm.h>
#include <sys/socket.h> /* for net/if.h */
#include <sys/ctype.h> /* string functions */
1998-09-12 22:07:47 +00:00
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <net/pfil.h> /* for ipfilter */
1998-09-12 22:07:47 +00:00
#include <net/if.h>
#include <net/if_types.h>
#include <net/if_var.h>
1998-09-12 22:07:47 +00:00
#include <netinet/in.h> /* for struct arpcom */
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
1998-09-12 22:07:47 +00:00
#include <netinet/if_ether.h> /* for struct arpcom */
#include <net/route.h>
#include <netinet/ip_fw.h>
1998-09-12 22:07:47 +00:00
#include <netinet/ip_dummynet.h>
#include <net/bridge.h>
/*--------------------*/
/*
* For each cluster, source MAC addresses are stored into a hash
* table which locates the port they reside on.
*/
#define HASH_SIZE 8192 /* Table size, must be a power of 2 */
typedef struct hash_table { /* each entry. */
struct ifnet * name;
u_char etheraddr[6];
u_int16_t used; /* also, padding */
} bdg_hash_table ;
/*
* The hash function applied to MAC addresses. Out of the 6 bytes,
* the last ones tend to vary more. Since we are on a little endian machine,
* we have to do some gimmick...
*/
#define HASH_FN(addr) ( \
ntohs( ((u_int16_t *)addr)[1] ^ ((u_int16_t *)addr)[2] ) & (HASH_SIZE -1))
/*
* This is the data structure where local addresses are stored.
*/
struct bdg_addr {
u_char etheraddr[6] ;
u_int16_t _padding ;
};
/*
* The configuration of each cluster includes the cluster id, a pointer to
* the hash table, and an array of local MAC addresses (of size "ports").
*/
struct cluster_softc {
u_int16_t cluster_id;
u_int16_t ports;
bdg_hash_table *ht;
struct bdg_addr *my_macs; /* local MAC addresses */
};
extern struct protosw inetsw[]; /* from netinet/ip_input.c */
extern u_char ip_protox[]; /* from netinet/ip_input.c */
static int n_clusters; /* number of clusters */
static struct cluster_softc *clusters;
#define BDG_MUTED(ifp) (ifp2sc[ifp->if_index].flags & IFF_MUTE)
#define BDG_MUTE(ifp) ifp2sc[ifp->if_index].flags |= IFF_MUTE
#define BDG_CLUSTER(ifp) (ifp2sc[ifp->if_index].cluster)
#define BDG_SAMECLUSTER(ifp,src) \
(src == NULL || BDG_CLUSTER(ifp) == BDG_CLUSTER(src) )
#ifdef __i386__
#define BDG_MATCH(a,b) ( \
((u_int16_t *)(a))[2] == ((u_int16_t *)(b))[2] && \
*((u_int32_t *)(a)) == *((u_int32_t *)(b)) )
#define IS_ETHER_BROADCAST(a) ( \
*((u_int32_t *)(a)) == 0xffffffff && \
((u_int16_t *)(a))[2] == 0xffff )
#else
/* for machines that do not support unaligned access */
#define BDG_MATCH(a,b) (!bcmp(a, b, ETHER_ADDR_LEN) )
#define IS_ETHER_BROADCAST(a) (!bcmp(a, "\377\377\377\377\377\377", 6))
#endif
1998-09-12 22:07:47 +00:00
/*
* For timing-related debugging, you can use the following macros.
1998-09-12 22:07:47 +00:00
* remember, rdtsc() only works on Pentium-class machines
quad_t ticks;
DDB(ticks = rdtsc();)
... interesting code ...
DDB(bdg_fw_ticks += (u_long)(rdtsc() - ticks) ; bdg_fw_count++ ;)
*
*/
#define DDB(x) x
#define DEB(x)
static int bdginit(void);
static void parse_bdg_cfg(void);
1998-09-12 22:07:47 +00:00
static int bdg_ipf; /* IPFilter enabled in bridge */
static int bdg_ipfw;
#if 0 /* debugging only */
static char *bdg_dst_names[] = {
"BDG_NULL ",
"BDG_BCAST ",
"BDG_MCAST ",
"BDG_LOCAL ",
"BDG_DROP ",
"BDG_UNKNOWN ",
"BDG_IN ",
"BDG_OUT ",
"BDG_FORWARD " };
#endif
1998-09-12 22:07:47 +00:00
/*
* System initialization
*/
static struct bdg_stats bdg_stats ;
static struct callout_handle bdg_timeout_h ;
/*
* Add an interface to a cluster, possibly creating a new entry in
* the cluster table. This requires reallocation of the table and
* updating pointers in ifp2sc.
*/
static struct cluster_softc *
add_cluster(u_int16_t cluster_id, struct arpcom *ac)
{
struct cluster_softc *c = NULL;
int i;
for (i = 0; i < n_clusters ; i++)
if (clusters[i].cluster_id == cluster_id)
goto found;
/* Not found, need to reallocate */
c = malloc((1+n_clusters) * sizeof (*c), M_IFADDR, M_NOWAIT | M_ZERO);
if (c == NULL) {/* malloc failure */
printf("-- bridge: cannot add new cluster\n");
return NULL;
}
c[n_clusters].ht = (struct hash_table *)
malloc(HASH_SIZE * sizeof(struct hash_table),
M_IFADDR, M_ZERO);
if (c[n_clusters].ht == NULL) {
printf("-- bridge: cannot allocate hash table for new cluster\n");
free(c, M_IFADDR);
return NULL;
}
c[n_clusters].my_macs = (struct bdg_addr *)
malloc(BDG_MAX_PORTS * sizeof(struct bdg_addr),
M_IFADDR, M_ZERO);
if (c[n_clusters].my_macs == NULL) {
printf("-- bridge: cannot allocate mac addr table for new cluster\n");
free(c[n_clusters].ht, M_IFADDR);
free(c, M_IFADDR);
return NULL;
}
c[n_clusters].cluster_id = cluster_id;
c[n_clusters].ports = 0;
/*
* now copy old descriptors here
*/
if (n_clusters > 0) {
for (i=0; i < n_clusters; i++)
c[i] = clusters[i];
/*
* and finally update pointers in ifp2sc
*/
for (i = 0 ; i < if_index && i < BDG_MAX_PORTS; i++)
if (ifp2sc[i].cluster != NULL)
ifp2sc[i].cluster = c + (ifp2sc[i].cluster - clusters);
free(clusters, M_IFADDR);
}
clusters = c;
i = n_clusters; /* index of cluster entry */
n_clusters++;
found:
c = clusters + i; /* the right cluster ... */
bcopy(ac->ac_enaddr, &(c->my_macs[c->ports]), 6);
c->ports++;
return c;
}
/*
* Turn off bridging, by clearing promisc mode on the interface,
* marking the interface as unused, and clearing the name in the
* stats entry.
* Also dispose the hash tables associated with the clusters.
*/
static void
bridge_off(void)
{
struct ifnet *ifp ;
int i, s;
DEB(printf("bridge_off: n_clusters %d\n", n_clusters);)
2002-12-22 05:35:03 +00:00
IFNET_RLOCK();
TAILQ_FOREACH(ifp, &ifnet, if_link) {
struct bdg_softc *b;
if (ifp->if_index >= BDG_MAX_PORTS)
continue; /* make sure we do not go beyond the end */
b = &(ifp2sc[ifp->if_index]);
if ( b->flags & IFF_BDG_PROMISC ) {
s = splimp();
ifpromisc(ifp, 0);
splx(s);
b->flags &= ~(IFF_BDG_PROMISC|IFF_MUTE) ;
DEB(printf(">> now %s%d promisc OFF if_flags 0x%x bdg_flags 0x%x\n",
ifp->if_name, ifp->if_unit,
ifp->if_flags, b->flags);)
2001-02-02 00:19:25 +00:00
}
b->flags &= ~(IFF_USED) ;
b->cluster = NULL;
bdg_stats.s[ifp->if_index].name[0] = '\0';
}
2002-12-22 05:35:03 +00:00
IFNET_RUNLOCK();
/* flush_tables */
s = splimp();
for (i=0; i < n_clusters; i++) {
free(clusters[i].ht, M_IFADDR);
free(clusters[i].my_macs, M_IFADDR);
}
if (clusters != NULL)
free(clusters, M_IFADDR);
clusters = NULL;
n_clusters =0;
splx(s);
}
/*
* set promisc mode on the interfaces we use.
*/
static void
bridge_on(void)
{
struct ifnet *ifp ;
int s ;
2002-12-22 05:35:03 +00:00
IFNET_RLOCK();
TAILQ_FOREACH(ifp, &ifnet, if_link) {
struct bdg_softc *b = &ifp2sc[ifp->if_index];
if ( !(b->flags & IFF_USED) )
continue ;
if ( !( ifp->if_flags & IFF_UP) ) {
s = splimp();
if_up(ifp);
splx(s);
}
if ( !(b->flags & IFF_BDG_PROMISC) ) {
int ret ;
s = splimp();
ret = ifpromisc(ifp, 1);
splx(s);
b->flags |= IFF_BDG_PROMISC ;
DEB(printf(">> now %s%d promisc ON if_flags 0x%x bdg_flags 0x%x\n",
ifp->if_name, ifp->if_unit,
ifp->if_flags, b->flags);)
}
if (b->flags & IFF_MUTE) {
DEB(printf(">> unmuting %s%d\n", ifp->if_name, ifp->if_unit);)
b->flags &= ~IFF_MUTE;
}
}
2002-12-22 05:35:03 +00:00
IFNET_RUNLOCK();
}
1998-09-12 22:07:47 +00:00
/**
* reconfigure bridge.
* This is also done every time we attach or detach an interface.
* Main use is to make sure that we do not bridge on some old
* (ejected) device. So, it would be really useful to have a
* pointer to the modified device as an argument. Without it, we
* have to scan all interfaces.
*/
static void
reconfigure_bridge(void)
1998-09-12 22:07:47 +00:00
{
bridge_off();
if (do_bridge) {
if (if_index >= BDG_MAX_PORTS) {
printf("-- sorry too many interfaces (%d, max is %d),"
" disabling bridging\n", if_index, BDG_MAX_PORTS);
do_bridge=0;
return;
2001-02-02 00:19:25 +00:00
}
parse_bdg_cfg();
bridge_on();
}
}
static char bridge_cfg[1024]; /* in BSS so initialized to all NULs */
/*
* parse the config string, set IFF_USED, name and cluster_id
* for all interfaces found.
* The config string is a list of "if[:cluster]" with
* a number of possible separators (see "sep"). In particular the
* use of the space lets you set bridge_cfg with the output from
* "ifconfig -l"
*/
static void
parse_bdg_cfg()
{
char *p, *beg ;
int l, cluster;
static char *sep = ", \t";
for (p = bridge_cfg; *p ; p++) {
struct ifnet *ifp;
int found = 0;
char c;
if (index(sep, *p)) /* skip separators */
continue ;
/* names are lowercase and digits */
for ( beg = p ; islower(*p) || isdigit(*p) ; p++ )
;
l = p - beg ; /* length of name string */
if (l == 0) /* invalid name */
break ;
if ( *p != ':' ) /* no ':', assume default cluster 1 */
cluster = 1 ;
else /* fetch cluster */
cluster = strtoul( p+1, &p, 10);
c = *p;
*p = '\0';
/*
* now search in interface list for a matching name
*/
2002-12-22 05:35:03 +00:00
IFNET_RLOCK(); /* could sleep XXX */
TAILQ_FOREACH(ifp, &ifnet, if_link) {
char buf[IFNAMSIZ];
snprintf(buf, sizeof(buf), "%s%d", ifp->if_name, ifp->if_unit);
if (!strncmp(beg, buf, max(l, strlen(buf)))) {
struct bdg_softc *b = &ifp2sc[ifp->if_index];
if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN) {
printf("%s is not an ethernet, continue\n", buf);
continue;
}
if (b->flags & IFF_USED) {
printf("%s already used, skipping\n", buf);
break;
}
b->cluster = add_cluster(htons(cluster), (struct arpcom *)ifp);
b->flags |= IFF_USED ;
sprintf(bdg_stats.s[ifp->if_index].name,
"%s%d:%d", ifp->if_name, ifp->if_unit, cluster);
DEB(printf("--++ found %s next c %d\n",
bdg_stats.s[ifp->if_index].name, c);)
found = 1;
break ;
}
}
2002-12-22 05:35:03 +00:00
IFNET_RUNLOCK();
if (!found)
printf("interface %s Not found in bridge\n", beg);
*p = c;
if (c == '\0')
break; /* no more */
}
}
/*
* handler for net.link.ether.bridge
*/
static int
sysctl_bdg(SYSCTL_HANDLER_ARGS)
{
int error, oldval = do_bridge ;
error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
DEB( printf("called sysctl for bridge name %s arg2 %d val %d->%d\n",
oidp->oid_name, oidp->oid_arg2,
oldval, do_bridge); )
if (oldval != do_bridge)
reconfigure_bridge();
return error ;
}
/*
* handler for net.link.ether.bridge_cfg
*/
static int
sysctl_bdg_cfg(SYSCTL_HANDLER_ARGS)
{
int error = 0 ;
char old_cfg[1024] ;
strcpy(old_cfg, bridge_cfg) ;
error = sysctl_handle_string(oidp, bridge_cfg, oidp->oid_arg2, req);
DEB(
printf("called sysctl for bridge name %s arg2 %d err %d val %s->%s\n",
oidp->oid_name, oidp->oid_arg2,
error,
old_cfg, bridge_cfg);
)
if (strcmp(old_cfg, bridge_cfg))
reconfigure_bridge();
1998-09-12 22:07:47 +00:00
return error ;
}
static int
sysctl_refresh(SYSCTL_HANDLER_ARGS)
{
if (req->newptr)
reconfigure_bridge();
return 0;
}
SYSCTL_DECL(_net_link_ether);
SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge_cfg, CTLTYPE_STRING|CTLFLAG_RW,
&bridge_cfg, sizeof(bridge_cfg), &sysctl_bdg_cfg, "A",
"Bridge configuration");
1998-09-12 22:07:47 +00:00
SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge, CTLTYPE_INT|CTLFLAG_RW,
&do_bridge, 0, &sysctl_bdg, "I", "Bridging");
SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw, CTLFLAG_RW,
&bdg_ipfw,0,"Pass bridged pkts through firewall");
SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipf, CTLFLAG_RW,
&bdg_ipf, 0,"Pass bridged pkts through IPFilter");
/*
* The follow macro declares a variable, and maps it to
* a SYSCTL_INT entry with the same name.
*/
#define SY(parent, var, comment) \
static int var ; \
SYSCTL_INT(parent, OID_AUTO, var, CTLFLAG_RW, &(var), 0, comment);
int bdg_ipfw_drops;
SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw_drop,
CTLFLAG_RW, &bdg_ipfw_drops,0,"");
int bdg_ipfw_colls;
SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw_collisions,
CTLFLAG_RW, &bdg_ipfw_colls,0,"");
1998-09-12 22:07:47 +00:00
SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge_refresh, CTLTYPE_INT|CTLFLAG_WR,
NULL, 0, &sysctl_refresh, "I", "iface refresh");
#if 1 /* diagnostic vars */
2001-02-02 00:19:25 +00:00
SY(_net_link_ether, verbose, "Be verbose");
SY(_net_link_ether, bdg_split_pkts, "Packets split in bdg_forward");
SY(_net_link_ether, bdg_thru, "Packets through bridge");
SY(_net_link_ether, bdg_copied, "Packets copied in bdg_forward");
SY(_net_link_ether, bdg_dropped, "Packets dropped in bdg_forward");
SY(_net_link_ether, bdg_copy, "Force copy in bdg_forward");
SY(_net_link_ether, bdg_predict, "Correctly predicted header location");
SY(_net_link_ether, bdg_fw_avg, "Cycle counter avg");
SY(_net_link_ether, bdg_fw_ticks, "Cycle counter item");
SY(_net_link_ether, bdg_fw_count, "Cycle counter count");
#endif
1998-09-12 22:07:47 +00:00
SYSCTL_STRUCT(_net_link_ether, PF_BDG, bdgstats,
CTLFLAG_RD, &bdg_stats , bdg_stats, "bridge statistics");
1998-09-12 22:07:47 +00:00
static int bdg_loops ;
/*
* called periodically to flush entries etc.
*/
static void
bdg_timeout(void *dummy)
{
static int slowtimer; /* in BSS so initialized to 0 */
1998-09-12 22:07:47 +00:00
if (do_bridge) {
static int age_index = 0 ; /* index of table position to age */
int l = age_index + HASH_SIZE/4 ;
int i;
1998-09-12 22:07:47 +00:00
/*
* age entries in the forwarding table.
*/
if (l > HASH_SIZE)
l = HASH_SIZE ;
for (i=0; i<n_clusters; i++) {
bdg_hash_table *bdg_table = clusters[i].ht;
for (; age_index < l ; age_index++)
if (bdg_table[age_index].used)
bdg_table[age_index].used = 0 ;
else if (bdg_table[age_index].name) {
/* printf("xx flushing stale entry %d\n", age_index); */
bdg_table[age_index].name = NULL ;
}
}
1998-09-12 22:07:47 +00:00
if (age_index >= HASH_SIZE)
age_index = 0 ;
if (--slowtimer <= 0 ) {
slowtimer = 5 ;
bridge_on() ; /* we just need unmute, really */
1998-09-12 22:07:47 +00:00
bdg_loops = 0 ;
}
}
bdg_timeout_h = timeout(bdg_timeout, NULL, 2*hz );
1998-09-12 22:07:47 +00:00
}
/*
* Find the right pkt destination:
* BDG_BCAST is a broadcast
* BDG_MCAST is a multicast
* BDG_LOCAL is for a local address
* BDG_DROP must be dropped
* other ifp of the dest. interface (incl.self)
*
* We assume this is only called for interfaces for which bridging
* is enabled, i.e. BDG_USED(ifp) is true.
*/
static __inline
struct ifnet *
bridge_dst_lookup(struct ether_header *eh, struct cluster_softc *c)
{
struct ifnet *dst ;
int index ;
struct bdg_addr *p ;
bdg_hash_table *bt; /* pointer to entry in hash table */
if (IS_ETHER_BROADCAST(eh->ether_dhost))
return BDG_BCAST ;
if (eh->ether_dhost[0] & 1)
return BDG_MCAST ;
/*
* Lookup local addresses in case one matches.
*/
for (index = c->ports, p = c->my_macs; index ; index--, p++ )
if (BDG_MATCH(p->etheraddr, eh->ether_dhost) )
return BDG_LOCAL ;
/*
* Look for a possible destination in table
*/
index= HASH_FN( eh->ether_dhost );
bt = &(c->ht[index]);
dst = bt->name;
if ( dst && BDG_MATCH( bt->etheraddr, eh->ether_dhost) )
return dst ;
else
return BDG_UNKNOWN ;
1998-09-12 22:07:47 +00:00
}
/**
1998-09-12 22:07:47 +00:00
* bridge_in() is invoked to perform bridging decision on input packets.
*
1998-09-12 22:07:47 +00:00
* On Input:
* eh Ethernet header of the incoming packet.
* ifp interface the packet is coming from.
1998-09-12 22:07:47 +00:00
*
* On Return: destination of packet, one of
* BDG_BCAST broadcast
* BDG_MCAST multicast
* BDG_LOCAL is only for a local address (do not forward)
* BDG_DROP drop the packet
* ifp ifp of the destination interface.
*
* Forwarding is not done directly to give a chance to some drivers
* to fetch more of the packet, or simply drop it completely.
*/
static struct ifnet *
bridge_in(struct ifnet *ifp, struct ether_header *eh)
1998-09-12 22:07:47 +00:00
{
int index;
struct ifnet *dst , *old ;
bdg_hash_table *bt; /* location in hash table */
2001-02-02 00:19:25 +00:00
int dropit = BDG_MUTED(ifp) ;
1998-09-12 22:07:47 +00:00
/*
* hash the source address
*/
index= HASH_FN(eh->ether_shost);
bt = &(ifp2sc[ifp->if_index].cluster->ht[index]);
bt->used = 1 ;
old = bt->name ;
1998-09-12 22:07:47 +00:00
if ( old ) { /* the entry is valid. */
if (!BDG_MATCH( eh->ether_shost, bt->etheraddr) ) {
bdg_ipfw_colls++ ;
bt->name = NULL ;
} else if (old != ifp) {
1998-09-12 22:07:47 +00:00
/*
* Found a loop. Either a machine has moved, or there
1998-09-12 22:07:47 +00:00
* is a misconfiguration/reconfiguration of the network.
* First, do not forward this packet!
* Record the relocation anyways; then, if loops persist,
* suspect a reconfiguration and disable forwarding
* from the old interface.
*/
bt->name = ifp ; /* relocate address */
1998-09-12 22:07:47 +00:00
printf("-- loop (%d) %6D to %s%d from %s%d (%s)\n",
bdg_loops, eh->ether_shost, ".",
ifp->if_name, ifp->if_unit,
old->if_name, old->if_unit,
2001-02-02 00:19:25 +00:00
BDG_MUTED(old) ? "muted":"active");
1998-09-12 22:07:47 +00:00
dropit = 1 ;
2001-02-02 00:19:25 +00:00
if ( !BDG_MUTED(old) ) {
1998-09-12 22:07:47 +00:00
if (++bdg_loops > 10)
2001-02-02 00:19:25 +00:00
BDG_MUTE(old) ;
1998-09-12 22:07:47 +00:00
}
}
1998-09-12 22:07:47 +00:00
}
/*
* now write the source address into the table
*/
if (bt->name == NULL) {
1998-09-12 22:07:47 +00:00
DEB(printf("new addr %6D at %d for %s%d\n",
eh->ether_shost, ".", index, ifp->if_name, ifp->if_unit);)
bcopy(eh->ether_shost, bt->etheraddr, 6);
bt->name = ifp ;
1998-09-12 22:07:47 +00:00
}
dst = bridge_dst_lookup(eh, ifp2sc[ifp->if_index].cluster);
/*
* bridge_dst_lookup can return the following values:
1998-09-12 22:07:47 +00:00
* BDG_BCAST, BDG_MCAST, BDG_LOCAL, BDG_UNKNOWN, BDG_DROP, ifp.
* For muted interfaces, or when we detect a loop, the first 3 are
* changed in BDG_LOCAL (we still listen to incoming traffic),
* and others to BDG_DROP (no use for the local host).
* Also, for incoming packets, ifp is changed to BDG_DROP if ifp == src.
* These changes are not necessary for outgoing packets from ether_output().
1998-09-12 22:07:47 +00:00
*/
BDG_STAT(ifp, BDG_IN);
switch ((uintptr_t)dst) {
case (uintptr_t)BDG_BCAST:
case (uintptr_t)BDG_MCAST:
case (uintptr_t)BDG_LOCAL:
case (uintptr_t)BDG_UNKNOWN:
case (uintptr_t)BDG_DROP:
1998-09-12 22:07:47 +00:00
BDG_STAT(ifp, dst);
break ;
default :
if (dst == ifp || dropit)
1998-09-12 22:07:47 +00:00
BDG_STAT(ifp, BDG_DROP);
else
BDG_STAT(ifp, BDG_FORWARD);
break ;
}
if ( dropit ) {
if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_LOCAL)
dst = BDG_LOCAL ;
1998-09-12 22:07:47 +00:00
else
dst = BDG_DROP ;
1998-09-12 22:07:47 +00:00
} else {
if (dst == ifp)
dst = BDG_DROP;
1998-09-12 22:07:47 +00:00
}
DEB(printf("bridge_in %6D ->%6D ty 0x%04x dst %s%d\n",
eh->ether_shost, ".",
eh->ether_dhost, ".",
ntohs(eh->ether_type),
(dst <= BDG_FORWARD) ? bdg_dst_names[(int)dst] :
dst->if_name,
(dst <= BDG_FORWARD) ? 0 : dst->if_unit); )
return dst ;
1998-09-12 22:07:47 +00:00
}
/*
* Forward a packet to dst -- which can be a single interface or
* an entire cluster. The src port and muted interfaces are excluded.
*
* If src == NULL, the pkt comes from ether_output, and dst is the real
* interface the packet is originally sent to. In this case, we must forward
* it to the whole cluster.
* We never call bdg_forward from ether_output on interfaces which are
* not part of a cluster.
*
* If possible (i.e. we can determine that the caller does not need
* a copy), the packet is consumed here, and bdg_forward returns NULL.
* Otherwise, a pointer to a copy of the packet is returned.
1998-09-12 22:07:47 +00:00
*/
static struct mbuf *
bdg_forward(struct mbuf *m0, struct ifnet *dst)
1998-09-12 22:07:47 +00:00
{
#define EH_RESTORE(_m) do { \
M_PREPEND((_m), ETHER_HDR_LEN, M_NOWAIT); \
if ((_m) == NULL) { \
bdg_dropped++; \
return NULL; \
} \
if (eh != mtod((_m), struct ether_header *)) \
bcopy(&save_eh, mtod((_m), struct ether_header *), ETHER_HDR_LEN); \
else \
bdg_predict++; \
} while (0);
struct ether_header *eh;
Remove (almost all) global variables that were used to hold packet forwarding state ("annotations") during ip processing. The code is considerably cleaner now. The variables removed by this change are: ip_divert_cookie used by divert sockets ip_fw_fwd_addr used for transparent ip redirection last_pkt used by dynamic pipes in dummynet Removal of the first two has been done by carrying the annotations into volatile structs prepended to the mbuf chains, and adding appropriate code to add/remove annotations in the routines which make use of them, i.e. ip_input(), ip_output(), tcp_input(), bdg_forward(), ether_demux(), ether_output_frame(), div_output(). On passing, remove a bug in divert handling of fragmented packet. Now it is the fragment at offset 0 which sets the divert status of the whole packet, whereas formerly it was the last incoming fragment to decide. Removal of last_pkt required a change in the interface of ip_fw_chk() and dummynet_io(). On passing, use the same mechanism for dummynet annotations and for divert/forward annotations. option IPFIREWALL_FORWARD is effectively useless, the code to implement it is very small and is now in by default to avoid the obfuscation of conditionally compiled code. NOTES: * there is at least one global variable left, sro_fwd, in ip_output(). I am not sure if/how this can be removed. * I have deliberately avoided gratuitous style changes in this commit to avoid cluttering the diffs. Minor stule cleanup will likely be necessary * this commit only focused on the IP layer. I am sure there is a number of global variables used in the TCP and maybe UDP stack. * despite the number of files touched, there are absolutely no API's or data structures changed by this commit (except the interfaces of ip_fw_chk() and dummynet_io(), which are internal anyways), so an MFC is quite safe and unintrusive (and desirable, given the improved readability of the code). MFC after: 10 days
2002-06-22 11:51:02 +00:00
struct ifnet *src;
struct ifnet *ifp, *last;
2001-02-02 00:19:25 +00:00
int shared = bdg_copy ; /* someone else is using the mbuf */
int once = 0; /* loop only once */
struct ifnet *real_dst = dst ; /* real dst from ether_output */
Remove (almost all) global variables that were used to hold packet forwarding state ("annotations") during ip processing. The code is considerably cleaner now. The variables removed by this change are: ip_divert_cookie used by divert sockets ip_fw_fwd_addr used for transparent ip redirection last_pkt used by dynamic pipes in dummynet Removal of the first two has been done by carrying the annotations into volatile structs prepended to the mbuf chains, and adding appropriate code to add/remove annotations in the routines which make use of them, i.e. ip_input(), ip_output(), tcp_input(), bdg_forward(), ether_demux(), ether_output_frame(), div_output(). On passing, remove a bug in divert handling of fragmented packet. Now it is the fragment at offset 0 which sets the divert status of the whole packet, whereas formerly it was the last incoming fragment to decide. Removal of last_pkt required a change in the interface of ip_fw_chk() and dummynet_io(). On passing, use the same mechanism for dummynet annotations and for divert/forward annotations. option IPFIREWALL_FORWARD is effectively useless, the code to implement it is very small and is now in by default to avoid the obfuscation of conditionally compiled code. NOTES: * there is at least one global variable left, sro_fwd, in ip_output(). I am not sure if/how this can be removed. * I have deliberately avoided gratuitous style changes in this commit to avoid cluttering the diffs. Minor stule cleanup will likely be necessary * this commit only focused on the IP layer. I am sure there is a number of global variables used in the TCP and maybe UDP stack. * despite the number of files touched, there are absolutely no API's or data structures changed by this commit (except the interfaces of ip_fw_chk() and dummynet_io(), which are internal anyways), so an MFC is quite safe and unintrusive (and desirable, given the improved readability of the code). MFC after: 10 days
2002-06-22 11:51:02 +00:00
struct ip_fw_args args;
#ifdef PFIL_HOOKS
struct packet_filter_hook *pfh;
int rv;
#endif /* PFIL_HOOKS */
struct ether_header save_eh;
DEB(quad_t ticks; ticks = rdtsc();)
Remove (almost all) global variables that were used to hold packet forwarding state ("annotations") during ip processing. The code is considerably cleaner now. The variables removed by this change are: ip_divert_cookie used by divert sockets ip_fw_fwd_addr used for transparent ip redirection last_pkt used by dynamic pipes in dummynet Removal of the first two has been done by carrying the annotations into volatile structs prepended to the mbuf chains, and adding appropriate code to add/remove annotations in the routines which make use of them, i.e. ip_input(), ip_output(), tcp_input(), bdg_forward(), ether_demux(), ether_output_frame(), div_output(). On passing, remove a bug in divert handling of fragmented packet. Now it is the fragment at offset 0 which sets the divert status of the whole packet, whereas formerly it was the last incoming fragment to decide. Removal of last_pkt required a change in the interface of ip_fw_chk() and dummynet_io(). On passing, use the same mechanism for dummynet annotations and for divert/forward annotations. option IPFIREWALL_FORWARD is effectively useless, the code to implement it is very small and is now in by default to avoid the obfuscation of conditionally compiled code. NOTES: * there is at least one global variable left, sro_fwd, in ip_output(). I am not sure if/how this can be removed. * I have deliberately avoided gratuitous style changes in this commit to avoid cluttering the diffs. Minor stule cleanup will likely be necessary * this commit only focused on the IP layer. I am sure there is a number of global variables used in the TCP and maybe UDP stack. * despite the number of files touched, there are absolutely no API's or data structures changed by this commit (except the interfaces of ip_fw_chk() and dummynet_io(), which are internal anyways), so an MFC is quite safe and unintrusive (and desirable, given the improved readability of the code). MFC after: 10 days
2002-06-22 11:51:02 +00:00
args.rule = NULL; /* did we match a firewall rule ? */
/* Fetch state from dummynet tag, ignore others */
for (;m0->m_type == MT_TAG; m0 = m0->m_next)
if (m0->_m_tag_id == PACKET_TAG_DUMMYNET) {
Remove (almost all) global variables that were used to hold packet forwarding state ("annotations") during ip processing. The code is considerably cleaner now. The variables removed by this change are: ip_divert_cookie used by divert sockets ip_fw_fwd_addr used for transparent ip redirection last_pkt used by dynamic pipes in dummynet Removal of the first two has been done by carrying the annotations into volatile structs prepended to the mbuf chains, and adding appropriate code to add/remove annotations in the routines which make use of them, i.e. ip_input(), ip_output(), tcp_input(), bdg_forward(), ether_demux(), ether_output_frame(), div_output(). On passing, remove a bug in divert handling of fragmented packet. Now it is the fragment at offset 0 which sets the divert status of the whole packet, whereas formerly it was the last incoming fragment to decide. Removal of last_pkt required a change in the interface of ip_fw_chk() and dummynet_io(). On passing, use the same mechanism for dummynet annotations and for divert/forward annotations. option IPFIREWALL_FORWARD is effectively useless, the code to implement it is very small and is now in by default to avoid the obfuscation of conditionally compiled code. NOTES: * there is at least one global variable left, sro_fwd, in ip_output(). I am not sure if/how this can be removed. * I have deliberately avoided gratuitous style changes in this commit to avoid cluttering the diffs. Minor stule cleanup will likely be necessary * this commit only focused on the IP layer. I am sure there is a number of global variables used in the TCP and maybe UDP stack. * despite the number of files touched, there are absolutely no API's or data structures changed by this commit (except the interfaces of ip_fw_chk() and dummynet_io(), which are internal anyways), so an MFC is quite safe and unintrusive (and desirable, given the improved readability of the code). MFC after: 10 days
2002-06-22 11:51:02 +00:00
args.rule = ((struct dn_pkt *)m0)->rule;
shared = 0; /* For sure this is our own mbuf. */
}
if (args.rule == NULL)
bdg_thru++; /* first time through bdg_forward, count packet */
/*
* The packet arrives with the Ethernet header at the front.
*/
eh = mtod(m0, struct ether_header *);
Remove (almost all) global variables that were used to hold packet forwarding state ("annotations") during ip processing. The code is considerably cleaner now. The variables removed by this change are: ip_divert_cookie used by divert sockets ip_fw_fwd_addr used for transparent ip redirection last_pkt used by dynamic pipes in dummynet Removal of the first two has been done by carrying the annotations into volatile structs prepended to the mbuf chains, and adding appropriate code to add/remove annotations in the routines which make use of them, i.e. ip_input(), ip_output(), tcp_input(), bdg_forward(), ether_demux(), ether_output_frame(), div_output(). On passing, remove a bug in divert handling of fragmented packet. Now it is the fragment at offset 0 which sets the divert status of the whole packet, whereas formerly it was the last incoming fragment to decide. Removal of last_pkt required a change in the interface of ip_fw_chk() and dummynet_io(). On passing, use the same mechanism for dummynet annotations and for divert/forward annotations. option IPFIREWALL_FORWARD is effectively useless, the code to implement it is very small and is now in by default to avoid the obfuscation of conditionally compiled code. NOTES: * there is at least one global variable left, sro_fwd, in ip_output(). I am not sure if/how this can be removed. * I have deliberately avoided gratuitous style changes in this commit to avoid cluttering the diffs. Minor stule cleanup will likely be necessary * this commit only focused on the IP layer. I am sure there is a number of global variables used in the TCP and maybe UDP stack. * despite the number of files touched, there are absolutely no API's or data structures changed by this commit (except the interfaces of ip_fw_chk() and dummynet_io(), which are internal anyways), so an MFC is quite safe and unintrusive (and desirable, given the improved readability of the code). MFC after: 10 days
2002-06-22 11:51:02 +00:00
src = m0->m_pkthdr.rcvif;
if (src == NULL) /* packet from ether_output */
dst = bridge_dst_lookup(eh, ifp2sc[real_dst->if_index].cluster);
1998-09-12 22:07:47 +00:00
if (dst == BDG_DROP) { /* this should not happen */
printf("xx bdg_forward for BDG_DROP\n");
m_freem(m0);
bdg_dropped++;
2001-02-02 00:19:25 +00:00
return NULL;
1998-09-12 22:07:47 +00:00
}
if (dst == BDG_LOCAL) { /* this should not happen as well */
printf("xx ouch, bdg_forward for local pkt\n");
2001-02-02 00:19:25 +00:00
return m0;
1998-09-12 22:07:47 +00:00
}
2002-12-22 05:35:03 +00:00
if (dst == BDG_BCAST || dst == BDG_MCAST) {
/* need a copy for the local stack */
shared = 1 ;
1998-09-12 22:07:47 +00:00
}
1998-09-12 22:07:47 +00:00
/*
* Do filtering in a very similar way to what is done in ip_output.
* Only if firewall is loaded, enabled, and the packet is not
* from ether_output() (src==NULL, or we would filter it twice).
* Additional restrictions may apply e.g. non-IP, short packets,
* and pkts already gone through a pipe.
1998-09-12 22:07:47 +00:00
*/
if (src != NULL && (
#ifdef PFIL_HOOKS
((pfh = pfil_hook_get(PFIL_IN, &inetsw[ip_protox[IPPROTO_IP]].pr_pfh)) != NULL && bdg_ipf !=0) ||
#endif
(IPFW_LOADED && bdg_ipfw != 0))) {
2001-02-02 00:19:25 +00:00
int i;
1998-09-12 22:07:47 +00:00
if (args.rule != NULL && fw_one_pass)
goto forward; /* packet already partially processed */
/*
2001-02-02 00:19:25 +00:00
* i need some amt of data to be contiguous, and in case others need
* the packet (shared==1) also better be in the first mbuf.
*/
2001-02-02 00:19:25 +00:00
i = min(m0->m_pkthdr.len, max_protohdr) ;
if ( shared || m0->m_len < i) {
m0 = m_pullup(m0, i) ;
if (m0 == NULL) {
printf("-- bdg: pullup failed.\n") ;
bdg_dropped++;
2001-02-02 00:19:25 +00:00
return NULL ;
}
eh = mtod(m0, struct ether_header *);
1998-09-12 22:07:47 +00:00
}
/*
* Processing below expects the Ethernet header is stripped.
* Furthermore, the mbuf chain might be replaced at various
* places. To deal with this we copy the header to a temporary
* location, strip the header, and restore it as needed.
*/
bcopy(eh, &save_eh, ETHER_HDR_LEN); /* local copy for restore */
m_adj(m0, ETHER_HDR_LEN); /* temporarily strip header */
#ifdef PFIL_HOOKS
/*
* NetBSD-style generic packet filter, pfil(9), hooks.
* Enables ipf(8) in bridging.
*/
if (pfh != NULL && m0->m_pkthdr.len >= sizeof(struct ip) &&
ntohs(save_eh.ether_type) == ETHERTYPE_IP) {
/*
* before calling the firewall, swap fields the same as IP does.
* here we assume the pkt is an IP one and the header is contiguous
*/
struct ip *ip = mtod(m0, struct ip *);
ip->ip_len = ntohs(ip->ip_len);
ip->ip_off = ntohs(ip->ip_off);
do {
if (pfh->pfil_func) {
rv = pfh->pfil_func(ip, ip->ip_hl << 2, src, 0, &m0);
if (m0 == NULL) {
bdg_dropped++;
return NULL;
}
if (rv != 0) {
EH_RESTORE(m0); /* restore Ethernet header */
return m0;
}
ip = mtod(m0, struct ip *);
}
} while ((pfh = TAILQ_NEXT(pfh, pfil_link)) != NULL);
/*
* If we get here, the firewall has passed the pkt, but the mbuf
* pointer might have changed. Restore ip and the fields ntohs()'d.
*/
ip = mtod(m0, struct ip *);
ip->ip_len = htons(ip->ip_len);
ip->ip_off = htons(ip->ip_off);
}
#endif /* PFIL_HOOKS */
/*
Remove (almost all) global variables that were used to hold packet forwarding state ("annotations") during ip processing. The code is considerably cleaner now. The variables removed by this change are: ip_divert_cookie used by divert sockets ip_fw_fwd_addr used for transparent ip redirection last_pkt used by dynamic pipes in dummynet Removal of the first two has been done by carrying the annotations into volatile structs prepended to the mbuf chains, and adding appropriate code to add/remove annotations in the routines which make use of them, i.e. ip_input(), ip_output(), tcp_input(), bdg_forward(), ether_demux(), ether_output_frame(), div_output(). On passing, remove a bug in divert handling of fragmented packet. Now it is the fragment at offset 0 which sets the divert status of the whole packet, whereas formerly it was the last incoming fragment to decide. Removal of last_pkt required a change in the interface of ip_fw_chk() and dummynet_io(). On passing, use the same mechanism for dummynet annotations and for divert/forward annotations. option IPFIREWALL_FORWARD is effectively useless, the code to implement it is very small and is now in by default to avoid the obfuscation of conditionally compiled code. NOTES: * there is at least one global variable left, sro_fwd, in ip_output(). I am not sure if/how this can be removed. * I have deliberately avoided gratuitous style changes in this commit to avoid cluttering the diffs. Minor stule cleanup will likely be necessary * this commit only focused on the IP layer. I am sure there is a number of global variables used in the TCP and maybe UDP stack. * despite the number of files touched, there are absolutely no API's or data structures changed by this commit (except the interfaces of ip_fw_chk() and dummynet_io(), which are internal anyways), so an MFC is quite safe and unintrusive (and desirable, given the improved readability of the code). MFC after: 10 days
2002-06-22 11:51:02 +00:00
* Prepare arguments and call the firewall.
*/
if (!IPFW_LOADED || bdg_ipfw == 0) {
EH_RESTORE(m0); /* restore Ethernet header */
goto forward; /* not using ipfw, accept the packet */
}
Remove (almost all) global variables that were used to hold packet forwarding state ("annotations") during ip processing. The code is considerably cleaner now. The variables removed by this change are: ip_divert_cookie used by divert sockets ip_fw_fwd_addr used for transparent ip redirection last_pkt used by dynamic pipes in dummynet Removal of the first two has been done by carrying the annotations into volatile structs prepended to the mbuf chains, and adding appropriate code to add/remove annotations in the routines which make use of them, i.e. ip_input(), ip_output(), tcp_input(), bdg_forward(), ether_demux(), ether_output_frame(), div_output(). On passing, remove a bug in divert handling of fragmented packet. Now it is the fragment at offset 0 which sets the divert status of the whole packet, whereas formerly it was the last incoming fragment to decide. Removal of last_pkt required a change in the interface of ip_fw_chk() and dummynet_io(). On passing, use the same mechanism for dummynet annotations and for divert/forward annotations. option IPFIREWALL_FORWARD is effectively useless, the code to implement it is very small and is now in by default to avoid the obfuscation of conditionally compiled code. NOTES: * there is at least one global variable left, sro_fwd, in ip_output(). I am not sure if/how this can be removed. * I have deliberately avoided gratuitous style changes in this commit to avoid cluttering the diffs. Minor stule cleanup will likely be necessary * this commit only focused on the IP layer. I am sure there is a number of global variables used in the TCP and maybe UDP stack. * despite the number of files touched, there are absolutely no API's or data structures changed by this commit (except the interfaces of ip_fw_chk() and dummynet_io(), which are internal anyways), so an MFC is quite safe and unintrusive (and desirable, given the improved readability of the code). MFC after: 10 days
2002-06-22 11:51:02 +00:00
/*
* XXX The following code is very similar to the one in
* if_ethersubr.c:ether_ipfw_chk()
*/
args.m = m0; /* the packet we are looking at */
args.oif = NULL; /* this is an input packet */
args.divert_rule = 0; /* we do not support divert yet */
Remove (almost all) global variables that were used to hold packet forwarding state ("annotations") during ip processing. The code is considerably cleaner now. The variables removed by this change are: ip_divert_cookie used by divert sockets ip_fw_fwd_addr used for transparent ip redirection last_pkt used by dynamic pipes in dummynet Removal of the first two has been done by carrying the annotations into volatile structs prepended to the mbuf chains, and adding appropriate code to add/remove annotations in the routines which make use of them, i.e. ip_input(), ip_output(), tcp_input(), bdg_forward(), ether_demux(), ether_output_frame(), div_output(). On passing, remove a bug in divert handling of fragmented packet. Now it is the fragment at offset 0 which sets the divert status of the whole packet, whereas formerly it was the last incoming fragment to decide. Removal of last_pkt required a change in the interface of ip_fw_chk() and dummynet_io(). On passing, use the same mechanism for dummynet annotations and for divert/forward annotations. option IPFIREWALL_FORWARD is effectively useless, the code to implement it is very small and is now in by default to avoid the obfuscation of conditionally compiled code. NOTES: * there is at least one global variable left, sro_fwd, in ip_output(). I am not sure if/how this can be removed. * I have deliberately avoided gratuitous style changes in this commit to avoid cluttering the diffs. Minor stule cleanup will likely be necessary * this commit only focused on the IP layer. I am sure there is a number of global variables used in the TCP and maybe UDP stack. * despite the number of files touched, there are absolutely no API's or data structures changed by this commit (except the interfaces of ip_fw_chk() and dummynet_io(), which are internal anyways), so an MFC is quite safe and unintrusive (and desirable, given the improved readability of the code). MFC after: 10 days
2002-06-22 11:51:02 +00:00
args.next_hop = NULL; /* we do not support forward yet */
args.eh = &save_eh; /* MAC header for bridged/MAC packets */
i = ip_fw_chk_ptr(&args);
m0 = args.m; /* in case the firewall used the mbuf */
if (m0 != NULL)
EH_RESTORE(m0); /* restore Ethernet header */
if ( (i & IP_FW_PORT_DENY_FLAG) || m0 == NULL) /* drop */
return m0 ;
2001-02-02 00:19:25 +00:00
if (i == 0) /* a PASS rule. */
1998-09-12 22:07:47 +00:00
goto forward ;
if (DUMMYNET_LOADED && (i & IP_FW_PORT_DYNT_FLAG)) {
1998-09-12 22:07:47 +00:00
/*
* Pass the pkt to dummynet, which consumes it.
* If shared, make a copy and keep the original.
1998-09-12 22:07:47 +00:00
*/
2001-02-02 00:19:25 +00:00
struct mbuf *m ;
Remove (almost all) global variables that were used to hold packet forwarding state ("annotations") during ip processing. The code is considerably cleaner now. The variables removed by this change are: ip_divert_cookie used by divert sockets ip_fw_fwd_addr used for transparent ip redirection last_pkt used by dynamic pipes in dummynet Removal of the first two has been done by carrying the annotations into volatile structs prepended to the mbuf chains, and adding appropriate code to add/remove annotations in the routines which make use of them, i.e. ip_input(), ip_output(), tcp_input(), bdg_forward(), ether_demux(), ether_output_frame(), div_output(). On passing, remove a bug in divert handling of fragmented packet. Now it is the fragment at offset 0 which sets the divert status of the whole packet, whereas formerly it was the last incoming fragment to decide. Removal of last_pkt required a change in the interface of ip_fw_chk() and dummynet_io(). On passing, use the same mechanism for dummynet annotations and for divert/forward annotations. option IPFIREWALL_FORWARD is effectively useless, the code to implement it is very small and is now in by default to avoid the obfuscation of conditionally compiled code. NOTES: * there is at least one global variable left, sro_fwd, in ip_output(). I am not sure if/how this can be removed. * I have deliberately avoided gratuitous style changes in this commit to avoid cluttering the diffs. Minor stule cleanup will likely be necessary * this commit only focused on the IP layer. I am sure there is a number of global variables used in the TCP and maybe UDP stack. * despite the number of files touched, there are absolutely no API's or data structures changed by this commit (except the interfaces of ip_fw_chk() and dummynet_io(), which are internal anyways), so an MFC is quite safe and unintrusive (and desirable, given the improved readability of the code). MFC after: 10 days
2002-06-22 11:51:02 +00:00
2001-02-02 00:19:25 +00:00
if (shared) {
m = m_copypacket(m0, M_NOWAIT);
if (m == NULL) { /* copy failed, give up */
bdg_dropped++;
return NULL;
}
2001-02-02 00:19:25 +00:00
} else {
m = m0 ; /* pass the original to dummynet */
m0 = NULL ; /* and nothing back to the caller */
}
Remove (almost all) global variables that were used to hold packet forwarding state ("annotations") during ip processing. The code is considerably cleaner now. The variables removed by this change are: ip_divert_cookie used by divert sockets ip_fw_fwd_addr used for transparent ip redirection last_pkt used by dynamic pipes in dummynet Removal of the first two has been done by carrying the annotations into volatile structs prepended to the mbuf chains, and adding appropriate code to add/remove annotations in the routines which make use of them, i.e. ip_input(), ip_output(), tcp_input(), bdg_forward(), ether_demux(), ether_output_frame(), div_output(). On passing, remove a bug in divert handling of fragmented packet. Now it is the fragment at offset 0 which sets the divert status of the whole packet, whereas formerly it was the last incoming fragment to decide. Removal of last_pkt required a change in the interface of ip_fw_chk() and dummynet_io(). On passing, use the same mechanism for dummynet annotations and for divert/forward annotations. option IPFIREWALL_FORWARD is effectively useless, the code to implement it is very small and is now in by default to avoid the obfuscation of conditionally compiled code. NOTES: * there is at least one global variable left, sro_fwd, in ip_output(). I am not sure if/how this can be removed. * I have deliberately avoided gratuitous style changes in this commit to avoid cluttering the diffs. Minor stule cleanup will likely be necessary * this commit only focused on the IP layer. I am sure there is a number of global variables used in the TCP and maybe UDP stack. * despite the number of files touched, there are absolutely no API's or data structures changed by this commit (except the interfaces of ip_fw_chk() and dummynet_io(), which are internal anyways), so an MFC is quite safe and unintrusive (and desirable, given the improved readability of the code). MFC after: 10 days
2002-06-22 11:51:02 +00:00
args.oif = real_dst;
ip_dn_io_ptr(m, (i & 0xffff),DN_TO_BDG_FWD, &args);
2001-02-02 00:19:25 +00:00
return m0 ;
1998-09-12 22:07:47 +00:00
}
/*
Remove (almost all) global variables that were used to hold packet forwarding state ("annotations") during ip processing. The code is considerably cleaner now. The variables removed by this change are: ip_divert_cookie used by divert sockets ip_fw_fwd_addr used for transparent ip redirection last_pkt used by dynamic pipes in dummynet Removal of the first two has been done by carrying the annotations into volatile structs prepended to the mbuf chains, and adding appropriate code to add/remove annotations in the routines which make use of them, i.e. ip_input(), ip_output(), tcp_input(), bdg_forward(), ether_demux(), ether_output_frame(), div_output(). On passing, remove a bug in divert handling of fragmented packet. Now it is the fragment at offset 0 which sets the divert status of the whole packet, whereas formerly it was the last incoming fragment to decide. Removal of last_pkt required a change in the interface of ip_fw_chk() and dummynet_io(). On passing, use the same mechanism for dummynet annotations and for divert/forward annotations. option IPFIREWALL_FORWARD is effectively useless, the code to implement it is very small and is now in by default to avoid the obfuscation of conditionally compiled code. NOTES: * there is at least one global variable left, sro_fwd, in ip_output(). I am not sure if/how this can be removed. * I have deliberately avoided gratuitous style changes in this commit to avoid cluttering the diffs. Minor stule cleanup will likely be necessary * this commit only focused on the IP layer. I am sure there is a number of global variables used in the TCP and maybe UDP stack. * despite the number of files touched, there are absolutely no API's or data structures changed by this commit (except the interfaces of ip_fw_chk() and dummynet_io(), which are internal anyways), so an MFC is quite safe and unintrusive (and desirable, given the improved readability of the code). MFC after: 10 days
2002-06-22 11:51:02 +00:00
* XXX at some point, add support for divert/forward actions.
* If none of the above matches, we have to drop the packet.
*/
bdg_ipfw_drops++ ;
2001-02-02 00:19:25 +00:00
return m0 ;
1998-09-12 22:07:47 +00:00
}
forward:
/*
2001-02-02 00:19:25 +00:00
* Again, bring up the headers in case of shared bufs to avoid
* corruptions in the future.
*/
2001-02-02 00:19:25 +00:00
if ( shared ) {
int i = min(m0->m_pkthdr.len, max_protohdr) ;
m0 = m_pullup(m0, i) ;
if (m0 == NULL) {
bdg_dropped++ ;
2001-02-02 00:19:25 +00:00
return NULL ;
}
/* NB: eh is not used below; no need to recalculate it */
}
Remove (almost all) global variables that were used to hold packet forwarding state ("annotations") during ip processing. The code is considerably cleaner now. The variables removed by this change are: ip_divert_cookie used by divert sockets ip_fw_fwd_addr used for transparent ip redirection last_pkt used by dynamic pipes in dummynet Removal of the first two has been done by carrying the annotations into volatile structs prepended to the mbuf chains, and adding appropriate code to add/remove annotations in the routines which make use of them, i.e. ip_input(), ip_output(), tcp_input(), bdg_forward(), ether_demux(), ether_output_frame(), div_output(). On passing, remove a bug in divert handling of fragmented packet. Now it is the fragment at offset 0 which sets the divert status of the whole packet, whereas formerly it was the last incoming fragment to decide. Removal of last_pkt required a change in the interface of ip_fw_chk() and dummynet_io(). On passing, use the same mechanism for dummynet annotations and for divert/forward annotations. option IPFIREWALL_FORWARD is effectively useless, the code to implement it is very small and is now in by default to avoid the obfuscation of conditionally compiled code. NOTES: * there is at least one global variable left, sro_fwd, in ip_output(). I am not sure if/how this can be removed. * I have deliberately avoided gratuitous style changes in this commit to avoid cluttering the diffs. Minor stule cleanup will likely be necessary * this commit only focused on the IP layer. I am sure there is a number of global variables used in the TCP and maybe UDP stack. * despite the number of files touched, there are absolutely no API's or data structures changed by this commit (except the interfaces of ip_fw_chk() and dummynet_io(), which are internal anyways), so an MFC is quite safe and unintrusive (and desirable, given the improved readability of the code). MFC after: 10 days
2002-06-22 11:51:02 +00:00
/*
* now real_dst is used to determine the cluster where to forward.
* For packets coming from ether_input, this is the one of the 'src'
* interface, whereas for locally generated packets (src==NULL) it
* is the cluster of the original destination interface, which
* was already saved into real_dst.
*/
if (src != NULL)
real_dst = src ;
Remove (almost all) global variables that were used to hold packet forwarding state ("annotations") during ip processing. The code is considerably cleaner now. The variables removed by this change are: ip_divert_cookie used by divert sockets ip_fw_fwd_addr used for transparent ip redirection last_pkt used by dynamic pipes in dummynet Removal of the first two has been done by carrying the annotations into volatile structs prepended to the mbuf chains, and adding appropriate code to add/remove annotations in the routines which make use of them, i.e. ip_input(), ip_output(), tcp_input(), bdg_forward(), ether_demux(), ether_output_frame(), div_output(). On passing, remove a bug in divert handling of fragmented packet. Now it is the fragment at offset 0 which sets the divert status of the whole packet, whereas formerly it was the last incoming fragment to decide. Removal of last_pkt required a change in the interface of ip_fw_chk() and dummynet_io(). On passing, use the same mechanism for dummynet annotations and for divert/forward annotations. option IPFIREWALL_FORWARD is effectively useless, the code to implement it is very small and is now in by default to avoid the obfuscation of conditionally compiled code. NOTES: * there is at least one global variable left, sro_fwd, in ip_output(). I am not sure if/how this can be removed. * I have deliberately avoided gratuitous style changes in this commit to avoid cluttering the diffs. Minor stule cleanup will likely be necessary * this commit only focused on the IP layer. I am sure there is a number of global variables used in the TCP and maybe UDP stack. * despite the number of files touched, there are absolutely no API's or data structures changed by this commit (except the interfaces of ip_fw_chk() and dummynet_io(), which are internal anyways), so an MFC is quite safe and unintrusive (and desirable, given the improved readability of the code). MFC after: 10 days
2002-06-22 11:51:02 +00:00
last = NULL;
2002-12-22 05:35:03 +00:00
IFNET_RLOCK();
if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_UNKNOWN) {
ifp = TAILQ_FIRST(&ifnet) ; /* scan all ports */
once = 0 ;
} else {
ifp = dst ;
once = 1 ;
}
if ((uintptr_t)(ifp) <= (u_int)BDG_FORWARD)
panic("bdg_forward: bad dst");
for (;;) {
if (last) { /* need to forward packet leftover from previous loop */
struct mbuf *m ;
2001-02-02 00:19:25 +00:00
if (shared == 0 && once ) { /* no need to copy */
m = m0 ;
m0 = NULL ; /* original is gone */
} else {
m = m_copypacket(m0, M_NOWAIT);
2001-02-02 00:19:25 +00:00
if (m == NULL) {
2002-12-22 05:35:03 +00:00
IFNET_RUNLOCK();
2001-02-02 00:19:25 +00:00
printf("bdg_forward: sorry, m_copypacket failed!\n");
bdg_dropped++ ;
2001-02-02 00:19:25 +00:00
return m0 ; /* the original is still there... */
}
1998-09-12 22:07:47 +00:00
}
if (!IF_HANDOFF(&last->if_snd, m, last)) {
#if 0
2001-02-02 00:19:25 +00:00
BDG_MUTE(last); /* should I also mute ? */
#endif
1998-09-12 22:07:47 +00:00
}
BDG_STAT(last, BDG_OUT);
last = NULL ;
if (once)
break ;
1998-09-12 22:07:47 +00:00
}
if (ifp == NULL)
1998-09-12 22:07:47 +00:00
break ;
/*
* If the interface is used for bridging, not muted, not full,
* up and running, is not the source interface, and belongs to
* the same cluster as the 'real_dst', then send here.
*/
if ( BDG_USED(ifp) && !BDG_MUTED(ifp) && !_IF_QFULL(&ifp->if_snd) &&
(ifp->if_flags & (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING) &&
ifp != src && BDG_SAMECLUSTER(ifp, real_dst) )
last = ifp ;
ifp = TAILQ_NEXT(ifp, if_link) ;
if (ifp == NULL)
once = 1 ;
1998-09-12 22:07:47 +00:00
}
2002-12-22 05:35:03 +00:00
IFNET_RUNLOCK();
DEB(bdg_fw_ticks += (u_long)(rdtsc() - ticks) ; bdg_fw_count++ ;
if (bdg_fw_count != 0) bdg_fw_avg = bdg_fw_ticks/bdg_fw_count; )
2001-02-02 00:19:25 +00:00
return m0 ;
#undef EH_RESTORE
1998-09-12 22:07:47 +00:00
}
/*
* initialization of bridge code.
*/
static int
bdginit(void)
{
printf("BRIDGE 020214 loaded\n");
ifp2sc = malloc(BDG_MAX_PORTS * sizeof(struct bdg_softc),
M_IFADDR, M_ZERO );
if (ifp2sc == NULL)
return ENOMEM ;
bridge_in_ptr = bridge_in;
bdg_forward_ptr = bdg_forward;
bdgtakeifaces_ptr = reconfigure_bridge;
n_clusters = 0;
clusters = NULL;
do_bridge=0;
bzero(&bdg_stats, sizeof(bdg_stats) );
bdgtakeifaces_ptr();
bdg_timeout(0);
return 0 ;
}
/*
* initialization code, both for static and dynamic loading.
*/
static int
bridge_modevent(module_t mod, int type, void *unused)
{
int s;
int err = 0 ;
switch (type) {
case MOD_LOAD:
if (BDG_LOADED) {
err = EEXIST;
break ;
}
s = splimp();
err = bdginit();
splx(s);
break;
case MOD_UNLOAD:
#if !defined(KLD_MODULE)
printf("bridge statically compiled, cannot unload\n");
err = EINVAL ;
#else
s = splimp();
do_bridge = 0;
bridge_in_ptr = NULL;
bdg_forward_ptr = NULL;
bdgtakeifaces_ptr = NULL;
untimeout(bdg_timeout, NULL, bdg_timeout_h);
bridge_off();
if (clusters)
free(clusters, M_IFADDR);
free(ifp2sc, M_IFADDR);
ifp2sc = NULL ;
splx(s);
#endif
break;
default:
err = EINVAL ;
break;
}
return err;
}
static moduledata_t bridge_mod = {
"bridge",
bridge_modevent,
0
};
DECLARE_MODULE(bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(bridge, 1);