freebsd-dev/sys/netgraph/ng_bridge.c

1101 lines
29 KiB
C
Raw Normal View History

/*-
* Copyright (c) 2000 Whistle Communications, Inc.
* All rights reserved.
*
* Subject to the following obligations and disclaimer of warranty, use and
* redistribution of this software, in source or object code forms, with or
* without modifications are expressly permitted by Whistle Communications;
* provided, however, that:
* 1. Any and all reproductions of the source or object code must include the
* copyright notice above and the following disclaimer of warranties; and
* 2. No rights are granted, in any manner or form, to use Whistle
* Communications, Inc. trademarks, including the mark "WHISTLE
* COMMUNICATIONS" on advertising, endorsements, or otherwise except as
* such appears in the above copyright notice or in the software.
*
* THIS SOFTWARE IS BEING PROVIDED BY WHISTLE COMMUNICATIONS "AS IS", AND
* TO THE MAXIMUM EXTENT PERMITTED BY LAW, WHISTLE COMMUNICATIONS MAKES NO
* REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, REGARDING THIS SOFTWARE,
* INCLUDING WITHOUT LIMITATION, ANY AND ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT.
* WHISTLE COMMUNICATIONS DOES NOT WARRANT, GUARANTEE, OR MAKE ANY
* REPRESENTATIONS REGARDING THE USE OF, OR THE RESULTS OF THE USE OF THIS
* SOFTWARE IN TERMS OF ITS CORRECTNESS, ACCURACY, RELIABILITY OR OTHERWISE.
* IN NO EVENT SHALL WHISTLE COMMUNICATIONS BE LIABLE FOR ANY DAMAGES
* RESULTING FROM OR ARISING OUT OF ANY USE OF THIS SOFTWARE, INCLUDING
* WITHOUT LIMITATION, ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
* PUNITIVE, OR CONSEQUENTIAL DAMAGES, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES, LOSS OF USE, DATA OR PROFITS, HOWEVER CAUSED AND UNDER ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF WHISTLE COMMUNICATIONS IS ADVISED OF THE POSSIBILITY
* OF SUCH DAMAGE.
*
* Author: Archie Cobbs <archie@freebsd.org>
*
* $FreeBSD$
*/
/*
* ng_bridge(4) netgraph node type
*
* The node performs standard intelligent Ethernet bridging over
* each of its connected hooks, or links. A simple loop detection
* algorithm is included which disables a link for priv->conf.loopTimeout
* seconds when a host is seen to have jumped from one link to
* another within priv->conf.minStableAge seconds.
*
* We keep a hashtable that maps Ethernet addresses to host info,
* which is contained in struct ng_bridge_host's. These structures
* tell us on which link the host may be found. A host's entry will
* expire after priv->conf.maxStaleness seconds.
*
* This node is optimized for stable networks, where machines jump
* from one port to the other only rarely.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
Conditionally compile out V_ globals while instantiating the appropriate container structures, depending on VIMAGE_GLOBALS compile time option. Make VIMAGE_GLOBALS a new compile-time option, which by default will not be defined, resulting in instatiations of global variables selected for V_irtualization (enclosed in #ifdef VIMAGE_GLOBALS blocks) to be effectively compiled out. Instantiate new global container structures to hold V_irtualized variables: vnet_net_0, vnet_inet_0, vnet_inet6_0, vnet_ipsec_0, vnet_netgraph_0, and vnet_gif_0. Update the VSYM() macro so that depending on VIMAGE_GLOBALS the V_ macros resolve either to the original globals, or to fields inside container structures, i.e. effectively #ifdef VIMAGE_GLOBALS #define V_rt_tables rt_tables #else #define V_rt_tables vnet_net_0._rt_tables #endif Update SYSCTL_V_*() macros to operate either on globals or on fields inside container structs. Extend the internal kldsym() lookups with the ability to resolve selected fields inside the virtualization container structs. This applies only to the fields which are explicitly registered for kldsym() visibility via VNET_MOD_DECLARE() and vnet_mod_register(), currently this is done only in sys/net/if.c. Fix a few broken instances of MODULE_GLOBAL() macro use in SCTP code, and modify the MODULE_GLOBAL() macro to resolve to V_ macros, which in turn result in proper code being generated depending on VIMAGE_GLOBALS. De-virtualize local static variables in sys/contrib/pf/net/pf_subr.c which were prematurely V_irtualized by automated V_ prepending scripts during earlier merging steps. PF virtualization will be done separately, most probably after next PF import. Convert a few variable initializations at instantiation to initialization in init functions, most notably in ipfw. Also convert TUNABLE_INT() initializers for V_ variables to TUNABLE_FETCH_INT() in initializer functions. 
Discussed at: devsummit Strassburg Reviewed by: bz, julian Approved by: julian (mentor) Obtained from: //depot/projects/vimage-commit2/... X-MFC after: never Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-12-10 23:12:39 +00:00
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/errno.h>
Conditionally compile out V_ globals while instantiating the appropriate container structures, depending on VIMAGE_GLOBALS compile time option. Make VIMAGE_GLOBALS a new compile-time option, which by default will not be defined, resulting in instatiations of global variables selected for V_irtualization (enclosed in #ifdef VIMAGE_GLOBALS blocks) to be effectively compiled out. Instantiate new global container structures to hold V_irtualized variables: vnet_net_0, vnet_inet_0, vnet_inet6_0, vnet_ipsec_0, vnet_netgraph_0, and vnet_gif_0. Update the VSYM() macro so that depending on VIMAGE_GLOBALS the V_ macros resolve either to the original globals, or to fields inside container structures, i.e. effectively #ifdef VIMAGE_GLOBALS #define V_rt_tables rt_tables #else #define V_rt_tables vnet_net_0._rt_tables #endif Update SYSCTL_V_*() macros to operate either on globals or on fields inside container structs. Extend the internal kldsym() lookups with the ability to resolve selected fields inside the virtualization container structs. This applies only to the fields which are explicitly registered for kldsym() visibility via VNET_MOD_DECLARE() and vnet_mod_register(), currently this is done only in sys/net/if.c. Fix a few broken instances of MODULE_GLOBAL() macro use in SCTP code, and modify the MODULE_GLOBAL() macro to resolve to V_ macros, which in turn result in proper code being generated depending on VIMAGE_GLOBALS. De-virtualize local static variables in sys/contrib/pf/net/pf_subr.c which were prematurely V_irtualized by automated V_ prepending scripts during earlier merging steps. PF virtualization will be done separately, most probably after next PF import. Convert a few variable initializations at instantiation to initialization in init functions, most notably in ipfw. Also convert TUNABLE_INT() initializers for V_ variables to TUNABLE_FETCH_INT() in initializer functions. 
Discussed at: devsummit Strassburg Reviewed by: bz, julian Approved by: julian (mentor) Obtained from: //depot/projects/vimage-commit2/... X-MFC after: never Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-12-10 23:12:39 +00:00
#include <sys/rwlock.h>
#include <sys/syslog.h>
#include <sys/socket.h>
#include <sys/ctype.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/ethernet.h>
#include <net/vnet.h>
#include <netinet/in.h>
#if 0 /* not used yet */
#include <netinet/ip_fw.h>
#endif
#include <netgraph/ng_message.h>
#include <netgraph/netgraph.h>
#include <netgraph/ng_parse.h>
#include <netgraph/ng_bridge.h>
/*
 * Malloc type used for every allocation made by this node type.
 * When NG_SEPARATE_MALLOC is defined the node gets a dedicated malloc type
 * ("netgraph_bridge"); otherwise M_NETGRAPH_BRIDGE is simply an alias for
 * the shared M_NETGRAPH type.
 */
#ifdef NG_SEPARATE_MALLOC
static MALLOC_DEFINE(M_NETGRAPH_BRIDGE, "netgraph_bridge",
"netgraph bridge node");
#else
#define M_NETGRAPH_BRIDGE M_NETGRAPH
#endif
/*
 * Per-link private data.
 *
 * One instance is allocated for each connected link hook in
 * ng_bridge_newhook() and stored as the hook's private pointer.
 * A non-zero loopCount means the link is currently disabled because a loop
 * was detected; ng_bridge_rcvdata() drops everything received on such a link.
 */
struct ng_bridge_link {
hook_p hook; /* netgraph hook */
u_int16_t loopCount; /* loop ignore timer */
struct ng_bridge_link_stats stats; /* link stats */
};
/*
 * Per-node private data.
 *
 * Allocated and initialized by ng_bridge_constructor() and released by
 * ng_bridge_shutdown().  The hash table starts with MIN_BUCKETS buckets;
 * hashMask is kept equal to numBuckets - 1.
 */
struct ng_bridge_private {
struct ng_bridge_bucket *tab; /* hash table bucket array */
struct ng_bridge_config conf; /* node configuration */
node_p node; /* netgraph node */
u_int numHosts; /* num entries in table */
u_int numBuckets; /* num buckets in table */
u_int hashMask; /* numBuckets - 1 */
int numLinks; /* num connected links */
int persistent; /* can exist w/o hooks */
struct callout timer; /* one second periodic timer */
};
/* Shorthand for a pointer to the per-node private data */
typedef struct ng_bridge_private *priv_p;
/*
 * Information about a host, stored in a hash table entry.
 * Entries that hash to the same bucket are chained through 'next'.
 */
struct ng_bridge_hent {
struct ng_bridge_host host; /* actual host info */
SLIST_ENTRY(ng_bridge_hent) next; /* next entry in bucket */
};
/*
 * Hash table bucket declaration: each bucket (struct ng_bridge_bucket) is
 * the head of a singly-linked list of struct ng_bridge_hent.
 */
SLIST_HEAD(ng_bridge_bucket, ng_bridge_hent);
/* Netgraph node methods (registered in ng_bridge_typestruct) */
static ng_constructor_t ng_bridge_constructor;
static ng_rcvmsg_t ng_bridge_rcvmsg;
static ng_shutdown_t ng_bridge_shutdown;
static ng_newhook_t ng_bridge_newhook;
static ng_rcvdata_t ng_bridge_rcvdata;
static ng_disconnect_t ng_bridge_disconnect;
/*
 * Other internal functions.
 *
 * As used by the callers in this file:
 *  - ng_bridge_get() looks up a host by Ethernet address and returns NULL
 *    when the address is not in the table;
 *  - ng_bridge_put() adds a host for a link and returns zero on failure
 *    (callers count that as a memory failure);
 *  - ng_bridge_remove_hosts() forgets the hosts of one link, or flushes the
 *    whole table when link is NULL;
 *  - ng_bridge_timeout() is the callout handler scheduled every hz ticks;
 *  - ng_bridge_nodename() supplies the node name used in log messages.
 * NOTE(review): ng_bridge_rehash() is defined outside the visible part of
 * the file; presumably it resizes the hash table -- confirm.
 */
static struct ng_bridge_host *ng_bridge_get(priv_p priv, const u_char *addr);
static int ng_bridge_put(priv_p priv, const u_char *addr, link_p link);
static void ng_bridge_rehash(priv_p priv);
static void ng_bridge_remove_hosts(priv_p priv, link_p link);
static void ng_bridge_timeout(node_p node, hook_p hook, void *arg1, int arg2);
static const char *ng_bridge_nodename(node_p node);
/*
 * Ethernet broadcast address (all ones).  Used by ng_bridge_rcvdata() to
 * tell broadcast frames apart from other group-addressed frames.
 */
static const u_char ng_bridge_bcast_addr[ETHER_ADDR_LEN] =
{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
/*
 * Compare two Ethernet addresses; evaluates to non-zero when all
 * ETHER_ADDR_LEN bytes are equal.
 *
 * The comparison goes through memcmp() rather than casting the byte arrays
 * to 32 and 16 bit words: addresses taken straight out of a packet header
 * (e.g. ether_shost at offset 6) are not guaranteed to be suitably aligned
 * for word loads, and the casts violate the aliasing rules.  Compilers
 * expand a fixed six byte memcmp() inline, so nothing is lost.
 */
#define ETHER_EQUAL(a,b)	(memcmp((a), (b), ETHER_ADDR_LEN) == 0)
/*
 * Minimum and maximum number of hash buckets. Must be a power of two,
 * because the table is indexed through hashMask (numBuckets - 1).
 */
#define MIN_BUCKETS (1 << 5) /* 32 */
#define MAX_BUCKETS (1 << 14) /* 16384 */
/*
 * Configuration default values, installed by ng_bridge_constructor().
 * The file header describes loopTimeout, maxStaleness and minStableAge as
 * being expressed in seconds.
 */
#define DEFAULT_LOOP_TIMEOUT 60
#define DEFAULT_MAX_STALENESS (15 * 60) /* same as ARP timeout */
#define DEFAULT_MIN_STABLE_AGE 1
/******************************************************************
NETGRAPH PARSE TYPES
******************************************************************/
/*
 * Array-length callback for the host table returned by NGM_BRIDGE_GET_TABLE.
 *
 * 'buf' is treated as pointing sizeof(u_int32_t) bytes past the start of a
 * struct ng_bridge_host_ary; stepping back by that amount gives access to
 * the numHosts member, which is the number of array elements.
 * 'type' and 'start' are not used.
 */
static int
ng_bridge_getTableLength(const struct ng_parse_type *type,
	const u_char *start, const u_char *buf)
{
	const u_char *const base = buf - sizeof(u_int32_t);
	const struct ng_bridge_host_ary *const table =
	    (const struct ng_bridge_host_ary *)base;

	return (table->numHosts);
}
/*
 * Parse type for struct ng_bridge_host_ary.
 *
 * Built bottom-up: a single host entry (structure type), then the
 * variable-length array of hosts whose length is supplied by
 * ng_bridge_getTableLength(), then the enclosing structure.
 */
/* One host entry; the Ethernet address is parsed with ng_parse_enaddr_type */
static const struct ng_parse_struct_field ng_bridge_host_type_fields[]
= NG_BRIDGE_HOST_TYPE_INFO(&ng_parse_enaddr_type);
static const struct ng_parse_type ng_bridge_host_type = {
&ng_parse_struct_type,
&ng_bridge_host_type_fields
};
/* Array of host entries */
static const struct ng_parse_array_info ng_bridge_hary_type_info = {
&ng_bridge_host_type,
ng_bridge_getTableLength
};
static const struct ng_parse_type ng_bridge_hary_type = {
&ng_parse_array_type,
&ng_bridge_hary_type_info
};
/* The enclosing struct ng_bridge_host_ary */
static const struct ng_parse_struct_field ng_bridge_host_ary_type_fields[]
= NG_BRIDGE_HOST_ARY_TYPE_INFO(&ng_bridge_hary_type);
static const struct ng_parse_type ng_bridge_host_ary_type = {
&ng_parse_struct_type,
&ng_bridge_host_ary_type_fields
};
/* Parse type for struct ng_bridge_config */
static const struct ng_parse_struct_field ng_bridge_config_type_fields[]
= NG_BRIDGE_CONFIG_TYPE_INFO;
static const struct ng_parse_type ng_bridge_config_type = {
&ng_parse_struct_type,
&ng_bridge_config_type_fields
};
/* Parse type for struct ng_bridge_link_stats */
static const struct ng_parse_struct_field ng_bridge_stats_type_fields[]
= NG_BRIDGE_STATS_TYPE_INFO;
static const struct ng_parse_type ng_bridge_stats_type = {
&ng_parse_struct_type,
&ng_bridge_stats_type_fields
};
/*
 * List of commands and how to convert arguments to/from ASCII.
 *
 * Each entry gives the type cookie, the command number, the ASCII command
 * name and two parse types.  Judging by the entries below and by the
 * handlers in ng_bridge_rcvmsg(), the first parse type describes the
 * command's argument and the second its response; NULL means "none".
 * The list is terminated by an all-zero entry.
 */
static const struct ng_cmdlist ng_bridge_cmdlist[] = {
/* Replace the node configuration */
{
NGM_BRIDGE_COOKIE,
NGM_BRIDGE_SET_CONFIG,
"setconfig",
&ng_bridge_config_type,
NULL
},
/* Return the current node configuration */
{
NGM_BRIDGE_COOKIE,
NGM_BRIDGE_GET_CONFIG,
"getconfig",
NULL,
&ng_bridge_config_type
},
/* Flush the host table and reset per-link loop counters and stats */
{
NGM_BRIDGE_COOKIE,
NGM_BRIDGE_RESET,
"reset",
NULL,
NULL
},
/* Return the statistics of the link with the given number */
{
NGM_BRIDGE_COOKIE,
NGM_BRIDGE_GET_STATS,
"getstats",
&ng_parse_uint32_type,
&ng_bridge_stats_type
},
/* Clear the statistics of the link with the given number */
{
NGM_BRIDGE_COOKIE,
NGM_BRIDGE_CLR_STATS,
"clrstats",
&ng_parse_uint32_type,
NULL
},
/* Return, then clear, the statistics of the given link */
{
NGM_BRIDGE_COOKIE,
NGM_BRIDGE_GETCLR_STATS,
"getclrstats",
&ng_parse_uint32_type,
&ng_bridge_stats_type
},
/* Return the table of known hosts */
{
NGM_BRIDGE_COOKIE,
NGM_BRIDGE_GET_TABLE,
"gettable",
NULL,
&ng_bridge_host_ary_type
},
/* Mark the node persistent (sets priv->persistent) */
{
NGM_BRIDGE_COOKIE,
NGM_BRIDGE_SET_PERSISTENT,
"setpersistent",
NULL,
NULL
},
/* Terminator */
{ 0 }
};
/* Node type descriptor */
static struct ng_type ng_bridge_typestruct = {
.version = NG_ABI_VERSION,
.name = NG_BRIDGE_NODE_TYPE,
.constructor = ng_bridge_constructor,
.rcvmsg = ng_bridge_rcvmsg,
.shutdown = ng_bridge_shutdown,
.newhook = ng_bridge_newhook,
.rcvdata = ng_bridge_rcvdata,
.disconnect = ng_bridge_disconnect,
.cmdlist = ng_bridge_cmdlist,
};
2000-10-09 18:37:11 +00:00
NETGRAPH_INIT(bridge, &ng_bridge_typestruct);
/******************************************************************
NETGRAPH NODE METHODS
******************************************************************/
/*
 * Node constructor
 *
 * Allocates the per-node private data together with a minimum-size hash
 * table, installs the default configuration and starts the periodic timer.
 * Both allocations use M_WAITOK and the function always returns 0.
 */
static int
ng_bridge_constructor(node_p node)
{
	priv_p bridge;

	/* Private state; M_ZERO leaves every counter and flag at zero */
	bridge = malloc(sizeof(*bridge), M_NETGRAPH_BRIDGE,
	    M_WAITOK | M_ZERO);
	bridge->node = node;
	ng_callout_init(&bridge->timer);

	/* Default configuration */
	bridge->conf.debugLevel = 1;
	bridge->conf.loopTimeout = DEFAULT_LOOP_TIMEOUT;
	bridge->conf.maxStaleness = DEFAULT_MAX_STALENESS;
	bridge->conf.minStableAge = DEFAULT_MIN_STABLE_AGE;

	/* The hash table starts out at its smallest permitted size */
	bridge->tab = malloc(MIN_BUCKETS * sizeof(*bridge->tab),
	    M_NETGRAPH_BRIDGE, M_WAITOK | M_ZERO);
	bridge->numBuckets = MIN_BUCKETS;
	bridge->hashMask = MIN_BUCKETS - 1;

	/*
	 * This node has all kinds of state that could be corrupted under
	 * SMP.  Until it gets its own internal protection, everything goes
	 * through in single file.  This could hurt a machine bridging
	 * between two gigabit Ethernets, so it should be fixed.  When it is
	 * fixed the processing SHOULD NOT SLEEP: use spinlocks (and atomic
	 * ops), please.
	 */
	NG_NODE_FORCE_WRITER(node);
	NG_NODE_SET_PRIVATE(node, bridge);

	/* Start timer; timer is always running while node is alive */
	ng_callout(&bridge->timer, node, NULL, hz, ng_bridge_timeout, NULL, 0);

	return (0);
}
/*
 * Method for attaching a new hook
 *
 * Only link hooks are accepted; their name must be exactly
 * NG_BRIDGE_HOOK_LINK_PREFIX followed by a decimal link number in canonical
 * form (no sign, no leading zeros, no trailing characters).
 *
 * Returns 0 on success, EINVAL for any other hook name, ELOOP when the peer
 * of the hook is this very node, and ENOMEM when the per-link data cannot
 * be allocated.
 */
static int
ng_bridge_newhook(node_p node, hook_p hook, const char *name)
{
	const priv_p priv = NG_NODE_PRIVATE(node);
	const size_t prefixLen = strlen(NG_BRIDGE_HOOK_LINK_PREFIX);
	char linkName[NG_HOOKSIZ];
	u_int32_t linkNum;
	link_p link;

	/* A link hook name is strictly longer than the bare prefix */
	if (strlen(name) <= prefixLen)
		return (EINVAL);		/* Unknown hook name */

	/* primitive parsing */
	linkNum = strtoul(name + prefixLen, NULL, 10);

	/*
	 * Validation by comparing against the reconstructed name: this
	 * checks the prefix itself and rejects any non-canonical number.
	 */
	snprintf(linkName, sizeof(linkName), "%s%u",
	    NG_BRIDGE_HOOK_LINK_PREFIX, linkNum);
	if (strcmp(linkName, name) != 0)
		return (EINVAL);

	/* Refuse to connect the bridge to itself */
	if (NG_PEER_NODE(hook) == node)
		return (ELOOP);

	/*
	 * Do not sleep in a node method: with M_NOWAIT the allocation may
	 * fail, which is what the ENOMEM path below is for.  (With M_WAITOK
	 * malloc() never returns NULL and that check was dead code.)
	 */
	link = malloc(sizeof(*link), M_NETGRAPH_BRIDGE, M_NOWAIT | M_ZERO);
	if (link == NULL)
		return (ENOMEM);

	link->hook = hook;
	NG_HOOK_SET_PRIVATE(hook, link);
	priv->numLinks++;
	return (0);
}
/*
* Receive a control message
*/
/*
 * Per-hook helper for the NGM_BRIDGE_RESET control message: clears the
 * loop detection counter and the statistics of the link attached to 'hook'.
 * 'arg' is unused.  Always returns 1.
 */
static int
ng_bridge_reset_link(hook_p hook, void *arg __unused)
{
	link_p link = NG_HOOK_PRIVATE(hook);

	bzero(&link->stats, sizeof(link->stats));
	link->loopCount = 0;
	return (1);
}
/*
 * Control message handler.
 *
 * Detaches the message from 'item' and dispatches on its type cookie and
 * command.  When NGM_BRIDGE_TABLE_ABI is defined, messages carrying the old
 * NGM_BRIDGE_COOKIE_TBL cookie get their GET_CONFIG, SET_CONFIG and
 * GET_TABLE commands served with the old structure layouts; every other
 * command of that cookie is handled exactly like its NGM_BRIDGE_COOKIE
 * counterpart.
 *
 * The response (if any) is sent and the message freed before returning.
 * Returns 0 on success, EINVAL for an unknown cookie/command or a wrongly
 * sized argument, ENOMEM when a response cannot be allocated and ENOTCONN
 * when a statistics request names a link that is not connected.
 */
static int
ng_bridge_rcvmsg(node_p node, item_p item, hook_p lasthook)
{
	const priv_p priv = NG_NODE_PRIVATE(node);
	struct ng_mesg *resp = NULL;
	int error = 0;
	struct ng_mesg *msg;

	NGI_GET_MSG(item, msg);
	switch (msg->header.typecookie) {
#ifdef NGM_BRIDGE_TABLE_ABI
	case NGM_BRIDGE_COOKIE_TBL:
		switch (msg->header.cmd) {
		case NGM_BRIDGE_GET_CONFIG:
		    {
			struct ng_bridge_config_tbl *conf;

			NG_MKRESPONSE(resp, msg, sizeof(*conf),
			    M_NOWAIT|M_ZERO);
			if (resp == NULL) {
				error = ENOMEM;
				break;
			}
			conf = (struct ng_bridge_config_tbl *)resp->data;
			conf->cfg = priv->conf;
			break;
		    }
		case NGM_BRIDGE_SET_CONFIG:
		    {
			struct ng_bridge_config_tbl *conf;

			if (msg->header.arglen != sizeof(*conf)) {
				error = EINVAL;
				break;
			}
			conf = (struct ng_bridge_config_tbl *)msg->data;
			priv->conf = conf->cfg;
			break;
		    }
		case NGM_BRIDGE_GET_TABLE:
		    {
			struct ng_bridge_host_tbl_ary *ary;
			struct ng_bridge_hent *hent;
			int i, bucket;

			NG_MKRESPONSE(resp, msg, sizeof(*ary) +
			    (priv->numHosts * sizeof(*ary->hosts)), M_NOWAIT);
			if (resp == NULL) {
				error = ENOMEM;
				break;
			}
			ary = (struct ng_bridge_host_tbl_ary *)resp->data;
			ary->numHosts = priv->numHosts;
			i = 0;
			for (bucket = 0; bucket < priv->numBuckets; bucket++) {
				SLIST_FOREACH(hent, &priv->tab[bucket], next) {
					memcpy(ary->hosts[i].addr,
					    hent->host.addr,
					    sizeof(ary->hosts[i].addr));
					ary->hosts[i].age = hent->host.age;
					ary->hosts[i].staleness =
					    hent->host.staleness;
					/* Old ABI reports the link number,
					 * recovered from the hook name */
					ary->hosts[i].linkNum = strtol(
					    NG_HOOK_NAME(hent->host.link->hook) +
					    strlen(NG_BRIDGE_HOOK_LINK_PREFIX),
					    NULL, 10);
					i++;
				}
			}
			break;
		    }
		}
		/*
		 * If already handled break, otherwise use new ABI.
		 *
		 * A successful old-ABI SET_CONFIG leaves both resp and
		 * error untouched, so it has to be recognized by command:
		 * falling through would run the new-ABI SET_CONFIG on the
		 * same message and re-check its length against a different
		 * structure.
		 */
		if (resp != NULL || error != 0 ||
		    msg->header.cmd == NGM_BRIDGE_SET_CONFIG)
			break;
		/* FALLTHROUGH */
#endif /* NGM_BRIDGE_TABLE_ABI */
	case NGM_BRIDGE_COOKIE:
		switch (msg->header.cmd) {
		case NGM_BRIDGE_GET_CONFIG:
		    {
			struct ng_bridge_config *conf;

			NG_MKRESPONSE(resp, msg,
			    sizeof(struct ng_bridge_config), M_NOWAIT);
			if (resp == NULL) {
				error = ENOMEM;
				break;
			}
			conf = (struct ng_bridge_config *)resp->data;
			*conf = priv->conf;	/* no sanity checking needed */
			break;
		    }
		case NGM_BRIDGE_SET_CONFIG:
		    {
			struct ng_bridge_config *conf;

			if (msg->header.arglen
			    != sizeof(struct ng_bridge_config)) {
				error = EINVAL;
				break;
			}
			conf = (struct ng_bridge_config *)msg->data;
			priv->conf = *conf;
			break;
		    }
		case NGM_BRIDGE_RESET:
		    {
			hook_p rethook;

			/* Flush all entries in the hash table */
			ng_bridge_remove_hosts(priv, NULL);

			/* Reset all loop detection counters and stats */
			NG_NODE_FOREACH_HOOK(node, ng_bridge_reset_link, NULL,
			    rethook);
			break;
		    }
		case NGM_BRIDGE_GET_STATS:
		case NGM_BRIDGE_CLR_STATS:
		case NGM_BRIDGE_GETCLR_STATS:
		    {
			hook_p hook;
			link_p link;
			char linkName[NG_HOOKSIZ];

			/* Get link number */
			if (msg->header.arglen != sizeof(u_int32_t)) {
				error = EINVAL;
				break;
			}
			snprintf(linkName, sizeof(linkName),
			    "%s%u", NG_BRIDGE_HOOK_LINK_PREFIX,
			    *((u_int32_t *)msg->data));

			if ((hook = ng_findhook(node, linkName)) == NULL) {
				error = ENOTCONN;
				break;
			}
			link = NG_HOOK_PRIVATE(hook);

			/* Get/clear stats */
			if (msg->header.cmd != NGM_BRIDGE_CLR_STATS) {
				NG_MKRESPONSE(resp, msg,
				    sizeof(link->stats), M_NOWAIT);
				if (resp == NULL) {
					error = ENOMEM;
					break;
				}
				bcopy(&link->stats,
				    resp->data, sizeof(link->stats));
			}
			if (msg->header.cmd != NGM_BRIDGE_GET_STATS)
				bzero(&link->stats, sizeof(link->stats));
			break;
		    }
		case NGM_BRIDGE_GET_TABLE:
		    {
			struct ng_bridge_host_ary *ary;
			struct ng_bridge_hent *hent;
			int i = 0, bucket;

			NG_MKRESPONSE(resp, msg, sizeof(*ary)
			    + (priv->numHosts * sizeof(*ary->hosts)), M_NOWAIT);
			if (resp == NULL) {
				error = ENOMEM;
				break;
			}
			ary = (struct ng_bridge_host_ary *)resp->data;
			ary->numHosts = priv->numHosts;
			for (bucket = 0; bucket < priv->numBuckets; bucket++) {
				SLIST_FOREACH(hent, &priv->tab[bucket], next) {
					memcpy(ary->hosts[i].addr,
					    hent->host.addr,
					    sizeof(ary->hosts[i].addr));
					ary->hosts[i].age = hent->host.age;
					ary->hosts[i].staleness = hent->host.staleness;
					strncpy(ary->hosts[i].hook,
					    NG_HOOK_NAME(hent->host.link->hook),
					    sizeof(ary->hosts[i].hook));
					i++;
				}
			}
			break;
		    }
		case NGM_BRIDGE_SET_PERSISTENT:
		    {
			priv->persistent = 1;
			break;
		    }
		default:
			error = EINVAL;
			break;
		}
		break;
	default:
		error = EINVAL;
		break;
	}

	/* Done */
	NG_RESPOND_MSG(error, node, item, resp);
	NG_FREE_MSG(msg);
	return (error);
}
/*
* Receive data on a hook
*/
/*
 * State shared between ng_bridge_rcvdata() and the per-hook callback
 * ng_bridge_send_ctx() while a packet is distributed to several links.
 */
struct ng_bridge_send_ctx {
/* foundFirst: first eligible outgoing link, reserved for the original mbuf;
 * incoming: the link the packet arrived on (never sent back there) */
link_p foundFirst, incoming;
/* the packet being distributed */
struct mbuf * m;
/* manycast: 0 = unicast, 1 = multicast, 2 = broadcast;
 * error: last error recorded while sending copies (0 if none) */
int manycast, error;
};
/*
 * Per-hook callback used while flooding a packet to all links.
 *
 * 'arg' is the struct ng_bridge_send_ctx prepared by the caller.  The
 * incoming link is skipped; the first other link seen is only remembered
 * in ctx->foundFirst (the caller sends the original mbuf there); every
 * further link gets its own duplicate of the packet.
 *
 * Returns 1 to continue with the next hook, or 0 to stop the iteration
 * when the packet cannot be duplicated.
 */
static int
ng_bridge_send_ctx(hook_p dst, void *arg)
{
	struct ng_bridge_send_ctx *const sc = arg;
	const link_p out = NG_HOOK_PRIVATE(dst);
	struct mbuf *copy;
	int rc = 0;

	/* Never send a packet back out the link it came in on */
	if (out == sc->incoming)
		return (1);

	/*
	 * The first usable link is reserved for the original packet,
	 * which saves one copy; nothing is sent to it from here.
	 */
	if (sc->foundFirst == NULL) {
		sc->foundFirst = out;
		return (1);
	}

	/* Any other usable link gets a private copy */
	copy = m_dup(sc->m, M_NOWAIT);	/* XXX m_copypacket() */
	if (copy == NULL) {
		sc->incoming->stats.memoryFailures++;
		sc->error = ENOBUFS;
		return (0);		/* abort loop */
	}

	/* Account for the transmission on the outgoing link */
	out->stats.xmitPackets++;
	out->stats.xmitOctets += copy->m_pkthdr.len;
	if (sc->manycast == 1)		/* multicast */
		out->stats.xmitMulticasts++;
	else if (sc->manycast == 2)	/* broadcast */
		out->stats.xmitBroadcasts++;

	/* Send the copy; remember a failure but keep going */
	NG_SEND_DATA_ONLY(rc, out->hook, copy);
	if (rc != 0)
		sc->error = rc;
	return (1);
}
/*
 * Receive data on a hook.
 *
 * Validates the Ethernet frame, learns (or refreshes) the source address in
 * the host table, performs loop detection, and then forwards the frame:
 * a unicast frame for a known host goes out that host's link only (or is
 * silently discarded if that is the incoming link); everything else is
 * flooded to all links other than the incoming one.
 *
 * Returns 0 on success, EINVAL for runts and frames with a group source
 * address, ELOOP when the frame is dropped because of a (suspected) loop,
 * ENOBUFS/ENOMEM on memory shortage, or the error reported while sending.
 * The item and the mbuf are always consumed.
 */
static int
ng_bridge_rcvdata(hook_p hook, item_p item)
{
	const node_p node = NG_HOOK_NODE(hook);
	const priv_p priv = NG_NODE_PRIVATE(node);
	struct ng_bridge_host *host;
	struct ether_header *eh;
	struct ng_bridge_send_ctx ctx = { 0 };
	link_p firstLink;
	hook_p ret;

	NGI_GET_M(item, ctx.m);
	ctx.incoming = NG_HOOK_PRIVATE(hook);

	/* Sanity check packet and pull up header */
	if (ctx.m->m_pkthdr.len < ETHER_HDR_LEN) {
		ctx.incoming->stats.recvRunts++;
		NG_FREE_ITEM(item);
		NG_FREE_M(ctx.m);
		return (EINVAL);
	}
	if (ctx.m->m_len < ETHER_HDR_LEN && !(ctx.m = m_pullup(ctx.m, ETHER_HDR_LEN))) {
		/* m_pullup() already freed the chain */
		ctx.incoming->stats.memoryFailures++;
		NG_FREE_ITEM(item);
		return (ENOBUFS);
	}
	eh = mtod(ctx.m, struct ether_header *);
	/* A group (multicast/broadcast) address is not a valid source */
	if ((eh->ether_shost[0] & 1) != 0) {
		ctx.incoming->stats.recvInvalid++;
		NG_FREE_ITEM(item);
		NG_FREE_M(ctx.m);
		return (EINVAL);
	}

	/* Is link disabled due to a loopback condition? */
	if (ctx.incoming->loopCount != 0) {
		ctx.incoming->stats.loopDrops++;
		NG_FREE_ITEM(item);
		NG_FREE_M(ctx.m);
		return (ELOOP);		/* XXX is this an appropriate error? */
	}

	/* Update stats */
	ctx.incoming->stats.recvPackets++;
	ctx.incoming->stats.recvOctets += ctx.m->m_pkthdr.len;
	if ((ctx.manycast = (eh->ether_dhost[0] & 1)) != 0) {
		if (ETHER_EQUAL(eh->ether_dhost, ng_bridge_bcast_addr)) {
			ctx.incoming->stats.recvBroadcasts++;
			ctx.manycast = 2;
		} else
			ctx.incoming->stats.recvMulticasts++;
	}

	/* Look up packet's source Ethernet address in hashtable */
	if ((host = ng_bridge_get(priv, eh->ether_shost)) != NULL) {

		/* Update time since last heard from this host */
		host->staleness = 0;

		/* Did host jump to a different link? */
		if (host->link != ctx.incoming) {

			/*
			 * If the host was only recently established on its
			 * old link and it has already jumped to a new link,
			 * declare a loopback condition.
			 */
			if (host->age < priv->conf.minStableAge) {

				/* Log the problem */
				if (priv->conf.debugLevel >= 2) {
					struct ifnet *ifp = ctx.m->m_pkthdr.rcvif;
					char suffix[32];

					if (ifp != NULL)
						snprintf(suffix, sizeof(suffix),
						    " (%s)", ifp->if_xname);
					else
						*suffix = '\0';
					log(LOG_WARNING, "ng_bridge: %s:"
					    " loopback detected on %s%s\n",
					    ng_bridge_nodename(node),
					    NG_HOOK_NAME(hook), suffix);
				}

				/* Mark link as linka non grata */
				ctx.incoming->loopCount = priv->conf.loopTimeout;
				ctx.incoming->stats.loopDetects++;

				/* Forget all hosts on this link */
				ng_bridge_remove_hosts(priv, ctx.incoming);

				/* Drop packet */
				ctx.incoming->stats.loopDrops++;
				NG_FREE_ITEM(item);
				NG_FREE_M(ctx.m);
				return (ELOOP);		/* XXX appropriate? */
			}

			/* Move host over to new link */
			host->link = ctx.incoming;
			host->age = 0;
		}
	} else {
		if (!ng_bridge_put(priv, eh->ether_shost, ctx.incoming)) {
			ctx.incoming->stats.memoryFailures++;
			NG_FREE_ITEM(item);
			NG_FREE_M(ctx.m);
			return (ENOMEM);
		}
	}

	/* Run packet through ipfw processing, if enabled */
#if 0
	if (priv->conf.ipfw[linkNum] && V_fw_enable && V_ip_fw_chk_ptr != NULL) {
		/* XXX not implemented yet */
	}
#endif

	/*
	 * If unicast and destination host known, deliver to host's link,
	 * unless it is the same link as the packet came in on.
	 */
	if (!ctx.manycast) {

		/* Determine packet destination link */
		if ((host = ng_bridge_get(priv, eh->ether_dhost)) != NULL) {
			link_p destLink = host->link;

			/* If destination same as incoming link, do nothing */
			if (destLink == ctx.incoming) {
				NG_FREE_ITEM(item);
				NG_FREE_M(ctx.m);
				return (0);
			}

			/* Deliver packet out the destination link */
			destLink->stats.xmitPackets++;
			destLink->stats.xmitOctets += ctx.m->m_pkthdr.len;
			NG_FWD_NEW_DATA(ctx.error, item, destLink->hook, ctx.m);
			return (ctx.error);
		}

		/* Destination host is not known */
		ctx.incoming->stats.recvUnknown++;
	}

	/* Distribute unknown, multicast, broadcast pkts to all other links */
	NG_NODE_FOREACH_HOOK(node, ng_bridge_send_ctx, &ctx, ret);

	/* If we never saw a good link, leave. */
	if (ctx.foundFirst == NULL || ctx.error != 0) {
		NG_FREE_ITEM(item);
		NG_FREE_M(ctx.m);
		return (ctx.error);
	}

	/*
	 * If we've sent all the others, send the original
	 * on the first link we found.
	 *
	 * The per-hook callback only reserves that link and does no
	 * accounting for it, so its transmit counters are updated here,
	 * while the mbuf is still ours to inspect.
	 */
	firstLink = ctx.foundFirst;
	firstLink->stats.xmitPackets++;
	firstLink->stats.xmitOctets += ctx.m->m_pkthdr.len;
	switch (ctx.manycast) {
	default:					/* unknown unicast */
		break;
	case 1:						/* multicast */
		firstLink->stats.xmitMulticasts++;
		break;
	case 2:						/* broadcast */
		firstLink->stats.xmitBroadcasts++;
		break;
	}
	NG_FWD_NEW_DATA(ctx.error, item, firstLink->hook, ctx.m);
	return (ctx.error);
}
/*
 * Node shutdown method.
 *
 * Stops the periodic timer, detaches the private data from the node,
 * drops a node reference and releases the host table together with
 * the private structure.  Always returns 0.
 */
static int
ng_bridge_shutdown(node_p node)
{
	const priv_p bridge = NG_NODE_PRIVATE(node);

	/*
	 * Everything goes away here, the timer included.  A callout
	 * that was already dequeued and is about to run will not end
	 * up in ng_bridge_timeout(): the node carries NGF_INVALID by
	 * now, so releasing the node is safe.
	 */

	/* All links and all learned hosts must be gone at this point. */
	KASSERT(bridge->numLinks == 0 && bridge->numHosts == 0,
	    ("%s: numLinks=%d numHosts=%d",
	    __func__, bridge->numLinks, bridge->numHosts));

	ng_uncallout(&bridge->timer, node);

	/* Detach the private data from the node before freeing it. */
	NG_NODE_SET_PRIVATE(node, NULL);
	NG_NODE_UNREF(node);

	free(bridge->tab, M_NETGRAPH_BRIDGE);
	free(bridge, M_NETGRAPH_BRIDGE);

	return (0);
}
/*
 * Hook disconnection method.
 *
 * Forgets every host learned through the hook's link, frees the link
 * record and updates the link count.  When the last hook disappears
 * from a still-valid, non-persistent node, the node removes itself.
 * Always returns 0.
 */
static int
ng_bridge_disconnect(hook_p hook)
{
	const priv_p priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hook));
	link_p gone = NG_HOOK_PRIVATE(hook);

	/* Purge the hosts first; they still point at the link record. */
	ng_bridge_remove_hosts(priv, gone);

	/* Now the link record itself can be released. */
	free(gone, M_NETGRAPH_BRIDGE);
	priv->numLinks--;

	/* Keep the node while hooks remain ... */
	if (NG_NODE_NUMHOOKS(NG_HOOK_NODE(hook)) != 0)
		return (0);
	/* ... or while it is already on its way out ... */
	if (!NG_NODE_IS_VALID(NG_HOOK_NODE(hook)))
		return (0);
	/* ... or when it was configured to outlive its hooks. */
	if (priv->persistent)
		return (0);

	ng_rmnode_self(NG_HOOK_NODE(hook));
	return (0);
}
/******************************************************************
HASH TABLE FUNCTIONS
******************************************************************/
/*
 * Hash algorithm
 *
 * Reads the address as three consecutive 16-bit words, XORs them
 * together and reduces the result with 'mask'.  'addr' is expanded
 * three times, so it must be free of side effects, and it has to
 * reference at least six readable bytes.
 * NOTE(review): the 16-bit loads presumably rely on 'addr' being
 * suitably aligned -- confirm for all callers.
 */
#define HASH(addr,mask)							\
	( (mask) & (   ((const u_int16_t *)(addr))[0]			\
		     ^ ((const u_int16_t *)(addr))[1]			\
		     ^ ((const u_int16_t *)(addr))[2] ) )
/*
 * Look up an Ethernet address in the host table.
 *
 * Returns a pointer to the matching host record, or NULL when the
 * address is not in the table.
 */
static struct ng_bridge_host *
ng_bridge_get(priv_p priv, const u_char *addr)
{
	struct ng_bridge_hent *entry;
	const int slot = HASH(addr, priv->hashMask);

	/* Walk the single chain the address hashes to. */
	SLIST_FOREACH(entry, &priv->tab[slot], next) {
		if (!ETHER_EQUAL(entry->host.addr, addr))
			continue;
		return (&entry->host);
	}

	return (NULL);
}
/*
 * Insert a host into the table and associate it with 'link'.
 *
 * The caller must make sure the address is not present yet; kernels
 * built with INVARIANTS assert this.  Returns 1 when the entry was
 * added and 0 when no memory was available for it.
 */
static int
ng_bridge_put(priv_p priv, const u_char *addr, link_p link)
{
	struct ng_bridge_hent *entry;
	const int slot = HASH(addr, priv->hashMask);

#ifdef INVARIANTS
	/* Catch callers that try to insert a duplicate address */
	SLIST_FOREACH(entry, &priv->tab[slot], next) {
		KASSERT(!ETHER_EQUAL(entry->host.addr, addr),
		    ("%s: entry %6D exists in table", __func__, addr, ":"));
	}
#endif

	/* Non-sleeping allocation; failure is reported to the caller */
	entry = malloc(sizeof(*entry), M_NETGRAPH_BRIDGE, M_NOWAIT);
	if (entry == NULL)
		return (0);

	/* A fresh host starts with zero age and zero staleness */
	entry->host.link = link;
	entry->host.age = 0;
	entry->host.staleness = 0;
	bcopy(addr, entry->host.addr, ETHER_ADDR_LEN);

	/* Link it at the front of its chain */
	SLIST_INSERT_HEAD(&priv->tab[slot], entry, next);
	priv->numHosts++;

	/* The table may have become too dense; let rehash decide */
	ng_bridge_rehash(priv);

	return (1);
}
/*
 * Grow or shrink the hash table so that the load factor stays
 * roughly between 0.25 and 1.0.
 *
 * The table is doubled when there are more hosts than buckets and
 * cut to a quarter when there are fewer hosts than a quarter of the
 * buckets, always staying within MIN_BUCKETS..MAX_BUCKETS.
 *
 * Failing to allocate the new table is not an error: the old table
 * keeps working and a later call simply tries again.
 */
static void
ng_bridge_rehash(priv_p priv)
{
	struct ng_bridge_bucket *table;
	int src, dst;
	int nbuckets;
	u_int mask;

	/* Decide on the new size, or leave if the current one is fine */
	if (priv->numHosts > priv->numBuckets
	    && (priv->numBuckets << 1) <= MAX_BUCKETS) {
		nbuckets = priv->numBuckets << 1;
	} else if (priv->numHosts < (priv->numBuckets >> 2)
	    && (priv->numBuckets >> 2) >= MIN_BUCKETS) {
		nbuckets = priv->numBuckets >> 2;
	} else {
		return;
	}

	/*
	 * NOTE(review): using size - 1 as the hash mask presumably
	 * relies on the bucket count always being a power of two.
	 */
	mask = nbuckets - 1;

	/* Get a zeroed table, i.e. one made of empty chains */
	table = malloc(nbuckets * sizeof(*table),
	    M_NETGRAPH_BRIDGE, M_NOWAIT | M_ZERO);
	if (table == NULL)
		return;

	/* Drain every old chain into the chain selected by the new mask */
	for (src = 0; src < priv->numBuckets; src++) {
		struct ng_bridge_bucket *const chain = &priv->tab[src];
		struct ng_bridge_hent *hent;

		while ((hent = SLIST_FIRST(chain)) != NULL) {
			SLIST_REMOVE_HEAD(chain, next);
			dst = HASH(hent->host.addr, mask);
			SLIST_INSERT_HEAD(&table[dst], hent, next);
		}
	}

	/* Report the resize while the old bucket count is still at hand */
	if (priv->conf.debugLevel >= 3) {
		log(LOG_INFO, "ng_bridge: %s: table size %d -> %d\n",
		    ng_bridge_nodename(priv->node),
		    priv->numBuckets, nbuckets);
	}

	/* Swap in the new table */
	free(priv->tab, M_NETGRAPH_BRIDGE);
	priv->numBuckets = nbuckets;
	priv->hashMask = mask;
	priv->tab = table;
}
/******************************************************************
MISC FUNCTIONS
******************************************************************/
/*
 * Remove from the hashtable all hosts that were learned on 'link'.
 * If 'link' is NULL, remove every host in the table.
 *
 * The table is not resized here; only priv->numHosts is updated.
 */
static void
ng_bridge_remove_hosts(priv_p priv, link_p link)
{
	int i;

	for (i = 0; i < priv->numBuckets; i++) {
		/*
		 * 'prevp' always addresses the pointer that leads to the
		 * entry under inspection, so an entry can be unlinked
		 * without walking the chain a second time.
		 */
		struct ng_bridge_hent **prevp = &SLIST_FIRST(&priv->tab[i]);
		struct ng_bridge_hent *hent;

		while ((hent = *prevp) != NULL) {
			/* Entry belongs to another link: keep it */
			if (link != NULL && hent->host.link != link) {
				prevp = &SLIST_NEXT(hent, next);
				continue;
			}

			/* Unlink and release the entry */
			*prevp = SLIST_NEXT(hent, next);
			free(hent, M_NETGRAPH_BRIDGE);
			priv->numHosts--;
		}
	}
}
/*
 * Handle our once-per-second timeout event.  Two things happen on
 * each tick: link->loopCount is decremented for links that are muted
 * because a loopback condition was detected, and hosts we have not
 * heard from in a long while are dropped from the hashtable.
 *
 * ng_bridge_unmute() is the per-hook part of the first task.  'arg'
 * points to an int that is incremented once for every hook visited.
 * The function always returns 1.
 * NOTE(review): presumably a non-zero return tells
 * NG_NODE_FOREACH_HOOK to keep iterating -- confirm.
 */
static int
ng_bridge_unmute(hook_p hook, void *arg)
{
	link_p link = NG_HOOK_PRIVATE(hook);
	node_p node = NG_HOOK_NODE(hook);
	priv_p priv = NG_NODE_PRIVATE(node);
	int *visited = arg;

	/*
	 * Count a muted link down by one; when it reaches zero the
	 * link is restored, which is logged at debug level 2 and up.
	 */
	if (link->loopCount != 0
	    && --link->loopCount == 0
	    && priv->conf.debugLevel >= 2) {
		log(LOG_INFO, "ng_bridge: %s:"
		    " restoring looped back %s\n",
		    ng_bridge_nodename(node), NG_HOOK_NAME(hook));
	}

	*visited += 1;
	return (1);
}
/*
 * Periodic timer handler.
 *
 * Ages every host entry and drops those whose staleness has reached
 * the configured maximum, gives the table a chance to shrink, counts
 * down the mute timers of looped back links and finally re-arms
 * itself to run again after 'hz' ticks.  The 'hook', 'arg1' and
 * 'arg2' arguments are not used.
 */
static void
ng_bridge_timeout(node_p node, hook_p hook, void *arg1, int arg2)
{
	const priv_p priv = NG_NODE_PRIVATE(node);
	int i;
	int counter;
	hook_p ret;

	/* Pass 1: age the hosts, unlinking the stale ones on the fly */
	counter = 0;
	for (i = 0; i < priv->numBuckets; i++) {
		struct ng_bridge_hent **prevp = &SLIST_FIRST(&priv->tab[i]);
		struct ng_bridge_hent *hent;

		while ((hent = *prevp) != NULL) {
			/* Not heard from for too long: forget the host */
			if (++hent->host.staleness >= priv->conf.maxStaleness) {
				*prevp = SLIST_NEXT(hent, next);
				free(hent, M_NETGRAPH_BRIDGE);
				priv->numHosts--;
				continue;
			}

			/* Survivor: its age saturates at 0xffff */
			if (hent->host.age < 0xffff)
				hent->host.age++;
			prevp = &SLIST_NEXT(hent, next);
			counter++;
		}
	}
	KASSERT(priv->numHosts == counter,
	    ("%s: hosts: %d != %d", __func__, priv->numHosts, counter));

	/* Hosts may have been dropped, so the table might shrink now */
	ng_bridge_rehash(priv);

	/* Pass 2: tick the mute counters; 'counter' now counts hooks */
	counter = 0;
	NG_NODE_FOREACH_HOOK(node, ng_bridge_unmute, &counter, ret);
	KASSERT(priv->numLinks == counter,
	    ("%s: links: %d != %d", __func__, priv->numLinks, counter));

	/* Re-arm the timer; the existing node reference is kept */
	ng_callout(&priv->timer, node, NULL, hz, ng_bridge_timeout, NULL, 0);
}
/*
 * Produce a printable identification for a node: its name when it
 * has one, otherwise its ID in hexadecimal inside square brackets.
 *
 * The string lives in a single static buffer, so every call
 * overwrites the result of the previous one.
 */
static const char *
ng_bridge_nodename(node_p node)
{
	static char name[NG_NODESIZ];

	if (!NG_NODE_HAS_NAME(node))
		snprintf(name, sizeof(name), "[%x]", ng_node2ID(node));
	else
		snprintf(name, sizeof(name), "%s", NG_NODE_NAME(node));

	return name;
}