2005-01-07 01:45:51 +00:00
|
|
|
/*-
|
1994-05-24 10:09:53 +00:00
|
|
|
* Copyright (c) 1982, 1989, 1993
|
|
|
|
* The Regents of the University of California. All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 4. Neither the name of the University nor the names of its contributors
|
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*
|
|
|
|
* @(#)if_ethersubr.c 8.1 (Berkeley) 6/10/93
|
1999-08-28 01:08:13 +00:00
|
|
|
* $FreeBSD$
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
|
|
|
|
1998-01-09 00:51:57 +00:00
|
|
|
#include "opt_atalk.h"
|
1998-01-08 23:42:31 +00:00
|
|
|
#include "opt_inet.h"
|
1999-12-07 17:39:16 +00:00
|
|
|
#include "opt_inet6.h"
|
1997-12-15 20:31:25 +00:00
|
|
|
#include "opt_ipx.h"
|
1999-10-21 09:06:11 +00:00
|
|
|
#include "opt_netgraph.h"
|
2005-02-22 13:04:05 +00:00
|
|
|
#include "opt_carp.h"
|
2008-04-29 21:23:21 +00:00
|
|
|
#include "opt_mbuf_profiling.h"
|
1997-12-15 20:31:25 +00:00
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <sys/param.h>
|
|
|
|
#include <sys/systm.h>
|
|
|
|
#include <sys/kernel.h>
|
Conditionally compile out V_ globals while instantiating the appropriate
container structures, depending on VIMAGE_GLOBALS compile time option.
Make VIMAGE_GLOBALS a new compile-time option, which by default will not
be defined, resulting in instantiations of global variables selected for
V_irtualization (enclosed in #ifdef VIMAGE_GLOBALS blocks) to be
effectively compiled out. Instantiate new global container structures
to hold V_irtualized variables: vnet_net_0, vnet_inet_0, vnet_inet6_0,
vnet_ipsec_0, vnet_netgraph_0, and vnet_gif_0.
Update the VSYM() macro so that depending on VIMAGE_GLOBALS the V_
macros resolve either to the original globals, or to fields inside
container structures, i.e. effectively
#ifdef VIMAGE_GLOBALS
#define V_rt_tables rt_tables
#else
#define V_rt_tables vnet_net_0._rt_tables
#endif
Update SYSCTL_V_*() macros to operate either on globals or on fields
inside container structs.
Extend the internal kldsym() lookups with the ability to resolve
selected fields inside the virtualization container structs. This
applies only to the fields which are explicitly registered for kldsym()
visibility via VNET_MOD_DECLARE() and vnet_mod_register(), currently
this is done only in sys/net/if.c.
Fix a few broken instances of MODULE_GLOBAL() macro use in SCTP code,
and modify the MODULE_GLOBAL() macro to resolve to V_ macros, which in
turn result in proper code being generated depending on VIMAGE_GLOBALS.
De-virtualize local static variables in sys/contrib/pf/net/pf_subr.c
which were prematurely V_irtualized by automated V_ prepending scripts
during earlier merging steps. PF virtualization will be done
separately, most probably after next PF import.
Convert a few variable initializations at instantiation to
initialization in init functions, most notably in ipfw. Also convert
TUNABLE_INT() initializers for V_ variables to TUNABLE_FETCH_INT() in
initializer functions.
Discussed at: devsummit Strassburg
Reviewed by: bz, julian
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-12-10 23:12:39 +00:00
|
|
|
#include <sys/lock.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <sys/malloc.h>
|
2004-05-30 17:57:46 +00:00
|
|
|
#include <sys/module.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <sys/mbuf.h>
|
2001-02-18 17:54:52 +00:00
|
|
|
#include <sys/random.h>
|
Conditionally compile out V_ globals while instantiating the appropriate
container structures, depending on VIMAGE_GLOBALS compile time option.
Make VIMAGE_GLOBALS a new compile-time option, which by default will not
be defined, resulting in instantiations of global variables selected for
V_irtualization (enclosed in #ifdef VIMAGE_GLOBALS blocks) to be
effectively compiled out. Instantiate new global container structures
to hold V_irtualized variables: vnet_net_0, vnet_inet_0, vnet_inet6_0,
vnet_ipsec_0, vnet_netgraph_0, and vnet_gif_0.
Update the VSYM() macro so that depending on VIMAGE_GLOBALS the V_
macros resolve either to the original globals, or to fields inside
container structures, i.e. effectively
#ifdef VIMAGE_GLOBALS
#define V_rt_tables rt_tables
#else
#define V_rt_tables vnet_net_0._rt_tables
#endif
Update SYSCTL_V_*() macros to operate either on globals or on fields
inside container structs.
Extend the internal kldsym() lookups with the ability to resolve
selected fields inside the virtualization container structs. This
applies only to the fields which are explicitly registered for kldsym()
visibility via VNET_MOD_DECLARE() and vnet_mod_register(), currently
this is done only in sys/net/if.c.
Fix a few broken instances of MODULE_GLOBAL() macro use in SCTP code,
and modify the MODULE_GLOBAL() macro to resolve to V_ macros, which in
turn result in proper code being generated depending on VIMAGE_GLOBALS.
De-virtualize local static variables in sys/contrib/pf/net/pf_subr.c
which were prematurely V_irtualized by automated V_ prepending scripts
during earlier merging steps. PF virtualization will be done
separately, most probably after next PF import.
Convert a few variable initializations at instantiation to
initialization in init functions, most notably in ipfw. Also convert
TUNABLE_INT() initializers for V_ variables to TUNABLE_FETCH_INT() in
initializer functions.
Discussed at: devsummit Strassburg
Reviewed by: bz, julian
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-12-10 23:12:39 +00:00
|
|
|
#include <sys/rwlock.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <sys/socket.h>
|
1997-03-24 11:33:46 +00:00
|
|
|
#include <sys/sockio.h>
|
1995-12-20 21:53:53 +00:00
|
|
|
#include <sys/sysctl.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
|
|
|
|
#include <net/if.h>
|
2004-06-24 10:58:08 +00:00
|
|
|
#include <net/if_arp.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <net/netisr.h>
|
|
|
|
#include <net/route.h>
|
|
|
|
#include <net/if_llc.h>
|
|
|
|
#include <net/if_dl.h>
|
|
|
|
#include <net/if_types.h>
|
2000-05-14 02:18:43 +00:00
|
|
|
#include <net/bpf.h>
|
2000-06-26 23:34:54 +00:00
|
|
|
#include <net/ethernet.h>
|
2005-10-14 02:38:47 +00:00
|
|
|
#include <net/if_bridgevar.h>
|
2002-11-14 23:35:06 +00:00
|
|
|
#include <net/if_vlan_var.h>
|
The main goals of this project are:
1. separating L2 tables (ARP, NDP) from the L3 routing tables
2. removing as many locking dependencies among these layers as
possible to allow for some parallelism in the search operations
3. simplifying the logic in the routing code.
The most notable end result is the obsolescence of the route
cloning (RTF_CLONING) concept, which translated into code reduction
in both IPv4 ARP and IPv6 NDP related modules, and size reduction in
struct rtentry{}. The change in design obsoletes the semantics of
RTF_CLONING, RTF_WASCLONE and RTF_LLINFO routing flags. The userland
applications such as "arp" and "ndp" have been modified to reflect
those changes. The output from "netstat -r" shows only the routing
entries.
Quite a few developers have contributed to this project in the
past: Glebius Smirnoff, Luigi Rizzo, Alessandro Cerri, and
Andre Oppermann. And most recently:
- Kip Macy revised the locking code completely, thus completing
the last piece of the puzzle, Kip has also been conducting
active functional testing
- Sam Leffler has helped me improve/refactor the code, and
provided valuable reviews
- Julian Elischer set up the perforce tree for me and has helped
me maintain that branch before the svn conversion
2008-12-15 06:10:57 +00:00
|
|
|
#include <net/if_llatbl.h>
|
2007-07-03 12:46:08 +00:00
|
|
|
#include <net/pf_mtag.h>
|
2008-12-02 21:37:28 +00:00
|
|
|
#include <net/vnet.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
|
1999-11-22 02:45:11 +00:00
|
|
|
#if defined(INET) || defined(INET6)
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <netinet/in.h>
|
|
|
|
#include <netinet/in_var.h>
|
|
|
|
#include <netinet/if_ether.h>
|
2002-05-13 10:37:19 +00:00
|
|
|
#include <netinet/ip_fw.h>
|
2009-12-15 16:15:14 +00:00
|
|
|
#include <netinet/ipfw/ip_fw_private.h>
|
2002-05-13 10:37:19 +00:00
|
|
|
#include <netinet/ip_dummynet.h>
|
Build on Jeff Roberson's linker-set based dynamic per-CPU allocator
(DPCPU), as suggested by Peter Wemm, and implement a new per-virtual
network stack memory allocator. Modify vnet to use the allocator
instead of monolithic global container structures (vinet, ...). This
change solves many binary compatibility problems associated with
VIMAGE, and restores ELF symbols for virtualized global variables.
Each virtualized global variable exists as a "reference copy", and also
once per virtual network stack. Virtualized global variables are
tagged at compile-time, placing them in a special linker set, which is
loaded into a contiguous region of kernel memory. Virtualized global
variables in the base kernel are linked as normal, but those in modules
are copied and relocated to a reserved portion of the kernel's vnet
region with the help of the kernel linker.
Virtualized global variables exist in per-vnet memory set up when the
network stack instance is created, and are initialized statically from
the reference copy. Run-time access occurs via an accessor macro, which
converts from the current vnet and requested symbol to a per-vnet
address. When "options VIMAGE" is not compiled into the kernel, normal
global ELF symbols will be used instead and indirection is avoided.
This change restores static initialization for network stack global
variables, restores support for non-global symbols and types, eliminates
the need for many subsystem constructors, eliminates large per-subsystem
structures that caused many binary compatibility issues both for
monitoring applications (netstat) and kernel modules, removes the
per-function INIT_VNET_*() macros throughout the stack, eliminates the
need for vnet_symmap ksym(2) munging, and eliminates duplicate
definitions of virtualized globals under VIMAGE_GLOBALS.
Bump __FreeBSD_version and update UPDATING.
Portions submitted by: bz
Reviewed by: bz, zec
Discussed with: gnn, jamie, jeff, jhb, julian, sam
Suggested by: peter
Approved by: re (kensmith)
2009-07-14 22:48:30 +00:00
|
|
|
#include <netinet/ip_var.h>
|
1998-01-08 23:42:31 +00:00
|
|
|
#endif
|
1999-11-22 02:45:11 +00:00
|
|
|
#ifdef INET6
|
|
|
|
#include <netinet6/nd6.h>
|
|
|
|
#endif
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2009-06-11 10:26:38 +00:00
|
|
|
#if defined(INET) || defined(INET6)
|
2005-02-22 13:04:05 +00:00
|
|
|
#ifdef DEV_CARP
|
|
|
|
#include <netinet/ip_carp.h>
|
|
|
|
#endif
|
2009-06-11 10:26:38 +00:00
|
|
|
#endif
|
2005-02-22 13:04:05 +00:00
|
|
|
|
1995-10-26 20:31:59 +00:00
|
|
|
#ifdef IPX
|
|
|
|
#include <netipx/ipx.h>
|
|
|
|
#include <netipx/ipx_if.h>
|
2005-03-06 22:59:40 +00:00
|
|
|
#endif
|
The main goals of this project are:
1. separating L2 tables (ARP, NDP) from the L3 routing tables
2. removing as many locking dependencies among these layers as
possible to allow for some parallelism in the search operations
3. simplifying the logic in the routing code.
The most notable end result is the obsolescence of the route
cloning (RTF_CLONING) concept, which translated into code reduction
in both IPv4 ARP and IPv6 NDP related modules, and size reduction in
struct rtentry{}. The change in design obsoletes the semantics of
RTF_CLONING, RTF_WASCLONE and RTF_LLINFO routing flags. The userland
applications such as "arp" and "ndp" have been modified to reflect
those changes. The output from "netstat -r" shows only the routing
entries.
Quite a few developers have contributed to this project in the
past: Glebius Smirnoff, Luigi Rizzo, Alessandro Cerri, and
Andre Oppermann. And most recently:
- Kip Macy revised the locking code completely, thus completing
the last piece of the puzzle, Kip has also been conducting
active functional testing
- Sam Leffler has helped me improve/refactor the code, and
provided valuable reviews
- Julian Elischer set up the perforce tree for me and has helped
me maintain that branch before the svn conversion
2008-12-15 06:10:57 +00:00
|
|
|
|
1999-12-13 16:24:22 +00:00
|
|
|
/*
 * Input hook pointer.  It has no initializer, so it is NULL until some
 * other code assigns it.  It is not called in the portion of this file
 * visible here.
 * NOTE(review): the "ef_" prefix presumably refers to if_ef(4) -- confirm.
 */
int (*ef_inputp)(struct ifnet*, struct ether_header *eh, struct mbuf *m);
|
2000-04-27 10:13:52 +00:00
|
|
|
/*
 * Optional output hook (NULL until assigned).  ether_output() calls it for
 * AF_IPX destinations when it is non-NULL, handing it pointers to the mbuf
 * chain (mp), the frame type (tp) and the link-header length (hlen); a
 * nonzero return value makes ether_output() abandon the packet.  When the
 * hook is NULL the type defaults to ETHERTYPE_IPX.
 */
int (*ef_outputp)(struct ifnet *ifp, struct mbuf **mp,
|
2000-09-30 14:33:53 +00:00
|
|
|
struct sockaddr *dst, short *tp, int *hlen);
|
1995-10-26 20:31:59 +00:00
|
|
|
|
1996-05-24 01:35:45 +00:00
|
|
|
#ifdef NETATALK
|
|
|
|
#include <netatalk/at.h>
|
|
|
|
#include <netatalk/at_var.h>
|
|
|
|
#include <netatalk/at_extern.h>
|
|
|
|
|
|
|
|
/*
 * Shorthands for the SNAP fields nested inside struct llc
 * (llc_un.type_snap).  ether_output() uses them when it fills in the
 * LLC/SNAP header for AppleTalk phase 2 frames.
 */
#define llc_snap_org_code llc_un.type_snap.org_code
|
|
|
|
#define llc_snap_ether_type llc_un.type_snap.ether_type
|
|
|
|
|
1998-03-18 01:40:12 +00:00
|
|
|
extern u_char at_org_code[3];
|
|
|
|
extern u_char aarp_org_code[3];
|
|
|
|
#endif /* NETATALK */
|
|
|
|
|
2006-10-22 11:52:19 +00:00
|
|
|
#include <security/mac/mac_framework.h>
|
|
|
|
|
2008-08-27 17:10:37 +00:00
|
|
|
/*
 * Compile-time layout checks, evaluated only when CTASSERT is available:
 * struct ether_header must be exactly two addresses plus two more bytes,
 * and struct ether_addr exactly ETHER_ADDR_LEN bytes, i.e. neither
 * structure may contain padding.
 */
#ifdef CTASSERT
|
|
|
|
CTASSERT(sizeof (struct ether_header) == ETHER_ADDR_LEN * 2 + 2);
|
|
|
|
CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN);
|
|
|
|
#endif
|
|
|
|
|
2000-06-26 23:34:54 +00:00
|
|
|
/*
 * netgraph node hooks for ng_ether(4).
 *
 * All five are file-scope function pointers without initializers, so they
 * are NULL until something assigns them.  None of them is called in the
 * portion of this file visible here.
 * NOTE(review): presumably ng_ether fills them in when it is loaded --
 * confirm against the ng_ether sources.
 */
|
2002-11-14 23:35:06 +00:00
|
|
|
void (*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp);
|
|
|
|
void (*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m);
|
2000-06-26 23:34:54 +00:00
|
|
|
int (*ng_ether_output_p)(struct ifnet *ifp, struct mbuf **mp);
|
|
|
|
void (*ng_ether_attach_p)(struct ifnet *ifp);
|
|
|
|
void (*ng_ether_detach_p)(struct ifnet *ifp);
|
|
|
|
|
2003-05-05 09:15:50 +00:00
|
|
|
/*
 * Input hook pointer taking the receiving interface and an mbuf; NULL until
 * assigned.  It is not called in the portion of this file visible here.
 * NOTE(review): presumably installed by the vlan driver -- confirm.
 */
void (*vlan_input_p)(struct ifnet *, struct mbuf *);
|
2001-09-05 21:10:28 +00:00
|
|
|
|
2005-10-14 02:38:47 +00:00
|
|
|
/*
 * if_bridge(4) support.
 *
 * Function-pointer hooks without initializers (NULL until assigned).  None
 * of them is called in the portion of this file visible here.
 */
|
2005-06-05 03:13:13 +00:00
|
|
|
struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *);
|
2005-06-10 01:25:22 +00:00
|
|
|
int (*bridge_output_p)(struct ifnet *, struct mbuf *,
|
2005-06-05 03:13:13 +00:00
|
|
|
struct sockaddr *, struct rtentry *);
|
2005-06-10 01:25:22 +00:00
|
|
|
void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
|
2005-06-05 03:13:13 +00:00
|
|
|
|
2007-04-17 00:35:11 +00:00
|
|
|
/*
 * if_lagg(4) support.
 *
 * Input hook pointer without an initializer (NULL until assigned); it is
 * not called in the portion of this file visible here.
 */
|
|
|
|
struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *);
|
2007-04-10 00:27:25 +00:00
|
|
|
|
2004-03-09 23:55:59 +00:00
|
|
|
/*
 * The all-ones Ethernet address (ff:ff:ff:ff:ff:ff).  ETHER_IS_BROADCAST()
 * compares candidate addresses against this table.
 */
static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
|
2003-03-21 17:53:16 +00:00
|
|
|
{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
|
|
|
|
|
2002-03-19 21:54:18 +00:00
|
|
|
/*
 * Forward declaration of a file-local function; its definition is not in
 * the portion of this file visible here.
 */
static int ether_resolvemulti(struct ifnet *, struct sockaddr **,
|
2002-06-23 11:19:53 +00:00
|
|
|
struct sockaddr *);
|
2003-03-21 17:53:16 +00:00
|
|
|
|
2005-06-10 16:49:24 +00:00
|
|
|
/*
 * Defines the M_ARPCOM malloc type (short name "arpcom").
 * XXX: should be in an arp support file, not here
 */
|
|
|
|
MALLOC_DEFINE(M_ARPCOM, "arpcom", "802.* interface internals");
|
|
|
|
|
2007-03-19 18:39:36 +00:00
|
|
|
/*
 * Evaluates to nonzero when the ETHER_ADDR_LEN bytes at (addr) are
 * identical to etherbroadcastaddr, and to zero otherwise.
 */
#define ETHER_IS_BROADCAST(addr)	\
	(!bcmp((addr), etherbroadcastaddr, ETHER_ADDR_LEN))
|
|
|
|
|
1999-01-31 08:17:16 +00:00
|
|
|
/*
 * Store the error code (e) in the local variable `error` and jump to the
 * `bad` label; both must exist in any function that uses this macro.
 */
#define senderr(e)	\
	do {	\
		error = (e);	\
		goto bad;	\
	} while (0)
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2004-06-24 10:58:08 +00:00
|
|
|
#if defined(INET) || defined(INET6)
|
2002-05-13 10:37:19 +00:00
|
|
|
/*
 * Prototype only (compiled when INET or INET6 is defined); the definition
 * is not in the portion of this file visible here.
 */
int
|
2009-06-09 21:27:11 +00:00
|
|
|
ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, int shared);
|
Build on Jeff Roberson's linker-set based dynamic per-CPU allocator
(DPCPU), as suggested by Peter Wemm, and implement a new per-virtual
network stack memory allocator. Modify vnet to use the allocator
instead of monolithic global container structures (vinet, ...). This
change solves many binary compatibility problems associated with
VIMAGE, and restores ELF symbols for virtualized global variables.
Each virtualized global variable exists as a "reference copy", and also
once per virtual network stack. Virtualized global variables are
tagged at compile-time, placing them in a special linker set, which is
loaded into a contiguous region of kernel memory. Virtualized global
variables in the base kernel are linked as normal, but those in modules
are copied and relocated to a reserved portion of the kernel's vnet
region with the help of the kernel linker.
Virtualized global variables exist in per-vnet memory set up when the
network stack instance is created, and are initialized statically from
the reference copy. Run-time access occurs via an accessor macro, which
converts from the current vnet and requested symbol to a per-vnet
address. When "options VIMAGE" is not compiled into the kernel, normal
global ELF symbols will be used instead and indirection is avoided.
This change restores static initialization for network stack global
variables, restores support for non-global symbols and types, eliminates
the need for many subsystem constructors, eliminates large per-subsystem
structures that caused many binary compatibility issues both for
monitoring applications (netstat) and kernel modules, removes the
per-function INIT_VNET_*() macros throughout the stack, eliminates the
need for vnet_symmap ksym(2) munging, and eliminates duplicate
definitions of virtualized globals under VIMAGE_GLOBALS.
Bump __FreeBSD_version and update UPDATING.
Portions submitted by: bz
Reviewed by: bz, zec
Discussed with: gnn, jamie, jeff, jhb, julian, sam
Suggested by: peter
Approved by: re (kensmith)
2009-07-14 22:48:30 +00:00
|
|
|
/*
 * File-local integer defined through VNET_DEFINE(); code in this file is
 * expected to reach it through the V_ether_ipfw accessor rather than by
 * its bare name.  It is not read or written in the portion of this file
 * visible here.
 */
static VNET_DEFINE(int, ether_ipfw);
|
2009-07-16 21:13:04 +00:00
|
|
|
#define V_ether_ipfw VNET(ether_ipfw)
|
2008-11-19 09:39:34 +00:00
|
|
|
#endif
|
2002-05-13 10:37:19 +00:00
|
|
|
|
The main goals of this project are:
1. separating L2 tables (ARP, NDP) from the L3 routing tables
2. removing as many locking dependencies among these layers as
possible to allow for some parallelism in the search operations
3. simplifying the logic in the routing code.
The most notable end result is the obsolescence of the route
cloning (RTF_CLONING) concept, which translated into code reduction
in both IPv4 ARP and IPv6 NDP related modules, and size reduction in
struct rtentry{}. The change in design obsoletes the semantics of
RTF_CLONING, RTF_WASCLONE and RTF_LLINFO routing flags. The userland
applications such as "arp" and "ndp" have been modified to reflect
those changes. The output from "netstat -r" shows only the routing
entries.
Quite a few developers have contributed to this project in the
past: Glebius Smirnoff, Luigi Rizzo, Alessandro Cerri, and
Andre Oppermann. And most recently:
- Kip Macy revised the locking code completely, thus completing
the last piece of the puzzle, Kip has also been conducting
active functional testing
- Sam Leffler has helped me improve/refactor the code, and
provided valuable reviews
- Julian Elischer set up the perforce tree for me and has helped
me maintain that branch before the svn conversion
2008-12-15 06:10:57 +00:00
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* Ethernet output routine.
|
|
|
|
* Encapsulate a packet of type family for the local net.
|
|
|
|
* Use trailer local net encapsulation if enough data in first
|
|
|
|
* packet leaves a multiple of 512 bytes of data in remainder.
|
|
|
|
*/
|
|
|
|
int
|
2003-10-23 13:49:10 +00:00
|
|
|
ether_output(struct ifnet *ifp, struct mbuf *m,
|
2009-04-16 20:30:28 +00:00
|
|
|
struct sockaddr *dst, struct route *ro)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
|
|
|
short type;
|
2009-04-16 20:30:28 +00:00
|
|
|
int error = 0, hdrcmplt = 0;
|
2003-10-23 15:09:34 +00:00
|
|
|
u_char esrc[ETHER_ADDR_LEN], edst[ETHER_ADDR_LEN];
|
The main goals of this project are:
1. separating L2 tables (ARP, NDP) from the L3 routing tables
2. removing as many locking dependencies among these layers as
possible to allow for some parallelism in the search operations
3. simplifying the logic in the routing code.
The most notable end result is the obsolescence of the route
cloning (RTF_CLONING) concept, which translated into code reduction
in both IPv4 ARP and IPv6 NDP related modules, and size reduction in
struct rtentry{}. The change in design obsoletes the semantics of
RTF_CLONING, RTF_WASCLONE and RTF_LLINFO routing flags. The userland
applications such as "arp" and "ndp" have been modified to reflect
those changes. The output from "netstat -r" shows only the routing
entries.
Quite a few developers have contributed to this project in the
past: Glebius Smirnoff, Luigi Rizzo, Alessandro Cerri, and
Andre Oppermann. And most recently:
- Kip Macy revised the locking code completely, thus completing
the last piece of the puzzle, Kip has also been conducting
active functional testing
- Sam Leffler has helped me improve/refactor the code, and
provided valuable reviews
- Julian Elischer set up the perforce tree for me and has helped
me maintain that branch before the svn conversion
2008-12-15 06:10:57 +00:00
|
|
|
struct llentry *lle = NULL;
|
2009-04-16 20:30:28 +00:00
|
|
|
struct rtentry *rt0 = NULL;
|
2003-03-03 00:21:52 +00:00
|
|
|
struct ether_header *eh;
|
2007-07-03 12:46:08 +00:00
|
|
|
struct pf_mtag *t;
|
2005-12-22 12:16:20 +00:00
|
|
|
int loop_copy = 1;
|
2003-10-12 20:51:26 +00:00
|
|
|
int hlen; /* link layer header length */
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2009-04-16 20:30:28 +00:00
|
|
|
if (ro != NULL) {
|
2009-07-28 17:16:54 +00:00
|
|
|
if (!(m->m_flags & (M_BCAST | M_MCAST)))
|
|
|
|
lle = ro->ro_lle;
|
2009-04-16 20:30:28 +00:00
|
|
|
rt0 = ro->ro_rt;
|
|
|
|
}
|
2002-07-31 16:22:02 +00:00
|
|
|
#ifdef MAC
|
2007-10-24 19:04:04 +00:00
|
|
|
error = mac_ifnet_check_transmit(ifp, m);
|
2002-07-31 16:22:02 +00:00
|
|
|
if (error)
|
|
|
|
senderr(error);
|
|
|
|
#endif
|
|
|
|
|
2008-04-29 21:23:21 +00:00
|
|
|
M_PROFILE(m);
|
2002-09-27 18:57:47 +00:00
|
|
|
if (ifp->if_flags & IFF_MONITOR)
|
|
|
|
senderr(ENETDOWN);
|
2005-08-09 10:20:02 +00:00
|
|
|
if (!((ifp->if_flags & IFF_UP) &&
|
|
|
|
(ifp->if_drv_flags & IFF_DRV_RUNNING)))
|
1994-05-24 10:09:53 +00:00
|
|
|
senderr(ENETDOWN);
|
2003-03-02 21:34:37 +00:00
|
|
|
|
1998-08-04 23:17:05 +00:00
|
|
|
hlen = ETHER_HDR_LEN;
|
1994-05-24 10:09:53 +00:00
|
|
|
switch (dst->sa_family) {
|
|
|
|
#ifdef INET
|
|
|
|
case AF_INET:
|
2009-04-16 20:30:28 +00:00
|
|
|
if (lle != NULL && (lle->la_flags & LLE_VALID))
|
|
|
|
memcpy(edst, &lle->ll_addr.mac16, sizeof(edst));
|
|
|
|
else
|
|
|
|
error = arpresolve(ifp, rt0, m, dst, edst, &lle);
|
This commit does two things:
1. rt_check() cleanup:
rt_check() is only necessary for some address families to gain access
to the corresponding arp entry, so call it only in/near the *resolve()
routines where it is actually used -- at the moment this is
arpresolve(), nd6_storelladdr() (the call is embedded here),
and atmresolve() (the call is just before atmresolve to reduce
the number of changes).
This change will make it a lot easier to decouple the arp table
from the routing table.
There is an extra call to rt_check() in if_iso88025subr.c to
determine the routing info length. I have left it alone for
the time being.
The interface of arpresolve() and nd6_storelladdr() now changes slightly:
+ the 'rtentry' parameter (really a hint from the upper level layer)
is now passed unchanged from *_output(), so it becomes the route
to the final destination and not to the gateway.
+ the routines will return 0 if resolution is possible, non-zero
otherwise.
+ arpresolve() returns EWOULDBLOCK in case the mbuf is being held
waiting for an arp reply -- in this case the error code is masked
in the caller so the upper layer protocol will not see a failure.
2. arpcom untangling
Where possible, use 'struct ifnet' instead of 'struct arpcom' variables,
and use the IFP2AC macro to access arpcom fields.
This mostly affects the netatalk code.
=== Detailed changes: ===
net/if_arcsubr.c
rt_check() cleanup, remove a useless variable
net/if_atmsubr.c
rt_check() cleanup
net/if_ethersubr.c
rt_check() cleanup, arpcom untangling
net/if_fddisubr.c
rt_check() cleanup, arpcom untangling
net/if_iso88025subr.c
rt_check() cleanup
netatalk/aarp.c
arpcom untangling, remove a block of duplicated code
netatalk/at_extern.h
arpcom untangling
netinet/if_ether.c
rt_check() cleanup (change arpresolve)
netinet6/nd6.c
rt_check() cleanup (change nd6_storelladdr)
2004-04-25 09:24:52 +00:00
|
|
|
if (error)
|
|
|
|
return (error == EWOULDBLOCK ? 0 : error);
|
1996-06-13 02:54:19 +00:00
|
|
|
type = htons(ETHERTYPE_IP);
|
1994-05-24 10:09:53 +00:00
|
|
|
break;
|
2004-03-14 05:24:54 +00:00
|
|
|
case AF_ARP:
|
|
|
|
{
|
|
|
|
struct arphdr *ah;
|
|
|
|
ah = mtod(m, struct arphdr *);
|
|
|
|
ah->ar_hrd = htons(ARPHRD_ETHER);
|
|
|
|
|
2005-12-22 12:16:20 +00:00
|
|
|
loop_copy = 0; /* if this is for us, don't do it */
|
2004-03-14 05:24:54 +00:00
|
|
|
|
|
|
|
switch(ntohs(ah->ar_op)) {
|
|
|
|
case ARPOP_REVREQUEST:
|
|
|
|
case ARPOP_REVREPLY:
|
|
|
|
type = htons(ETHERTYPE_REVARP);
|
|
|
|
break;
|
|
|
|
case ARPOP_REQUEST:
|
|
|
|
case ARPOP_REPLY:
|
|
|
|
default:
|
|
|
|
type = htons(ETHERTYPE_ARP);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (m->m_flags & M_BCAST)
|
|
|
|
bcopy(ifp->if_broadcastaddr, edst, ETHER_ADDR_LEN);
|
|
|
|
else
|
|
|
|
bcopy(ar_tha(ah), edst, ETHER_ADDR_LEN);
|
|
|
|
|
|
|
|
}
|
|
|
|
break;
|
1994-05-24 10:09:53 +00:00
|
|
|
#endif
|
1999-11-22 02:45:11 +00:00
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
2009-04-16 20:30:28 +00:00
|
|
|
if (lle != NULL && (lle->la_flags & LLE_VALID))
|
|
|
|
memcpy(edst, &lle->ll_addr.mac16, sizeof(edst));
|
|
|
|
else
|
|
|
|
error = nd6_storelladdr(ifp, m, dst, (u_char *)edst, &lle);
|
This commit does two things:
1. rt_check() cleanup:
rt_check() is only necessary for some address families to gain access
to the corresponding arp entry, so call it only in/near the *resolve()
routines where it is actually used -- at the moment this is
arpresolve(), nd6_storelladdr() (the call is embedded here),
and atmresolve() (the call is just before atmresolve to reduce
the number of changes).
This change will make it a lot easier to decouple the arp table
from the routing table.
There is an extra call to rt_check() in if_iso88025subr.c to
determine the routing info length. I have left it alone for
the time being.
The interface of arpresolve() and nd6_storelladdr() now changes slightly:
+ the 'rtentry' parameter (really a hint from the upper level layer)
is now passed unchanged from *_output(), so it becomes the route
to the final destination and not to the gateway.
+ the routines will return 0 if resolution is possible, non-zero
otherwise.
+ arpresolve() returns EWOULDBLOCK in case the mbuf is being held
waiting for an arp reply -- in this case the error code is masked
in the caller so the upper layer protocol will not see a failure.
2. arpcom untangling
Where possible, use 'struct ifnet' instead of 'struct arpcom' variables,
and use the IFP2AC macro to access arpcom fields.
This mostly affects the netatalk code.
=== Detailed changes: ===
net/if_arcsubr.c
rt_check() cleanup, remove a useless variable
net/if_atmsubr.c
rt_check() cleanup
net/if_ethersubr.c
rt_check() cleanup, arpcom untangling
net/if_fddisubr.c
rt_check() cleanup, arpcom untangling
net/if_iso88025subr.c
rt_check() cleanup
netatalk/aarp.c
arpcom untangling, remove a block of duplicated code
netatalk/at_extern.h
arpcom untangling
netinet/if_ether.c
rt_check() cleanup (change arpresolve)
netinet6/nd6.c
rt_check() cleanup (change nd6_storelladdr)
2004-04-25 09:24:52 +00:00
|
|
|
if (error)
|
|
|
|
return error;
|
1999-11-22 02:45:11 +00:00
|
|
|
type = htons(ETHERTYPE_IPV6);
|
|
|
|
break;
|
|
|
|
#endif
|
1995-10-26 20:31:59 +00:00
|
|
|
#ifdef IPX
|
|
|
|
case AF_IPX:
|
1999-12-13 16:24:22 +00:00
|
|
|
if (ef_outputp) {
|
2000-09-30 14:33:53 +00:00
|
|
|
error = ef_outputp(ifp, &m, dst, &type, &hlen);
|
2000-04-27 10:13:52 +00:00
|
|
|
if (error)
|
|
|
|
goto bad;
|
1999-12-13 16:24:22 +00:00
|
|
|
} else
|
|
|
|
type = htons(ETHERTYPE_IPX);
|
2003-10-23 13:49:10 +00:00
|
|
|
bcopy((caddr_t)&(((struct sockaddr_ipx *)dst)->sipx_addr.x_host),
|
1995-10-26 20:31:59 +00:00
|
|
|
(caddr_t)edst, sizeof (edst));
|
|
|
|
break;
|
|
|
|
#endif
|
1996-05-24 01:35:45 +00:00
|
|
|
#ifdef NETATALK
|
|
|
|
case AF_APPLETALK:
|
1998-06-12 03:48:19 +00:00
|
|
|
{
|
|
|
|
struct at_ifaddr *aa;
|
1997-10-29 00:30:52 +00:00
|
|
|
|
This commit does two things:
1. rt_check() cleanup:
rt_check() is only necessary for some address families to gain access
to the corresponding arp entry, so call it only in/near the *resolve()
routines where it is actually used -- at the moment this is
arpresolve(), nd6_storelladdr() (the call is embedded here),
and atmresolve() (the call is just before atmresolve to reduce
the number of changes).
This change will make it a lot easier to decouple the arp table
from the routing table.
There is an extra call to rt_check() in if_iso88025subr.c to
determine the routing info length. I have left it alone for
the time being.
The interface of arpresolve() and nd6_storelladdr() now changes slightly:
+ the 'rtentry' parameter (really a hint from the upper level layer)
is now passed unchanged from *_output(), so it becomes the route
to the final destination and not to the gateway.
+ the routines will return 0 if resolution is possible, non-zero
otherwise.
+ arpresolve() returns EWOULDBLOCK in case the mbuf is being held
waiting for an arp reply -- in this case the error code is masked
in the caller so the upper layer protocol will not see a failure.
2. arpcom untangling
Where possible, use 'struct ifnet' instead of 'struct arpcom' variables,
and use the IFP2AC macro to access arpcom fields.
This mostly affects the netatalk code.
=== Detailed changes: ===
net/if_arcsubr.c
rt_check() cleanup, remove a useless variable
net/if_atmsubr.c
rt_check() cleanup
net/if_ethersubr.c
rt_check() cleanup, arpcom untangling
net/if_fddisubr.c
rt_check() cleanup, arpcom untangling
net/if_iso88025subr.c
rt_check() cleanup
netatalk/aarp.c
arpcom untangling, remove a block of duplicated code
netatalk/at_extern.h
arpcom untangling
netinet/if_ether.c
rt_check() cleanup (change arpresolve)
netinet6/nd6.c
rt_check() cleanup (change nd6_storelladdr)
2004-04-25 09:24:52 +00:00
|
|
|
if ((aa = at_ifawithnet((struct sockaddr_at *)dst)) == NULL)
|
|
|
|
senderr(EHOSTUNREACH); /* XXX */
|
2009-06-24 10:32:44 +00:00
|
|
|
if (!aarpresolve(ifp, m, (struct sockaddr_at *)dst, edst)) {
|
|
|
|
ifa_free(&aa->aa_ifa);
|
1998-06-12 03:48:19 +00:00
|
|
|
return (0);
|
2009-06-24 10:32:44 +00:00
|
|
|
}
|
1996-05-24 01:35:45 +00:00
|
|
|
/*
|
1998-06-12 03:48:19 +00:00
|
|
|
* In the phase 2 case, need to prepend an mbuf for the llc header.
|
1996-05-24 01:35:45 +00:00
|
|
|
*/
|
|
|
|
if ( aa->aa_flags & AFA_PHASE2 ) {
|
|
|
|
struct llc llc;
|
|
|
|
|
2009-06-24 10:32:44 +00:00
|
|
|
ifa_free(&aa->aa_ifa);
|
2005-02-22 15:03:25 +00:00
|
|
|
M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT);
|
2003-08-29 19:12:18 +00:00
|
|
|
if (m == NULL)
|
|
|
|
senderr(ENOBUFS);
|
1996-05-24 01:35:45 +00:00
|
|
|
llc.llc_dsap = llc.llc_ssap = LLC_SNAP_LSAP;
|
|
|
|
llc.llc_control = LLC_UI;
|
|
|
|
bcopy(at_org_code, llc.llc_snap_org_code, sizeof(at_org_code));
|
|
|
|
llc.llc_snap_ether_type = htons( ETHERTYPE_AT );
|
2003-03-03 05:04:57 +00:00
|
|
|
bcopy(&llc, mtod(m, caddr_t), LLC_SNAPFRAMELEN);
|
1996-06-13 02:54:19 +00:00
|
|
|
type = htons(m->m_pkthdr.len);
|
2003-03-03 05:04:57 +00:00
|
|
|
hlen = LLC_SNAPFRAMELEN + ETHER_HDR_LEN;
|
1996-05-24 01:35:45 +00:00
|
|
|
} else {
|
2009-06-24 10:32:44 +00:00
|
|
|
ifa_free(&aa->aa_ifa);
|
1996-06-13 02:54:19 +00:00
|
|
|
type = htons(ETHERTYPE_AT);
|
1996-05-24 01:35:45 +00:00
|
|
|
}
|
|
|
|
break;
|
1998-06-12 03:48:19 +00:00
|
|
|
}
|
2001-09-10 01:33:03 +00:00
|
|
|
#endif /* NETATALK */
|
1994-05-24 10:09:53 +00:00
|
|
|
|
1999-10-15 05:07:00 +00:00
|
|
|
case pseudo_AF_HDRCMPLT:
|
|
|
|
hdrcmplt = 1;
|
|
|
|
eh = (struct ether_header *)dst->sa_data;
|
|
|
|
(void)memcpy(esrc, eh->ether_shost, sizeof (esrc));
|
|
|
|
/* FALLTHROUGH */
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
case AF_UNSPEC:
|
2005-12-22 12:16:20 +00:00
|
|
|
loop_copy = 0; /* if this is for us, don't do it */
|
1994-05-24 10:09:53 +00:00
|
|
|
eh = (struct ether_header *)dst->sa_data;
|
2003-10-23 13:49:10 +00:00
|
|
|
(void)memcpy(edst, eh->ether_dhost, sizeof (edst));
|
1994-05-24 10:09:53 +00:00
|
|
|
type = eh->ether_type;
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
2002-10-21 02:51:56 +00:00
|
|
|
if_printf(ifp, "can't handle af%d\n", dst->sa_family);
|
1994-05-24 10:09:53 +00:00
|
|
|
senderr(EAFNOSUPPORT);
|
|
|
|
}
|
|
|
|
|
The main goals of this project are:
1. separating L2 tables (ARP, NDP) from the L3 routing tables
2. removing as much locking dependencies among these layers as
possible to allow for some parallelism in the search operations
3. simplify the logic in the routing code,
The most notable end result is the obsolescence of the route
cloning (RTF_CLONING) concept, which translated into code reduction
in both IPv4 ARP and IPv6 NDP related modules, and size reduction in
struct rtentry{}. The change in design obsoletes the semantics of
RTF_CLONING, RTF_WASCLONE and RTF_LLINFO routing flags. The userland
applications such as "arp" and "ndp" have been modified to reflect
those changes. The output from "netstat -r" shows only the routing
entries.
Quite a few developers have contributed to this project in the
past: Glebius Smirnoff, Luigi Rizzo, Alessandro Cerri, and
Andre Oppermann. And most recently:
- Kip Macy revised the locking code completely, thus completing
the last piece of the puzzle, Kip has also been conducting
active functional testing
- Sam Leffler has helped me improve/refactor the code, and
provided valuable reviews
- Julian Elischer set up the perforce tree for me and has helped
me maintain that branch before the svn conversion
2008-12-15 06:10:57 +00:00
|
|
|
if (lle != NULL && (lle->la_flags & LLE_IFADDR)) {
|
|
|
|
int csum_flags = 0;
|
|
|
|
if (m->m_pkthdr.csum_flags & CSUM_IP)
|
|
|
|
csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID);
|
|
|
|
if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
|
|
|
|
csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
|
2009-02-03 11:00:43 +00:00
|
|
|
if (m->m_pkthdr.csum_flags & CSUM_SCTP)
|
|
|
|
csum_flags |= CSUM_SCTP_VALID;
|
The main goals of this project are:
1. separating L2 tables (ARP, NDP) from the L3 routing tables
2. removing as much locking dependencies among these layers as
possible to allow for some parallelism in the search operations
3. simplify the logic in the routing code,
The most notable end result is the obsolescence of the route
cloning (RTF_CLONING) concept, which translated into code reduction
in both IPv4 ARP and IPv6 NDP related modules, and size reduction in
struct rtentry{}. The change in design obsoletes the semantics of
RTF_CLONING, RTF_WASCLONE and RTF_LLINFO routing flags. The userland
applications such as "arp" and "ndp" have been modified to reflect
those changes. The output from "netstat -r" shows only the routing
entries.
Quite a few developers have contributed to this project in the
past: Glebius Smirnoff, Luigi Rizzo, Alessandro Cerri, and
Andre Oppermann. And most recently:
- Kip Macy revised the locking code completely, thus completing
the last piece of the puzzle, Kip has also been conducting
active functional testing
- Sam Leffler has helped me improve/refactor the code, and
provided valuable reviews
- Julian Elischer set up the perforce tree for me and has helped
me maintain that branch before the svn conversion
2008-12-15 06:10:57 +00:00
|
|
|
m->m_pkthdr.csum_flags |= csum_flags;
|
|
|
|
m->m_pkthdr.csum_data = 0xffff;
|
|
|
|
return (if_simloop(ifp, m, dst->sa_family, 0));
|
|
|
|
}
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* Add local net header. If no space in first mbuf,
|
|
|
|
* allocate another.
|
|
|
|
*/
|
2003-03-03 05:04:57 +00:00
|
|
|
M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
|
2003-08-29 19:12:18 +00:00
|
|
|
if (m == NULL)
|
1994-05-24 10:09:53 +00:00
|
|
|
senderr(ENOBUFS);
|
|
|
|
eh = mtod(m, struct ether_header *);
|
1995-05-09 13:35:48 +00:00
|
|
|
(void)memcpy(&eh->ether_type, &type,
|
1994-05-24 10:09:53 +00:00
|
|
|
sizeof(eh->ether_type));
|
2003-10-23 13:49:10 +00:00
|
|
|
(void)memcpy(eh->ether_dhost, edst, sizeof (edst));
|
1999-10-15 05:07:00 +00:00
|
|
|
if (hdrcmplt)
|
|
|
|
(void)memcpy(eh->ether_shost, esrc,
|
|
|
|
sizeof(eh->ether_shost));
|
|
|
|
else
|
2005-11-11 16:04:59 +00:00
|
|
|
(void)memcpy(eh->ether_shost, IF_LLADDR(ifp),
|
1999-10-15 05:07:00 +00:00
|
|
|
sizeof(eh->ether_shost));
|
1998-06-12 03:48:19 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If a simplex interface, and the packet is being sent to our
|
|
|
|
* Ethernet address or a broadcast address, loopback a copy.
|
|
|
|
* XXX To make a simplex device behave exactly like a duplex
|
|
|
|
* device, we should copy in the case of sending to our own
|
|
|
|
* ethernet address (thus letting the original actually appear
|
|
|
|
* on the wire). However, we don't do that here for security
|
|
|
|
* reasons and compatibility with the original behavior.
|
|
|
|
*/
|
2005-12-22 12:16:20 +00:00
|
|
|
if ((ifp->if_flags & IFF_SIMPLEX) && loop_copy &&
|
2007-07-03 12:46:08 +00:00
|
|
|
((t = pf_find_mtag(m)) == NULL || !t->routed)) {
|
2002-01-11 16:04:47 +00:00
|
|
|
int csum_flags = 0;
|
|
|
|
|
|
|
|
if (m->m_pkthdr.csum_flags & CSUM_IP)
|
|
|
|
csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID);
|
|
|
|
if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
|
|
|
|
csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
|
2009-02-03 11:00:43 +00:00
|
|
|
if (m->m_pkthdr.csum_flags & CSUM_SCTP)
|
|
|
|
csum_flags |= CSUM_SCTP_VALID;
|
2003-03-15 19:37:44 +00:00
|
|
|
|
2005-12-22 12:16:20 +00:00
|
|
|
if (m->m_flags & M_BCAST) {
|
2003-03-15 19:37:44 +00:00
|
|
|
struct mbuf *n;
|
1998-06-12 03:48:19 +00:00
|
|
|
|
2006-12-08 10:36:45 +00:00
|
|
|
/*
|
|
|
|
* Because if_simloop() modifies the packet, we need a
|
|
|
|
* writable copy through m_dup() instead of a readonly
|
|
|
|
* one as m_copy[m] would give us. The alternative would
|
|
|
|
* be to modify if_simloop() to handle the readonly mbuf,
|
|
|
|
* but performancewise it is mostly equivalent (trading
|
|
|
|
* extra data copying vs. extra locking).
|
2006-12-24 08:52:13 +00:00
|
|
|
*
|
|
|
|
* XXX This is a local workaround. A number of less
|
|
|
|
* often used kernel parts suffer from the same bug.
|
|
|
|
* See PR kern/105943 for a proposed general solution.
|
2006-12-08 10:36:45 +00:00
|
|
|
*/
|
|
|
|
if ((n = m_dup(m, M_DONTWAIT)) != NULL) {
|
2003-04-23 18:35:40 +00:00
|
|
|
n->m_pkthdr.csum_flags |= csum_flags;
|
|
|
|
if (csum_flags & CSUM_DATA_VALID)
|
|
|
|
n->m_pkthdr.csum_data = 0xffff;
|
|
|
|
(void)if_simloop(ifp, n, dst->sa_family, hlen);
|
2003-04-23 23:45:57 +00:00
|
|
|
} else
|
|
|
|
ifp->if_iqdrops++;
|
2003-03-15 19:37:44 +00:00
|
|
|
} else if (bcmp(eh->ether_dhost, eh->ether_shost,
|
|
|
|
ETHER_ADDR_LEN) == 0) {
|
2002-01-11 16:04:47 +00:00
|
|
|
m->m_pkthdr.csum_flags |= csum_flags;
|
|
|
|
if (csum_flags & CSUM_DATA_VALID)
|
|
|
|
m->m_pkthdr.csum_data = 0xffff;
|
2000-05-24 21:16:56 +00:00
|
|
|
(void) if_simloop(ifp, m, dst->sa_family, hlen);
|
1999-01-12 12:07:00 +00:00
|
|
|
return (0); /* XXX */
|
1998-06-12 03:48:19 +00:00
|
|
|
}
|
|
|
|
}
|
2000-05-14 02:18:43 +00:00
|
|
|
|
2006-08-25 20:16:39 +00:00
|
|
|
/*
|
|
|
|
* Bridges require special output handling.
|
|
|
|
*/
|
|
|
|
if (ifp->if_bridge) {
|
|
|
|
BRIDGE_OUTPUT(ifp, m, error);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2009-06-11 10:26:38 +00:00
|
|
|
#if defined(INET) || defined(INET6)
|
2005-02-22 13:04:05 +00:00
|
|
|
#ifdef DEV_CARP
|
|
|
|
if (ifp->if_carp &&
|
|
|
|
(error = carp_output(ifp, m, dst, NULL)))
|
|
|
|
goto bad;
|
2009-06-11 10:26:38 +00:00
|
|
|
#endif
|
2005-02-22 13:04:05 +00:00
|
|
|
#endif
|
|
|
|
|
2000-06-26 23:34:54 +00:00
|
|
|
/* Handle ng_ether(4) processing, if any */
|
2005-02-14 11:58:54 +00:00
|
|
|
if (IFP2AC(ifp)->ac_netgraph != NULL) {
|
2005-07-21 09:00:51 +00:00
|
|
|
KASSERT(ng_ether_output_p != NULL,
|
|
|
|
("ng_ether_output_p is NULL"));
|
2000-06-26 23:34:54 +00:00
|
|
|
if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) {
|
|
|
|
bad: if (m != NULL)
|
|
|
|
m_freem(m);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
if (m == NULL)
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Continue with link-layer output */
|
|
|
|
return ether_output_frame(ifp, m);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Ethernet link layer output routine to send a raw frame to the device.
|
|
|
|
*
|
|
|
|
* This assumes that the 14 byte Ethernet header is present and contiguous
|
|
|
|
* in the first mbuf (if BRIDGE'ing).
|
|
|
|
*/
|
|
|
|
int
|
2002-11-14 23:35:06 +00:00
|
|
|
ether_output_frame(struct ifnet *ifp, struct mbuf *m)
|
2000-06-26 23:34:54 +00:00
|
|
|
{
|
2004-06-24 10:58:08 +00:00
|
|
|
#if defined(INET) || defined(INET6)
|
Remove (almost all) global variables that were used to hold
packet forwarding state ("annotations") during ip processing.
The code is considerably cleaner now.
The variables removed by this change are:
ip_divert_cookie used by divert sockets
ip_fw_fwd_addr used for transparent ip redirection
last_pkt used by dynamic pipes in dummynet
Removal of the first two has been done by carrying the annotations
into volatile structs prepended to the mbuf chains, and adding
appropriate code to add/remove annotations in the routines which
make use of them, i.e. ip_input(), ip_output(), tcp_input(),
bdg_forward(), ether_demux(), ether_output_frame(), div_output().
On passing, remove a bug in divert handling of fragmented packet.
Now it is the fragment at offset 0 which sets the divert status of
the whole packet, whereas formerly it was the last incoming fragment
to decide.
Removal of last_pkt required a change in the interface of ip_fw_chk()
and dummynet_io(). On passing, use the same mechanism for dummynet
annotations and for divert/forward annotations.
option IPFIREWALL_FORWARD is effectively useless, the code to
implement it is very small and is now in by default to avoid the
obfuscation of conditionally compiled code.
NOTES:
* there is at least one global variable left, sro_fwd, in ip_output().
I am not sure if/how this can be removed.
* I have deliberately avoided gratuitous style changes in this commit
to avoid cluttering the diffs. Minor stule cleanup will likely be
necessary
* this commit only focused on the IP layer. I am sure there is a
number of global variables used in the TCP and maybe UDP stack.
* despite the number of files touched, there are absolutely no API's
or data structures changed by this commit (except the interfaces of
ip_fw_chk() and dummynet_io(), which are internal anyways), so
an MFC is quite safe and unintrusive (and desirable, given the
improved readability of the code).
MFC after: 10 days
2002-06-22 11:51:02 +00:00
|
|
|
|
2009-10-11 05:59:43 +00:00
|
|
|
if (V_ip_fw_chk_ptr && V_ether_ipfw != 0) {
|
2009-06-09 21:27:11 +00:00
|
|
|
if (ether_ipfw_chk(&m, ifp, 0) == 0) {
|
2002-05-13 10:37:19 +00:00
|
|
|
if (m) {
|
|
|
|
m_freem(m);
|
2004-02-15 21:27:27 +00:00
|
|
|
return EACCES; /* pkt dropped */
|
2002-05-13 10:37:19 +00:00
|
|
|
} else
|
|
|
|
return 0; /* consumed e.g. in a pipe */
|
|
|
|
}
|
|
|
|
}
|
2004-06-24 10:58:08 +00:00
|
|
|
#endif
|
Remove (almost all) global variables that were used to hold
packet forwarding state ("annotations") during ip processing.
The code is considerably cleaner now.
The variables removed by this change are:
ip_divert_cookie used by divert sockets
ip_fw_fwd_addr used for transparent ip redirection
last_pkt used by dynamic pipes in dummynet
Removal of the first two has been done by carrying the annotations
into volatile structs prepended to the mbuf chains, and adding
appropriate code to add/remove annotations in the routines which
make use of them, i.e. ip_input(), ip_output(), tcp_input(),
bdg_forward(), ether_demux(), ether_output_frame(), div_output().
On passing, remove a bug in divert handling of fragmented packet.
Now it is the fragment at offset 0 which sets the divert status of
the whole packet, whereas formerly it was the last incoming fragment
to decide.
Removal of last_pkt required a change in the interface of ip_fw_chk()
and dummynet_io(). On passing, use the same mechanism for dummynet
annotations and for divert/forward annotations.
option IPFIREWALL_FORWARD is effectively useless, the code to
implement it is very small and is now in by default to avoid the
obfuscation of conditionally compiled code.
NOTES:
* there is at least one global variable left, sro_fwd, in ip_output().
I am not sure if/how this can be removed.
* I have deliberately avoided gratuitous style changes in this commit
to avoid cluttering the diffs. Minor stule cleanup will likely be
necessary
* this commit only focused on the IP layer. I am sure there is a
number of global variables used in the TCP and maybe UDP stack.
* despite the number of files touched, there are absolutely no API's
or data structures changed by this commit (except the interfaces of
ip_fw_chk() and dummynet_io(), which are internal anyways), so
an MFC is quite safe and unintrusive (and desirable, given the
improved readability of the code).
MFC after: 10 days
2002-06-22 11:51:02 +00:00
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
Lock down the network interface queues. The queue mutex must be obtained
before adding/removing packets from the queue. Also, the if_obytes and
if_omcasts fields should only be manipulated under protection of the mutex.
IF_ENQUEUE, IF_PREPEND, and IF_DEQUEUE perform all necessary locking on
the queue. An IF_LOCK macro is provided, as well as the old (mutex-less)
versions of the macros in the form _IF_ENQUEUE, _IF_QFULL, for code which
needs them, but their use is discouraged.
Two new macros are introduced: IF_DRAIN() to drain a queue, and IF_HANDOFF,
which takes care of locking/enqueue, and also statistics updating/start
if necessary.
2000-11-25 07:35:38 +00:00
|
|
|
* Queue message on interface, update output statistics if
|
|
|
|
* successful, and start output if interface not yet active.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
2008-11-22 07:35:45 +00:00
|
|
|
return ((ifp->if_transmit)(ifp, m));
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
2004-06-24 10:58:08 +00:00
|
|
|
#if defined(INET) || defined(INET6)
|
2002-05-13 10:37:19 +00:00
|
|
|
/*
|
|
|
|
* ipfw processing for ethernet packets (in and out).
|
|
|
|
* The second parameter is NULL from ether_demux, and ifp from
|
2005-09-27 18:10:43 +00:00
|
|
|
* ether_output_frame.
|
2002-05-13 10:37:19 +00:00
|
|
|
*/
|
|
|
|
int
|
2009-06-09 21:27:11 +00:00
|
|
|
ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, int shared)
|
2002-05-13 10:37:19 +00:00
|
|
|
{
|
2002-11-14 23:35:06 +00:00
|
|
|
struct ether_header *eh;
|
|
|
|
struct ether_header save_eh;
|
|
|
|
struct mbuf *m;
|
Remove (almost all) global variables that were used to hold
packet forwarding state ("annotations") during ip processing.
The code is considerably cleaner now.
The variables removed by this change are:
ip_divert_cookie used by divert sockets
ip_fw_fwd_addr used for transparent ip redirection
last_pkt used by dynamic pipes in dummynet
Removal of the first two has been done by carrying the annotations
into volatile structs prepended to the mbuf chains, and adding
appropriate code to add/remove annotations in the routines which
make use of them, i.e. ip_input(), ip_output(), tcp_input(),
bdg_forward(), ether_demux(), ether_output_frame(), div_output().
On passing, remove a bug in divert handling of fragmented packet.
Now it is the fragment at offset 0 which sets the divert status of
the whole packet, whereas formerly it was the last incoming fragment
to decide.
Removal of last_pkt required a change in the interface of ip_fw_chk()
and dummynet_io(). On passing, use the same mechanism for dummynet
annotations and for divert/forward annotations.
option IPFIREWALL_FORWARD is effectively useless, the code to
implement it is very small and is now in by default to avoid the
obfuscation of conditionally compiled code.
NOTES:
* there is at least one global variable left, sro_fwd, in ip_output().
I am not sure if/how this can be removed.
* I have deliberately avoided gratuitous style changes in this commit
to avoid cluttering the diffs. Minor style cleanup will likely be
necessary
* this commit only focused on the IP layer. I am sure there is a
number of global variables used in the TCP and maybe UDP stack.
* despite the number of files touched, there are absolutely no API's
or data structures changed by this commit (except the interfaces of
ip_fw_chk() and dummynet_io(), which are internal anyways), so
an MFC is quite safe and unintrusive (and desirable, given the
improved readability of the code).
MFC after: 10 days
2002-06-22 11:51:02 +00:00
|
|
|
int i;
|
|
|
|
struct ip_fw_args args;
|
2009-12-28 10:47:04 +00:00
|
|
|
struct m_tag *mtag;
|
2002-05-13 10:37:19 +00:00
|
|
|
|
2009-12-28 10:47:04 +00:00
|
|
|
mtag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL);
|
|
|
|
if (mtag == NULL) {
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
args.slot = 0;
|
|
|
|
} else {
|
2009-12-28 10:47:04 +00:00
|
|
|
struct dn_pkt_tag *dn_tag;
|
|
|
|
|
|
|
|
mtag->m_tag_id = PACKET_TAG_NONE;
|
|
|
|
dn_tag = (struct dn_pkt_tag *)(mtag + 1);
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
if (dn_tag->slot != 0 && V_fw_one_pass)
|
2009-06-09 21:27:11 +00:00
|
|
|
/* dummynet packet, already partially processed */
|
|
|
|
return (1);
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
args.slot = dn_tag->slot; /* matching rule to restart */
|
|
|
|
args.rulenum = dn_tag->rulenum;
|
2009-06-09 21:27:11 +00:00
|
|
|
args.rule_id = dn_tag->rule_id;
|
|
|
|
args.chain_id = dn_tag->chain_id;
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
}
|
2002-06-23 11:19:53 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* I need some amt of data to be contiguous, and in case others need
|
|
|
|
* the packet (shared==1) also better be in the first mbuf.
|
|
|
|
*/
|
2002-11-14 23:35:06 +00:00
|
|
|
m = *m0;
|
|
|
|
i = min( m->m_pkthdr.len, max_protohdr);
|
|
|
|
if ( shared || m->m_len < i) {
|
|
|
|
m = m_pullup(m, i);
|
|
|
|
if (m == NULL) {
|
|
|
|
*m0 = m;
|
2002-05-13 10:37:19 +00:00
|
|
|
return 0;
|
2002-11-14 23:35:06 +00:00
|
|
|
}
|
2002-06-23 11:19:53 +00:00
|
|
|
}
|
2002-11-14 23:35:06 +00:00
|
|
|
eh = mtod(m, struct ether_header *);
|
|
|
|
save_eh = *eh; /* save copy for restore below */
|
|
|
|
m_adj(m, ETHER_HDR_LEN); /* strip ethernet header */
|
2002-05-13 10:37:19 +00:00
|
|
|
|
2002-11-14 23:35:06 +00:00
|
|
|
args.m = m; /* the packet we are looking at */
|
Remove (almost all) global variables that were used to hold
packet forwarding state ("annotations") during ip processing.
The code is considerably cleaner now.
The variables removed by this change are:
ip_divert_cookie used by divert sockets
ip_fw_fwd_addr used for transparent ip redirection
last_pkt used by dynamic pipes in dummynet
Removal of the first two has been done by carrying the annotations
into volatile structs prepended to the mbuf chains, and adding
appropriate code to add/remove annotations in the routines which
make use of them, i.e. ip_input(), ip_output(), tcp_input(),
bdg_forward(), ether_demux(), ether_output_frame(), div_output().
On passing, remove a bug in divert handling of fragmented packet.
Now it is the fragment at offset 0 which sets the divert status of
the whole packet, whereas formerly it was the last incoming fragment
to decide.
Removal of last_pkt required a change in the interface of ip_fw_chk()
and dummynet_io(). On passing, use the same mechanism for dummynet
annotations and for divert/forward annotations.
option IPFIREWALL_FORWARD is effectively useless, the code to
implement it is very small and is now in by default to avoid the
obfuscation of conditionally compiled code.
NOTES:
* there is at least one global variable left, sro_fwd, in ip_output().
I am not sure if/how this can be removed.
* I have deliberately avoided gratuitous style changes in this commit
to avoid cluttering the diffs. Minor style cleanup will likely be
necessary
* this commit only focused on the IP layer. I am sure there is a
number of global variables used in the TCP and maybe UDP stack.
* despite the number of files touched, there are absolutely no API's
or data structures changed by this commit (except the interfaces of
ip_fw_chk() and dummynet_io(), which are internal anyways), so
an MFC is quite safe and unintrusive (and desirable, given the
improved readability of the code).
MFC after: 10 days
2002-06-22 11:51:02 +00:00
|
|
|
args.oif = dst; /* destination, if any */
|
2002-06-23 11:19:53 +00:00
|
|
|
args.next_hop = NULL; /* we do not support forward yet */
|
Remove (almost all) global variables that were used to hold
packet forwarding state ("annotations") during ip processing.
The code is considerably cleaner now.
The variables removed by this change are:
ip_divert_cookie used by divert sockets
ip_fw_fwd_addr used for transparent ip redirection
last_pkt used by dynamic pipes in dummynet
Removal of the first two has been done by carrying the annotations
into volatile structs prepended to the mbuf chains, and adding
appropriate code to add/remove annotations in the routines which
make use of them, i.e. ip_input(), ip_output(), tcp_input(),
bdg_forward(), ether_demux(), ether_output_frame(), div_output().
On passing, remove a bug in divert handling of fragmented packet.
Now it is the fragment at offset 0 which sets the divert status of
the whole packet, whereas formerly it was the last incoming fragment
to decide.
Removal of last_pkt required a change in the interface of ip_fw_chk()
and dummynet_io(). On passing, use the same mechanism for dummynet
annotations and for divert/forward annotations.
option IPFIREWALL_FORWARD is effectively useless, the code to
implement it is very small and is now in by default to avoid the
obfuscation of conditionally compiled code.
NOTES:
* there is at least one global variable left, sro_fwd, in ip_output().
I am not sure if/how this can be removed.
* I have deliberately avoided gratuitous style changes in this commit
to avoid cluttering the diffs. Minor style cleanup will likely be
necessary
* this commit only focused on the IP layer. I am sure there is a
number of global variables used in the TCP and maybe UDP stack.
* despite the number of files touched, there are absolutely no API's
or data structures changed by this commit (except the interfaces of
ip_fw_chk() and dummynet_io(), which are internal anyways), so
an MFC is quite safe and unintrusive (and desirable, given the
improved readability of the code).
MFC after: 10 days
2002-06-22 11:51:02 +00:00
|
|
|
args.eh = &save_eh; /* MAC header for bridged/MAC packets */
|
2006-02-03 23:03:07 +00:00
|
|
|
args.inp = NULL; /* used by ipfw uid/gid/jail rules */
|
2009-10-11 05:59:43 +00:00
|
|
|
i = V_ip_fw_chk_ptr(&args);
|
2002-11-14 23:35:06 +00:00
|
|
|
m = args.m;
|
|
|
|
if (m != NULL) {
|
|
|
|
/*
|
|
|
|
* Restore Ethernet header, as needed, in case the
|
|
|
|
* mbuf chain was replaced by ipfw.
|
|
|
|
*/
|
2003-02-19 05:47:46 +00:00
|
|
|
M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
|
2002-11-14 23:35:06 +00:00
|
|
|
if (m == NULL) {
|
|
|
|
*m0 = m;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (eh != mtod(m, struct ether_header *))
|
|
|
|
bcopy(&save_eh, mtod(m, struct ether_header *),
|
|
|
|
ETHER_HDR_LEN);
|
|
|
|
}
|
|
|
|
*m0 = m;
|
Remove (almost all) global variables that were used to hold
packet forwarding state ("annotations") during ip processing.
The code is considerably cleaner now.
The variables removed by this change are:
ip_divert_cookie used by divert sockets
ip_fw_fwd_addr used for transparent ip redirection
last_pkt used by dynamic pipes in dummynet
Removal of the first two has been done by carrying the annotations
into volatile structs prepended to the mbuf chains, and adding
appropriate code to add/remove annotations in the routines which
make use of them, i.e. ip_input(), ip_output(), tcp_input(),
bdg_forward(), ether_demux(), ether_output_frame(), div_output().
On passing, remove a bug in divert handling of fragmented packet.
Now it is the fragment at offset 0 which sets the divert status of
the whole packet, whereas formerly it was the last incoming fragment
to decide.
Removal of last_pkt required a change in the interface of ip_fw_chk()
and dummynet_io(). On passing, use the same mechanism for dummynet
annotations and for divert/forward annotations.
option IPFIREWALL_FORWARD is effectively useless, the code to
implement it is very small and is now in by default to avoid the
obfuscation of conditionally compiled code.
NOTES:
* there is at least one global variable left, sro_fwd, in ip_output().
I am not sure if/how this can be removed.
* I have deliberately avoided gratuitous style changes in this commit
to avoid cluttering the diffs. Minor style cleanup will likely be
necessary
* this commit only focused on the IP layer. I am sure there is a
number of global variables used in the TCP and maybe UDP stack.
* despite the number of files touched, there are absolutely no API's
or data structures changed by this commit (except the interfaces of
ip_fw_chk() and dummynet_io(), which are internal anyways), so
an MFC is quite safe and unintrusive (and desirable, given the
improved readability of the code).
MFC after: 10 days
2002-06-22 11:51:02 +00:00
|
|
|
|
2005-01-14 09:00:46 +00:00
|
|
|
if (i == IP_FW_DENY) /* drop */
|
2002-05-13 10:37:19 +00:00
|
|
|
return 0;
|
|
|
|
|
2005-01-14 09:00:46 +00:00
|
|
|
KASSERT(m != NULL, ("ether_ipfw_chk: m is NULL"));
|
|
|
|
|
|
|
|
if (i == IP_FW_PASS) /* a PASS rule. */
|
2002-05-13 10:37:19 +00:00
|
|
|
return 1;
|
|
|
|
|
2009-06-05 13:44:30 +00:00
|
|
|
if (ip_dn_io_ptr && (i == IP_FW_DUMMYNET)) {
|
2009-12-28 10:47:04 +00:00
|
|
|
int dir;
|
2002-05-13 10:37:19 +00:00
|
|
|
/*
|
|
|
|
* Pass the pkt to dummynet, which consumes it.
|
|
|
|
* If shared, make a copy and keep the original.
|
|
|
|
*/
|
|
|
|
if (shared) {
|
2003-02-19 05:47:46 +00:00
|
|
|
m = m_copypacket(m, M_DONTWAIT);
|
Remove (almost all) global variables that were used to hold
packet forwarding state ("annotations") during ip processing.
The code is considerably cleaner now.
The variables removed by this change are:
ip_divert_cookie used by divert sockets
ip_fw_fwd_addr used for transparent ip redirection
last_pkt used by dynamic pipes in dummynet
Removal of the first two has been done by carrying the annotations
into volatile structs prepended to the mbuf chains, and adding
appropriate code to add/remove annotations in the routines which
make use of them, i.e. ip_input(), ip_output(), tcp_input(),
bdg_forward(), ether_demux(), ether_output_frame(), div_output().
On passing, remove a bug in divert handling of fragmented packet.
Now it is the fragment at offset 0 which sets the divert status of
the whole packet, whereas formerly it was the last incoming fragment
to decide.
Removal of last_pkt required a change in the interface of ip_fw_chk()
and dummynet_io(). On passing, use the same mechanism for dummynet
annotations and for divert/forward annotations.
option IPFIREWALL_FORWARD is effectively useless, the code to
implement it is very small and is now in by default to avoid the
obfuscation of conditionally compiled code.
NOTES:
* there is at least one global variable left, sro_fwd, in ip_output().
I am not sure if/how this can be removed.
* I have deliberately avoided gratuitous style changes in this commit
to avoid cluttering the diffs. Minor style cleanup will likely be
necessary
* this commit only focused on the IP layer. I am sure there is a
number of global variables used in the TCP and maybe UDP stack.
* despite the number of files touched, there are absolutely no API's
or data structures changed by this commit (except the interfaces of
ip_fw_chk() and dummynet_io(), which are internal anyways), so
an MFC is quite safe and unintrusive (and desirable, given the
improved readability of the code).
MFC after: 10 days
2002-06-22 11:51:02 +00:00
|
|
|
if (m == NULL)
|
2002-05-13 10:37:19 +00:00
|
|
|
return 0;
|
|
|
|
} else {
|
2002-11-14 23:35:06 +00:00
|
|
|
/*
|
|
|
|
* Pass the original to dummynet and
|
|
|
|
* nothing back to the caller
|
|
|
|
*/
|
|
|
|
*m0 = NULL ;
|
2002-05-13 10:37:19 +00:00
|
|
|
}
|
2009-12-28 10:47:04 +00:00
|
|
|
dir = PROTO_LAYER2 | (dst ? DIR_OUT : DIR_IN);
|
|
|
|
ip_dn_io_ptr(&m, dir, &args);
|
2002-05-13 10:37:19 +00:00
|
|
|
return 0;
|
2002-06-23 11:19:53 +00:00
|
|
|
}
|
|
|
|
/*
|
|
|
|
* XXX at some point add support for divert/forward actions.
|
|
|
|
* If none of the above matches, we have to drop the pkt.
|
|
|
|
*/
|
|
|
|
return 0;
|
2002-05-13 10:37:19 +00:00
|
|
|
}
|
2004-06-24 10:58:08 +00:00
|
|
|
#endif
|
2002-05-13 10:37:19 +00:00
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
2002-11-14 23:35:06 +00:00
|
|
|
* Process a received Ethernet packet; the packet is in the
|
|
|
|
* mbuf chain m with the ethernet header at the front.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
2002-11-14 23:35:06 +00:00
|
|
|
static void
|
|
|
|
ether_input(struct ifnet *ifp, struct mbuf *m)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2002-11-14 23:35:06 +00:00
|
|
|
struct ether_header *eh;
|
|
|
|
u_short etype;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2007-03-19 18:39:36 +00:00
|
|
|
if ((ifp->if_flags & IFF_UP) == 0) {
|
|
|
|
m_freem(m);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
#ifdef DIAGNOSTIC
|
|
|
|
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
|
|
|
|
if_printf(ifp, "discard frame at !IFF_DRV_RUNNING\n");
|
|
|
|
m_freem(m);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
#endif
|
2002-11-14 23:35:06 +00:00
|
|
|
/*
|
|
|
|
* Do consistency checks to verify assumptions
|
|
|
|
* made by code past this point.
|
|
|
|
*/
|
|
|
|
if ((m->m_flags & M_PKTHDR) == 0) {
|
|
|
|
if_printf(ifp, "discard frame w/o packet header\n");
|
|
|
|
ifp->if_ierrors++;
|
|
|
|
m_freem(m);
|
|
|
|
return;
|
|
|
|
}
|
2003-03-03 05:04:57 +00:00
|
|
|
if (m->m_len < ETHER_HDR_LEN) {
|
2002-11-14 23:35:06 +00:00
|
|
|
/* XXX maybe should pullup? */
|
|
|
|
if_printf(ifp, "discard frame w/o leading ethernet "
|
|
|
|
"header (len %u pkt len %u)\n",
|
|
|
|
m->m_len, m->m_pkthdr.len);
|
|
|
|
ifp->if_ierrors++;
|
|
|
|
m_freem(m);
|
|
|
|
return;
|
2000-05-14 02:18:43 +00:00
|
|
|
}
|
2002-11-14 23:35:06 +00:00
|
|
|
eh = mtod(m, struct ether_header *);
|
|
|
|
etype = ntohs(eh->ether_type);
|
|
|
|
if (m->m_pkthdr.rcvif == NULL) {
|
|
|
|
if_printf(ifp, "discard frame w/o interface pointer\n");
|
|
|
|
ifp->if_ierrors++;
|
|
|
|
m_freem(m);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
#ifdef DIAGNOSTIC
|
|
|
|
if (m->m_pkthdr.rcvif != ifp) {
|
2003-10-31 18:32:15 +00:00
|
|
|
if_printf(ifp, "Warning, frame marked as received on %s\n",
|
|
|
|
m->m_pkthdr.rcvif->if_xname);
|
2002-11-14 23:35:06 +00:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
Change the curvnet variable from a global const struct vnet *,
previously always pointing to the default vnet context, to a
dynamically changing thread-local one. The currvnet context
should be set on entry to networking code via CURVNET_SET() macros,
and reverted to previous state via CURVNET_RESTORE(). Recursions
on curvnet are permitted, though strongly discuouraged.
This change should have no functional impact on nooptions VIMAGE
kernel builds, where CURVNET_* macros expand to whitespace.
The curthread->td_vnet (aka curvnet) variable's purpose is to be an
indicator of the vnet context in which the current network-related
operation takes place, in case we cannot deduce the current vnet
context from any other source, such as by looking at mbuf's
m->m_pkthdr.rcvif->if_vnet, sockets's so->so_vnet etc. Moreover, so
far curvnet has turned out to be an invaluable consistency checking
aid: it helps to catch cases when sockets, ifnets or any other
vnet-aware structures may have leaked from one vnet to another.
The exact placement of the CURVNET_SET() / CURVNET_RESTORE() macros
was a result of an empirical iterative process, whith an aim to
reduce recursions on CURVNET_SET() to a minimum, while still reducing
the scope of CURVNET_SET() to networking only operations - the
alternative would be calling CURVNET_SET() on each system call entry.
In general, curvnet has to be set in three typicall cases: when
processing socket-related requests from userspace or from within the
kernel; when processing inbound traffic flowing from device drivers
to upper layers of the networking stack, and when executing
timer-driven networking functions.
This change also introduces a DDB subcommand to show the list of all
vnet instances.
Approved by: julian (mentor)
2009-05-05 10:56:12 +00:00
|
|
|
CURVNET_SET_QUIET(ifp->if_vnet);
|
|
|
|
|
2007-03-19 18:39:36 +00:00
|
|
|
if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
|
|
|
|
if (ETHER_IS_BROADCAST(eh->ether_dhost))
|
|
|
|
m->m_flags |= M_BCAST;
|
|
|
|
else
|
|
|
|
m->m_flags |= M_MCAST;
|
|
|
|
ifp->if_imcasts++;
|
|
|
|
}
|
|
|
|
|
2003-07-13 20:32:58 +00:00
|
|
|
#ifdef MAC
|
|
|
|
/*
|
|
|
|
* Tag the mbuf with an appropriate MAC label before any other
|
|
|
|
* consumers can get to it.
|
|
|
|
*/
|
2007-10-24 19:04:04 +00:00
|
|
|
mac_ifnet_create_mbuf(ifp, m);
|
2003-07-13 20:32:58 +00:00
|
|
|
#endif
|
|
|
|
|
2002-11-14 23:35:06 +00:00
|
|
|
/*
|
2007-03-19 18:39:36 +00:00
|
|
|
* Give bpf a chance at the packet.
|
2002-11-14 23:35:06 +00:00
|
|
|
*/
|
2007-02-22 14:50:31 +00:00
|
|
|
ETHER_BPF_MTAP(ifp, m);
|
2000-05-14 02:18:43 +00:00
|
|
|
|
2007-03-19 18:39:36 +00:00
|
|
|
/*
|
|
|
|
* If the CRC is still on the packet, trim it off. We do this once
|
|
|
|
* and once only in case we are re-entered. Nothing else on the
|
|
|
|
* Ethernet receive path expects to see the FCS.
|
|
|
|
*/
|
2002-11-14 23:35:06 +00:00
|
|
|
if (m->m_flags & M_HASFCS) {
|
|
|
|
m_adj(m, -ETHER_CRC_LEN);
|
|
|
|
m->m_flags &= ~M_HASFCS;
|
|
|
|
}
|
|
|
|
|
|
|
|
ifp->if_ibytes += m->m_pkthdr.len;
|
2001-12-14 04:41:07 +00:00
|
|
|
|
2007-03-19 18:39:36 +00:00
|
|
|
/* Allow monitor mode to claim this frame, after stats are updated. */
|
2006-03-03 17:21:08 +00:00
|
|
|
if (ifp->if_flags & IFF_MONITOR) {
|
|
|
|
m_freem(m);
|
Change the curvnet variable from a global const struct vnet *,
previously always pointing to the default vnet context, to a
dynamically changing thread-local one. The currvnet context
should be set on entry to networking code via CURVNET_SET() macros,
and reverted to previous state via CURVNET_RESTORE(). Recursions
on curvnet are permitted, though strongly discuouraged.
This change should have no functional impact on nooptions VIMAGE
kernel builds, where CURVNET_* macros expand to whitespace.
The curthread->td_vnet (aka curvnet) variable's purpose is to be an
indicator of the vnet context in which the current network-related
operation takes place, in case we cannot deduce the current vnet
context from any other source, such as by looking at mbuf's
m->m_pkthdr.rcvif->if_vnet, sockets's so->so_vnet etc. Moreover, so
far curvnet has turned out to be an invaluable consistency checking
aid: it helps to catch cases when sockets, ifnets or any other
vnet-aware structures may have leaked from one vnet to another.
The exact placement of the CURVNET_SET() / CURVNET_RESTORE() macros
was a result of an empirical iterative process, whith an aim to
reduce recursions on CURVNET_SET() to a minimum, while still reducing
the scope of CURVNET_SET() to networking only operations - the
alternative would be calling CURVNET_SET() on each system call entry.
In general, curvnet has to be set in three typicall cases: when
processing socket-related requests from userspace or from within the
kernel; when processing inbound traffic flowing from device drivers
to upper layers of the networking stack, and when executing
timer-driven networking functions.
This change also introduces a DDB subcommand to show the list of all
vnet instances.
Approved by: julian (mentor)
2009-05-05 10:56:12 +00:00
|
|
|
CURVNET_RESTORE();
|
2006-03-03 17:21:08 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2007-04-17 00:35:11 +00:00
|
|
|
/* Handle input from a lagg(4) port */
|
2007-04-10 00:27:25 +00:00
|
|
|
if (ifp->if_type == IFT_IEEE8023ADLAG) {
|
2007-04-17 00:35:11 +00:00
|
|
|
KASSERT(lagg_input_p != NULL,
|
|
|
|
("%s: if_lagg not loaded!", __func__));
|
|
|
|
m = (*lagg_input_p)(ifp, m);
|
2007-04-10 00:27:25 +00:00
|
|
|
if (m != NULL)
|
|
|
|
ifp = m->m_pkthdr.rcvif;
|
|
|
|
else
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2007-03-19 18:39:36 +00:00
|
|
|
/*
|
|
|
|
* If the hardware did not process an 802.1Q tag, do this now,
|
|
|
|
* to allow 802.1P priority frames to be passed to the main input
|
|
|
|
* path correctly.
|
|
|
|
* TODO: Deal with Q-in-Q frames, but not arbitrary nesting levels.
|
|
|
|
*/
|
|
|
|
if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_VLAN) {
|
|
|
|
struct ether_vlan_header *evl;
|
|
|
|
|
|
|
|
if (m->m_len < sizeof(*evl) &&
|
|
|
|
(m = m_pullup(m, sizeof(*evl))) == NULL) {
|
2007-03-20 14:29:54 +00:00
|
|
|
#ifdef DIAGNOSTIC
|
2007-03-19 18:39:36 +00:00
|
|
|
if_printf(ifp, "cannot pullup VLAN header\n");
|
2007-03-20 14:29:54 +00:00
|
|
|
#endif
|
2007-03-19 18:39:36 +00:00
|
|
|
ifp->if_ierrors++;
|
|
|
|
m_freem(m);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
evl = mtod(m, struct ether_vlan_header *);
|
|
|
|
m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
|
|
|
|
m->m_flags |= M_VLANTAG;
|
|
|
|
|
|
|
|
bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
|
|
|
|
ETHER_HDR_LEN - ETHER_TYPE_LEN);
|
|
|
|
m_adj(m, ETHER_VLAN_ENCAP_LEN);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Allow ng_ether(4) to claim this frame. */
|
2005-02-14 11:58:54 +00:00
|
|
|
if (IFP2AC(ifp)->ac_netgraph != NULL) {
|
2005-07-21 09:00:51 +00:00
|
|
|
KASSERT(ng_ether_input_p != NULL,
|
2007-03-19 18:39:36 +00:00
|
|
|
("%s: ng_ether_input_p is NULL", __func__));
|
|
|
|
m->m_flags &= ~M_PROMISC;
|
2002-11-14 23:35:06 +00:00
|
|
|
(*ng_ether_input_p)(ifp, &m);
|
Change the curvnet variable from a global const struct vnet *,
previously always pointing to the default vnet context, to a
dynamically changing thread-local one. The currvnet context
should be set on entry to networking code via CURVNET_SET() macros,
and reverted to previous state via CURVNET_RESTORE(). Recursions
on curvnet are permitted, though strongly discuouraged.
This change should have no functional impact on nooptions VIMAGE
kernel builds, where CURVNET_* macros expand to whitespace.
The curthread->td_vnet (aka curvnet) variable's purpose is to be an
indicator of the vnet context in which the current network-related
operation takes place, in case we cannot deduce the current vnet
context from any other source, such as by looking at mbuf's
m->m_pkthdr.rcvif->if_vnet, sockets's so->so_vnet etc. Moreover, so
far curvnet has turned out to be an invaluable consistency checking
aid: it helps to catch cases when sockets, ifnets or any other
vnet-aware structures may have leaked from one vnet to another.
The exact placement of the CURVNET_SET() / CURVNET_RESTORE() macros
was a result of an empirical iterative process, whith an aim to
reduce recursions on CURVNET_SET() to a minimum, while still reducing
the scope of CURVNET_SET() to networking only operations - the
alternative would be calling CURVNET_SET() on each system call entry.
In general, curvnet has to be set in three typicall cases: when
processing socket-related requests from userspace or from within the
kernel; when processing inbound traffic flowing from device drivers
to upper layers of the networking stack, and when executing
timer-driven networking functions.
This change also introduces a DDB subcommand to show the list of all
vnet instances.
Approved by: julian (mentor)
2009-05-05 10:56:12 +00:00
|
|
|
if (m == NULL) {
|
|
|
|
CURVNET_RESTORE();
|
2000-06-26 23:34:54 +00:00
|
|
|
return;
|
Change the curvnet variable from a global const struct vnet *,
previously always pointing to the default vnet context, to a
dynamically changing thread-local one. The currvnet context
should be set on entry to networking code via CURVNET_SET() macros,
and reverted to previous state via CURVNET_RESTORE(). Recursions
on curvnet are permitted, though strongly discuouraged.
This change should have no functional impact on nooptions VIMAGE
kernel builds, where CURVNET_* macros expand to whitespace.
The curthread->td_vnet (aka curvnet) variable's purpose is to be an
indicator of the vnet context in which the current network-related
operation takes place, in case we cannot deduce the current vnet
context from any other source, such as by looking at mbuf's
m->m_pkthdr.rcvif->if_vnet, sockets's so->so_vnet etc. Moreover, so
far curvnet has turned out to be an invaluable consistency checking
aid: it helps to catch cases when sockets, ifnets or any other
vnet-aware structures may have leaked from one vnet to another.
The exact placement of the CURVNET_SET() / CURVNET_RESTORE() macros
was a result of an empirical iterative process, whith an aim to
reduce recursions on CURVNET_SET() to a minimum, while still reducing
the scope of CURVNET_SET() to networking only operations - the
alternative would be calling CURVNET_SET() on each system call entry.
In general, curvnet has to be set in three typicall cases: when
processing socket-related requests from userspace or from within the
kernel; when processing inbound traffic flowing from device drivers
to upper layers of the networking stack, and when executing
timer-driven networking functions.
This change also introduces a DDB subcommand to show the list of all
vnet instances.
Approved by: julian (mentor)
2009-05-05 10:56:12 +00:00
|
|
|
}
|
2000-06-26 23:34:54 +00:00
|
|
|
}
|
|
|
|
|
2005-06-05 03:13:13 +00:00
|
|
|
/*
|
2007-03-19 18:39:36 +00:00
|
|
|
* Allow if_bridge(4) to claim this frame.
|
|
|
|
* The BRIDGE_INPUT() macro will update ifp if the bridge changed it
|
|
|
|
* and the frame should be delivered locally.
|
2005-06-05 03:49:23 +00:00
|
|
|
*/
|
2007-03-19 18:39:36 +00:00
|
|
|
if (ifp->if_bridge != NULL) {
|
|
|
|
m->m_flags &= ~M_PROMISC;
|
2005-10-14 02:38:47 +00:00
|
|
|
BRIDGE_INPUT(ifp, m);
|
Change the curvnet variable from a global const struct vnet *,
previously always pointing to the default vnet context, to a
dynamically changing thread-local one. The currvnet context
should be set on entry to networking code via CURVNET_SET() macros,
and reverted to previous state via CURVNET_RESTORE(). Recursions
on curvnet are permitted, though strongly discuouraged.
This change should have no functional impact on nooptions VIMAGE
kernel builds, where CURVNET_* macros expand to whitespace.
The curthread->td_vnet (aka curvnet) variable's purpose is to be an
indicator of the vnet context in which the current network-related
operation takes place, in case we cannot deduce the current vnet
context from any other source, such as by looking at mbuf's
m->m_pkthdr.rcvif->if_vnet, sockets's so->so_vnet etc. Moreover, so
far curvnet has turned out to be an invaluable consistency checking
aid: it helps to catch cases when sockets, ifnets or any other
vnet-aware structures may have leaked from one vnet to another.
The exact placement of the CURVNET_SET() / CURVNET_RESTORE() macros
was a result of an empirical iterative process, whith an aim to
reduce recursions on CURVNET_SET() to a minimum, while still reducing
the scope of CURVNET_SET() to networking only operations - the
alternative would be calling CURVNET_SET() on each system call entry.
In general, curvnet has to be set in three typicall cases: when
processing socket-related requests from userspace or from within the
kernel; when processing inbound traffic flowing from device drivers
to upper layers of the networking stack, and when executing
timer-driven networking functions.
This change also introduces a DDB subcommand to show the list of all
vnet instances.
Approved by: julian (mentor)
2009-05-05 10:56:12 +00:00
|
|
|
if (m == NULL) {
|
|
|
|
CURVNET_RESTORE();
|
2005-06-05 03:13:13 +00:00
|
|
|
return;
|
Change the curvnet variable from a global const struct vnet *,
previously always pointing to the default vnet context, to a
dynamically changing thread-local one. The currvnet context
should be set on entry to networking code via CURVNET_SET() macros,
and reverted to previous state via CURVNET_RESTORE(). Recursions
on curvnet are permitted, though strongly discuouraged.
This change should have no functional impact on nooptions VIMAGE
kernel builds, where CURVNET_* macros expand to whitespace.
The curthread->td_vnet (aka curvnet) variable's purpose is to be an
indicator of the vnet context in which the current network-related
operation takes place, in case we cannot deduce the current vnet
context from any other source, such as by looking at mbuf's
m->m_pkthdr.rcvif->if_vnet, sockets's so->so_vnet etc. Moreover, so
far curvnet has turned out to be an invaluable consistency checking
aid: it helps to catch cases when sockets, ifnets or any other
vnet-aware structures may have leaked from one vnet to another.
The exact placement of the CURVNET_SET() / CURVNET_RESTORE() macros
was a result of an empirical iterative process, whith an aim to
reduce recursions on CURVNET_SET() to a minimum, while still reducing
the scope of CURVNET_SET() to networking only operations - the
alternative would be calling CURVNET_SET() on each system call entry.
In general, curvnet has to be set in three typicall cases: when
processing socket-related requests from userspace or from within the
kernel; when processing inbound traffic flowing from device drivers
to upper layers of the networking stack, and when executing
timer-driven networking functions.
This change also introduces a DDB subcommand to show the list of all
vnet instances.
Approved by: julian (mentor)
2009-05-05 10:56:12 +00:00
|
|
|
}
|
2005-06-05 03:13:13 +00:00
|
|
|
}
|
|
|
|
|
2009-06-11 10:26:38 +00:00
|
|
|
#if defined(INET) || defined(INET6)
|
2007-03-19 18:39:36 +00:00
|
|
|
#ifdef DEV_CARP
|
|
|
|
/*
|
|
|
|
* Clear M_PROMISC on frame so that carp(4) will see it when the
|
|
|
|
* mbuf flows up to Layer 3.
|
|
|
|
* FreeBSD's implementation of carp(4) uses the inprotosw
|
|
|
|
* to dispatch IPPROTO_CARP. carp(4) also allocates its own
|
|
|
|
* Ethernet addresses of the form 00:00:5e:00:01:xx, which
|
|
|
|
* is outside the scope of the M_PROMISC test below.
|
|
|
|
* TODO: Maintain a hash table of ethernet addresses other than
|
|
|
|
* ether_dhost which may be active on this ifp.
|
|
|
|
*/
|
|
|
|
if (ifp->if_carp && carp_forus(ifp->if_carp, eh->ether_dhost)) {
|
|
|
|
m->m_flags &= ~M_PROMISC;
|
|
|
|
} else
|
2009-06-11 10:26:38 +00:00
|
|
|
#endif
|
2007-03-19 18:39:36 +00:00
|
|
|
#endif
|
|
|
|
{
|
|
|
|
/*
|
2007-03-22 19:08:39 +00:00
|
|
|
* If the frame received was not for our MAC address, set the
|
2007-03-19 18:39:36 +00:00
|
|
|
* M_PROMISC flag on the mbuf chain. The frame may need to
|
|
|
|
* be seen by the rest of the Ethernet input path in case of
|
|
|
|
* re-entry (e.g. bridge, vlan, netgraph) but should not be
|
|
|
|
* seen by upper protocol layers.
|
|
|
|
*/
|
|
|
|
if (!ETHER_IS_MULTICAST(eh->ether_dhost) &&
|
2007-03-22 19:08:39 +00:00
|
|
|
bcmp(IF_LLADDR(ifp), eh->ether_dhost, ETHER_ADDR_LEN) != 0)
|
2007-03-19 18:39:36 +00:00
|
|
|
m->m_flags |= M_PROMISC;
|
|
|
|
}
|
|
|
|
|
2002-09-26 14:01:50 +00:00
|
|
|
/* First chunk of an mbuf contains good entropy */
|
2001-02-18 17:54:52 +00:00
|
|
|
if (harvest.ethernet)
|
|
|
|
random_harvest(m, 16, 3, 0, RANDOM_NET);
|
2007-03-19 18:39:36 +00:00
|
|
|
|
2004-10-11 10:21:34 +00:00
|
|
|
ether_demux(ifp, m);
|
Change the curvnet variable from a global const struct vnet *,
previously always pointing to the default vnet context, to a
dynamically changing thread-local one. The currvnet context
should be set on entry to networking code via CURVNET_SET() macros,
and reverted to previous state via CURVNET_RESTORE(). Recursions
on curvnet are permitted, though strongly discuouraged.
This change should have no functional impact on nooptions VIMAGE
kernel builds, where CURVNET_* macros expand to whitespace.
The curthread->td_vnet (aka curvnet) variable's purpose is to be an
indicator of the vnet context in which the current network-related
operation takes place, in case we cannot deduce the current vnet
context from any other source, such as by looking at mbuf's
m->m_pkthdr.rcvif->if_vnet, sockets's so->so_vnet etc. Moreover, so
far curvnet has turned out to be an invaluable consistency checking
aid: it helps to catch cases when sockets, ifnets or any other
vnet-aware structures may have leaked from one vnet to another.
The exact placement of the CURVNET_SET() / CURVNET_RESTORE() macros
was a result of an empirical iterative process, whith an aim to
reduce recursions on CURVNET_SET() to a minimum, while still reducing
the scope of CURVNET_SET() to networking only operations - the
alternative would be calling CURVNET_SET() on each system call entry.
In general, curvnet has to be set in three typicall cases: when
processing socket-related requests from userspace or from within the
kernel; when processing inbound traffic flowing from device drivers
to upper layers of the networking stack, and when executing
timer-driven networking functions.
This change also introduces a DDB subcommand to show the list of all
vnet instances.
Approved by: julian (mentor)
2009-05-05 10:56:12 +00:00
|
|
|
CURVNET_RESTORE();
|
2000-06-26 23:34:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Upper layer processing for a received Ethernet packet.
|
|
|
|
*/
|
|
|
|
void
|
2002-11-14 23:35:06 +00:00
|
|
|
ether_demux(struct ifnet *ifp, struct mbuf *m)
|
2000-06-26 23:34:54 +00:00
|
|
|
{
|
2002-11-14 23:35:06 +00:00
|
|
|
struct ether_header *eh;
|
2003-03-04 23:19:55 +00:00
|
|
|
int isr;
|
2000-06-26 23:34:54 +00:00
|
|
|
u_short ether_type;
|
|
|
|
#if defined(NETATALK)
|
2003-03-03 00:21:52 +00:00
|
|
|
struct llc *l;
|
2000-06-26 23:34:54 +00:00
|
|
|
#endif
|
2002-11-14 23:35:06 +00:00
|
|
|
|
2007-03-19 18:39:36 +00:00
|
|
|
KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__));
|
2002-11-14 23:35:06 +00:00
|
|
|
|
2004-06-24 10:58:08 +00:00
|
|
|
#if defined(INET) || defined(INET6)
|
2007-03-19 18:39:36 +00:00
|
|
|
/*
|
|
|
|
* Allow dummynet and/or ipfw to claim the frame.
|
|
|
|
* Do not do this for PROMISC frames in case we are re-entered.
|
|
|
|
*/
|
2009-10-11 05:59:43 +00:00
|
|
|
if (V_ip_fw_chk_ptr && V_ether_ipfw != 0 && !(m->m_flags & M_PROMISC)) {
|
2009-06-09 21:27:11 +00:00
|
|
|
if (ether_ipfw_chk(&m, NULL, 0) == 0) {
|
2002-05-13 10:37:19 +00:00
|
|
|
if (m)
|
2007-03-19 18:39:36 +00:00
|
|
|
m_freem(m); /* dropped; free mbuf chain */
|
|
|
|
return; /* consumed */
|
2002-05-13 10:37:19 +00:00
|
|
|
}
|
|
|
|
}
|
2004-06-24 10:58:08 +00:00
|
|
|
#endif
|
2007-03-19 18:39:36 +00:00
|
|
|
eh = mtod(m, struct ether_header *);
|
|
|
|
ether_type = ntohs(eh->ether_type);
|
2002-05-13 10:37:19 +00:00
|
|
|
|
2002-11-14 23:35:06 +00:00
|
|
|
/*
|
2007-03-19 18:39:36 +00:00
|
|
|
* If this frame has a VLAN tag other than 0, call vlan_input()
|
|
|
|
* if its module is loaded. Otherwise, drop.
|
2002-11-14 23:35:06 +00:00
|
|
|
*/
|
2007-03-19 18:39:36 +00:00
|
|
|
if ((m->m_flags & M_VLANTAG) &&
|
|
|
|
EVL_VLANOFTAG(m->m_pkthdr.ether_vtag) != 0) {
|
Merge the //depot/user/yar/vlan branch into CVS. It contains some collective
work by yar, thompsa and myself. The checksum offloading part also involves
work done by Mihail Balikov.
The most important changes:
o Instead of global linked list of all vlan softc use a per-trunk
hash. The size of hash is dynamically adjusted, depending on
number of entries. This changes struct ifnet, replacing counter
of vlans with a pointer to trunk structure. This change is an
improvement for setups with big number of VLANs, several interfaces
and several CPUs. It is a small regression for a setup with a single
VLAN interface.
An alternative to dynamic hash is a per-trunk static array with
4096 entries, which is a compile time option - VLAN_ARRAY. In my
experiments the array is not an improvement, probably because such
a big trunk structure doesn't fit into CPU cache.
o Introduce an UMA zone for VLAN tags. Since drivers depend on it,
the zone is declared in kern_mbuf.c, not in optional vlan(4) driver.
This change is a big improvement for any setup utilizing vlan(4).
o Use rwlock(9) instead of mutex(9) for locking. We are the first
ones to do this! :)
o Some drivers can do hardware VLAN tagging + hardware checksum
offloading. Add an infrastructure for this. Whenever vlan(4) is
attached to a parent or parent configuration is changed, the flags
on vlan(4) interface are updated.
In collaboration with: yar, thompsa
In collaboration with: Mihail Balikov <mihail.balikov interbgc.com>
2006-01-30 13:45:15 +00:00
|
|
|
if (ifp->if_vlantrunk == NULL) {
|
2005-02-14 08:29:42 +00:00
|
|
|
ifp->if_noproto++;
|
|
|
|
m_freem(m);
|
|
|
|
return;
|
|
|
|
}
|
2007-03-19 18:39:36 +00:00
|
|
|
KASSERT(vlan_input_p != NULL,("%s: VLAN not loaded!",
|
|
|
|
__func__));
|
|
|
|
/* Clear before possibly re-entering ether_input(). */
|
|
|
|
m->m_flags &= ~M_PROMISC;
|
2002-11-14 23:35:06 +00:00
|
|
|
(*vlan_input_p)(ifp, m);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2007-03-19 18:39:36 +00:00
|
|
|
* Pass promiscuously received frames to the upper layer if the user
|
|
|
|
* requested this by setting IFF_PPROMISC. Otherwise, drop them.
|
2002-11-14 23:35:06 +00:00
|
|
|
*/
|
2007-03-19 18:39:36 +00:00
|
|
|
if ((ifp->if_flags & IFF_PPROMISC) == 0 && (m->m_flags & M_PROMISC)) {
|
|
|
|
m_freem(m);
|
2002-11-14 23:35:06 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2007-03-19 18:39:36 +00:00
|
|
|
/*
|
|
|
|
* Reset layer specific mbuf flags to avoid confusing upper layers.
|
|
|
|
* Strip off Ethernet header.
|
|
|
|
*/
|
|
|
|
m->m_flags &= ~M_VLANTAG;
|
2005-11-18 16:23:26 +00:00
|
|
|
m->m_flags &= ~(M_PROTOFLAGS);
|
2007-03-19 18:39:36 +00:00
|
|
|
m_adj(m, ETHER_HDR_LEN);
|
2005-11-18 16:23:26 +00:00
|
|
|
|
2007-03-19 18:39:36 +00:00
|
|
|
/*
|
|
|
|
* Dispatch frame to upper layer.
|
|
|
|
*/
|
1994-11-24 14:29:38 +00:00
|
|
|
switch (ether_type) {
|
1994-05-24 10:09:53 +00:00
|
|
|
#ifdef INET
|
|
|
|
case ETHERTYPE_IP:
|
2006-01-18 14:24:39 +00:00
|
|
|
if ((m = ip_fastforward(m)) == NULL)
|
1998-05-19 14:04:36 +00:00
|
|
|
return;
|
2003-03-04 23:19:55 +00:00
|
|
|
isr = NETISR_IP;
|
1994-05-24 10:09:53 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case ETHERTYPE_ARP:
|
2001-06-15 21:00:32 +00:00
|
|
|
if (ifp->if_flags & IFF_NOARP) {
|
|
|
|
/* Discard packet if ARP is disabled on interface */
|
|
|
|
m_freem(m);
|
|
|
|
return;
|
|
|
|
}
|
2003-03-04 23:19:55 +00:00
|
|
|
isr = NETISR_ARP;
|
1994-05-24 10:09:53 +00:00
|
|
|
break;
|
|
|
|
#endif
|
1995-10-26 20:31:59 +00:00
|
|
|
#ifdef IPX
|
|
|
|
case ETHERTYPE_IPX:
|
1999-12-13 16:24:22 +00:00
|
|
|
if (ef_inputp && ef_inputp(ifp, eh, m) == 0)
|
|
|
|
return;
|
2003-03-04 23:19:55 +00:00
|
|
|
isr = NETISR_IPX;
|
1995-10-26 20:31:59 +00:00
|
|
|
break;
|
|
|
|
#endif
|
1999-11-22 02:45:11 +00:00
|
|
|
#ifdef INET6
|
|
|
|
case ETHERTYPE_IPV6:
|
2003-03-04 23:19:55 +00:00
|
|
|
isr = NETISR_IPV6;
|
1999-11-22 02:45:11 +00:00
|
|
|
break;
|
|
|
|
#endif
|
1996-05-24 01:35:45 +00:00
|
|
|
#ifdef NETATALK
|
2003-03-04 23:19:55 +00:00
|
|
|
case ETHERTYPE_AT:
|
|
|
|
isr = NETISR_ATALK1;
|
|
|
|
break;
|
|
|
|
case ETHERTYPE_AARP:
|
|
|
|
isr = NETISR_AARP;
|
|
|
|
break;
|
2001-09-10 01:33:03 +00:00
|
|
|
#endif /* NETATALK */
|
1994-05-24 10:09:53 +00:00
|
|
|
default:
|
1999-12-13 16:24:22 +00:00
|
|
|
#ifdef IPX
|
|
|
|
if (ef_inputp && ef_inputp(ifp, eh, m) == 0)
|
|
|
|
return;
|
|
|
|
#endif /* IPX */
|
2000-02-13 03:32:07 +00:00
|
|
|
#if defined(NETATALK)
|
1994-11-24 14:29:38 +00:00
|
|
|
if (ether_type > ETHERMTU)
|
2002-11-14 23:35:06 +00:00
|
|
|
goto discard;
|
1994-05-24 10:09:53 +00:00
|
|
|
l = mtod(m, struct llc *);
|
2003-03-04 23:19:55 +00:00
|
|
|
if (l->llc_dsap == LLC_SNAP_LSAP &&
|
|
|
|
l->llc_ssap == LLC_SNAP_LSAP &&
|
|
|
|
l->llc_control == LLC_UI) {
|
2004-04-18 11:01:15 +00:00
|
|
|
if (bcmp(&(l->llc_snap_org_code)[0], at_org_code,
|
2003-03-04 23:19:55 +00:00
|
|
|
sizeof(at_org_code)) == 0 &&
|
|
|
|
ntohs(l->llc_snap_ether_type) == ETHERTYPE_AT) {
|
2003-10-23 13:49:10 +00:00
|
|
|
m_adj(m, LLC_SNAPFRAMELEN);
|
2003-03-04 23:19:55 +00:00
|
|
|
isr = NETISR_ATALK2;
|
2003-10-23 13:49:10 +00:00
|
|
|
break;
|
1996-05-24 01:35:45 +00:00
|
|
|
}
|
2004-04-18 11:01:15 +00:00
|
|
|
if (bcmp(&(l->llc_snap_org_code)[0], aarp_org_code,
|
2003-03-04 23:19:55 +00:00
|
|
|
sizeof(aarp_org_code)) == 0 &&
|
|
|
|
ntohs(l->llc_snap_ether_type) == ETHERTYPE_AARP) {
|
|
|
|
m_adj(m, LLC_SNAPFRAMELEN);
|
|
|
|
isr = NETISR_AARP;
|
|
|
|
break;
|
1996-05-24 01:35:45 +00:00
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
2000-02-13 03:32:07 +00:00
|
|
|
#endif /* NETATALK */
|
2003-03-04 23:19:55 +00:00
|
|
|
goto discard;
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
2003-03-04 23:19:55 +00:00
|
|
|
netisr_dispatch(isr, m);
|
2002-11-14 23:35:06 +00:00
|
|
|
return;
|
2003-03-04 23:19:55 +00:00
|
|
|
|
2002-11-14 23:35:06 +00:00
|
|
|
discard:
|
|
|
|
/*
|
|
|
|
* Packet is to be discarded. If netgraph is present,
|
|
|
|
* hand the packet to it for last chance processing;
|
|
|
|
* otherwise dispose of it.
|
|
|
|
*/
|
2005-02-14 11:58:54 +00:00
|
|
|
if (IFP2AC(ifp)->ac_netgraph != NULL) {
|
2005-07-21 09:00:51 +00:00
|
|
|
KASSERT(ng_ether_input_orphan_p != NULL,
|
|
|
|
("ng_ether_input_orphan_p is NULL"));
|
2002-11-14 23:35:06 +00:00
|
|
|
/*
|
|
|
|
* Put back the ethernet header so netgraph has a
|
|
|
|
* consistent view of inbound packets.
|
|
|
|
*/
|
2003-03-03 05:04:57 +00:00
|
|
|
M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
|
2002-11-14 23:35:06 +00:00
|
|
|
(*ng_ether_input_orphan_p)(ifp, m);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
m_freem(m);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Convert Ethernet address to printable (loggable) representation.
|
|
|
|
* This routine is for compatibility; it's better to just use
|
|
|
|
*
|
|
|
|
* printf("%6D", <pointer to address>, ":");
|
|
|
|
*
|
|
|
|
* since there's no static buffer involved.
|
|
|
|
*/
|
|
|
|
char *
|
|
|
|
ether_sprintf(const u_char *ap)
|
|
|
|
{
|
|
|
|
static char etherbuf[18];
|
|
|
|
snprintf(etherbuf, sizeof (etherbuf), "%6D", ap, ":");
|
|
|
|
return (etherbuf);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Perform common duties while attaching to interface list
|
|
|
|
*/
|
|
|
|
void
|
2005-11-11 07:36:14 +00:00
|
|
|
ether_ifattach(struct ifnet *ifp, const u_int8_t *lla)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2004-07-02 19:44:59 +00:00
|
|
|
int i;
|
2003-03-03 00:21:52 +00:00
|
|
|
struct ifaddr *ifa;
|
|
|
|
struct sockaddr_dl *sdl;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2002-11-14 23:35:06 +00:00
|
|
|
ifp->if_addrlen = ETHER_ADDR_LEN;
|
|
|
|
ifp->if_hdrlen = ETHER_HDR_LEN;
|
2001-10-11 05:37:59 +00:00
|
|
|
if_attach(ifp);
|
1994-05-24 10:09:53 +00:00
|
|
|
ifp->if_mtu = ETHERMTU;
|
2002-11-14 23:35:06 +00:00
|
|
|
ifp->if_output = ether_output;
|
|
|
|
ifp->if_input = ether_input;
|
1997-01-07 19:15:32 +00:00
|
|
|
ifp->if_resolvemulti = ether_resolvemulti;
|
1996-06-01 23:25:10 +00:00
|
|
|
if (ifp->if_baudrate == 0)
|
2002-11-14 23:35:06 +00:00
|
|
|
ifp->if_baudrate = IF_Mbps(10); /* just a default */
|
2001-10-14 20:17:53 +00:00
|
|
|
ifp->if_broadcastaddr = etherbroadcastaddr;
|
2002-11-14 23:35:06 +00:00
|
|
|
|
2005-11-11 16:04:59 +00:00
|
|
|
ifa = ifp->if_addr;
|
2001-12-10 08:09:49 +00:00
|
|
|
KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
|
1996-12-13 21:29:07 +00:00
|
|
|
sdl = (struct sockaddr_dl *)ifa->ifa_addr;
|
|
|
|
sdl->sdl_type = IFT_ETHER;
|
|
|
|
sdl->sdl_alen = ifp->if_addrlen;
|
2005-11-11 07:36:14 +00:00
|
|
|
bcopy(lla, LLADDR(sdl), ifp->if_addrlen);
|
2002-11-14 23:35:06 +00:00
|
|
|
|
2003-03-03 05:04:57 +00:00
|
|
|
bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
|
2000-06-26 23:34:54 +00:00
|
|
|
if (ng_ether_attach_p != NULL)
|
|
|
|
(*ng_ether_attach_p)(ifp);
|
2004-03-14 07:12:25 +00:00
|
|
|
|
2004-07-02 19:44:59 +00:00
|
|
|
/* Announce Ethernet MAC address if non-zero. */
|
|
|
|
for (i = 0; i < ifp->if_addrlen; i++)
|
2005-11-11 07:36:14 +00:00
|
|
|
if (lla[i] != 0)
|
2004-07-02 19:44:59 +00:00
|
|
|
break;
|
|
|
|
if (i != ifp->if_addrlen)
|
2005-11-11 07:36:14 +00:00
|
|
|
if_printf(ifp, "Ethernet address: %6D\n", lla, ":");
|
2000-06-26 23:34:54 +00:00
|
|
|
}
|
|
|
|
|
2000-07-13 22:54:34 +00:00
|
|
|
/*
|
|
|
|
* Perform common duties while detaching an Ethernet interface
|
|
|
|
*/
|
|
|
|
void
|
2002-11-14 23:35:06 +00:00
|
|
|
ether_ifdetach(struct ifnet *ifp)
|
2000-07-13 22:54:34 +00:00
|
|
|
{
|
2005-07-21 09:00:51 +00:00
|
|
|
if (IFP2AC(ifp)->ac_netgraph != NULL) {
|
|
|
|
KASSERT(ng_ether_detach_p != NULL,
|
|
|
|
("ng_ether_detach_p is NULL"));
|
2000-07-13 22:54:34 +00:00
|
|
|
(*ng_ether_detach_p)(ifp);
|
2005-07-21 09:00:51 +00:00
|
|
|
}
|
2005-10-13 23:05:55 +00:00
|
|
|
|
2002-11-14 23:35:06 +00:00
|
|
|
bpfdetach(ifp);
|
2000-07-13 22:54:34 +00:00
|
|
|
if_detach(ifp);
|
|
|
|
}
|
|
|
|
|
1999-02-16 10:49:55 +00:00
|
|
|
/*
 * Reference the _net_link sysctl parent and define the read-write
 * "ether" node (described as "Ethernet") beneath it, numbered IFT_ETHER.
 */
SYSCTL_DECL(_net_link);
|
1995-12-20 21:53:53 +00:00
|
|
|
SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet");
|
2004-06-24 10:58:08 +00:00
|
|
|
#if defined(INET) || defined(INET6)
|
Build on Jeff Roberson's linker-set based dynamic per-CPU allocator
(DPCPU), as suggested by Peter Wemm, and implement a new per-virtual
network stack memory allocator. Modify vnet to use the allocator
instead of monolithic global container structures (vinet, ...). This
change solves many binary compatibility problems associated with
VIMAGE, and restores ELF symbols for virtualized global variables.
Each virtualized global variable exists as a "reference copy", and also
once per virtual network stack. Virtualized global variables are
tagged at compile-time, placing them in a special linker set, which is
loaded into a contiguous region of kernel memory. Virtualized global
variables in the base kernel are linked as normal, but those in modules
are copied and relocated to a reserved portion of the kernel's vnet
region with the help of the kernel linker.
Virtualized global variables exist in per-vnet memory set up when the
network stack instance is created, and are initialized statically from
the reference copy. Run-time access occurs via an accessor macro, which
converts from the current vnet and requested symbol to a per-vnet
address. When "options VIMAGE" is not compiled into the kernel, normal
global ELF symbols will be used instead and indirection is avoided.
This change restores static initialization for network stack global
variables, restores support for non-global symbols and types, eliminates
the need for many subsystem constructors, eliminates large per-subsystem
structures that caused many binary compatibility issues both for
monitoring applications (netstat) and kernel modules, removes the
per-function INIT_VNET_*() macros throughout the stack, eliminates the
need for vnet_symmap ksym(2) munging, and eliminates duplicate
definitions of virtualized globals under VIMAGE_GLOBALS.
Bump __FreeBSD_version and update UPDATING.
Portions submitted by: bz
Reviewed by: bz, zec
Discussed with: gnn, jamie, jeff, jhb, julian, sam
Suggested by: peter
Approved by: re (kensmith)
2009-07-14 22:48:30 +00:00
|
|
|
/*
 * Read-write integer sysctl "ipfw" under _net_link_ether, backed by the
 * virtualized variable ether_ipfw.  Only built when INET or INET6 is
 * configured.
 */
SYSCTL_VNET_INT(_net_link_ether, OID_AUTO, ipfw, CTLFLAG_RW,
|
|
|
|
&VNET_NAME(ether_ipfw), 0, "Pass ether pkts through firewall");
|
2004-06-24 10:58:08 +00:00
|
|
|
#endif
|
1996-08-04 10:54:13 +00:00
|
|
|
|
2004-06-02 21:34:14 +00:00
|
|
|
#if 0
/*
 * This is for reference.  We have a table-driven version
 * of the little-endian crc32 generator, which is faster
 * than the double-loop.
 *
 * Bit-at-a-time form: each input byte is consumed least-significant
 * bit first, and the register is shifted right.
 */
uint32_t
ether_crc32_le(const uint8_t *buf, size_t len)
{
	size_t i;
	uint32_t crc, carry;	/* carry was previously used undeclared */
	int bit;
	uint8_t data;

	crc = 0xffffffff;	/* initial value */

	for (i = 0; i < len; i++) {
		for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) {
			carry = (crc ^ data) & 1;
			crc >>= 1;
			if (carry)
				crc = (crc ^ ETHER_CRC_POLY_LE);
		}
	}

	return (crc);
}
#else
/*
 * Little-endian (right-shifting) CRC32 over len bytes at buf.
 *
 * Table-driven, four bits at a time: each byte is XORed into the low
 * bits of the register, which is then advanced by two nibble lookups.
 *
 * The register starts at 0xffffffff and is returned as-is; no final
 * inversion is applied here.  An empty buffer therefore yields
 * 0xffffffff, and buf is not dereferenced when len is 0.
 */
uint32_t
ether_crc32_le(const uint8_t *buf, size_t len)
{
	static const uint32_t crctab[] = {
		0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac,
		0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
		0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
		0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c
	};
	size_t i;
	uint32_t crc;

	crc = 0xffffffff;	/* initial value */

	for (i = 0; i < len; i++) {
		crc ^= buf[i];
		/* Two lookups: low nibble first, then high nibble. */
		crc = (crc >> 4) ^ crctab[crc & 0xf];
		crc = (crc >> 4) ^ crctab[crc & 0xf];
	}

	return (crc);
}
#endif
|
|
|
|
|
|
|
|
uint32_t
|
|
|
|
ether_crc32_be(const uint8_t *buf, size_t len)
|
|
|
|
{
|
|
|
|
size_t i;
|
|
|
|
uint32_t crc, carry;
|
|
|
|
int bit;
|
|
|
|
uint8_t data;
|
|
|
|
|
|
|
|
crc = 0xffffffff; /* initial value */
|
|
|
|
|
|
|
|
for (i = 0; i < len; i++) {
|
|
|
|
for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) {
|
|
|
|
carry = ((crc & 0x80000000) ? 1 : 0) ^ (data & 0x01);
|
|
|
|
crc <<= 1;
|
|
|
|
if (carry)
|
|
|
|
crc = (crc ^ ETHER_CRC_POLY_BE) | carry;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return (crc);
|
|
|
|
}
|
|
|
|
|
1996-12-10 07:29:50 +00:00
|
|
|
int
|
2007-05-29 12:40:45 +00:00
|
|
|
ether_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
|
1996-08-04 10:54:13 +00:00
|
|
|
{
|
|
|
|
struct ifaddr *ifa = (struct ifaddr *) data;
|
|
|
|
struct ifreq *ifr = (struct ifreq *) data;
|
1996-12-10 07:29:50 +00:00
|
|
|
int error = 0;
|
1996-08-04 10:54:13 +00:00
|
|
|
|
|
|
|
switch (command) {
|
|
|
|
case SIOCSIFADDR:
|
|
|
|
ifp->if_flags |= IFF_UP;
|
|
|
|
|
|
|
|
switch (ifa->ifa_addr->sa_family) {
|
|
|
|
#ifdef INET
|
|
|
|
case AF_INET:
|
2008-03-20 06:19:34 +00:00
|
|
|
ifp->if_init(ifp->if_softc); /* before arpwhohas */
|
2001-10-14 20:17:53 +00:00
|
|
|
arp_ifinit(ifp, ifa);
|
1996-08-04 10:54:13 +00:00
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
#ifdef IPX
|
|
|
|
/*
|
|
|
|
* XXX - This code is probably wrong
|
|
|
|
*/
|
|
|
|
case AF_IPX:
|
|
|
|
{
|
2003-03-03 00:21:52 +00:00
|
|
|
struct ipx_addr *ina = &(IA_SIPX(ifa)->sipx_addr);
|
1996-08-04 10:54:13 +00:00
|
|
|
|
|
|
|
if (ipx_nullhost(*ina))
|
|
|
|
ina->x_host =
|
1999-11-22 02:45:11 +00:00
|
|
|
*(union ipx_host *)
|
2005-11-11 16:04:59 +00:00
|
|
|
IF_LLADDR(ifp);
|
1996-08-04 10:54:13 +00:00
|
|
|
else {
|
|
|
|
bcopy((caddr_t) ina->x_host.c_host,
|
2005-11-11 16:04:59 +00:00
|
|
|
(caddr_t) IF_LLADDR(ifp),
|
2005-06-10 16:49:24 +00:00
|
|
|
ETHER_ADDR_LEN);
|
1996-08-04 10:54:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set new address
|
|
|
|
*/
|
|
|
|
ifp->if_init(ifp->if_softc);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
default:
|
|
|
|
ifp->if_init(ifp->if_softc);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case SIOCGIFADDR:
|
|
|
|
{
|
|
|
|
struct sockaddr *sa;
|
|
|
|
|
|
|
|
sa = (struct sockaddr *) & ifr->ifr_data;
|
2005-11-11 16:04:59 +00:00
|
|
|
bcopy(IF_LLADDR(ifp),
|
1996-08-04 10:54:13 +00:00
|
|
|
(caddr_t) sa->sa_data, ETHER_ADDR_LEN);
|
|
|
|
}
|
|
|
|
break;
|
1996-12-10 07:29:50 +00:00
|
|
|
|
|
|
|
case SIOCSIFMTU:
|
|
|
|
/*
|
|
|
|
* Set the interface MTU.
|
|
|
|
*/
|
|
|
|
if (ifr->ifr_mtu > ETHERMTU) {
|
|
|
|
error = EINVAL;
|
|
|
|
} else {
|
|
|
|
ifp->if_mtu = ifr->ifr_mtu;
|
|
|
|
}
|
|
|
|
break;
|
2002-11-14 23:35:06 +00:00
|
|
|
default:
|
|
|
|
error = EINVAL; /* XXX netbsd has ENOTTY??? */
|
|
|
|
break;
|
1996-08-04 10:54:13 +00:00
|
|
|
}
|
1996-12-10 07:29:50 +00:00
|
|
|
return (error);
|
1996-08-04 10:54:13 +00:00
|
|
|
}
|
1997-01-07 19:15:32 +00:00
|
|
|
|
2002-09-28 17:15:38 +00:00
|
|
|
static int
|
2003-10-23 13:49:10 +00:00
|
|
|
ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
|
|
|
|
struct sockaddr *sa)
|
1997-01-07 19:15:32 +00:00
|
|
|
{
|
|
|
|
struct sockaddr_dl *sdl;
|
2004-06-24 10:58:08 +00:00
|
|
|
#ifdef INET
|
1997-01-07 19:15:32 +00:00
|
|
|
struct sockaddr_in *sin;
|
2004-06-24 10:58:08 +00:00
|
|
|
#endif
|
1999-11-22 02:45:11 +00:00
|
|
|
#ifdef INET6
|
|
|
|
struct sockaddr_in6 *sin6;
|
|
|
|
#endif
|
1997-01-07 19:15:32 +00:00
|
|
|
u_char *e_addr;
|
|
|
|
|
|
|
|
switch(sa->sa_family) {
|
|
|
|
case AF_LINK:
|
1999-11-22 02:45:11 +00:00
|
|
|
/*
|
1997-07-15 23:25:32 +00:00
|
|
|
* No mapping needed. Just check that it's a valid MC address.
|
|
|
|
*/
|
1997-01-07 19:15:32 +00:00
|
|
|
sdl = (struct sockaddr_dl *)sa;
|
|
|
|
e_addr = LLADDR(sdl);
|
2004-07-09 05:26:27 +00:00
|
|
|
if (!ETHER_IS_MULTICAST(e_addr))
|
1997-01-07 19:15:32 +00:00
|
|
|
return EADDRNOTAVAIL;
|
|
|
|
*llsa = 0;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
#ifdef INET
|
|
|
|
case AF_INET:
|
|
|
|
sin = (struct sockaddr_in *)sa;
|
|
|
|
if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
|
|
|
|
return EADDRNOTAVAIL;
|
2008-10-23 15:53:51 +00:00
|
|
|
sdl = malloc(sizeof *sdl, M_IFMADDR,
|
2005-08-02 17:52:52 +00:00
|
|
|
M_NOWAIT|M_ZERO);
|
|
|
|
if (sdl == NULL)
|
|
|
|
return ENOMEM;
|
1997-01-07 19:15:32 +00:00
|
|
|
sdl->sdl_len = sizeof *sdl;
|
|
|
|
sdl->sdl_family = AF_LINK;
|
|
|
|
sdl->sdl_index = ifp->if_index;
|
|
|
|
sdl->sdl_type = IFT_ETHER;
|
|
|
|
sdl->sdl_alen = ETHER_ADDR_LEN;
|
|
|
|
e_addr = LLADDR(sdl);
|
|
|
|
ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr);
|
|
|
|
*llsa = (struct sockaddr *)sdl;
|
|
|
|
return 0;
|
|
|
|
#endif
|
1999-11-22 02:45:11 +00:00
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
|
|
|
sin6 = (struct sockaddr_in6 *)sa;
|
2000-07-09 11:17:17 +00:00
|
|
|
if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
|
|
|
|
/*
|
|
|
|
* An IP6 address of 0 means listen to all
|
|
|
|
* of the Ethernet multicast address used for IP6.
|
|
|
|
* (This is used for multicast routers.)
|
|
|
|
*/
|
|
|
|
ifp->if_flags |= IFF_ALLMULTI;
|
|
|
|
*llsa = 0;
|
|
|
|
return 0;
|
|
|
|
}
|
1999-11-22 02:45:11 +00:00
|
|
|
if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
|
|
|
|
return EADDRNOTAVAIL;
|
2008-10-23 15:53:51 +00:00
|
|
|
sdl = malloc(sizeof *sdl, M_IFMADDR,
|
2005-08-02 17:52:52 +00:00
|
|
|
M_NOWAIT|M_ZERO);
|
|
|
|
if (sdl == NULL)
|
|
|
|
return (ENOMEM);
|
1999-11-22 02:45:11 +00:00
|
|
|
sdl->sdl_len = sizeof *sdl;
|
|
|
|
sdl->sdl_family = AF_LINK;
|
|
|
|
sdl->sdl_index = ifp->if_index;
|
|
|
|
sdl->sdl_type = IFT_ETHER;
|
|
|
|
sdl->sdl_alen = ETHER_ADDR_LEN;
|
|
|
|
e_addr = LLADDR(sdl);
|
|
|
|
ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr);
|
|
|
|
*llsa = (struct sockaddr *)sdl;
|
|
|
|
return 0;
|
|
|
|
#endif
|
1997-01-07 19:15:32 +00:00
|
|
|
|
|
|
|
default:
|
1999-11-22 02:45:11 +00:00
|
|
|
/*
|
1997-01-07 19:15:32 +00:00
|
|
|
* Well, the text isn't quite right, but it's the name
|
|
|
|
* that counts...
|
|
|
|
*/
|
|
|
|
return EAFNOSUPPORT;
|
|
|
|
}
|
|
|
|
}
|
2003-03-15 15:38:02 +00:00
|
|
|
|
2005-06-10 16:49:24 +00:00
|
|
|
static void*
|
|
|
|
ether_alloc(u_char type, struct ifnet *ifp)
|
|
|
|
{
|
|
|
|
struct arpcom *ac;
|
|
|
|
|
|
|
|
ac = malloc(sizeof(struct arpcom), M_ARPCOM, M_WAITOK | M_ZERO);
|
|
|
|
ac->ac_ifp = ifp;
|
|
|
|
|
|
|
|
return (ac);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
ether_free(void *com, u_char type)
|
|
|
|
{
|
|
|
|
|
|
|
|
free(com, M_ARPCOM);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
ether_modevent(module_t mod, int type, void *data)
|
|
|
|
{
|
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
case MOD_LOAD:
|
|
|
|
if_register_com_alloc(IFT_ETHER, ether_alloc, ether_free);
|
|
|
|
break;
|
|
|
|
case MOD_UNLOAD:
|
|
|
|
if_deregister_com_alloc(IFT_ETHER);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return EOPNOTSUPP;
|
|
|
|
}
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2003-03-15 15:38:02 +00:00
|
|
|
static moduledata_t ether_mod = {
|
2003-10-23 13:49:10 +00:00
|
|
|
"ether",
|
2005-06-10 16:49:24 +00:00
|
|
|
ether_modevent,
|
2003-10-23 13:49:10 +00:00
|
|
|
0
|
2003-03-15 15:38:02 +00:00
|
|
|
};
|
2003-10-23 13:49:10 +00:00
|
|
|
|
2006-11-18 23:17:22 +00:00
|
|
|
void
|
|
|
|
ether_vlan_mtap(struct bpf_if *bp, struct mbuf *m, void *data, u_int dlen)
|
|
|
|
{
|
|
|
|
struct ether_vlan_header vlan;
|
|
|
|
struct mbuf mv, mb;
|
|
|
|
|
|
|
|
KASSERT((m->m_flags & M_VLANTAG) != 0,
|
|
|
|
("%s: vlan information not present", __func__));
|
|
|
|
KASSERT(m->m_len >= sizeof(struct ether_header),
|
|
|
|
("%s: mbuf not large enough for header", __func__));
|
|
|
|
bcopy(mtod(m, char *), &vlan, sizeof(struct ether_header));
|
|
|
|
vlan.evl_proto = vlan.evl_encap_proto;
|
|
|
|
vlan.evl_encap_proto = htons(ETHERTYPE_VLAN);
|
|
|
|
vlan.evl_tag = htons(m->m_pkthdr.ether_vtag);
|
|
|
|
m->m_len -= sizeof(struct ether_header);
|
|
|
|
m->m_data += sizeof(struct ether_header);
|
|
|
|
/*
|
|
|
|
* If a data link has been supplied by the caller, then we will need to
|
|
|
|
* re-create a stack allocated mbuf chain with the following structure:
|
|
|
|
*
|
|
|
|
* (1) mbuf #1 will contain the supplied data link
|
|
|
|
* (2) mbuf #2 will contain the vlan header
|
|
|
|
* (3) mbuf #3 will contain the original mbuf's packet data
|
|
|
|
*
|
|
|
|
* Otherwise, submit the packet and vlan header via bpf_mtap2().
|
|
|
|
*/
|
|
|
|
if (data != NULL) {
|
|
|
|
mv.m_next = m;
|
|
|
|
mv.m_data = (caddr_t)&vlan;
|
|
|
|
mv.m_len = sizeof(vlan);
|
|
|
|
mb.m_next = &mv;
|
|
|
|
mb.m_data = data;
|
|
|
|
mb.m_len = dlen;
|
|
|
|
bpf_mtap(bp, &mb);
|
|
|
|
} else
|
|
|
|
bpf_mtap2(bp, &vlan, sizeof(vlan), m);
|
|
|
|
m->m_len += sizeof(struct ether_header);
|
|
|
|
m->m_data -= sizeof(struct ether_header);
|
|
|
|
}
|
|
|
|
|
2007-10-18 21:22:15 +00:00
|
|
|
struct mbuf *
|
2007-10-18 21:52:31 +00:00
|
|
|
ether_vlanencap(struct mbuf *m, uint16_t tag)
|
2007-10-18 21:22:15 +00:00
|
|
|
{
|
|
|
|
struct ether_vlan_header *evl;
|
|
|
|
|
|
|
|
M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
|
|
|
|
if (m == NULL)
|
|
|
|
return (NULL);
|
|
|
|
/* M_PREPEND takes care of m_len, m_pkthdr.len for us */
|
|
|
|
|
|
|
|
if (m->m_len < sizeof(*evl)) {
|
|
|
|
m = m_pullup(m, sizeof(*evl));
|
|
|
|
if (m == NULL)
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Transform the Ethernet header into an Ethernet header
|
|
|
|
* with 802.1Q encapsulation.
|
|
|
|
*/
|
|
|
|
evl = mtod(m, struct ether_vlan_header *);
|
|
|
|
bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
|
|
|
|
(char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
|
|
|
|
evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
|
|
|
|
evl->evl_tag = htons(tag);
|
|
|
|
return (m);
|
|
|
|
}
|
|
|
|
|
2005-06-10 16:49:24 +00:00
|
|
|
/*
 * Register the "ether" module described by ether_mod, placed at
 * SI_SUB_INIT_IF / SI_ORDER_ANY, and declare it as version 1.
 */
DECLARE_MODULE(ether, ether_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
|
2003-03-15 15:38:02 +00:00
|
|
|
MODULE_VERSION(ether, 1);
|