2005-01-07 01:45:51 +00:00
|
|
|
/*-
|
2009-12-22 13:53:34 +00:00
|
|
|
* Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
2003-06-23 21:18:56 +00:00
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
2002-10-23 10:07:55 +00:00
|
|
|
*
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
2007-10-07 20:44:24 +00:00
|
|
|
#include <sys/cdefs.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
/*
|
2009-12-16 10:48:40 +00:00
|
|
|
* The FreeBSD IP packet firewall, main file
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
*/
|
|
|
|
|
2004-08-26 14:18:30 +00:00
|
|
|
#if !defined(KLD_MODULE)
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
#include "opt_ipfw.h"
|
2007-02-03 22:11:53 +00:00
|
|
|
#include "opt_ipdivert.h"
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
#include "opt_ipdn.h"
|
|
|
|
#include "opt_inet.h"
|
|
|
|
#ifndef INET
|
|
|
|
#error IPFIREWALL requires INET.
|
|
|
|
#endif /* INET */
|
2004-08-26 14:18:30 +00:00
|
|
|
#endif
|
2005-11-29 17:56:11 +00:00
|
|
|
#include "opt_inet6.h"
|
|
|
|
#include "opt_ipsec.h"
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
#include <sys/param.h>
|
|
|
|
#include <sys/systm.h>
|
2004-12-10 02:17:18 +00:00
|
|
|
#include <sys/condvar.h>
|
2006-12-29 21:59:17 +00:00
|
|
|
#include <sys/eventhandler.h>
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
#include <sys/malloc.h>
|
|
|
|
#include <sys/mbuf.h>
|
|
|
|
#include <sys/kernel.h>
|
2006-02-02 03:13:16 +00:00
|
|
|
#include <sys/lock.h>
|
2004-08-12 22:06:55 +00:00
|
|
|
#include <sys/jail.h>
|
2004-05-30 17:57:46 +00:00
|
|
|
#include <sys/module.h>
|
2006-11-06 13:42:10 +00:00
|
|
|
#include <sys/priv.h>
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
#include <sys/proc.h>
|
2006-02-02 03:13:16 +00:00
|
|
|
#include <sys/rwlock.h>
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
#include <sys/socket.h>
|
|
|
|
#include <sys/socketvar.h>
|
|
|
|
#include <sys/sysctl.h>
|
|
|
|
#include <sys/syslog.h>
|
|
|
|
#include <sys/ucred.h>
|
2009-02-16 15:10:51 +00:00
|
|
|
#include <net/ethernet.h> /* for ETHERTYPE_IP */
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
#include <net/if.h>
|
|
|
|
#include <net/route.h>
|
2007-07-03 12:46:08 +00:00
|
|
|
#include <net/pf_mtag.h>
|
2008-12-02 21:37:28 +00:00
|
|
|
#include <net/vnet.h>
|
2008-01-25 14:38:27 +00:00
|
|
|
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
#include <netinet/in.h>
|
|
|
|
#include <netinet/in_var.h>
|
|
|
|
#include <netinet/in_pcb.h>
|
|
|
|
#include <netinet/ip.h>
|
|
|
|
#include <netinet/ip_var.h>
|
|
|
|
#include <netinet/ip_icmp.h>
|
|
|
|
#include <netinet/ip_fw.h>
|
2009-12-15 16:15:14 +00:00
|
|
|
#include <netinet/ipfw/ip_fw_private.h>
|
2006-10-07 10:19:58 +00:00
|
|
|
#include <netinet/ip_carp.h>
|
2006-08-31 16:56:45 +00:00
|
|
|
#include <netinet/pim.h>
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
#include <netinet/tcp_var.h>
|
|
|
|
#include <netinet/udp.h>
|
|
|
|
#include <netinet/udp_var.h>
|
2006-11-13 19:07:32 +00:00
|
|
|
#include <netinet/sctp.h>
|
2008-12-02 21:37:28 +00:00
|
|
|
|
2005-04-18 18:35:05 +00:00
|
|
|
#include <netinet/ip6.h>
|
|
|
|
#include <netinet/icmp6.h>
|
2005-07-26 00:19:58 +00:00
|
|
|
#ifdef INET6
|
|
|
|
#include <netinet6/scope6_var.h>
|
2009-12-02 14:32:01 +00:00
|
|
|
#include <netinet6/ip6_var.h>
|
2005-07-26 00:19:58 +00:00
|
|
|
#endif
|
2005-04-18 18:35:05 +00:00
|
|
|
|
2002-07-05 22:43:06 +00:00
|
|
|
#include <machine/in_cksum.h> /* XXX for in_cksum */
|
|
|
|
|
2009-02-13 15:37:14 +00:00
|
|
|
#ifdef MAC
|
2006-10-22 11:52:19 +00:00
|
|
|
#include <security/mac/mac_framework.h>
|
2009-02-13 15:37:14 +00:00
|
|
|
#endif
|
2006-10-22 11:52:19 +00:00
|
|
|
|
One bugfix and one new feature.
The bugfix (ipfw2.c) makes the handling of port numbers with
a dash in the name, e.g. ftp-data, consistent with old ipfw:
use \\ before the - to consider it as part of the name and not
a range separator.
The new feature (all this description will go in the manpage):
each rule now belongs to one of 32 different sets, which can
be optionally specified in the following form:
ipfw add 100 set 23 allow ip from any to any
If "set N" is not specified, the rule belongs to set 0.
Individual sets can be disabled, enabled, and deleted with the commands:
ipfw disable set N
ipfw enable set N
ipfw delete set N
Enabling/disabling of a set is atomic. Rules belonging to a disabled
set are skipped during packet matching, and they are not listed
unless you use the '-S' flag in the show/list commands.
Note that dynamic rules, once created, are always active until
they expire or their parent rule is deleted.
Set 31 is reserved for the default rule and cannot be disabled.
All sets are enabled by default. The enable/disable status of the sets
can be shown with the command
ipfw show sets
Hopefully, this feature will make life easier to those who want to
have atomic ruleset addition/deletion/tests. Examples:
To add a set of rules atomically:
ipfw disable set 18
ipfw add ... set 18 ... # repeat as needed
ipfw enable set 18
To delete a set of rules atomically
ipfw disable set 18
ipfw delete set 18
ipfw enable set 18
To test a ruleset and disable it and regain control if something
goes wrong:
ipfw disable set 18
ipfw add ... set 18 ... # repeat as needed
ipfw enable set 18 ; echo "done "; sleep 30 && ipfw disable set 18
here if everything goes well, you press control-C before
the "sleep" terminates, and your ruleset will be left
active. Otherwise, e.g. if you cannot access your box,
the ruleset will be disabled after the sleep terminates.
I think there is only one more thing that one might want, namely
a command to assign all rules in set X to set Y, so one can
test a ruleset using the above mechanisms, and once it is
considered acceptable, make it part of an existing ruleset.
2002-08-10 04:37:32 +00:00
|
|
|
/*
|
2009-12-16 10:48:40 +00:00
|
|
|
* static variables followed by global ones.
|
|
|
|
* All ipfw global variables are here.
|
One bugfix and one new feature.
The bugfix (ipfw2.c) makes the handling of port numbers with
a dash in the name, e.g. ftp-data, consistent with old ipfw:
use \\ before the - to consider it as part of the name and not
a range separator.
The new feature (all this description will go in the manpage):
each rule now belongs to one of 32 different sets, which can
be optionally specified in the following form:
ipfw add 100 set 23 allow ip from any to any
If "set N" is not specified, the rule belongs to set 0.
Individual sets can be disabled, enabled, and deleted with the commands:
ipfw disable set N
ipfw enable set N
ipfw delete set N
Enabling/disabling of a set is atomic. Rules belonging to a disabled
set are skipped during packet matching, and they are not listed
unless you use the '-S' flag in the show/list commands.
Note that dynamic rules, once created, are always active until
they expire or their parent rule is deleted.
Set 31 is reserved for the default rule and cannot be disabled.
All sets are enabled by default. The enable/disable status of the sets
can be shown with the command
ipfw show sets
Hopefully, this feature will make life easier to those who want to
have atomic ruleset addition/deletion/tests. Examples:
To add a set of rules atomically:
ipfw disable set 18
ipfw add ... set 18 ... # repeat as needed
ipfw enable set 18
To delete a set of rules atomically
ipfw disable set 18
ipfw delete set 18
ipfw enable set 18
To test a ruleset and disable it and regain control if something
goes wrong:
ipfw disable set 18
ipfw add ... set 18 ... # repeat as needed
ipfw enable set 18 ; echo "done "; sleep 30 && ipfw disable set 18
here if everything goes well, you press control-C before
the "sleep" terminates, and your ruleset will be left
active. Otherwise, e.g. if you cannot access your box,
the ruleset will be disabled after the sleep terminates.
I think there is only one more thing that one might want, namely
a command to assign all rules in set X to set Y, so one can
test a ruleset using the above mechanisms, and once it is
considered acceptable, make it part of an existing ruleset.
2002-08-10 04:37:32 +00:00
|
|
|
*/
|
Build on Jeff Roberson's linker-set based dynamic per-CPU allocator
(DPCPU), as suggested by Peter Wemm, and implement a new per-virtual
network stack memory allocator. Modify vnet to use the allocator
instead of monolithic global container structures (vinet, ...). This
change solves many binary compatibility problems associated with
VIMAGE, and restores ELF symbols for virtualized global variables.
Each virtualized global variable exists as a "reference copy", and also
once per virtual network stack. Virtualized global variables are
tagged at compile-time, placing the in a special linker set, which is
loaded into a contiguous region of kernel memory. Virtualized global
variables in the base kernel are linked as normal, but those in modules
are copied and relocated to a reserved portion of the kernel's vnet
region with the help of a the kernel linker.
Virtualized global variables exist in per-vnet memory set up when the
network stack instance is created, and are initialized statically from
the reference copy. Run-time access occurs via an accessor macro, which
converts from the current vnet and requested symbol to a per-vnet
address. When "options VIMAGE" is not compiled into the kernel, normal
global ELF symbols will be used instead and indirection is avoided.
This change restores static initialization for network stack global
variables, restores support for non-global symbols and types, eliminates
the need for many subsystem constructors, eliminates large per-subsystem
structures that caused many binary compatibility issues both for
monitoring applications (netstat) and kernel modules, removes the
per-function INIT_VNET_*() macros throughout the stack, eliminates the
need for vnet_symmap ksym(2) munging, and eliminates duplicate
definitions of virtualized globals under VIMAGE_GLOBALS.
Bump __FreeBSD_version and update UPDATING.
Portions submitted by: bz
Reviewed by: bz, zec
Discussed with: gnn, jamie, jeff, jhb, julian, sam
Suggested by: peter
Approved by: re (kensmith)
2009-07-14 22:48:30 +00:00
|
|
|
|
2009-12-16 10:48:40 +00:00
|
|
|
/* ipfw_vnet_ready controls when we are open for business */
|
2010-11-22 19:32:54 +00:00
|
|
|
static VNET_DEFINE(int, ipfw_vnet_ready) = 0;
|
2009-12-16 10:48:40 +00:00
|
|
|
#define V_ipfw_vnet_ready VNET(ipfw_vnet_ready)
|
|
|
|
|
2010-11-22 19:32:54 +00:00
|
|
|
static VNET_DEFINE(int, fw_deny_unknown_exthdrs);
|
2009-12-16 10:48:40 +00:00
|
|
|
#define V_fw_deny_unknown_exthdrs VNET(fw_deny_unknown_exthdrs)
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
2009-05-09 05:07:36 +00:00
|
|
|
#ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
|
|
|
|
static int default_to_accept = 1;
|
|
|
|
#else
|
|
|
|
static int default_to_accept;
|
|
|
|
#endif
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
2009-12-16 10:48:40 +00:00
|
|
|
VNET_DEFINE(int, autoinc_step);
|
2011-04-20 07:55:33 +00:00
|
|
|
VNET_DEFINE(int, fw_one_pass) = 1;
|
2009-06-09 21:27:11 +00:00
|
|
|
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
/*
|
2009-12-16 10:48:40 +00:00
|
|
|
* Each rule belongs to one of 32 different sets (0..31).
|
|
|
|
* The variable set_disable contains one bit per set.
|
|
|
|
* If the bit is set, all rules in the corresponding set
|
|
|
|
* are disabled. Set RESVD_SET(31) is reserved for the default rule
|
|
|
|
* and rules that are not deleted by the flush command,
|
|
|
|
* and CANNOT be disabled.
|
|
|
|
* Rules in set RESVD_SET can only be deleted individually.
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
*/
|
2009-12-16 10:48:40 +00:00
|
|
|
VNET_DEFINE(u_int32_t, set_disable);
|
|
|
|
#define V_set_disable VNET(set_disable)
|
|
|
|
|
|
|
|
VNET_DEFINE(int, fw_verbose);
|
|
|
|
/* counter for ipfw_log(NULL...) */
|
|
|
|
VNET_DEFINE(u_int64_t, norule_counter);
|
|
|
|
VNET_DEFINE(int, verbose_limit);
|
|
|
|
|
|
|
|
/* layer3_chain contains the list of rules for layer 3 */
|
Build on Jeff Roberson's linker-set based dynamic per-CPU allocator
(DPCPU), as suggested by Peter Wemm, and implement a new per-virtual
network stack memory allocator. Modify vnet to use the allocator
instead of monolithic global container structures (vinet, ...). This
change solves many binary compatibility problems associated with
VIMAGE, and restores ELF symbols for virtualized global variables.
Each virtualized global variable exists as a "reference copy", and also
once per virtual network stack. Virtualized global variables are
tagged at compile-time, placing the in a special linker set, which is
loaded into a contiguous region of kernel memory. Virtualized global
variables in the base kernel are linked as normal, but those in modules
are copied and relocated to a reserved portion of the kernel's vnet
region with the help of a the kernel linker.
Virtualized global variables exist in per-vnet memory set up when the
network stack instance is created, and are initialized statically from
the reference copy. Run-time access occurs via an accessor macro, which
converts from the current vnet and requested symbol to a per-vnet
address. When "options VIMAGE" is not compiled into the kernel, normal
global ELF symbols will be used instead and indirection is avoided.
This change restores static initialization for network stack global
variables, restores support for non-global symbols and types, eliminates
the need for many subsystem constructors, eliminates large per-subsystem
structures that caused many binary compatibility issues both for
monitoring applications (netstat) and kernel modules, removes the
per-function INIT_VNET_*() macros throughout the stack, eliminates the
need for vnet_symmap ksym(2) munging, and eliminates duplicate
definitions of virtualized globals under VIMAGE_GLOBALS.
Bump __FreeBSD_version and update UPDATING.
Portions submitted by: bz
Reviewed by: bz, zec
Discussed with: gnn, jamie, jeff, jhb, julian, sam
Suggested by: peter
Approved by: re (kensmith)
2009-07-14 22:48:30 +00:00
|
|
|
VNET_DEFINE(struct ip_fw_chain, layer3_chain);
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
2008-02-29 22:27:19 +00:00
|
|
|
ipfw_nat_t *ipfw_nat_ptr = NULL;
|
2009-12-15 16:15:14 +00:00
|
|
|
struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int);
|
2008-02-29 22:27:19 +00:00
|
|
|
ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
|
|
|
|
ipfw_nat_cfg_t *ipfw_nat_del_ptr;
|
|
|
|
ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;
|
|
|
|
ipfw_nat_cfg_t *ipfw_nat_get_log_ptr;
|
2004-06-09 20:10:38 +00:00
|
|
|
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
#ifdef SYSCTL_NODE
|
Bring in the most recent version of ipfw and dummynet, developed
and tested over the past two months in the ipfw3-head branch. This
also happens to be the same code available in the Linux and Windows
ports of ipfw and dummynet.
The major enhancement is a completely restructured version of
dummynet, with support for different packet scheduling algorithms
(loadable at runtime), faster queue/pipe lookup, and a much cleaner
internal architecture and kernel/userland ABI which simplifies
future extensions.
In addition to the existing schedulers (FIFO and WF2Q+), we include
a Deficit Round Robin (DRR or RR for brevity) scheduler, and a new,
very fast version of WF2Q+ called QFQ.
Some test code is also present (in sys/netinet/ipfw/test) that
lets you build and test schedulers in userland.
Also, we have added a compatibility layer that understands requests
from the RELENG_7 and RELENG_8 versions of the /sbin/ipfw binaries,
and replies correctly (at least, it does its best; sometimes you
just cannot tell who sent the request and how to answer).
The compatibility layer should make it possible to MFC this code in a
relatively short time.
Some minor glitches (e.g. handling of ipfw set enable/disable,
and a workaround for a bug in RELENG_7's /sbin/ipfw) will be
fixed with separate commits.
CREDITS:
This work has been partly supported by the ONELAB2 project, and
mostly developed by Riccardo Panicucci and myself.
The code for the qfq scheduler is mostly from Fabio Checconi,
and Marta Carbone and Francesco Magno have helped with testing,
debugging and some bug fixes.
2010-03-02 17:40:48 +00:00
|
|
|
uint32_t dummy_def = IPFW_DEFAULT_RULE;
|
|
|
|
uint32_t dummy_tables_max = IPFW_TABLES_MAX;
|
|
|
|
|
|
|
|
SYSBEGIN(f3)
|
|
|
|
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
|
Build on Jeff Roberson's linker-set based dynamic per-CPU allocator
(DPCPU), as suggested by Peter Wemm, and implement a new per-virtual
network stack memory allocator. Modify vnet to use the allocator
instead of monolithic global container structures (vinet, ...). This
change solves many binary compatibility problems associated with
VIMAGE, and restores ELF symbols for virtualized global variables.
Each virtualized global variable exists as a "reference copy", and also
once per virtual network stack. Virtualized global variables are
tagged at compile-time, placing the in a special linker set, which is
loaded into a contiguous region of kernel memory. Virtualized global
variables in the base kernel are linked as normal, but those in modules
are copied and relocated to a reserved portion of the kernel's vnet
region with the help of a the kernel linker.
Virtualized global variables exist in per-vnet memory set up when the
network stack instance is created, and are initialized statically from
the reference copy. Run-time access occurs via an accessor macro, which
converts from the current vnet and requested symbol to a per-vnet
address. When "options VIMAGE" is not compiled into the kernel, normal
global ELF symbols will be used instead and indirection is avoided.
This change restores static initialization for network stack global
variables, restores support for non-global symbols and types, eliminates
the need for many subsystem constructors, eliminates large per-subsystem
structures that caused many binary compatibility issues both for
monitoring applications (netstat) and kernel modules, removes the
per-function INIT_VNET_*() macros throughout the stack, eliminates the
need for vnet_symmap ksym(2) munging, and eliminates duplicate
definitions of virtualized globals under VIMAGE_GLOBALS.
Bump __FreeBSD_version and update UPDATING.
Portions submitted by: bz
Reviewed by: bz, zec
Discussed with: gnn, jamie, jeff, jhb, julian, sam
Suggested by: peter
Approved by: re (kensmith)
2009-07-14 22:48:30 +00:00
|
|
|
SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, one_pass,
|
|
|
|
CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_one_pass), 0,
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
"Only do a single pass through ipfw when using dummynet(4)");
|
2009-12-16 10:48:40 +00:00
|
|
|
SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step,
|
|
|
|
CTLFLAG_RW, &VNET_NAME(autoinc_step), 0,
|
|
|
|
"Rule number auto-increment step");
|
Build on Jeff Roberson's linker-set based dynamic per-CPU allocator
(DPCPU), as suggested by Peter Wemm, and implement a new per-virtual
network stack memory allocator. Modify vnet to use the allocator
instead of monolithic global container structures (vinet, ...). This
change solves many binary compatibility problems associated with
VIMAGE, and restores ELF symbols for virtualized global variables.
Each virtualized global variable exists as a "reference copy", and also
once per virtual network stack. Virtualized global variables are
tagged at compile-time, placing the in a special linker set, which is
loaded into a contiguous region of kernel memory. Virtualized global
variables in the base kernel are linked as normal, but those in modules
are copied and relocated to a reserved portion of the kernel's vnet
region with the help of a the kernel linker.
Virtualized global variables exist in per-vnet memory set up when the
network stack instance is created, and are initialized statically from
the reference copy. Run-time access occurs via an accessor macro, which
converts from the current vnet and requested symbol to a per-vnet
address. When "options VIMAGE" is not compiled into the kernel, normal
global ELF symbols will be used instead and indirection is avoided.
This change restores static initialization for network stack global
variables, restores support for non-global symbols and types, eliminates
the need for many subsystem constructors, eliminates large per-subsystem
structures that caused many binary compatibility issues both for
monitoring applications (netstat) and kernel modules, removes the
per-function INIT_VNET_*() macros throughout the stack, eliminates the
need for vnet_symmap ksym(2) munging, and eliminates duplicate
definitions of virtualized globals under VIMAGE_GLOBALS.
Bump __FreeBSD_version and update UPDATING.
Portions submitted by: bz
Reviewed by: bz, zec
Discussed with: gnn, jamie, jeff, jhb, julian, sam
Suggested by: peter
Approved by: re (kensmith)
2009-07-14 22:48:30 +00:00
|
|
|
SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose,
|
2009-07-25 06:42:42 +00:00
|
|
|
CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_verbose), 0,
|
|
|
|
"Log matches to ipfw rules");
|
Build on Jeff Roberson's linker-set based dynamic per-CPU allocator
(DPCPU), as suggested by Peter Wemm, and implement a new per-virtual
network stack memory allocator. Modify vnet to use the allocator
instead of monolithic global container structures (vinet, ...). This
change solves many binary compatibility problems associated with
VIMAGE, and restores ELF symbols for virtualized global variables.
Each virtualized global variable exists as a "reference copy", and also
once per virtual network stack. Virtualized global variables are
tagged at compile-time, placing the in a special linker set, which is
loaded into a contiguous region of kernel memory. Virtualized global
variables in the base kernel are linked as normal, but those in modules
are copied and relocated to a reserved portion of the kernel's vnet
region with the help of a the kernel linker.
Virtualized global variables exist in per-vnet memory set up when the
network stack instance is created, and are initialized statically from
the reference copy. Run-time access occurs via an accessor macro, which
converts from the current vnet and requested symbol to a per-vnet
address. When "options VIMAGE" is not compiled into the kernel, normal
global ELF symbols will be used instead and indirection is avoided.
This change restores static initialization for network stack global
variables, restores support for non-global symbols and types, eliminates
the need for many subsystem constructors, eliminates large per-subsystem
structures that caused many binary compatibility issues both for
monitoring applications (netstat) and kernel modules, removes the
per-function INIT_VNET_*() macros throughout the stack, eliminates the
need for vnet_symmap ksym(2) munging, and eliminates duplicate
definitions of virtualized globals under VIMAGE_GLOBALS.
Bump __FreeBSD_version and update UPDATING.
Portions submitted by: bz
Reviewed by: bz, zec
Discussed with: gnn, jamie, jeff, jhb, julian, sam
Suggested by: peter
Approved by: re (kensmith)
2009-07-14 22:48:30 +00:00
|
|
|
SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit,
|
|
|
|
CTLFLAG_RW, &VNET_NAME(verbose_limit), 0,
|
2008-12-13 19:13:03 +00:00
|
|
|
"Set upper limit of matches of ipfw rules logged");
|
2008-09-06 16:47:07 +00:00
|
|
|
SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD,
|
Bring in the most recent version of ipfw and dummynet, developed
and tested over the past two months in the ipfw3-head branch. This
also happens to be the same code available in the Linux and Windows
ports of ipfw and dummynet.
The major enhancement is a completely restructured version of
dummynet, with support for different packet scheduling algorithms
(loadable at runtime), faster queue/pipe lookup, and a much cleaner
internal architecture and kernel/userland ABI which simplifies
future extensions.
In addition to the existing schedulers (FIFO and WF2Q+), we include
a Deficit Round Robin (DRR or RR for brevity) scheduler, and a new,
very fast version of WF2Q+ called QFQ.
Some test code is also present (in sys/netinet/ipfw/test) that
lets you build and test schedulers in userland.
Also, we have added a compatibility layer that understands requests
from the RELENG_7 and RELENG_8 versions of the /sbin/ipfw binaries,
and replies correctly (at least, it does its best; sometimes you
just cannot tell who sent the request and how to answer).
The compatibility layer should make it possible to MFC this code in a
relatively short time.
Some minor glitches (e.g. handling of ipfw set enable/disable,
and a workaround for a bug in RELENG_7's /sbin/ipfw) will be
fixed with separate commits.
CREDITS:
This work has been partly supported by the ONELAB2 project, and
mostly developed by Riccardo Panicucci and myself.
The code for the qfq scheduler is mostly from Fabio Checconi,
and Marta Carbone and Francesco Magno have helped with testing,
debugging and some bug fixes.
2010-03-02 17:40:48 +00:00
|
|
|
&dummy_def, 0,
|
2009-07-25 06:42:42 +00:00
|
|
|
"The default/max possible rule number.");
|
2008-09-14 09:24:12 +00:00
|
|
|
SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, tables_max, CTLFLAG_RD,
|
Bring in the most recent version of ipfw and dummynet, developed
and tested over the past two months in the ipfw3-head branch. This
also happens to be the same code available in the Linux and Windows
ports of ipfw and dummynet.
The major enhancement is a completely restructured version of
dummynet, with support for different packet scheduling algorithms
(loadable at runtime), faster queue/pipe lookup, and a much cleaner
internal architecture and kernel/userland ABI which simplifies
future extensions.
In addition to the existing schedulers (FIFO and WF2Q+), we include
a Deficit Round Robin (DRR or RR for brevity) scheduler, and a new,
very fast version of WF2Q+ called QFQ.
Some test code is also present (in sys/netinet/ipfw/test) that
lets you build and test schedulers in userland.
Also, we have added a compatibility layer that understands requests
from the RELENG_7 and RELENG_8 versions of the /sbin/ipfw binaries,
and replies correctly (at least, it does its best; sometimes you
just cannot tell who sent the request and how to answer).
The compatibility layer should make it possible to MFC this code in a
relatively short time.
Some minor glitches (e.g. handling of ipfw set enable/disable,
and a workaround for a bug in RELENG_7's /sbin/ipfw) will be
fixed with separate commits.
CREDITS:
This work has been partly supported by the ONELAB2 project, and
mostly developed by Riccardo Panicucci and myself.
The code for the qfq scheduler is mostly from Fabio Checconi,
and Marta Carbone and Francesco Magno have helped with testing,
debugging and some bug fixes.
2010-03-02 17:40:48 +00:00
|
|
|
&dummy_tables_max, 0,
|
2009-07-25 06:42:42 +00:00
|
|
|
"The maximum number of tables.");
|
2009-05-09 05:07:36 +00:00
|
|
|
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, default_to_accept, CTLFLAG_RDTUN,
|
2009-07-25 06:42:42 +00:00
|
|
|
&default_to_accept, 0,
|
|
|
|
"Make the default rule accept all packets.");
|
2009-05-09 05:07:36 +00:00
|
|
|
TUNABLE_INT("net.inet.ip.fw.default_to_accept", &default_to_accept);
|
2009-12-22 13:53:34 +00:00
|
|
|
SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, static_count,
|
|
|
|
CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0,
|
|
|
|
"Number of static rules");
|
2009-12-02 17:50:52 +00:00
|
|
|
|
2009-07-25 06:42:42 +00:00
|
|
|
#ifdef INET6
|
|
|
|
SYSCTL_DECL(_net_inet6_ip6);
|
|
|
|
SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
|
|
|
|
SYSCTL_VNET_INT(_net_inet6_ip6_fw, OID_AUTO, deny_unknown_exthdrs,
|
|
|
|
CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(fw_deny_unknown_exthdrs), 0,
|
|
|
|
"Deny packets with unknown IPv6 Extension Headers");
|
2009-12-02 17:50:52 +00:00
|
|
|
#endif /* INET6 */
|
|
|
|
|
Bring in the most recent version of ipfw and dummynet, developed
and tested over the past two months in the ipfw3-head branch. This
also happens to be the same code available in the Linux and Windows
ports of ipfw and dummynet.
The major enhancement is a completely restructured version of
dummynet, with support for different packet scheduling algorithms
(loadable at runtime), faster queue/pipe lookup, and a much cleaner
internal architecture and kernel/userland ABI which simplifies
future extensions.
In addition to the existing schedulers (FIFO and WF2Q+), we include
a Deficit Round Robin (DRR or RR for brevity) scheduler, and a new,
very fast version of WF2Q+ called QFQ.
Some test code is also present (in sys/netinet/ipfw/test) that
lets you build and test schedulers in userland.
Also, we have added a compatibility layer that understands requests
from the RELENG_7 and RELENG_8 versions of the /sbin/ipfw binaries,
and replies correctly (at least, it does its best; sometimes you
just cannot tell who sent the request and how to answer).
The compatibility layer should make it possible to MFC this code in a
relatively short time.
Some minor glitches (e.g. handling of ipfw set enable/disable,
and a workaround for a bug in RELENG_7's /sbin/ipfw) will be
fixed with separate commits.
CREDITS:
This work has been partly supported by the ONELAB2 project, and
mostly developed by Riccardo Panicucci and myself.
The code for the qfq scheduler is mostly from Fabio Checconi,
and Marta Carbone and Francesco Magno have helped with testing,
debugging and some bug fixes.
2010-03-02 17:40:48 +00:00
|
|
|
SYSEND
|
|
|
|
|
2009-01-28 13:11:22 +00:00
|
|
|
#endif /* SYSCTL_NODE */
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
2009-12-15 16:15:14 +00:00
|
|
|
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
/*
|
2009-12-16 10:48:40 +00:00
|
|
|
* Some macros used in the various matching options.
|
2005-04-15 00:47:44 +00:00
|
|
|
* L3HDR maps an ipv4 pointer into a layer3 header pointer of type T
|
|
|
|
* Other macros just cast void * into the appropriate type
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
*/
|
2005-04-15 00:47:44 +00:00
|
|
|
#define L3HDR(T, ip) ((T *)((u_int32_t *)(ip) + (ip)->ip_hl))
|
|
|
|
#define TCP(p) ((struct tcphdr *)(p))
|
2006-11-13 19:07:32 +00:00
|
|
|
#define SCTP(p) ((struct sctphdr *)(p))
|
2005-04-15 00:47:44 +00:00
|
|
|
#define UDP(p) ((struct udphdr *)(p))
|
2005-04-26 18:10:21 +00:00
|
|
|
#define ICMP(p) ((struct icmphdr *)(p))
|
2005-04-18 18:35:05 +00:00
|
|
|
#define ICMP6(p) ((struct icmp6_hdr *)(p))
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
2002-07-08 22:46:01 +00:00
|
|
|
static __inline int
|
2005-04-26 18:10:21 +00:00
|
|
|
icmptype_match(struct icmphdr *icmp, ipfw_insn_u32 *cmd)
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
{
|
2005-04-15 00:47:44 +00:00
|
|
|
int type = icmp->icmp_type;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1<<type)) );
|
|
|
|
}
|
|
|
|
|
|
|
|
#define TT ( (1 << ICMP_ECHO) | (1 << ICMP_ROUTERSOLICIT) | \
|
|
|
|
(1 << ICMP_TSTAMP) | (1 << ICMP_IREQ) | (1 << ICMP_MASKREQ) )
|
|
|
|
|
|
|
|
static int
|
2005-04-26 18:10:21 +00:00
|
|
|
is_icmp_query(struct icmphdr *icmp)
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
{
|
2005-04-15 00:47:44 +00:00
|
|
|
int type = icmp->icmp_type;
|
|
|
|
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
return (type <= ICMP_MAXTYPE && (TT & (1<<type)) );
|
|
|
|
}
|
|
|
|
#undef TT
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The following checks use two arrays of 8 or 16 bits to store the
|
|
|
|
* bits that we want set or clear, respectively. They are in the
|
|
|
|
* low and high half of cmd->arg1 or cmd->d[0].
|
|
|
|
*
|
|
|
|
* We scan options and store the bits we find set. We succeed if
|
|
|
|
*
|
|
|
|
* (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear
|
|
|
|
*
|
|
|
|
* The code is sometimes optimized not to store additional variables.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int
|
|
|
|
flags_match(ipfw_insn *cmd, u_int8_t bits)
|
|
|
|
{
|
|
|
|
u_char want_clear;
|
|
|
|
bits = ~bits;
|
|
|
|
|
|
|
|
if ( ((cmd->arg1 & 0xff) & bits) != 0)
|
|
|
|
return 0; /* some bits we want set were clear */
|
|
|
|
want_clear = (cmd->arg1 >> 8) & 0xff;
|
|
|
|
if ( (want_clear & bits) != want_clear)
|
|
|
|
return 0; /* some bits we want clear were set */
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
ipopts_match(struct ip *ip, ipfw_insn *cmd)
|
|
|
|
{
|
|
|
|
int optlen, bits = 0;
|
|
|
|
u_char *cp = (u_char *)(ip + 1);
|
|
|
|
int x = (ip->ip_hl << 2) - sizeof (struct ip);
|
|
|
|
|
|
|
|
for (; x > 0; x -= optlen, cp += optlen) {
|
|
|
|
int opt = cp[IPOPT_OPTVAL];
|
|
|
|
|
|
|
|
if (opt == IPOPT_EOL)
|
|
|
|
break;
|
|
|
|
if (opt == IPOPT_NOP)
|
|
|
|
optlen = 1;
|
|
|
|
else {
|
|
|
|
optlen = cp[IPOPT_OLEN];
|
|
|
|
if (optlen <= 0 || optlen > x)
|
|
|
|
return 0; /* invalid or truncated */
|
|
|
|
}
|
|
|
|
switch (opt) {
|
|
|
|
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
|
|
|
|
case IPOPT_LSRR:
|
|
|
|
bits |= IP_FW_IPOPT_LSRR;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case IPOPT_SSRR:
|
|
|
|
bits |= IP_FW_IPOPT_SSRR;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case IPOPT_RR:
|
|
|
|
bits |= IP_FW_IPOPT_RR;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case IPOPT_TS:
|
|
|
|
bits |= IP_FW_IPOPT_TS;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return (flags_match(cmd, bits));
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2005-04-15 00:47:44 +00:00
|
|
|
tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd)
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
{
|
|
|
|
int optlen, bits = 0;
|
|
|
|
u_char *cp = (u_char *)(tcp + 1);
|
|
|
|
int x = (tcp->th_off << 2) - sizeof(struct tcphdr);
|
|
|
|
|
|
|
|
for (; x > 0; x -= optlen, cp += optlen) {
|
|
|
|
int opt = cp[0];
|
|
|
|
if (opt == TCPOPT_EOL)
|
|
|
|
break;
|
|
|
|
if (opt == TCPOPT_NOP)
|
|
|
|
optlen = 1;
|
|
|
|
else {
|
|
|
|
optlen = cp[1];
|
|
|
|
if (optlen <= 0)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (opt) {
|
|
|
|
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
|
|
|
|
case TCPOPT_MAXSEG:
|
|
|
|
bits |= IP_FW_TCPOPT_MSS;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case TCPOPT_WINDOW:
|
|
|
|
bits |= IP_FW_TCPOPT_WINDOW;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case TCPOPT_SACK_PERMITTED:
|
|
|
|
case TCPOPT_SACK:
|
|
|
|
bits |= IP_FW_TCPOPT_SACK;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case TCPOPT_TIMESTAMP:
|
|
|
|
bits |= IP_FW_TCPOPT_TS;
|
|
|
|
break;
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return (flags_match(cmd, bits));
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
iface_match(struct ifnet *ifp, ipfw_insn_if *cmd)
|
|
|
|
{
|
|
|
|
if (ifp == NULL) /* no iface with this packet, match fails */
|
|
|
|
return 0;
|
|
|
|
/* Check by name or by IP address */
|
2002-07-05 22:43:06 +00:00
|
|
|
if (cmd->name[0] != '\0') { /* match by name */
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
/* Check name */
|
2003-10-31 18:32:15 +00:00
|
|
|
if (cmd->p.glob) {
|
|
|
|
if (fnmatch(cmd->name, ifp->if_xname, 0) == 0)
|
|
|
|
return(1);
|
|
|
|
} else {
|
|
|
|
if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0)
|
|
|
|
return(1);
|
|
|
|
}
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
} else {
|
2010-07-15 14:37:59 +00:00
|
|
|
#ifdef __FreeBSD__ /* and OSX too ? */
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
struct ifaddr *ia;
|
|
|
|
|
2009-06-26 00:46:50 +00:00
|
|
|
if_addr_rlock(ifp);
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) {
|
|
|
|
if (ia->ifa_addr->sa_family != AF_INET)
|
|
|
|
continue;
|
|
|
|
if (cmd->p.ip.s_addr == ((struct sockaddr_in *)
|
2009-04-19 22:34:35 +00:00
|
|
|
(ia->ifa_addr))->sin_addr.s_addr) {
|
2009-06-26 00:46:50 +00:00
|
|
|
if_addr_runlock(ifp);
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
return(1); /* match */
|
2009-04-19 22:34:35 +00:00
|
|
|
}
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
}
|
2009-06-26 00:46:50 +00:00
|
|
|
if_addr_runlock(ifp);
|
Bring in the most recent version of ipfw and dummynet, developed
and tested over the past two months in the ipfw3-head branch. This
also happens to be the same code available in the Linux and Windows
ports of ipfw and dummynet.
The major enhancement is a completely restructured version of
dummynet, with support for different packet scheduling algorithms
(loadable at runtime), faster queue/pipe lookup, and a much cleaner
internal architecture and kernel/userland ABI which simplifies
future extensions.
In addition to the existing schedulers (FIFO and WF2Q+), we include
a Deficit Round Robin (DRR or RR for brevity) scheduler, and a new,
very fast version of WF2Q+ called QFQ.
Some test code is also present (in sys/netinet/ipfw/test) that
lets you build and test schedulers in userland.
Also, we have added a compatibility layer that understands requests
from the RELENG_7 and RELENG_8 versions of the /sbin/ipfw binaries,
and replies correctly (at least, it does its best; sometimes you
just cannot tell who sent the request and how to answer).
The compatibility layer should make it possible to MFC this code in a
relatively short time.
Some minor glitches (e.g. handling of ipfw set enable/disable,
and a workaround for a bug in RELENG_7's /sbin/ipfw) will be
fixed with separate commits.
CREDITS:
This work has been partly supported by the ONELAB2 project, and
mostly developed by Riccardo Panicucci and myself.
The code for the qfq scheduler is mostly from Fabio Checconi,
and Marta Carbone and Francesco Magno have helped with testing,
debugging and some bug fixes.
2010-03-02 17:40:48 +00:00
|
|
|
#endif /* __FreeBSD__ */
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
}
|
|
|
|
return(0); /* no match, fail ... */
|
|
|
|
}
|
|
|
|
|
2003-03-15 01:13:00 +00:00
|
|
|
/*
|
2004-04-23 14:28:38 +00:00
|
|
|
* The verify_path function checks if a route to the src exists and
|
|
|
|
* if it is reachable via ifp (when provided).
|
|
|
|
*
|
2003-03-15 01:13:00 +00:00
|
|
|
* The 'verrevpath' option checks that the interface that an IP packet
|
2003-06-23 21:18:56 +00:00
|
|
|
* arrives on is the same interface that traffic destined for the
|
2009-12-16 10:48:40 +00:00
|
|
|
* packet's source address would be routed out of.
|
|
|
|
* The 'versrcreach' option just checks that the source address is
|
|
|
|
* reachable via any route (except default) in the routing table.
|
|
|
|
* These two are a measure to block forged packets. This is also
|
|
|
|
* commonly known as "anti-spoofing" or Unicast Reverse Path
|
|
|
|
* Forwarding (Unicast RFP) in Cisco-ese. The name of the knobs
|
2004-04-23 14:28:38 +00:00
|
|
|
* is purposely reminiscent of the Cisco IOS command,
|
2003-03-15 01:13:00 +00:00
|
|
|
*
|
|
|
|
* ip verify unicast reverse-path
|
2004-04-23 14:28:38 +00:00
|
|
|
* ip verify unicast source reachable-via any
|
2003-03-15 01:13:00 +00:00
|
|
|
*
|
2009-12-16 10:48:40 +00:00
|
|
|
* which implements the same functionality. But note that the syntax
|
|
|
|
* is misleading, and the check may be performed on all IP packets
|
|
|
|
* whether unicast, multicast, or broadcast.
|
2003-03-15 01:13:00 +00:00
|
|
|
*/
|
|
|
|
static int
|
Add code to allow the system to handle multiple routing tables.
This particular implementation is designed to be fully backwards compatible
and to be MFC-able to 7.x (and 6.x)
Currently the only protocol that can make use of the multiple tables is IPv4
Similar functionality exists in OpenBSD and Linux.
From my notes:
-----
One thing where FreeBSD has been falling behind, and which by chance I
have some time to work on is "policy based routing", which allows
different
packet streams to be routed by more than just the destination address.
Constraints:
------------
I want to make some form of this available in the 6.x tree
(and by extension 7.x) , but FreeBSD in general needs it so I might as
well do it in -current and back port the portions I need.
One of the ways that this can be done is to have the ability to
instantiate multiple kernel routing tables (which I will now
refer to as "Forwarding Information Bases" or "FIBs" for political
correctness reasons). Which FIB a particular packet uses to make
the next hop decision can be decided by a number of mechanisms.
The policies these mechanisms implement are the "Policies" referred
to in "Policy based routing".
One of the constraints I have if I try to back port this work to
6.x is that it must be implemented as a EXTENSION to the existing
ABIs in 6.x so that third party applications do not need to be
recompiled in timespan of the branch.
This first version will not have some of the bells and whistles that
will come with later versions. It will, for example, be limited to 16
tables in the first commit.
Implementation method, Compatible version. (part 1)
-------------------------------
For this reason I have implemented a "sufficient subset" of a
multiple routing table solution in Perforce, and back-ported it
to 6.x. (also in Perforce though not always caught up with what I
have done in -current/P4). The subset allows a number of FIBs
to be defined at compile time (8 is sufficient for my purposes in 6.x)
and implements the changes needed to allow IPV4 to use them. I have not
done the changes for ipv6 simply because I do not need it, and I do not
have enough knowledge of ipv6 (e.g. neighbor discovery) needed to do it.
Other protocol families are left untouched and should there be
users with proprietary protocol families, they should continue to work
and be oblivious to the existence of the extra FIBs.
To understand how this is done, one must know that the current FIB
code starts everything off with a single dimensional array of
pointers to FIB head structures (One per protocol family), each of
which in turn points to the trie of routes available to that family.
The basic change in the ABI compatible version of the change is to
extent that array to be a 2 dimensional array, so that
instead of protocol family X looking at rt_tables[X] for the
table it needs, it looks at rt_tables[Y][X] when for all
protocol families except ipv4 Y is always 0.
Code that is unaware of the change always just sees the first row
of the table, which of course looks just like the one dimensional
array that existed before.
The entry points rtrequest(), rtalloc(), rtalloc1(), rtalloc_ign()
are all maintained, but refer only to the first row of the array,
so that existing callers in proprietary protocols can continue to
do the "right thing".
Some new entry points are added, for the exclusive use of ipv4 code
called in_rtrequest(), in_rtalloc(), in_rtalloc1() and in_rtalloc_ign(),
which have an extra argument which refers the code to the correct row.
In addition, there are some new entry points (currently called
rtalloc_fib() and friends) that check the Address family being
looked up and call either rtalloc() (and friends) if the protocol
is not IPv4 forcing the action to row 0 or to the appropriate row
if it IS IPv4 (and that info is available). These are for calling
from code that is not specific to any particular protocol. The way
these are implemented would change in the non ABI preserving code
to be added later.
One feature of the first version of the code is that for ipv4,
the interface routes show up automatically on all the FIBs, so
that no matter what FIB you select you always have the basic
direct attached hosts available to you. (rtinit() does this
automatically).
You CAN delete an interface route from one FIB should you want
to but by default it's there. ARP information is also available
in each FIB. It's assumed that the same machine would have the
same MAC address, regardless of which FIB you are using to get
to it.
This brings us as to how the correct FIB is selected for an outgoing
IPV4 packet.
Firstly, all packets have a FIB associated with them. if nothing
has been done to change it, it will be FIB 0. The FIB is changed
in the following ways.
Packets fall into one of a number of classes.
1/ locally generated packets, coming from a socket/PCB.
Such packets select a FIB from a number associated with the
socket/PCB. This in turn is inherited from the process,
but can be changed by a socket option. The process in turn
inherits it on fork. I have written a utility call setfib
that acts a bit like nice..
setfib -3 ping target.example.com # will use fib 3 for ping.
It is an obvious extension to make it a property of a jail
but I have not done so. It can be achieved by combining the setfib and
jail commands.
2/ packets received on an interface for forwarding.
By default these packets would use table 0,
(or possibly a number settable in a sysctl(not yet)).
but prior to routing the firewall can inspect them (see below).
(possibly in the future you may be able to associate a FIB
with packets received on an interface.. An ifconfig arg, but not yet.)
3/ packets inspected by a packet classifier, which can arbitrarily
associate a fib with it on a packet by packet basis.
A fib assigned to a packet by a packet classifier
(such as ipfw) would over-ride a fib associated by
a more default source. (such as cases 1 or 2).
4/ a tcp listen socket associated with a fib will generate
accept sockets that are associated with that same fib.
5/ Packets generated in response to some other packet (e.g. reset
or icmp packets). These should use the FIB associated with the
packet being reponded to.
6/ Packets generated during encapsulation.
gif, tun and other tunnel interfaces will encapsulate using the FIB
that was in effect withthe proces that set up the tunnel.
thus setfib 1 ifconfig gif0 [tunnel instructions]
will set the fib for the tunnel to use to be fib 1.
Routing messages would be associated with their
process, and thus select one FIB or another.
messages from the kernel would be associated with the fib they
refer to and would only be received by a routing socket associated
with that fib. (not yet implemented)
In addition Netstat has been edited to be able to cope with the
fact that the array is now 2 dimensional. (It looks in system
memory using libkvm (!)). Old versions of netstat see only the first FIB.
In addition two sysctls are added to give:
a) the number of FIBs compiled in (active)
b) the default FIB of the calling process.
Early testing experience:
-------------------------
Basically our (IronPort's) appliance does this functionality already
using ipfw fwd but that method has some drawbacks.
For example,
It can't fully simulate a routing table because it can't influence the
socket's choice of local address when a connect() is done.
Testing during the generating of these changes has been
remarkably smooth so far. Multiple tables have co-existed
with no notable side effects, and packets have been routes
accordingly.
ipfw has grown 2 new keywords:
setfib N ip from anay to any
count ip from any to any fib N
In pf there seems to be a requirement to be able to give symbolic names to the
fibs but I do not have that capacity. I am not sure if it is required.
SCTP has interestingly enough built in support for this, called VRFs
in Cisco parlance. it will be interesting to see how that handles it
when it suddenly actually does something.
Where to next:
--------------------
After committing the ABI compatible version and MFCing it, I'd
like to proceed in a forward direction in -current. this will
result in some roto-tilling in the routing code.
Firstly: the current code's idea of having a separate tree per
protocol family, all of the same format, and pointed to by the
1 dimensional array is a bit silly. Especially when one considers that
there is code that makes assumptions about every protocol having the
same internal structures there. Some protocols don't WANT that
sort of structure. (for example the whole idea of a netmask is foreign
to appletalk). This needs to be made opaque to the external code.
My suggested first change is to add routing method pointers to the
'domain' structure, along with information pointing the data.
instead of having an array of pointers to uniform structures,
there would be an array pointing to the 'domain' structures
for each protocol address domain (protocol family),
and the methods this reached would be called. The methods would have
an argument that gives FIB number, but the protocol would be free
to ignore it.
When the ABI can be changed it raises the possibilty of the
addition of a fib entry into the "struct route". Currently,
the structure contains the sockaddr of the desination, and the resulting
fib entry. To make this work fully, one could add a fib number
so that given an address and a fib, one can find the third element, the
fib entry.
Interaction with the ARP layer/ LL layer would need to be
revisited as well. Qing Li has been working on this already.
This work was sponsored by Ironport Systems/Cisco
Reviewed by: several including rwatson, bz and mlair (parts each)
Obtained from: Ironport systems/Cisco
2008-05-09 23:03:00 +00:00
|
|
|
verify_path(struct in_addr src, struct ifnet *ifp, u_int fib)
|
2003-03-15 01:13:00 +00:00
|
|
|
{
|
Bring in the most recent version of ipfw and dummynet, developed
and tested over the past two months in the ipfw3-head branch. This
also happens to be the same code available in the Linux and Windows
ports of ipfw and dummynet.
The major enhancement is a completely restructured version of
dummynet, with support for different packet scheduling algorithms
(loadable at runtime), faster queue/pipe lookup, and a much cleaner
internal architecture and kernel/userland ABI which simplifies
future extensions.
In addition to the existing schedulers (FIFO and WF2Q+), we include
a Deficit Round Robin (DRR or RR for brevity) scheduler, and a new,
very fast version of WF2Q+ called QFQ.
Some test code is also present (in sys/netinet/ipfw/test) that
lets you build and test schedulers in userland.
Also, we have added a compatibility layer that understands requests
from the RELENG_7 and RELENG_8 versions of the /sbin/ipfw binaries,
and replies correctly (at least, it does its best; sometimes you
just cannot tell who sent the request and how to answer).
The compatibility layer should make it possible to MFC this code in a
relatively short time.
Some minor glitches (e.g. handling of ipfw set enable/disable,
and a workaround for a bug in RELENG_7's /sbin/ipfw) will be
fixed with separate commits.
CREDITS:
This work has been partly supported by the ONELAB2 project, and
mostly developed by Riccardo Panicucci and myself.
The code for the qfq scheduler is mostly from Fabio Checconi,
and Marta Carbone and Francesco Magno have helped with testing,
debugging and some bug fixes.
2010-03-02 17:40:48 +00:00
|
|
|
#ifndef __FreeBSD__
|
|
|
|
return 0;
|
|
|
|
#else
|
2003-11-27 09:40:13 +00:00
|
|
|
struct route ro;
|
2003-03-15 01:13:00 +00:00
|
|
|
struct sockaddr_in *dst;
|
|
|
|
|
2003-11-27 09:40:13 +00:00
|
|
|
bzero(&ro, sizeof(ro));
|
2003-03-15 01:13:00 +00:00
|
|
|
|
2003-11-27 09:40:13 +00:00
|
|
|
dst = (struct sockaddr_in *)&(ro.ro_dst);
|
|
|
|
dst->sin_family = AF_INET;
|
|
|
|
dst->sin_len = sizeof(*dst);
|
|
|
|
dst->sin_addr = src;
|
This main goals of this project are:
1. separating L2 tables (ARP, NDP) from the L3 routing tables
2. removing as much locking dependencies among these layers as
possible to allow for some parallelism in the search operations
3. simplify the logic in the routing code,
The most notable end result is the obsolescent of the route
cloning (RTF_CLONING) concept, which translated into code reduction
in both IPv4 ARP and IPv6 NDP related modules, and size reduction in
struct rtentry{}. The change in design obsoletes the semantics of
RTF_CLONING, RTF_WASCLONE and RTF_LLINFO routing flags. The userland
applications such as "arp" and "ndp" have been modified to reflect
those changes. The output from "netstat -r" shows only the routing
entries.
Quite a few developers have contributed to this project in the
past: Glebius Smirnoff, Luigi Rizzo, Alessandro Cerri, and
Andre Oppermann. And most recently:
- Kip Macy revised the locking code completely, thus completing
the last piece of the puzzle, Kip has also been conducting
active functional testing
- Sam Leffler has helped me improving/refactoring the code, and
provided valuable reviews
- Julian Elischer setup the perforce tree for me and has helped
me maintaining that branch before the svn conversion
2008-12-15 06:10:57 +00:00
|
|
|
in_rtalloc_ign(&ro, 0, fib);
|
2003-03-15 01:13:00 +00:00
|
|
|
|
2003-11-20 20:07:39 +00:00
|
|
|
if (ro.ro_rt == NULL)
|
2003-03-15 01:13:00 +00:00
|
|
|
return 0;
|
2004-04-23 14:28:38 +00:00
|
|
|
|
2006-01-24 13:38:06 +00:00
|
|
|
/*
|
|
|
|
* If ifp is provided, check for equality with rtentry.
|
|
|
|
* We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp,
|
|
|
|
* in order to pass packets injected back by if_simloop():
|
|
|
|
* if useloopback == 1 routing entry (via lo0) for our own address
|
|
|
|
* may exist, so we need to handle routing assymetry.
|
|
|
|
*/
|
|
|
|
if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) {
|
2004-04-23 14:28:38 +00:00
|
|
|
RTFREE(ro.ro_rt);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* if no ifp provided, check if rtentry is not default route */
|
|
|
|
if (ifp == NULL &&
|
|
|
|
satosin(rt_key(ro.ro_rt))->sin_addr.s_addr == INADDR_ANY) {
|
2003-11-20 20:07:39 +00:00
|
|
|
RTFREE(ro.ro_rt);
|
|
|
|
return 0;
|
|
|
|
}
|
2004-04-23 14:28:38 +00:00
|
|
|
|
2004-07-21 19:55:14 +00:00
|
|
|
/* or if this is a blackhole/reject route */
|
|
|
|
if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
|
|
|
|
RTFREE(ro.ro_rt);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2004-04-23 14:28:38 +00:00
|
|
|
/* found valid route */
|
2003-11-20 20:07:39 +00:00
|
|
|
RTFREE(ro.ro_rt);
|
2003-06-28 14:16:53 +00:00
|
|
|
return 1;
|
Bring in the most recent version of ipfw and dummynet, developed
and tested over the past two months in the ipfw3-head branch. This
also happens to be the same code available in the Linux and Windows
ports of ipfw and dummynet.
The major enhancement is a completely restructured version of
dummynet, with support for different packet scheduling algorithms
(loadable at runtime), faster queue/pipe lookup, and a much cleaner
internal architecture and kernel/userland ABI which simplifies
future extensions.
In addition to the existing schedulers (FIFO and WF2Q+), we include
a Deficit Round Robin (DRR or RR for brevity) scheduler, and a new,
very fast version of WF2Q+ called QFQ.
Some test code is also present (in sys/netinet/ipfw/test) that
lets you build and test schedulers in userland.
Also, we have added a compatibility layer that understands requests
from the RELENG_7 and RELENG_8 versions of the /sbin/ipfw binaries,
and replies correctly (at least, it does its best; sometimes you
just cannot tell who sent the request and how to answer).
The compatibility layer should make it possible to MFC this code in a
relatively short time.
Some minor glitches (e.g. handling of ipfw set enable/disable,
and a workaround for a bug in RELENG_7's /sbin/ipfw) will be
fixed with separate commits.
CREDITS:
This work has been partly supported by the ONELAB2 project, and
mostly developed by Riccardo Panicucci and myself.
The code for the qfq scheduler is mostly from Fabio Checconi,
and Marta Carbone and Francesco Magno have helped with testing,
debugging and some bug fixes.
2010-03-02 17:40:48 +00:00
|
|
|
#endif /* __FreeBSD__ */
|
2003-03-15 01:13:00 +00:00
|
|
|
}
|
|
|
|
|
2005-04-19 09:56:14 +00:00
|
|
|
#ifdef INET6
|
2005-04-18 18:35:05 +00:00
|
|
|
/*
|
|
|
|
* ipv6 specific rules here...
|
|
|
|
*/
|
|
|
|
static __inline int
|
|
|
|
icmp6type_match (int type, ipfw_insn_u32 *cmd)
|
|
|
|
{
|
2005-06-16 13:20:36 +00:00
|
|
|
return (type <= ICMP6_MAXTYPE && (cmd->d[type/32] & (1<<(type%32)) ) );
|
2005-04-18 18:35:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
flow6id_match( int curr_flow, ipfw_insn_u32 *cmd )
|
|
|
|
{
|
2005-06-16 13:20:36 +00:00
|
|
|
int i;
|
|
|
|
for (i=0; i <= cmd->o.arg1; ++i )
|
|
|
|
if (curr_flow == cmd->d[i] )
|
|
|
|
return 1;
|
|
|
|
return 0;
|
2005-04-18 18:35:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* support for IP6_*_ME opcodes */
|
|
|
|
static int
|
|
|
|
search_ip6_addr_net (struct in6_addr * ip6_addr)
|
|
|
|
{
|
2005-06-16 13:20:36 +00:00
|
|
|
struct ifnet *mdc;
|
|
|
|
struct ifaddr *mdc2;
|
|
|
|
struct in6_ifaddr *fdm;
|
|
|
|
struct in6_addr copia;
|
|
|
|
|
2009-04-19 22:34:35 +00:00
|
|
|
TAILQ_FOREACH(mdc, &V_ifnet, if_link) {
|
2009-06-26 00:46:50 +00:00
|
|
|
if_addr_rlock(mdc);
|
2009-04-20 22:40:44 +00:00
|
|
|
TAILQ_FOREACH(mdc2, &mdc->if_addrhead, ifa_link) {
|
2005-06-16 13:20:36 +00:00
|
|
|
if (mdc2->ifa_addr->sa_family == AF_INET6) {
|
|
|
|
fdm = (struct in6_ifaddr *)mdc2;
|
|
|
|
copia = fdm->ia_addr.sin6_addr;
|
|
|
|
/* need for leaving scope_id in the sock_addr */
|
|
|
|
in6_clearscope(&copia);
|
2009-04-19 22:34:35 +00:00
|
|
|
if (IN6_ARE_ADDR_EQUAL(ip6_addr, &copia)) {
|
2009-06-26 00:46:50 +00:00
|
|
|
if_addr_runlock(mdc);
|
2005-06-16 13:20:36 +00:00
|
|
|
return 1;
|
2009-04-19 22:34:35 +00:00
|
|
|
}
|
2005-06-16 13:20:36 +00:00
|
|
|
}
|
|
|
|
}
|
2009-06-26 00:46:50 +00:00
|
|
|
if_addr_runlock(mdc);
|
2009-04-19 22:34:35 +00:00
|
|
|
}
|
2005-06-16 13:20:36 +00:00
|
|
|
return 0;
|
2005-04-18 18:35:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2005-06-16 14:55:58 +00:00
|
|
|
verify_path6(struct in6_addr *src, struct ifnet *ifp)
|
2005-04-18 18:35:05 +00:00
|
|
|
{
|
2005-06-16 14:55:58 +00:00
|
|
|
struct route_in6 ro;
|
2005-06-16 13:20:36 +00:00
|
|
|
struct sockaddr_in6 *dst;
|
|
|
|
|
2005-06-16 14:55:58 +00:00
|
|
|
bzero(&ro, sizeof(ro));
|
|
|
|
|
2005-06-16 13:20:36 +00:00
|
|
|
dst = (struct sockaddr_in6 * )&(ro.ro_dst);
|
2005-06-16 14:55:58 +00:00
|
|
|
dst->sin6_family = AF_INET6;
|
|
|
|
dst->sin6_len = sizeof(*dst);
|
|
|
|
dst->sin6_addr = *src;
|
Add code to allow the system to handle multiple routing tables.
This particular implementation is designed to be fully backwards compatible
and to be MFC-able to 7.x (and 6.x)
Currently the only protocol that can make use of the multiple tables is IPv4
Similar functionality exists in OpenBSD and Linux.
From my notes:
-----
One thing where FreeBSD has been falling behind, and which by chance I
have some time to work on is "policy based routing", which allows
different
packet streams to be routed by more than just the destination address.
Constraints:
------------
I want to make some form of this available in the 6.x tree
(and by extension 7.x) , but FreeBSD in general needs it so I might as
well do it in -current and back port the portions I need.
One of the ways that this can be done is to have the ability to
instantiate multiple kernel routing tables (which I will now
refer to as "Forwarding Information Bases" or "FIBs" for political
correctness reasons). Which FIB a particular packet uses to make
the next hop decision can be decided by a number of mechanisms.
The policies these mechanisms implement are the "Policies" referred
to in "Policy based routing".
One of the constraints I have if I try to back port this work to
6.x is that it must be implemented as a EXTENSION to the existing
ABIs in 6.x so that third party applications do not need to be
recompiled in timespan of the branch.
This first version will not have some of the bells and whistles that
will come with later versions. It will, for example, be limited to 16
tables in the first commit.
Implementation method, Compatible version. (part 1)
-------------------------------
For this reason I have implemented a "sufficient subset" of a
multiple routing table solution in Perforce, and back-ported it
to 6.x. (also in Perforce though not always caught up with what I
have done in -current/P4). The subset allows a number of FIBs
to be defined at compile time (8 is sufficient for my purposes in 6.x)
and implements the changes needed to allow IPV4 to use them. I have not
done the changes for ipv6 simply because I do not need it, and I do not
have enough knowledge of ipv6 (e.g. neighbor discovery) needed to do it.
Other protocol families are left untouched and should there be
users with proprietary protocol families, they should continue to work
and be oblivious to the existence of the extra FIBs.
To understand how this is done, one must know that the current FIB
code starts everything off with a single dimensional array of
pointers to FIB head structures (One per protocol family), each of
which in turn points to the trie of routes available to that family.
The basic change in the ABI compatible version of the change is to
extent that array to be a 2 dimensional array, so that
instead of protocol family X looking at rt_tables[X] for the
table it needs, it looks at rt_tables[Y][X] when for all
protocol families except ipv4 Y is always 0.
Code that is unaware of the change always just sees the first row
of the table, which of course looks just like the one dimensional
array that existed before.
The entry points rtrequest(), rtalloc(), rtalloc1(), rtalloc_ign()
are all maintained, but refer only to the first row of the array,
so that existing callers in proprietary protocols can continue to
do the "right thing".
Some new entry points are added, for the exclusive use of ipv4 code
called in_rtrequest(), in_rtalloc(), in_rtalloc1() and in_rtalloc_ign(),
which have an extra argument which refers the code to the correct row.
In addition, there are some new entry points (currently called
rtalloc_fib() and friends) that check the Address family being
looked up and call either rtalloc() (and friends) if the protocol
is not IPv4 forcing the action to row 0 or to the appropriate row
if it IS IPv4 (and that info is available). These are for calling
from code that is not specific to any particular protocol. The way
these are implemented would change in the non ABI preserving code
to be added later.
One feature of the first version of the code is that for ipv4,
the interface routes show up automatically on all the FIBs, so
that no matter what FIB you select you always have the basic
direct attached hosts available to you. (rtinit() does this
automatically).
You CAN delete an interface route from one FIB should you want
to but by default it's there. ARP information is also available
in each FIB. It's assumed that the same machine would have the
same MAC address, regardless of which FIB you are using to get
to it.
This brings us as to how the correct FIB is selected for an outgoing
IPV4 packet.
Firstly, all packets have a FIB associated with them. if nothing
has been done to change it, it will be FIB 0. The FIB is changed
in the following ways.
Packets fall into one of a number of classes.
1/ locally generated packets, coming from a socket/PCB.
Such packets select a FIB from a number associated with the
socket/PCB. This in turn is inherited from the process,
but can be changed by a socket option. The process in turn
inherits it on fork. I have written a utility call setfib
that acts a bit like nice..
setfib -3 ping target.example.com # will use fib 3 for ping.
It is an obvious extension to make it a property of a jail
but I have not done so. It can be achieved by combining the setfib and
jail commands.
2/ packets received on an interface for forwarding.
By default these packets would use table 0,
(or possibly a number settable in a sysctl(not yet)).
but prior to routing the firewall can inspect them (see below).
(possibly in the future you may be able to associate a FIB
with packets received on an interface.. An ifconfig arg, but not yet.)
3/ packets inspected by a packet classifier, which can arbitrarily
associate a fib with it on a packet by packet basis.
A fib assigned to a packet by a packet classifier
(such as ipfw) would over-ride a fib associated by
a more default source. (such as cases 1 or 2).
4/ a tcp listen socket associated with a fib will generate
accept sockets that are associated with that same fib.
5/ Packets generated in response to some other packet (e.g. reset
or icmp packets). These should use the FIB associated with the
packet being reponded to.
6/ Packets generated during encapsulation.
gif, tun and other tunnel interfaces will encapsulate using the FIB
that was in effect withthe proces that set up the tunnel.
thus setfib 1 ifconfig gif0 [tunnel instructions]
will set the fib for the tunnel to use to be fib 1.
Routing messages would be associated with their
process, and thus select one FIB or another.
messages from the kernel would be associated with the fib they
refer to and would only be received by a routing socket associated
with that fib. (not yet implemented)
In addition Netstat has been edited to be able to cope with the
fact that the array is now 2 dimensional. (It looks in system
memory using libkvm (!)). Old versions of netstat see only the first FIB.
In addition two sysctls are added to give:
a) the number of FIBs compiled in (active)
b) the default FIB of the calling process.
Early testing experience:
-------------------------
Basically our (IronPort's) appliance does this functionality already
using ipfw fwd but that method has some drawbacks.
For example,
It can't fully simulate a routing table because it can't influence the
socket's choice of local address when a connect() is done.
Testing during the generating of these changes has been
remarkably smooth so far. Multiple tables have co-existed
with no notable side effects, and packets have been routes
accordingly.
ipfw has grown 2 new keywords:
setfib N ip from anay to any
count ip from any to any fib N
In pf there seems to be a requirement to be able to give symbolic names to the
fibs but I do not have that capacity. I am not sure if it is required.
SCTP has interestingly enough built in support for this, called VRFs
in Cisco parlance. it will be interesting to see how that handles it
when it suddenly actually does something.
Where to next:
--------------------
After committing the ABI compatible version and MFCing it, I'd
like to proceed in a forward direction in -current. this will
result in some roto-tilling in the routing code.
Firstly: the current code's idea of having a separate tree per
protocol family, all of the same format, and pointed to by the
1 dimensional array is a bit silly. Especially when one considers that
there is code that makes assumptions about every protocol having the
same internal structures there. Some protocols don't WANT that
sort of structure. (for example the whole idea of a netmask is foreign
to appletalk). This needs to be made opaque to the external code.
My suggested first change is to add routing method pointers to the
'domain' structure, along with information pointing the data.
instead of having an array of pointers to uniform structures,
there would be an array pointing to the 'domain' structures
for each protocol address domain (protocol family),
and the methods this reached would be called. The methods would have
an argument that gives FIB number, but the protocol would be free
to ignore it.
When the ABI can be changed it raises the possibilty of the
addition of a fib entry into the "struct route". Currently,
the structure contains the sockaddr of the desination, and the resulting
fib entry. To make this work fully, one could add a fib number
so that given an address and a fib, one can find the third element, the
fib entry.
Interaction with the ARP layer/ LL layer would need to be
revisited as well. Qing Li has been working on this already.
This work was sponsored by Ironport Systems/Cisco
Reviewed by: several including rwatson, bz and mlair (parts each)
Obtained from: Ironport systems/Cisco
2008-05-09 23:03:00 +00:00
|
|
|
/* XXX MRT 0 for ipv6 at this time */
|
This main goals of this project are:
1. separating L2 tables (ARP, NDP) from the L3 routing tables
2. removing as much locking dependencies among these layers as
possible to allow for some parallelism in the search operations
3. simplify the logic in the routing code,
The most notable end result is the obsolescent of the route
cloning (RTF_CLONING) concept, which translated into code reduction
in both IPv4 ARP and IPv6 NDP related modules, and size reduction in
struct rtentry{}. The change in design obsoletes the semantics of
RTF_CLONING, RTF_WASCLONE and RTF_LLINFO routing flags. The userland
applications such as "arp" and "ndp" have been modified to reflect
those changes. The output from "netstat -r" shows only the routing
entries.
Quite a few developers have contributed to this project in the
past: Glebius Smirnoff, Luigi Rizzo, Alessandro Cerri, and
Andre Oppermann. And most recently:
- Kip Macy revised the locking code completely, thus completing
the last piece of the puzzle, Kip has also been conducting
active functional testing
- Sam Leffler has helped me improving/refactoring the code, and
provided valuable reviews
- Julian Elischer setup the perforce tree for me and has helped
me maintaining that branch before the svn conversion
2008-12-15 06:10:57 +00:00
|
|
|
rtalloc_ign((struct route *)&ro, 0);
|
2005-06-16 14:55:58 +00:00
|
|
|
|
|
|
|
if (ro.ro_rt == NULL)
|
|
|
|
return 0;
|
|
|
|
|
2005-11-10 22:10:39 +00:00
|
|
|
/*
|
|
|
|
* if ifp is provided, check for equality with rtentry
|
|
|
|
* We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp,
|
|
|
|
* to support the case of sending packets to an address of our own.
|
|
|
|
* (where the former interface is the first argument of if_simloop()
|
|
|
|
* (=ifp), the latter is lo0)
|
|
|
|
*/
|
|
|
|
if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) {
|
2005-06-16 14:55:58 +00:00
|
|
|
RTFREE(ro.ro_rt);
|
|
|
|
return 0;
|
|
|
|
}
|
2005-06-16 13:20:36 +00:00
|
|
|
|
2005-06-16 14:55:58 +00:00
|
|
|
/* if no ifp provided, check if rtentry is not default route */
|
|
|
|
if (ifp == NULL &&
|
|
|
|
IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(ro.ro_rt))->sin6_addr)) {
|
|
|
|
RTFREE(ro.ro_rt);
|
|
|
|
return 0;
|
2005-06-16 13:20:36 +00:00
|
|
|
}
|
2005-06-16 14:55:58 +00:00
|
|
|
|
|
|
|
/* or if this is a blackhole/reject route */
|
|
|
|
if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
|
|
|
|
RTFREE(ro.ro_rt);
|
2005-06-16 13:20:36 +00:00
|
|
|
return 0;
|
2005-06-16 14:55:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* found valid route */
|
|
|
|
RTFREE(ro.ro_rt);
|
2005-06-16 13:20:36 +00:00
|
|
|
return 1;
|
2005-06-16 14:55:58 +00:00
|
|
|
|
2005-04-18 18:35:05 +00:00
|
|
|
}
|
2009-12-16 10:48:40 +00:00
|
|
|
|
2005-08-13 11:02:34 +00:00
|
|
|
static int
|
|
|
|
is_icmp6_query(int icmp6_type)
|
|
|
|
{
|
|
|
|
if ((icmp6_type <= ICMP6_MAXTYPE) &&
|
|
|
|
(icmp6_type == ICMP6_ECHO_REQUEST ||
|
|
|
|
icmp6_type == ICMP6_MEMBERSHIP_QUERY ||
|
|
|
|
icmp6_type == ICMP6_WRUREQUEST ||
|
|
|
|
icmp6_type == ICMP6_FQDN_QUERY ||
|
|
|
|
icmp6_type == ICMP6_NI_QUERY))
|
|
|
|
return (1);
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2007-01-02 19:57:31 +00:00
|
|
|
send_reject6(struct ip_fw_args *args, int code, u_int hlen, struct ip6_hdr *ip6)
|
2005-08-13 11:02:34 +00:00
|
|
|
{
|
2007-01-02 19:57:31 +00:00
|
|
|
struct mbuf *m;
|
|
|
|
|
|
|
|
m = args->m;
|
2006-06-29 11:17:16 +00:00
|
|
|
if (code == ICMP6_UNREACH_RST && args->f_id.proto == IPPROTO_TCP) {
|
2005-08-13 11:02:34 +00:00
|
|
|
struct tcphdr *tcp;
|
2007-01-02 19:57:31 +00:00
|
|
|
tcp = (struct tcphdr *)((char *)ip6 + hlen);
|
2005-08-13 11:02:34 +00:00
|
|
|
|
2009-12-02 14:32:01 +00:00
|
|
|
if ((tcp->th_flags & TH_RST) == 0) {
|
|
|
|
struct mbuf *m0;
|
2009-12-16 10:48:40 +00:00
|
|
|
m0 = ipfw_send_pkt(args->m, &(args->f_id),
|
2009-12-02 14:32:01 +00:00
|
|
|
ntohl(tcp->th_seq), ntohl(tcp->th_ack),
|
|
|
|
tcp->th_flags | TH_RST);
|
|
|
|
if (m0 != NULL)
|
|
|
|
ip6_output(m0, NULL, NULL, 0, NULL, NULL,
|
|
|
|
NULL);
|
2005-08-13 11:02:34 +00:00
|
|
|
}
|
2010-01-04 19:01:22 +00:00
|
|
|
FREE_PKT(m);
|
2005-08-13 11:02:34 +00:00
|
|
|
} else if (code != ICMP6_UNREACH_RST) { /* Send an ICMPv6 unreach. */
|
2007-01-02 19:57:31 +00:00
|
|
|
#if 0
|
|
|
|
/*
|
|
|
|
* Unlike above, the mbufs need to line up with the ip6 hdr,
|
|
|
|
* as the contents are read. We need to m_adj() the
|
|
|
|
* needed amount.
|
|
|
|
* The mbuf will however be thrown away so we can adjust it.
|
|
|
|
* Remember we did an m_pullup on it already so we
|
|
|
|
* can make some assumptions about contiguousness.
|
|
|
|
*/
|
|
|
|
if (args->L3offset)
|
|
|
|
m_adj(m, args->L3offset);
|
|
|
|
#endif
|
|
|
|
icmp6_error(m, ICMP6_DST_UNREACH, code, 0);
|
2005-08-13 11:02:34 +00:00
|
|
|
} else
|
2010-01-04 19:01:22 +00:00
|
|
|
FREE_PKT(m);
|
2005-08-13 11:02:34 +00:00
|
|
|
|
|
|
|
args->m = NULL;
|
|
|
|
}
|
2003-03-15 01:13:00 +00:00
|
|
|
|
2005-04-19 09:56:14 +00:00
|
|
|
#endif /* INET6 */
|
|
|
|
|
2002-07-05 22:43:06 +00:00
|
|
|
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
/*
|
|
|
|
* sends a reject message, consuming the mbuf passed as an argument.
|
|
|
|
*/
|
|
|
|
static void
|
2009-12-28 10:47:04 +00:00
|
|
|
send_reject(struct ip_fw_args *args, int code, int iplen, struct ip *ip)
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
{
|
2002-08-13 19:13:23 +00:00
|
|
|
|
2007-01-02 19:57:31 +00:00
|
|
|
#if 0
|
|
|
|
/* XXX When ip is not guaranteed to be at mtod() we will
|
|
|
|
* need to account for this */
|
|
|
|
* The mbuf will however be thrown away so we can adjust it.
|
|
|
|
* Remember we did an m_pullup on it already so we
|
|
|
|
* can make some assumptions about contiguousness.
|
|
|
|
*/
|
|
|
|
if (args->L3offset)
|
|
|
|
m_adj(m, args->L3offset);
|
|
|
|
#endif
|
2002-12-27 17:43:25 +00:00
|
|
|
if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */
|
|
|
|
/* We need the IP header in host order for icmp_error(). */
|
2010-01-04 19:01:22 +00:00
|
|
|
SET_HOST_IPLEN(ip);
|
2002-07-05 22:43:06 +00:00
|
|
|
icmp_error(args->m, ICMP_UNREACH, code, 0L, 0);
|
2006-06-29 11:17:16 +00:00
|
|
|
} else if (args->f_id.proto == IPPROTO_TCP) {
|
2002-07-05 22:43:06 +00:00
|
|
|
struct tcphdr *const tcp =
|
|
|
|
L3HDR(struct tcphdr, mtod(args->m, struct ip *));
|
2005-06-10 12:28:17 +00:00
|
|
|
if ( (tcp->th_flags & TH_RST) == 0) {
|
|
|
|
struct mbuf *m;
|
2009-12-16 10:48:40 +00:00
|
|
|
m = ipfw_send_pkt(args->m, &(args->f_id),
|
2006-09-12 04:25:13 +00:00
|
|
|
ntohl(tcp->th_seq), ntohl(tcp->th_ack),
|
Implement keepalives for dynamic rules, so they will not expire
just because you leave your session idle.
Also, put in a fix for 64-bit architectures (to be revised).
In detail:
ip_fw.h
* Reorder fields in struct ip_fw to avoid alignment problems on
64-bit machines. This only masks the problem, I am still not
sure whether I am doing something wrong in the code or there
is a problem elsewhere (e.g. different aligmnent of structures
between userland and kernel because of pragmas etc.)
* added fields in dyn_rule to store ack numbers, so we can
generate keepalives when the dynamic rule is about to expire
ip_fw2.c
* use a local function, send_pkt(), to generate TCP RST for Reset rules;
* save about 250 bytes by cleaning up the various snprintf()
in ipfw_log() ...
* ... and use twice as many bytes to implement keepalives
(this seems to be working, but i have not tested it extensively).
Keepalives are generated once every 5 seconds for the last 20 seconds
of the lifetime of a dynamic rule for an established TCP flow. The
packets are sent to both sides, so if at least one of the endpoints
is responding, the timeout is refreshed and the rule will not expire.
You can disable this feature with
sysctl net.inet.ip.fw.dyn_keepalive=0
(the default is 1, to have them enabled).
MFC after: 1 day
(just kidding... I will supply an updated version of ipfw2 for
RELENG_4 tomorrow).
2002-07-14 23:47:18 +00:00
|
|
|
tcp->th_flags | TH_RST);
|
2005-06-10 12:28:17 +00:00
|
|
|
if (m != NULL)
|
|
|
|
ip_output(m, NULL, NULL, 0, NULL, NULL);
|
|
|
|
}
|
2010-01-04 19:01:22 +00:00
|
|
|
FREE_PKT(args->m);
|
2002-07-05 22:43:06 +00:00
|
|
|
} else
|
2010-01-04 19:01:22 +00:00
|
|
|
FREE_PKT(args->m);
|
2002-07-05 22:43:06 +00:00
|
|
|
args->m = NULL;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
}
|
|
|
|
|
2009-12-16 10:48:40 +00:00
|
|
|
/*
|
|
|
|
* Support for uid/gid/jail lookup. These tests are expensive
|
|
|
|
* (because we may need to look into the list of active sockets)
|
|
|
|
* so we cache the results. ugid_lookupp is 0 if we have not
|
|
|
|
* yet done a lookup, 1 if we succeeded, and -1 if we tried
|
|
|
|
* and failed. The function always returns the match value.
|
|
|
|
* We could actually spare the variable and use *uc, setting
|
|
|
|
* it to '(void *)check_uidgid if we have no info, NULL if
|
|
|
|
* we tried and failed, or any other value if successful.
|
|
|
|
*/
|
2003-11-07 23:26:57 +00:00
|
|
|
static int
|
2007-05-10 15:58:48 +00:00
|
|
|
check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif,
|
|
|
|
struct in_addr dst_ip, u_int16_t dst_port, struct in_addr src_ip,
|
Bring in the most recent version of ipfw and dummynet, developed
and tested over the past two months in the ipfw3-head branch. This
also happens to be the same code available in the Linux and Windows
ports of ipfw and dummynet.
The major enhancement is a completely restructured version of
dummynet, with support for different packet scheduling algorithms
(loadable at runtime), faster queue/pipe lookup, and a much cleaner
internal architecture and kernel/userland ABI which simplifies
future extensions.
In addition to the existing schedulers (FIFO and WF2Q+), we include
a Deficit Round Robin (DRR or RR for brevity) scheduler, and a new,
very fast version of WF2Q+ called QFQ.
Some test code is also present (in sys/netinet/ipfw/test) that
lets you build and test schedulers in userland.
Also, we have added a compatibility layer that understands requests
from the RELENG_7 and RELENG_8 versions of the /sbin/ipfw binaries,
and replies correctly (at least, it does its best; sometimes you
just cannot tell who sent the request and how to answer).
The compatibility layer should make it possible to MFC this code in a
relatively short time.
Some minor glitches (e.g. handling of ipfw set enable/disable,
and a workaround for a bug in RELENG_7's /sbin/ipfw) will be
fixed with separate commits.
CREDITS:
This work has been partly supported by the ONELAB2 project, and
mostly developed by Riccardo Panicucci and myself.
The code for the qfq scheduler is mostly from Fabio Checconi,
and Marta Carbone and Francesco Magno have helped with testing,
debugging and some bug fixes.
2010-03-02 17:40:48 +00:00
|
|
|
u_int16_t src_port, int *ugid_lookupp,
|
|
|
|
struct ucred **uc, struct inpcb *inp)
|
2003-11-07 23:26:57 +00:00
|
|
|
{
|
Bring in the most recent version of ipfw and dummynet, developed
and tested over the past two months in the ipfw3-head branch. This
also happens to be the same code available in the Linux and Windows
ports of ipfw and dummynet.
The major enhancement is a completely restructured version of
dummynet, with support for different packet scheduling algorithms
(loadable at runtime), faster queue/pipe lookup, and a much cleaner
internal architecture and kernel/userland ABI which simplifies
future extensions.
In addition to the existing schedulers (FIFO and WF2Q+), we include
a Deficit Round Robin (DRR or RR for brevity) scheduler, and a new,
very fast version of WF2Q+ called QFQ.
Some test code is also present (in sys/netinet/ipfw/test) that
lets you build and test schedulers in userland.
Also, we have added a compatibility layer that understands requests
from the RELENG_7 and RELENG_8 versions of the /sbin/ipfw binaries,
and replies correctly (at least, it does its best; sometimes you
just cannot tell who sent the request and how to answer).
The compatibility layer should make it possible to MFC this code in a
relatively short time.
Some minor glitches (e.g. handling of ipfw set enable/disable,
and a workaround for a bug in RELENG_7's /sbin/ipfw) will be
fixed with separate commits.
CREDITS:
This work has been partly supported by the ONELAB2 project, and
mostly developed by Riccardo Panicucci and myself.
The code for the qfq scheduler is mostly from Fabio Checconi,
and Marta Carbone and Francesco Magno have helped with testing,
debugging and some bug fixes.
2010-03-02 17:40:48 +00:00
|
|
|
#ifndef __FreeBSD__
|
|
|
|
return cred_check(insn, proto, oif,
|
|
|
|
dst_ip, dst_port, src_ip, src_port,
|
|
|
|
(struct bsd_ucred *)uc, ugid_lookupp, ((struct mbuf *)inp)->m_skb);
|
|
|
|
#else /* FreeBSD */
|
2003-11-07 23:26:57 +00:00
|
|
|
struct inpcbinfo *pi;
|
Decompose the current single inpcbinfo lock into two locks:
- The existing ipi_lock continues to protect the global inpcb list and
inpcb counter. This lock is now relegated to a small number of
allocation and free operations, and occasional operations that walk
all connections (including, awkwardly, certain UDP multicast receive
operations -- something to revisit).
- A new ipi_hash_lock protects the two inpcbinfo hash tables for
looking up connections and bound sockets, manipulated using new
INP_HASH_*() macros. This lock, combined with inpcb locks, protects
the 4-tuple address space.
Unlike the current ipi_lock, ipi_hash_lock follows the individual inpcb
connection locks, so may be acquired while manipulating a connection on
which a lock is already held, avoiding the need to acquire the inpcbinfo
lock preemptively when a binding change might later be required. As a
result, however, lookup operations necessarily go through a reference
acquire while holding the lookup lock, later acquiring an inpcb lock --
if required.
A new function in_pcblookup() looks up connections, and accepts flags
indicating how to return the inpcb. Due to lock order changes, callers
no longer need acquire locks before performing a lookup: the lookup
routine will acquire the ipi_hash_lock as needed. In the future, it will
also be able to use alternative lookup and locking strategies
transparently to callers, such as pcbgroup lookup. New lookup flags are,
supplementing the existing INPLOOKUP_WILDCARD flag:
INPLOOKUP_RLOCKPCB - Acquire a read lock on the returned inpcb
INPLOOKUP_WLOCKPCB - Acquire a write lock on the returned inpcb
Callers must pass exactly one of these flags (for the time being).
Some notes:
- All protocols are updated to work within the new regime; especially,
TCP, UDPv4, and UDPv6. pcbinfo ipi_lock acquisitions are largely
eliminated, and global hash lock hold times are dramatically reduced
compared to previous locking.
- The TCP syncache still relies on the pcbinfo lock, something that we
may want to revisit.
- Support for reverting to the FreeBSD 7.x locking strategy in TCP input
is no longer available -- hash lookup locks are now held only very
briefly during inpcb lookup, rather than for potentially extended
periods. However, the pcbinfo ipi_lock will still be acquired if a
connection state might change such that a connection is added or
removed.
- Raw IP sockets continue to use the pcbinfo ipi_lock for protection,
due to maintaining their own hash tables.
- The interface in6_pcblookup_hash_locked() is maintained, which allows
callers to acquire hash locks and perform one or more lookups atomically
with 4-tuple allocation: this is required only for TCPv6, as there is no
in6_pcbconnect_setup(), which there should be.
- UDPv6 locking remains significantly more conservative than UDPv4
locking, which relates to source address selection. This needs
attention, as it likely significantly reduces parallelism in this code
for multithreaded socket use (such as in BIND).
- In the UDPv4 and UDPv6 multicast cases, we need to revisit locking
somewhat, as they relied on ipi_lock to stablise 4-tuple matches, which
is no longer sufficient. A second check once the inpcb lock is held
should do the trick, keeping the general case from requiring the inpcb
lock for every inpcb visited.
- This work reminds us that we need to revisit locking of the v4/v6 flags,
which may be accessed lock-free both before and after this change.
- Right now, a single lock name is used for the pcbhash lock -- this is
undesirable, and probably another argument is required to take care of
this (or a char array name field in the pcbinfo?).
This is not an MFC candidate for 8.x due to its impact on lookup and
locking semantics. It's possible some of these issues could be worked
around with compatibility wrappers, if necessary.
Reviewed by: bz
Sponsored by: Juniper Networks, Inc.
2011-05-30 09:43:55 +00:00
|
|
|
int lookupflags;
|
2003-11-07 23:26:57 +00:00
|
|
|
struct inpcb *pcb;
|
|
|
|
int match;
|
|
|
|
|
2004-09-29 04:54:33 +00:00
|
|
|
/*
|
|
|
|
* Check to see if the UDP or TCP stack supplied us with
|
|
|
|
* the PCB. If so, rather then holding a lock and looking
|
|
|
|
* up the PCB, we can use the one that was supplied.
|
|
|
|
*/
|
2008-09-27 10:14:02 +00:00
|
|
|
if (inp && *ugid_lookupp == 0) {
|
2008-04-20 00:21:54 +00:00
|
|
|
INP_LOCK_ASSERT(inp);
|
2004-09-29 04:54:33 +00:00
|
|
|
if (inp->inp_socket != NULL) {
|
2009-06-19 17:10:35 +00:00
|
|
|
*uc = crhold(inp->inp_cred);
|
2008-09-27 10:14:02 +00:00
|
|
|
*ugid_lookupp = 1;
|
2008-09-27 19:28:28 +00:00
|
|
|
} else
|
|
|
|
*ugid_lookupp = -1;
|
2004-09-29 04:54:33 +00:00
|
|
|
}
|
2004-06-11 22:17:14 +00:00
|
|
|
/*
|
|
|
|
* If we have already been here and the packet has no
|
|
|
|
* PCB entry associated with it, then we can safely
|
|
|
|
* assume that this is a no match.
|
|
|
|
*/
|
2008-09-27 10:14:02 +00:00
|
|
|
if (*ugid_lookupp == -1)
|
2004-06-11 22:17:14 +00:00
|
|
|
return (0);
|
2003-11-07 23:26:57 +00:00
|
|
|
if (proto == IPPROTO_TCP) {
|
Decompose the current single inpcbinfo lock into two locks:
- The existing ipi_lock continues to protect the global inpcb list and
inpcb counter. This lock is now relegated to a small number of
allocation and free operations, and occasional operations that walk
all connections (including, awkwardly, certain UDP multicast receive
operations -- something to revisit).
- A new ipi_hash_lock protects the two inpcbinfo hash tables for
looking up connections and bound sockets, manipulated using new
INP_HASH_*() macros. This lock, combined with inpcb locks, protects
the 4-tuple address space.
Unlike the current ipi_lock, ipi_hash_lock follows the individual inpcb
connection locks, so may be acquired while manipulating a connection on
which a lock is already held, avoiding the need to acquire the inpcbinfo
lock preemptively when a binding change might later be required. As a
result, however, lookup operations necessarily go through a reference
acquire while holding the lookup lock, later acquiring an inpcb lock --
if required.
A new function in_pcblookup() looks up connections, and accepts flags
indicating how to return the inpcb. Due to lock order changes, callers
no longer need acquire locks before performing a lookup: the lookup
routine will acquire the ipi_hash_lock as needed. In the future, it will
also be able to use alternative lookup and locking strategies
transparently to callers, such as pcbgroup lookup. New lookup flags are,
supplementing the existing INPLOOKUP_WILDCARD flag:
INPLOOKUP_RLOCKPCB - Acquire a read lock on the returned inpcb
INPLOOKUP_WLOCKPCB - Acquire a write lock on the returned inpcb
Callers must pass exactly one of these flags (for the time being).
Some notes:
- All protocols are updated to work within the new regime; especially,
TCP, UDPv4, and UDPv6. pcbinfo ipi_lock acquisitions are largely
eliminated, and global hash lock hold times are dramatically reduced
compared to previous locking.
- The TCP syncache still relies on the pcbinfo lock, something that we
may want to revisit.
- Support for reverting to the FreeBSD 7.x locking strategy in TCP input
is no longer available -- hash lookup locks are now held only very
briefly during inpcb lookup, rather than for potentially extended
periods. However, the pcbinfo ipi_lock will still be acquired if a
connection state might change such that a connection is added or
removed.
- Raw IP sockets continue to use the pcbinfo ipi_lock for protection,
due to maintaining their own hash tables.
- The interface in6_pcblookup_hash_locked() is maintained, which allows
callers to acquire hash locks and perform one or more lookups atomically
with 4-tuple allocation: this is required only for TCPv6, as there is no
in6_pcbconnect_setup(), which there should be.
- UDPv6 locking remains significantly more conservative than UDPv4
locking, which relates to source address selection. This needs
attention, as it likely significantly reduces parallelism in this code
for multithreaded socket use (such as in BIND).
- In the UDPv4 and UDPv6 multicast cases, we need to revisit locking
somewhat, as they relied on ipi_lock to stablise 4-tuple matches, which
is no longer sufficient. A second check once the inpcb lock is held
should do the trick, keeping the general case from requiring the inpcb
lock for every inpcb visited.
- This work reminds us that we need to revisit locking of the v4/v6 flags,
which may be accessed lock-free both before and after this change.
- Right now, a single lock name is used for the pcbhash lock -- this is
undesirable, and probably another argument is required to take care of
this (or a char array name field in the pcbinfo?).
This is not an MFC candidate for 8.x due to its impact on lookup and
locking semantics. It's possible some of these issues could be worked
around with compatibility wrappers, if necessary.
Reviewed by: bz
Sponsored by: Juniper Networks, Inc.
2011-05-30 09:43:55 +00:00
|
|
|
lookupflags = 0;
|
Commit step 1 of the vimage project, (network stack)
virtualization work done by Marko Zec (zec@).
This is the first in a series of commits over the course
of the next few weeks.
Mark all uses of global variables to be virtualized
with a V_ prefix.
Use macros to map them back to their global names for
now, so this is a NOP change only.
We hope to have caught at least 85-90% of what is needed
so we do not invalidate a lot of outstanding patches again.
Obtained from: //depot/projects/vimage-commit2/...
Reviewed by: brooks, des, ed, mav, julian,
jamie, kris, rwatson, zec, ...
(various people I forgot, different versions)
md5 (with a bit of help)
Sponsored by: NLnet Foundation, The FreeBSD Foundation
X-MFC after: never
V_Commit_Message_Reviewed_By: more people than the patch
2008-08-17 23:27:27 +00:00
|
|
|
pi = &V_tcbinfo;
|
2003-11-07 23:26:57 +00:00
|
|
|
} else if (proto == IPPROTO_UDP) {
|
Decompose the current single inpcbinfo lock into two locks:
- The existing ipi_lock continues to protect the global inpcb list and
inpcb counter. This lock is now relegated to a small number of
allocation and free operations, and occasional operations that walk
all connections (including, awkwardly, certain UDP multicast receive
operations -- something to revisit).
- A new ipi_hash_lock protects the two inpcbinfo hash tables for
looking up connections and bound sockets, manipulated using new
INP_HASH_*() macros. This lock, combined with inpcb locks, protects
the 4-tuple address space.
Unlike the current ipi_lock, ipi_hash_lock follows the individual inpcb
connection locks, so may be acquired while manipulating a connection on
which a lock is already held, avoiding the need to acquire the inpcbinfo
lock preemptively when a binding change might later be required. As a
result, however, lookup operations necessarily go through a reference
acquire while holding the lookup lock, later acquiring an inpcb lock --
if required.
A new function in_pcblookup() looks up connections, and accepts flags
indicating how to return the inpcb. Due to lock order changes, callers
no longer need acquire locks before performing a lookup: the lookup
routine will acquire the ipi_hash_lock as needed. In the future, it will
also be able to use alternative lookup and locking strategies
transparently to callers, such as pcbgroup lookup. New lookup flags are,
supplementing the existing INPLOOKUP_WILDCARD flag:
INPLOOKUP_RLOCKPCB - Acquire a read lock on the returned inpcb
INPLOOKUP_WLOCKPCB - Acquire a write lock on the returned inpcb
Callers must pass exactly one of these flags (for the time being).
Some notes:
- All protocols are updated to work within the new regime; especially,
TCP, UDPv4, and UDPv6. pcbinfo ipi_lock acquisitions are largely
eliminated, and global hash lock hold times are dramatically reduced
compared to previous locking.
- The TCP syncache still relies on the pcbinfo lock, something that we
may want to revisit.
- Support for reverting to the FreeBSD 7.x locking strategy in TCP input
is no longer available -- hash lookup locks are now held only very
briefly during inpcb lookup, rather than for potentially extended
periods. However, the pcbinfo ipi_lock will still be acquired if a
connection state might change such that a connection is added or
removed.
- Raw IP sockets continue to use the pcbinfo ipi_lock for protection,
due to maintaining their own hash tables.
- The interface in6_pcblookup_hash_locked() is maintained, which allows
callers to acquire hash locks and perform one or more lookups atomically
with 4-tuple allocation: this is required only for TCPv6, as there is no
in6_pcbconnect_setup(), which there should be.
- UDPv6 locking remains significantly more conservative than UDPv4
locking, which relates to source address selection. This needs
attention, as it likely significantly reduces parallelism in this code
for multithreaded socket use (such as in BIND).
- In the UDPv4 and UDPv6 multicast cases, we need to revisit locking
somewhat, as they relied on ipi_lock to stablise 4-tuple matches, which
is no longer sufficient. A second check once the inpcb lock is held
should do the trick, keeping the general case from requiring the inpcb
lock for every inpcb visited.
- This work reminds us that we need to revisit locking of the v4/v6 flags,
which may be accessed lock-free both before and after this change.
- Right now, a single lock name is used for the pcbhash lock -- this is
undesirable, and probably another argument is required to take care of
this (or a char array name field in the pcbinfo?).
This is not an MFC candidate for 8.x due to its impact on lookup and
locking semantics. It's possible some of these issues could be worked
around with compatibility wrappers, if necessary.
Reviewed by: bz
Sponsored by: Juniper Networks, Inc.
2011-05-30 09:43:55 +00:00
|
|
|
lookupflags = INPLOOKUP_WILDCARD;
|
Commit step 1 of the vimage project, (network stack)
virtualization work done by Marko Zec (zec@).
This is the first in a series of commits over the course
of the next few weeks.
Mark all uses of global variables to be virtualized
with a V_ prefix.
Use macros to map them back to their global names for
now, so this is a NOP change only.
We hope to have caught at least 85-90% of what is needed
so we do not invalidate a lot of outstanding patches again.
Obtained from: //depot/projects/vimage-commit2/...
Reviewed by: brooks, des, ed, mav, julian,
jamie, kris, rwatson, zec, ...
(various people I forgot, different versions)
md5 (with a bit of help)
Sponsored by: NLnet Foundation, The FreeBSD Foundation
X-MFC after: never
V_Commit_Message_Reviewed_By: more people than the patch
2008-08-17 23:27:27 +00:00
|
|
|
pi = &V_udbinfo;
|
2003-11-07 23:26:57 +00:00
|
|
|
} else
|
|
|
|
return 0;
|
Decompose the current single inpcbinfo lock into two locks:
- The existing ipi_lock continues to protect the global inpcb list and
inpcb counter. This lock is now relegated to a small number of
allocation and free operations, and occasional operations that walk
all connections (including, awkwardly, certain UDP multicast receive
operations -- something to revisit).
- A new ipi_hash_lock protects the two inpcbinfo hash tables for
looking up connections and bound sockets, manipulated using new
INP_HASH_*() macros. This lock, combined with inpcb locks, protects
the 4-tuple address space.
Unlike the current ipi_lock, ipi_hash_lock follows the individual inpcb
connection locks, so may be acquired while manipulating a connection on
which a lock is already held, avoiding the need to acquire the inpcbinfo
lock preemptively when a binding change might later be required. As a
result, however, lookup operations necessarily go through a reference
acquire while holding the lookup lock, later acquiring an inpcb lock --
if required.
A new function in_pcblookup() looks up connections, and accepts flags
indicating how to return the inpcb. Due to lock order changes, callers
no longer need acquire locks before performing a lookup: the lookup
routine will acquire the ipi_hash_lock as needed. In the future, it will
also be able to use alternative lookup and locking strategies
transparently to callers, such as pcbgroup lookup. New lookup flags are,
supplementing the existing INPLOOKUP_WILDCARD flag:
INPLOOKUP_RLOCKPCB - Acquire a read lock on the returned inpcb
INPLOOKUP_WLOCKPCB - Acquire a write lock on the returned inpcb
Callers must pass exactly one of these flags (for the time being).
Some notes:
- All protocols are updated to work within the new regime; especially,
TCP, UDPv4, and UDPv6. pcbinfo ipi_lock acquisitions are largely
eliminated, and global hash lock hold times are dramatically reduced
compared to previous locking.
- The TCP syncache still relies on the pcbinfo lock, something that we
may want to revisit.
- Support for reverting to the FreeBSD 7.x locking strategy in TCP input
is no longer available -- hash lookup locks are now held only very
briefly during inpcb lookup, rather than for potentially extended
periods. However, the pcbinfo ipi_lock will still be acquired if a
connection state might change such that a connection is added or
removed.
- Raw IP sockets continue to use the pcbinfo ipi_lock for protection,
due to maintaining their own hash tables.
- The interface in6_pcblookup_hash_locked() is maintained, which allows
callers to acquire hash locks and perform one or more lookups atomically
with 4-tuple allocation: this is required only for TCPv6, as there is no
in6_pcbconnect_setup(), which there should be.
- UDPv6 locking remains significantly more conservative than UDPv4
locking, which relates to source address selection. This needs
attention, as it likely significantly reduces parallelism in this code
for multithreaded socket use (such as in BIND).
- In the UDPv4 and UDPv6 multicast cases, we need to revisit locking
somewhat, as they relied on ipi_lock to stablise 4-tuple matches, which
is no longer sufficient. A second check once the inpcb lock is held
should do the trick, keeping the general case from requiring the inpcb
lock for every inpcb visited.
- This work reminds us that we need to revisit locking of the v4/v6 flags,
which may be accessed lock-free both before and after this change.
- Right now, a single lock name is used for the pcbhash lock -- this is
undesirable, and probably another argument is required to take care of
this (or a char array name field in the pcbinfo?).
This is not an MFC candidate for 8.x due to its impact on lookup and
locking semantics. It's possible some of these issues could be worked
around with compatibility wrappers, if necessary.
Reviewed by: bz
Sponsored by: Juniper Networks, Inc.
2011-05-30 09:43:55 +00:00
|
|
|
lookupflags |= INPLOOKUP_RLOCKPCB;
|
2003-11-07 23:26:57 +00:00
|
|
|
match = 0;
|
2008-09-27 10:14:02 +00:00
|
|
|
if (*ugid_lookupp == 0) {
|
Implement a CPU-affine TCP and UDP connection lookup data structure,
struct inpcbgroup. pcbgroups, or "connection groups", supplement the
existing inpcbinfo connection hash table, which when pcbgroups are
enabled, might now be thought of more usefully as a per-protocol
4-tuple reservation table.
Connections are assigned to connection groups base on a hash of their
4-tuple; wildcard sockets require special handling, and are members
of all connection groups. During a connection lookup, a
per-connection group lock is employed rather than the global pcbinfo
lock. By aligning connection groups with input path processing,
connection groups take on an effective CPU affinity, especially when
aligned with RSS work placement (see a forthcoming commit for
details). This eliminates cache line migration associated with
global, protocol-layer data structures in steady state TCP and UDP
processing (with the exception of protocol-layer statistics; further
commit to follow).
Elements of this approach were inspired by Willman, Rixner, and Cox's
2006 USENIX paper, "An Evaluation of Network Stack Parallelization
Strategies in Modern Operating Systems". However, there are also
significant differences: we maintain the inpcb lock, rather than using
the connection group lock for per-connection state.
Likewise, the focus of this implementation is alignment with NIC
packet distribution strategies such as RSS, rather than pure software
strategies. Despite that focus, software distribution is supported
through the parallel netisr implementation, and works well in
configurations where the number of hardware threads is greater than
the number of NIC input queues, such as in the RMI XLR threaded MIPS
architecture.
Another important difference is the continued maintenance of existing
hash tables as "reservation tables" -- these are useful both to
distinguish the resource allocation aspect of protocol name management
and the more common-case lookup aspect. In configurations where
connection tables are aligned with hardware hashes, it is desirable to
use the traditional lookup tables for loopback or encapsulated traffic
rather than take the expense of hardware hashes that are hard to
implement efficiently in software (such as RSS Toeplitz).
Connection group support is enabled by compiling "options PCBGROUP"
into your kernel configuration; for the time being, this is an
experimental feature, and hence is not enabled by default.
Subject to the limited MFCability of change dependencies in inpcb,
and its change to the inpcbinfo init function signature, this change
in principle could be merged to FreeBSD 8.x.
Reviewed by: bz
Sponsored by: Juniper Networks, Inc.
2011-06-06 12:55:02 +00:00
|
|
|
/*
|
|
|
|
* XXXRW: If we had the mbuf here, could use
|
|
|
|
* in_pcblookup_mbuf().
|
|
|
|
*/
|
2004-06-11 22:17:14 +00:00
|
|
|
pcb = (oif) ?
|
Decompose the current single inpcbinfo lock into two locks:
- The existing ipi_lock continues to protect the global inpcb list and
inpcb counter. This lock is now relegated to a small number of
allocation and free operations, and occasional operations that walk
all connections (including, awkwardly, certain UDP multicast receive
operations -- something to revisit).
- A new ipi_hash_lock protects the two inpcbinfo hash tables for
looking up connections and bound sockets, manipulated using new
INP_HASH_*() macros. This lock, combined with inpcb locks, protects
the 4-tuple address space.
Unlike the current ipi_lock, ipi_hash_lock follows the individual inpcb
connection locks, so may be acquired while manipulating a connection on
which a lock is already held, avoiding the need to acquire the inpcbinfo
lock preemptively when a binding change might later be required. As a
result, however, lookup operations necessarily go through a reference
acquire while holding the lookup lock, later acquiring an inpcb lock --
if required.
A new function in_pcblookup() looks up connections, and accepts flags
indicating how to return the inpcb. Due to lock order changes, callers
no longer need acquire locks before performing a lookup: the lookup
routine will acquire the ipi_hash_lock as needed. In the future, it will
also be able to use alternative lookup and locking strategies
transparently to callers, such as pcbgroup lookup. New lookup flags are,
supplementing the existing INPLOOKUP_WILDCARD flag:
INPLOOKUP_RLOCKPCB - Acquire a read lock on the returned inpcb
INPLOOKUP_WLOCKPCB - Acquire a write lock on the returned inpcb
Callers must pass exactly one of these flags (for the time being).
Some notes:
- All protocols are updated to work within the new regime; especially,
TCP, UDPv4, and UDPv6. pcbinfo ipi_lock acquisitions are largely
eliminated, and global hash lock hold times are dramatically reduced
compared to previous locking.
- The TCP syncache still relies on the pcbinfo lock, something that we
may want to revisit.
- Support for reverting to the FreeBSD 7.x locking strategy in TCP input
is no longer available -- hash lookup locks are now held only very
briefly during inpcb lookup, rather than for potentially extended
periods. However, the pcbinfo ipi_lock will still be acquired if a
connection state might change such that a connection is added or
removed.
- Raw IP sockets continue to use the pcbinfo ipi_lock for protection,
due to maintaining their own hash tables.
- The interface in6_pcblookup_hash_locked() is maintained, which allows
callers to acquire hash locks and perform one or more lookups atomically
with 4-tuple allocation: this is required only for TCPv6, as there is no
in6_pcbconnect_setup(), which there should be.
- UDPv6 locking remains significantly more conservative than UDPv4
locking, which relates to source address selection. This needs
attention, as it likely significantly reduces parallelism in this code
for multithreaded socket use (such as in BIND).
- In the UDPv4 and UDPv6 multicast cases, we need to revisit locking
somewhat, as they relied on ipi_lock to stablise 4-tuple matches, which
is no longer sufficient. A second check once the inpcb lock is held
should do the trick, keeping the general case from requiring the inpcb
lock for every inpcb visited.
- This work reminds us that we need to revisit locking of the v4/v6 flags,
which may be accessed lock-free both before and after this change.
- Right now, a single lock name is used for the pcbhash lock -- this is
undesirable, and probably another argument is required to take care of
this (or a char array name field in the pcbinfo?).
This is not an MFC candidate for 8.x due to its impact on lookup and
locking semantics. It's possible some of these issues could be worked
around with compatibility wrappers, if necessary.
Reviewed by: bz
Sponsored by: Juniper Networks, Inc.
2011-05-30 09:43:55 +00:00
|
|
|
in_pcblookup(pi,
|
2004-06-11 22:17:14 +00:00
|
|
|
dst_ip, htons(dst_port),
|
|
|
|
src_ip, htons(src_port),
|
Decompose the current single inpcbinfo lock into two locks:
- The existing ipi_lock continues to protect the global inpcb list and
inpcb counter. This lock is now relegated to a small number of
allocation and free operations, and occasional operations that walk
all connections (including, awkwardly, certain UDP multicast receive
operations -- something to revisit).
- A new ipi_hash_lock protects the two inpcbinfo hash tables for
looking up connections and bound sockets, manipulated using new
INP_HASH_*() macros. This lock, combined with inpcb locks, protects
the 4-tuple address space.
Unlike the current ipi_lock, ipi_hash_lock follows the individual inpcb
connection locks, so may be acquired while manipulating a connection on
which a lock is already held, avoiding the need to acquire the inpcbinfo
lock preemptively when a binding change might later be required. As a
result, however, lookup operations necessarily go through a reference
acquire while holding the lookup lock, later acquiring an inpcb lock --
if required.
A new function in_pcblookup() looks up connections, and accepts flags
indicating how to return the inpcb. Due to lock order changes, callers
no longer need acquire locks before performing a lookup: the lookup
routine will acquire the ipi_hash_lock as needed. In the future, it will
also be able to use alternative lookup and locking strategies
transparently to callers, such as pcbgroup lookup. New lookup flags are,
supplementing the existing INPLOOKUP_WILDCARD flag:
INPLOOKUP_RLOCKPCB - Acquire a read lock on the returned inpcb
INPLOOKUP_WLOCKPCB - Acquire a write lock on the returned inpcb
Callers must pass exactly one of these flags (for the time being).
Some notes:
- All protocols are updated to work within the new regime; especially,
TCP, UDPv4, and UDPv6. pcbinfo ipi_lock acquisitions are largely
eliminated, and global hash lock hold times are dramatically reduced
compared to previous locking.
- The TCP syncache still relies on the pcbinfo lock, something that we
may want to revisit.
- Support for reverting to the FreeBSD 7.x locking strategy in TCP input
is no longer available -- hash lookup locks are now held only very
briefly during inpcb lookup, rather than for potentially extended
periods. However, the pcbinfo ipi_lock will still be acquired if a
connection state might change such that a connection is added or
removed.
- Raw IP sockets continue to use the pcbinfo ipi_lock for protection,
due to maintaining their own hash tables.
- The interface in6_pcblookup_hash_locked() is maintained, which allows
callers to acquire hash locks and perform one or more lookups atomically
with 4-tuple allocation: this is required only for TCPv6, as there is no
in6_pcbconnect_setup(), which there should be.
- UDPv6 locking remains significantly more conservative than UDPv4
locking, which relates to source address selection. This needs
attention, as it likely significantly reduces parallelism in this code
for multithreaded socket use (such as in BIND).
- In the UDPv4 and UDPv6 multicast cases, we need to revisit locking
somewhat, as they relied on ipi_lock to stablise 4-tuple matches, which
is no longer sufficient. A second check once the inpcb lock is held
should do the trick, keeping the general case from requiring the inpcb
lock for every inpcb visited.
- This work reminds us that we need to revisit locking of the v4/v6 flags,
which may be accessed lock-free both before and after this change.
- Right now, a single lock name is used for the pcbhash lock -- this is
undesirable, and probably another argument is required to take care of
this (or a char array name field in the pcbinfo?).
This is not an MFC candidate for 8.x due to its impact on lookup and
locking semantics. It's possible some of these issues could be worked
around with compatibility wrappers, if necessary.
Reviewed by: bz
Sponsored by: Juniper Networks, Inc.
2011-05-30 09:43:55 +00:00
|
|
|
lookupflags, oif) :
|
|
|
|
in_pcblookup(pi,
|
2004-06-11 22:17:14 +00:00
|
|
|
src_ip, htons(src_port),
|
|
|
|
dst_ip, htons(dst_port),
|
Decompose the current single inpcbinfo lock into two locks:
- The existing ipi_lock continues to protect the global inpcb list and
inpcb counter. This lock is now relegated to a small number of
allocation and free operations, and occasional operations that walk
all connections (including, awkwardly, certain UDP multicast receive
operations -- something to revisit).
- A new ipi_hash_lock protects the two inpcbinfo hash tables for
looking up connections and bound sockets, manipulated using new
INP_HASH_*() macros. This lock, combined with inpcb locks, protects
the 4-tuple address space.
Unlike the current ipi_lock, ipi_hash_lock follows the individual inpcb
connection locks, so may be acquired while manipulating a connection on
which a lock is already held, avoiding the need to acquire the inpcbinfo
lock preemptively when a binding change might later be required. As a
result, however, lookup operations necessarily go through a reference
acquire while holding the lookup lock, later acquiring an inpcb lock --
if required.
A new function in_pcblookup() looks up connections, and accepts flags
indicating how to return the inpcb. Due to lock order changes, callers
no longer need acquire locks before performing a lookup: the lookup
routine will acquire the ipi_hash_lock as needed. In the future, it will
also be able to use alternative lookup and locking strategies
transparently to callers, such as pcbgroup lookup. New lookup flags are,
supplementing the existing INPLOOKUP_WILDCARD flag:
INPLOOKUP_RLOCKPCB - Acquire a read lock on the returned inpcb
INPLOOKUP_WLOCKPCB - Acquire a write lock on the returned inpcb
Callers must pass exactly one of these flags (for the time being).
Some notes:
- All protocols are updated to work within the new regime; especially,
TCP, UDPv4, and UDPv6. pcbinfo ipi_lock acquisitions are largely
eliminated, and global hash lock hold times are dramatically reduced
compared to previous locking.
- The TCP syncache still relies on the pcbinfo lock, something that we
may want to revisit.
- Support for reverting to the FreeBSD 7.x locking strategy in TCP input
is no longer available -- hash lookup locks are now held only very
briefly during inpcb lookup, rather than for potentially extended
periods. However, the pcbinfo ipi_lock will still be acquired if a
connection state might change such that a connection is added or
removed.
- Raw IP sockets continue to use the pcbinfo ipi_lock for protection,
due to maintaining their own hash tables.
- The interface in6_pcblookup_hash_locked() is maintained, which allows
callers to acquire hash locks and perform one or more lookups atomically
with 4-tuple allocation: this is required only for TCPv6, as there is no
in6_pcbconnect_setup(), which there should be.
- UDPv6 locking remains significantly more conservative than UDPv4
locking, which relates to source address selection. This needs
attention, as it likely significantly reduces parallelism in this code
for multithreaded socket use (such as in BIND).
- In the UDPv4 and UDPv6 multicast cases, we need to revisit locking
somewhat, as they relied on ipi_lock to stablise 4-tuple matches, which
is no longer sufficient. A second check once the inpcb lock is held
should do the trick, keeping the general case from requiring the inpcb
lock for every inpcb visited.
- This work reminds us that we need to revisit locking of the v4/v6 flags,
which may be accessed lock-free both before and after this change.
- Right now, a single lock name is used for the pcbhash lock -- this is
undesirable, and probably another argument is required to take care of
this (or a char array name field in the pcbinfo?).
This is not an MFC candidate for 8.x due to its impact on lookup and
locking semantics. It's possible some of these issues could be worked
around with compatibility wrappers, if necessary.
Reviewed by: bz
Sponsored by: Juniper Networks, Inc.
2011-05-30 09:43:55 +00:00
|
|
|
lookupflags, NULL);
|
2004-06-11 22:17:14 +00:00
|
|
|
if (pcb != NULL) {
|
Decompose the current single inpcbinfo lock into two locks:
- The existing ipi_lock continues to protect the global inpcb list and
inpcb counter. This lock is now relegated to a small number of
allocation and free operations, and occasional operations that walk
all connections (including, awkwardly, certain UDP multicast receive
operations -- something to revisit).
- A new ipi_hash_lock protects the two inpcbinfo hash tables for
looking up connections and bound sockets, manipulated using new
INP_HASH_*() macros. This lock, combined with inpcb locks, protects
the 4-tuple address space.
Unlike the current ipi_lock, ipi_hash_lock follows the individual inpcb
connection locks, so may be acquired while manipulating a connection on
which a lock is already held, avoiding the need to acquire the inpcbinfo
lock preemptively when a binding change might later be required. As a
result, however, lookup operations necessarily go through a reference
acquire while holding the lookup lock, later acquiring an inpcb lock --
if required.
A new function in_pcblookup() looks up connections, and accepts flags
indicating how to return the inpcb. Due to lock order changes, callers
no longer need acquire locks before performing a lookup: the lookup
routine will acquire the ipi_hash_lock as needed. In the future, it will
also be able to use alternative lookup and locking strategies
transparently to callers, such as pcbgroup lookup. New lookup flags are,
supplementing the existing INPLOOKUP_WILDCARD flag:
INPLOOKUP_RLOCKPCB - Acquire a read lock on the returned inpcb
INPLOOKUP_WLOCKPCB - Acquire a write lock on the returned inpcb
Callers must pass exactly one of these flags (for the time being).
Some notes:
- All protocols are updated to work within the new regime; especially,
TCP, UDPv4, and UDPv6. pcbinfo ipi_lock acquisitions are largely
eliminated, and global hash lock hold times are dramatically reduced
compared to previous locking.
- The TCP syncache still relies on the pcbinfo lock, something that we
may want to revisit.
- Support for reverting to the FreeBSD 7.x locking strategy in TCP input
is no longer available -- hash lookup locks are now held only very
briefly during inpcb lookup, rather than for potentially extended
periods. However, the pcbinfo ipi_lock will still be acquired if a
connection state might change such that a connection is added or
removed.
- Raw IP sockets continue to use the pcbinfo ipi_lock for protection,
due to maintaining their own hash tables.
- The interface in6_pcblookup_hash_locked() is maintained, which allows
callers to acquire hash locks and perform one or more lookups atomically
with 4-tuple allocation: this is required only for TCPv6, as there is no
in6_pcbconnect_setup(), which there should be.
- UDPv6 locking remains significantly more conservative than UDPv4
locking, which relates to source address selection. This needs
attention, as it likely significantly reduces parallelism in this code
for multithreaded socket use (such as in BIND).
- In the UDPv4 and UDPv6 multicast cases, we need to revisit locking
somewhat, as they relied on ipi_lock to stablise 4-tuple matches, which
is no longer sufficient. A second check once the inpcb lock is held
should do the trick, keeping the general case from requiring the inpcb
lock for every inpcb visited.
- This work reminds us that we need to revisit locking of the v4/v6 flags,
which may be accessed lock-free both before and after this change.
- Right now, a single lock name is used for the pcbhash lock -- this is
undesirable, and probably another argument is required to take care of
this (or a char array name field in the pcbinfo?).
This is not an MFC candidate for 8.x due to its impact on lookup and
locking semantics. It's possible some of these issues could be worked
around with compatibility wrappers, if necessary.
Reviewed by: bz
Sponsored by: Juniper Networks, Inc.
2011-05-30 09:43:55 +00:00
|
|
|
INP_RLOCK_ASSERT(pcb);
|
2009-08-14 10:09:45 +00:00
|
|
|
*uc = crhold(pcb->inp_cred);
|
2008-10-04 15:06:34 +00:00
|
|
|
*ugid_lookupp = 1;
|
Decompose the current single inpcbinfo lock into two locks:
- The existing ipi_lock continues to protect the global inpcb list and
inpcb counter. This lock is now relegated to a small number of
allocation and free operations, and occasional operations that walk
all connections (including, awkwardly, certain UDP multicast receive
operations -- something to revisit).
- A new ipi_hash_lock protects the two inpcbinfo hash tables for
looking up connections and bound sockets, manipulated using new
INP_HASH_*() macros. This lock, combined with inpcb locks, protects
the 4-tuple address space.
Unlike the current ipi_lock, ipi_hash_lock follows the individual inpcb
connection locks, so may be acquired while manipulating a connection on
which a lock is already held, avoiding the need to acquire the inpcbinfo
lock preemptively when a binding change might later be required. As a
result, however, lookup operations necessarily go through a reference
acquire while holding the lookup lock, later acquiring an inpcb lock --
if required.
A new function in_pcblookup() looks up connections, and accepts flags
indicating how to return the inpcb. Due to lock order changes, callers
no longer need acquire locks before performing a lookup: the lookup
routine will acquire the ipi_hash_lock as needed. In the future, it will
also be able to use alternative lookup and locking strategies
transparently to callers, such as pcbgroup lookup. New lookup flags are,
supplementing the existing INPLOOKUP_WILDCARD flag:
INPLOOKUP_RLOCKPCB - Acquire a read lock on the returned inpcb
INPLOOKUP_WLOCKPCB - Acquire a write lock on the returned inpcb
Callers must pass exactly one of these flags (for the time being).
Some notes:
- All protocols are updated to work within the new regime; especially,
TCP, UDPv4, and UDPv6. pcbinfo ipi_lock acquisitions are largely
eliminated, and global hash lock hold times are dramatically reduced
compared to previous locking.
- The TCP syncache still relies on the pcbinfo lock, something that we
may want to revisit.
- Support for reverting to the FreeBSD 7.x locking strategy in TCP input
is no longer available -- hash lookup locks are now held only very
briefly during inpcb lookup, rather than for potentially extended
periods. However, the pcbinfo ipi_lock will still be acquired if a
connection state might change such that a connection is added or
removed.
- Raw IP sockets continue to use the pcbinfo ipi_lock for protection,
due to maintaining their own hash tables.
- The interface in6_pcblookup_hash_locked() is maintained, which allows
callers to acquire hash locks and perform one or more lookups atomically
with 4-tuple allocation: this is required only for TCPv6, as there is no
in6_pcbconnect_setup(), which there should be.
- UDPv6 locking remains significantly more conservative than UDPv4
locking, which relates to source address selection. This needs
attention, as it likely significantly reduces parallelism in this code
for multithreaded socket use (such as in BIND).
- In the UDPv4 and UDPv6 multicast cases, we need to revisit locking
somewhat, as they relied on ipi_lock to stablise 4-tuple matches, which
is no longer sufficient. A second check once the inpcb lock is held
should do the trick, keeping the general case from requiring the inpcb
lock for every inpcb visited.
- This work reminds us that we need to revisit locking of the v4/v6 flags,
which may be accessed lock-free both before and after this change.
- Right now, a single lock name is used for the pcbhash lock -- this is
undesirable, and probably another argument is required to take care of
this (or a char array name field in the pcbinfo?).
This is not an MFC candidate for 8.x due to its impact on lookup and
locking semantics. It's possible some of these issues could be worked
around with compatibility wrappers, if necessary.
Reviewed by: bz
Sponsored by: Juniper Networks, Inc.
2011-05-30 09:43:55 +00:00
|
|
|
INP_RUNLOCK(pcb);
|
2003-11-07 23:26:57 +00:00
|
|
|
}
|
2008-09-27 10:14:02 +00:00
|
|
|
if (*ugid_lookupp == 0) {
|
2004-06-11 22:17:14 +00:00
|
|
|
/*
|
2009-12-16 10:48:40 +00:00
|
|
|
* We tried and failed, set the variable to -1
|
|
|
|
* so we will not try again on this packet.
|
2004-06-11 22:17:14 +00:00
|
|
|
*/
|
2008-09-27 10:14:02 +00:00
|
|
|
*ugid_lookupp = -1;
|
2004-06-11 22:17:14 +00:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (insn->o.opcode == O_UID)
|
2009-06-19 17:10:35 +00:00
|
|
|
match = ((*uc)->cr_uid == (uid_t)insn->d[0]);
|
|
|
|
else if (insn->o.opcode == O_GID)
|
|
|
|
match = groupmember((gid_t)insn->d[0], *uc);
|
|
|
|
else if (insn->o.opcode == O_JAIL)
|
|
|
|
match = ((*uc)->cr_prison->pr_id == (int)insn->d[0]);
|
2003-11-07 23:26:57 +00:00
|
|
|
return match;
|
Bring in the most recent version of ipfw and dummynet, developed
and tested over the past two months in the ipfw3-head branch. This
also happens to be the same code available in the Linux and Windows
ports of ipfw and dummynet.
The major enhancement is a completely restructured version of
dummynet, with support for different packet scheduling algorithms
(loadable at runtime), faster queue/pipe lookup, and a much cleaner
internal architecture and kernel/userland ABI which simplifies
future extensions.
In addition to the existing schedulers (FIFO and WF2Q+), we include
a Deficit Round Robin (DRR or RR for brevity) scheduler, and a new,
very fast version of WF2Q+ called QFQ.
Some test code is also present (in sys/netinet/ipfw/test) that
lets you build and test schedulers in userland.
Also, we have added a compatibility layer that understands requests
from the RELENG_7 and RELENG_8 versions of the /sbin/ipfw binaries,
and replies correctly (at least, it does its best; sometimes you
just cannot tell who sent the request and how to answer).
The compatibility layer should make it possible to MFC this code in a
relatively short time.
Some minor glitches (e.g. handling of ipfw set enable/disable,
and a workaround for a bug in RELENG_7's /sbin/ipfw) will be
fixed with separate commits.
CREDITS:
This work has been partly supported by the ONELAB2 project, and
mostly developed by Riccardo Panicucci and myself.
The code for the qfq scheduler is mostly from Fabio Checconi,
and Marta Carbone and Francesco Magno have helped with testing,
debugging and some bug fixes.
2010-03-02 17:40:48 +00:00
|
|
|
#endif /* __FreeBSD__ */
|
2003-11-07 23:26:57 +00:00
|
|
|
}
|
|
|
|
|
2009-12-22 13:53:34 +00:00
|
|
|
/*
|
2010-01-04 19:01:22 +00:00
|
|
|
* Helper function to set args with info on the rule after the matching
|
|
|
|
* one. slot is precise, whereas we guess rule_id as they are
|
|
|
|
* assigned sequentially.
|
2009-12-22 13:53:34 +00:00
|
|
|
*/
|
|
|
|
static inline void
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
set_match(struct ip_fw_args *args, int slot,
|
|
|
|
struct ip_fw_chain *chain)
|
2009-12-22 13:53:34 +00:00
|
|
|
{
|
2010-01-04 19:01:22 +00:00
|
|
|
args->rule.chain_id = chain->id;
|
|
|
|
args->rule.slot = slot + 1; /* we use 0 as a marker */
|
|
|
|
args->rule.rule_id = 1 + chain->map[slot]->id;
|
|
|
|
args->rule.rulenum = chain->map[slot]->rulenum;
|
2009-12-22 13:53:34 +00:00
|
|
|
}
|
|
|
|
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
/*
|
|
|
|
* The main check routine for the firewall.
|
|
|
|
*
|
|
|
|
* All arguments are in args so we can modify them and return them
|
|
|
|
* back to the caller.
|
|
|
|
*
|
|
|
|
* Parameters:
|
|
|
|
*
|
|
|
|
* args->m (in/out) The packet; we set to NULL when/if we nuke it.
|
|
|
|
* Starts with the IP header.
|
2009-12-16 10:48:40 +00:00
|
|
|
* args->eh (in) Mac header if present, NULL for layer3 packet.
|
2007-01-02 19:57:31 +00:00
|
|
|
* args->L3offset Number of bytes bypassed if we came from L2.
|
|
|
|
* e.g. often sizeof(eh) ** NOTYET **
|
2009-12-16 10:48:40 +00:00
|
|
|
* args->oif Outgoing interface, NULL if packet is incoming.
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
* The incoming interface is in the mbuf. (in)
|
|
|
|
* args->divert_rule (in/out)
|
|
|
|
* Skip up to the first rule past this rule number;
|
|
|
|
* upon return, non-zero port number for divert or tee.
|
|
|
|
*
|
|
|
|
* args->rule Pointer to the last matching rule (in/out)
|
|
|
|
* args->next_hop Socket we are forwarding to (out).
|
|
|
|
* args->f_id Addresses grabbed from the packet (out)
|
2010-01-04 19:01:22 +00:00
|
|
|
* args->rule.info a cookie depending on rule action
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
*
|
|
|
|
* Return value:
|
|
|
|
*
|
2005-01-14 09:00:46 +00:00
|
|
|
* IP_FW_PASS the packet must be accepted
|
|
|
|
* IP_FW_DENY the packet must be dropped
|
|
|
|
* IP_FW_DIVERT divert packet, port in m_tag
|
|
|
|
* IP_FW_TEE tee packet, port in m_tag
|
|
|
|
* IP_FW_DUMMYNET to dummynet, pipe in args->cookie
|
|
|
|
* IP_FW_NETGRAPH into netgraph, cookie args->cookie
|
2010-01-04 19:01:22 +00:00
|
|
|
* args->rule contains the matching rule,
|
|
|
|
* args->rule.info has additional information.
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
*
|
|
|
|
*/
|
2004-08-17 22:05:54 +00:00
|
|
|
int
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
ipfw_chk(struct ip_fw_args *args)
|
|
|
|
{
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparision between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
/*
|
2009-12-16 10:48:40 +00:00
|
|
|
* Local variables holding state while processing a packet:
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
*
|
|
|
|
* IMPORTANT NOTE: to speed up the processing of rules, there
|
|
|
|
* are some assumption on the values of the variables, which
|
|
|
|
* are documented here. Should you change them, please check
|
|
|
|
* the implementation of the various instructions to make sure
|
|
|
|
* that they still work.
|
2002-08-16 10:31:47 +00:00
|
|
|
*
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
* args->eh The MAC header. It is non-null for a layer2
|
|
|
|
* packet, it is NULL for a layer-3 packet.
|
2007-01-02 19:57:31 +00:00
|
|
|
* **notyet**
|
|
|
|
* args->L3offset Offset in the packet to the L3 (IP or equiv.) header.
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
*
|
|
|
|
* m | args->m Pointer to the mbuf, as received from the caller.
|
|
|
|
* It may change if ipfw_chk() does an m_pullup, or if it
|
|
|
|
* consumes the packet because it calls send_reject().
|
|
|
|
* XXX This has to change, so that ipfw_chk() never modifies
|
|
|
|
* or consumes the buffer.
|
2007-01-02 19:57:31 +00:00
|
|
|
* ip is the beginning of the ip(4 or 6) header.
|
|
|
|
* Calculated by adding the L3offset to the start of data.
|
|
|
|
* (Until we start using L3offset, the packet is
|
|
|
|
* supposed to start with the ip header).
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
*/
|
|
|
|
struct mbuf *m = args->m;
|
|
|
|
struct ip *ip = mtod(m, struct ip *);
|
|
|
|
|
2004-06-11 22:17:14 +00:00
|
|
|
/*
|
|
|
|
* For rules which contain uid/gid or jail constraints, cache
|
|
|
|
* a copy of the users credentials after the pcb lookup has been
|
|
|
|
* executed. This will speed up the processing of rules with
|
|
|
|
* these types of constraints, as well as decrease contention
|
|
|
|
* on pcb related locks.
|
|
|
|
*/
|
Bring in the most recent version of ipfw and dummynet, developed
and tested over the past two months in the ipfw3-head branch. This
also happens to be the same code available in the Linux and Windows
ports of ipfw and dummynet.
The major enhancement is a completely restructured version of
dummynet, with support for different packet scheduling algorithms
(loadable at runtime), faster queue/pipe lookup, and a much cleaner
internal architecture and kernel/userland ABI which simplifies
future extensions.
In addition to the existing schedulers (FIFO and WF2Q+), we include
a Deficit Round Robin (DRR or RR for brevity) scheduler, and a new,
very fast version of WF2Q+ called QFQ.
Some test code is also present (in sys/netinet/ipfw/test) that
lets you build and test schedulers in userland.
Also, we have added a compatibility layer that understands requests
from the RELENG_7 and RELENG_8 versions of the /sbin/ipfw binaries,
and replies correctly (at least, it does its best; sometimes you
just cannot tell who sent the request and how to answer).
The compatibility layer should make it possible to MFC this code in a
relatively short time.
Some minor glitches (e.g. handling of ipfw set enable/disable,
and a workaround for a bug in RELENG_7's /sbin/ipfw) will be
fixed with separate commits.
CREDITS:
This work has been partly supported by the ONELAB2 project, and
mostly developed by Riccardo Panicucci and myself.
The code for the qfq scheduler is mostly from Fabio Checconi,
and Marta Carbone and Francesco Magno have helped with testing,
debugging and some bug fixes.
2010-03-02 17:40:48 +00:00
|
|
|
#ifndef __FreeBSD__
|
|
|
|
struct bsd_ucred ucred_cache;
|
|
|
|
#else
|
2009-06-19 17:10:35 +00:00
|
|
|
struct ucred *ucred_cache = NULL;
|
Bring in the most recent version of ipfw and dummynet, developed
and tested over the past two months in the ipfw3-head branch. This
also happens to be the same code available in the Linux and Windows
ports of ipfw and dummynet.
The major enhancement is a completely restructured version of
dummynet, with support for different packet scheduling algorithms
(loadable at runtime), faster queue/pipe lookup, and a much cleaner
internal architecture and kernel/userland ABI which simplifies
future extensions.
In addition to the existing schedulers (FIFO and WF2Q+), we include
a Deficit Round Robin (DRR or RR for brevity) scheduler, and a new,
very fast version of WF2Q+ called QFQ.
Some test code is also present (in sys/netinet/ipfw/test) that
lets you build and test schedulers in userland.
Also, we have added a compatibility layer that understands requests
from the RELENG_7 and RELENG_8 versions of the /sbin/ipfw binaries,
and replies correctly (at least, it does its best; sometimes you
just cannot tell who sent the request and how to answer).
The compatibility layer should make it possible to MFC this code in a
relatively short time.
Some minor glitches (e.g. handling of ipfw set enable/disable,
and a workaround for a bug in RELENG_7's /sbin/ipfw) will be
fixed with separate commits.
CREDITS:
This work has been partly supported by the ONELAB2 project, and
mostly developed by Riccardo Panicucci and myself.
The code for the qfq scheduler is mostly from Fabio Checconi,
and Marta Carbone and Francesco Magno have helped with testing,
debugging and some bug fixes.
2010-03-02 17:40:48 +00:00
|
|
|
#endif
|
2009-06-19 17:10:35 +00:00
|
|
|
int ucred_lookup = 0;
|
2004-06-11 22:17:14 +00:00
|
|
|
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
/*
|
|
|
|
* oif | args->oif If NULL, ipfw_chk has been called on the
|
2005-09-27 18:10:43 +00:00
|
|
|
* inbound path (ether_input, ip_input).
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
* If non-NULL, ipfw_chk has been called on the outbound path
|
|
|
|
* (ether_output, ip_output).
|
|
|
|
*/
|
|
|
|
struct ifnet *oif = args->oif;
|
|
|
|
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
int f_pos = 0; /* index of current rule in the array */
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
int retval = 0;
|
|
|
|
|
|
|
|
/*
|
2005-07-03 15:42:22 +00:00
|
|
|
* hlen The length of the IP header.
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
*/
|
|
|
|
u_int hlen = 0; /* hlen >0 means we have an IP pkt */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* offset The offset of a fragment. offset != 0 means that
|
|
|
|
* we have a fragment at this offset of an IPv4 packet.
|
|
|
|
* offset == 0 means that (if this is an IPv4 packet)
|
|
|
|
* this is the first or only fragment.
|
2005-08-13 11:02:34 +00:00
|
|
|
* For IPv6 offset == 0 means there is no Fragment Header.
|
|
|
|
* If offset != 0 for IPv6 always use correct mask to
|
|
|
|
* get the correct offset because we add IP6F_MORE_FRAG
|
|
|
|
* to be able to dectect the first fragment which would
|
|
|
|
* otherwise have offset = 0.
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
*/
|
|
|
|
u_short offset = 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Local copies of addresses. They are only valid if we have
|
|
|
|
* an IP packet.
|
|
|
|
*
|
|
|
|
* proto The protocol. Set to 0 for non-ip packets,
|
|
|
|
* or to the protocol read from the packet otherwise.
|
|
|
|
* proto != 0 means that we have an IPv4 packet.
|
|
|
|
*
|
|
|
|
* src_port, dst_port port numbers, in HOST format. Only
|
|
|
|
* valid for TCP and UDP packets.
|
|
|
|
*
|
|
|
|
* src_ip, dst_ip ip addresses, in NETWORK format.
|
|
|
|
* Only valid for IPv4 packets.
|
|
|
|
*/
|
2009-12-28 10:47:04 +00:00
|
|
|
uint8_t proto;
|
|
|
|
uint16_t src_port = 0, dst_port = 0; /* NOTE: host format */
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
struct in_addr src_ip, dst_ip; /* NOTE: network format */
|
2009-12-28 10:47:04 +00:00
|
|
|
uint16_t iplen=0;
|
2003-06-02 23:54:09 +00:00
|
|
|
int pktlen;
|
2009-12-28 10:47:04 +00:00
|
|
|
uint16_t etype = 0; /* Host order stored ether type */
|
2005-04-15 00:47:44 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* dyn_dir = MATCH_UNKNOWN when rules unchecked,
|
|
|
|
* MATCH_NONE when checked and not matched (q = NULL),
|
|
|
|
* MATCH_FORWARD or MATCH_REVERSE otherwise (q != NULL)
|
|
|
|
*/
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
int dyn_dir = MATCH_UNKNOWN;
|
|
|
|
ipfw_dyn_rule *q = NULL;
|
Commit step 1 of the vimage project, (network stack)
virtualization work done by Marko Zec (zec@).
This is the first in a series of commits over the course
of the next few weeks.
Mark all uses of global variables to be virtualized
with a V_ prefix.
Use macros to map them back to their global names for
now, so this is a NOP change only.
We hope to have caught at least 85-90% of what is needed
so we do not invalidate a lot of outstanding patches again.
Obtained from: //depot/projects/vimage-commit2/...
Reviewed by: brooks, des, ed, mav, julian,
jamie, kris, rwatson, zec, ...
(various people I forgot, different versions)
md5 (with a bit of help)
Sponsored by: NLnet Foundation, The FreeBSD Foundation
X-MFC after: never
V_Commit_Message_Reviewed_By: more people than the patch
2008-08-17 23:27:27 +00:00
|
|
|
struct ip_fw_chain *chain = &V_layer3_chain;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
/*
|
2005-04-15 00:47:44 +00:00
|
|
|
* We store in ulp a pointer to the upper layer protocol header.
|
|
|
|
* In the ipv4 case this is easy to determine from the header,
|
|
|
|
* but for ipv6 we might have some additional headers in the middle.
|
|
|
|
* ulp is NULL if not found.
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
*/
|
2005-04-15 00:47:44 +00:00
|
|
|
void *ulp = NULL; /* upper layer protocol pointer. */
|
2010-03-15 17:14:27 +00:00
|
|
|
|
2005-04-18 18:35:05 +00:00
|
|
|
/* XXX ipv6 variables */
|
|
|
|
int is_ipv6 = 0;
|
2010-03-15 17:14:27 +00:00
|
|
|
uint8_t icmp6_type = 0;
|
|
|
|
uint16_t ext_hd = 0; /* bits vector for extension header filtering */
|
2005-04-18 18:35:05 +00:00
|
|
|
/* end of ipv6 variables */
|
2010-03-15 17:14:27 +00:00
|
|
|
|
2005-06-03 01:10:28 +00:00
|
|
|
int is_ipv4 = 0;
|
2005-04-15 00:47:44 +00:00
|
|
|
|
2009-12-03 14:22:15 +00:00
|
|
|
int done = 0; /* flag to exit the outer loop */
|
|
|
|
|
2009-08-21 11:20:10 +00:00
|
|
|
if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready))
|
2005-04-15 00:47:44 +00:00
|
|
|
return (IP_FW_PASS); /* accept */
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
2009-01-28 13:39:01 +00:00
|
|
|
dst_ip.s_addr = 0; /* make sure it is initialized */
|
2009-12-02 17:50:52 +00:00
|
|
|
src_ip.s_addr = 0; /* make sure it is initialized */
|
2003-06-02 23:54:09 +00:00
|
|
|
pktlen = m->m_pkthdr.len;
|
Add code to allow the system to handle multiple routing tables.
This particular implementation is designed to be fully backwards compatible
and to be MFC-able to 7.x (and 6.x)
Currently the only protocol that can make use of the multiple tables is IPv4
Similar functionality exists in OpenBSD and Linux.
From my notes:
-----
One thing where FreeBSD has been falling behind, and which by chance I
have some time to work on is "policy based routing", which allows
different
packet streams to be routed by more than just the destination address.
Constraints:
------------
I want to make some form of this available in the 6.x tree
(and by extension 7.x) , but FreeBSD in general needs it so I might as
well do it in -current and back port the portions I need.
One of the ways that this can be done is to have the ability to
instantiate multiple kernel routing tables (which I will now
refer to as "Forwarding Information Bases" or "FIBs" for political
correctness reasons). Which FIB a particular packet uses to make
the next hop decision can be decided by a number of mechanisms.
The policies these mechanisms implement are the "Policies" referred
to in "Policy based routing".
One of the constraints I have if I try to back port this work to
6.x is that it must be implemented as a EXTENSION to the existing
ABIs in 6.x so that third party applications do not need to be
recompiled in timespan of the branch.
This first version will not have some of the bells and whistles that
will come with later versions. It will, for example, be limited to 16
tables in the first commit.
Implementation method, Compatible version. (part 1)
-------------------------------
For this reason I have implemented a "sufficient subset" of a
multiple routing table solution in Perforce, and back-ported it
to 6.x. (also in Perforce though not always caught up with what I
have done in -current/P4). The subset allows a number of FIBs
to be defined at compile time (8 is sufficient for my purposes in 6.x)
and implements the changes needed to allow IPV4 to use them. I have not
done the changes for ipv6 simply because I do not need it, and I do not
have enough knowledge of ipv6 (e.g. neighbor discovery) needed to do it.
Other protocol families are left untouched and should there be
users with proprietary protocol families, they should continue to work
and be oblivious to the existence of the extra FIBs.
To understand how this is done, one must know that the current FIB
code starts everything off with a single dimensional array of
pointers to FIB head structures (One per protocol family), each of
which in turn points to the trie of routes available to that family.
The basic change in the ABI compatible version of the change is to
extent that array to be a 2 dimensional array, so that
instead of protocol family X looking at rt_tables[X] for the
table it needs, it looks at rt_tables[Y][X] when for all
protocol families except ipv4 Y is always 0.
Code that is unaware of the change always just sees the first row
of the table, which of course looks just like the one dimensional
array that existed before.
The entry points rtrequest(), rtalloc(), rtalloc1(), rtalloc_ign()
are all maintained, but refer only to the first row of the array,
so that existing callers in proprietary protocols can continue to
do the "right thing".
Some new entry points are added, for the exclusive use of ipv4 code
called in_rtrequest(), in_rtalloc(), in_rtalloc1() and in_rtalloc_ign(),
which have an extra argument which refers the code to the correct row.
In addition, there are some new entry points (currently called
rtalloc_fib() and friends) that check the Address family being
looked up and call either rtalloc() (and friends) if the protocol
is not IPv4 forcing the action to row 0 or to the appropriate row
if it IS IPv4 (and that info is available). These are for calling
from code that is not specific to any particular protocol. The way
these are implemented would change in the non ABI preserving code
to be added later.
One feature of the first version of the code is that for ipv4,
the interface routes show up automatically on all the FIBs, so
that no matter what FIB you select you always have the basic
direct attached hosts available to you. (rtinit() does this
automatically).
You CAN delete an interface route from one FIB should you want
to but by default it's there. ARP information is also available
in each FIB. It's assumed that the same machine would have the
same MAC address, regardless of which FIB you are using to get
to it.
This brings us as to how the correct FIB is selected for an outgoing
IPV4 packet.
Firstly, all packets have a FIB associated with them. if nothing
has been done to change it, it will be FIB 0. The FIB is changed
in the following ways.
Packets fall into one of a number of classes.
1/ locally generated packets, coming from a socket/PCB.
Such packets select a FIB from a number associated with the
socket/PCB. This in turn is inherited from the process,
but can be changed by a socket option. The process in turn
inherits it on fork. I have written a utility call setfib
that acts a bit like nice..
setfib -3 ping target.example.com # will use fib 3 for ping.
It is an obvious extension to make it a property of a jail
but I have not done so. It can be achieved by combining the setfib and
jail commands.
2/ packets received on an interface for forwarding.
By default these packets would use table 0,
(or possibly a number settable in a sysctl(not yet)).
but prior to routing the firewall can inspect them (see below).
(possibly in the future you may be able to associate a FIB
with packets received on an interface.. An ifconfig arg, but not yet.)
3/ packets inspected by a packet classifier, which can arbitrarily
associate a fib with it on a packet by packet basis.
A fib assigned to a packet by a packet classifier
(such as ipfw) would over-ride a fib associated by
a more default source. (such as cases 1 or 2).
4/ a tcp listen socket associated with a fib will generate
accept sockets that are associated with that same fib.
5/ Packets generated in response to some other packet (e.g. reset
or icmp packets). These should use the FIB associated with the
packet being reponded to.
6/ Packets generated during encapsulation.
gif, tun and other tunnel interfaces will encapsulate using the FIB
that was in effect withthe proces that set up the tunnel.
thus setfib 1 ifconfig gif0 [tunnel instructions]
will set the fib for the tunnel to use to be fib 1.
Routing messages would be associated with their
process, and thus select one FIB or another.
messages from the kernel would be associated with the fib they
refer to and would only be received by a routing socket associated
with that fib. (not yet implemented)
In addition Netstat has been edited to be able to cope with the
fact that the array is now 2 dimensional. (It looks in system
memory using libkvm (!)). Old versions of netstat see only the first FIB.
In addition two sysctls are added to give:
a) the number of FIBs compiled in (active)
b) the default FIB of the calling process.
Early testing experience:
-------------------------
Basically our (IronPort's) appliance does this functionality already
using ipfw fwd but that method has some drawbacks.
For example,
It can't fully simulate a routing table because it can't influence the
socket's choice of local address when a connect() is done.
Testing during the generating of these changes has been
remarkably smooth so far. Multiple tables have co-existed
with no notable side effects, and packets have been routes
accordingly.
ipfw has grown 2 new keywords:
setfib N ip from anay to any
count ip from any to any fib N
In pf there seems to be a requirement to be able to give symbolic names to the
fibs but I do not have that capacity. I am not sure if it is required.
SCTP has interestingly enough built in support for this, called VRFs
in Cisco parlance. it will be interesting to see how that handles it
when it suddenly actually does something.
Where to next:
--------------------
After committing the ABI compatible version and MFCing it, I'd
like to proceed in a forward direction in -current. this will
result in some roto-tilling in the routing code.
Firstly: the current code's idea of having a separate tree per
protocol family, all of the same format, and pointed to by the
1 dimensional array is a bit silly. Especially when one considers that
there is code that makes assumptions about every protocol having the
same internal structures there. Some protocols don't WANT that
sort of structure. (for example the whole idea of a netmask is foreign
to appletalk). This needs to be made opaque to the external code.
My suggested first change is to add routing method pointers to the
'domain' structure, along with information pointing the data.
instead of having an array of pointers to uniform structures,
there would be an array pointing to the 'domain' structures
for each protocol address domain (protocol family),
and the methods this reached would be called. The methods would have
an argument that gives FIB number, but the protocol would be free
to ignore it.
When the ABI can be changed it raises the possibilty of the
addition of a fib entry into the "struct route". Currently,
the structure contains the sockaddr of the desination, and the resulting
fib entry. To make this work fully, one could add a fib number
so that given an address and a fib, one can find the third element, the
fib entry.
Interaction with the ARP layer/ LL layer would need to be
revisited as well. Qing Li has been working on this already.
This work was sponsored by Ironport Systems/Cisco
Reviewed by: several including rwatson, bz and mlair (parts each)
Obtained from: Ironport systems/Cisco
2008-05-09 23:03:00 +00:00
|
|
|
args->f_id.fib = M_GETFIB(m); /* note mbuf not altered) */
|
2005-04-15 00:47:44 +00:00
|
|
|
proto = args->f_id.proto = 0; /* mark f_id invalid */
|
2005-08-13 11:02:34 +00:00
|
|
|
/* XXX 0 is a valid proto: IP/IPv6 Hop-by-Hop Option */
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
2005-04-15 00:47:44 +00:00
|
|
|
/*
|
|
|
|
* PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous,
|
|
|
|
* then it sets p to point at the offset "len" in the mbuf. WARNING: the
|
|
|
|
* pointer might become stale after other pullups (but we never use it
|
|
|
|
* this way).
|
|
|
|
*/
|
2011-04-18 18:22:10 +00:00
|
|
|
#define PULLUP_TO(_len, p, T) PULLUP_LEN(_len, p, sizeof(T))
|
|
|
|
#define PULLUP_LEN(_len, p, T) \
|
2009-12-16 10:48:40 +00:00
|
|
|
do { \
|
2011-04-18 18:22:10 +00:00
|
|
|
int x = (_len) + T; \
|
2009-12-16 10:48:40 +00:00
|
|
|
if ((m)->m_len < x) { \
|
|
|
|
args->m = m = m_pullup(m, x); \
|
|
|
|
if (m == NULL) \
|
|
|
|
goto pullup_failed; \
|
|
|
|
} \
|
|
|
|
p = (mtod(m, char *) + (_len)); \
|
2005-04-15 00:47:44 +00:00
|
|
|
} while (0)
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
2007-01-02 19:57:31 +00:00
|
|
|
/*
|
|
|
|
* if we have an ether header,
|
|
|
|
*/
|
|
|
|
if (args->eh)
|
|
|
|
etype = ntohs(args->eh->ether_type);
|
|
|
|
|
2005-04-18 18:35:05 +00:00
|
|
|
/* Identify IP packets and fill up variables. */
|
|
|
|
if (pktlen >= sizeof(struct ip6_hdr) &&
|
2007-01-02 19:57:31 +00:00
|
|
|
(args->eh == NULL || etype == ETHERTYPE_IPV6) && ip->ip_v == 6) {
|
|
|
|
struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
|
2005-04-18 18:35:05 +00:00
|
|
|
is_ipv6 = 1;
|
|
|
|
args->f_id.addr_type = 6;
|
|
|
|
hlen = sizeof(struct ip6_hdr);
|
2007-01-02 19:57:31 +00:00
|
|
|
proto = ip6->ip6_nxt;
|
2005-04-18 18:35:05 +00:00
|
|
|
|
|
|
|
/* Search extension headers to find upper layer protocols */
|
|
|
|
while (ulp == NULL) {
|
|
|
|
switch (proto) {
|
|
|
|
case IPPROTO_ICMPV6:
|
|
|
|
PULLUP_TO(hlen, ulp, struct icmp6_hdr);
|
2010-03-15 17:14:27 +00:00
|
|
|
icmp6_type = ICMP6(ulp)->icmp6_type;
|
2005-04-18 18:35:05 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case IPPROTO_TCP:
|
|
|
|
PULLUP_TO(hlen, ulp, struct tcphdr);
|
|
|
|
dst_port = TCP(ulp)->th_dport;
|
|
|
|
src_port = TCP(ulp)->th_sport;
|
2010-03-15 17:14:27 +00:00
|
|
|
/* save flags for dynamic rules */
|
|
|
|
args->f_id._flags = TCP(ulp)->th_flags;
|
2005-04-18 18:35:05 +00:00
|
|
|
break;
|
|
|
|
|
2006-11-13 19:07:32 +00:00
|
|
|
case IPPROTO_SCTP:
|
|
|
|
PULLUP_TO(hlen, ulp, struct sctphdr);
|
|
|
|
src_port = SCTP(ulp)->src_port;
|
|
|
|
dst_port = SCTP(ulp)->dest_port;
|
|
|
|
break;
|
|
|
|
|
2005-04-18 18:35:05 +00:00
|
|
|
case IPPROTO_UDP:
|
|
|
|
PULLUP_TO(hlen, ulp, struct udphdr);
|
|
|
|
dst_port = UDP(ulp)->uh_dport;
|
|
|
|
src_port = UDP(ulp)->uh_sport;
|
|
|
|
break;
|
|
|
|
|
2005-08-13 11:02:34 +00:00
|
|
|
case IPPROTO_HOPOPTS: /* RFC 2460 */
|
2005-04-18 18:35:05 +00:00
|
|
|
PULLUP_TO(hlen, ulp, struct ip6_hbh);
|
|
|
|
ext_hd |= EXT_HOPOPTS;
|
2005-08-13 11:02:34 +00:00
|
|
|
hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
|
2005-04-18 18:35:05 +00:00
|
|
|
proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
|
|
|
|
ulp = NULL;
|
|
|
|
break;
|
|
|
|
|
2005-08-13 11:02:34 +00:00
|
|
|
case IPPROTO_ROUTING: /* RFC 2460 */
|
2005-04-18 18:35:05 +00:00
|
|
|
PULLUP_TO(hlen, ulp, struct ip6_rthdr);
|
2006-06-22 13:22:54 +00:00
|
|
|
switch (((struct ip6_rthdr *)ulp)->ip6r_type) {
|
|
|
|
case 0:
|
2007-05-04 11:15:41 +00:00
|
|
|
ext_hd |= EXT_RTHDR0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
ext_hd |= EXT_RTHDR2;
|
2006-06-22 13:22:54 +00:00
|
|
|
break;
|
|
|
|
default:
|
2005-08-13 11:02:34 +00:00
|
|
|
printf("IPFW2: IPV6 - Unknown Routing "
|
|
|
|
"Header type(%d)\n",
|
|
|
|
((struct ip6_rthdr *)ulp)->ip6r_type);
|
Commit step 1 of the vimage project, (network stack)
virtualization work done by Marko Zec (zec@).
This is the first in a series of commits over the course
of the next few weeks.
Mark all uses of global variables to be virtualized
with a V_ prefix.
Use macros to map them back to their global names for
now, so this is a NOP change only.
We hope to have caught at least 85-90% of what is needed
so we do not invalidate a lot of outstanding patches again.
Obtained from: //depot/projects/vimage-commit2/...
Reviewed by: brooks, des, ed, mav, julian,
jamie, kris, rwatson, zec, ...
(various people I forgot, different versions)
md5 (with a bit of help)
Sponsored by: NLnet Foundation, The FreeBSD Foundation
X-MFC after: never
V_Commit_Message_Reviewed_By: more people than the patch
2008-08-17 23:27:27 +00:00
|
|
|
if (V_fw_deny_unknown_exthdrs)
|
2005-08-13 11:02:34 +00:00
|
|
|
return (IP_FW_DENY);
|
|
|
|
break;
|
|
|
|
}
|
2005-04-18 18:35:05 +00:00
|
|
|
ext_hd |= EXT_ROUTING;
|
2005-08-13 11:02:34 +00:00
|
|
|
hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3;
|
2005-04-18 18:35:05 +00:00
|
|
|
proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt;
|
|
|
|
ulp = NULL;
|
|
|
|
break;
|
|
|
|
|
2005-08-13 11:02:34 +00:00
|
|
|
case IPPROTO_FRAGMENT: /* RFC 2460 */
|
2005-04-18 18:35:05 +00:00
|
|
|
PULLUP_TO(hlen, ulp, struct ip6_frag);
|
|
|
|
ext_hd |= EXT_FRAGMENT;
|
|
|
|
hlen += sizeof (struct ip6_frag);
|
|
|
|
proto = ((struct ip6_frag *)ulp)->ip6f_nxt;
|
2005-08-13 11:02:34 +00:00
|
|
|
offset = ((struct ip6_frag *)ulp)->ip6f_offlg &
|
|
|
|
IP6F_OFF_MASK;
|
|
|
|
/* Add IP6F_MORE_FRAG for offset of first
|
|
|
|
* fragment to be != 0. */
|
|
|
|
offset |= ((struct ip6_frag *)ulp)->ip6f_offlg &
|
|
|
|
IP6F_MORE_FRAG;
|
|
|
|
if (offset == 0) {
|
|
|
|
printf("IPFW2: IPV6 - Invalid Fragment "
|
|
|
|
"Header\n");
|
Commit step 1 of the vimage project, (network stack)
virtualization work done by Marko Zec (zec@).
This is the first in a series of commits over the course
of the next few weeks.
Mark all uses of global variables to be virtualized
with a V_ prefix.
Use macros to map them back to their global names for
now, so this is a NOP change only.
We hope to have caught at least 85-90% of what is needed
so we do not invalidate a lot of outstanding patches again.
Obtained from: //depot/projects/vimage-commit2/...
Reviewed by: brooks, des, ed, mav, julian,
jamie, kris, rwatson, zec, ...
(various people I forgot, different versions)
md5 (with a bit of help)
Sponsored by: NLnet Foundation, The FreeBSD Foundation
X-MFC after: never
V_Commit_Message_Reviewed_By: more people than the patch
2008-08-17 23:27:27 +00:00
|
|
|
if (V_fw_deny_unknown_exthdrs)
|
2005-08-13 11:02:34 +00:00
|
|
|
return (IP_FW_DENY);
|
|
|
|
break;
|
|
|
|
}
|
2010-03-15 17:14:27 +00:00
|
|
|
args->f_id.extra =
|
2005-08-13 11:02:34 +00:00
|
|
|
ntohl(((struct ip6_frag *)ulp)->ip6f_ident);
|
|
|
|
ulp = NULL;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case IPPROTO_DSTOPTS: /* RFC 2460 */
|
|
|
|
PULLUP_TO(hlen, ulp, struct ip6_hbh);
|
|
|
|
ext_hd |= EXT_DSTOPTS;
|
|
|
|
hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
|
|
|
|
proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
|
|
|
|
ulp = NULL;
|
2005-04-18 18:35:05 +00:00
|
|
|
break;
|
|
|
|
|
2005-08-13 11:02:34 +00:00
|
|
|
case IPPROTO_AH: /* RFC 2402 */
|
2005-04-18 18:35:05 +00:00
|
|
|
PULLUP_TO(hlen, ulp, struct ip6_ext);
|
|
|
|
ext_hd |= EXT_AH;
|
2005-08-13 11:02:34 +00:00
|
|
|
hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2;
|
2005-04-18 18:35:05 +00:00
|
|
|
proto = ((struct ip6_ext *)ulp)->ip6e_nxt;
|
|
|
|
ulp = NULL;
|
|
|
|
break;
|
|
|
|
|
2005-08-13 11:02:34 +00:00
|
|
|
case IPPROTO_ESP: /* RFC 2406 */
|
|
|
|
PULLUP_TO(hlen, ulp, uint32_t); /* SPI, Seq# */
|
|
|
|
/* Anything past Seq# is variable length and
|
|
|
|
* data past this ext. header is encrypted. */
|
|
|
|
ext_hd |= EXT_ESP;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case IPPROTO_NONE: /* RFC 2460 */
|
2007-12-09 15:35:09 +00:00
|
|
|
/*
|
|
|
|
* Packet ends here, and IPv6 header has
|
|
|
|
* already been pulled up. If ip6e_len!=0
|
|
|
|
* then octets must be ignored.
|
|
|
|
*/
|
|
|
|
ulp = ip; /* non-NULL to get out of loop. */
|
2005-08-13 11:02:34 +00:00
|
|
|
break;
|
|
|
|
|
2005-05-28 07:46:44 +00:00
|
|
|
case IPPROTO_OSPFIGP:
|
|
|
|
/* XXX OSPF header check? */
|
|
|
|
PULLUP_TO(hlen, ulp, struct ip6_ext);
|
|
|
|
break;
|
|
|
|
|
2006-08-31 16:56:45 +00:00
|
|
|
case IPPROTO_PIM:
|
|
|
|
/* XXX PIM header check? */
|
|
|
|
PULLUP_TO(hlen, ulp, struct pim);
|
|
|
|
break;
|
|
|
|
|
2006-10-07 10:19:58 +00:00
|
|
|
case IPPROTO_CARP:
|
|
|
|
PULLUP_TO(hlen, ulp, struct carp_header);
|
|
|
|
if (((struct carp_header *)ulp)->carp_version !=
|
|
|
|
CARP_VERSION)
|
|
|
|
return (IP_FW_DENY);
|
|
|
|
if (((struct carp_header *)ulp)->carp_type !=
|
|
|
|
CARP_ADVERTISEMENT)
|
|
|
|
return (IP_FW_DENY);
|
|
|
|
break;
|
|
|
|
|
2006-06-22 13:22:54 +00:00
|
|
|
case IPPROTO_IPV6: /* RFC 2893 */
|
|
|
|
PULLUP_TO(hlen, ulp, struct ip6_hdr);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case IPPROTO_IPV4: /* RFC 2893 */
|
|
|
|
PULLUP_TO(hlen, ulp, struct ip);
|
|
|
|
break;
|
|
|
|
|
2005-04-18 18:35:05 +00:00
|
|
|
default:
|
2005-08-13 11:02:34 +00:00
|
|
|
printf("IPFW2: IPV6 - Unknown Extension "
|
|
|
|
"Header(%d), ext_hd=%x\n", proto, ext_hd);
|
Commit step 1 of the vimage project, (network stack)
virtualization work done by Marko Zec (zec@).
This is the first in a series of commits over the course
of the next few weeks.
Mark all uses of global variables to be virtualized
with a V_ prefix.
Use macros to map them back to their global names for
now, so this is a NOP change only.
We hope to have caught at least 85-90% of what is needed
so we do not invalidate a lot of outstanding patches again.
Obtained from: //depot/projects/vimage-commit2/...
Reviewed by: brooks, des, ed, mav, julian,
jamie, kris, rwatson, zec, ...
(various people I forgot, different versions)
md5 (with a bit of help)
Sponsored by: NLnet Foundation, The FreeBSD Foundation
X-MFC after: never
V_Commit_Message_Reviewed_By: more people than the patch
2008-08-17 23:27:27 +00:00
|
|
|
if (V_fw_deny_unknown_exthdrs)
|
2005-08-13 11:02:34 +00:00
|
|
|
return (IP_FW_DENY);
|
2006-06-22 13:22:54 +00:00
|
|
|
PULLUP_TO(hlen, ulp, struct ip6_ext);
|
2005-04-18 18:35:05 +00:00
|
|
|
break;
|
|
|
|
} /*switch */
|
|
|
|
}
|
2007-01-02 19:57:31 +00:00
|
|
|
ip = mtod(m, struct ip *);
|
|
|
|
ip6 = (struct ip6_hdr *)ip;
|
|
|
|
args->f_id.src_ip6 = ip6->ip6_src;
|
|
|
|
args->f_id.dst_ip6 = ip6->ip6_dst;
|
2005-04-18 18:35:05 +00:00
|
|
|
args->f_id.src_ip = 0;
|
|
|
|
args->f_id.dst_ip = 0;
|
2007-01-02 19:57:31 +00:00
|
|
|
args->f_id.flow_id6 = ntohl(ip6->ip6_flow);
|
2005-04-18 18:35:05 +00:00
|
|
|
} else if (pktlen >= sizeof(struct ip) &&
|
2007-01-02 19:57:31 +00:00
|
|
|
(args->eh == NULL || etype == ETHERTYPE_IP) && ip->ip_v == 4) {
|
2005-06-03 01:10:28 +00:00
|
|
|
is_ipv4 = 1;
|
2005-04-15 00:47:44 +00:00
|
|
|
hlen = ip->ip_hl << 2;
|
|
|
|
args->f_id.addr_type = 4;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
2005-04-15 00:47:44 +00:00
|
|
|
/*
|
|
|
|
* Collect parameters into local variables for faster matching.
|
|
|
|
*/
|
|
|
|
proto = ip->ip_p;
|
|
|
|
src_ip = ip->ip_src;
|
|
|
|
dst_ip = ip->ip_dst;
|
2010-01-04 19:01:22 +00:00
|
|
|
offset = ntohs(ip->ip_off) & IP_OFFMASK;
|
|
|
|
iplen = ntohs(ip->ip_len);
|
2009-12-28 10:47:04 +00:00
|
|
|
pktlen = iplen < pktlen ? iplen : pktlen;
|
2005-04-15 00:47:44 +00:00
|
|
|
|
|
|
|
if (offset == 0) {
|
|
|
|
switch (proto) {
|
|
|
|
case IPPROTO_TCP:
|
|
|
|
PULLUP_TO(hlen, ulp, struct tcphdr);
|
|
|
|
dst_port = TCP(ulp)->th_dport;
|
|
|
|
src_port = TCP(ulp)->th_sport;
|
2010-03-15 17:14:27 +00:00
|
|
|
/* save flags for dynamic rules */
|
|
|
|
args->f_id._flags = TCP(ulp)->th_flags;
|
2005-04-15 00:47:44 +00:00
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
2011-03-31 16:30:14 +00:00
|
|
|
case IPPROTO_SCTP:
|
|
|
|
PULLUP_TO(hlen, ulp, struct sctphdr);
|
|
|
|
src_port = SCTP(ulp)->src_port;
|
|
|
|
dst_port = SCTP(ulp)->dest_port;
|
|
|
|
break;
|
|
|
|
|
2005-04-15 00:47:44 +00:00
|
|
|
case IPPROTO_UDP:
|
|
|
|
PULLUP_TO(hlen, ulp, struct udphdr);
|
|
|
|
dst_port = UDP(ulp)->uh_dport;
|
|
|
|
src_port = UDP(ulp)->uh_sport;
|
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
2005-04-15 00:47:44 +00:00
|
|
|
case IPPROTO_ICMP:
|
2005-04-26 18:10:21 +00:00
|
|
|
PULLUP_TO(hlen, ulp, struct icmphdr);
|
2010-03-15 17:14:27 +00:00
|
|
|
//args->f_id.flags = ICMP(ulp)->icmp_type;
|
2005-04-15 00:47:44 +00:00
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
2005-04-15 00:47:44 +00:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
}
|
2005-04-15 00:47:44 +00:00
|
|
|
|
2007-01-02 19:57:31 +00:00
|
|
|
ip = mtod(m, struct ip *);
|
2005-04-15 00:47:44 +00:00
|
|
|
args->f_id.src_ip = ntohl(src_ip.s_addr);
|
|
|
|
args->f_id.dst_ip = ntohl(dst_ip.s_addr);
|
|
|
|
}
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
#undef PULLUP_TO
|
2005-04-15 00:47:44 +00:00
|
|
|
if (proto) { /* we may have port numbers, store them */
|
|
|
|
args->f_id.proto = proto;
|
|
|
|
args->f_id.src_port = src_port = ntohs(src_port);
|
|
|
|
args->f_id.dst_port = dst_port = ntohs(dst_port);
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
}
|
|
|
|
|
2004-12-10 02:17:18 +00:00
|
|
|
IPFW_RLOCK(chain);
|
2009-10-11 05:59:43 +00:00
|
|
|
if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */
|
|
|
|
IPFW_RUNLOCK(chain);
|
|
|
|
return (IP_FW_PASS); /* accept */
|
|
|
|
}
|
2010-01-04 19:01:22 +00:00
|
|
|
if (args->rule.slot) {
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
/*
|
2009-12-17 12:27:54 +00:00
|
|
|
* Packet has already been tagged as a result of a previous
|
|
|
|
* match on rule args->rule aka args->rule_id (PIPE, QUEUE,
|
2010-01-04 19:01:22 +00:00
|
|
|
* REASS, NETGRAPH, DIVERT/TEE...)
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
* Validate the slot and continue from the next one
|
|
|
|
* if still present, otherwise do a lookup.
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
*/
|
2010-01-04 19:01:22 +00:00
|
|
|
f_pos = (args->rule.chain_id == chain->id) ?
|
|
|
|
args->rule.slot :
|
|
|
|
ipfw_find_rule(chain, args->rule.rulenum,
|
|
|
|
args->rule.rule_id);
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
} else {
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
f_pos = 0;
|
2004-10-03 00:26:35 +00:00
|
|
|
}
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Now scan the rules, and parse microinstructions for each rule.
|
2009-12-03 14:22:15 +00:00
|
|
|
* We have two nested loops and an inner switch. Sometimes we
|
|
|
|
* need to break out of one or both loops, or re-enter one of
|
|
|
|
* the loops with updated variables. Loop variables are:
|
|
|
|
*
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
* f_pos (outer loop) points to the current rule.
|
2009-12-03 14:22:15 +00:00
|
|
|
* On output it points to the matching rule.
|
|
|
|
* done (outer loop) is used as a flag to break the loop.
|
|
|
|
* l (inner loop) residual length of current rule.
|
|
|
|
* cmd points to the current microinstruction.
|
|
|
|
*
|
|
|
|
* We break the inner loop by setting l=0 and possibly
|
|
|
|
* cmdlen=0 if we don't want to advance cmd.
|
|
|
|
* We break the outer loop by setting done=1
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
* We can restart the inner loop by setting l>0 and f_pos, f, cmd
|
2009-12-03 14:22:15 +00:00
|
|
|
* as needed.
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
*/
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
for (; f_pos < chain->n_rules; f_pos++) {
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
ipfw_insn *cmd;
|
2005-12-13 12:16:03 +00:00
|
|
|
uint32_t tablearg = 0;
|
|
|
|
int l, cmdlen, skip_or; /* skip rest of OR block */
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
struct ip_fw *f;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
f = chain->map[f_pos];
|
Commit step 1 of the vimage project, (network stack)
virtualization work done by Marko Zec (zec@).
This is the first in a series of commits over the course
of the next few weeks.
Mark all uses of global variables to be virtualized
with a V_ prefix.
Use macros to map them back to their global names for
now, so this is a NOP change only.
We hope to have caught at least 85-90% of what is needed
so we do not invalidate a lot of outstanding patches again.
Obtained from: //depot/projects/vimage-commit2/...
Reviewed by: brooks, des, ed, mav, julian,
jamie, kris, rwatson, zec, ...
(various people I forgot, different versions)
md5 (with a bit of help)
Sponsored by: NLnet Foundation, The FreeBSD Foundation
X-MFC after: never
V_Commit_Message_Reviewed_By: more people than the patch
2008-08-17 23:27:27 +00:00
|
|
|
if (V_set_disable & (1 << f->set) )
|
One bugfix and one new feature.
The bugfix (ipfw2.c) makes the handling of port numbers with
a dash in the name, e.g. ftp-data, consistent with old ipfw:
use \\ before the - to consider it as part of the name and not
a range separator.
The new feature (all this description will go in the manpage):
each rule now belongs to one of 32 different sets, which can
be optionally specified in the following form:
ipfw add 100 set 23 allow ip from any to any
If "set N" is not specified, the rule belongs to set 0.
Individual sets can be disabled, enabled, and deleted with the commands:
ipfw disable set N
ipfw enable set N
ipfw delete set N
Enabling/disabling of a set is atomic. Rules belonging to a disabled
set are skipped during packet matching, and they are not listed
unless you use the '-S' flag in the show/list commands.
Note that dynamic rules, once created, are always active until
they expire or their parent rule is deleted.
Set 31 is reserved for the default rule and cannot be disabled.
All sets are enabled by default. The enable/disable status of the sets
can be shown with the command
ipfw show sets
Hopefully, this feature will make life easier to those who want to
have atomic ruleset addition/deletion/tests. Examples:
To add a set of rules atomically:
ipfw disable set 18
ipfw add ... set 18 ... # repeat as needed
ipfw enable set 18
To delete a set of rules atomically
ipfw disable set 18
ipfw delete set 18
ipfw enable set 18
To test a ruleset and disable it and regain control if something
goes wrong:
ipfw disable set 18
ipfw add ... set 18 ... # repeat as needed
ipfw enable set 18 ; echo "done "; sleep 30 && ipfw disable set 18
here if everything goes well, you press control-C before
the "sleep" terminates, and your ruleset will be left
active. Otherwise, e.g. if you cannot access your box,
the ruleset will be disabled after the sleep terminates.
I think there is only one more thing that one might want, namely
a command to assign all rules in set X to set Y, so one can
test a ruleset using the above mechanisms, and once it is
considered acceptable, make it part of an existing ruleset.
2002-08-10 04:37:32 +00:00
|
|
|
continue;
|
|
|
|
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
skip_or = 0;
|
|
|
|
for (l = f->cmd_len, cmd = f->cmd ; l > 0 ;
|
|
|
|
l -= cmdlen, cmd += cmdlen) {
|
2002-07-08 22:46:01 +00:00
|
|
|
int match;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* check_body is a jump target used when we find a
|
|
|
|
* CHECK_STATE, and need to jump to the body of
|
|
|
|
* the target rule.
|
|
|
|
*/
|
|
|
|
|
2009-12-03 14:22:15 +00:00
|
|
|
/* check_body: */
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
cmdlen = F_LEN(cmd);
|
|
|
|
/*
|
|
|
|
* An OR block (insn_1 || .. || insn_n) has the
|
|
|
|
* F_OR bit set in all but the last instruction.
|
|
|
|
* The first match will set "skip_or", and cause
|
|
|
|
* the following instructions to be skipped until
|
|
|
|
* past the one with the F_OR bit clear.
|
|
|
|
*/
|
|
|
|
if (skip_or) { /* skip this instruction */
|
|
|
|
if ((cmd->len & F_OR) == 0)
|
|
|
|
skip_or = 0; /* next one is good */
|
|
|
|
continue;
|
|
|
|
}
|
2002-07-08 22:46:01 +00:00
|
|
|
match = 0; /* set to 1 if we succeed */
|
|
|
|
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
switch (cmd->opcode) {
|
2002-07-08 22:46:01 +00:00
|
|
|
/*
|
|
|
|
* The first set of opcodes compares the packet's
|
|
|
|
* fields with some pattern, setting 'match' if a
|
|
|
|
* match is found. At the end of the loop there is
|
|
|
|
* logic to deal with F_NOT and F_OR flags associated
|
|
|
|
* with the opcode.
|
|
|
|
*/
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
case O_NOP:
|
2002-07-08 22:46:01 +00:00
|
|
|
match = 1;
|
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
case O_FORWARD_MAC:
|
|
|
|
printf("ipfw: opcode %d unimplemented\n",
|
|
|
|
cmd->opcode);
|
2002-07-08 22:46:01 +00:00
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
case O_GID:
|
|
|
|
case O_UID:
|
2004-08-12 22:06:55 +00:00
|
|
|
case O_JAIL:
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
/*
|
|
|
|
* We only check offset == 0 && proto != 0,
|
2005-04-18 18:35:05 +00:00
|
|
|
* as this ensures that we have a
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
* packet with the ports info.
|
|
|
|
*/
|
|
|
|
if (offset!=0)
|
2002-07-08 22:46:01 +00:00
|
|
|
break;
|
2005-04-18 18:35:05 +00:00
|
|
|
if (is_ipv6) /* XXX to be fixed later */
|
|
|
|
break;
|
2003-11-07 23:26:57 +00:00
|
|
|
if (proto == IPPROTO_TCP ||
|
|
|
|
proto == IPPROTO_UDP)
|
|
|
|
match = check_uidgid(
|
|
|
|
(ipfw_insn_u32 *)cmd,
|
|
|
|
proto, oif,
|
2003-11-20 10:28:33 +00:00
|
|
|
dst_ip, dst_port,
|
Bring in the most recent version of ipfw and dummynet, developed
and tested over the past two months in the ipfw3-head branch. This
also happens to be the same code available in the Linux and Windows
ports of ipfw and dummynet.
The major enhancement is a completely restructured version of
dummynet, with support for different packet scheduling algorithms
(loadable at runtime), faster queue/pipe lookup, and a much cleaner
internal architecture and kernel/userland ABI which simplifies
future extensions.
In addition to the existing schedulers (FIFO and WF2Q+), we include
a Deficit Round Robin (DRR or RR for brevity) scheduler, and a new,
very fast version of WF2Q+ called QFQ.
Some test code is also present (in sys/netinet/ipfw/test) that
lets you build and test schedulers in userland.
Also, we have added a compatibility layer that understands requests
from the RELENG_7 and RELENG_8 versions of the /sbin/ipfw binaries,
and replies correctly (at least, it does its best; sometimes you
just cannot tell who sent the request and how to answer).
The compatibility layer should make it possible to MFC this code in a
relatively short time.
Some minor glitches (e.g. handling of ipfw set enable/disable,
and a workaround for a bug in RELENG_7's /sbin/ipfw) will be
fixed with separate commits.
CREDITS:
This work has been partly supported by the ONELAB2 project, and
mostly developed by Riccardo Panicucci and myself.
The code for the qfq scheduler is mostly from Fabio Checconi,
and Marta Carbone and Francesco Magno have helped with testing,
debugging and some bug fixes.
2010-03-02 17:40:48 +00:00
|
|
|
src_ip, src_port, &ucred_lookup,
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
&ucred_cache, args->inp);
|
|
|
|
#else
|
|
|
|
(void *)&ucred_cache,
|
|
|
|
(struct inpcb *)args->m);
|
|
|
|
#endif
|
2002-07-08 22:46:01 +00:00
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
case O_RECV:
|
2002-07-08 22:46:01 +00:00
|
|
|
match = iface_match(m->m_pkthdr.rcvif,
|
|
|
|
(ipfw_insn_if *)cmd);
|
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
case O_XMIT:
|
2002-07-08 22:46:01 +00:00
|
|
|
match = iface_match(oif, (ipfw_insn_if *)cmd);
|
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
case O_VIA:
|
2002-07-08 22:46:01 +00:00
|
|
|
match = iface_match(oif ? oif :
|
|
|
|
m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd);
|
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
case O_MACADDR2:
|
|
|
|
if (args->eh != NULL) { /* have MAC header */
|
|
|
|
u_int32_t *want = (u_int32_t *)
|
|
|
|
((ipfw_insn_mac *)cmd)->addr;
|
|
|
|
u_int32_t *mask = (u_int32_t *)
|
|
|
|
((ipfw_insn_mac *)cmd)->mask;
|
|
|
|
u_int32_t *hdr = (u_int32_t *)args->eh;
|
|
|
|
|
2002-07-08 22:46:01 +00:00
|
|
|
match =
|
|
|
|
( want[0] == (hdr[0] & mask[0]) &&
|
|
|
|
want[1] == (hdr[1] & mask[1]) &&
|
|
|
|
want[2] == (hdr[2] & mask[2]) );
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
}
|
2002-07-08 22:46:01 +00:00
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
case O_MAC_TYPE:
|
|
|
|
if (args->eh != NULL) {
|
|
|
|
u_int16_t *p =
|
|
|
|
((ipfw_insn_u16 *)cmd)->ports;
|
|
|
|
int i;
|
|
|
|
|
2002-07-08 22:46:01 +00:00
|
|
|
for (i = cmdlen - 1; !match && i>0;
|
|
|
|
i--, p += 2)
|
2007-01-02 19:57:31 +00:00
|
|
|
match = (etype >= p[0] &&
|
|
|
|
etype <= p[1]);
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
}
|
2002-07-08 22:46:01 +00:00
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
case O_FRAG:
|
2005-04-18 18:35:05 +00:00
|
|
|
match = (offset != 0);
|
2002-07-08 22:46:01 +00:00
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
case O_IN: /* "out" is "not in" */
|
2002-07-08 22:46:01 +00:00
|
|
|
match = (oif == NULL);
|
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
case O_LAYER2:
|
2002-07-08 22:46:01 +00:00
|
|
|
match = (args->eh != NULL);
|
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
2004-10-03 00:26:35 +00:00
|
|
|
case O_DIVERTED:
|
2010-01-04 19:01:22 +00:00
|
|
|
{
|
|
|
|
/* For diverted packets, args->rule.info
|
|
|
|
* contains the divert port (in host format)
|
|
|
|
* reason and direction.
|
2010-07-15 14:37:59 +00:00
|
|
|
*/
|
2010-01-04 19:01:22 +00:00
|
|
|
uint32_t i = args->rule.info;
|
|
|
|
match = (i&IPFW_IS_MASK) == IPFW_IS_DIVERT &&
|
|
|
|
cmd->arg1 & ((i & IPFW_INFO_IN) ? 1 : 2);
|
|
|
|
}
|
2004-10-03 00:26:35 +00:00
|
|
|
break;
|
|
|
|
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
case O_PROTO:
|
|
|
|
/*
|
|
|
|
* We do not allow an arg of 0 so the
|
|
|
|
* check of "proto" only suffices.
|
|
|
|
*/
|
2002-07-08 22:46:01 +00:00
|
|
|
match = (proto == cmd->arg1);
|
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
case O_IP_SRC:
|
2005-07-03 15:42:22 +00:00
|
|
|
match = is_ipv4 &&
|
|
|
|
(((ipfw_insn_ip *)cmd)->addr.s_addr ==
|
2002-07-08 22:46:01 +00:00
|
|
|
src_ip.s_addr);
|
|
|
|
break;
|
2002-10-23 10:07:55 +00:00
|
|
|
|
2004-06-09 20:10:38 +00:00
|
|
|
case O_IP_SRC_LOOKUP:
|
|
|
|
case O_IP_DST_LOOKUP:
|
2005-07-03 15:42:22 +00:00
|
|
|
if (is_ipv4) {
|
2009-12-28 10:47:04 +00:00
|
|
|
uint32_t key =
|
2004-06-09 20:10:38 +00:00
|
|
|
(cmd->opcode == O_IP_DST_LOOKUP) ?
|
|
|
|
dst_ip.s_addr : src_ip.s_addr;
|
2009-01-28 13:39:01 +00:00
|
|
|
uint32_t v = 0;
|
2004-06-09 20:10:38 +00:00
|
|
|
|
2009-12-15 09:46:27 +00:00
|
|
|
if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) {
|
2009-12-28 10:47:04 +00:00
|
|
|
/* generic lookup. The key must be
|
|
|
|
* in 32bit big-endian format.
|
|
|
|
*/
|
2009-12-15 09:46:27 +00:00
|
|
|
v = ((ipfw_insn_u32 *)cmd)->d[1];
|
|
|
|
if (v == 0)
|
2009-12-28 10:47:04 +00:00
|
|
|
key = dst_ip.s_addr;
|
2009-12-15 09:46:27 +00:00
|
|
|
else if (v == 1)
|
2009-12-28 10:47:04 +00:00
|
|
|
key = src_ip.s_addr;
|
2010-03-15 17:14:27 +00:00
|
|
|
else if (v == 6) /* dscp */
|
|
|
|
key = (ip->ip_tos >> 2) & 0x3f;
|
2009-12-15 09:46:27 +00:00
|
|
|
else if (offset != 0)
|
|
|
|
break;
|
|
|
|
else if (proto != IPPROTO_TCP &&
|
|
|
|
proto != IPPROTO_UDP)
|
|
|
|
break;
|
|
|
|
else if (v == 2)
|
2009-12-28 10:47:04 +00:00
|
|
|
key = htonl(dst_port);
|
2009-12-15 09:46:27 +00:00
|
|
|
else if (v == 3)
|
2009-12-29 00:02:57 +00:00
|
|
|
key = htonl(src_port);
|
2009-12-15 09:46:27 +00:00
|
|
|
else if (v == 4 || v == 5) {
|
|
|
|
check_uidgid(
|
|
|
|
(ipfw_insn_u32 *)cmd,
|
|
|
|
proto, oif,
|
|
|
|
dst_ip, dst_port,
|
Bring in the most recent version of ipfw and dummynet, developed
and tested over the past two months in the ipfw3-head branch. This
also happens to be the same code available in the Linux and Windows
ports of ipfw and dummynet.
The major enhancement is a completely restructured version of
dummynet, with support for different packet scheduling algorithms
(loadable at runtime), faster queue/pipe lookup, and a much cleaner
internal architecture and kernel/userland ABI which simplifies
future extensions.
In addition to the existing schedulers (FIFO and WF2Q+), we include
a Deficit Round Robin (DRR or RR for brevity) scheduler, and a new,
very fast version of WF2Q+ called QFQ.
Some test code is also present (in sys/netinet/ipfw/test) that
lets you build and test schedulers in userland.
Also, we have added a compatibility layer that understands requests
from the RELENG_7 and RELENG_8 versions of the /sbin/ipfw binaries,
and replies correctly (at least, it does its best; sometimes you
just cannot tell who sent the request and how to answer).
The compatibility layer should make it possible to MFC this code in a
relatively short time.
Some minor glitches (e.g. handling of ipfw set enable/disable,
and a workaround for a bug in RELENG_7's /sbin/ipfw) will be
fixed with separate commits.
CREDITS:
This work has been partly supported by the ONELAB2 project, and
mostly developed by Riccardo Panicucci and myself.
The code for the qfq scheduler is mostly from Fabio Checconi,
and Marta Carbone and Francesco Magno have helped with testing,
debugging and some bug fixes.
2010-03-02 17:40:48 +00:00
|
|
|
src_ip, src_port, &ucred_lookup,
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
&ucred_cache, args->inp);
|
2009-12-15 09:46:27 +00:00
|
|
|
if (v == 4 /* O_UID */)
|
2009-12-28 10:47:04 +00:00
|
|
|
key = ucred_cache->cr_uid;
|
2009-12-15 09:46:27 +00:00
|
|
|
else if (v == 5 /* O_JAIL */)
|
2009-12-28 10:47:04 +00:00
|
|
|
key = ucred_cache->cr_prison->pr_id;
|
Bring in the most recent version of ipfw and dummynet, developed
and tested over the past two months in the ipfw3-head branch. This
also happens to be the same code available in the Linux and Windows
ports of ipfw and dummynet.
The major enhancement is a completely restructured version of
dummynet, with support for different packet scheduling algorithms
(loadable at runtime), faster queue/pipe lookup, and a much cleaner
internal architecture and kernel/userland ABI which simplifies
future extensions.
In addition to the existing schedulers (FIFO and WF2Q+), we include
a Deficit Round Robin (DRR or RR for brevity) scheduler, and a new,
very fast version of WF2Q+ called QFQ.
Some test code is also present (in sys/netinet/ipfw/test) that
lets you build and test schedulers in userland.
Also, we have added a compatibility layer that understands requests
from the RELENG_7 and RELENG_8 versions of the /sbin/ipfw binaries,
and replies correctly (at least, it does its best; sometimes you
just cannot tell who sent the request and how to answer).
The compatibility layer should make it possible to MFC this code in a
relatively short time.
Some minor glitches (e.g. handling of ipfw set enable/disable,
and a workaround for a bug in RELENG_7's /sbin/ipfw) will be
fixed with separate commits.
CREDITS:
This work has been partly supported by the ONELAB2 project, and
mostly developed by Riccardo Panicucci and myself.
The code for the qfq scheduler is mostly from Fabio Checconi,
and Marta Carbone and Francesco Magno have helped with testing,
debugging and some bug fixes.
2010-03-02 17:40:48 +00:00
|
|
|
#else /* !__FreeBSD__ */
|
|
|
|
(void *)&ucred_cache,
|
|
|
|
(struct inpcb *)args->m);
|
|
|
|
if (v ==4 /* O_UID */)
|
|
|
|
key = ucred_cache.uid;
|
|
|
|
else if (v == 5 /* O_JAIL */)
|
|
|
|
key = ucred_cache.xid;
|
|
|
|
#endif /* !__FreeBSD__ */
|
2009-12-28 10:47:04 +00:00
|
|
|
key = htonl(key);
|
2009-12-15 09:46:27 +00:00
|
|
|
} else
|
|
|
|
break;
|
|
|
|
}
|
2009-12-28 10:47:04 +00:00
|
|
|
match = ipfw_lookup_table(chain,
|
|
|
|
cmd->arg1, key, &v);
|
2004-06-09 20:10:38 +00:00
|
|
|
if (!match)
|
|
|
|
break;
|
|
|
|
if (cmdlen == F_INSN_SIZE(ipfw_insn_u32))
|
|
|
|
match =
|
|
|
|
((ipfw_insn_u32 *)cmd)->d[0] == v;
|
2005-12-13 12:16:03 +00:00
|
|
|
else
|
|
|
|
tablearg = v;
|
2004-06-09 20:10:38 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
case O_IP_SRC_MASK:
|
2003-07-08 07:44:42 +00:00
|
|
|
case O_IP_DST_MASK:
|
2005-07-03 15:42:22 +00:00
|
|
|
if (is_ipv4) {
|
2003-07-08 07:44:42 +00:00
|
|
|
uint32_t a =
|
|
|
|
(cmd->opcode == O_IP_DST_MASK) ?
|
|
|
|
dst_ip.s_addr : src_ip.s_addr;
|
|
|
|
uint32_t *p = ((ipfw_insn_u32 *)cmd)->d;
|
|
|
|
int i = cmdlen-1;
|
|
|
|
|
|
|
|
for (; !match && i>0; i-= 2, p+= 2)
|
|
|
|
match = (p[0] == (a & p[1]));
|
|
|
|
}
|
2002-07-08 22:46:01 +00:00
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
case O_IP_SRC_ME:
|
2005-07-03 15:42:22 +00:00
|
|
|
if (is_ipv4) {
|
2002-07-08 22:46:01 +00:00
|
|
|
struct ifnet *tif;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
2002-07-08 22:46:01 +00:00
|
|
|
INADDR_TO_IFP(src_ip, tif);
|
|
|
|
match = (tif != NULL);
|
2010-01-17 08:39:48 +00:00
|
|
|
break;
|
2002-07-08 22:46:01 +00:00
|
|
|
}
|
2010-01-17 08:39:48 +00:00
|
|
|
#ifdef INET6
|
Bring in the most recent version of ipfw and dummynet, developed
and tested over the past two months in the ipfw3-head branch. This
also happens to be the same code available in the Linux and Windows
ports of ipfw and dummynet.
The major enhancement is a completely restructured version of
dummynet, with support for different packet scheduling algorithms
(loadable at runtime), faster queue/pipe lookup, and a much cleaner
internal architecture and kernel/userland ABI which simplifies
future extensions.
In addition to the existing schedulers (FIFO and WF2Q+), we include
a Deficit Round Robin (DRR or RR for brevity) scheduler, and a new,
very fast version of WF2Q+ called QFQ.
Some test code is also present (in sys/netinet/ipfw/test) that
lets you build and test schedulers in userland.
Also, we have added a compatibility layer that understands requests
from the RELENG_7 and RELENG_8 versions of the /sbin/ipfw binaries,
and replies correctly (at least, it does its best; sometimes you
just cannot tell who sent the request and how to answer).
The compatibility layer should make it possible to MFC this code in a
relatively short time.
Some minor glitches (e.g. handling of ipfw set enable/disable,
and a workaround for a bug in RELENG_7's /sbin/ipfw) will be
fixed with separate commits.
CREDITS:
This work has been partly supported by the ONELAB2 project, and
mostly developed by Riccardo Panicucci and myself.
The code for the qfq scheduler is mostly from Fabio Checconi,
and Marta Carbone and Francesco Magno have helped with testing,
debugging and some bug fixes.
2010-03-02 17:40:48 +00:00
|
|
|
/* FALLTHROUGH */
|
2010-01-17 08:39:48 +00:00
|
|
|
case O_IP6_SRC_ME:
|
Bring in the most recent version of ipfw and dummynet, developed
and tested over the past two months in the ipfw3-head branch. This
also happens to be the same code available in the Linux and Windows
ports of ipfw and dummynet.
The major enhancement is a completely restructured version of
dummynet, with support for different packet scheduling algorithms
(loadable at runtime), faster queue/pipe lookup, and a much cleaner
internal architecture and kernel/userland ABI which simplifies
future extensions.
In addition to the existing schedulers (FIFO and WF2Q+), we include
a Deficit Round Robin (DRR or RR for brevity) scheduler, and a new,
very fast version of WF2Q+ called QFQ.
Some test code is also present (in sys/netinet/ipfw/test) that
lets you build and test schedulers in userland.
Also, we have added a compatibility layer that understands requests
from the RELENG_7 and RELENG_8 versions of the /sbin/ipfw binaries,
and replies correctly (at least, it does its best; sometimes you
just cannot tell who sent the request and how to answer).
The compatibility layer should make it possible to MFC this code in a
relatively short time.
Some minor glitches (e.g. handling of ipfw set enable/disable,
and a workaround for a bug in RELENG_7's /sbin/ipfw) will be
fixed with separate commits.
CREDITS:
This work has been partly supported by the ONELAB2 project, and
mostly developed by Riccardo Panicucci and myself.
The code for the qfq scheduler is mostly from Fabio Checconi,
and Marta Carbone and Francesco Magno have helped with testing,
debugging and some bug fixes.
2010-03-02 17:40:48 +00:00
|
|
|
match= is_ipv6 && search_ip6_addr_net(&args->f_id.src_ip6);
|
2010-01-17 08:39:48 +00:00
|
|
|
#endif
|
2002-07-08 22:46:01 +00:00
|
|
|
break;
|
2002-10-23 10:07:55 +00:00
|
|
|
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
case O_IP_DST_SET:
|
|
|
|
case O_IP_SRC_SET:
|
2005-07-03 15:42:22 +00:00
|
|
|
if (is_ipv4) {
|
2002-07-08 22:46:01 +00:00
|
|
|
u_int32_t *d = (u_int32_t *)(cmd+1);
|
|
|
|
u_int32_t addr =
|
|
|
|
cmd->opcode == O_IP_DST_SET ?
|
2002-10-24 18:01:53 +00:00
|
|
|
args->f_id.dst_ip :
|
|
|
|
args->f_id.src_ip;
|
2002-07-08 22:46:01 +00:00
|
|
|
|
|
|
|
if (addr < d[0])
|
|
|
|
break;
|
|
|
|
addr -= d[0]; /* subtract base */
|
|
|
|
match = (addr < cmd->arg1) &&
|
|
|
|
( d[ 1 + (addr>>5)] &
|
|
|
|
(1<<(addr & 0x1f)) );
|
|
|
|
}
|
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
case O_IP_DST:
|
2005-07-03 15:42:22 +00:00
|
|
|
match = is_ipv4 &&
|
|
|
|
(((ipfw_insn_ip *)cmd)->addr.s_addr ==
|
2002-07-08 22:46:01 +00:00
|
|
|
dst_ip.s_addr);
|
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
case O_IP_DST_ME:
|
2005-07-03 15:42:22 +00:00
|
|
|
if (is_ipv4) {
|
2002-07-08 22:46:01 +00:00
|
|
|
struct ifnet *tif;
|
|
|
|
|
|
|
|
INADDR_TO_IFP(dst_ip, tif);
|
|
|
|
match = (tif != NULL);
|
2010-01-17 08:39:48 +00:00
|
|
|
break;
|
2002-07-08 22:46:01 +00:00
|
|
|
}
|
2010-01-17 08:39:48 +00:00
|
|
|
#ifdef INET6
|
Bring in the most recent version of ipfw and dummynet, developed
and tested over the past two months in the ipfw3-head branch. This
also happens to be the same code available in the Linux and Windows
ports of ipfw and dummynet.
The major enhancement is a completely restructured version of
dummynet, with support for different packet scheduling algorithms
(loadable at runtime), faster queue/pipe lookup, and a much cleaner
internal architecture and kernel/userland ABI which simplifies
future extensions.
In addition to the existing schedulers (FIFO and WF2Q+), we include
a Deficit Round Robin (DRR or RR for brevity) scheduler, and a new,
very fast version of WF2Q+ called QFQ.
Some test code is also present (in sys/netinet/ipfw/test) that
lets you build and test schedulers in userland.
Also, we have added a compatibility layer that understands requests
from the RELENG_7 and RELENG_8 versions of the /sbin/ipfw binaries,
and replies correctly (at least, it does its best; sometimes you
just cannot tell who sent the request and how to answer).
The compatibility layer should make it possible to MFC this code in a
relatively short time.
Some minor glitches (e.g. handling of ipfw set enable/disable,
and a workaround for a bug in RELENG_7's /sbin/ipfw) will be
fixed with separate commits.
CREDITS:
This work has been partly supported by the ONELAB2 project, and
mostly developed by Riccardo Panicucci and myself.
The code for the qfq scheduler is mostly from Fabio Checconi,
and Marta Carbone and Francesco Magno have helped with testing,
debugging and some bug fixes.
2010-03-02 17:40:48 +00:00
|
|
|
/* FALLTHROUGH */
|
2010-01-17 08:39:48 +00:00
|
|
|
case O_IP6_DST_ME:
|
Bring in the most recent version of ipfw and dummynet, developed
and tested over the past two months in the ipfw3-head branch. This
also happens to be the same code available in the Linux and Windows
ports of ipfw and dummynet.
The major enhancement is a completely restructured version of
dummynet, with support for different packet scheduling algorithms
(loadable at runtime), faster queue/pipe lookup, and a much cleaner
internal architecture and kernel/userland ABI which simplifies
future extensions.
In addition to the existing schedulers (FIFO and WF2Q+), we include
a Deficit Round Robin (DRR or RR for brevity) scheduler, and a new,
very fast version of WF2Q+ called QFQ.
Some test code is also present (in sys/netinet/ipfw/test) that
lets you build and test schedulers in userland.
Also, we have added a compatibility layer that understands requests
from the RELENG_7 and RELENG_8 versions of the /sbin/ipfw binaries,
and replies correctly (at least, it does its best; sometimes you
just cannot tell who sent the request and how to answer).
The compatibility layer should make it possible to MFC this code in a
relatively short time.
Some minor glitches (e.g. handling of ipfw set enable/disable,
and a workaround for a bug in RELENG_7's /sbin/ipfw) will be
fixed with separate commits.
CREDITS:
This work has been partly supported by the ONELAB2 project, and
mostly developed by Riccardo Panicucci and myself.
The code for the qfq scheduler is mostly from Fabio Checconi,
and Marta Carbone and Francesco Magno have helped with testing,
debugging and some bug fixes.
2010-03-02 17:40:48 +00:00
|
|
|
match= is_ipv6 && search_ip6_addr_net(&args->f_id.dst_ip6);
|
2010-01-17 08:39:48 +00:00
|
|
|
#endif
|
2002-07-08 22:46:01 +00:00
|
|
|
break;
|
2002-10-23 10:07:55 +00:00
|
|
|
|
Bring in the most recent version of ipfw and dummynet, developed
and tested over the past two months in the ipfw3-head branch. This
also happens to be the same code available in the Linux and Windows
ports of ipfw and dummynet.
The major enhancement is a completely restructured version of
dummynet, with support for different packet scheduling algorithms
(loadable at runtime), faster queue/pipe lookup, and a much cleaner
internal architecture and kernel/userland ABI which simplifies
future extensions.
In addition to the existing schedulers (FIFO and WF2Q+), we include
a Deficit Round Robin (DRR or RR for brevity) scheduler, and a new,
very fast version of WF2Q+ called QFQ.
Some test code is also present (in sys/netinet/ipfw/test) that
lets you build and test schedulers in userland.
Also, we have added a compatibility layer that understands requests
from the RELENG_7 and RELENG_8 versions of the /sbin/ipfw binaries,
and replies correctly (at least, it does its best; sometimes you
just cannot tell who sent the request and how to answer).
The compatibility layer should make it possible to MFC this code in a
relatively short time.
Some minor glitches (e.g. handling of ipfw set enable/disable,
and a workaround for a bug in RELENG_7's /sbin/ipfw) will be
fixed with separate commits.
CREDITS:
This work has been partly supported by the ONELAB2 project, and
mostly developed by Riccardo Panicucci and myself.
The code for the qfq scheduler is mostly from Fabio Checconi,
and Marta Carbone and Francesco Magno have helped with testing,
debugging and some bug fixes.
2010-03-02 17:40:48 +00:00
|
|
|
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
case O_IP_SRCPORT:
|
|
|
|
case O_IP_DSTPORT:
|
|
|
|
/*
|
|
|
|
* offset == 0 && proto != 0 is enough
|
2005-04-18 18:35:05 +00:00
|
|
|
* to guarantee that we have a
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
* packet with port info.
|
|
|
|
*/
|
2002-07-08 22:46:01 +00:00
|
|
|
if ((proto==IPPROTO_UDP || proto==IPPROTO_TCP)
|
|
|
|
&& offset == 0) {
|
|
|
|
u_int16_t x =
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
(cmd->opcode == O_IP_SRCPORT) ?
|
2002-07-08 22:46:01 +00:00
|
|
|
src_port : dst_port ;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
u_int16_t *p =
|
|
|
|
((ipfw_insn_u16 *)cmd)->ports;
|
|
|
|
int i;
|
|
|
|
|
2002-07-08 22:46:01 +00:00
|
|
|
for (i = cmdlen - 1; !match && i>0;
|
|
|
|
i--, p += 2)
|
|
|
|
match = (x>=p[0] && x<=p[1]);
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
}
|
2002-07-08 22:46:01 +00:00
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
case O_ICMPTYPE:
|
2002-07-08 22:46:01 +00:00
|
|
|
match = (offset == 0 && proto==IPPROTO_ICMP &&
|
2005-04-15 00:47:44 +00:00
|
|
|
icmptype_match(ICMP(ulp), (ipfw_insn_u32 *)cmd) );
|
2002-07-08 22:46:01 +00:00
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
2005-04-19 09:56:14 +00:00
|
|
|
#ifdef INET6
|
2005-04-18 18:35:05 +00:00
|
|
|
case O_ICMP6TYPE:
|
|
|
|
match = is_ipv6 && offset == 0 &&
|
|
|
|
proto==IPPROTO_ICMPV6 &&
|
|
|
|
icmp6type_match(
|
|
|
|
ICMP6(ulp)->icmp6_type,
|
|
|
|
(ipfw_insn_u32 *)cmd);
|
|
|
|
break;
|
2005-04-19 09:56:14 +00:00
|
|
|
#endif /* INET6 */
|
2005-04-18 18:35:05 +00:00
|
|
|
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
case O_IPOPT:
|
2005-07-03 15:42:22 +00:00
|
|
|
match = (is_ipv4 &&
|
2007-01-02 19:57:31 +00:00
|
|
|
ipopts_match(ip, cmd) );
|
2002-07-08 22:46:01 +00:00
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
case O_IPVER:
|
2005-07-03 15:42:22 +00:00
|
|
|
match = (is_ipv4 &&
|
2007-01-02 19:57:31 +00:00
|
|
|
cmd->arg1 == ip->ip_v);
|
2002-07-08 22:46:01 +00:00
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
case O_IPID:
|
|
|
|
case O_IPLEN:
|
2003-06-22 17:33:19 +00:00
|
|
|
case O_IPTTL:
|
2005-07-03 15:42:22 +00:00
|
|
|
if (is_ipv4) { /* only for IP packets */
|
2003-06-22 17:33:19 +00:00
|
|
|
uint16_t x;
|
|
|
|
uint16_t *p;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (cmd->opcode == O_IPLEN)
|
2009-12-28 10:47:04 +00:00
|
|
|
x = iplen;
|
2003-06-22 17:33:19 +00:00
|
|
|
else if (cmd->opcode == O_IPTTL)
|
2007-01-02 19:57:31 +00:00
|
|
|
x = ip->ip_ttl;
|
2003-06-22 17:33:19 +00:00
|
|
|
else /* must be IPID */
|
2007-01-02 19:57:31 +00:00
|
|
|
x = ntohs(ip->ip_id);
|
2003-06-22 17:33:19 +00:00
|
|
|
if (cmdlen == 1) {
|
|
|
|
match = (cmd->arg1 == x);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* otherwise we have ranges */
|
|
|
|
p = ((ipfw_insn_u16 *)cmd)->ports;
|
|
|
|
i = cmdlen - 1;
|
|
|
|
for (; !match && i>0; i--, p += 2)
|
|
|
|
match = (x >= p[0] && x <= p[1]);
|
|
|
|
}
|
2002-07-08 22:46:01 +00:00
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
2002-07-05 22:43:06 +00:00
|
|
|
case O_IPPRECEDENCE:
|
2005-07-03 15:42:22 +00:00
|
|
|
match = (is_ipv4 &&
|
2007-01-02 19:57:31 +00:00
|
|
|
(cmd->arg1 == (ip->ip_tos & 0xe0)) );
|
2002-07-08 22:46:01 +00:00
|
|
|
break;
|
2002-07-05 22:43:06 +00:00
|
|
|
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
case O_IPTOS:
|
2005-07-03 15:42:22 +00:00
|
|
|
match = (is_ipv4 &&
|
2007-01-02 19:57:31 +00:00
|
|
|
flags_match(cmd, ip->ip_tos));
|
2002-07-08 22:46:01 +00:00
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
2004-10-03 00:47:15 +00:00
|
|
|
case O_TCPDATALEN:
|
|
|
|
if (proto == IPPROTO_TCP && offset == 0) {
|
|
|
|
struct tcphdr *tcp;
|
|
|
|
uint16_t x;
|
|
|
|
uint16_t *p;
|
|
|
|
int i;
|
|
|
|
|
2005-04-15 00:47:44 +00:00
|
|
|
tcp = TCP(ulp);
|
2009-12-28 10:47:04 +00:00
|
|
|
x = iplen -
|
2004-10-03 00:47:15 +00:00
|
|
|
((ip->ip_hl + tcp->th_off) << 2);
|
|
|
|
if (cmdlen == 1) {
|
|
|
|
match = (cmd->arg1 == x);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* otherwise we have ranges */
|
|
|
|
p = ((ipfw_insn_u16 *)cmd)->ports;
|
|
|
|
i = cmdlen - 1;
|
|
|
|
for (; !match && i>0; i--, p += 2)
|
|
|
|
match = (x >= p[0] && x <= p[1]);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
case O_TCPFLAGS:
|
2002-07-08 22:46:01 +00:00
|
|
|
match = (proto == IPPROTO_TCP && offset == 0 &&
|
2005-04-15 00:47:44 +00:00
|
|
|
flags_match(cmd, TCP(ulp)->th_flags));
|
2002-07-08 22:46:01 +00:00
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
case O_TCPOPTS:
|
2011-04-18 18:22:10 +00:00
|
|
|
PULLUP_LEN(hlen, ulp, (TCP(ulp)->th_off << 2));
|
2002-07-08 22:46:01 +00:00
|
|
|
match = (proto == IPPROTO_TCP && offset == 0 &&
|
2005-04-15 00:47:44 +00:00
|
|
|
tcpopts_match(TCP(ulp), cmd));
|
2002-07-08 22:46:01 +00:00
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
case O_TCPSEQ:
|
2002-07-08 22:46:01 +00:00
|
|
|
match = (proto == IPPROTO_TCP && offset == 0 &&
|
|
|
|
((ipfw_insn_u32 *)cmd)->d[0] ==
|
2005-04-15 00:47:44 +00:00
|
|
|
TCP(ulp)->th_seq);
|
2002-07-08 22:46:01 +00:00
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
case O_TCPACK:
|
2002-07-08 22:46:01 +00:00
|
|
|
match = (proto == IPPROTO_TCP && offset == 0 &&
|
|
|
|
((ipfw_insn_u32 *)cmd)->d[0] ==
|
2005-04-15 00:47:44 +00:00
|
|
|
TCP(ulp)->th_ack);
|
2002-07-08 22:46:01 +00:00
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
case O_TCPWIN:
|
2002-07-08 22:46:01 +00:00
|
|
|
match = (proto == IPPROTO_TCP && offset == 0 &&
|
2005-04-15 00:47:44 +00:00
|
|
|
cmd->arg1 == TCP(ulp)->th_win);
|
2002-07-08 22:46:01 +00:00
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
case O_ESTAB:
|
|
|
|
/* reject packets which have SYN only */
|
2002-07-08 22:46:01 +00:00
|
|
|
/* XXX should i also check for TH_ACK ? */
|
|
|
|
match = (proto == IPPROTO_TCP && offset == 0 &&
|
2005-04-15 00:47:44 +00:00
|
|
|
(TCP(ulp)->th_flags &
|
2002-07-08 22:46:01 +00:00
|
|
|
(TH_RST | TH_ACK | TH_SYN)) != TH_SYN);
|
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
2004-10-03 00:17:46 +00:00
|
|
|
case O_ALTQ: {
|
2007-07-03 12:46:08 +00:00
|
|
|
struct pf_mtag *at;
|
2004-10-03 00:17:46 +00:00
|
|
|
ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
|
|
|
|
|
|
|
|
match = 1;
|
2007-07-03 12:46:08 +00:00
|
|
|
at = pf_find_mtag(m);
|
|
|
|
if (at != NULL && at->qid != 0)
|
2005-06-04 19:04:31 +00:00
|
|
|
break;
|
2007-07-03 12:46:08 +00:00
|
|
|
at = pf_get_mtag(m);
|
|
|
|
if (at == NULL) {
|
2004-10-03 00:17:46 +00:00
|
|
|
/*
|
|
|
|
* Let the packet fall back to the
|
|
|
|
* default ALTQ.
|
|
|
|
*/
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
at->qid = altq->qid;
|
2005-07-03 15:42:22 +00:00
|
|
|
if (is_ipv4)
|
2004-10-03 00:17:46 +00:00
|
|
|
at->af = AF_INET;
|
|
|
|
else
|
|
|
|
at->af = AF_LINK;
|
|
|
|
at->hdr = ip;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
case O_LOG:
|
2009-12-17 23:11:16 +00:00
|
|
|
ipfw_log(f, hlen, args, m,
|
2007-01-02 19:57:31 +00:00
|
|
|
oif, offset, tablearg, ip);
|
2002-07-08 22:46:01 +00:00
|
|
|
match = 1;
|
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
2002-07-05 22:43:06 +00:00
|
|
|
case O_PROB:
|
2002-07-08 22:46:01 +00:00
|
|
|
match = (random()<((ipfw_insn_u32 *)cmd)->d[0]);
|
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
2003-03-15 01:13:00 +00:00
|
|
|
case O_VERREVPATH:
|
|
|
|
/* Outgoing packets automatically pass/match */
|
2005-04-18 18:35:05 +00:00
|
|
|
match = ((oif != NULL) ||
|
2003-06-23 21:18:56 +00:00
|
|
|
(m->m_pkthdr.rcvif == NULL) ||
|
2005-04-19 09:56:14 +00:00
|
|
|
(
|
|
|
|
#ifdef INET6
|
|
|
|
is_ipv6 ?
|
2005-06-16 14:55:58 +00:00
|
|
|
verify_path6(&(args->f_id.src_ip6),
|
2005-04-18 18:35:05 +00:00
|
|
|
m->m_pkthdr.rcvif) :
|
2005-04-19 09:56:14 +00:00
|
|
|
#endif
|
Add code to allow the system to handle multiple routing tables.
This particular implementation is designed to be fully backwards compatible
and to be MFC-able to 7.x (and 6.x)
Currently the only protocol that can make use of the multiple tables is IPv4
Similar functionality exists in OpenBSD and Linux.
From my notes:
-----
One thing where FreeBSD has been falling behind, and which by chance I
have some time to work on is "policy based routing", which allows
different
packet streams to be routed by more than just the destination address.
Constraints:
------------
I want to make some form of this available in the 6.x tree
(and by extension 7.x) , but FreeBSD in general needs it so I might as
well do it in -current and back port the portions I need.
One of the ways that this can be done is to have the ability to
instantiate multiple kernel routing tables (which I will now
refer to as "Forwarding Information Bases" or "FIBs" for political
correctness reasons). Which FIB a particular packet uses to make
the next hop decision can be decided by a number of mechanisms.
The policies these mechanisms implement are the "Policies" referred
to in "Policy based routing".
One of the constraints I have if I try to back port this work to
6.x is that it must be implemented as a EXTENSION to the existing
ABIs in 6.x so that third party applications do not need to be
recompiled in timespan of the branch.
This first version will not have some of the bells and whistles that
will come with later versions. It will, for example, be limited to 16
tables in the first commit.
Implementation method, Compatible version. (part 1)
-------------------------------
For this reason I have implemented a "sufficient subset" of a
multiple routing table solution in Perforce, and back-ported it
to 6.x. (also in Perforce though not always caught up with what I
have done in -current/P4). The subset allows a number of FIBs
to be defined at compile time (8 is sufficient for my purposes in 6.x)
and implements the changes needed to allow IPV4 to use them. I have not
done the changes for ipv6 simply because I do not need it, and I do not
have enough knowledge of ipv6 (e.g. neighbor discovery) needed to do it.
Other protocol families are left untouched and should there be
users with proprietary protocol families, they should continue to work
and be oblivious to the existence of the extra FIBs.
To understand how this is done, one must know that the current FIB
code starts everything off with a single dimensional array of
pointers to FIB head structures (One per protocol family), each of
which in turn points to the trie of routes available to that family.
The basic change in the ABI compatible version of the change is to
extent that array to be a 2 dimensional array, so that
instead of protocol family X looking at rt_tables[X] for the
table it needs, it looks at rt_tables[Y][X] when for all
protocol families except ipv4 Y is always 0.
Code that is unaware of the change always just sees the first row
of the table, which of course looks just like the one dimensional
array that existed before.
The entry points rtrequest(), rtalloc(), rtalloc1(), rtalloc_ign()
are all maintained, but refer only to the first row of the array,
so that existing callers in proprietary protocols can continue to
do the "right thing".
Some new entry points are added, for the exclusive use of ipv4 code
called in_rtrequest(), in_rtalloc(), in_rtalloc1() and in_rtalloc_ign(),
which have an extra argument which refers the code to the correct row.
In addition, there are some new entry points (currently called
rtalloc_fib() and friends) that check the Address family being
looked up and call either rtalloc() (and friends) if the protocol
is not IPv4 forcing the action to row 0 or to the appropriate row
if it IS IPv4 (and that info is available). These are for calling
from code that is not specific to any particular protocol. The way
these are implemented would change in the non ABI preserving code
to be added later.
One feature of the first version of the code is that for ipv4,
the interface routes show up automatically on all the FIBs, so
that no matter what FIB you select you always have the basic
direct attached hosts available to you. (rtinit() does this
automatically).
You CAN delete an interface route from one FIB should you want
to but by default it's there. ARP information is also available
in each FIB. It's assumed that the same machine would have the
same MAC address, regardless of which FIB you are using to get
to it.
This brings us as to how the correct FIB is selected for an outgoing
IPV4 packet.
Firstly, all packets have a FIB associated with them. if nothing
has been done to change it, it will be FIB 0. The FIB is changed
in the following ways.
Packets fall into one of a number of classes.
1/ locally generated packets, coming from a socket/PCB.
Such packets select a FIB from a number associated with the
socket/PCB. This in turn is inherited from the process,
but can be changed by a socket option. The process in turn
inherits it on fork. I have written a utility call setfib
that acts a bit like nice..
setfib -3 ping target.example.com # will use fib 3 for ping.
It is an obvious extension to make it a property of a jail
but I have not done so. It can be achieved by combining the setfib and
jail commands.
2/ packets received on an interface for forwarding.
By default these packets would use table 0,
(or possibly a number settable in a sysctl(not yet)).
but prior to routing the firewall can inspect them (see below).
(possibly in the future you may be able to associate a FIB
with packets received on an interface.. An ifconfig arg, but not yet.)
3/ packets inspected by a packet classifier, which can arbitrarily
associate a fib with it on a packet by packet basis.
A fib assigned to a packet by a packet classifier
(such as ipfw) would over-ride a fib associated by
a more default source. (such as cases 1 or 2).
4/ a tcp listen socket associated with a fib will generate
accept sockets that are associated with that same fib.
5/ Packets generated in response to some other packet (e.g. reset
or icmp packets). These should use the FIB associated with the
packet being reponded to.
6/ Packets generated during encapsulation.
gif, tun and other tunnel interfaces will encapsulate using the FIB
that was in effect withthe proces that set up the tunnel.
thus setfib 1 ifconfig gif0 [tunnel instructions]
will set the fib for the tunnel to use to be fib 1.
Routing messages would be associated with their
process, and thus select one FIB or another.
messages from the kernel would be associated with the fib they
refer to and would only be received by a routing socket associated
with that fib. (not yet implemented)
In addition Netstat has been edited to be able to cope with the
fact that the array is now 2 dimensional. (It looks in system
memory using libkvm (!)). Old versions of netstat see only the first FIB.
In addition two sysctls are added to give:
a) the number of FIBs compiled in (active)
b) the default FIB of the calling process.
Early testing experience:
-------------------------
Basically our (IronPort's) appliance does this functionality already
using ipfw fwd but that method has some drawbacks.
For example,
It can't fully simulate a routing table because it can't influence the
socket's choice of local address when a connect() is done.
Testing during the generating of these changes has been
remarkably smooth so far. Multiple tables have co-existed
with no notable side effects, and packets have been routes
accordingly.
ipfw has grown 2 new keywords:
setfib N ip from anay to any
count ip from any to any fib N
In pf there seems to be a requirement to be able to give symbolic names to the
fibs but I do not have that capacity. I am not sure if it is required.
SCTP has interestingly enough built in support for this, called VRFs
in Cisco parlance. it will be interesting to see how that handles it
when it suddenly actually does something.
Where to next:
--------------------
After committing the ABI compatible version and MFCing it, I'd
like to proceed in a forward direction in -current. this will
result in some roto-tilling in the routing code.
Firstly: the current code's idea of having a separate tree per
protocol family, all of the same format, and pointed to by the
1 dimensional array is a bit silly. Especially when one considers that
there is code that makes assumptions about every protocol having the
same internal structures there. Some protocols don't WANT that
sort of structure. (for example the whole idea of a netmask is foreign
to appletalk). This needs to be made opaque to the external code.
My suggested first change is to add routing method pointers to the
'domain' structure, along with information pointing the data.
instead of having an array of pointers to uniform structures,
there would be an array pointing to the 'domain' structures
for each protocol address domain (protocol family),
and the methods this reached would be called. The methods would have
an argument that gives FIB number, but the protocol would be free
to ignore it.
When the ABI can be changed it raises the possibilty of the
addition of a fib entry into the "struct route". Currently,
the structure contains the sockaddr of the desination, and the resulting
fib entry. To make this work fully, one could add a fib number
so that given an address and a fib, one can find the third element, the
fib entry.
Interaction with the ARP layer/ LL layer would need to be
revisited as well. Qing Li has been working on this already.
This work was sponsored by Ironport Systems/Cisco
Reviewed by: several including rwatson, bz and mlair (parts each)
Obtained from: Ironport systems/Cisco
2008-05-09 23:03:00 +00:00
|
|
|
verify_path(src_ip, m->m_pkthdr.rcvif,
|
|
|
|
args->f_id.fib)));
|
2004-04-23 14:28:38 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case O_VERSRCREACH:
|
|
|
|
/* Outgoing packets automatically pass/match */
|
2004-08-11 11:41:11 +00:00
|
|
|
match = (hlen > 0 && ((oif != NULL) ||
|
2005-06-16 14:55:58 +00:00
|
|
|
#ifdef INET6
|
|
|
|
is_ipv6 ?
|
|
|
|
verify_path6(&(args->f_id.src_ip6),
|
|
|
|
NULL) :
|
|
|
|
#endif
|
Add code to allow the system to handle multiple routing tables.
This particular implementation is designed to be fully backwards compatible
and to be MFC-able to 7.x (and 6.x)
Currently the only protocol that can make use of the multiple tables is IPv4
Similar functionality exists in OpenBSD and Linux.
From my notes:
-----
One thing where FreeBSD has been falling behind, and which by chance I
have some time to work on is "policy based routing", which allows
different
packet streams to be routed by more than just the destination address.
Constraints:
------------
I want to make some form of this available in the 6.x tree
(and by extension 7.x) , but FreeBSD in general needs it so I might as
well do it in -current and back port the portions I need.
One of the ways that this can be done is to have the ability to
instantiate multiple kernel routing tables (which I will now
refer to as "Forwarding Information Bases" or "FIBs" for political
correctness reasons). Which FIB a particular packet uses to make
the next hop decision can be decided by a number of mechanisms.
The policies these mechanisms implement are the "Policies" referred
to in "Policy based routing".
One of the constraints I have if I try to back port this work to
6.x is that it must be implemented as a EXTENSION to the existing
ABIs in 6.x so that third party applications do not need to be
recompiled in timespan of the branch.
This first version will not have some of the bells and whistles that
will come with later versions. It will, for example, be limited to 16
tables in the first commit.
Implementation method, Compatible version. (part 1)
-------------------------------
For this reason I have implemented a "sufficient subset" of a
multiple routing table solution in Perforce, and back-ported it
to 6.x. (also in Perforce though not always caught up with what I
have done in -current/P4). The subset allows a number of FIBs
to be defined at compile time (8 is sufficient for my purposes in 6.x)
and implements the changes needed to allow IPV4 to use them. I have not
done the changes for ipv6 simply because I do not need it, and I do not
have enough knowledge of ipv6 (e.g. neighbor discovery) needed to do it.
Other protocol families are left untouched and should there be
users with proprietary protocol families, they should continue to work
and be oblivious to the existence of the extra FIBs.
To understand how this is done, one must know that the current FIB
code starts everything off with a single dimensional array of
pointers to FIB head structures (One per protocol family), each of
which in turn points to the trie of routes available to that family.
The basic change in the ABI compatible version of the change is to
extent that array to be a 2 dimensional array, so that
instead of protocol family X looking at rt_tables[X] for the
table it needs, it looks at rt_tables[Y][X] when for all
protocol families except ipv4 Y is always 0.
Code that is unaware of the change always just sees the first row
of the table, which of course looks just like the one dimensional
array that existed before.
The entry points rtrequest(), rtalloc(), rtalloc1(), rtalloc_ign()
are all maintained, but refer only to the first row of the array,
so that existing callers in proprietary protocols can continue to
do the "right thing".
Some new entry points are added, for the exclusive use of ipv4 code
called in_rtrequest(), in_rtalloc(), in_rtalloc1() and in_rtalloc_ign(),
which have an extra argument which refers the code to the correct row.
In addition, there are some new entry points (currently called
rtalloc_fib() and friends) that check the Address family being
looked up and call either rtalloc() (and friends) if the protocol
is not IPv4 forcing the action to row 0 or to the appropriate row
if it IS IPv4 (and that info is available). These are for calling
from code that is not specific to any particular protocol. The way
these are implemented would change in the non ABI preserving code
to be added later.
One feature of the first version of the code is that for ipv4,
the interface routes show up automatically on all the FIBs, so
that no matter what FIB you select you always have the basic
direct attached hosts available to you. (rtinit() does this
automatically).
You CAN delete an interface route from one FIB should you want
to but by default it's there. ARP information is also available
in each FIB. It's assumed that the same machine would have the
same MAC address, regardless of which FIB you are using to get
to it.
This brings us as to how the correct FIB is selected for an outgoing
IPV4 packet.
Firstly, all packets have a FIB associated with them. if nothing
has been done to change it, it will be FIB 0. The FIB is changed
in the following ways.
Packets fall into one of a number of classes.
1/ locally generated packets, coming from a socket/PCB.
Such packets select a FIB from a number associated with the
socket/PCB. This in turn is inherited from the process,
but can be changed by a socket option. The process in turn
inherits it on fork. I have written a utility call setfib
that acts a bit like nice..
setfib -3 ping target.example.com # will use fib 3 for ping.
It is an obvious extension to make it a property of a jail
but I have not done so. It can be achieved by combining the setfib and
jail commands.
2/ packets received on an interface for forwarding.
By default these packets would use table 0,
(or possibly a number settable in a sysctl(not yet)).
but prior to routing the firewall can inspect them (see below).
(possibly in the future you may be able to associate a FIB
with packets received on an interface.. An ifconfig arg, but not yet.)
3/ packets inspected by a packet classifier, which can arbitrarily
associate a fib with it on a packet by packet basis.
A fib assigned to a packet by a packet classifier
(such as ipfw) would over-ride a fib associated by
a more default source. (such as cases 1 or 2).
4/ a tcp listen socket associated with a fib will generate
accept sockets that are associated with that same fib.
5/ Packets generated in response to some other packet (e.g. reset
or icmp packets). These should use the FIB associated with the
packet being reponded to.
6/ Packets generated during encapsulation.
gif, tun and other tunnel interfaces will encapsulate using the FIB
that was in effect withthe proces that set up the tunnel.
thus setfib 1 ifconfig gif0 [tunnel instructions]
will set the fib for the tunnel to use to be fib 1.
Routing messages would be associated with their
process, and thus select one FIB or another.
messages from the kernel would be associated with the fib they
refer to and would only be received by a routing socket associated
with that fib. (not yet implemented)
In addition Netstat has been edited to be able to cope with the
fact that the array is now 2 dimensional. (It looks in system
memory using libkvm (!)). Old versions of netstat see only the first FIB.
In addition two sysctls are added to give:
a) the number of FIBs compiled in (active)
b) the default FIB of the calling process.
Early testing experience:
-------------------------
Basically our (IronPort's) appliance does this functionality already
using ipfw fwd but that method has some drawbacks.
For example,
It can't fully simulate a routing table because it can't influence the
socket's choice of local address when a connect() is done.
Testing during the generating of these changes has been
remarkably smooth so far. Multiple tables have co-existed
with no notable side effects, and packets have been routes
accordingly.
ipfw has grown 2 new keywords:
setfib N ip from anay to any
count ip from any to any fib N
In pf there seems to be a requirement to be able to give symbolic names to the
fibs but I do not have that capacity. I am not sure if it is required.
SCTP has interestingly enough built in support for this, called VRFs
in Cisco parlance. it will be interesting to see how that handles it
when it suddenly actually does something.
Where to next:
--------------------
After committing the ABI compatible version and MFCing it, I'd
like to proceed in a forward direction in -current. this will
result in some roto-tilling in the routing code.
Firstly: the current code's idea of having a separate tree per
protocol family, all of the same format, and pointed to by the
1 dimensional array is a bit silly. Especially when one considers that
there is code that makes assumptions about every protocol having the
same internal structures there. Some protocols don't WANT that
sort of structure. (for example the whole idea of a netmask is foreign
to appletalk). This needs to be made opaque to the external code.
My suggested first change is to add routing method pointers to the
'domain' structure, along with information pointing the data.
instead of having an array of pointers to uniform structures,
there would be an array pointing to the 'domain' structures
for each protocol address domain (protocol family),
and the methods this reached would be called. The methods would have
an argument that gives FIB number, but the protocol would be free
to ignore it.
When the ABI can be changed it raises the possibilty of the
addition of a fib entry into the "struct route". Currently,
the structure contains the sockaddr of the desination, and the resulting
fib entry. To make this work fully, one could add a fib number
so that given an address and a fib, one can find the third element, the
fib entry.
Interaction with the ARP layer/ LL layer would need to be
revisited as well. Qing Li has been working on this already.
This work was sponsored by Ironport Systems/Cisco
Reviewed by: several including rwatson, bz and mlair (parts each)
Obtained from: Ironport systems/Cisco
2008-05-09 23:03:00 +00:00
|
|
|
verify_path(src_ip, NULL, args->f_id.fib)));
|
2003-03-15 01:13:00 +00:00
|
|
|
break;
|
|
|
|
|
2004-08-09 16:12:10 +00:00
|
|
|
case O_ANTISPOOF:
|
|
|
|
/* Outgoing packets automatically pass/match */
|
|
|
|
if (oif == NULL && hlen > 0 &&
|
2005-06-16 14:55:58 +00:00
|
|
|
( (is_ipv4 && in_localaddr(src_ip))
|
|
|
|
#ifdef INET6
|
|
|
|
|| (is_ipv6 &&
|
|
|
|
in6_localaddr(&(args->f_id.src_ip6)))
|
|
|
|
#endif
|
|
|
|
))
|
|
|
|
match =
|
|
|
|
#ifdef INET6
|
|
|
|
is_ipv6 ? verify_path6(
|
|
|
|
&(args->f_id.src_ip6),
|
|
|
|
m->m_pkthdr.rcvif) :
|
|
|
|
#endif
|
|
|
|
verify_path(src_ip,
|
Add code to allow the system to handle multiple routing tables.
This particular implementation is designed to be fully backwards compatible
and to be MFC-able to 7.x (and 6.x)
Currently the only protocol that can make use of the multiple tables is IPv4
Similar functionality exists in OpenBSD and Linux.
From my notes:
-----
One thing where FreeBSD has been falling behind, and which by chance I
have some time to work on is "policy based routing", which allows
different
packet streams to be routed by more than just the destination address.
Constraints:
------------
I want to make some form of this available in the 6.x tree
(and by extension 7.x) , but FreeBSD in general needs it so I might as
well do it in -current and back port the portions I need.
One of the ways that this can be done is to have the ability to
instantiate multiple kernel routing tables (which I will now
refer to as "Forwarding Information Bases" or "FIBs" for political
correctness reasons). Which FIB a particular packet uses to make
the next hop decision can be decided by a number of mechanisms.
The policies these mechanisms implement are the "Policies" referred
to in "Policy based routing".
One of the constraints I have if I try to back port this work to
6.x is that it must be implemented as a EXTENSION to the existing
ABIs in 6.x so that third party applications do not need to be
recompiled in timespan of the branch.
This first version will not have some of the bells and whistles that
will come with later versions. It will, for example, be limited to 16
tables in the first commit.
Implementation method, Compatible version. (part 1)
-------------------------------
For this reason I have implemented a "sufficient subset" of a
multiple routing table solution in Perforce, and back-ported it
to 6.x. (also in Perforce though not always caught up with what I
have done in -current/P4). The subset allows a number of FIBs
to be defined at compile time (8 is sufficient for my purposes in 6.x)
and implements the changes needed to allow IPV4 to use them. I have not
done the changes for ipv6 simply because I do not need it, and I do not
have enough knowledge of ipv6 (e.g. neighbor discovery) needed to do it.
Other protocol families are left untouched and should there be
users with proprietary protocol families, they should continue to work
and be oblivious to the existence of the extra FIBs.
To understand how this is done, one must know that the current FIB
code starts everything off with a single dimensional array of
pointers to FIB head structures (One per protocol family), each of
which in turn points to the trie of routes available to that family.
The basic change in the ABI compatible version of the change is to
extent that array to be a 2 dimensional array, so that
instead of protocol family X looking at rt_tables[X] for the
table it needs, it looks at rt_tables[Y][X] when for all
protocol families except ipv4 Y is always 0.
Code that is unaware of the change always just sees the first row
of the table, which of course looks just like the one dimensional
array that existed before.
The entry points rtrequest(), rtalloc(), rtalloc1(), rtalloc_ign()
are all maintained, but refer only to the first row of the array,
so that existing callers in proprietary protocols can continue to
do the "right thing".
Some new entry points are added, for the exclusive use of ipv4 code
called in_rtrequest(), in_rtalloc(), in_rtalloc1() and in_rtalloc_ign(),
which have an extra argument which refers the code to the correct row.
In addition, there are some new entry points (currently called
rtalloc_fib() and friends) that check the Address family being
looked up and call either rtalloc() (and friends) if the protocol
is not IPv4 forcing the action to row 0 or to the appropriate row
if it IS IPv4 (and that info is available). These are for calling
from code that is not specific to any particular protocol. The way
these are implemented would change in the non ABI preserving code
to be added later.
One feature of the first version of the code is that for ipv4,
the interface routes show up automatically on all the FIBs, so
that no matter what FIB you select you always have the basic
direct attached hosts available to you. (rtinit() does this
automatically).
You CAN delete an interface route from one FIB should you want
to but by default it's there. ARP information is also available
in each FIB. It's assumed that the same machine would have the
same MAC address, regardless of which FIB you are using to get
to it.
This brings us as to how the correct FIB is selected for an outgoing
IPV4 packet.
Firstly, all packets have a FIB associated with them. if nothing
has been done to change it, it will be FIB 0. The FIB is changed
in the following ways.
Packets fall into one of a number of classes.
1/ locally generated packets, coming from a socket/PCB.
Such packets select a FIB from a number associated with the
socket/PCB. This in turn is inherited from the process,
but can be changed by a socket option. The process in turn
inherits it on fork. I have written a utility call setfib
that acts a bit like nice..
setfib -3 ping target.example.com # will use fib 3 for ping.
It is an obvious extension to make it a property of a jail
but I have not done so. It can be achieved by combining the setfib and
jail commands.
2/ packets received on an interface for forwarding.
By default these packets would use table 0,
(or possibly a number settable in a sysctl(not yet)).
but prior to routing the firewall can inspect them (see below).
(possibly in the future you may be able to associate a FIB
with packets received on an interface.. An ifconfig arg, but not yet.)
3/ packets inspected by a packet classifier, which can arbitrarily
associate a fib with it on a packet by packet basis.
A fib assigned to a packet by a packet classifier
(such as ipfw) would over-ride a fib associated by
a more default source. (such as cases 1 or 2).
4/ a tcp listen socket associated with a fib will generate
accept sockets that are associated with that same fib.
5/ Packets generated in response to some other packet (e.g. reset
or icmp packets). These should use the FIB associated with the
packet being reponded to.
6/ Packets generated during encapsulation.
gif, tun and other tunnel interfaces will encapsulate using the FIB
that was in effect withthe proces that set up the tunnel.
thus setfib 1 ifconfig gif0 [tunnel instructions]
will set the fib for the tunnel to use to be fib 1.
Routing messages would be associated with their
process, and thus select one FIB or another.
messages from the kernel would be associated with the fib they
refer to and would only be received by a routing socket associated
with that fib. (not yet implemented)
In addition Netstat has been edited to be able to cope with the
fact that the array is now 2 dimensional. (It looks in system
memory using libkvm (!)). Old versions of netstat see only the first FIB.
In addition two sysctls are added to give:
a) the number of FIBs compiled in (active)
b) the default FIB of the calling process.
Early testing experience:
-------------------------
Basically our (IronPort's) appliance does this functionality already
using ipfw fwd but that method has some drawbacks.
For example,
It can't fully simulate a routing table because it can't influence the
socket's choice of local address when a connect() is done.
Testing during the generating of these changes has been
remarkably smooth so far. Multiple tables have co-existed
with no notable side effects, and packets have been routes
accordingly.
ipfw has grown 2 new keywords:
setfib N ip from anay to any
count ip from any to any fib N
In pf there seems to be a requirement to be able to give symbolic names to the
fibs but I do not have that capacity. I am not sure if it is required.
SCTP has interestingly enough built in support for this, called VRFs
in Cisco parlance. it will be interesting to see how that handles it
when it suddenly actually does something.
Where to next:
--------------------
After committing the ABI compatible version and MFCing it, I'd
like to proceed in a forward direction in -current. this will
result in some roto-tilling in the routing code.
Firstly: the current code's idea of having a separate tree per
protocol family, all of the same format, and pointed to by the
1 dimensional array is a bit silly. Especially when one considers that
there is code that makes assumptions about every protocol having the
same internal structures there. Some protocols don't WANT that
sort of structure. (for example the whole idea of a netmask is foreign
to appletalk). This needs to be made opaque to the external code.
My suggested first change is to add routing method pointers to the
'domain' structure, along with information pointing the data.
instead of having an array of pointers to uniform structures,
there would be an array pointing to the 'domain' structures
for each protocol address domain (protocol family),
and the methods this reached would be called. The methods would have
an argument that gives FIB number, but the protocol would be free
to ignore it.
When the ABI can be changed it raises the possibilty of the
addition of a fib entry into the "struct route". Currently,
the structure contains the sockaddr of the desination, and the resulting
fib entry. To make this work fully, one could add a fib number
so that given an address and a fib, one can find the third element, the
fib entry.
Interaction with the ARP layer/ LL layer would need to be
revisited as well. Qing Li has been working on this already.
This work was sponsored by Ironport Systems/Cisco
Reviewed by: several including rwatson, bz and mlair (parts each)
Obtained from: Ironport systems/Cisco
2008-05-09 23:03:00 +00:00
|
|
|
m->m_pkthdr.rcvif,
|
|
|
|
args->f_id.fib);
|
2004-08-09 16:12:10 +00:00
|
|
|
else
|
|
|
|
match = 1;
|
|
|
|
break;
|
|
|
|
|
2003-07-04 21:42:32 +00:00
|
|
|
case O_IPSEC:
|
2007-07-03 12:13:45 +00:00
|
|
|
#ifdef IPSEC
|
2003-07-04 21:42:32 +00:00
|
|
|
match = (m_tag_find(m,
|
|
|
|
PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL);
|
|
|
|
#endif
|
|
|
|
/* otherwise no match */
|
|
|
|
break;
|
|
|
|
|
2005-09-14 07:53:54 +00:00
|
|
|
#ifdef INET6
|
2005-04-18 18:35:05 +00:00
|
|
|
case O_IP6_SRC:
|
|
|
|
match = is_ipv6 &&
|
|
|
|
IN6_ARE_ADDR_EQUAL(&args->f_id.src_ip6,
|
|
|
|
&((ipfw_insn_ip6 *)cmd)->addr6);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case O_IP6_DST:
|
|
|
|
match = is_ipv6 &&
|
|
|
|
IN6_ARE_ADDR_EQUAL(&args->f_id.dst_ip6,
|
|
|
|
&((ipfw_insn_ip6 *)cmd)->addr6);
|
|
|
|
break;
|
|
|
|
case O_IP6_SRC_MASK:
|
|
|
|
case O_IP6_DST_MASK:
|
|
|
|
if (is_ipv6) {
|
2006-09-16 10:27:05 +00:00
|
|
|
int i = cmdlen - 1;
|
|
|
|
struct in6_addr p;
|
|
|
|
struct in6_addr *d =
|
|
|
|
&((ipfw_insn_ip6 *)cmd)->addr6;
|
|
|
|
|
|
|
|
for (; !match && i > 0; d += 2,
|
|
|
|
i -= F_INSN_SIZE(struct in6_addr)
|
|
|
|
* 2) {
|
|
|
|
p = (cmd->opcode ==
|
|
|
|
O_IP6_SRC_MASK) ?
|
|
|
|
args->f_id.src_ip6:
|
|
|
|
args->f_id.dst_ip6;
|
|
|
|
APPLY_MASK(&p, &d[1]);
|
|
|
|
match =
|
|
|
|
IN6_ARE_ADDR_EQUAL(&d[0],
|
|
|
|
&p);
|
|
|
|
}
|
2005-04-18 18:35:05 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case O_FLOW6ID:
|
|
|
|
match = is_ipv6 &&
|
|
|
|
flow6id_match(args->f_id.flow_id6,
|
|
|
|
(ipfw_insn_u32 *) cmd);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case O_EXT_HDR:
|
|
|
|
match = is_ipv6 &&
|
|
|
|
(ext_hd & ((ipfw_insn *) cmd)->arg1);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case O_IP6:
|
|
|
|
match = is_ipv6;
|
|
|
|
break;
|
2005-04-19 09:56:14 +00:00
|
|
|
#endif
|
2005-04-18 18:35:05 +00:00
|
|
|
|
2005-06-03 01:10:28 +00:00
|
|
|
case O_IP4:
|
|
|
|
match = is_ipv4;
|
|
|
|
break;
|
|
|
|
|
2006-06-15 09:39:22 +00:00
|
|
|
case O_TAG: {
|
2010-01-04 19:01:22 +00:00
|
|
|
struct m_tag *mtag;
|
2006-06-15 09:39:22 +00:00
|
|
|
uint32_t tag = (cmd->arg1 == IP_FW_TABLEARG) ?
|
|
|
|
tablearg : cmd->arg1;
|
|
|
|
|
2006-05-24 13:09:55 +00:00
|
|
|
/* Packet is already tagged with this tag? */
|
2006-06-15 09:39:22 +00:00
|
|
|
mtag = m_tag_locate(m, MTAG_IPFW, tag, NULL);
|
|
|
|
|
2006-05-24 13:09:55 +00:00
|
|
|
/* We have `untag' action when F_NOT flag is
|
|
|
|
* present. And we must remove this mtag from
|
|
|
|
* mbuf and reset `match' to zero (`match' will
|
|
|
|
* be inversed later).
|
|
|
|
* Otherwise we should allocate new mtag and
|
|
|
|
* push it into mbuf.
|
|
|
|
*/
|
|
|
|
if (cmd->len & F_NOT) { /* `untag' action */
|
|
|
|
if (mtag != NULL)
|
|
|
|
m_tag_delete(m, mtag);
|
2010-01-04 19:01:22 +00:00
|
|
|
match = 0;
|
2011-04-12 15:20:34 +00:00
|
|
|
} else {
|
|
|
|
if (mtag == NULL) {
|
|
|
|
mtag = m_tag_alloc( MTAG_IPFW,
|
|
|
|
tag, 0, M_NOWAIT);
|
|
|
|
if (mtag != NULL)
|
|
|
|
m_tag_prepend(m, mtag);
|
|
|
|
}
|
2010-01-04 19:01:22 +00:00
|
|
|
match = 1;
|
2006-05-24 13:09:55 +00:00
|
|
|
}
|
|
|
|
break;
|
2006-06-15 09:39:22 +00:00
|
|
|
}
|
|
|
|
|
Add code to allow the system to handle multiple routing tables.
This particular implementation is designed to be fully backwards compatible
and to be MFC-able to 7.x (and 6.x)
Currently the only protocol that can make use of the multiple tables is IPv4
Similar functionality exists in OpenBSD and Linux.
From my notes:
-----
One thing where FreeBSD has been falling behind, and which by chance I
have some time to work on is "policy based routing", which allows
different
packet streams to be routed by more than just the destination address.
Constraints:
------------
I want to make some form of this available in the 6.x tree
(and by extension 7.x) , but FreeBSD in general needs it so I might as
well do it in -current and back port the portions I need.
One of the ways that this can be done is to have the ability to
instantiate multiple kernel routing tables (which I will now
refer to as "Forwarding Information Bases" or "FIBs" for political
correctness reasons). Which FIB a particular packet uses to make
the next hop decision can be decided by a number of mechanisms.
The policies these mechanisms implement are the "Policies" referred
to in "Policy based routing".
One of the constraints I have if I try to back port this work to
6.x is that it must be implemented as a EXTENSION to the existing
ABIs in 6.x so that third party applications do not need to be
recompiled in timespan of the branch.
This first version will not have some of the bells and whistles that
will come with later versions. It will, for example, be limited to 16
tables in the first commit.
Implementation method, Compatible version. (part 1)
-------------------------------
For this reason I have implemented a "sufficient subset" of a
multiple routing table solution in Perforce, and back-ported it
to 6.x. (also in Perforce though not always caught up with what I
have done in -current/P4). The subset allows a number of FIBs
to be defined at compile time (8 is sufficient for my purposes in 6.x)
and implements the changes needed to allow IPV4 to use them. I have not
done the changes for ipv6 simply because I do not need it, and I do not
have enough knowledge of ipv6 (e.g. neighbor discovery) needed to do it.
Other protocol families are left untouched and should there be
users with proprietary protocol families, they should continue to work
and be oblivious to the existence of the extra FIBs.
To understand how this is done, one must know that the current FIB
code starts everything off with a single dimensional array of
pointers to FIB head structures (One per protocol family), each of
which in turn points to the trie of routes available to that family.
The basic change in the ABI compatible version of the change is to
extent that array to be a 2 dimensional array, so that
instead of protocol family X looking at rt_tables[X] for the
table it needs, it looks at rt_tables[Y][X] when for all
protocol families except ipv4 Y is always 0.
Code that is unaware of the change always just sees the first row
of the table, which of course looks just like the one dimensional
array that existed before.
The entry points rtrequest(), rtalloc(), rtalloc1(), rtalloc_ign()
are all maintained, but refer only to the first row of the array,
so that existing callers in proprietary protocols can continue to
do the "right thing".
Some new entry points are added, for the exclusive use of ipv4 code
called in_rtrequest(), in_rtalloc(), in_rtalloc1() and in_rtalloc_ign(),
which have an extra argument which refers the code to the correct row.
In addition, there are some new entry points (currently called
rtalloc_fib() and friends) that check the Address family being
looked up and call either rtalloc() (and friends) if the protocol
is not IPv4 forcing the action to row 0 or to the appropriate row
if it IS IPv4 (and that info is available). These are for calling
from code that is not specific to any particular protocol. The way
these are implemented would change in the non ABI preserving code
to be added later.
One feature of the first version of the code is that for ipv4,
the interface routes show up automatically on all the FIBs, so
that no matter what FIB you select you always have the basic
direct attached hosts available to you. (rtinit() does this
automatically).
You CAN delete an interface route from one FIB should you want
to but by default it's there. ARP information is also available
in each FIB. It's assumed that the same machine would have the
same MAC address, regardless of which FIB you are using to get
to it.
This brings us as to how the correct FIB is selected for an outgoing
IPV4 packet.
Firstly, all packets have a FIB associated with them. if nothing
has been done to change it, it will be FIB 0. The FIB is changed
in the following ways.
Packets fall into one of a number of classes.
1/ locally generated packets, coming from a socket/PCB.
Such packets select a FIB from a number associated with the
socket/PCB. This in turn is inherited from the process,
but can be changed by a socket option. The process in turn
inherits it on fork. I have written a utility call setfib
that acts a bit like nice..
setfib -3 ping target.example.com # will use fib 3 for ping.
It is an obvious extension to make it a property of a jail
but I have not done so. It can be achieved by combining the setfib and
jail commands.
2/ packets received on an interface for forwarding.
By default these packets would use table 0,
(or possibly a number settable in a sysctl(not yet)).
but prior to routing the firewall can inspect them (see below).
(possibly in the future you may be able to associate a FIB
with packets received on an interface.. An ifconfig arg, but not yet.)
3/ packets inspected by a packet classifier, which can arbitrarily
associate a fib with it on a packet by packet basis.
A fib assigned to a packet by a packet classifier
(such as ipfw) would over-ride a fib associated by
a more default source. (such as cases 1 or 2).
4/ a tcp listen socket associated with a fib will generate
accept sockets that are associated with that same fib.
5/ Packets generated in response to some other packet (e.g. reset
or icmp packets). These should use the FIB associated with the
packet being reponded to.
6/ Packets generated during encapsulation.
gif, tun and other tunnel interfaces will encapsulate using the FIB
that was in effect withthe proces that set up the tunnel.
thus setfib 1 ifconfig gif0 [tunnel instructions]
will set the fib for the tunnel to use to be fib 1.
Routing messages would be associated with their
process, and thus select one FIB or another.
messages from the kernel would be associated with the fib they
refer to and would only be received by a routing socket associated
with that fib. (not yet implemented)
In addition Netstat has been edited to be able to cope with the
fact that the array is now 2 dimensional. (It looks in system
memory using libkvm (!)). Old versions of netstat see only the first FIB.
In addition two sysctls are added to give:
a) the number of FIBs compiled in (active)
b) the default FIB of the calling process.
Early testing experience:
-------------------------
Basically our (IronPort's) appliance does this functionality already
using ipfw fwd but that method has some drawbacks.
For example,
It can't fully simulate a routing table because it can't influence the
socket's choice of local address when a connect() is done.
Testing during the generating of these changes has been
remarkably smooth so far. Multiple tables have co-existed
with no notable side effects, and packets have been routes
accordingly.
ipfw has grown 2 new keywords:
setfib N ip from anay to any
count ip from any to any fib N
In pf there seems to be a requirement to be able to give symbolic names to the
fibs but I do not have that capacity. I am not sure if it is required.
SCTP has interestingly enough built in support for this, called VRFs
in Cisco parlance. it will be interesting to see how that handles it
when it suddenly actually does something.
Where to next:
--------------------
After committing the ABI compatible version and MFCing it, I'd
like to proceed in a forward direction in -current. this will
result in some roto-tilling in the routing code.
Firstly: the current code's idea of having a separate tree per
protocol family, all of the same format, and pointed to by the
1 dimensional array is a bit silly. Especially when one considers that
there is code that makes assumptions about every protocol having the
same internal structures there. Some protocols don't WANT that
sort of structure. (for example the whole idea of a netmask is foreign
to appletalk). This needs to be made opaque to the external code.
My suggested first change is to add routing method pointers to the
'domain' structure, along with information pointing the data.
instead of having an array of pointers to uniform structures,
there would be an array pointing to the 'domain' structures
for each protocol address domain (protocol family),
and the methods this reached would be called. The methods would have
an argument that gives FIB number, but the protocol would be free
to ignore it.
When the ABI can be changed it raises the possibilty of the
addition of a fib entry into the "struct route". Currently,
the structure contains the sockaddr of the desination, and the resulting
fib entry. To make this work fully, one could add a fib number
so that given an address and a fib, one can find the third element, the
fib entry.
Interaction with the ARP layer/ LL layer would need to be
revisited as well. Qing Li has been working on this already.
This work was sponsored by Ironport Systems/Cisco
Reviewed by: several including rwatson, bz and mlair (parts each)
Obtained from: Ironport systems/Cisco
2008-05-09 23:03:00 +00:00
|
|
|
case O_FIB: /* try match the specified fib */
|
|
|
|
if (args->f_id.fib == cmd->arg1)
|
|
|
|
match = 1;
|
|
|
|
break;
|
|
|
|
|
2010-11-12 13:05:17 +00:00
|
|
|
case O_SOCKARG: {
|
|
|
|
struct inpcb *inp = args->inp;
|
|
|
|
struct inpcbinfo *pi;
|
|
|
|
|
|
|
|
if (is_ipv6) /* XXX can we remove this ? */
|
|
|
|
break;
|
|
|
|
|
|
|
|
if (proto == IPPROTO_TCP)
|
|
|
|
pi = &V_tcbinfo;
|
|
|
|
else if (proto == IPPROTO_UDP)
|
|
|
|
pi = &V_udbinfo;
|
|
|
|
else
|
|
|
|
break;
|
|
|
|
|
Decompose the current single inpcbinfo lock into two locks:
- The existing ipi_lock continues to protect the global inpcb list and
inpcb counter. This lock is now relegated to a small number of
allocation and free operations, and occasional operations that walk
all connections (including, awkwardly, certain UDP multicast receive
operations -- something to revisit).
- A new ipi_hash_lock protects the two inpcbinfo hash tables for
looking up connections and bound sockets, manipulated using new
INP_HASH_*() macros. This lock, combined with inpcb locks, protects
the 4-tuple address space.
Unlike the current ipi_lock, ipi_hash_lock follows the individual inpcb
connection locks, so may be acquired while manipulating a connection on
which a lock is already held, avoiding the need to acquire the inpcbinfo
lock preemptively when a binding change might later be required. As a
result, however, lookup operations necessarily go through a reference
acquire while holding the lookup lock, later acquiring an inpcb lock --
if required.
A new function in_pcblookup() looks up connections, and accepts flags
indicating how to return the inpcb. Due to lock order changes, callers
no longer need acquire locks before performing a lookup: the lookup
routine will acquire the ipi_hash_lock as needed. In the future, it will
also be able to use alternative lookup and locking strategies
transparently to callers, such as pcbgroup lookup. New lookup flags are,
supplementing the existing INPLOOKUP_WILDCARD flag:
INPLOOKUP_RLOCKPCB - Acquire a read lock on the returned inpcb
INPLOOKUP_WLOCKPCB - Acquire a write lock on the returned inpcb
Callers must pass exactly one of these flags (for the time being).
Some notes:
- All protocols are updated to work within the new regime; especially,
TCP, UDPv4, and UDPv6. pcbinfo ipi_lock acquisitions are largely
eliminated, and global hash lock hold times are dramatically reduced
compared to previous locking.
- The TCP syncache still relies on the pcbinfo lock, something that we
may want to revisit.
- Support for reverting to the FreeBSD 7.x locking strategy in TCP input
is no longer available -- hash lookup locks are now held only very
briefly during inpcb lookup, rather than for potentially extended
periods. However, the pcbinfo ipi_lock will still be acquired if a
connection state might change such that a connection is added or
removed.
- Raw IP sockets continue to use the pcbinfo ipi_lock for protection,
due to maintaining their own hash tables.
- The interface in6_pcblookup_hash_locked() is maintained, which allows
callers to acquire hash locks and perform one or more lookups atomically
with 4-tuple allocation: this is required only for TCPv6, as there is no
in6_pcbconnect_setup(), which there should be.
- UDPv6 locking remains significantly more conservative than UDPv4
locking, which relates to source address selection. This needs
attention, as it likely significantly reduces parallelism in this code
for multithreaded socket use (such as in BIND).
- In the UDPv4 and UDPv6 multicast cases, we need to revisit locking
somewhat, as they relied on ipi_lock to stablise 4-tuple matches, which
is no longer sufficient. A second check once the inpcb lock is held
should do the trick, keeping the general case from requiring the inpcb
lock for every inpcb visited.
- This work reminds us that we need to revisit locking of the v4/v6 flags,
which may be accessed lock-free both before and after this change.
- Right now, a single lock name is used for the pcbhash lock -- this is
undesirable, and probably another argument is required to take care of
this (or a char array name field in the pcbinfo?).
This is not an MFC candidate for 8.x due to its impact on lookup and
locking semantics. It's possible some of these issues could be worked
around with compatibility wrappers, if necessary.
Reviewed by: bz
Sponsored by: Juniper Networks, Inc.
2011-05-30 09:43:55 +00:00
|
|
|
/*
|
|
|
|
* XXXRW: so_user_cookie should almost
|
|
|
|
* certainly be inp_user_cookie?
|
|
|
|
*/
|
|
|
|
|
2010-11-12 13:05:17 +00:00
|
|
|
/* For incomming packet, lookup up the
|
|
|
|
inpcb using the src/dest ip/port tuple */
|
|
|
|
if (inp == NULL) {
|
Decompose the current single inpcbinfo lock into two locks:
- The existing ipi_lock continues to protect the global inpcb list and
inpcb counter. This lock is now relegated to a small number of
allocation and free operations, and occasional operations that walk
all connections (including, awkwardly, certain UDP multicast receive
operations -- something to revisit).
- A new ipi_hash_lock protects the two inpcbinfo hash tables for
looking up connections and bound sockets, manipulated using new
INP_HASH_*() macros. This lock, combined with inpcb locks, protects
the 4-tuple address space.
Unlike the current ipi_lock, ipi_hash_lock follows the individual inpcb
connection locks, so may be acquired while manipulating a connection on
which a lock is already held, avoiding the need to acquire the inpcbinfo
lock preemptively when a binding change might later be required. As a
result, however, lookup operations necessarily go through a reference
acquire while holding the lookup lock, later acquiring an inpcb lock --
if required.
A new function in_pcblookup() looks up connections, and accepts flags
indicating how to return the inpcb. Due to lock order changes, callers
no longer need acquire locks before performing a lookup: the lookup
routine will acquire the ipi_hash_lock as needed. In the future, it will
also be able to use alternative lookup and locking strategies
transparently to callers, such as pcbgroup lookup. New lookup flags are,
supplementing the existing INPLOOKUP_WILDCARD flag:
INPLOOKUP_RLOCKPCB - Acquire a read lock on the returned inpcb
INPLOOKUP_WLOCKPCB - Acquire a write lock on the returned inpcb
Callers must pass exactly one of these flags (for the time being).
Some notes:
- All protocols are updated to work within the new regime; especially,
TCP, UDPv4, and UDPv6. pcbinfo ipi_lock acquisitions are largely
eliminated, and global hash lock hold times are dramatically reduced
compared to previous locking.
- The TCP syncache still relies on the pcbinfo lock, something that we
may want to revisit.
- Support for reverting to the FreeBSD 7.x locking strategy in TCP input
is no longer available -- hash lookup locks are now held only very
briefly during inpcb lookup, rather than for potentially extended
periods. However, the pcbinfo ipi_lock will still be acquired if a
connection state might change such that a connection is added or
removed.
- Raw IP sockets continue to use the pcbinfo ipi_lock for protection,
due to maintaining their own hash tables.
- The interface in6_pcblookup_hash_locked() is maintained, which allows
callers to acquire hash locks and perform one or more lookups atomically
with 4-tuple allocation: this is required only for TCPv6, as there is no
in6_pcbconnect_setup(), which there should be.
- UDPv6 locking remains significantly more conservative than UDPv4
locking, which relates to source address selection. This needs
attention, as it likely significantly reduces parallelism in this code
for multithreaded socket use (such as in BIND).
- In the UDPv4 and UDPv6 multicast cases, we need to revisit locking
somewhat, as they relied on ipi_lock to stablise 4-tuple matches, which
is no longer sufficient. A second check once the inpcb lock is held
should do the trick, keeping the general case from requiring the inpcb
lock for every inpcb visited.
- This work reminds us that we need to revisit locking of the v4/v6 flags,
which may be accessed lock-free both before and after this change.
- Right now, a single lock name is used for the pcbhash lock -- this is
undesirable, and probably another argument is required to take care of
this (or a char array name field in the pcbinfo?).
This is not an MFC candidate for 8.x due to its impact on lookup and
locking semantics. It's possible some of these issues could be worked
around with compatibility wrappers, if necessary.
Reviewed by: bz
Sponsored by: Juniper Networks, Inc.
2011-05-30 09:43:55 +00:00
|
|
|
inp = in_pcblookup(pi,
|
2010-11-12 13:05:17 +00:00
|
|
|
src_ip, htons(src_port),
|
|
|
|
dst_ip, htons(dst_port),
|
Decompose the current single inpcbinfo lock into two locks:
- The existing ipi_lock continues to protect the global inpcb list and
inpcb counter. This lock is now relegated to a small number of
allocation and free operations, and occasional operations that walk
all connections (including, awkwardly, certain UDP multicast receive
operations -- something to revisit).
- A new ipi_hash_lock protects the two inpcbinfo hash tables for
looking up connections and bound sockets, manipulated using new
INP_HASH_*() macros. This lock, combined with inpcb locks, protects
the 4-tuple address space.
Unlike the current ipi_lock, ipi_hash_lock follows the individual inpcb
connection locks, so may be acquired while manipulating a connection on
which a lock is already held, avoiding the need to acquire the inpcbinfo
lock preemptively when a binding change might later be required. As a
result, however, lookup operations necessarily go through a reference
acquire while holding the lookup lock, later acquiring an inpcb lock --
if required.
A new function in_pcblookup() looks up connections, and accepts flags
indicating how to return the inpcb. Due to lock order changes, callers
no longer need acquire locks before performing a lookup: the lookup
routine will acquire the ipi_hash_lock as needed. In the future, it will
also be able to use alternative lookup and locking strategies
transparently to callers, such as pcbgroup lookup. New lookup flags are,
supplementing the existing INPLOOKUP_WILDCARD flag:
INPLOOKUP_RLOCKPCB - Acquire a read lock on the returned inpcb
INPLOOKUP_WLOCKPCB - Acquire a write lock on the returned inpcb
Callers must pass exactly one of these flags (for the time being).
Some notes:
- All protocols are updated to work within the new regime; especially,
TCP, UDPv4, and UDPv6. pcbinfo ipi_lock acquisitions are largely
eliminated, and global hash lock hold times are dramatically reduced
compared to previous locking.
- The TCP syncache still relies on the pcbinfo lock, something that we
may want to revisit.
- Support for reverting to the FreeBSD 7.x locking strategy in TCP input
is no longer available -- hash lookup locks are now held only very
briefly during inpcb lookup, rather than for potentially extended
periods. However, the pcbinfo ipi_lock will still be acquired if a
connection state might change such that a connection is added or
removed.
- Raw IP sockets continue to use the pcbinfo ipi_lock for protection,
due to maintaining their own hash tables.
- The interface in6_pcblookup_hash_locked() is maintained, which allows
callers to acquire hash locks and perform one or more lookups atomically
with 4-tuple allocation: this is required only for TCPv6, as there is no
in6_pcbconnect_setup(), which there should be.
- UDPv6 locking remains significantly more conservative than UDPv4
locking, which relates to source address selection. This needs
attention, as it likely significantly reduces parallelism in this code
for multithreaded socket use (such as in BIND).
- In the UDPv4 and UDPv6 multicast cases, we need to revisit locking
somewhat, as they relied on ipi_lock to stablise 4-tuple matches, which
is no longer sufficient. A second check once the inpcb lock is held
should do the trick, keeping the general case from requiring the inpcb
lock for every inpcb visited.
- This work reminds us that we need to revisit locking of the v4/v6 flags,
which may be accessed lock-free both before and after this change.
- Right now, a single lock name is used for the pcbhash lock -- this is
undesirable, and probably another argument is required to take care of
this (or a char array name field in the pcbinfo?).
This is not an MFC candidate for 8.x due to its impact on lookup and
locking semantics. It's possible some of these issues could be worked
around with compatibility wrappers, if necessary.
Reviewed by: bz
Sponsored by: Juniper Networks, Inc.
2011-05-30 09:43:55 +00:00
|
|
|
INPLOOKUP_RLOCKPCB, NULL);
|
|
|
|
if (inp != NULL) {
|
|
|
|
tablearg =
|
|
|
|
inp->inp_socket->so_user_cookie;
|
|
|
|
if (tablearg)
|
|
|
|
match = 1;
|
|
|
|
INP_RUNLOCK(inp);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (inp->inp_socket) {
|
|
|
|
tablearg =
|
|
|
|
inp->inp_socket->so_user_cookie;
|
|
|
|
if (tablearg)
|
|
|
|
match = 1;
|
|
|
|
}
|
2010-11-12 13:05:17 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2006-06-15 09:39:22 +00:00
|
|
|
case O_TAGGED: {
|
2010-01-04 19:01:22 +00:00
|
|
|
struct m_tag *mtag;
|
2006-06-15 09:39:22 +00:00
|
|
|
uint32_t tag = (cmd->arg1 == IP_FW_TABLEARG) ?
|
|
|
|
tablearg : cmd->arg1;
|
2006-05-24 13:09:55 +00:00
|
|
|
|
|
|
|
if (cmdlen == 1) {
|
2006-06-15 09:39:22 +00:00
|
|
|
match = m_tag_locate(m, MTAG_IPFW,
|
|
|
|
tag, NULL) != NULL;
|
2006-05-24 13:09:55 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* we have ranges */
|
|
|
|
for (mtag = m_tag_first(m);
|
|
|
|
mtag != NULL && !match;
|
|
|
|
mtag = m_tag_next(m, mtag)) {
|
|
|
|
uint16_t *p;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (mtag->m_tag_cookie != MTAG_IPFW)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
p = ((ipfw_insn_u16 *)cmd)->ports;
|
|
|
|
i = cmdlen - 1;
|
|
|
|
for(; !match && i > 0; i--, p += 2)
|
|
|
|
match =
|
|
|
|
mtag->m_tag_id >= p[0] &&
|
|
|
|
mtag->m_tag_id <= p[1];
|
|
|
|
}
|
|
|
|
break;
|
2006-06-15 09:39:22 +00:00
|
|
|
}
|
2006-05-24 13:09:55 +00:00
|
|
|
|
2002-07-08 22:46:01 +00:00
|
|
|
/*
|
|
|
|
* The second set of opcodes represents 'actions',
|
|
|
|
* i.e. the terminal part of a rule once the packet
|
|
|
|
* matches all previous patterns.
|
|
|
|
* Typically there is only one action for each rule,
|
|
|
|
* and the opcode is stored at the end of the rule
|
|
|
|
* (but there are exceptions -- see below).
|
|
|
|
*
|
|
|
|
* In general, here we set retval and terminate the
|
|
|
|
* outer loop (would be a 'break 3' in some language,
|
2009-12-03 14:22:15 +00:00
|
|
|
* but we need to set l=0, done=1)
|
2002-07-08 22:46:01 +00:00
|
|
|
*
|
|
|
|
* Exceptions:
|
|
|
|
* O_COUNT and O_SKIPTO actions:
|
|
|
|
* instead of terminating, we jump to the next rule
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
* (setting l=0), or to the SKIPTO target (setting
|
|
|
|
* f/f_len, cmd and l as needed), respectively.
|
2002-07-08 22:46:01 +00:00
|
|
|
*
|
2006-05-24 13:09:55 +00:00
|
|
|
* O_TAG, O_LOG and O_ALTQ action parameters:
|
2004-10-03 00:17:46 +00:00
|
|
|
* perform some action and set match = 1;
|
|
|
|
*
|
2002-07-08 22:46:01 +00:00
|
|
|
* O_LIMIT and O_KEEP_STATE: these opcodes are
|
|
|
|
* not real 'actions', and are stored right
|
|
|
|
* before the 'action' part of the rule.
|
|
|
|
* These opcodes try to install an entry in the
|
|
|
|
* state tables; if successful, we continue with
|
|
|
|
* the next opcode (match=1; break;), otherwise
|
2009-12-03 14:22:15 +00:00
|
|
|
* the packet must be dropped (set retval,
|
|
|
|
* break loops with l=0, done=1)
|
2002-07-08 22:46:01 +00:00
|
|
|
*
|
|
|
|
* O_PROBE_STATE and O_CHECK_STATE: these opcodes
|
|
|
|
* cause a lookup of the state table, and a jump
|
|
|
|
* to the 'action' part of the parent rule
|
2009-12-03 14:22:15 +00:00
|
|
|
* if an entry is found, or
|
2002-07-08 22:46:01 +00:00
|
|
|
* (CHECK_STATE only) a jump to the next rule if
|
2009-12-03 14:22:15 +00:00
|
|
|
* the entry is not found.
|
|
|
|
* The result of the lookup is cached so that
|
|
|
|
* further instances of these opcodes become NOPs.
|
|
|
|
* The jump to the next rule is done by setting
|
|
|
|
* l=0, cmdlen=0.
|
2002-07-08 22:46:01 +00:00
|
|
|
*/
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
case O_LIMIT:
|
|
|
|
case O_KEEP_STATE:
|
2009-12-16 10:48:40 +00:00
|
|
|
if (ipfw_install_state(f,
|
2006-06-15 09:39:22 +00:00
|
|
|
(ipfw_insn_limit *)cmd, args, tablearg)) {
|
2009-12-03 14:22:15 +00:00
|
|
|
/* error or limit violation */
|
2005-01-14 09:00:46 +00:00
|
|
|
retval = IP_FW_DENY;
|
2009-12-03 14:22:15 +00:00
|
|
|
l = 0; /* exit inner loop */
|
|
|
|
done = 1; /* exit outer loop */
|
2002-07-08 22:46:01 +00:00
|
|
|
}
|
|
|
|
match = 1;
|
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
case O_PROBE_STATE:
|
|
|
|
case O_CHECK_STATE:
|
|
|
|
/*
|
|
|
|
* dynamic rules are checked at the first
|
2002-07-08 22:46:01 +00:00
|
|
|
* keep-state or check-state occurrence,
|
|
|
|
* with the result being stored in dyn_dir.
|
|
|
|
* The compiler introduces a PROBE_STATE
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
* instruction for us when we have a
|
2002-07-08 22:46:01 +00:00
|
|
|
* KEEP_STATE (because PROBE_STATE needs
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
* to be run first).
|
|
|
|
*/
|
2002-07-08 22:46:01 +00:00
|
|
|
if (dyn_dir == MATCH_UNKNOWN &&
|
2009-12-16 10:48:40 +00:00
|
|
|
(q = ipfw_lookup_dyn_rule(&args->f_id,
|
Implement keepalives for dynamic rules, so they will not expire
just because you leave your session idle.
Also, put in a fix for 64-bit architectures (to be revised).
In detail:
ip_fw.h
* Reorder fields in struct ip_fw to avoid alignment problems on
64-bit machines. This only masks the problem, I am still not
sure whether I am doing something wrong in the code or there
is a problem elsewhere (e.g. different aligmnent of structures
between userland and kernel because of pragmas etc.)
* added fields in dyn_rule to store ack numbers, so we can
generate keepalives when the dynamic rule is about to expire
ip_fw2.c
* use a local function, send_pkt(), to generate TCP RST for Reset rules;
* save about 250 bytes by cleaning up the various snprintf()
in ipfw_log() ...
* ... and use twice as many bytes to implement keepalives
(this seems to be working, but i have not tested it extensively).
Keepalives are generated once every 5 seconds for the last 20 seconds
of the lifetime of a dynamic rule for an established TCP flow. The
packets are sent to both sides, so if at least one of the endpoints
is responding, the timeout is refreshed and the rule will not expire.
You can disable this feature with
sysctl net.inet.ip.fw.dyn_keepalive=0
(the default is 1, to have them enabled).
MFC after: 1 day
(just kidding... I will supply an updated version of ipfw2 for
RELENG_4 tomorrow).
2002-07-14 23:47:18 +00:00
|
|
|
&dyn_dir, proto == IPPROTO_TCP ?
|
2005-04-15 00:47:44 +00:00
|
|
|
TCP(ulp) : NULL))
|
Implement keepalives for dynamic rules, so they will not expire
just because you leave your session idle.
Also, put in a fix for 64-bit architectures (to be revised).
In detail:
ip_fw.h
* Reorder fields in struct ip_fw to avoid alignment problems on
64-bit machines. This only masks the problem, I am still not
sure whether I am doing something wrong in the code or there
is a problem elsewhere (e.g. different aligmnent of structures
between userland and kernel because of pragmas etc.)
* added fields in dyn_rule to store ack numbers, so we can
generate keepalives when the dynamic rule is about to expire
ip_fw2.c
* use a local function, send_pkt(), to generate TCP RST for Reset rules;
* save about 250 bytes by cleaning up the various snprintf()
in ipfw_log() ...
* ... and use twice as many bytes to implement keepalives
(this seems to be working, but i have not tested it extensively).
Keepalives are generated once every 5 seconds for the last 20 seconds
of the lifetime of a dynamic rule for an established TCP flow. The
packets are sent to both sides, so if at least one of the endpoints
is responding, the timeout is refreshed and the rule will not expire.
You can disable this feature with
sysctl net.inet.ip.fw.dyn_keepalive=0
(the default is 1, to have them enabled).
MFC after: 1 day
(just kidding... I will supply an updated version of ipfw2 for
RELENG_4 tomorrow).
2002-07-14 23:47:18 +00:00
|
|
|
!= NULL) {
|
2002-07-08 22:46:01 +00:00
|
|
|
/*
|
|
|
|
* Found dynamic entry, update stats
|
|
|
|
* and jump to the 'action' part of
|
2009-12-03 14:22:15 +00:00
|
|
|
* the parent rule by setting
|
|
|
|
* f, cmd, l and clearing cmdlen.
|
2002-07-08 22:46:01 +00:00
|
|
|
*/
|
|
|
|
q->pcnt++;
|
2003-06-02 23:54:09 +00:00
|
|
|
q->bcnt += pktlen;
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
/* XXX we would like to have f_pos
|
|
|
|
* readily accessible in the dynamic
|
|
|
|
* rule, instead of having to
|
|
|
|
* lookup q->rule.
|
|
|
|
*/
|
2002-07-08 22:46:01 +00:00
|
|
|
f = q->rule;
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
f_pos = ipfw_find_rule(chain,
|
|
|
|
f->rulenum, f->id);
|
2002-07-08 22:46:01 +00:00
|
|
|
cmd = ACTION_PTR(f);
|
|
|
|
l = f->cmd_len - f->act_ofs;
|
2009-12-15 16:15:14 +00:00
|
|
|
ipfw_dyn_unlock();
|
2009-12-03 14:22:15 +00:00
|
|
|
cmdlen = 0;
|
|
|
|
match = 1;
|
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
}
|
2002-07-08 22:46:01 +00:00
|
|
|
/*
|
|
|
|
* Dynamic entry not found. If CHECK_STATE,
|
|
|
|
* skip to next rule, if PROBE_STATE just
|
|
|
|
* ignore and continue with next opcode.
|
|
|
|
*/
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
if (cmd->opcode == O_CHECK_STATE)
|
2009-12-03 14:22:15 +00:00
|
|
|
l = 0; /* exit inner loop */
|
2002-07-08 22:46:01 +00:00
|
|
|
match = 1;
|
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
case O_ACCEPT:
|
|
|
|
retval = 0; /* accept */
|
2009-12-03 14:22:15 +00:00
|
|
|
l = 0; /* exit inner loop */
|
|
|
|
done = 1; /* exit outer loop */
|
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
case O_PIPE:
|
|
|
|
case O_QUEUE:
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
set_match(args, f_pos, chain);
|
2010-01-04 19:01:22 +00:00
|
|
|
args->rule.info = (cmd->arg1 == IP_FW_TABLEARG) ?
|
2009-12-22 13:53:34 +00:00
|
|
|
tablearg : cmd->arg1;
|
2010-01-04 19:01:22 +00:00
|
|
|
if (cmd->opcode == O_PIPE)
|
|
|
|
args->rule.info |= IPFW_IS_PIPE;
|
|
|
|
if (V_fw_one_pass)
|
|
|
|
args->rule.info |= IPFW_ONEPASS;
|
2005-01-14 09:00:46 +00:00
|
|
|
retval = IP_FW_DUMMYNET;
|
2009-12-03 14:22:15 +00:00
|
|
|
l = 0; /* exit inner loop */
|
|
|
|
done = 1; /* exit outer loop */
|
|
|
|
break;
|
2002-10-23 10:07:55 +00:00
|
|
|
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
case O_DIVERT:
|
2009-12-03 14:22:15 +00:00
|
|
|
case O_TEE:
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
if (args->eh) /* not on layer 2 */
|
2009-12-03 14:22:15 +00:00
|
|
|
break;
|
|
|
|
/* otherwise this is terminal */
|
|
|
|
l = 0; /* exit inner loop */
|
|
|
|
done = 1; /* exit outer loop */
|
2010-01-04 19:01:22 +00:00
|
|
|
retval = (cmd->opcode == O_DIVERT) ?
|
2009-12-03 14:22:15 +00:00
|
|
|
IP_FW_DIVERT : IP_FW_TEE;
|
2010-01-04 19:01:22 +00:00
|
|
|
set_match(args, f_pos, chain);
|
|
|
|
args->rule.info = (cmd->arg1 == IP_FW_TABLEARG) ?
|
|
|
|
tablearg : cmd->arg1;
|
2009-12-03 14:22:15 +00:00
|
|
|
break;
|
|
|
|
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
case O_COUNT:
|
|
|
|
f->pcnt++; /* update stats */
|
2003-06-02 23:54:09 +00:00
|
|
|
f->bcnt += pktlen;
|
2005-09-19 22:31:45 +00:00
|
|
|
f->timestamp = time_uptime;
|
2009-12-23 12:00:50 +00:00
|
|
|
l = 0; /* exit inner loop */
|
|
|
|
break;
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
|
|
|
|
case O_SKIPTO:
|
|
|
|
f->pcnt++; /* update stats */
|
|
|
|
f->bcnt += pktlen;
|
|
|
|
f->timestamp = time_uptime;
|
|
|
|
/* If possible use cached f_pos (in f->next_rule),
|
|
|
|
* whose version is written in f->next_rule
|
|
|
|
* (horrible hacks to avoid changing the ABI).
|
|
|
|
*/
|
|
|
|
if (cmd->arg1 != IP_FW_TABLEARG &&
|
2009-12-23 12:00:50 +00:00
|
|
|
(uintptr_t)f->x_next == chain->id) {
|
|
|
|
f_pos = (uintptr_t)f->next_rule;
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
} else {
|
|
|
|
int i = (cmd->arg1 == IP_FW_TABLEARG) ?
|
|
|
|
tablearg : cmd->arg1;
|
|
|
|
/* make sure we do not jump backward */
|
|
|
|
if (i <= f->rulenum)
|
|
|
|
i = f->rulenum + 1;
|
|
|
|
f_pos = ipfw_find_rule(chain, i, 0);
|
|
|
|
/* update the cache */
|
|
|
|
if (cmd->arg1 != IP_FW_TABLEARG) {
|
2009-12-23 12:00:50 +00:00
|
|
|
f->next_rule =
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
(void *)(uintptr_t)f_pos;
|
|
|
|
f->x_next =
|
|
|
|
(void *)(uintptr_t)chain->id;
|
|
|
|
}
|
2009-12-23 12:00:50 +00:00
|
|
|
}
|
|
|
|
/*
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
* Skip disabled rules, and re-enter
|
|
|
|
* the inner loop with the correct
|
|
|
|
* f_pos, f, l and cmd.
|
2009-12-23 12:00:50 +00:00
|
|
|
* Also clear cmdlen and skip_or
|
|
|
|
*/
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
for (; f_pos < chain->n_rules - 1 &&
|
|
|
|
(V_set_disable &
|
|
|
|
(1 << chain->map[f_pos]->set));
|
|
|
|
f_pos++)
|
|
|
|
;
|
2010-06-29 16:57:30 +00:00
|
|
|
/* Re-enter the inner loop at the skipto rule. */
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
f = chain->map[f_pos];
|
2009-12-23 12:00:50 +00:00
|
|
|
l = f->cmd_len;
|
|
|
|
cmd = f->cmd;
|
|
|
|
match = 1;
|
|
|
|
cmdlen = 0;
|
|
|
|
skip_or = 0;
|
2010-06-29 16:57:30 +00:00
|
|
|
continue;
|
|
|
|
break; /* not reached */
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
case O_REJECT:
|
|
|
|
/*
|
|
|
|
* Drop the packet and send a reject notice
|
|
|
|
* if the packet is not ICMP (or is an ICMP
|
|
|
|
* query), and it is not multicast/broadcast.
|
|
|
|
*/
|
2006-01-11 08:02:16 +00:00
|
|
|
if (hlen > 0 && is_ipv4 && offset == 0 &&
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
(proto != IPPROTO_ICMP ||
|
2005-04-15 00:47:44 +00:00
|
|
|
is_icmp_query(ICMP(ulp))) &&
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
!(m->m_flags & (M_BCAST|M_MCAST)) &&
|
2003-12-16 18:21:47 +00:00
|
|
|
!IN_MULTICAST(ntohl(dst_ip.s_addr))) {
|
2009-12-28 10:47:04 +00:00
|
|
|
send_reject(args, cmd->arg1, iplen, ip);
|
2002-07-05 22:43:06 +00:00
|
|
|
m = args->m;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
}
|
2002-07-08 22:46:01 +00:00
|
|
|
/* FALLTHROUGH */
|
2005-08-13 11:02:34 +00:00
|
|
|
#ifdef INET6
|
|
|
|
case O_UNREACH6:
|
|
|
|
if (hlen > 0 && is_ipv6 &&
|
2006-06-29 11:17:16 +00:00
|
|
|
((offset & IP6F_OFF_MASK) == 0) &&
|
2005-08-13 11:02:34 +00:00
|
|
|
(proto != IPPROTO_ICMPV6 ||
|
2010-03-15 17:14:27 +00:00
|
|
|
(is_icmp6_query(icmp6_type) == 1)) &&
|
2005-08-13 11:02:34 +00:00
|
|
|
!(m->m_flags & (M_BCAST|M_MCAST)) &&
|
|
|
|
!IN6_IS_ADDR_MULTICAST(&args->f_id.dst_ip6)) {
|
2007-01-02 19:57:31 +00:00
|
|
|
send_reject6(
|
|
|
|
args, cmd->arg1, hlen,
|
|
|
|
(struct ip6_hdr *)ip);
|
2005-08-13 11:02:34 +00:00
|
|
|
m = args->m;
|
|
|
|
}
|
|
|
|
/* FALLTHROUGH */
|
|
|
|
#endif
|
2002-07-08 22:46:01 +00:00
|
|
|
case O_DENY:
|
2005-01-14 09:00:46 +00:00
|
|
|
retval = IP_FW_DENY;
|
2009-12-03 14:22:15 +00:00
|
|
|
l = 0; /* exit inner loop */
|
|
|
|
done = 1; /* exit outer loop */
|
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
2009-12-03 14:22:15 +00:00
|
|
|
case O_FORWARD_IP:
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
if (args->eh) /* not valid on layer2 pkts */
|
2002-07-08 22:46:01 +00:00
|
|
|
break;
|
2011-06-01 19:44:52 +00:00
|
|
|
if (q == NULL || q->rule != f ||
|
|
|
|
dyn_dir == MATCH_FORWARD) {
|
2009-12-03 14:22:15 +00:00
|
|
|
struct sockaddr_in *sa;
|
|
|
|
sa = &(((ipfw_insn_sa *)cmd)->sa);
|
|
|
|
if (sa->sin_addr.s_addr == INADDR_ANY) {
|
|
|
|
bcopy(sa, &args->hopstore,
|
2006-08-17 22:49:50 +00:00
|
|
|
sizeof(*sa));
|
2009-12-03 14:22:15 +00:00
|
|
|
args->hopstore.sin_addr.s_addr =
|
2006-08-18 22:36:05 +00:00
|
|
|
htonl(tablearg);
|
2009-12-03 14:22:15 +00:00
|
|
|
args->next_hop = &args->hopstore;
|
|
|
|
} else {
|
|
|
|
args->next_hop = sa;
|
|
|
|
}
|
2006-08-17 22:49:50 +00:00
|
|
|
}
|
2005-01-14 09:00:46 +00:00
|
|
|
retval = IP_FW_PASS;
|
2009-12-03 14:22:15 +00:00
|
|
|
l = 0; /* exit inner loop */
|
|
|
|
done = 1; /* exit outer loop */
|
|
|
|
break;
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
2005-02-05 12:06:33 +00:00
|
|
|
case O_NETGRAPH:
|
|
|
|
case O_NGTEE:
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
set_match(args, f_pos, chain);
|
2010-01-04 19:01:22 +00:00
|
|
|
args->rule.info = (cmd->arg1 == IP_FW_TABLEARG) ?
|
2009-12-22 13:53:34 +00:00
|
|
|
tablearg : cmd->arg1;
|
2010-07-27 14:26:34 +00:00
|
|
|
if (V_fw_one_pass)
|
|
|
|
args->rule.info |= IPFW_ONEPASS;
|
2005-02-05 12:06:33 +00:00
|
|
|
retval = (cmd->opcode == O_NETGRAPH) ?
|
|
|
|
IP_FW_NETGRAPH : IP_FW_NGTEE;
|
2009-12-03 14:22:15 +00:00
|
|
|
l = 0; /* exit inner loop */
|
|
|
|
done = 1; /* exit outer loop */
|
|
|
|
break;
|
2005-02-05 12:06:33 +00:00
|
|
|
|
2011-05-30 05:37:26 +00:00
|
|
|
case O_SETFIB: {
|
|
|
|
uint32_t fib;
|
|
|
|
|
Add code to allow the system to handle multiple routing tables.
This particular implementation is designed to be fully backwards compatible
and to be MFC-able to 7.x (and 6.x)
Currently the only protocol that can make use of the multiple tables is IPv4
Similar functionality exists in OpenBSD and Linux.
From my notes:
-----
One thing where FreeBSD has been falling behind, and which by chance I
have some time to work on is "policy based routing", which allows
different
packet streams to be routed by more than just the destination address.
Constraints:
------------
I want to make some form of this available in the 6.x tree
(and by extension 7.x) , but FreeBSD in general needs it so I might as
well do it in -current and back port the portions I need.
One of the ways that this can be done is to have the ability to
instantiate multiple kernel routing tables (which I will now
refer to as "Forwarding Information Bases" or "FIBs" for political
correctness reasons). Which FIB a particular packet uses to make
the next hop decision can be decided by a number of mechanisms.
The policies these mechanisms implement are the "Policies" referred
to in "Policy based routing".
One of the constraints I have if I try to back port this work to
6.x is that it must be implemented as a EXTENSION to the existing
ABIs in 6.x so that third party applications do not need to be
recompiled in timespan of the branch.
This first version will not have some of the bells and whistles that
will come with later versions. It will, for example, be limited to 16
tables in the first commit.
Implementation method, Compatible version. (part 1)
-------------------------------
For this reason I have implemented a "sufficient subset" of a
multiple routing table solution in Perforce, and back-ported it
to 6.x. (also in Perforce though not always caught up with what I
have done in -current/P4). The subset allows a number of FIBs
to be defined at compile time (8 is sufficient for my purposes in 6.x)
and implements the changes needed to allow IPV4 to use them. I have not
done the changes for ipv6 simply because I do not need it, and I do not
have enough knowledge of ipv6 (e.g. neighbor discovery) needed to do it.
Other protocol families are left untouched and should there be
users with proprietary protocol families, they should continue to work
and be oblivious to the existence of the extra FIBs.
To understand how this is done, one must know that the current FIB
code starts everything off with a single dimensional array of
pointers to FIB head structures (One per protocol family), each of
which in turn points to the trie of routes available to that family.
The basic change in the ABI compatible version of the change is to
extent that array to be a 2 dimensional array, so that
instead of protocol family X looking at rt_tables[X] for the
table it needs, it looks at rt_tables[Y][X] when for all
protocol families except ipv4 Y is always 0.
Code that is unaware of the change always just sees the first row
of the table, which of course looks just like the one dimensional
array that existed before.
The entry points rtrequest(), rtalloc(), rtalloc1(), rtalloc_ign()
are all maintained, but refer only to the first row of the array,
so that existing callers in proprietary protocols can continue to
do the "right thing".
Some new entry points are added, for the exclusive use of ipv4 code
called in_rtrequest(), in_rtalloc(), in_rtalloc1() and in_rtalloc_ign(),
which have an extra argument which refers the code to the correct row.
In addition, there are some new entry points (currently called
rtalloc_fib() and friends) that check the Address family being
looked up and call either rtalloc() (and friends) if the protocol
is not IPv4 forcing the action to row 0 or to the appropriate row
if it IS IPv4 (and that info is available). These are for calling
from code that is not specific to any particular protocol. The way
these are implemented would change in the non ABI preserving code
to be added later.
One feature of the first version of the code is that for ipv4,
the interface routes show up automatically on all the FIBs, so
that no matter what FIB you select you always have the basic
direct attached hosts available to you. (rtinit() does this
automatically).
You CAN delete an interface route from one FIB should you want
to but by default it's there. ARP information is also available
in each FIB. It's assumed that the same machine would have the
same MAC address, regardless of which FIB you are using to get
to it.
This brings us as to how the correct FIB is selected for an outgoing
IPV4 packet.
Firstly, all packets have a FIB associated with them. if nothing
has been done to change it, it will be FIB 0. The FIB is changed
in the following ways.
Packets fall into one of a number of classes.
1/ locally generated packets, coming from a socket/PCB.
Such packets select a FIB from a number associated with the
socket/PCB. This in turn is inherited from the process,
but can be changed by a socket option. The process in turn
inherits it on fork. I have written a utility call setfib
that acts a bit like nice..
setfib -3 ping target.example.com # will use fib 3 for ping.
It is an obvious extension to make it a property of a jail
but I have not done so. It can be achieved by combining the setfib and
jail commands.
2/ packets received on an interface for forwarding.
By default these packets would use table 0,
(or possibly a number settable in a sysctl(not yet)).
but prior to routing the firewall can inspect them (see below).
(possibly in the future you may be able to associate a FIB
with packets received on an interface.. An ifconfig arg, but not yet.)
3/ packets inspected by a packet classifier, which can arbitrarily
associate a fib with it on a packet by packet basis.
A fib assigned to a packet by a packet classifier
(such as ipfw) would over-ride a fib associated by
a more default source. (such as cases 1 or 2).
4/ a tcp listen socket associated with a fib will generate
accept sockets that are associated with that same fib.
5/ Packets generated in response to some other packet (e.g. reset
or icmp packets). These should use the FIB associated with the
packet being reponded to.
6/ Packets generated during encapsulation.
gif, tun and other tunnel interfaces will encapsulate using the FIB
that was in effect withthe proces that set up the tunnel.
thus setfib 1 ifconfig gif0 [tunnel instructions]
will set the fib for the tunnel to use to be fib 1.
Routing messages would be associated with their
process, and thus select one FIB or another.
messages from the kernel would be associated with the fib they
refer to and would only be received by a routing socket associated
with that fib. (not yet implemented)
In addition Netstat has been edited to be able to cope with the
fact that the array is now 2 dimensional. (It looks in system
memory using libkvm (!)). Old versions of netstat see only the first FIB.
In addition two sysctls are added to give:
a) the number of FIBs compiled in (active)
b) the default FIB of the calling process.
Early testing experience:
-------------------------
Basically our (IronPort's) appliance does this functionality already
using ipfw fwd but that method has some drawbacks.
For example,
It can't fully simulate a routing table because it can't influence the
socket's choice of local address when a connect() is done.
Testing during the generating of these changes has been
remarkably smooth so far. Multiple tables have co-existed
with no notable side effects, and packets have been routes
accordingly.
ipfw has grown 2 new keywords:
setfib N ip from anay to any
count ip from any to any fib N
In pf there seems to be a requirement to be able to give symbolic names to the
fibs but I do not have that capacity. I am not sure if it is required.
SCTP has interestingly enough built in support for this, called VRFs
in Cisco parlance. it will be interesting to see how that handles it
when it suddenly actually does something.
Where to next:
--------------------
After committing the ABI compatible version and MFCing it, I'd
like to proceed in a forward direction in -current. this will
result in some roto-tilling in the routing code.
Firstly: the current code's idea of having a separate tree per
protocol family, all of the same format, and pointed to by the
1 dimensional array is a bit silly. Especially when one considers that
there is code that makes assumptions about every protocol having the
same internal structures there. Some protocols don't WANT that
sort of structure. (for example the whole idea of a netmask is foreign
to appletalk). This needs to be made opaque to the external code.
My suggested first change is to add routing method pointers to the
'domain' structure, along with information pointing the data.
instead of having an array of pointers to uniform structures,
there would be an array pointing to the 'domain' structures
for each protocol address domain (protocol family),
and the methods this reached would be called. The methods would have
an argument that gives FIB number, but the protocol would be free
to ignore it.
When the ABI can be changed it raises the possibilty of the
addition of a fib entry into the "struct route". Currently,
the structure contains the sockaddr of the desination, and the resulting
fib entry. To make this work fully, one could add a fib number
so that given an address and a fib, one can find the third element, the
fib entry.
Interaction with the ARP layer/ LL layer would need to be
revisited as well. Qing Li has been working on this already.
This work was sponsored by Ironport Systems/Cisco
Reviewed by: several including rwatson, bz and mlair (parts each)
Obtained from: Ironport systems/Cisco
2008-05-09 23:03:00 +00:00
|
|
|
f->pcnt++; /* update stats */
|
|
|
|
f->bcnt += pktlen;
|
|
|
|
f->timestamp = time_uptime;
|
2011-05-30 05:37:26 +00:00
|
|
|
fib = (cmd->arg1 == IP_FW_TABLEARG) ? tablearg:
|
|
|
|
cmd->arg1;
|
|
|
|
if (fib >= rt_numfibs)
|
|
|
|
fib = 0;
|
|
|
|
M_SETFIB(m, fib);
|
|
|
|
args->f_id.fib = fib;
|
2009-12-03 14:22:15 +00:00
|
|
|
l = 0; /* exit inner loop */
|
|
|
|
break;
|
2011-05-30 05:37:26 +00:00
|
|
|
}
|
2006-12-29 21:59:17 +00:00
|
|
|
|
2009-12-03 14:22:15 +00:00
|
|
|
case O_NAT:
|
|
|
|
if (!IPFW_NAT_LOADED) {
|
|
|
|
retval = IP_FW_DENY;
|
|
|
|
} else {
|
|
|
|
struct cfg_nat *t;
|
|
|
|
int nat_id;
|
|
|
|
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
set_match(args, f_pos, chain);
|
2009-12-03 14:22:15 +00:00
|
|
|
t = ((ipfw_insn_nat *)cmd)->nat;
|
|
|
|
if (t == NULL) {
|
|
|
|
nat_id = (cmd->arg1 == IP_FW_TABLEARG) ?
|
|
|
|
tablearg : cmd->arg1;
|
2009-12-22 13:53:34 +00:00
|
|
|
t = (*lookup_nat_ptr)(&chain->nat, nat_id);
|
2009-12-15 16:15:14 +00:00
|
|
|
|
2008-02-29 22:27:19 +00:00
|
|
|
if (t == NULL) {
|
2009-12-03 14:22:15 +00:00
|
|
|
retval = IP_FW_DENY;
|
|
|
|
l = 0; /* exit inner loop */
|
|
|
|
done = 1; /* exit outer loop */
|
|
|
|
break;
|
2006-12-29 21:59:17 +00:00
|
|
|
}
|
2009-12-03 14:22:15 +00:00
|
|
|
if (cmd->arg1 != IP_FW_TABLEARG)
|
|
|
|
((ipfw_insn_nat *)cmd)->nat = t;
|
|
|
|
}
|
|
|
|
retval = ipfw_nat_ptr(args, t, m);
|
|
|
|
}
|
|
|
|
l = 0; /* exit inner loop */
|
|
|
|
done = 1; /* exit outer loop */
|
|
|
|
break;
|
2006-12-29 21:59:17 +00:00
|
|
|
|
2009-04-01 20:23:47 +00:00
|
|
|
case O_REASS: {
|
|
|
|
int ip_off;
|
|
|
|
|
|
|
|
f->pcnt++;
|
|
|
|
f->bcnt += pktlen;
|
2009-12-03 14:22:15 +00:00
|
|
|
l = 0; /* in any case exit inner loop */
|
2009-12-28 10:47:04 +00:00
|
|
|
ip_off = ntohs(ip->ip_off);
|
|
|
|
|
2009-12-03 14:22:15 +00:00
|
|
|
/* if not fragmented, go to next rule */
|
|
|
|
if ((ip_off & (IP_MF | IP_OFFMASK)) == 0)
|
|
|
|
break;
|
|
|
|
/*
|
|
|
|
* ip_reass() expects len & off in host
|
2010-01-04 19:01:22 +00:00
|
|
|
* byte order.
|
2009-12-03 14:22:15 +00:00
|
|
|
*/
|
2010-01-04 19:01:22 +00:00
|
|
|
SET_HOST_IPLEN(ip);
|
2009-12-03 14:22:15 +00:00
|
|
|
|
|
|
|
args->m = m = ip_reass(m);
|
|
|
|
|
|
|
|
/*
|
2010-01-04 19:01:22 +00:00
|
|
|
* do IP header checksum fixup.
|
2009-12-03 14:22:15 +00:00
|
|
|
*/
|
|
|
|
if (m == NULL) { /* fragment got swallowed */
|
|
|
|
retval = IP_FW_DENY;
|
|
|
|
} else { /* good, packet complete */
|
|
|
|
int hlen;
|
|
|
|
|
|
|
|
ip = mtod(m, struct ip *);
|
|
|
|
hlen = ip->ip_hl << 2;
|
2010-01-04 19:01:22 +00:00
|
|
|
SET_NET_IPLEN(ip);
|
2009-12-03 14:22:15 +00:00
|
|
|
ip->ip_sum = 0;
|
|
|
|
if (hlen == sizeof(struct ip))
|
|
|
|
ip->ip_sum = in_cksum_hdr(ip);
|
|
|
|
else
|
|
|
|
ip->ip_sum = in_cksum(m, hlen);
|
|
|
|
retval = IP_FW_REASS;
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
set_match(args, f_pos, chain);
|
2009-04-01 20:23:47 +00:00
|
|
|
}
|
2009-12-03 14:22:15 +00:00
|
|
|
done = 1; /* exit outer loop */
|
|
|
|
break;
|
2009-04-01 20:23:47 +00:00
|
|
|
}
|
|
|
|
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
default:
|
|
|
|
panic("-- unknown opcode %d\n", cmd->opcode);
|
2002-07-08 22:46:01 +00:00
|
|
|
} /* end of switch() on opcodes */
|
2009-12-03 14:22:15 +00:00
|
|
|
/*
|
|
|
|
* if we get here with l=0, then match is irrelevant.
|
|
|
|
*/
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
2002-07-08 22:46:01 +00:00
|
|
|
if (cmd->len & F_NOT)
|
|
|
|
match = !match;
|
|
|
|
|
|
|
|
if (match) {
|
|
|
|
if (cmd->len & F_OR)
|
|
|
|
skip_or = 1;
|
|
|
|
} else {
|
|
|
|
if (!(cmd->len & F_OR)) /* not an OR block, */
|
|
|
|
break; /* try next rule */
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
}
|
|
|
|
|
2009-12-03 14:22:15 +00:00
|
|
|
} /* end of inner loop, scan opcodes */
|
2011-04-18 18:22:10 +00:00
|
|
|
#undef PULLUP_LEN
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
2009-12-03 14:22:15 +00:00
|
|
|
if (done)
|
|
|
|
break;
|
|
|
|
|
|
|
|
/* next_rule:; */ /* try next rule */
|
2002-10-23 10:07:55 +00:00
|
|
|
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
} /* end of outer for, scan rules */
|
|
|
|
|
2009-12-03 14:22:15 +00:00
|
|
|
if (done) {
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
struct ip_fw *rule = chain->map[f_pos];
|
2009-12-03 14:22:15 +00:00
|
|
|
/* Update statistics */
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
rule->pcnt++;
|
|
|
|
rule->bcnt += pktlen;
|
|
|
|
rule->timestamp = time_uptime;
|
2009-12-03 14:22:15 +00:00
|
|
|
} else {
|
|
|
|
retval = IP_FW_DENY;
|
|
|
|
printf("ipfw: ouch!, skip past end of rules, denying packet\n");
|
|
|
|
}
|
2004-12-10 02:17:18 +00:00
|
|
|
IPFW_RUNLOCK(chain);
|
Bring in the most recent version of ipfw and dummynet, developed
and tested over the past two months in the ipfw3-head branch. This
also happens to be the same code available in the Linux and Windows
ports of ipfw and dummynet.
The major enhancement is a completely restructured version of
dummynet, with support for different packet scheduling algorithms
(loadable at runtime), faster queue/pipe lookup, and a much cleaner
internal architecture and kernel/userland ABI which simplifies
future extensions.
In addition to the existing schedulers (FIFO and WF2Q+), we include
a Deficit Round Robin (DRR or RR for brevity) scheduler, and a new,
very fast version of WF2Q+ called QFQ.
Some test code is also present (in sys/netinet/ipfw/test) that
lets you build and test schedulers in userland.
Also, we have added a compatibility layer that understands requests
from the RELENG_7 and RELENG_8 versions of the /sbin/ipfw binaries,
and replies correctly (at least, it does its best; sometimes you
just cannot tell who sent the request and how to answer).
The compatibility layer should make it possible to MFC this code in a
relatively short time.
Some minor glitches (e.g. handling of ipfw set enable/disable,
and a workaround for a bug in RELENG_7's /sbin/ipfw) will be
fixed with separate commits.
CREDITS:
This work has been partly supported by the ONELAB2 project, and
mostly developed by Riccardo Panicucci and myself.
The code for the qfq scheduler is mostly from Fabio Checconi,
and Marta Carbone and Francesco Magno have helped with testing,
debugging and some bug fixes.
2010-03-02 17:40:48 +00:00
|
|
|
#ifdef __FreeBSD__
|
2009-06-19 17:10:35 +00:00
|
|
|
if (ucred_cache != NULL)
|
|
|
|
crfree(ucred_cache);
|
Bring in the most recent version of ipfw and dummynet, developed
and tested over the past two months in the ipfw3-head branch. This
also happens to be the same code available in the Linux and Windows
ports of ipfw and dummynet.
The major enhancement is a completely restructured version of
dummynet, with support for different packet scheduling algorithms
(loadable at runtime), faster queue/pipe lookup, and a much cleaner
internal architecture and kernel/userland ABI which simplifies
future extensions.
In addition to the existing schedulers (FIFO and WF2Q+), we include
a Deficit Round Robin (DRR or RR for brevity) scheduler, and a new,
very fast version of WF2Q+ called QFQ.
Some test code is also present (in sys/netinet/ipfw/test) that
lets you build and test schedulers in userland.
Also, we have added a compatibility layer that understands requests
from the RELENG_7 and RELENG_8 versions of the /sbin/ipfw binaries,
and replies correctly (at least, it does its best; sometimes you
just cannot tell who sent the request and how to answer).
The compatibility layer should make it possible to MFC this code in a
relatively short time.
Some minor glitches (e.g. handling of ipfw set enable/disable,
and a workaround for a bug in RELENG_7's /sbin/ipfw) will be
fixed with separate commits.
CREDITS:
This work has been partly supported by the ONELAB2 project, and
mostly developed by Riccardo Panicucci and myself.
The code for the qfq scheduler is mostly from Fabio Checconi,
and Marta Carbone and Francesco Magno have helped with testing,
debugging and some bug fixes.
2010-03-02 17:40:48 +00:00
|
|
|
#endif
|
2005-01-14 09:00:46 +00:00
|
|
|
return (retval);
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
|
|
|
pullup_failed:
|
Commit step 1 of the vimage project, (network stack)
virtualization work done by Marko Zec (zec@).
This is the first in a series of commits over the course
of the next few weeks.
Mark all uses of global variables to be virtualized
with a V_ prefix.
Use macros to map them back to their global names for
now, so this is a NOP change only.
We hope to have caught at least 85-90% of what is needed
so we do not invalidate a lot of outstanding patches again.
Obtained from: //depot/projects/vimage-commit2/...
Reviewed by: brooks, des, ed, mav, julian,
jamie, kris, rwatson, zec, ...
(various people I forgot, different versions)
md5 (with a bit of help)
Sponsored by: NLnet Foundation, The FreeBSD Foundation
X-MFC after: never
V_Commit_Message_Reviewed_By: more people than the patch
2008-08-17 23:27:27 +00:00
|
|
|
if (V_fw_verbose)
|
2002-12-24 13:45:24 +00:00
|
|
|
printf("ipfw: pullup failed\n");
|
2005-01-14 09:00:46 +00:00
|
|
|
return (IP_FW_DENY);
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
}
|
|
|
|
|
2009-12-16 10:48:40 +00:00
|
|
|
/*
|
|
|
|
* Module and VNET glue
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
2009-07-25 06:42:42 +00:00
|
|
|
* Stuff that must be initialised only on boot or module load
|
|
|
|
*/
|
2009-08-21 11:20:10 +00:00
|
|
|
static int
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
ipfw_init(void)
|
|
|
|
{
|
2009-07-25 06:42:42 +00:00
|
|
|
int error = 0;
|
|
|
|
|
2009-12-15 16:15:14 +00:00
|
|
|
ipfw_dyn_attach();
|
2009-07-25 06:42:42 +00:00
|
|
|
/*
|
|
|
|
* Only print out this stuff the first time around,
|
|
|
|
* when called from the sysinit code.
|
|
|
|
*/
|
|
|
|
printf("ipfw2 "
|
|
|
|
#ifdef INET6
|
|
|
|
"(+ipv6) "
|
|
|
|
#endif
|
|
|
|
"initialized, divert %s, nat %s, "
|
|
|
|
"rule-based forwarding "
|
|
|
|
#ifdef IPFIREWALL_FORWARD
|
|
|
|
"enabled, "
|
|
|
|
#else
|
|
|
|
"disabled, "
|
|
|
|
#endif
|
|
|
|
"default to %s, logging ",
|
|
|
|
#ifdef IPDIVERT
|
|
|
|
"enabled",
|
|
|
|
#else
|
|
|
|
"loadable",
|
|
|
|
#endif
|
|
|
|
#ifdef IPFIREWALL_NAT
|
|
|
|
"enabled",
|
|
|
|
#else
|
|
|
|
"loadable",
|
|
|
|
#endif
|
|
|
|
default_to_accept ? "accept" : "deny");
|
|
|
|
|
|
|
|
/*
|
2009-08-21 11:20:10 +00:00
|
|
|
* Note: V_xxx variables can be accessed here but the vnet specific
|
|
|
|
* initializer may not have been called yet for the VIMAGE case.
|
|
|
|
* Tuneables will have been processed. We will print out values for
|
|
|
|
* the default vnet.
|
|
|
|
* XXX This should all be rationalized AFTER 8.0
|
2009-07-25 06:42:42 +00:00
|
|
|
*/
|
|
|
|
if (V_fw_verbose == 0)
|
|
|
|
printf("disabled\n");
|
|
|
|
else if (V_verbose_limit == 0)
|
|
|
|
printf("unlimited\n");
|
|
|
|
else
|
|
|
|
printf("limited to %d packets/entry by default\n",
|
|
|
|
V_verbose_limit);
|
|
|
|
|
2009-12-17 23:11:16 +00:00
|
|
|
ipfw_log_bpf(1); /* init */
|
2009-07-25 06:42:42 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2009-12-16 10:48:40 +00:00
|
|
|
/*
|
2009-10-11 05:59:43 +00:00
|
|
|
* Called for the removal of the last instance only on module unload.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
ipfw_destroy(void)
|
|
|
|
{
|
|
|
|
|
2009-12-17 23:11:16 +00:00
|
|
|
ipfw_log_bpf(0); /* uninit */
|
2009-12-15 16:15:14 +00:00
|
|
|
ipfw_dyn_detach();
|
2009-10-11 05:59:43 +00:00
|
|
|
printf("IP firewall unloaded\n");
|
|
|
|
}
|
|
|
|
|
2009-12-16 10:48:40 +00:00
|
|
|
/*
|
2009-08-21 11:20:10 +00:00
|
|
|
* Stuff that must be initialized for every instance
|
|
|
|
* (including the first of course).
|
2009-07-25 06:42:42 +00:00
|
|
|
*/
|
|
|
|
static int
|
|
|
|
vnet_ipfw_init(const void *unused)
|
|
|
|
{
|
2003-09-17 00:56:50 +00:00
|
|
|
int error;
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
struct ip_fw *rule = NULL;
|
2009-12-22 13:53:34 +00:00
|
|
|
struct ip_fw_chain *chain;
|
|
|
|
|
|
|
|
chain = &V_layer3_chain;
|
2009-07-25 06:42:42 +00:00
|
|
|
|
|
|
|
/* First set up some values that are compile time options */
|
2009-12-22 13:53:34 +00:00
|
|
|
V_autoinc_step = 100; /* bounded to 1..1000 in add_rule() */
|
|
|
|
V_fw_deny_unknown_exthdrs = 1;
|
2009-07-25 06:42:42 +00:00
|
|
|
#ifdef IPFIREWALL_VERBOSE
|
|
|
|
V_fw_verbose = 1;
|
|
|
|
#endif
|
|
|
|
#ifdef IPFIREWALL_VERBOSE_LIMIT
|
|
|
|
V_verbose_limit = IPFIREWALL_VERBOSE_LIMIT;
|
|
|
|
#endif
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
#ifdef IPFIREWALL_NAT
|
|
|
|
LIST_INIT(&chain->nat);
|
|
|
|
#endif
|
2009-07-25 06:42:42 +00:00
|
|
|
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
/* insert the default rule and create the initial map */
|
|
|
|
chain->n_rules = 1;
|
|
|
|
chain->static_len = sizeof(struct ip_fw);
|
|
|
|
chain->map = malloc(sizeof(struct ip_fw *), M_IPFW, M_NOWAIT | M_ZERO);
|
|
|
|
if (chain->map)
|
|
|
|
rule = malloc(chain->static_len, M_IPFW, M_NOWAIT | M_ZERO);
|
|
|
|
if (rule == NULL) {
|
|
|
|
if (chain->map)
|
|
|
|
free(chain->map, M_IPFW);
|
|
|
|
printf("ipfw2: ENOSPC initializing default rule "
|
|
|
|
"(support disabled)\n");
|
|
|
|
return (ENOSPC);
|
|
|
|
}
|
2009-12-22 13:53:34 +00:00
|
|
|
error = ipfw_init_tables(chain);
|
2009-07-25 06:42:42 +00:00
|
|
|
if (error) {
|
|
|
|
panic("init_tables"); /* XXX Marko fix this ! */
|
|
|
|
}
|
The new ipfw code.
This code makes use of variable-size kernel representation of rules
(exactly the same concept of BPF instructions, as used in the BSDI's
firewall), which makes firewall operation a lot faster, and the
code more readable and easier to extend and debug.
The interface with the rest of the system is unchanged, as witnessed
by this commit. The only extra kernel files that I am touching
are if_fw.h and ip_dummynet.c, which is quite tied to ipfw. In
userland I only had to touch those programs which manipulate the
internal representation of firewall rules).
The code is almost entirely new (and I believe I have written the
vast majority of those sections which were taken from the former
ip_fw.c), so rather than modifying the old ip_fw.c I decided to
create a new file, sys/netinet/ip_fw2.c . Same for the user
interface, which is in sbin/ipfw/ipfw2.c (it still compiles to
/sbin/ipfw). The old files are still there, and will be removed
in due time.
I have not renamed the header file because it would have required
touching a one-line change to a number of kernel files.
In terms of user interface, the new "ipfw" is supposed to accepts
the old syntax for ipfw rules (and produce the same output with
"ipfw show". Only a couple of the old options (out of some 30 of
them) has not been implemented, but they will be soon.
On the other hand, the new code has some very powerful extensions.
First, you can put "or" connectives between match fields (and soon
also between options), and write things like
ipfw add allow ip from { 1.2.3.4/27 or 5.6.7.8/30 } 10-23,25,1024-3000 to any
This should make rulesets slightly more compact (and lines longer!),
by condensing 2 or more of the old rules into single ones.
Also, as an example of how easy the rules can be extended, I have
implemented an 'address set' match pattern, where you can specify
an IP address in a format like this:
10.20.30.0/26{18,44,33,22,9}
which will match the set of hosts listed in braces belonging to the
subnet 10.20.30.0/26 . The match is done using a bitmap, so it is
essentially a constant time operation requiring a handful of CPU
instructions (and a very small amount of memmory -- for a full /24
subnet, the instruction only consumes 40 bytes).
Again, in this commit I have focused on functionality and tried
to minimize changes to the other parts of the system. Some performance
improvement can be achieved with minor changes to the interface of
ip_fw_chk_t. This will be done later when this code is settled.
The code is meant to compile unmodified on RELENG_4 (once the
PACKET_TAG_* changes have been merged), for this reason
you will see #ifdef __FreeBSD_version in a couple of places.
This should minimize errors when (hopefully soon) it will be time
to do the MFC.
2002-06-27 23:02:18 +00:00
|
|
|
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
/* fill and insert the default rule */
|
|
|
|
rule->act_ofs = 0;
|
|
|
|
rule->rulenum = IPFW_DEFAULT_RULE;
|
|
|
|
rule->cmd_len = 1;
|
|
|
|
rule->set = RESVD_SET;
|
|
|
|
rule->cmd[0].len = 1;
|
|
|
|
rule->cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY;
|
|
|
|
chain->rules = chain->default_rule = chain->map[0] = rule;
|
|
|
|
chain->id = rule->id = 1;
|
Conditionally compile out V_ globals while instantiating the appropriate
container structures, depending on VIMAGE_GLOBALS compile time option.
Make VIMAGE_GLOBALS a new compile-time option, which by default will not
be defined, resulting in instatiations of global variables selected for
V_irtualization (enclosed in #ifdef VIMAGE_GLOBALS blocks) to be
effectively compiled out. Instantiate new global container structures
to hold V_irtualized variables: vnet_net_0, vnet_inet_0, vnet_inet6_0,
vnet_ipsec_0, vnet_netgraph_0, and vnet_gif_0.
Update the VSYM() macro so that depending on VIMAGE_GLOBALS the V_
macros resolve either to the original globals, or to fields inside
container structures, i.e. effectively
#ifdef VIMAGE_GLOBALS
#define V_rt_tables rt_tables
#else
#define V_rt_tables vnet_net_0._rt_tables
#endif
Update SYSCTL_V_*() macros to operate either on globals or on fields
inside container structs.
Extend the internal kldsym() lookups with the ability to resolve
selected fields inside the virtualization container structs. This
applies only to the fields which are explicitly registered for kldsym()
visibility via VNET_MOD_DECLARE() and vnet_mod_register(), currently
this is done only in sys/net/if.c.
Fix a few broken instances of MODULE_GLOBAL() macro use in SCTP code,
and modify the MODULE_GLOBAL() macro to resolve to V_ macros, which in
turn result in proper code being generated depending on VIMAGE_GLOBALS.
De-virtualize local static variables in sys/contrib/pf/net/pf_subr.c
which were prematurely V_irtualized by automated V_ prepending scripts
during earlier merging steps. PF virtualization will be done
separately, most probably after next PF import.
Convert a few variable initializations at instantiation to
initialization in init functions, most notably in ipfw. Also convert
TUNABLE_INT() initializers for V_ variables to TUNABLE_FETCH_INT() in
initializer functions.
Discussed at: devsummit Strassburg
Reviewed by: bz, julian
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-12-10 23:12:39 +00:00
|
|
|
|
2009-12-22 13:53:34 +00:00
|
|
|
IPFW_LOCK_INIT(chain);
|
2009-12-15 16:15:14 +00:00
|
|
|
ipfw_dyn_init();
|
2009-07-25 06:42:42 +00:00
|
|
|
|
|
|
|
/* First set up some values that are compile time options */
|
2009-08-21 11:20:10 +00:00
|
|
|
V_ipfw_vnet_ready = 1; /* Open for business */
|
2003-09-17 00:56:50 +00:00
|
|
|
|
2009-10-11 05:59:43 +00:00
|
|
|
/*
|
2009-12-05 09:13:06 +00:00
|
|
|
* Hook the sockopt handler, and the layer2 (V_ip_fw_chk_ptr)
|
|
|
|
* and pfil hooks for ipv4 and ipv6. Even if the latter two fail
|
2009-12-05 11:51:32 +00:00
|
|
|
* we still keep the module alive because the sockopt and
|
|
|
|
* layer2 paths are still useful.
|
|
|
|
* ipfw[6]_hook return 0 on success, ENOENT on failure,
|
|
|
|
* so we can ignore the exact return value and just set a flag.
|
2009-12-05 09:13:06 +00:00
|
|
|
*
|
2009-12-05 11:51:32 +00:00
|
|
|
* Note that V_fw[6]_enable are manipulated by a SYSCTL_PROC so
|
|
|
|
* changes in the underlying (per-vnet) variables trigger
|
|
|
|
* immediate hook()/unhook() calls.
|
|
|
|
* In layer2 we have the same behaviour, except that V_ether_ipfw
|
|
|
|
* is checked on each packet because there are no pfil hooks.
|
2009-10-11 05:59:43 +00:00
|
|
|
*/
|
2009-12-05 09:13:06 +00:00
|
|
|
V_ip_fw_ctl_ptr = ipfw_ctl;
|
|
|
|
V_ip_fw_chk_ptr = ipfw_chk;
|
2009-12-28 10:47:04 +00:00
|
|
|
error = ipfw_attach_hooks(1);
|
2009-12-05 09:13:06 +00:00
|
|
|
return (error);
|
2009-07-25 06:42:42 +00:00
|
|
|
}
|
|
|
|
|
2009-12-16 10:48:40 +00:00
|
|
|
/*
|
2009-07-25 06:42:42 +00:00
|
|
|
* Called for the removal of each instance.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
vnet_ipfw_uninit(const void *unused)
|
|
|
|
{
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
struct ip_fw *reap, *rule;
|
2009-12-22 13:53:34 +00:00
|
|
|
struct ip_fw_chain *chain = &V_layer3_chain;
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
int i;
|
2009-07-25 06:42:42 +00:00
|
|
|
|
2009-08-21 11:20:10 +00:00
|
|
|
V_ipfw_vnet_ready = 0; /* tell new callers to go away */
|
2009-12-05 09:13:06 +00:00
|
|
|
/*
|
|
|
|
* disconnect from ipv4, ipv6, layer2 and sockopt.
|
|
|
|
* Then grab, release and grab again the WLOCK so we make
|
|
|
|
* sure the update is propagated and nobody will be in.
|
|
|
|
*/
|
2009-12-28 10:47:04 +00:00
|
|
|
(void)ipfw_attach_hooks(0 /* detach */);
|
2009-10-11 05:59:43 +00:00
|
|
|
V_ip_fw_chk_ptr = NULL;
|
|
|
|
V_ip_fw_ctl_ptr = NULL;
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
IPFW_UH_WLOCK(chain);
|
|
|
|
IPFW_UH_WUNLOCK(chain);
|
|
|
|
IPFW_UH_WLOCK(chain);
|
2009-12-05 09:13:06 +00:00
|
|
|
|
2009-12-22 13:53:34 +00:00
|
|
|
IPFW_WLOCK(chain);
|
|
|
|
IPFW_WUNLOCK(chain);
|
|
|
|
IPFW_WLOCK(chain);
|
2009-12-05 09:13:06 +00:00
|
|
|
|
2009-12-15 16:15:14 +00:00
|
|
|
ipfw_dyn_uninit(0); /* run the callout_drain */
|
2010-03-07 15:37:58 +00:00
|
|
|
ipfw_destroy_tables(chain);
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
reap = NULL;
|
|
|
|
for (i = 0; i < chain->n_rules; i++) {
|
|
|
|
rule = chain->map[i];
|
|
|
|
rule->x_next = reap;
|
|
|
|
reap = rule;
|
|
|
|
}
|
|
|
|
if (chain->map)
|
|
|
|
free(chain->map, M_IPFW);
|
2009-12-22 13:53:34 +00:00
|
|
|
IPFW_WUNLOCK(chain);
|
merge code from ipfw3-head to reduce contention on the ipfw lock
and remove all O(N) sequences from kernel critical sections in ipfw.
In detail:
1. introduce a IPFW_UH_LOCK to arbitrate requests from
the upper half of the kernel. Some things, such as 'ipfw show',
can be done holding this lock in read mode, whereas insert and
delete require IPFW_UH_WLOCK.
2. introduce a mapping structure to keep rules together. This replaces
the 'next' chain currently used in ipfw rules. At the moment
the map is a simple array (sorted by rule number and then rule_id),
so we can find a rule quickly instead of having to scan the list.
This reduces many expensive lookups from O(N) to O(log N).
3. when an expensive operation (such as insert or delete) is done
by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
without blocking the bottom half of the kernel, then acquire
IPFW_WLOCK and quickly update pointers to the map and related info.
After dropping IPFW_LOCK we can then continue the cleanup protected
by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
is only blocked for O(1).
4. do not pass pointers to rules through dummynet, netgraph, divert etc,
but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
We validate the slot index (in the array of #2) with chain_id,
and if successful do a O(1) dereference; otherwise, we can find
the rule in O(log N) through <rulenum, rule_id>
All the above does not change the userland/kernel ABI, though there
are some disgusting casts between pointers and uint32_t
Operation costs now are as follows:
Function Old Now Planned
-------------------------------------------------------------------
+ skipto X, non cached O(N) O(log N)
+ skipto X, cached O(1) O(1)
XXX dynamic rule lookup O(1) O(log N) O(1)
+ skipto tablearg O(N) O(1)
+ reinject, non cached O(N) O(log N)
+ reinject, cached O(1) O(1)
+ kernel blocked during setsockopt() O(N) O(1)
-------------------------------------------------------------------
The only (very small) regression is on dynamic rule lookup and this will
be fixed in a day or two, without changing the userland/kernel ABI
Supported by: Valeria Paoli
MFC after: 1 month
2009-12-22 19:01:47 +00:00
|
|
|
IPFW_UH_WUNLOCK(chain);
|
2009-07-25 06:42:42 +00:00
|
|
|
if (reap != NULL)
|
2009-12-15 21:24:12 +00:00
|
|
|
ipfw_reap_rules(reap);
|
2009-12-22 13:53:34 +00:00
|
|
|
IPFW_LOCK_DESTROY(chain);
|
2009-12-15 16:15:14 +00:00
|
|
|
ipfw_dyn_uninit(1); /* free the remaining parts */
|
2009-07-25 06:42:42 +00:00
|
|
|
return 0;
|
|
|
|
}
|
2005-08-13 11:02:34 +00:00
|
|
|
|
2009-08-21 11:20:10 +00:00
|
|
|
/*
|
|
|
|
* Module event handler.
|
|
|
|
* In general we have the choice of handling most of these events by the
|
|
|
|
* event handler or by the (VNET_)SYS(UN)INIT handlers. I have chosen to
|
|
|
|
* use the SYSINIT handlers as they are more capable of expressing the
|
|
|
|
* flow of control during module and vnet operations, so this is just
|
|
|
|
* a skeleton. Note there is no SYSINIT equivalent of the module
|
|
|
|
* SHUTDOWN handler, but we don't have anything to do in that case anyhow.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
ipfw_modevent(module_t mod, int type, void *unused)
|
|
|
|
{
|
|
|
|
int err = 0;
|
2005-08-13 11:02:34 +00:00
|
|
|
|
2009-08-21 11:20:10 +00:00
|
|
|
switch (type) {
|
|
|
|
case MOD_LOAD:
|
|
|
|
/* Called once at module load or
|
|
|
|
* system boot if compiled in. */
|
|
|
|
break;
|
|
|
|
case MOD_QUIESCE:
|
2009-10-11 05:59:43 +00:00
|
|
|
/* Called before unload. May veto unloading. */
|
|
|
|
break;
|
|
|
|
case MOD_UNLOAD:
|
2009-08-21 11:20:10 +00:00
|
|
|
/* Called during unload. */
|
|
|
|
break;
|
|
|
|
case MOD_SHUTDOWN:
|
|
|
|
/* Called during system shutdown. */
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
err = EOPNOTSUPP;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static moduledata_t ipfwmod = {
|
|
|
|
"ipfw",
|
|
|
|
ipfw_modevent,
|
|
|
|
0
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Define startup order. */
|
|
|
|
#define IPFW_SI_SUB_FIREWALL SI_SUB_PROTO_IFATTACHDOMAIN
|
|
|
|
#define IPFW_MODEVENT_ORDER (SI_ORDER_ANY - 255) /* On boot slot in here. */
|
|
|
|
#define IPFW_MODULE_ORDER (IPFW_MODEVENT_ORDER + 1) /* A little later. */
|
|
|
|
#define IPFW_VNET_ORDER (IPFW_MODEVENT_ORDER + 2) /* Later still. */
|
|
|
|
|
|
|
|
DECLARE_MODULE(ipfw, ipfwmod, IPFW_SI_SUB_FIREWALL, IPFW_MODEVENT_ORDER);
|
|
|
|
MODULE_VERSION(ipfw, 2);
|
|
|
|
/* should declare some dependencies here */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Starting up. Done in order after ipfwmod() has been called.
|
|
|
|
* VNET_SYSINIT is also called for each existing vnet and each new vnet.
|
|
|
|
*/
|
|
|
|
SYSINIT(ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER,
|
|
|
|
ipfw_init, NULL);
|
|
|
|
VNET_SYSINIT(vnet_ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER,
|
|
|
|
vnet_ipfw_init, NULL);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Closing up shop. These are done in REVERSE ORDER, but still
|
|
|
|
* after ipfwmod() has been called. Not called on reboot.
|
|
|
|
* VNET_SYSUNINIT is also called for each exiting vnet as it exits.
|
|
|
|
* or when the module is unloaded.
|
|
|
|
*/
|
|
|
|
SYSUNINIT(ipfw_destroy, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER,
|
|
|
|
ipfw_destroy, NULL);
|
|
|
|
VNET_SYSUNINIT(vnet_ipfw_uninit, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER,
|
|
|
|
vnet_ipfw_uninit, NULL);
|
2009-12-16 10:48:40 +00:00
|
|
|
/* end of file */
|