2011-06-28 11:57:25 +00:00
|
|
|
/* $OpenBSD: pf.c,v 1.634 2009/02/27 12:37:45 henning Exp $ */
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Copyright (c) 2001 Daniel Hartmeier
|
2011-06-28 11:57:25 +00:00
|
|
|
* Copyright (c) 2002 - 2008 Henning Brauer
|
2004-02-26 02:04:28 +00:00
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
*
|
|
|
|
* - Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* - Redistributions in binary form must reproduce the above
|
|
|
|
* copyright notice, this list of conditions and the following
|
|
|
|
* disclaimer in the documentation and/or other materials provided
|
|
|
|
* with the distribution.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
|
|
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
|
|
* COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
|
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
|
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
|
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
|
|
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
|
|
|
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*
|
|
|
|
* Effort sponsored in part by the Defense Advanced Research Projects
|
|
|
|
* Agency (DARPA) and Air Force Research Laboratory, Air Force
|
|
|
|
* Materiel Command, USAF, under agreement number F30602-01-2-0537.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
#include "opt_inet.h"
|
|
|
|
#include "opt_inet6.h"
|
2007-07-03 12:16:07 +00:00
|
|
|
|
|
|
|
#include <sys/cdefs.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
|
|
|
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
#include "opt_bpf.h"
|
|
|
|
#include "opt_pf.h"
|
2005-12-05 11:58:35 +00:00
|
|
|
|
|
|
|
#ifdef DEV_BPF
|
2004-03-17 21:11:02 +00:00
|
|
|
#define NBPFILTER DEV_BPF
|
2005-12-05 11:58:35 +00:00
|
|
|
#else
|
|
|
|
#define NBPFILTER 0
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef DEV_PFLOG
|
2004-03-17 21:11:02 +00:00
|
|
|
#define NPFLOG DEV_PFLOG
|
2005-12-05 11:58:35 +00:00
|
|
|
#else
|
|
|
|
#define NPFLOG 0
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef DEV_PFSYNC
|
2004-03-17 21:11:02 +00:00
|
|
|
#define NPFSYNC DEV_PFSYNC
|
2005-12-05 11:58:35 +00:00
|
|
|
#else
|
|
|
|
#define NPFSYNC 0
|
|
|
|
#endif
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef DEV_PFLOW
|
|
|
|
#define NPFLOW DEV_PFLOW
|
|
|
|
#else
|
|
|
|
#define NPFLOW 0
|
|
|
|
#endif
|
|
|
|
|
2004-02-26 02:34:12 +00:00
|
|
|
#else
|
2004-02-26 02:04:28 +00:00
|
|
|
#include "bpfilter.h"
|
|
|
|
#include "pflog.h"
|
|
|
|
#include "pfsync.h"
|
2011-06-28 11:57:25 +00:00
|
|
|
#include "pflow.h"
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
#include <sys/param.h>
|
|
|
|
#include <sys/systm.h>
|
|
|
|
#include <sys/mbuf.h>
|
|
|
|
#include <sys/filio.h>
|
|
|
|
#include <sys/socket.h>
|
|
|
|
#include <sys/socketvar.h>
|
|
|
|
#include <sys/kernel.h>
|
|
|
|
#include <sys/time.h>
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
#include <sys/random.h>
|
2004-02-26 02:34:12 +00:00
|
|
|
#include <sys/sysctl.h>
|
2004-06-16 23:24:02 +00:00
|
|
|
#include <sys/endian.h>
|
2011-06-28 11:57:25 +00:00
|
|
|
#define betoh64 be64toh
|
2004-02-26 02:34:12 +00:00
|
|
|
#else
|
2004-02-26 02:04:28 +00:00
|
|
|
#include <sys/pool.h>
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2007-07-03 12:16:07 +00:00
|
|
|
#include <sys/proc.h>
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
#include <sys/kthread.h>
|
|
|
|
#include <sys/lock.h>
|
|
|
|
#include <sys/sx.h>
|
|
|
|
#else
|
|
|
|
#include <sys/rwlock.h>
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
#include <sys/md5.h>
|
|
|
|
#else
|
|
|
|
#include <crypto/md5.h>
|
|
|
|
#endif
|
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
#include <net/if.h>
|
|
|
|
#include <net/if_types.h>
|
|
|
|
#include <net/bpf.h>
|
|
|
|
#include <net/route.h>
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
#ifdef RADIX_MPATH
|
|
|
|
#include <net/radix_mpath.h>
|
|
|
|
#endif
|
|
|
|
#else
|
2007-07-03 12:16:07 +00:00
|
|
|
#include <net/radix_mpath.h>
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
#include <netinet/in.h>
|
|
|
|
#include <netinet/in_var.h>
|
|
|
|
#include <netinet/in_systm.h>
|
|
|
|
#include <netinet/ip.h>
|
|
|
|
#include <netinet/ip_var.h>
|
|
|
|
#include <netinet/tcp.h>
|
|
|
|
#include <netinet/tcp_seq.h>
|
|
|
|
#include <netinet/udp.h>
|
|
|
|
#include <netinet/ip_icmp.h>
|
|
|
|
#include <netinet/in_pcb.h>
|
|
|
|
#include <netinet/tcp_timer.h>
|
|
|
|
#include <netinet/tcp_var.h>
|
|
|
|
#include <netinet/udp_var.h>
|
|
|
|
#include <netinet/icmp_var.h>
|
2005-05-03 16:43:32 +00:00
|
|
|
#include <netinet/if_ether.h>
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
#include <netinet/ip_fw.h>
|
|
|
|
#include <netinet/ipfw/ip_fw_private.h> /* XXX: only for DIR_IN/DIR_OUT */
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifndef __FreeBSD__
|
2004-02-26 02:04:28 +00:00
|
|
|
#include <dev/rndvar.h>
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
#include <net/pfvar.h>
|
|
|
|
#include <net/if_pflog.h>
|
2011-06-28 11:57:25 +00:00
|
|
|
#include <net/if_pflow.h>
|
2004-02-26 02:04:28 +00:00
|
|
|
#include <net/if_pfsync.h>
|
|
|
|
|
|
|
|
#ifdef INET6
|
|
|
|
#include <netinet/ip6.h>
|
|
|
|
#include <netinet/in_pcb.h>
|
|
|
|
#include <netinet/icmp6.h>
|
|
|
|
#include <netinet6/nd6.h>
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
#include <netinet6/ip6_var.h>
|
|
|
|
#include <netinet6/in6_pcb.h>
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
#endif /* INET6 */
|
|
|
|
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
#include <machine/in_cksum.h>
|
|
|
|
#include <sys/limits.h>
|
|
|
|
#include <sys/ucred.h>
|
2006-10-22 11:52:19 +00:00
|
|
|
#include <security/mac/mac_framework.h>
|
2004-02-26 02:34:12 +00:00
|
|
|
|
|
|
|
extern int ip_optcopy(struct ip *, struct ip *);
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
/*
 * DPFPRINTF(n, x): print debug output when the configured pf debug level
 * is at least n.  x is a complete, parenthesized printf() argument list,
 * e.g. DPFPRINTF(PF_DEBUG_MISC, ("pf: value %d\n", v));
 *
 * The expansion is wrapped in do { } while (0) so the macro behaves as a
 * single statement and cannot capture an "else" that follows it.
 */
#ifdef __FreeBSD__
#define	DPFPRINTF(n, x)							\
	do {								\
		if (V_pf_status.debug >= (n))				\
			printf x;					\
	} while (0)
#else
#define	DPFPRINTF(n, x)							\
	do {								\
		if (pf_status.debug >= (n))				\
			printf x;					\
	} while (0)
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Global variables
|
|
|
|
*/
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
/* state tables */
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
VNET_DEFINE(struct pf_state_tree, pf_statetbl);
|
|
|
|
|
|
|
|
VNET_DEFINE(struct pf_altqqueue, pf_altqs[2]);
|
|
|
|
VNET_DEFINE(struct pf_palist, pf_pabuf);
|
|
|
|
VNET_DEFINE(struct pf_altqqueue *, pf_altqs_active);
|
|
|
|
VNET_DEFINE(struct pf_altqqueue *, pf_altqs_inactive);
|
|
|
|
VNET_DEFINE(struct pf_status, pf_status);
|
|
|
|
|
|
|
|
VNET_DEFINE(u_int32_t, ticket_altqs_active);
|
|
|
|
VNET_DEFINE(u_int32_t, ticket_altqs_inactive);
|
|
|
|
VNET_DEFINE(int, altqs_inactive_open);
|
|
|
|
VNET_DEFINE(u_int32_t, ticket_pabuf);
|
|
|
|
|
|
|
|
VNET_DEFINE(MD5_CTX, pf_tcp_secret_ctx);
|
|
|
|
#define V_pf_tcp_secret_ctx VNET(pf_tcp_secret_ctx)
|
|
|
|
VNET_DEFINE(u_char, pf_tcp_secret[16]);
|
|
|
|
#define V_pf_tcp_secret VNET(pf_tcp_secret)
|
|
|
|
VNET_DEFINE(int, pf_tcp_secret_init);
|
|
|
|
#define V_pf_tcp_secret_init VNET(pf_tcp_secret_init)
|
|
|
|
VNET_DEFINE(int, pf_tcp_iss_off);
|
|
|
|
#define V_pf_tcp_iss_off VNET(pf_tcp_iss_off)
|
|
|
|
|
|
|
|
struct pf_anchor_stackframe {
|
|
|
|
struct pf_ruleset *rs;
|
|
|
|
struct pf_rule *r;
|
|
|
|
struct pf_anchor_node *parent;
|
|
|
|
struct pf_anchor *child;
|
|
|
|
};
|
|
|
|
VNET_DEFINE(struct pf_anchor_stackframe, pf_anchor_stack[64]);
|
|
|
|
#define V_pf_anchor_stack VNET(pf_anchor_stack)
|
|
|
|
|
|
|
|
VNET_DEFINE(uma_zone_t, pf_src_tree_pl);
|
|
|
|
VNET_DEFINE(uma_zone_t, pf_rule_pl);
|
|
|
|
VNET_DEFINE(uma_zone_t, pf_pooladdr_pl);
|
|
|
|
VNET_DEFINE(uma_zone_t, pf_state_pl);
|
|
|
|
VNET_DEFINE(uma_zone_t, pf_state_key_pl);
|
|
|
|
VNET_DEFINE(uma_zone_t, pf_state_item_pl);
|
|
|
|
VNET_DEFINE(uma_zone_t, pf_altq_pl);
|
|
|
|
#else
|
|
|
|
struct pf_state_tree pf_statetbl;
|
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
struct pf_altqqueue pf_altqs[2];
|
|
|
|
struct pf_palist pf_pabuf;
|
|
|
|
struct pf_altqqueue *pf_altqs_active;
|
|
|
|
struct pf_altqqueue *pf_altqs_inactive;
|
|
|
|
struct pf_status pf_status;
|
|
|
|
|
|
|
|
u_int32_t ticket_altqs_active;
|
|
|
|
u_int32_t ticket_altqs_inactive;
|
2004-06-16 23:24:02 +00:00
|
|
|
int altqs_inactive_open;
|
2004-02-26 02:04:28 +00:00
|
|
|
u_int32_t ticket_pabuf;
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
MD5_CTX pf_tcp_secret_ctx;
|
|
|
|
u_char pf_tcp_secret[16];
|
|
|
|
int pf_tcp_secret_init;
|
|
|
|
int pf_tcp_iss_off;
|
|
|
|
|
2005-05-03 16:43:32 +00:00
|
|
|
struct pf_anchor_stackframe {
|
|
|
|
struct pf_ruleset *rs;
|
|
|
|
struct pf_rule *r;
|
|
|
|
struct pf_anchor_node *parent;
|
|
|
|
struct pf_anchor *child;
|
|
|
|
} pf_anchor_stack[64];
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
struct pool pf_src_tree_pl, pf_rule_pl, pf_pooladdr_pl;
|
|
|
|
struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl;
|
|
|
|
struct pool pf_altq_pl;
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2005-05-03 16:43:32 +00:00
|
|
|
void pf_init_threshold(struct pf_threshold *, u_int32_t,
|
|
|
|
u_int32_t);
|
|
|
|
void pf_add_threshold(struct pf_threshold *);
|
|
|
|
int pf_check_threshold(struct pf_threshold *);
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
void pf_change_ap(struct pf_addr *, u_int16_t *,
|
|
|
|
u_int16_t *, u_int16_t *, struct pf_addr *,
|
|
|
|
u_int16_t, u_int8_t, sa_family_t);
|
2007-07-03 12:16:07 +00:00
|
|
|
int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
|
|
|
|
struct tcphdr *, struct pf_state_peer *);
|
2004-02-26 02:04:28 +00:00
|
|
|
#ifdef INET6
|
|
|
|
void pf_change_a6(struct pf_addr *, u_int16_t *,
|
|
|
|
struct pf_addr *, u_int8_t);
|
|
|
|
#endif /* INET6 */
|
|
|
|
void pf_change_icmp(struct pf_addr *, u_int16_t *,
|
|
|
|
struct pf_addr *, struct pf_addr *, u_int16_t,
|
|
|
|
u_int16_t *, u_int16_t *, u_int16_t *,
|
|
|
|
u_int16_t *, u_int8_t, sa_family_t);
|
2006-09-12 04:25:13 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
void pf_send_tcp(struct mbuf *,
|
|
|
|
const struct pf_rule *, sa_family_t,
|
|
|
|
#else
|
2004-02-26 02:04:28 +00:00
|
|
|
void pf_send_tcp(const struct pf_rule *, sa_family_t,
|
2006-09-12 04:25:13 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
const struct pf_addr *, const struct pf_addr *,
|
|
|
|
u_int16_t, u_int16_t, u_int32_t, u_int32_t,
|
2005-05-03 16:43:32 +00:00
|
|
|
u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
|
2007-07-03 12:16:07 +00:00
|
|
|
u_int16_t, struct ether_header *, struct ifnet *);
|
2011-06-28 11:57:25 +00:00
|
|
|
static void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
|
2004-02-26 02:04:28 +00:00
|
|
|
sa_family_t, struct pf_rule *);
|
2011-06-28 11:57:25 +00:00
|
|
|
void pf_detach_state(struct pf_state *);
|
|
|
|
void pf_state_key_detach(struct pf_state *, int);
|
|
|
|
u_int32_t pf_tcp_iss(struct pf_pdesc *);
|
|
|
|
int pf_test_rule(struct pf_rule **, struct pf_state **,
|
2004-06-16 23:24:02 +00:00
|
|
|
int, struct pfi_kif *, struct mbuf *, int,
|
2004-02-26 02:04:28 +00:00
|
|
|
void *, struct pf_pdesc *, struct pf_rule **,
|
2004-09-29 04:54:33 +00:00
|
|
|
#ifdef __FreeBSD__
|
2005-05-03 16:43:32 +00:00
|
|
|
struct pf_ruleset **, struct ifqueue *,
|
|
|
|
struct inpcb *);
|
2004-09-29 04:54:33 +00:00
|
|
|
#else
|
2005-05-03 16:43:32 +00:00
|
|
|
struct pf_ruleset **, struct ifqueue *);
|
2004-09-29 04:54:33 +00:00
|
|
|
#endif
|
2011-06-28 11:57:25 +00:00
|
|
|
static __inline int pf_create_state(struct pf_rule *, struct pf_rule *,
|
|
|
|
struct pf_rule *, struct pf_pdesc *,
|
|
|
|
struct pf_src_node *, struct pf_state_key *,
|
|
|
|
struct pf_state_key *, struct pf_state_key *,
|
|
|
|
struct pf_state_key *, struct mbuf *, int,
|
|
|
|
u_int16_t, u_int16_t, int *, struct pfi_kif *,
|
|
|
|
struct pf_state **, int, u_int16_t, u_int16_t,
|
|
|
|
int);
|
2004-02-26 02:04:28 +00:00
|
|
|
int pf_test_fragment(struct pf_rule **, int,
|
2004-06-16 23:24:02 +00:00
|
|
|
struct pfi_kif *, struct mbuf *, void *,
|
2004-02-26 02:04:28 +00:00
|
|
|
struct pf_pdesc *, struct pf_rule **,
|
|
|
|
struct pf_ruleset **);
|
2009-12-24 00:43:44 +00:00
|
|
|
int pf_tcp_track_full(struct pf_state_peer *,
|
|
|
|
struct pf_state_peer *, struct pf_state **,
|
|
|
|
struct pfi_kif *, struct mbuf *, int,
|
|
|
|
struct pf_pdesc *, u_short *, int *);
|
2011-06-28 11:57:25 +00:00
|
|
|
int pf_tcp_track_sloppy(struct pf_state_peer *,
|
2009-12-24 00:43:44 +00:00
|
|
|
struct pf_state_peer *, struct pf_state **,
|
|
|
|
struct pf_pdesc *, u_short *);
|
2004-02-26 02:04:28 +00:00
|
|
|
int pf_test_state_tcp(struct pf_state **, int,
|
2004-06-16 23:24:02 +00:00
|
|
|
struct pfi_kif *, struct mbuf *, int,
|
2004-02-26 02:04:28 +00:00
|
|
|
void *, struct pf_pdesc *, u_short *);
|
|
|
|
int pf_test_state_udp(struct pf_state **, int,
|
2004-06-16 23:24:02 +00:00
|
|
|
struct pfi_kif *, struct mbuf *, int,
|
2004-02-26 02:04:28 +00:00
|
|
|
void *, struct pf_pdesc *);
|
|
|
|
int pf_test_state_icmp(struct pf_state **, int,
|
2004-06-16 23:24:02 +00:00
|
|
|
struct pfi_kif *, struct mbuf *, int,
|
2005-05-03 16:43:32 +00:00
|
|
|
void *, struct pf_pdesc *, u_short *);
|
2004-02-26 02:04:28 +00:00
|
|
|
int pf_test_state_other(struct pf_state **, int,
|
2011-06-28 11:57:25 +00:00
|
|
|
struct pfi_kif *, struct mbuf *, struct pf_pdesc *);
|
2004-02-26 02:04:28 +00:00
|
|
|
void pf_route(struct mbuf **, struct pf_rule *, int,
|
2007-07-03 12:16:07 +00:00
|
|
|
struct ifnet *, struct pf_state *,
|
|
|
|
struct pf_pdesc *);
|
2004-02-26 02:04:28 +00:00
|
|
|
void pf_route6(struct mbuf **, struct pf_rule *, int,
|
2007-07-03 12:16:07 +00:00
|
|
|
struct ifnet *, struct pf_state *,
|
|
|
|
struct pf_pdesc *);
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifndef __FreeBSD__
|
2007-07-03 12:16:07 +00:00
|
|
|
int pf_socket_lookup(int, struct pf_pdesc *);
|
2004-09-29 04:54:33 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t,
|
|
|
|
sa_family_t);
|
|
|
|
u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t,
|
|
|
|
sa_family_t);
|
|
|
|
u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t,
|
|
|
|
u_int16_t);
|
|
|
|
void pf_set_rt_ifp(struct pf_state *,
|
|
|
|
struct pf_addr *);
|
|
|
|
int pf_check_proto_cksum(struct mbuf *, int, int,
|
|
|
|
u_int8_t, sa_family_t);
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifndef __FreeBSD__
|
|
|
|
struct pf_divert *pf_get_divert(struct mbuf *);
|
|
|
|
#endif
|
|
|
|
void pf_print_state_parts(struct pf_state *,
|
|
|
|
struct pf_state_key *, struct pf_state_key *);
|
2004-02-26 02:04:28 +00:00
|
|
|
int pf_addr_wrap_neq(struct pf_addr_wrap *,
|
|
|
|
struct pf_addr_wrap *);
|
2011-06-28 11:57:25 +00:00
|
|
|
int pf_compare_state_keys(struct pf_state_key *,
|
|
|
|
struct pf_state_key *, struct pfi_kif *, u_int);
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
struct pf_state *pf_find_state(struct pfi_kif *,
|
|
|
|
struct pf_state_key_cmp *, u_int, struct mbuf *,
|
|
|
|
struct pf_mtag *);
|
|
|
|
#else
|
|
|
|
struct pf_state *pf_find_state(struct pfi_kif *,
|
|
|
|
struct pf_state_key_cmp *, u_int, struct mbuf *);
|
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
int pf_src_connlimit(struct pf_state **);
|
|
|
|
int pf_check_congestion(struct ifqueue *);
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
VNET_DECLARE(int, pf_end_threads);
|
2005-05-03 16:43:32 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
VNET_DEFINE(struct pf_pool_limit, pf_pool_limits[PF_LIMIT_MAX]);
|
2004-02-26 02:34:12 +00:00
|
|
|
#else
|
2007-07-03 12:16:07 +00:00
|
|
|
extern struct pool pfr_ktable_pl;
|
|
|
|
extern struct pool pfr_kentry_pl;
|
|
|
|
|
2004-06-16 23:24:02 +00:00
|
|
|
struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
|
|
|
|
{ &pf_state_pl, PFSTATE_HIWAT },
|
|
|
|
{ &pf_src_tree_pl, PFSNODE_HIWAT },
|
2007-07-03 12:16:07 +00:00
|
|
|
{ &pf_frent_pl, PFFRAG_FRENT_HIWAT },
|
|
|
|
{ &pfr_ktable_pl, PFR_KTABLE_HIWAT },
|
|
|
|
{ &pfr_kentry_pl, PFR_KENTRY_HIWAT }
|
2004-06-16 23:24:02 +00:00
|
|
|
};
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
/*
 * STATE_LOOKUP: find the state for key k on interface i in direction d and
 * store it in s.  The macro returns from the ENCLOSING function:
 *   - PF_DROP if no state is found or the state is marked PFTM_PURGE;
 *   - PF_PASS (FreeBSD only) if the packet carries PF_PACKET_LOOPED;
 *   - PF_PASS for an outbound packet whose state's rule is route-to (rule
 *     direction out) or reply-to (rule direction in) and whose rt_kif is
 *     set and differs from i.
 * Otherwise execution continues after the macro with s set.
 *
 * PPACKET_LOOPED() requires a pointer named "pd" in scope; PACKET_LOOPED()
 * requires an object named "pd" in scope.
 *
 * All macro arguments are parenthesized so that expressions such as
 * "*state" can be passed safely.
 */
#ifdef __FreeBSD__
#define	PPACKET_LOOPED()						\
	(pd->pf_mtag->flags & PF_PACKET_LOOPED)

#define	PACKET_LOOPED()							\
	(pd.pf_mtag->flags & PF_PACKET_LOOPED)

#define	STATE_LOOKUP(i, k, d, s, m, pt)					\
	do {								\
		(s) = pf_find_state((i), (k), (d), (m), (pt));		\
		if ((s) == NULL || (s)->timeout == PFTM_PURGE)		\
			return (PF_DROP);				\
		if (PPACKET_LOOPED())					\
			return (PF_PASS);				\
		if ((d) == PF_OUT &&					\
		    (((s)->rule.ptr->rt == PF_ROUTETO &&		\
		    (s)->rule.ptr->direction == PF_OUT) ||		\
		    ((s)->rule.ptr->rt == PF_REPLYTO &&			\
		    (s)->rule.ptr->direction == PF_IN)) &&		\
		    (s)->rt_kif != NULL &&				\
		    (s)->rt_kif != (i))					\
			return (PF_PASS);				\
	} while (0)
#else
#define	STATE_LOOKUP(i, k, d, s, m)					\
	do {								\
		(s) = pf_find_state((i), (k), (d), (m));		\
		if ((s) == NULL || (s)->timeout == PFTM_PURGE)		\
			return (PF_DROP);				\
		if ((d) == PF_OUT &&					\
		    (((s)->rule.ptr->rt == PF_ROUTETO &&		\
		    (s)->rule.ptr->direction == PF_OUT) ||		\
		    ((s)->rule.ptr->rt == PF_REPLYTO &&			\
		    (s)->rule.ptr->direction == PF_IN)) &&		\
		    (s)->rt_kif != NULL &&				\
		    (s)->rt_kif != (i))					\
			return (PF_PASS);				\
	} while (0)
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
/*
 * BOUND_IFACE(r, k): yields k when rule r has PFRULE_IFBOUND set, otherwise
 * the global "all interfaces" entry (V_pfi_all on FreeBSD, pfi_all elsewhere).
 *
 * The whole conditional is parenthesized so the macro can be embedded in a
 * larger expression without the ?: operator absorbing neighbouring operands.
 */
#ifdef __FreeBSD__
#define	BOUND_IFACE(r, k)						\
	(((r)->rule_flag & PFRULE_IFBOUND) ? (k) : V_pfi_all)
#else
#define	BOUND_IFACE(r, k)						\
	(((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all)
#endif
|
2004-06-16 23:24:02 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
/*
 * STATE_INC_COUNTERS(s): bump states_cur and states_tot on the rule that
 * state s points to and, when set, on its anchor rule and its NAT rule.
 * The argument is parenthesized so any pointer expression may be passed.
 * Note that s is evaluated more than once; do not pass an expression with
 * side effects.
 */
#define	STATE_INC_COUNTERS(s)						\
	do {								\
		(s)->rule.ptr->states_cur++;				\
		(s)->rule.ptr->states_tot++;				\
		if ((s)->anchor.ptr != NULL) {				\
			(s)->anchor.ptr->states_cur++;			\
			(s)->anchor.ptr->states_tot++;			\
		}							\
		if ((s)->nat_rule.ptr != NULL) {			\
			(s)->nat_rule.ptr->states_cur++;		\
			(s)->nat_rule.ptr->states_tot++;		\
		}							\
	} while (0)
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
/*
 * STATE_DEC_COUNTERS(s): decrement states_cur on the NAT rule and anchor
 * rule of state s (when set) and on its rule.  states_tot is left untouched.
 * The argument is parenthesized so any pointer expression may be passed.
 * Note that s is evaluated more than once; do not pass an expression with
 * side effects.
 */
#define	STATE_DEC_COUNTERS(s)						\
	do {								\
		if ((s)->nat_rule.ptr != NULL)				\
			(s)->nat_rule.ptr->states_cur--;		\
		if ((s)->anchor.ptr != NULL)				\
			(s)->anchor.ptr->states_cur--;			\
		(s)->rule.ptr->states_cur--;				\
	} while (0)
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
|
|
|
|
static __inline int pf_state_compare_key(struct pf_state_key *,
|
|
|
|
struct pf_state_key *);
|
|
|
|
static __inline int pf_state_compare_id(struct pf_state *,
|
|
|
|
struct pf_state *);
|
|
|
|
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
VNET_DEFINE(struct pf_src_tree, tree_src_tracking);
|
|
|
|
|
|
|
|
VNET_DEFINE(struct pf_state_tree_id, tree_id);
|
|
|
|
VNET_DEFINE(struct pf_state_queue, state_list);
|
|
|
|
#else
|
2004-06-16 23:24:02 +00:00
|
|
|
struct pf_src_tree tree_src_tracking;
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2004-06-16 23:24:02 +00:00
|
|
|
struct pf_state_tree_id tree_id;
|
2007-07-03 12:16:07 +00:00
|
|
|
struct pf_state_queue state_list;
|
|
|
|
#endif
|
2004-06-16 23:24:02 +00:00
|
|
|
|
|
|
|
RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
|
2011-06-28 11:57:25 +00:00
|
|
|
RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key);
|
2004-06-16 23:24:02 +00:00
|
|
|
RB_GENERATE(pf_state_tree_id, pf_state,
|
2011-06-28 11:57:25 +00:00
|
|
|
entry_id, pf_state_compare_id);
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
static __inline int
|
2004-06-16 23:24:02 +00:00
|
|
|
pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
|
|
|
|
{
|
|
|
|
int diff;
|
|
|
|
|
|
|
|
if (a->rule.ptr > b->rule.ptr)
|
|
|
|
return (1);
|
|
|
|
if (a->rule.ptr < b->rule.ptr)
|
|
|
|
return (-1);
|
|
|
|
if ((diff = a->af - b->af) != 0)
|
|
|
|
return (diff);
|
|
|
|
switch (a->af) {
|
|
|
|
#ifdef INET
|
|
|
|
case AF_INET:
|
|
|
|
if (a->addr.addr32[0] > b->addr.addr32[0])
|
|
|
|
return (1);
|
|
|
|
if (a->addr.addr32[0] < b->addr.addr32[0])
|
|
|
|
return (-1);
|
|
|
|
break;
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
|
|
|
if (a->addr.addr32[3] > b->addr.addr32[3])
|
|
|
|
return (1);
|
|
|
|
if (a->addr.addr32[3] < b->addr.addr32[3])
|
|
|
|
return (-1);
|
|
|
|
if (a->addr.addr32[2] > b->addr.addr32[2])
|
|
|
|
return (1);
|
|
|
|
if (a->addr.addr32[2] < b->addr.addr32[2])
|
|
|
|
return (-1);
|
|
|
|
if (a->addr.addr32[1] > b->addr.addr32[1])
|
|
|
|
return (1);
|
|
|
|
if (a->addr.addr32[1] < b->addr.addr32[1])
|
|
|
|
return (-1);
|
|
|
|
if (a->addr.addr32[0] > b->addr.addr32[0])
|
|
|
|
return (1);
|
|
|
|
if (a->addr.addr32[0] < b->addr.addr32[0])
|
|
|
|
return (-1);
|
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
|
|
|
}
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
#ifdef INET6
|
|
|
|
void
|
|
|
|
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
|
|
|
|
{
|
|
|
|
switch (af) {
|
|
|
|
#ifdef INET
|
|
|
|
case AF_INET:
|
|
|
|
dst->addr32[0] = src->addr32[0];
|
|
|
|
break;
|
|
|
|
#endif /* INET */
|
|
|
|
case AF_INET6:
|
|
|
|
dst->addr32[0] = src->addr32[0];
|
|
|
|
dst->addr32[1] = src->addr32[1];
|
|
|
|
dst->addr32[2] = src->addr32[2];
|
|
|
|
dst->addr32[3] = src->addr32[3];
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2005-05-03 16:43:32 +00:00
|
|
|
#endif /* INET6 */
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2005-05-03 16:43:32 +00:00
|
|
|
void
|
|
|
|
pf_init_threshold(struct pf_threshold *threshold,
|
|
|
|
u_int32_t limit, u_int32_t seconds)
|
|
|
|
{
|
|
|
|
threshold->limit = limit * PF_THRESHOLD_MULT;
|
|
|
|
threshold->seconds = seconds;
|
|
|
|
threshold->count = 0;
|
|
|
|
threshold->last = time_second;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
pf_add_threshold(struct pf_threshold *threshold)
|
|
|
|
{
|
|
|
|
u_int32_t t = time_second, diff = t - threshold->last;
|
|
|
|
|
|
|
|
if (diff >= threshold->seconds)
|
|
|
|
threshold->count = 0;
|
|
|
|
else
|
|
|
|
threshold->count -= threshold->count * diff /
|
|
|
|
threshold->seconds;
|
|
|
|
threshold->count += PF_THRESHOLD_MULT;
|
|
|
|
threshold->last = t;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
pf_check_threshold(struct pf_threshold *threshold)
|
|
|
|
{
|
|
|
|
return (threshold->count > threshold->limit);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
pf_src_connlimit(struct pf_state **state)
|
|
|
|
{
|
|
|
|
int bad = 0;
|
|
|
|
|
|
|
|
(*state)->src_node->conn++;
|
2007-07-03 12:16:07 +00:00
|
|
|
(*state)->src.tcp_est = 1;
|
2005-05-03 16:43:32 +00:00
|
|
|
pf_add_threshold(&(*state)->src_node->conn_rate);
|
|
|
|
|
|
|
|
if ((*state)->rule.ptr->max_src_conn &&
|
|
|
|
(*state)->rule.ptr->max_src_conn <
|
|
|
|
(*state)->src_node->conn) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
V_pf_status.lcounters[LCNT_SRCCONN]++;
|
|
|
|
#else
|
2005-05-03 16:43:32 +00:00
|
|
|
pf_status.lcounters[LCNT_SRCCONN]++;
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
bad++;
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((*state)->rule.ptr->max_src_conn_rate.limit &&
|
|
|
|
pf_check_threshold(&(*state)->src_node->conn_rate)) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
V_pf_status.lcounters[LCNT_SRCCONNRATE]++;
|
|
|
|
#else
|
2005-05-03 16:43:32 +00:00
|
|
|
pf_status.lcounters[LCNT_SRCCONNRATE]++;
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
bad++;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!bad)
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
if ((*state)->rule.ptr->overload_tbl) {
|
|
|
|
struct pfr_addr p;
|
|
|
|
u_int32_t killed = 0;
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
V_pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
|
|
|
|
if (V_pf_status.debug >= PF_DEBUG_MISC) {
|
|
|
|
#else
|
2005-05-03 16:43:32 +00:00
|
|
|
pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
|
|
|
|
if (pf_status.debug >= PF_DEBUG_MISC) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
printf("pf_src_connlimit: blocking address ");
|
|
|
|
pf_print_host(&(*state)->src_node->addr, 0,
|
2011-06-28 11:57:25 +00:00
|
|
|
(*state)->key[PF_SK_WIRE]->af);
|
2005-05-03 16:43:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
bzero(&p, sizeof(p));
|
2011-06-28 11:57:25 +00:00
|
|
|
p.pfra_af = (*state)->key[PF_SK_WIRE]->af;
|
|
|
|
switch ((*state)->key[PF_SK_WIRE]->af) {
|
2005-05-03 16:43:32 +00:00
|
|
|
#ifdef INET
|
|
|
|
case AF_INET:
|
|
|
|
p.pfra_net = 32;
|
|
|
|
p.pfra_ip4addr = (*state)->src_node->addr.v4;
|
|
|
|
break;
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
|
|
|
p.pfra_net = 128;
|
|
|
|
p.pfra_ip6addr = (*state)->src_node->addr.v6;
|
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
|
|
|
}
|
|
|
|
|
|
|
|
pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
|
|
|
|
&p, time_second);
|
|
|
|
|
|
|
|
/* kill existing states if that's required. */
|
|
|
|
if ((*state)->rule.ptr->flush) {
|
2011-06-28 11:57:25 +00:00
|
|
|
struct pf_state_key *sk;
|
|
|
|
struct pf_state *st;
|
2005-05-03 16:43:32 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
|
|
|
|
RB_FOREACH(st, pf_state_tree_id, &V_tree_id) {
|
|
|
|
#else
|
|
|
|
pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
|
|
|
|
RB_FOREACH(st, pf_state_tree_id, &tree_id) {
|
|
|
|
#endif
|
|
|
|
sk = st->key[PF_SK_WIRE];
|
2005-05-03 16:43:32 +00:00
|
|
|
/*
|
|
|
|
* Kill states from this source. (Only those
|
|
|
|
* from the same rule if PF_FLUSH_GLOBAL is not
|
|
|
|
* set)
|
|
|
|
*/
|
2011-06-28 11:57:25 +00:00
|
|
|
if (sk->af ==
|
|
|
|
(*state)->key[PF_SK_WIRE]->af &&
|
2005-05-03 16:43:32 +00:00
|
|
|
(((*state)->direction == PF_OUT &&
|
|
|
|
PF_AEQ(&(*state)->src_node->addr,
|
2011-06-28 11:57:25 +00:00
|
|
|
&sk->addr[0], sk->af)) ||
|
2005-05-03 16:43:32 +00:00
|
|
|
((*state)->direction == PF_IN &&
|
|
|
|
PF_AEQ(&(*state)->src_node->addr,
|
2011-06-28 11:57:25 +00:00
|
|
|
&sk->addr[1], sk->af))) &&
|
2005-05-03 16:43:32 +00:00
|
|
|
((*state)->rule.ptr->flush &
|
|
|
|
PF_FLUSH_GLOBAL ||
|
2011-06-28 11:57:25 +00:00
|
|
|
(*state)->rule.ptr == st->rule.ptr)) {
|
|
|
|
st->timeout = PFTM_PURGE;
|
|
|
|
st->src.state = st->dst.state =
|
2005-05-03 16:43:32 +00:00
|
|
|
TCPS_CLOSED;
|
|
|
|
killed++;
|
|
|
|
}
|
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (V_pf_status.debug >= PF_DEBUG_MISC)
|
|
|
|
#else
|
2005-05-03 16:43:32 +00:00
|
|
|
if (pf_status.debug >= PF_DEBUG_MISC)
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
printf(", %u states killed", killed);
|
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (V_pf_status.debug >= PF_DEBUG_MISC)
|
|
|
|
#else
|
2005-05-03 16:43:32 +00:00
|
|
|
if (pf_status.debug >= PF_DEBUG_MISC)
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
printf("\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
/* kill this state */
|
|
|
|
(*state)->timeout = PFTM_PURGE;
|
|
|
|
(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
|
2004-06-16 23:24:02 +00:00
|
|
|
int
|
|
|
|
pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
|
|
|
|
struct pf_addr *src, sa_family_t af)
|
|
|
|
{
|
|
|
|
struct pf_src_node k;
|
|
|
|
|
|
|
|
if (*sn == NULL) {
|
|
|
|
k.af = af;
|
|
|
|
PF_ACPY(&k.addr, src, af);
|
|
|
|
if (rule->rule_flag & PFRULE_RULESRCTRACK ||
|
|
|
|
rule->rpool.opts & PF_POOL_STICKYADDR)
|
|
|
|
k.rule.ptr = rule;
|
|
|
|
else
|
|
|
|
k.rule.ptr = NULL;
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
V_pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
|
|
|
|
*sn = RB_FIND(pf_src_tree, &V_tree_src_tracking, &k);
|
|
|
|
#else
|
2004-06-16 23:24:02 +00:00
|
|
|
pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
|
|
|
|
*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2004-06-16 23:24:02 +00:00
|
|
|
}
|
|
|
|
if (*sn == NULL) {
|
|
|
|
if (!rule->max_src_nodes ||
|
|
|
|
rule->src_nodes < rule->max_src_nodes)
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
(*sn) = pool_get(&V_pf_src_tree_pl, PR_NOWAIT | PR_ZERO);
|
|
|
|
#else
|
|
|
|
(*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT | PR_ZERO);
|
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
else
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
V_pf_status.lcounters[LCNT_SRCNODES]++;
|
|
|
|
#else
|
2005-05-03 16:43:32 +00:00
|
|
|
pf_status.lcounters[LCNT_SRCNODES]++;
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2004-06-16 23:24:02 +00:00
|
|
|
if ((*sn) == NULL)
|
|
|
|
return (-1);
|
2005-05-03 16:43:32 +00:00
|
|
|
|
|
|
|
pf_init_threshold(&(*sn)->conn_rate,
|
|
|
|
rule->max_src_conn_rate.limit,
|
|
|
|
rule->max_src_conn_rate.seconds);
|
|
|
|
|
2004-06-16 23:24:02 +00:00
|
|
|
(*sn)->af = af;
|
|
|
|
if (rule->rule_flag & PFRULE_RULESRCTRACK ||
|
|
|
|
rule->rpool.opts & PF_POOL_STICKYADDR)
|
|
|
|
(*sn)->rule.ptr = rule;
|
|
|
|
else
|
|
|
|
(*sn)->rule.ptr = NULL;
|
|
|
|
PF_ACPY(&(*sn)->addr, src, af);
|
|
|
|
if (RB_INSERT(pf_src_tree,
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
&V_tree_src_tracking, *sn) != NULL) {
|
|
|
|
if (V_pf_status.debug >= PF_DEBUG_MISC) {
|
|
|
|
#else
|
2004-06-16 23:24:02 +00:00
|
|
|
&tree_src_tracking, *sn) != NULL) {
|
|
|
|
if (pf_status.debug >= PF_DEBUG_MISC) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2004-06-16 23:24:02 +00:00
|
|
|
printf("pf: src_tree insert failed: ");
|
|
|
|
pf_print_host(&(*sn)->addr, 0, af);
|
|
|
|
printf("\n");
|
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pool_put(&V_pf_src_tree_pl, *sn);
|
|
|
|
#else
|
2004-06-16 23:24:02 +00:00
|
|
|
pool_put(&pf_src_tree_pl, *sn);
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2004-06-16 23:24:02 +00:00
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
(*sn)->creation = time_second;
|
|
|
|
(*sn)->ruletype = rule->action;
|
|
|
|
if ((*sn)->rule.ptr != NULL)
|
|
|
|
(*sn)->rule.ptr->src_nodes++;
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
V_pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
|
|
|
|
V_pf_status.src_nodes++;
|
|
|
|
#else
|
2004-06-16 23:24:02 +00:00
|
|
|
pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
|
|
|
|
pf_status.src_nodes++;
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2004-06-16 23:24:02 +00:00
|
|
|
} else {
|
|
|
|
if (rule->max_src_states &&
|
2005-05-03 16:43:32 +00:00
|
|
|
(*sn)->states >= rule->max_src_states) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
V_pf_status.lcounters[LCNT_SRCSTATES]++;
|
|
|
|
#else
|
2005-05-03 16:43:32 +00:00
|
|
|
pf_status.lcounters[LCNT_SRCSTATES]++;
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2004-06-16 23:24:02 +00:00
|
|
|
return (-1);
|
2005-05-03 16:43:32 +00:00
|
|
|
}
|
2004-06-16 23:24:02 +00:00
|
|
|
}
|
|
|
|
return (0);
|
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
/* state table stuff */
|
|
|
|
|
|
|
|
static __inline int
|
|
|
|
pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b)
|
2004-06-16 23:24:02 +00:00
|
|
|
{
|
2011-06-28 11:57:25 +00:00
|
|
|
int diff;
|
|
|
|
|
|
|
|
if ((diff = a->proto - b->proto) != 0)
|
|
|
|
return (diff);
|
|
|
|
if ((diff = a->af - b->af) != 0)
|
|
|
|
return (diff);
|
|
|
|
switch (a->af) {
|
|
|
|
#ifdef INET
|
|
|
|
case AF_INET:
|
|
|
|
if (a->addr[0].addr32[0] > b->addr[0].addr32[0])
|
|
|
|
return (1);
|
|
|
|
if (a->addr[0].addr32[0] < b->addr[0].addr32[0])
|
|
|
|
return (-1);
|
|
|
|
if (a->addr[1].addr32[0] > b->addr[1].addr32[0])
|
|
|
|
return (1);
|
|
|
|
if (a->addr[1].addr32[0] < b->addr[1].addr32[0])
|
|
|
|
return (-1);
|
|
|
|
break;
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
|
|
|
if (a->addr[0].addr32[3] > b->addr[0].addr32[3])
|
|
|
|
return (1);
|
|
|
|
if (a->addr[0].addr32[3] < b->addr[0].addr32[3])
|
|
|
|
return (-1);
|
|
|
|
if (a->addr[1].addr32[3] > b->addr[1].addr32[3])
|
|
|
|
return (1);
|
|
|
|
if (a->addr[1].addr32[3] < b->addr[1].addr32[3])
|
|
|
|
return (-1);
|
|
|
|
if (a->addr[0].addr32[2] > b->addr[0].addr32[2])
|
|
|
|
return (1);
|
|
|
|
if (a->addr[0].addr32[2] < b->addr[0].addr32[2])
|
|
|
|
return (-1);
|
|
|
|
if (a->addr[1].addr32[2] > b->addr[1].addr32[2])
|
|
|
|
return (1);
|
|
|
|
if (a->addr[1].addr32[2] < b->addr[1].addr32[2])
|
|
|
|
return (-1);
|
|
|
|
if (a->addr[0].addr32[1] > b->addr[0].addr32[1])
|
|
|
|
return (1);
|
|
|
|
if (a->addr[0].addr32[1] < b->addr[0].addr32[1])
|
|
|
|
return (-1);
|
|
|
|
if (a->addr[1].addr32[1] > b->addr[1].addr32[1])
|
|
|
|
return (1);
|
|
|
|
if (a->addr[1].addr32[1] < b->addr[1].addr32[1])
|
|
|
|
return (-1);
|
|
|
|
if (a->addr[0].addr32[0] > b->addr[0].addr32[0])
|
|
|
|
return (1);
|
|
|
|
if (a->addr[0].addr32[0] < b->addr[0].addr32[0])
|
|
|
|
return (-1);
|
|
|
|
if (a->addr[1].addr32[0] > b->addr[1].addr32[0])
|
|
|
|
return (1);
|
|
|
|
if (a->addr[1].addr32[0] < b->addr[1].addr32[0])
|
|
|
|
return (-1);
|
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
if ((diff = a->port[0] - b->port[0]) != 0)
|
|
|
|
return (diff);
|
|
|
|
if ((diff = a->port[1] - b->port[1]) != 0)
|
|
|
|
return (diff);
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static __inline int
|
|
|
|
pf_state_compare_id(struct pf_state *a, struct pf_state *b)
|
|
|
|
{
|
|
|
|
if (a->id > b->id)
|
|
|
|
return (1);
|
|
|
|
if (a->id < b->id)
|
|
|
|
return (-1);
|
|
|
|
if (a->creatorid > b->creatorid)
|
|
|
|
return (1);
|
|
|
|
if (a->creatorid < b->creatorid)
|
2004-06-16 23:24:02 +00:00
|
|
|
return (-1);
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx)
|
|
|
|
{
|
|
|
|
struct pf_state_item *si;
|
|
|
|
struct pf_state_key *cur;
|
|
|
|
struct pf_state *olds = NULL;
|
|
|
|
|
2004-06-16 23:24:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
KASSERT(s->key[idx] == NULL, ("%s: key is null!", __FUNCTION__));
|
2004-06-16 23:24:02 +00:00
|
|
|
#else
|
2011-06-28 11:57:25 +00:00
|
|
|
KASSERT(s->key[idx] == NULL); /* XXX handle this? */
|
2004-06-16 23:24:02 +00:00
|
|
|
#endif
|
2011-06-28 11:57:25 +00:00
|
|
|
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if ((cur = RB_INSERT(pf_state_tree, &V_pf_statetbl, sk)) != NULL) {
|
|
|
|
#else
|
|
|
|
if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk)) != NULL) {
|
|
|
|
#endif
|
|
|
|
/* key exists. check for same kif, if none, add to key */
|
|
|
|
TAILQ_FOREACH(si, &cur->states, entry)
|
|
|
|
if (si->s->kif == s->kif &&
|
|
|
|
si->s->direction == s->direction) {
|
|
|
|
if (sk->proto == IPPROTO_TCP &&
|
|
|
|
si->s->src.state >= TCPS_FIN_WAIT_2 &&
|
|
|
|
si->s->dst.state >= TCPS_FIN_WAIT_2) {
|
|
|
|
si->s->src.state = si->s->dst.state =
|
|
|
|
TCPS_CLOSED;
|
|
|
|
/* unlink late or sks can go away */
|
|
|
|
olds = si->s;
|
|
|
|
} else {
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (V_pf_status.debug >= PF_DEBUG_MISC) {
|
|
|
|
#else
|
|
|
|
if (pf_status.debug >= PF_DEBUG_MISC) {
|
|
|
|
#endif
|
|
|
|
printf("pf: %s key attach "
|
|
|
|
"failed on %s: ",
|
|
|
|
(idx == PF_SK_WIRE) ?
|
|
|
|
"wire" : "stack",
|
|
|
|
s->kif->pfik_name);
|
|
|
|
pf_print_state_parts(s,
|
|
|
|
(idx == PF_SK_WIRE) ?
|
|
|
|
sk : NULL,
|
|
|
|
(idx == PF_SK_STACK) ?
|
|
|
|
sk : NULL);
|
|
|
|
printf(", existing: ");
|
|
|
|
pf_print_state_parts(si->s,
|
|
|
|
(idx == PF_SK_WIRE) ?
|
|
|
|
sk : NULL,
|
|
|
|
(idx == PF_SK_STACK) ?
|
|
|
|
sk : NULL);
|
|
|
|
printf("\n");
|
|
|
|
}
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pool_put(&V_pf_state_key_pl, sk);
|
|
|
|
#else
|
|
|
|
pool_put(&pf_state_key_pl, sk);
|
|
|
|
#endif
|
|
|
|
return (-1); /* collision! */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pool_put(&V_pf_state_key_pl, sk);
|
|
|
|
#else
|
|
|
|
pool_put(&pf_state_key_pl, sk);
|
|
|
|
#endif
|
|
|
|
s->key[idx] = cur;
|
|
|
|
} else
|
|
|
|
s->key[idx] = sk;
|
|
|
|
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if ((si = pool_get(&V_pf_state_item_pl, PR_NOWAIT)) == NULL) {
|
|
|
|
#else
|
|
|
|
if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) {
|
|
|
|
#endif
|
|
|
|
pf_state_key_detach(s, idx);
|
2004-02-26 02:04:28 +00:00
|
|
|
return (-1);
|
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
si->s = s;
|
|
|
|
|
|
|
|
/* list is sorted, if-bound states before floating */
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (s->kif == V_pfi_all)
|
|
|
|
#else
|
|
|
|
if (s->kif == pfi_all)
|
2004-02-26 02:04:28 +00:00
|
|
|
#endif
|
2011-06-28 11:57:25 +00:00
|
|
|
TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry);
|
|
|
|
else
|
|
|
|
TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry);
|
|
|
|
|
|
|
|
if (olds)
|
|
|
|
pf_unlink_state(olds);
|
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2011-06-28 11:57:25 +00:00
|
|
|
pf_detach_state(struct pf_state *s)
|
2004-02-26 02:04:28 +00:00
|
|
|
{
|
2011-06-28 11:57:25 +00:00
|
|
|
if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK])
|
|
|
|
s->key[PF_SK_WIRE] = NULL;
|
2007-07-03 12:16:07 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
if (s->key[PF_SK_STACK] != NULL)
|
|
|
|
pf_state_key_detach(s, PF_SK_STACK);
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
if (s->key[PF_SK_WIRE] != NULL)
|
|
|
|
pf_state_key_detach(s, PF_SK_WIRE);
|
|
|
|
}
|
2007-07-03 12:16:07 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
void
|
|
|
|
pf_state_key_detach(struct pf_state *s, int idx)
|
|
|
|
{
|
|
|
|
struct pf_state_item *si;
|
2007-07-03 12:16:07 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
si = TAILQ_FIRST(&s->key[idx]->states);
|
|
|
|
while (si && si->s != s)
|
|
|
|
si = TAILQ_NEXT(si, entry);
|
2007-07-03 12:16:07 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
if (si) {
|
|
|
|
TAILQ_REMOVE(&s->key[idx]->states, si, entry);
|
2009-08-19 00:10:10 +00:00
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
pool_put(&V_pf_state_item_pl, si);
|
2009-08-19 00:10:10 +00:00
|
|
|
#else
|
2011-06-28 11:57:25 +00:00
|
|
|
pool_put(&pf_state_item_pl, si);
|
2009-08-19 00:10:10 +00:00
|
|
|
#endif
|
2011-06-28 11:57:25 +00:00
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
if (TAILQ_EMPTY(&s->key[idx]->states)) {
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
RB_REMOVE(pf_state_tree, &V_pf_statetbl, s->key[idx]);
|
|
|
|
#else
|
|
|
|
RB_REMOVE(pf_state_tree, &pf_statetbl, s->key[idx]);
|
|
|
|
#endif
|
|
|
|
if (s->key[idx]->reverse)
|
|
|
|
s->key[idx]->reverse->reverse = NULL;
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
/* XXX: implement this */
|
|
|
|
#else
|
|
|
|
if (s->key[idx]->inp)
|
|
|
|
s->key[idx]->inp->inp_pf_sk = NULL;
|
|
|
|
#endif
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pool_put(&V_pf_state_key_pl, s->key[idx]);
|
|
|
|
#else
|
|
|
|
pool_put(&pf_state_key_pl, s->key[idx]);
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2007-07-03 12:16:07 +00:00
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
s->key[idx] = NULL;
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
struct pf_state_key *
|
|
|
|
pf_alloc_state_key(int pool_flags)
|
2004-02-26 02:04:28 +00:00
|
|
|
{
|
2011-06-28 11:57:25 +00:00
|
|
|
struct pf_state_key *sk;
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if ((sk = pool_get(&V_pf_state_key_pl, pool_flags)) == NULL)
|
2004-02-26 02:34:12 +00:00
|
|
|
#else
|
2011-06-28 11:57:25 +00:00
|
|
|
if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL)
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2011-06-28 11:57:25 +00:00
|
|
|
return (NULL);
|
|
|
|
TAILQ_INIT(&sk->states);
|
|
|
|
|
|
|
|
return (sk);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
pf_state_key_setup(struct pf_pdesc *pd, struct pf_rule *nr,
|
|
|
|
struct pf_state_key **skw, struct pf_state_key **sks,
|
|
|
|
struct pf_state_key **skp, struct pf_state_key **nkp,
|
|
|
|
struct pf_addr *saddr, struct pf_addr *daddr,
|
|
|
|
u_int16_t sport, u_int16_t dport)
|
|
|
|
{
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
KASSERT((*skp == NULL && *nkp == NULL),
|
|
|
|
("%s: skp == NULL && nkp == NULL", __FUNCTION__));
|
|
|
|
#else
|
|
|
|
KASSERT((*skp == NULL && *nkp == NULL));
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if ((*skp = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL)
|
|
|
|
return (ENOMEM);
|
|
|
|
|
|
|
|
PF_ACPY(&(*skp)->addr[pd->sidx], saddr, pd->af);
|
|
|
|
PF_ACPY(&(*skp)->addr[pd->didx], daddr, pd->af);
|
|
|
|
(*skp)->port[pd->sidx] = sport;
|
|
|
|
(*skp)->port[pd->didx] = dport;
|
|
|
|
(*skp)->proto = pd->proto;
|
|
|
|
(*skp)->af = pd->af;
|
|
|
|
|
|
|
|
if (nr != NULL) {
|
|
|
|
if ((*nkp = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL)
|
|
|
|
return (ENOMEM); /* caller must handle cleanup */
|
|
|
|
|
|
|
|
/* XXX maybe just bcopy and TAILQ_INIT(&(*nkp)->states) */
|
|
|
|
PF_ACPY(&(*nkp)->addr[0], &(*skp)->addr[0], pd->af);
|
|
|
|
PF_ACPY(&(*nkp)->addr[1], &(*skp)->addr[1], pd->af);
|
|
|
|
(*nkp)->port[0] = (*skp)->port[0];
|
|
|
|
(*nkp)->port[1] = (*skp)->port[1];
|
|
|
|
(*nkp)->proto = pd->proto;
|
|
|
|
(*nkp)->af = pd->af;
|
|
|
|
} else
|
|
|
|
*nkp = *skp;
|
|
|
|
|
|
|
|
if (pd->dir == PF_IN) {
|
|
|
|
*skw = *skp;
|
|
|
|
*sks = *nkp;
|
2004-02-26 02:04:28 +00:00
|
|
|
} else {
|
2011-06-28 11:57:25 +00:00
|
|
|
*sks = *skp;
|
|
|
|
*skw = *nkp;
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
return (0);
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
|
2009-08-19 00:10:10 +00:00
|
|
|
int
|
2011-06-28 11:57:25 +00:00
|
|
|
pf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw,
|
|
|
|
struct pf_state_key *sks, struct pf_state *s)
|
2004-02-26 02:04:28 +00:00
|
|
|
{
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifndef __FreeBSD__
|
|
|
|
splassert(IPL_SOFTNET);
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
s->kif = kif;
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
if (skw == sks) {
|
|
|
|
if (pf_state_key_attach(skw, s, PF_SK_WIRE))
|
|
|
|
return (-1);
|
|
|
|
s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
|
|
|
|
} else {
|
|
|
|
if (pf_state_key_attach(skw, s, PF_SK_WIRE)) {
|
2007-07-03 12:16:07 +00:00
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
pool_put(&V_pf_state_key_pl, sks);
|
2007-07-03 12:16:07 +00:00
|
|
|
#else
|
2011-06-28 11:57:25 +00:00
|
|
|
pool_put(&pf_state_key_pl, sks);
|
2007-07-03 12:16:07 +00:00
|
|
|
#endif
|
2011-06-28 11:57:25 +00:00
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
if (pf_state_key_attach(sks, s, PF_SK_STACK)) {
|
|
|
|
pf_state_key_detach(s, PF_SK_WIRE);
|
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
}
|
2007-07-03 12:16:07 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
if (s->id == 0 && s->creatorid == 0) {
|
2007-07-03 12:16:07 +00:00
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
s->id = htobe64(V_pf_status.stateid++);
|
|
|
|
s->creatorid = V_pf_status.hostid;
|
2007-07-03 12:16:07 +00:00
|
|
|
#else
|
2011-06-28 11:57:25 +00:00
|
|
|
s->id = htobe64(pf_status.stateid++);
|
|
|
|
s->creatorid = pf_status.hostid;
|
2007-07-03 12:16:07 +00:00
|
|
|
#endif
|
2011-06-28 11:57:25 +00:00
|
|
|
}
|
2009-08-19 00:10:10 +00:00
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
if (RB_INSERT(pf_state_tree_id, &V_tree_id, s) != NULL) {
|
|
|
|
if (V_pf_status.debug >= PF_DEBUG_MISC) {
|
|
|
|
#else
|
|
|
|
if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) {
|
|
|
|
if (pf_status.debug >= PF_DEBUG_MISC) {
|
|
|
|
#endif
|
|
|
|
printf("pf: state insert failed: "
|
|
|
|
"id: %016llx creatorid: %08x",
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
(unsigned long long)betoh64(s->id), ntohl(s->creatorid));
|
|
|
|
#else
|
|
|
|
betoh64(s->id), ntohl(s->creatorid));
|
|
|
|
#endif
|
|
|
|
printf("\n");
|
|
|
|
}
|
|
|
|
pf_detach_state(s);
|
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
TAILQ_INSERT_TAIL(&V_state_list, s, entry_list);
|
|
|
|
V_pf_status.fcounters[FCNT_STATE_INSERT]++;
|
|
|
|
V_pf_status.states++;
|
|
|
|
#else
|
|
|
|
TAILQ_INSERT_TAIL(&state_list, s, entry_list);
|
|
|
|
pf_status.fcounters[FCNT_STATE_INSERT]++;
|
|
|
|
pf_status.states++;
|
|
|
|
#endif
|
|
|
|
pfi_kif_ref(kif, PFI_KIF_REF_STATE);
|
|
|
|
#if NPFSYNC > 0
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (pfsync_insert_state_ptr != NULL)
|
|
|
|
pfsync_insert_state_ptr(s);
|
|
|
|
#else
|
|
|
|
pfsync_insert_state(s);
|
2009-08-19 00:10:10 +00:00
|
|
|
#endif
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
|
|
|
return (0);
|
2004-06-16 23:24:02 +00:00
|
|
|
}
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
struct pf_state *
|
|
|
|
pf_find_state_byid(struct pf_state_cmp *key)
|
2004-06-16 23:24:02 +00:00
|
|
|
{
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
V_pf_status.fcounters[FCNT_STATE_SEARCH]++;
|
2004-06-16 23:24:02 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
return (RB_FIND(pf_state_tree_id, &V_tree_id, (struct pf_state *)key));
|
|
|
|
#else
|
|
|
|
pf_status.fcounters[FCNT_STATE_SEARCH]++;
|
|
|
|
|
|
|
|
return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
/* XXX debug function, intended to be removed one day */
|
|
|
|
int
|
|
|
|
pf_compare_state_keys(struct pf_state_key *a, struct pf_state_key *b,
|
|
|
|
struct pfi_kif *kif, u_int dir)
|
|
|
|
{
|
|
|
|
/* a (from hdr) and b (new) must be exact opposites of each other */
|
|
|
|
if (a->af == b->af && a->proto == b->proto &&
|
|
|
|
PF_AEQ(&a->addr[0], &b->addr[1], a->af) &&
|
|
|
|
PF_AEQ(&a->addr[1], &b->addr[0], a->af) &&
|
|
|
|
a->port[0] == b->port[1] &&
|
|
|
|
a->port[1] == b->port[0])
|
|
|
|
return (0);
|
|
|
|
else {
|
|
|
|
/* mismatch. must not happen. */
|
|
|
|
printf("pf: state key linking mismatch! dir=%s, "
|
|
|
|
"if=%s, stored af=%u, a0: ",
|
|
|
|
dir == PF_OUT ? "OUT" : "IN", kif->pfik_name, a->af);
|
|
|
|
pf_print_host(&a->addr[0], a->port[0], a->af);
|
|
|
|
printf(", a1: ");
|
|
|
|
pf_print_host(&a->addr[1], a->port[1], a->af);
|
|
|
|
printf(", proto=%u", a->proto);
|
|
|
|
printf(", found af=%u, a0: ", b->af);
|
|
|
|
pf_print_host(&b->addr[0], b->port[0], b->af);
|
|
|
|
printf(", a1: ");
|
|
|
|
pf_print_host(&b->addr[1], b->port[1], b->af);
|
|
|
|
printf(", proto=%u", b->proto);
|
|
|
|
printf(".\n");
|
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
struct pf_state *
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir,
|
|
|
|
struct mbuf *m, struct pf_mtag *pftag)
|
|
|
|
#else
|
|
|
|
pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir,
|
|
|
|
struct mbuf *m)
|
|
|
|
#endif
|
|
|
|
{
|
|
|
|
struct pf_state_key *sk;
|
|
|
|
struct pf_state_item *si;
|
|
|
|
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
V_pf_status.fcounters[FCNT_STATE_SEARCH]++;
|
|
|
|
#else
|
|
|
|
pf_status.fcounters[FCNT_STATE_SEARCH]++;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (dir == PF_OUT && pftag->statekey &&
|
|
|
|
((struct pf_state_key *)pftag->statekey)->reverse)
|
|
|
|
sk = ((struct pf_state_key *)pftag->statekey)->reverse;
|
|
|
|
else {
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if ((sk = RB_FIND(pf_state_tree, &V_pf_statetbl,
|
|
|
|
#else
|
|
|
|
if ((sk = RB_FIND(pf_state_tree, &pf_statetbl,
|
|
|
|
#endif
|
|
|
|
(struct pf_state_key *)key)) == NULL)
|
|
|
|
return (NULL);
|
|
|
|
if (dir == PF_OUT && pftag->statekey &&
|
|
|
|
pf_compare_state_keys(pftag->statekey, sk,
|
|
|
|
kif, dir) == 0) {
|
|
|
|
((struct pf_state_key *)
|
|
|
|
pftag->statekey)->reverse = sk;
|
|
|
|
sk->reverse = pftag->statekey;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
if (dir == PF_OUT && m->m_pkthdr.pf.statekey &&
|
|
|
|
((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse)
|
|
|
|
sk = ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse;
|
|
|
|
else {
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if ((sk = RB_FIND(pf_state_tree, &V_pf_statetbl,
|
|
|
|
#else
|
|
|
|
if ((sk = RB_FIND(pf_state_tree, &pf_statetbl,
|
|
|
|
#endif
|
|
|
|
(struct pf_state_key *)key)) == NULL)
|
|
|
|
return (NULL);
|
|
|
|
if (dir == PF_OUT && m->m_pkthdr.pf.statekey &&
|
|
|
|
pf_compare_state_keys(m->m_pkthdr.pf.statekey, sk,
|
|
|
|
kif, dir) == 0) {
|
|
|
|
((struct pf_state_key *)
|
|
|
|
m->m_pkthdr.pf.statekey)->reverse = sk;
|
|
|
|
sk->reverse = m->m_pkthdr.pf.statekey;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (dir == PF_OUT)
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pftag->statekey = NULL;
|
|
|
|
#else
|
|
|
|
m->m_pkthdr.pf.statekey = NULL;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* list is sorted, if-bound states before floating ones */
|
|
|
|
TAILQ_FOREACH(si, &sk->states, entry)
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if ((si->s->kif == V_pfi_all || si->s->kif == kif) &&
|
|
|
|
#else
|
|
|
|
if ((si->s->kif == pfi_all || si->s->kif == kif) &&
|
|
|
|
#endif
|
|
|
|
sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
|
|
|
|
si->s->key[PF_SK_STACK]))
|
|
|
|
return (si->s);
|
|
|
|
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct pf_state *
|
|
|
|
pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
|
|
|
|
{
|
|
|
|
struct pf_state_key *sk;
|
|
|
|
struct pf_state_item *si, *ret = NULL;
|
|
|
|
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
V_pf_status.fcounters[FCNT_STATE_SEARCH]++;
|
|
|
|
#else
|
|
|
|
pf_status.fcounters[FCNT_STATE_SEARCH]++;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
sk = RB_FIND(pf_state_tree, &V_pf_statetbl, (struct pf_state_key *)key);
|
|
|
|
#else
|
|
|
|
sk = RB_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key);
|
|
|
|
#endif
|
|
|
|
if (sk != NULL) {
|
|
|
|
TAILQ_FOREACH(si, &sk->states, entry)
|
|
|
|
if (dir == PF_INOUT ||
|
|
|
|
(sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
|
|
|
|
si->s->key[PF_SK_STACK]))) {
|
|
|
|
if (more == NULL)
|
|
|
|
return (si->s);
|
|
|
|
|
|
|
|
if (ret)
|
|
|
|
(*more)++;
|
|
|
|
else
|
|
|
|
ret = si;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return (ret ? ret->s : NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* END state table stuff */
|
|
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
pf_purge_thread(void *v)
|
|
|
|
{
|
|
|
|
int nloops = 0, s;
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
int locked;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
CURVNET_SET((struct vnet *)v);
|
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
|
|
|
|
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
sx_slock(&V_pf_consistency_lock);
|
|
|
|
PF_LOCK();
|
|
|
|
locked = 0;
|
|
|
|
|
|
|
|
if (V_pf_end_threads) {
|
|
|
|
PF_UNLOCK();
|
|
|
|
sx_sunlock(&V_pf_consistency_lock);
|
|
|
|
sx_xlock(&V_pf_consistency_lock);
|
|
|
|
PF_LOCK();
|
|
|
|
|
|
|
|
pf_purge_expired_states(V_pf_status.states, 1);
|
|
|
|
pf_purge_expired_fragments();
|
|
|
|
pf_purge_expired_src_nodes(1);
|
|
|
|
V_pf_end_threads++;
|
|
|
|
|
|
|
|
sx_xunlock(&V_pf_consistency_lock);
|
|
|
|
PF_UNLOCK();
|
|
|
|
wakeup(pf_purge_thread);
|
|
|
|
kproc_exit(0);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
s = splsoftnet();
|
|
|
|
|
|
|
|
/* process a fraction of the state table every second */
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (!pf_purge_expired_states(1 + (V_pf_status.states /
|
|
|
|
V_pf_default_rule.timeout[PFTM_INTERVAL]), 0)) {
|
|
|
|
PF_UNLOCK();
|
|
|
|
sx_sunlock(&V_pf_consistency_lock);
|
|
|
|
sx_xlock(&V_pf_consistency_lock);
|
|
|
|
PF_LOCK();
|
|
|
|
locked = 1;
|
|
|
|
|
|
|
|
pf_purge_expired_states(1 + (V_pf_status.states /
|
|
|
|
V_pf_default_rule.timeout[PFTM_INTERVAL]), 1);
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
pf_purge_expired_states(1 + (pf_status.states
|
|
|
|
/ pf_default_rule.timeout[PFTM_INTERVAL]));
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* purge other expired types every PFTM_INTERVAL seconds */
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (++nloops >= V_pf_default_rule.timeout[PFTM_INTERVAL]) {
|
|
|
|
#else
|
|
|
|
if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
|
|
|
|
#endif
|
|
|
|
pf_purge_expired_fragments();
|
|
|
|
pf_purge_expired_src_nodes(0);
|
|
|
|
nloops = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
splx(s);
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
PF_UNLOCK();
|
|
|
|
if (locked)
|
|
|
|
sx_xunlock(&V_pf_consistency_lock);
|
|
|
|
else
|
|
|
|
sx_sunlock(&V_pf_consistency_lock);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
CURVNET_RESTORE();
|
|
|
|
}
|
|
|
|
|
|
|
|
u_int32_t
|
|
|
|
pf_state_expires(const struct pf_state *state)
|
|
|
|
{
|
|
|
|
u_int32_t timeout;
|
|
|
|
u_int32_t start;
|
|
|
|
u_int32_t end;
|
|
|
|
u_int32_t states;
|
|
|
|
|
|
|
|
/* handle all PFTM_* > PFTM_MAX here */
|
|
|
|
if (state->timeout == PFTM_PURGE)
|
|
|
|
return (time_second);
|
|
|
|
if (state->timeout == PFTM_UNTIL_PACKET)
|
|
|
|
return (0);
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
KASSERT(state->timeout != PFTM_UNLINKED,
|
|
|
|
("pf_state_expires: timeout == PFTM_UNLINKED"));
|
|
|
|
KASSERT((state->timeout < PFTM_MAX),
|
|
|
|
("pf_state_expires: timeout > PFTM_MAX"));
|
|
|
|
#else
|
|
|
|
KASSERT(state->timeout != PFTM_UNLINKED);
|
|
|
|
KASSERT(state->timeout < PFTM_MAX);
|
|
|
|
#endif
|
|
|
|
timeout = state->rule.ptr->timeout[state->timeout];
|
|
|
|
if (!timeout)
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
timeout = V_pf_default_rule.timeout[state->timeout];
|
|
|
|
#else
|
|
|
|
timeout = pf_default_rule.timeout[state->timeout];
|
|
|
|
#endif
|
|
|
|
start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
|
|
|
|
if (start) {
|
|
|
|
end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
|
|
|
|
states = state->rule.ptr->states_cur;
|
|
|
|
} else {
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
start = V_pf_default_rule.timeout[PFTM_ADAPTIVE_START];
|
|
|
|
end = V_pf_default_rule.timeout[PFTM_ADAPTIVE_END];
|
|
|
|
states = V_pf_status.states;
|
|
|
|
#else
|
|
|
|
start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
|
|
|
|
end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
|
|
|
|
states = pf_status.states;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
if (end && states > start && start < end) {
|
|
|
|
if (states < end)
|
|
|
|
return (state->expire + timeout * (end - states) /
|
|
|
|
(end - start));
|
|
|
|
else
|
|
|
|
return (time_second);
|
|
|
|
}
|
|
|
|
return (state->expire + timeout);
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
int
|
|
|
|
pf_purge_expired_src_nodes(int waslocked)
|
|
|
|
#else
|
|
|
|
void
|
|
|
|
pf_purge_expired_src_nodes(int waslocked)
|
|
|
|
#endif
|
|
|
|
{
|
|
|
|
struct pf_src_node *cur, *next;
|
|
|
|
int locked = waslocked;
|
|
|
|
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
for (cur = RB_MIN(pf_src_tree, &V_tree_src_tracking); cur; cur = next) {
|
|
|
|
next = RB_NEXT(pf_src_tree, &V_tree_src_tracking, cur);
|
|
|
|
#else
|
|
|
|
for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
|
|
|
|
next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (cur->states <= 0 && cur->expire <= time_second) {
|
|
|
|
if (! locked) {
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (!sx_try_upgrade(&V_pf_consistency_lock))
|
|
|
|
return (0);
|
|
|
|
#else
|
|
|
|
rw_enter_write(&pf_consistency_lock);
|
|
|
|
#endif
|
|
|
|
next = RB_NEXT(pf_src_tree,
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
&V_tree_src_tracking, cur);
|
|
|
|
#else
|
|
|
|
&tree_src_tracking, cur);
|
|
|
|
#endif
|
|
|
|
locked = 1;
|
|
|
|
}
|
|
|
|
if (cur->rule.ptr != NULL) {
|
|
|
|
cur->rule.ptr->src_nodes--;
|
|
|
|
if (cur->rule.ptr->states_cur <= 0 &&
|
|
|
|
cur->rule.ptr->max_src_nodes <= 0)
|
|
|
|
pf_rm_rule(NULL, cur->rule.ptr);
|
|
|
|
}
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
RB_REMOVE(pf_src_tree, &V_tree_src_tracking, cur);
|
|
|
|
V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
|
|
|
|
V_pf_status.src_nodes--;
|
|
|
|
pool_put(&V_pf_src_tree_pl, cur);
|
|
|
|
#else
|
|
|
|
RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
|
|
|
|
pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
|
|
|
|
pf_status.src_nodes--;
|
|
|
|
pool_put(&pf_src_tree_pl, cur);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (locked && !waslocked)
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
{
|
|
|
|
sx_downgrade(&V_pf_consistency_lock);
|
|
|
|
}
|
|
|
|
return (1);
|
|
|
|
#else
|
|
|
|
rw_exit_write(&pf_consistency_lock);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
pf_src_tree_remove_state(struct pf_state *s)
|
|
|
|
{
|
|
|
|
u_int32_t timeout;
|
|
|
|
|
|
|
|
if (s->src_node != NULL) {
|
|
|
|
if (s->src.tcp_est)
|
|
|
|
--s->src_node->conn;
|
2004-06-16 23:24:02 +00:00
|
|
|
if (--s->src_node->states <= 0) {
|
|
|
|
timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
|
|
|
|
if (!timeout)
|
|
|
|
timeout =
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
V_pf_default_rule.timeout[PFTM_SRC_NODE];
|
|
|
|
#else
|
2004-06-16 23:24:02 +00:00
|
|
|
pf_default_rule.timeout[PFTM_SRC_NODE];
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2004-06-16 23:24:02 +00:00
|
|
|
s->src_node->expire = time_second + timeout;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
|
|
|
|
if (--s->nat_src_node->states <= 0) {
|
|
|
|
timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
|
|
|
|
if (!timeout)
|
|
|
|
timeout =
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
V_pf_default_rule.timeout[PFTM_SRC_NODE];
|
|
|
|
#else
|
2004-06-16 23:24:02 +00:00
|
|
|
pf_default_rule.timeout[PFTM_SRC_NODE];
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2004-06-16 23:24:02 +00:00
|
|
|
s->nat_src_node->expire = time_second + timeout;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
s->src_node = s->nat_src_node = NULL;
|
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2007-07-03 12:16:07 +00:00
|
|
|
/* callers should be at splsoftnet */
|
2005-05-03 16:43:32 +00:00
|
|
|
void
|
2007-07-03 12:16:07 +00:00
|
|
|
pf_unlink_state(struct pf_state *cur)
|
2005-05-03 16:43:32 +00:00
|
|
|
{
|
2005-07-20 18:58:27 +00:00
|
|
|
#ifdef __FreeBSD__
|
2005-12-20 00:33:33 +00:00
|
|
|
if (cur->local_flags & PFSTATE_EXPIRING)
|
2005-07-20 18:58:27 +00:00
|
|
|
return;
|
2005-12-20 00:33:33 +00:00
|
|
|
cur->local_flags |= PFSTATE_EXPIRING;
|
2011-06-28 11:57:25 +00:00
|
|
|
#else
|
|
|
|
splassert(IPL_SOFTNET);
|
2005-07-20 18:58:27 +00:00
|
|
|
#endif
|
2011-06-28 11:57:25 +00:00
|
|
|
|
2007-07-03 12:16:07 +00:00
|
|
|
if (cur->src.state == PF_TCPS_PROXY_DST) {
|
2011-06-28 11:57:25 +00:00
|
|
|
/* XXX wire key the right one? */
|
2006-09-12 04:25:13 +00:00
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
pf_send_tcp(NULL, cur->rule.ptr, cur->key[PF_SK_WIRE]->af,
|
2006-09-12 04:25:13 +00:00
|
|
|
#else
|
2011-06-28 11:57:25 +00:00
|
|
|
pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af,
|
2006-09-12 04:25:13 +00:00
|
|
|
#endif
|
2011-06-28 11:57:25 +00:00
|
|
|
&cur->key[PF_SK_WIRE]->addr[1],
|
|
|
|
&cur->key[PF_SK_WIRE]->addr[0],
|
|
|
|
cur->key[PF_SK_WIRE]->port[1],
|
|
|
|
cur->key[PF_SK_WIRE]->port[0],
|
2005-05-03 16:43:32 +00:00
|
|
|
cur->src.seqhi, cur->src.seqlo + 1,
|
2007-07-03 12:16:07 +00:00
|
|
|
TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
|
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
RB_REMOVE(pf_state_tree_id, &V_tree_id, cur);
|
|
|
|
#else
|
2005-05-03 16:43:32 +00:00
|
|
|
RB_REMOVE(pf_state_tree_id, &tree_id, cur);
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
|
|
|
#if NPFLOW > 0
|
|
|
|
if (cur->state_flags & PFSTATE_PFLOW)
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (export_pflow_ptr != NULL)
|
|
|
|
export_pflow_ptr(cur);
|
|
|
|
#else
|
|
|
|
export_pflow(cur);
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
#if NPFSYNC > 0
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (pfsync_delete_state_ptr != NULL)
|
|
|
|
pfsync_delete_state_ptr(cur);
|
|
|
|
#else
|
|
|
|
pfsync_delete_state(cur);
|
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
#endif
|
2007-07-03 12:16:07 +00:00
|
|
|
cur->timeout = PFTM_UNLINKED;
|
2005-05-03 16:43:32 +00:00
|
|
|
pf_src_tree_remove_state(cur);
|
2011-06-28 11:57:25 +00:00
|
|
|
pf_detach_state(cur);
|
2007-07-03 12:16:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* callers should be at splsoftnet and hold the
|
|
|
|
* write_lock on pf_consistency_lock */
|
|
|
|
void
|
|
|
|
pf_free_state(struct pf_state *cur)
|
|
|
|
{
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifndef __FreeBSD__
|
|
|
|
splassert(IPL_SOFTNET);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if NPFSYNC > 0
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (pfsync_state_in_use_ptr != NULL)
|
|
|
|
pfsync_state_in_use_ptr(cur);
|
|
|
|
#else
|
|
|
|
if (pfsync_state_in_use(cur))
|
|
|
|
#endif
|
2007-07-03 12:16:07 +00:00
|
|
|
return;
|
|
|
|
#endif
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
KASSERT(cur->timeout == PFTM_UNLINKED,
|
|
|
|
("pf_free_state: cur->timeout != PFTM_UNLINKED"));
|
|
|
|
#else
|
|
|
|
KASSERT(cur->timeout == PFTM_UNLINKED);
|
|
|
|
#endif
|
2011-06-28 11:57:25 +00:00
|
|
|
if (--cur->rule.ptr->states_cur <= 0 &&
|
2005-05-03 16:43:32 +00:00
|
|
|
cur->rule.ptr->src_nodes <= 0)
|
|
|
|
pf_rm_rule(NULL, cur->rule.ptr);
|
|
|
|
if (cur->nat_rule.ptr != NULL)
|
2011-06-28 11:57:25 +00:00
|
|
|
if (--cur->nat_rule.ptr->states_cur <= 0 &&
|
2005-05-03 16:43:32 +00:00
|
|
|
cur->nat_rule.ptr->src_nodes <= 0)
|
|
|
|
pf_rm_rule(NULL, cur->nat_rule.ptr);
|
|
|
|
if (cur->anchor.ptr != NULL)
|
2011-06-28 11:57:25 +00:00
|
|
|
if (--cur->anchor.ptr->states_cur <= 0)
|
2005-05-03 16:43:32 +00:00
|
|
|
pf_rm_rule(NULL, cur->anchor.ptr);
|
|
|
|
pf_normalize_tcp_cleanup(cur);
|
2011-06-28 11:57:25 +00:00
|
|
|
pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
TAILQ_REMOVE(&V_state_list, cur, entry_list);
|
|
|
|
#else
|
|
|
|
TAILQ_REMOVE(&state_list, cur, entry_list);
|
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
if (cur->tag)
|
|
|
|
pf_tag_unref(cur->tag);
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pool_put(&V_pf_state_pl, cur);
|
|
|
|
V_pf_status.fcounters[FCNT_STATE_REMOVALS]++;
|
|
|
|
V_pf_status.states--;
|
|
|
|
#else
|
2005-05-03 16:43:32 +00:00
|
|
|
pool_put(&pf_state_pl, cur);
|
|
|
|
pf_status.fcounters[FCNT_STATE_REMOVALS]++;
|
|
|
|
pf_status.states--;
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
}
|
|
|
|
|
2009-08-19 00:10:10 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
int
|
|
|
|
pf_purge_expired_states(u_int32_t maxcheck, int waslocked)
|
|
|
|
#else
|
2004-06-16 23:24:02 +00:00
|
|
|
void
|
2007-07-03 12:16:07 +00:00
|
|
|
pf_purge_expired_states(u_int32_t maxcheck)
|
2009-08-19 00:10:10 +00:00
|
|
|
#endif
|
2004-06-16 23:24:02 +00:00
|
|
|
{
|
2007-07-03 12:16:07 +00:00
|
|
|
static struct pf_state *cur = NULL;
|
|
|
|
struct pf_state *next;
|
2009-08-19 00:10:10 +00:00
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
int locked = waslocked;
|
2009-08-19 00:10:10 +00:00
|
|
|
#else
|
2011-06-28 11:57:25 +00:00
|
|
|
int locked = 0;
|
2009-08-19 00:10:10 +00:00
|
|
|
#endif
|
2004-06-16 23:24:02 +00:00
|
|
|
|
2007-07-03 12:16:07 +00:00
|
|
|
while (maxcheck--) {
|
|
|
|
/* wrap to start of list when we hit the end */
|
|
|
|
if (cur == NULL) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
cur = TAILQ_FIRST(&V_state_list);
|
|
|
|
#else
|
2007-07-03 12:16:07 +00:00
|
|
|
cur = TAILQ_FIRST(&state_list);
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2007-07-03 12:16:07 +00:00
|
|
|
if (cur == NULL)
|
|
|
|
break; /* list empty */
|
|
|
|
}
|
|
|
|
|
|
|
|
/* get next state, as cur may get deleted */
|
2011-06-28 11:57:25 +00:00
|
|
|
next = TAILQ_NEXT(cur, entry_list);
|
2007-07-03 12:16:07 +00:00
|
|
|
|
|
|
|
if (cur->timeout == PFTM_UNLINKED) {
|
|
|
|
/* free unlinked state */
|
|
|
|
if (! locked) {
|
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
if (!sx_try_upgrade(&V_pf_consistency_lock))
|
|
|
|
return (0);
|
2007-07-03 12:16:07 +00:00
|
|
|
#else
|
|
|
|
rw_enter_write(&pf_consistency_lock);
|
|
|
|
#endif
|
|
|
|
locked = 1;
|
|
|
|
}
|
|
|
|
pf_free_state(cur);
|
|
|
|
} else if (pf_state_expires(cur) <= time_second) {
|
|
|
|
/* unlink and free expired state */
|
|
|
|
pf_unlink_state(cur);
|
|
|
|
if (! locked) {
|
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
if (!sx_try_upgrade(&V_pf_consistency_lock))
|
|
|
|
return (0);
|
2007-07-03 12:16:07 +00:00
|
|
|
#else
|
|
|
|
rw_enter_write(&pf_consistency_lock);
|
|
|
|
#endif
|
|
|
|
locked = 1;
|
|
|
|
}
|
|
|
|
pf_free_state(cur);
|
|
|
|
}
|
|
|
|
cur = next;
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
2007-07-03 12:16:07 +00:00
|
|
|
|
|
|
|
#ifdef __FreeBSD__
|
2009-08-19 00:10:10 +00:00
|
|
|
if (!waslocked && locked)
|
2011-06-28 11:57:25 +00:00
|
|
|
sx_downgrade(&V_pf_consistency_lock);
|
2009-08-19 00:10:10 +00:00
|
|
|
|
|
|
|
return (1);
|
2007-07-03 12:16:07 +00:00
|
|
|
#else
|
2009-08-19 00:10:10 +00:00
|
|
|
if (locked)
|
2007-07-03 12:16:07 +00:00
|
|
|
rw_exit_write(&pf_consistency_lock);
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
|
|
|
|
{
|
|
|
|
if (aw->type != PF_ADDR_TABLE)
|
|
|
|
return (0);
|
2011-06-28 11:57:25 +00:00
|
|
|
if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname, 1)) == NULL)
|
2004-02-26 02:04:28 +00:00
|
|
|
return (1);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
pf_tbladdr_remove(struct pf_addr_wrap *aw)
|
|
|
|
{
|
|
|
|
if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
|
|
|
|
return;
|
|
|
|
pfr_detach_table(aw->p.tbl);
|
|
|
|
aw->p.tbl = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
pf_tbladdr_copyout(struct pf_addr_wrap *aw)
|
|
|
|
{
|
|
|
|
struct pfr_ktable *kt = aw->p.tbl;
|
|
|
|
|
|
|
|
if (aw->type != PF_ADDR_TABLE || kt == NULL)
|
|
|
|
return;
|
|
|
|
if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
|
|
|
|
kt = kt->pfrkt_root;
|
|
|
|
aw->p.tbl = NULL;
|
|
|
|
aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
|
|
|
|
kt->pfrkt_cnt : -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
|
|
|
|
{
|
|
|
|
switch (af) {
|
|
|
|
#ifdef INET
|
|
|
|
case AF_INET: {
|
|
|
|
u_int32_t a = ntohl(addr->addr32[0]);
|
|
|
|
printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
|
|
|
|
(a>>8)&255, a&255);
|
|
|
|
if (p) {
|
|
|
|
p = ntohs(p);
|
|
|
|
printf(":%u", p);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6: {
|
|
|
|
u_int16_t b;
|
2011-06-28 11:57:25 +00:00
|
|
|
u_int8_t i, curstart, curend, maxstart, maxend;
|
|
|
|
curstart = curend = maxstart = maxend = 255;
|
2004-02-26 02:04:28 +00:00
|
|
|
for (i = 0; i < 8; i++) {
|
|
|
|
if (!addr->addr16[i]) {
|
|
|
|
if (curstart == 255)
|
|
|
|
curstart = i;
|
2011-06-28 11:57:25 +00:00
|
|
|
curend = i;
|
2004-02-26 02:04:28 +00:00
|
|
|
} else {
|
2011-06-28 11:57:25 +00:00
|
|
|
if ((curend - curstart) >
|
|
|
|
(maxend - maxstart)) {
|
|
|
|
maxstart = curstart;
|
|
|
|
maxend = curend;
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
curstart = curend = 255;
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
if ((curend - curstart) >
|
|
|
|
(maxend - maxstart)) {
|
|
|
|
maxstart = curstart;
|
|
|
|
maxend = curend;
|
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
for (i = 0; i < 8; i++) {
|
|
|
|
if (i >= maxstart && i <= maxend) {
|
2011-06-28 11:57:25 +00:00
|
|
|
if (i == 0)
|
|
|
|
printf(":");
|
|
|
|
if (i == maxend)
|
|
|
|
printf(":");
|
2004-02-26 02:04:28 +00:00
|
|
|
} else {
|
|
|
|
b = ntohs(addr->addr16[i]);
|
|
|
|
printf("%x", b);
|
|
|
|
if (i < 7)
|
|
|
|
printf(":");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (p) {
|
|
|
|
p = ntohs(p);
|
|
|
|
printf("[%u]", p);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
#endif /* INET6 */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Print a description of state s; shorthand for pf_print_state_parts()
 * with both key overrides set to NULL.
 */
void
pf_print_state(struct pf_state *s)
{
	pf_print_state_parts(s, NULL, NULL);
}
|
|
|
|
|
|
|
|
void
|
|
|
|
pf_print_state_parts(struct pf_state *s,
|
|
|
|
struct pf_state_key *skwp, struct pf_state_key *sksp)
|
|
|
|
{
|
|
|
|
struct pf_state_key *skw, *sks;
|
|
|
|
u_int8_t proto, dir;
|
|
|
|
|
|
|
|
/* Do our best to fill these, but they're skipped if NULL */
|
|
|
|
skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL);
|
|
|
|
sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL);
|
|
|
|
proto = skw ? skw->proto : (sks ? sks->proto : 0);
|
|
|
|
dir = s ? s->direction : 0;
|
|
|
|
|
|
|
|
switch (proto) {
|
|
|
|
case IPPROTO_IPV4:
|
|
|
|
printf("IPv4");
|
|
|
|
break;
|
|
|
|
case IPPROTO_IPV6:
|
|
|
|
printf("IPv6");
|
|
|
|
break;
|
2004-02-26 02:04:28 +00:00
|
|
|
case IPPROTO_TCP:
|
2011-06-28 11:57:25 +00:00
|
|
|
printf("TCP");
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
case IPPROTO_UDP:
|
2011-06-28 11:57:25 +00:00
|
|
|
printf("UDP");
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
case IPPROTO_ICMP:
|
2011-06-28 11:57:25 +00:00
|
|
|
printf("ICMP");
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
case IPPROTO_ICMPV6:
|
2011-06-28 11:57:25 +00:00
|
|
|
printf("ICMPv6");
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
default:
|
2011-06-28 11:57:25 +00:00
|
|
|
printf("%u", skw->proto);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
switch (dir) {
|
|
|
|
case PF_IN:
|
|
|
|
printf(" in");
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
2011-06-28 11:57:25 +00:00
|
|
|
case PF_OUT:
|
|
|
|
printf(" out");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (skw) {
|
|
|
|
printf(" wire: ");
|
|
|
|
pf_print_host(&skw->addr[0], skw->port[0], skw->af);
|
|
|
|
printf(" ");
|
|
|
|
pf_print_host(&skw->addr[1], skw->port[1], skw->af);
|
|
|
|
}
|
|
|
|
if (sks) {
|
|
|
|
printf(" stack: ");
|
|
|
|
if (sks != skw) {
|
|
|
|
pf_print_host(&sks->addr[0], sks->port[0], sks->af);
|
|
|
|
printf(" ");
|
|
|
|
pf_print_host(&sks->addr[1], sks->port[1], sks->af);
|
|
|
|
} else
|
|
|
|
printf("-");
|
|
|
|
}
|
|
|
|
if (s) {
|
|
|
|
if (proto == IPPROTO_TCP) {
|
|
|
|
printf(" [lo=%u high=%u win=%u modulator=%u",
|
|
|
|
s->src.seqlo, s->src.seqhi,
|
|
|
|
s->src.max_win, s->src.seqdiff);
|
|
|
|
if (s->src.wscale && s->dst.wscale)
|
|
|
|
printf(" wscale=%u",
|
|
|
|
s->src.wscale & PF_WSCALE_MASK);
|
|
|
|
printf("]");
|
|
|
|
printf(" [lo=%u high=%u win=%u modulator=%u",
|
|
|
|
s->dst.seqlo, s->dst.seqhi,
|
|
|
|
s->dst.max_win, s->dst.seqdiff);
|
|
|
|
if (s->src.wscale && s->dst.wscale)
|
|
|
|
printf(" wscale=%u",
|
|
|
|
s->dst.wscale & PF_WSCALE_MASK);
|
|
|
|
printf("]");
|
|
|
|
}
|
|
|
|
printf(" %u:%u", s->src.state, s->dst.state);
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
pf_print_flags(u_int8_t f)
|
|
|
|
{
|
|
|
|
if (f)
|
|
|
|
printf(" ");
|
|
|
|
if (f & TH_FIN)
|
|
|
|
printf("F");
|
|
|
|
if (f & TH_SYN)
|
|
|
|
printf("S");
|
|
|
|
if (f & TH_RST)
|
|
|
|
printf("R");
|
|
|
|
if (f & TH_PUSH)
|
|
|
|
printf("P");
|
|
|
|
if (f & TH_ACK)
|
|
|
|
printf("A");
|
|
|
|
if (f & TH_URG)
|
|
|
|
printf("U");
|
|
|
|
if (f & TH_ECE)
|
|
|
|
printf("E");
|
|
|
|
if (f & TH_CWR)
|
|
|
|
printf("W");
|
|
|
|
}
|
|
|
|
|
|
|
|
#define PF_SET_SKIP_STEPS(i) \
|
|
|
|
do { \
|
|
|
|
while (head[i] != cur) { \
|
|
|
|
head[i]->skip[i].ptr = cur; \
|
|
|
|
head[i] = TAILQ_NEXT(head[i], entries); \
|
|
|
|
} \
|
|
|
|
} while (0)
|
|
|
|
|
|
|
|
void
|
|
|
|
pf_calc_skip_steps(struct pf_rulequeue *rules)
|
|
|
|
{
|
|
|
|
struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
|
|
|
|
int i;
|
|
|
|
|
|
|
|
cur = TAILQ_FIRST(rules);
|
|
|
|
prev = cur;
|
|
|
|
for (i = 0; i < PF_SKIP_COUNT; ++i)
|
|
|
|
head[i] = cur;
|
|
|
|
while (cur != NULL) {
|
|
|
|
|
2004-06-16 23:24:02 +00:00
|
|
|
if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
|
2004-02-26 02:04:28 +00:00
|
|
|
PF_SET_SKIP_STEPS(PF_SKIP_IFP);
|
|
|
|
if (cur->direction != prev->direction)
|
|
|
|
PF_SET_SKIP_STEPS(PF_SKIP_DIR);
|
|
|
|
if (cur->af != prev->af)
|
|
|
|
PF_SET_SKIP_STEPS(PF_SKIP_AF);
|
|
|
|
if (cur->proto != prev->proto)
|
|
|
|
PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
|
2005-05-03 16:43:32 +00:00
|
|
|
if (cur->src.neg != prev->src.neg ||
|
2004-02-26 02:04:28 +00:00
|
|
|
pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
|
|
|
|
PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
|
|
|
|
if (cur->src.port[0] != prev->src.port[0] ||
|
|
|
|
cur->src.port[1] != prev->src.port[1] ||
|
|
|
|
cur->src.port_op != prev->src.port_op)
|
|
|
|
PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
|
2005-05-03 16:43:32 +00:00
|
|
|
if (cur->dst.neg != prev->dst.neg ||
|
2004-02-26 02:04:28 +00:00
|
|
|
pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
|
|
|
|
PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
|
|
|
|
if (cur->dst.port[0] != prev->dst.port[0] ||
|
|
|
|
cur->dst.port[1] != prev->dst.port[1] ||
|
|
|
|
cur->dst.port_op != prev->dst.port_op)
|
|
|
|
PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
|
|
|
|
|
|
|
|
prev = cur;
|
|
|
|
cur = TAILQ_NEXT(cur, entries);
|
|
|
|
}
|
|
|
|
for (i = 0; i < PF_SKIP_COUNT; ++i)
|
|
|
|
PF_SET_SKIP_STEPS(i);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
|
|
|
|
{
|
|
|
|
if (aw1->type != aw2->type)
|
|
|
|
return (1);
|
|
|
|
switch (aw1->type) {
|
|
|
|
case PF_ADDR_ADDRMASK:
|
2011-06-28 11:57:25 +00:00
|
|
|
case PF_ADDR_RANGE:
|
2004-02-26 02:04:28 +00:00
|
|
|
if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0))
|
|
|
|
return (1);
|
|
|
|
if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
|
|
|
|
return (1);
|
|
|
|
return (0);
|
|
|
|
case PF_ADDR_DYNIFTL:
|
2004-06-16 23:24:02 +00:00
|
|
|
return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
|
2004-02-26 02:04:28 +00:00
|
|
|
case PF_ADDR_NOROUTE:
|
2007-07-03 12:16:07 +00:00
|
|
|
case PF_ADDR_URPFFAILED:
|
2004-02-26 02:04:28 +00:00
|
|
|
return (0);
|
|
|
|
case PF_ADDR_TABLE:
|
|
|
|
return (aw1->p.tbl != aw2->p.tbl);
|
2007-07-03 12:16:07 +00:00
|
|
|
case PF_ADDR_RTLABEL:
|
|
|
|
return (aw1->v.rtlabel != aw2->v.rtlabel);
|
2004-02-26 02:04:28 +00:00
|
|
|
default:
|
|
|
|
printf("invalid address type: %d\n", aw1->type);
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Incrementally adjust a 16-bit checksum after one 16-bit word covered by
 * it changed from `old' to `new'.
 *
 * The difference is added in 32-bit arithmetic, the upper half is folded
 * back into the lower half once, and the result is truncated to 16 bits.
 *
 * With `udp' set, a checksum of 0 is returned unchanged as 0, and a
 * computed result of 0 is returned as 0xFFFF instead.
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t	sum;

	if (udp && cksum == 0)
		return (0x0000);

	sum = cksum + old - new;
	sum = ((sum >> 16) + (sum & 0xffff)) & 0xffff;

	if (udp && sum == 0)
		return (0xFFFF);
	return (sum);
}
|
|
|
|
|
|
|
|
void
|
|
|
|
pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
|
|
|
|
struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
|
|
|
|
{
|
|
|
|
struct pf_addr ao;
|
|
|
|
u_int16_t po = *p;
|
|
|
|
|
|
|
|
PF_ACPY(&ao, a, af);
|
|
|
|
PF_ACPY(a, an, af);
|
|
|
|
|
|
|
|
*p = pn;
|
|
|
|
|
|
|
|
switch (af) {
|
|
|
|
#ifdef INET
|
|
|
|
case AF_INET:
|
|
|
|
*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
|
|
|
|
ao.addr16[0], an->addr16[0], 0),
|
|
|
|
ao.addr16[1], an->addr16[1], 0);
|
|
|
|
*p = pn;
|
|
|
|
*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
|
|
|
|
ao.addr16[0], an->addr16[0], u),
|
|
|
|
ao.addr16[1], an->addr16[1], u),
|
|
|
|
po, pn, u);
|
|
|
|
break;
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
|
|
|
*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
|
|
|
|
pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
|
|
|
|
pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
|
|
|
|
ao.addr16[0], an->addr16[0], u),
|
|
|
|
ao.addr16[1], an->addr16[1], u),
|
|
|
|
ao.addr16[2], an->addr16[2], u),
|
|
|
|
ao.addr16[3], an->addr16[3], u),
|
|
|
|
ao.addr16[4], an->addr16[4], u),
|
|
|
|
ao.addr16[5], an->addr16[5], u),
|
|
|
|
ao.addr16[6], an->addr16[6], u),
|
|
|
|
ao.addr16[7], an->addr16[7], u),
|
|
|
|
po, pn, u);
|
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Changes a u_int32_t. Uses a void * so there are no align restrictions */
|
|
|
|
void
|
|
|
|
pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
|
|
|
|
{
|
|
|
|
u_int32_t ao;
|
|
|
|
|
|
|
|
memcpy(&ao, a, sizeof(ao));
|
|
|
|
memcpy(a, &an, sizeof(u_int32_t));
|
|
|
|
*c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u),
|
|
|
|
ao % 65536, an % 65536, u);
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef INET6
/*
 * Replace the IPv6 address at a with an and adjust checksum *c for each
 * of the eight 16-bit address words; u is passed to pf_cksum_fixup() as
 * its udp flag.
 */
void
pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
{
	struct pf_addr	 old;
	u_int16_t	 sum;
	int		 w;

	PF_ACPY(&old, a, AF_INET6);
	PF_ACPY(a, an, AF_INET6);

	sum = *c;
	for (w = 0; w < 8; w++)
		sum = pf_cksum_fixup(sum, old.addr16[w], an->addr16[w], u);
	*c = sum;
}
#endif /* INET6 */
|
|
|
|
|
|
|
|
void
|
|
|
|
pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
|
|
|
|
struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
|
|
|
|
u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
|
|
|
|
{
|
|
|
|
struct pf_addr oia, ooa;
|
|
|
|
|
|
|
|
PF_ACPY(&oia, ia, af);
|
2011-06-28 11:57:25 +00:00
|
|
|
if (oa)
|
|
|
|
PF_ACPY(&ooa, oa, af);
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
/* Change inner protocol port, fix inner protocol checksum. */
|
|
|
|
if (ip != NULL) {
|
|
|
|
u_int16_t oip = *ip;
|
2011-06-28 11:57:25 +00:00
|
|
|
u_int32_t opc;
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
if (pc != NULL)
|
|
|
|
opc = *pc;
|
|
|
|
*ip = np;
|
|
|
|
if (pc != NULL)
|
|
|
|
*pc = pf_cksum_fixup(*pc, oip, *ip, u);
|
|
|
|
*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
|
|
|
|
if (pc != NULL)
|
|
|
|
*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
|
|
|
|
}
|
|
|
|
/* Change inner ip address, fix inner ip and icmp checksums. */
|
|
|
|
PF_ACPY(ia, na, af);
|
|
|
|
switch (af) {
|
|
|
|
#ifdef INET
|
|
|
|
case AF_INET: {
|
|
|
|
u_int32_t oh2c = *h2c;
|
|
|
|
|
|
|
|
*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
|
|
|
|
oia.addr16[0], ia->addr16[0], 0),
|
|
|
|
oia.addr16[1], ia->addr16[1], 0);
|
|
|
|
*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
|
|
|
|
oia.addr16[0], ia->addr16[0], 0),
|
|
|
|
oia.addr16[1], ia->addr16[1], 0);
|
|
|
|
*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
|
|
|
*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
|
|
|
|
pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
|
|
|
|
pf_cksum_fixup(pf_cksum_fixup(*ic,
|
|
|
|
oia.addr16[0], ia->addr16[0], u),
|
|
|
|
oia.addr16[1], ia->addr16[1], u),
|
|
|
|
oia.addr16[2], ia->addr16[2], u),
|
|
|
|
oia.addr16[3], ia->addr16[3], u),
|
|
|
|
oia.addr16[4], ia->addr16[4], u),
|
|
|
|
oia.addr16[5], ia->addr16[5], u),
|
|
|
|
oia.addr16[6], ia->addr16[6], u),
|
|
|
|
oia.addr16[7], ia->addr16[7], u);
|
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
/* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. */
|
|
|
|
if (oa) {
|
|
|
|
PF_ACPY(oa, na, af);
|
|
|
|
switch (af) {
|
2004-02-26 02:04:28 +00:00
|
|
|
#ifdef INET
|
2011-06-28 11:57:25 +00:00
|
|
|
case AF_INET:
|
|
|
|
*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
|
|
|
|
ooa.addr16[0], oa->addr16[0], 0),
|
|
|
|
ooa.addr16[1], oa->addr16[1], 0);
|
|
|
|
break;
|
2004-02-26 02:04:28 +00:00
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
2011-06-28 11:57:25 +00:00
|
|
|
case AF_INET6:
|
|
|
|
*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
|
|
|
|
pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
|
|
|
|
pf_cksum_fixup(pf_cksum_fixup(*ic,
|
|
|
|
ooa.addr16[0], oa->addr16[0], u),
|
|
|
|
ooa.addr16[1], oa->addr16[1], u),
|
|
|
|
ooa.addr16[2], oa->addr16[2], u),
|
|
|
|
ooa.addr16[3], oa->addr16[3], u),
|
|
|
|
ooa.addr16[4], oa->addr16[4], u),
|
|
|
|
ooa.addr16[5], oa->addr16[5], u),
|
|
|
|
ooa.addr16[6], oa->addr16[6], u),
|
|
|
|
ooa.addr16[7], oa->addr16[7], u);
|
|
|
|
break;
|
2004-02-26 02:04:28 +00:00
|
|
|
#endif /* INET6 */
|
2011-06-28 11:57:25 +00:00
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-07-03 12:16:07 +00:00
|
|
|
|
|
|
|
/*
 * Need to modulate the sequence numbers in the TCP SACK option
 * (credits to Krzysztof Pfaff for report and patch)
 *
 * The TCP options following the header at offset off in mbuf m are pulled
 * into a local buffer and walked.  In every SACK option each block's
 * start and end are reduced by dst->seqdiff, with th->th_sum adjusted for
 * each change via pf_change_a().  If a SACK option of sufficient length
 * was processed, the modified option bytes are written back into the mbuf.
 *
 * Returns 1 when the options were copied back, 0 otherwise (including
 * when there are too few option bytes or pf_pull_hdr() fails).
 */
int
pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
    struct tcphdr *th, struct pf_state_peer *dst)
{
	/* option bytes = header length (th_off in 32-bit words) - fixed header */
	int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
#ifdef __FreeBSD__
	u_int8_t opts[TCP_MAXOLEN], *opt = opts;
#else
	u_int8_t opts[MAX_TCPOPTLEN], *opt = opts;
#endif
	int copyback = 0, i, olen;
	struct sackblk sack;

	/* smallest SACK option handled here: TCPOLEN_SACK plus 2 leading bytes */
#define TCPOLEN_SACKLEN		(TCPOLEN_SACK + 2)
	if (hlen < TCPOLEN_SACKLEN ||
	    !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
		return 0;

	/* stop as soon as the remaining bytes cannot hold such an option */
	while (hlen >= TCPOLEN_SACKLEN) {
		/* second byte is used as the option length in the cases below */
		olen = opt[1];
		switch (*opt) {
		case TCPOPT_EOL:	/* FALLTHROUGH */
		case TCPOPT_NOP:
			/* both are stepped over as single bytes */
			opt++;
			hlen--;
			break;
		case TCPOPT_SACK:
			/* never trust a length that runs past the buffer */
			if (olen > hlen)
				olen = hlen;
			if (olen >= TCPOLEN_SACKLEN) {
				/* blocks start at offset 2, TCPOLEN_SACK bytes apart */
				for (i = 2; i + TCPOLEN_SACK <= olen;
				    i += TCPOLEN_SACK) {
					/* work on a copy: &opt[i] may be unaligned */
					memcpy(&sack, &opt[i], sizeof(sack));
					pf_change_a(&sack.start, &th->th_sum,
					    htonl(ntohl(sack.start) -
					    dst->seqdiff), 0);
					pf_change_a(&sack.end, &th->th_sum,
					    htonl(ntohl(sack.end) -
					    dst->seqdiff), 0);
					memcpy(&opt[i], &sack, sizeof(sack));
				}
				copyback = 1;
			}
			/* FALLTHROUGH */
		default:
			/* always advance by at least 2 so the loop terminates */
			if (olen < 2)
				olen = 2;
			hlen -= olen;
			opt += olen;
		}
	}

	/* write the whole (original-length) option area back if it was edited */
	if (copyback)
#ifdef __FreeBSD__
		m_copyback(m, off + sizeof(*th), thoptlen, (caddr_t)opts);
#else
		m_copyback(m, off + sizeof(*th), thoptlen, opts);
#endif
	return (copyback);
}
|
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
void
|
2006-09-12 04:25:13 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pf_send_tcp(struct mbuf *replyto, const struct pf_rule *r, sa_family_t af,
|
|
|
|
#else
|
2004-02-26 02:04:28 +00:00
|
|
|
pf_send_tcp(const struct pf_rule *r, sa_family_t af,
|
2006-09-12 04:25:13 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
const struct pf_addr *saddr, const struct pf_addr *daddr,
|
|
|
|
u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
|
2005-05-03 16:43:32 +00:00
|
|
|
u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
|
2007-07-03 12:16:07 +00:00
|
|
|
u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp)
|
2004-02-26 02:04:28 +00:00
|
|
|
{
|
|
|
|
struct mbuf *m;
|
2007-07-03 12:16:07 +00:00
|
|
|
int len, tlen;
|
2004-02-26 02:04:28 +00:00
|
|
|
#ifdef INET
|
2007-07-03 12:16:07 +00:00
|
|
|
struct ip *h;
|
2004-02-26 02:04:28 +00:00
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
2007-07-03 12:16:07 +00:00
|
|
|
struct ip6_hdr *h6;
|
2004-02-26 02:04:28 +00:00
|
|
|
#endif /* INET6 */
|
2007-07-03 12:16:07 +00:00
|
|
|
struct tcphdr *th;
|
|
|
|
char *opt;
|
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
struct pf_mtag *pf_mtag;
|
|
|
|
|
2007-07-03 12:16:07 +00:00
|
|
|
KASSERT(
|
|
|
|
#ifdef INET
|
|
|
|
af == AF_INET
|
|
|
|
#else
|
|
|
|
0
|
|
|
|
#endif
|
|
|
|
||
|
|
|
|
#ifdef INET6
|
|
|
|
af == AF_INET6
|
|
|
|
#else
|
|
|
|
0
|
|
|
|
#endif
|
|
|
|
, ("Unsupported AF %d", af));
|
|
|
|
len = 0;
|
|
|
|
th = NULL;
|
|
|
|
#ifdef INET
|
|
|
|
h = NULL;
|
|
|
|
#endif
|
|
|
|
#ifdef INET6
|
|
|
|
h6 = NULL;
|
|
|
|
#endif
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif /* __FreeBSD__ */
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
/* maximum segment size tcp option */
|
|
|
|
tlen = sizeof(struct tcphdr);
|
|
|
|
if (mss)
|
|
|
|
tlen += 4;
|
|
|
|
|
|
|
|
switch (af) {
|
|
|
|
#ifdef INET
|
|
|
|
case AF_INET:
|
|
|
|
len = sizeof(struct ip) + tlen;
|
|
|
|
break;
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
|
|
|
len = sizeof(struct ip6_hdr) + tlen;
|
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
|
|
|
}
|
|
|
|
|
|
|
|
/* create outgoing mbuf */
|
2004-07-17 05:10:06 +00:00
|
|
|
m = m_gethdr(M_DONTWAIT, MT_HEADER);
|
|
|
|
if (m == NULL)
|
|
|
|
return;
|
2006-09-12 04:25:13 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
#ifdef MAC
|
2011-06-28 11:57:25 +00:00
|
|
|
mac_netinet_firewall_send(m);
|
2006-09-12 04:25:13 +00:00
|
|
|
#endif
|
2007-07-03 12:16:07 +00:00
|
|
|
if ((pf_mtag = pf_get_mtag(m)) == NULL) {
|
|
|
|
m_freem(m);
|
|
|
|
return;
|
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2007-07-03 12:16:07 +00:00
|
|
|
if (tag)
|
2005-05-03 16:43:32 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
m->m_flags |= M_SKIP_FIREWALL;
|
2011-06-28 11:57:25 +00:00
|
|
|
pf_mtag->tag = rtag;
|
2004-07-17 05:10:06 +00:00
|
|
|
#else
|
2011-06-28 11:57:25 +00:00
|
|
|
m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
|
|
|
|
m->m_pkthdr.pf.tag = rtag;
|
2004-07-17 05:10:06 +00:00
|
|
|
#endif
|
2007-07-03 12:16:07 +00:00
|
|
|
|
|
|
|
if (r != NULL && r->rtableid >= 0)
|
Add code to allow the system to handle multiple routing tables.
This particular implementation is designed to be fully backwards compatible
and to be MFC-able to 7.x (and 6.x)
Currently the only protocol that can make use of the multiple tables is IPv4
Similar functionality exists in OpenBSD and Linux.
From my notes:
-----
One thing where FreeBSD has been falling behind, and which by chance I
have some time to work on is "policy based routing", which allows
different
packet streams to be routed by more than just the destination address.
Constraints:
------------
I want to make some form of this available in the 6.x tree
(and by extension 7.x) , but FreeBSD in general needs it so I might as
well do it in -current and back port the portions I need.
One of the ways that this can be done is to have the ability to
instantiate multiple kernel routing tables (which I will now
refer to as "Forwarding Information Bases" or "FIBs" for political
correctness reasons). Which FIB a particular packet uses to make
the next hop decision can be decided by a number of mechanisms.
The policies these mechanisms implement are the "Policies" referred
to in "Policy based routing".
One of the constraints I have if I try to back port this work to
6.x is that it must be implemented as a EXTENSION to the existing
ABIs in 6.x so that third party applications do not need to be
recompiled in timespan of the branch.
This first version will not have some of the bells and whistles that
will come with later versions. It will, for example, be limited to 16
tables in the first commit.
Implementation method, Compatible version. (part 1)
-------------------------------
For this reason I have implemented a "sufficient subset" of a
multiple routing table solution in Perforce, and back-ported it
to 6.x. (also in Perforce though not always caught up with what I
have done in -current/P4). The subset allows a number of FIBs
to be defined at compile time (8 is sufficient for my purposes in 6.x)
and implements the changes needed to allow IPV4 to use them. I have not
done the changes for ipv6 simply because I do not need it, and I do not
have enough knowledge of ipv6 (e.g. neighbor discovery) needed to do it.
Other protocol families are left untouched and should there be
users with proprietary protocol families, they should continue to work
and be oblivious to the existence of the extra FIBs.
To understand how this is done, one must know that the current FIB
code starts everything off with a single dimensional array of
pointers to FIB head structures (One per protocol family), each of
which in turn points to the trie of routes available to that family.
The basic change in the ABI compatible version of the change is to
extend that array to be a 2 dimensional array, so that
instead of protocol family X looking at rt_tables[X] for the
table it needs, it looks at rt_tables[Y][X] when for all
protocol families except ipv4 Y is always 0.
Code that is unaware of the change always just sees the first row
of the table, which of course looks just like the one dimensional
array that existed before.
The entry points rtrequest(), rtalloc(), rtalloc1(), rtalloc_ign()
are all maintained, but refer only to the first row of the array,
so that existing callers in proprietary protocols can continue to
do the "right thing".
Some new entry points are added, for the exclusive use of ipv4 code
called in_rtrequest(), in_rtalloc(), in_rtalloc1() and in_rtalloc_ign(),
which have an extra argument which refers the code to the correct row.
In addition, there are some new entry points (currently called
rtalloc_fib() and friends) that check the Address family being
looked up and call either rtalloc() (and friends) if the protocol
is not IPv4 forcing the action to row 0 or to the appropriate row
if it IS IPv4 (and that info is available). These are for calling
from code that is not specific to any particular protocol. The way
these are implemented would change in the non ABI preserving code
to be added later.
One feature of the first version of the code is that for ipv4,
the interface routes show up automatically on all the FIBs, so
that no matter what FIB you select you always have the basic
direct attached hosts available to you. (rtinit() does this
automatically).
You CAN delete an interface route from one FIB should you want
to but by default it's there. ARP information is also available
in each FIB. It's assumed that the same machine would have the
same MAC address, regardless of which FIB you are using to get
to it.
This brings us as to how the correct FIB is selected for an outgoing
IPV4 packet.
Firstly, all packets have a FIB associated with them. if nothing
has been done to change it, it will be FIB 0. The FIB is changed
in the following ways.
Packets fall into one of a number of classes.
1/ locally generated packets, coming from a socket/PCB.
Such packets select a FIB from a number associated with the
socket/PCB. This in turn is inherited from the process,
but can be changed by a socket option. The process in turn
inherits it on fork. I have written a utility called setfib
that acts a bit like nice..
setfib -3 ping target.example.com # will use fib 3 for ping.
It is an obvious extension to make it a property of a jail
but I have not done so. It can be achieved by combining the setfib and
jail commands.
2/ packets received on an interface for forwarding.
By default these packets would use table 0,
(or possibly a number settable in a sysctl(not yet)).
but prior to routing the firewall can inspect them (see below).
(possibly in the future you may be able to associate a FIB
with packets received on an interface.. An ifconfig arg, but not yet.)
3/ packets inspected by a packet classifier, which can arbitrarily
associate a fib with it on a packet by packet basis.
A fib assigned to a packet by a packet classifier
(such as ipfw) would over-ride a fib associated by
a more default source. (such as cases 1 or 2).
4/ a tcp listen socket associated with a fib will generate
accept sockets that are associated with that same fib.
5/ Packets generated in response to some other packet (e.g. reset
or icmp packets). These should use the FIB associated with the
packet being responded to.
6/ Packets generated during encapsulation.
gif, tun and other tunnel interfaces will encapsulate using the FIB
that was in effect with the process that set up the tunnel.
thus setfib 1 ifconfig gif0 [tunnel instructions]
will set the fib for the tunnel to use to be fib 1.
Routing messages would be associated with their
process, and thus select one FIB or another.
messages from the kernel would be associated with the fib they
refer to and would only be received by a routing socket associated
with that fib. (not yet implemented)
In addition Netstat has been edited to be able to cope with the
fact that the array is now 2 dimensional. (It looks in system
memory using libkvm (!)). Old versions of netstat see only the first FIB.
In addition two sysctls are added to give:
a) the number of FIBs compiled in (active)
b) the default FIB of the calling process.
Early testing experience:
-------------------------
Basically our (IronPort's) appliance does this functionality already
using ipfw fwd but that method has some drawbacks.
For example,
It can't fully simulate a routing table because it can't influence the
socket's choice of local address when a connect() is done.
Testing during the generating of these changes has been
remarkably smooth so far. Multiple tables have co-existed
with no notable side effects, and packets have been routes
accordingly.
ipfw has grown 2 new keywords:
setfib N ip from any to any
count ip from any to any fib N
In pf there seems to be a requirement to be able to give symbolic names to the
fibs but I do not have that capacity. I am not sure if it is required.
SCTP has interestingly enough built in support for this, called VRFs
in Cisco parlance. it will be interesting to see how that handles it
when it suddenly actually does something.
Where to next:
--------------------
After committing the ABI compatible version and MFCing it, I'd
like to proceed in a forward direction in -current. this will
result in some roto-tilling in the routing code.
Firstly: the current code's idea of having a separate tree per
protocol family, all of the same format, and pointed to by the
1 dimensional array is a bit silly. Especially when one considers that
there is code that makes assumptions about every protocol having the
same internal structures there. Some protocols don't WANT that
sort of structure. (for example the whole idea of a netmask is foreign
to appletalk). This needs to be made opaque to the external code.
My suggested first change is to add routing method pointers to the
'domain' structure, along with information pointing the data.
instead of having an array of pointers to uniform structures,
there would be an array pointing to the 'domain' structures
for each protocol address domain (protocol family),
and the methods this reached would be called. The methods would have
an argument that gives FIB number, but the protocol would be free
to ignore it.
When the ABI can be changed it raises the possibility of the
addition of a fib entry into the "struct route". Currently,
the structure contains the sockaddr of the destination, and the resulting
fib entry. To make this work fully, one could add a fib number
so that given an address and a fib, one can find the third element, the
fib entry.
Interaction with the ARP layer/ LL layer would need to be
revisited as well. Qing Li has been working on this already.
This work was sponsored by Ironport Systems/Cisco
Reviewed by: several including rwatson, bz and mlair (parts each)
Obtained from: Ironport systems/Cisco
2008-05-09 23:03:00 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
{
|
|
|
|
M_SETFIB(m, r->rtableid);
|
2007-07-03 12:16:07 +00:00
|
|
|
pf_mtag->rtableid = r->rtableid;
|
2011-06-28 11:57:25 +00:00
|
|
|
#else
|
|
|
|
m->m_pkthdr.pf.rtableid = r->rtableid;
|
|
|
|
#endif
|
Add code to allow the system to handle multiple routing tables.
This particular implementation is designed to be fully backwards compatible
and to be MFC-able to 7.x (and 6.x)
Currently the only protocol that can make use of the multiple tables is IPv4
Similar functionality exists in OpenBSD and Linux.
From my notes:
-----
One thing where FreeBSD has been falling behind, and which by chance I
have some time to work on is "policy based routing", which allows
different
packet streams to be routed by more than just the destination address.
Constraints:
------------
I want to make some form of this available in the 6.x tree
(and by extension 7.x) , but FreeBSD in general needs it so I might as
well do it in -current and back port the portions I need.
One of the ways that this can be done is to have the ability to
instantiate multiple kernel routing tables (which I will now
refer to as "Forwarding Information Bases" or "FIBs" for political
correctness reasons). Which FIB a particular packet uses to make
the next hop decision can be decided by a number of mechanisms.
The policies these mechanisms implement are the "Policies" referred
to in "Policy based routing".
One of the constraints I have if I try to back port this work to
6.x is that it must be implemented as an EXTENSION to the existing
ABIs in 6.x so that third party applications do not need to be
recompiled in the timespan of the branch.
This first version will not have some of the bells and whistles that
will come with later versions. It will, for example, be limited to 16
tables in the first commit.
Implementation method, Compatible version. (part 1)
-------------------------------
For this reason I have implemented a "sufficient subset" of a
multiple routing table solution in Perforce, and back-ported it
to 6.x. (also in Perforce though not always caught up with what I
have done in -current/P4). The subset allows a number of FIBs
to be defined at compile time (8 is sufficient for my purposes in 6.x)
and implements the changes needed to allow IPV4 to use them. I have not
done the changes for ipv6 simply because I do not need it, and I do not
have enough knowledge of ipv6 (e.g. neighbor discovery) needed to do it.
Other protocol families are left untouched and should there be
users with proprietary protocol families, they should continue to work
and be oblivious to the existence of the extra FIBs.
To understand how this is done, one must know that the current FIB
code starts everything off with a single dimensional array of
pointers to FIB head structures (One per protocol family), each of
which in turn points to the trie of routes available to that family.
The basic change in the ABI compatible version of the change is to
extend that array to be a 2 dimensional array, so that
instead of protocol family X looking at rt_tables[X] for the
table it needs, it looks at rt_tables[Y][X] when for all
protocol families except ipv4 Y is always 0.
Code that is unaware of the change always just sees the first row
of the table, which of course looks just like the one dimensional
array that existed before.
The entry points rtrequest(), rtalloc(), rtalloc1(), rtalloc_ign()
are all maintained, but refer only to the first row of the array,
so that existing callers in proprietary protocols can continue to
do the "right thing".
Some new entry points are added, for the exclusive use of ipv4 code
called in_rtrequest(), in_rtalloc(), in_rtalloc1() and in_rtalloc_ign(),
which have an extra argument which refers the code to the correct row.
In addition, there are some new entry points (currently called
rtalloc_fib() and friends) that check the Address family being
looked up and call either rtalloc() (and friends) if the protocol
is not IPv4 forcing the action to row 0 or to the appropriate row
if it IS IPv4 (and that info is available). These are for calling
from code that is not specific to any particular protocol. The way
these are implemented would change in the non ABI preserving code
to be added later.
One feature of the first version of the code is that for ipv4,
the interface routes show up automatically on all the FIBs, so
that no matter what FIB you select you always have the basic
direct attached hosts available to you. (rtinit() does this
automatically).
You CAN delete an interface route from one FIB should you want
to but by default it's there. ARP information is also available
in each FIB. It's assumed that the same machine would have the
same MAC address, regardless of which FIB you are using to get
to it.
This brings us as to how the correct FIB is selected for an outgoing
IPV4 packet.
Firstly, all packets have a FIB associated with them. if nothing
has been done to change it, it will be FIB 0. The FIB is changed
in the following ways.
Packets fall into one of a number of classes.
1/ locally generated packets, coming from a socket/PCB.
Such packets select a FIB from a number associated with the
socket/PCB. This in turn is inherited from the process,
but can be changed by a socket option. The process in turn
inherits it on fork. I have written a utility called setfib
that acts a bit like nice..
setfib -3 ping target.example.com # will use fib 3 for ping.
It is an obvious extension to make it a property of a jail
but I have not done so. It can be achieved by combining the setfib and
jail commands.
2/ packets received on an interface for forwarding.
By default these packets would use table 0,
(or possibly a number settable in a sysctl(not yet)).
but prior to routing the firewall can inspect them (see below).
(possibly in the future you may be able to associate a FIB
with packets received on an interface.. An ifconfig arg, but not yet.)
3/ packets inspected by a packet classifier, which can arbitrarily
associate a fib with it on a packet by packet basis.
A fib assigned to a packet by a packet classifier
(such as ipfw) would over-ride a fib associated by
a more default source. (such as cases 1 or 2).
4/ a tcp listen socket associated with a fib will generate
accept sockets that are associated with that same fib.
5/ Packets generated in response to some other packet (e.g. reset
or icmp packets). These should use the FIB associated with the
packet being responded to.
6/ Packets generated during encapsulation.
gif, tun and other tunnel interfaces will encapsulate using the FIB
that was in effect with the process that set up the tunnel.
thus setfib 1 ifconfig gif0 [tunnel instructions]
will set the fib for the tunnel to use to be fib 1.
Routing messages would be associated with their
process, and thus select one FIB or another.
messages from the kernel would be associated with the fib they
refer to and would only be received by a routing socket associated
with that fib. (not yet implemented)
In addition Netstat has been edited to be able to cope with the
fact that the array is now 2 dimensional. (It looks in system
memory using libkvm (!)). Old versions of netstat see only the first FIB.
In addition two sysctls are added to give:
a) the number of FIBs compiled in (active)
b) the default FIB of the calling process.
Early testing experience:
-------------------------
Basically our (IronPort's) appliance does this functionality already
using ipfw fwd but that method has some drawbacks.
For example,
It can't fully simulate a routing table because it can't influence the
socket's choice of local address when a connect() is done.
Testing during the generating of these changes has been
remarkably smooth so far. Multiple tables have co-existed
with no notable side effects, and packets have been routed
accordingly.
ipfw has grown 2 new keywords:
setfib N ip from any to any
count ip from any to any fib N
In pf there seems to be a requirement to be able to give symbolic names to the
fibs but I do not have that capacity. I am not sure if it is required.
SCTP has interestingly enough built in support for this, called VRFs
in Cisco parlance. it will be interesting to see how that handles it
when it suddenly actually does something.
Where to next:
--------------------
After committing the ABI compatible version and MFCing it, I'd
like to proceed in a forward direction in -current. this will
result in some roto-tilling in the routing code.
Firstly: the current code's idea of having a separate tree per
protocol family, all of the same format, and pointed to by the
1 dimensional array is a bit silly. Especially when one considers that
there is code that makes assumptions about every protocol having the
same internal structures there. Some protocols don't WANT that
sort of structure. (for example the whole idea of a netmask is foreign
to appletalk). This needs to be made opaque to the external code.
My suggested first change is to add routing method pointers to the
'domain' structure, along with information pointing to the data.
instead of having an array of pointers to uniform structures,
there would be an array pointing to the 'domain' structures
for each protocol address domain (protocol family),
and the methods this reached would be called. The methods would have
an argument that gives FIB number, but the protocol would be free
to ignore it.
When the ABI can be changed it raises the possibility of the
addition of a fib entry into the "struct route". Currently,
the structure contains the sockaddr of the destination, and the resulting
fib entry. To make this work fully, one could add a fib number
so that given an address and a fib, one can find the third element, the
fib entry.
Interaction with the ARP layer/ LL layer would need to be
revisited as well. Qing Li has been working on this already.
This work was sponsored by Ironport Systems/Cisco
Reviewed by: several including rwatson, bz and mlair (parts each)
Obtained from: Ironport systems/Cisco
2008-05-09 23:03:00 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
}
|
|
|
|
#endif
|
2011-06-28 11:57:25 +00:00
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
#ifdef ALTQ
|
|
|
|
if (r != NULL && r->qid) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
2007-07-03 12:16:07 +00:00
|
|
|
pf_mtag->qid = r->qid;
|
2011-06-28 11:57:25 +00:00
|
|
|
|
2007-07-03 12:16:07 +00:00
|
|
|
/* add hints for ecn */
|
|
|
|
pf_mtag->hdr = mtod(m, struct ip *);
|
2011-06-28 11:57:25 +00:00
|
|
|
#else
|
|
|
|
m->m_pkthdr.pf.qid = r->qid;
|
|
|
|
/* add hints for ecn */
|
|
|
|
m->m_pkthdr.pf.hdr = mtod(m, struct ip *);
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
2005-05-03 16:43:32 +00:00
|
|
|
#endif /* ALTQ */
|
2004-02-26 02:04:28 +00:00
|
|
|
m->m_data += max_linkhdr;
|
|
|
|
m->m_pkthdr.len = m->m_len = len;
|
|
|
|
m->m_pkthdr.rcvif = NULL;
|
|
|
|
bzero(m->m_data, len);
|
|
|
|
switch (af) {
|
|
|
|
#ifdef INET
|
|
|
|
case AF_INET:
|
|
|
|
h = mtod(m, struct ip *);
|
|
|
|
|
|
|
|
/* IP header fields included in the TCP checksum */
|
|
|
|
h->ip_p = IPPROTO_TCP;
|
|
|
|
h->ip_len = htons(tlen);
|
|
|
|
h->ip_src.s_addr = saddr->v4.s_addr;
|
|
|
|
h->ip_dst.s_addr = daddr->v4.s_addr;
|
|
|
|
|
|
|
|
th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
|
|
|
|
break;
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
|
|
|
h6 = mtod(m, struct ip6_hdr *);
|
|
|
|
|
|
|
|
/* IP header fields included in the TCP checksum */
|
|
|
|
h6->ip6_nxt = IPPROTO_TCP;
|
|
|
|
h6->ip6_plen = htons(tlen);
|
|
|
|
memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
|
|
|
|
memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
|
|
|
|
|
|
|
|
th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
|
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
|
|
|
}
|
|
|
|
|
|
|
|
/* TCP header */
|
|
|
|
th->th_sport = sport;
|
|
|
|
th->th_dport = dport;
|
|
|
|
th->th_seq = htonl(seq);
|
|
|
|
th->th_ack = htonl(ack);
|
|
|
|
th->th_off = tlen >> 2;
|
|
|
|
th->th_flags = flags;
|
|
|
|
th->th_win = htons(win);
|
|
|
|
|
|
|
|
if (mss) {
|
|
|
|
opt = (char *)(th + 1);
|
|
|
|
opt[0] = TCPOPT_MAXSEG;
|
|
|
|
opt[1] = 4;
|
|
|
|
HTONS(mss);
|
|
|
|
bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (af) {
|
|
|
|
#ifdef INET
|
|
|
|
case AF_INET:
|
|
|
|
/* TCP checksum */
|
|
|
|
th->th_sum = in_cksum(m, len);
|
|
|
|
|
|
|
|
/* Finish the IP header */
|
|
|
|
h->ip_v = 4;
|
|
|
|
h->ip_hl = sizeof(*h) >> 2;
|
|
|
|
h->ip_tos = IPTOS_LOWDELAY;
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
Commit step 1 of the vimage project, (network stack)
virtualization work done by Marko Zec (zec@).
This is the first in a series of commits over the course
of the next few weeks.
Mark all uses of global variables to be virtualized
with a V_ prefix.
Use macros to map them back to their global names for
now, so this is a NOP change only.
We hope to have caught at least 85-90% of what is needed
so we do not invalidate a lot of outstanding patches again.
Obtained from: //depot/projects/vimage-commit2/...
Reviewed by: brooks, des, ed, mav, julian,
jamie, kris, rwatson, zec, ...
(various people I forgot, different versions)
md5 (with a bit of help)
Sponsored by: NLnet Foundation, The FreeBSD Foundation
X-MFC after: never
V_Commit_Message_Reviewed_By: more people than the patch
2008-08-17 23:27:27 +00:00
|
|
|
h->ip_off = V_path_mtu_discovery ? IP_DF : 0;
|
2004-06-16 23:24:02 +00:00
|
|
|
h->ip_len = len;
|
2011-06-28 11:57:25 +00:00
|
|
|
h->ip_ttl = ttl ? ttl : V_ip_defttl;
|
2004-02-26 02:34:12 +00:00
|
|
|
#else
|
|
|
|
h->ip_len = htons(len);
|
2011-06-28 11:57:25 +00:00
|
|
|
h->ip_off = htons(ip_mtudisc ? IP_DF : 0);
|
|
|
|
h->ip_ttl = ttl ? ttl : ip_defttl;
|
2004-06-16 23:24:02 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
h->ip_sum = 0;
|
2005-05-03 16:43:32 +00:00
|
|
|
if (eh == NULL) {
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
PF_UNLOCK();
|
|
|
|
ip_output(m, (void *)NULL, (void *)NULL, 0,
|
|
|
|
(void *)NULL, (void *)NULL);
|
|
|
|
PF_LOCK();
|
2005-05-03 16:43:32 +00:00
|
|
|
#else /* ! __FreeBSD__ */
|
|
|
|
ip_output(m, (void *)NULL, (void *)NULL, 0,
|
|
|
|
(void *)NULL, (void *)NULL);
|
|
|
|
#endif
|
|
|
|
} else {
|
|
|
|
struct route ro;
|
|
|
|
struct rtentry rt;
|
|
|
|
struct ether_header *e = (void *)ro.ro_dst.sa_data;
|
|
|
|
|
|
|
|
if (ifp == NULL) {
|
|
|
|
m_freem(m);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
rt.rt_ifp = ifp;
|
|
|
|
ro.ro_rt = &rt;
|
|
|
|
ro.ro_dst.sa_len = sizeof(ro.ro_dst);
|
|
|
|
ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT;
|
|
|
|
bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN);
|
|
|
|
bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN);
|
|
|
|
e->ether_type = eh->ether_type;
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
PF_UNLOCK();
|
|
|
|
/* XXX_IMPORT: later */
|
|
|
|
ip_output(m, (void *)NULL, &ro, 0,
|
|
|
|
(void *)NULL, (void *)NULL);
|
|
|
|
PF_LOCK();
|
2004-02-26 02:34:12 +00:00
|
|
|
#else /* ! __FreeBSD__ */
|
2005-05-03 16:43:32 +00:00
|
|
|
ip_output(m, (void *)NULL, &ro, IP_ROUTETOETHER,
|
|
|
|
(void *)NULL, (void *)NULL);
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
|
|
|
/* TCP checksum */
|
|
|
|
th->th_sum = in6_cksum(m, IPPROTO_TCP,
|
|
|
|
sizeof(struct ip6_hdr), tlen);
|
|
|
|
|
|
|
|
h6->ip6_vfc |= IPV6_VERSION;
|
|
|
|
h6->ip6_hlim = IPV6_DEFHLIM;
|
|
|
|
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
PF_UNLOCK();
|
|
|
|
ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
|
|
|
|
PF_LOCK();
|
|
|
|
#else
|
2011-06-28 11:57:25 +00:00
|
|
|
ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
static void
|
2004-02-26 02:04:28 +00:00
|
|
|
pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
|
|
|
|
struct pf_rule *r)
|
|
|
|
{
|
|
|
|
struct mbuf *m0;
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2011-04-27 19:34:01 +00:00
|
|
|
#ifdef INET
|
2004-02-26 02:34:12 +00:00
|
|
|
struct ip *ip;
|
|
|
|
#endif
|
2011-06-28 11:57:25 +00:00
|
|
|
struct pf_mtag *pf_mtag;
|
2011-04-27 19:34:01 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
m0 = m_copypacket(m, M_DONTWAIT);
|
2004-07-17 05:10:06 +00:00
|
|
|
if (m0 == NULL)
|
|
|
|
return;
|
2004-02-26 02:34:12 +00:00
|
|
|
#else
|
2011-06-28 11:57:25 +00:00
|
|
|
if ((m0 = m_copy(m, 0, M_COPYALL)) == NULL)
|
|
|
|
return;
|
2007-07-03 12:16:07 +00:00
|
|
|
#endif
|
2011-06-28 11:57:25 +00:00
|
|
|
|
|
|
|
#ifdef __FreeBSD__
|
2007-07-03 12:16:07 +00:00
|
|
|
if ((pf_mtag = pf_get_mtag(m0)) == NULL)
|
2004-02-26 02:04:28 +00:00
|
|
|
return;
|
2007-07-03 12:16:07 +00:00
|
|
|
/* XXX: revisit */
|
|
|
|
m0->m_flags |= M_SKIP_FIREWALL;
|
|
|
|
#else
|
2011-06-28 11:57:25 +00:00
|
|
|
m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
|
2004-07-17 05:10:06 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2007-07-03 12:16:07 +00:00
|
|
|
if (r->rtableid >= 0)
|
Add code to allow the system to handle multiple routing tables.
This particular implementation is designed to be fully backwards compatible
and to be MFC-able to 7.x (and 6.x)
Currently the only protocol that can make use of the multiple tables is IPv4
Similar functionality exists in OpenBSD and Linux.
From my notes:
-----
One thing where FreeBSD has been falling behind, and which by chance I
have some time to work on is "policy based routing", which allows
different
packet streams to be routed by more than just the destination address.
Constraints:
------------
I want to make some form of this available in the 6.x tree
(and by extension 7.x) , but FreeBSD in general needs it so I might as
well do it in -current and back port the portions I need.
One of the ways that this can be done is to have the ability to
instantiate multiple kernel routing tables (which I will now
refer to as "Forwarding Information Bases" or "FIBs" for political
correctness reasons). Which FIB a particular packet uses to make
the next hop decision can be decided by a number of mechanisms.
The policies these mechanisms implement are the "Policies" referred
to in "Policy based routing".
One of the constraints I have if I try to back port this work to
6.x is that it must be implemented as an EXTENSION to the existing
ABIs in 6.x so that third party applications do not need to be
recompiled in the timespan of the branch.
This first version will not have some of the bells and whistles that
will come with later versions. It will, for example, be limited to 16
tables in the first commit.
Implementation method, Compatible version. (part 1)
-------------------------------
For this reason I have implemented a "sufficient subset" of a
multiple routing table solution in Perforce, and back-ported it
to 6.x. (also in Perforce though not always caught up with what I
have done in -current/P4). The subset allows a number of FIBs
to be defined at compile time (8 is sufficient for my purposes in 6.x)
and implements the changes needed to allow IPV4 to use them. I have not
done the changes for ipv6 simply because I do not need it, and I do not
have enough knowledge of ipv6 (e.g. neighbor discovery) needed to do it.
Other protocol families are left untouched and should there be
users with proprietary protocol families, they should continue to work
and be oblivious to the existence of the extra FIBs.
To understand how this is done, one must know that the current FIB
code starts everything off with a single dimensional array of
pointers to FIB head structures (One per protocol family), each of
which in turn points to the trie of routes available to that family.
The basic change in the ABI compatible version of the change is to
extend that array to be a 2 dimensional array, so that
instead of protocol family X looking at rt_tables[X] for the
table it needs, it looks at rt_tables[Y][X] when for all
protocol families except ipv4 Y is always 0.
Code that is unaware of the change always just sees the first row
of the table, which of course looks just like the one dimensional
array that existed before.
The entry points rtrequest(), rtalloc(), rtalloc1(), rtalloc_ign()
are all maintained, but refer only to the first row of the array,
so that existing callers in proprietary protocols can continue to
do the "right thing".
Some new entry points are added, for the exclusive use of ipv4 code
called in_rtrequest(), in_rtalloc(), in_rtalloc1() and in_rtalloc_ign(),
which have an extra argument which refers the code to the correct row.
In addition, there are some new entry points (currently called
rtalloc_fib() and friends) that check the Address family being
looked up and call either rtalloc() (and friends) if the protocol
is not IPv4 forcing the action to row 0 or to the appropriate row
if it IS IPv4 (and that info is available). These are for calling
from code that is not specific to any particular protocol. The way
these are implemented would change in the non ABI preserving code
to be added later.
One feature of the first version of the code is that for ipv4,
the interface routes show up automatically on all the FIBs, so
that no matter what FIB you select you always have the basic
direct attached hosts available to you. (rtinit() does this
automatically).
You CAN delete an interface route from one FIB should you want
to but by default it's there. ARP information is also available
in each FIB. It's assumed that the same machine would have the
same MAC address, regardless of which FIB you are using to get
to it.
This brings us as to how the correct FIB is selected for an outgoing
IPV4 packet.
Firstly, all packets have a FIB associated with them. if nothing
has been done to change it, it will be FIB 0. The FIB is changed
in the following ways.
Packets fall into one of a number of classes.
1/ locally generated packets, coming from a socket/PCB.
Such packets select a FIB from a number associated with the
socket/PCB. This in turn is inherited from the process,
but can be changed by a socket option. The process in turn
inherits it on fork. I have written a utility called setfib
that acts a bit like nice..
setfib -3 ping target.example.com # will use fib 3 for ping.
It is an obvious extension to make it a property of a jail
but I have not done so. It can be achieved by combining the setfib and
jail commands.
2/ packets received on an interface for forwarding.
By default these packets would use table 0,
(or possibly a number settable in a sysctl(not yet)).
but prior to routing the firewall can inspect them (see below).
(possibly in the future you may be able to associate a FIB
with packets received on an interface.. An ifconfig arg, but not yet.)
3/ packets inspected by a packet classifier, which can arbitrarily
associate a fib with it on a packet by packet basis.
A fib assigned to a packet by a packet classifier
(such as ipfw) would over-ride a fib associated by
a more default source. (such as cases 1 or 2).
4/ a tcp listen socket associated with a fib will generate
accept sockets that are associated with that same fib.
5/ Packets generated in response to some other packet (e.g. reset
or icmp packets). These should use the FIB associated with the
packet being responded to.
6/ Packets generated during encapsulation.
gif, tun and other tunnel interfaces will encapsulate using the FIB
that was in effect with the process that set up the tunnel.
thus setfib 1 ifconfig gif0 [tunnel instructions]
will set the fib for the tunnel to use to be fib 1.
Routing messages would be associated with their
process, and thus select one FIB or another.
messages from the kernel would be associated with the fib they
refer to and would only be received by a routing socket associated
with that fib. (not yet implemented)
In addition Netstat has been edited to be able to cope with the
fact that the array is now 2 dimensional. (It looks in system
memory using libkvm (!)). Old versions of netstat see only the first FIB.
In addition two sysctls are added to give:
a) the number of FIBs compiled in (active)
b) the default FIB of the calling process.
Early testing experience:
-------------------------
Basically our (IronPort's) appliance does this functionality already
using ipfw fwd but that method has some drawbacks.
For example,
It can't fully simulate a routing table because it can't influence the
socket's choice of local address when a connect() is done.
Testing during the generating of these changes has been
remarkably smooth so far. Multiple tables have co-existed
with no notable side effects, and packets have been routed
accordingly.
ipfw has grown 2 new keywords:
setfib N ip from any to any
count ip from any to any fib N
In pf there seems to be a requirement to be able to give symbolic names to the
fibs but I do not have that capacity. I am not sure if it is required.
SCTP has interestingly enough built in support for this, called VRFs
in Cisco parlance. it will be interesting to see how that handles it
when it suddenly actually does something.
Where to next:
--------------------
After committing the ABI compatible version and MFCing it, I'd
like to proceed in a forward direction in -current. this will
result in some roto-tilling in the routing code.
Firstly: the current code's idea of having a separate tree per
protocol family, all of the same format, and pointed to by the
1 dimensional array is a bit silly. Especially when one considers that
there is code that makes assumptions about every protocol having the
same internal structures there. Some protocols don't WANT that
sort of structure. (for example the whole idea of a netmask is foreign
to appletalk). This needs to be made opaque to the external code.
My suggested first change is to add routing method pointers to the
'domain' structure, along with information pointing to the data.
instead of having an array of pointers to uniform structures,
there would be an array pointing to the 'domain' structures
for each protocol address domain (protocol family),
and the methods this reached would be called. The methods would have
an argument that gives FIB number, but the protocol would be free
to ignore it.
When the ABI can be changed it raises the possibility of the
addition of a fib entry into the "struct route". Currently,
the structure contains the sockaddr of the destination, and the resulting
fib entry. To make this work fully, one could add a fib number
so that given an address and a fib, one can find the third element, the
fib entry.
Interaction with the ARP layer/ LL layer would need to be
revisited as well. Qing Li has been working on this already.
This work was sponsored by Ironport Systems/Cisco
Reviewed by: several including rwatson, bz and mlair (parts each)
Obtained from: Ironport systems/Cisco
2008-05-09 23:03:00 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
{
|
|
|
|
M_SETFIB(m0, r->rtableid);
|
2007-07-03 12:16:07 +00:00
|
|
|
pf_mtag->rtableid = r->rtableid;
|
2011-06-28 11:57:25 +00:00
|
|
|
#else
|
|
|
|
m0->m_pkthdr.pf.rtableid = r->rtableid;
|
|
|
|
#endif
|
Add code to allow the system to handle multiple routing tables.
This particular implementation is designed to be fully backwards compatible
and to be MFC-able to 7.x (and 6.x)
Currently the only protocol that can make use of the multiple tables is IPv4
Similar functionality exists in OpenBSD and Linux.
From my notes:
-----
One thing where FreeBSD has been falling behind, and which by chance I
have some time to work on is "policy based routing", which allows
different
packet streams to be routed by more than just the destination address.
Constraints:
------------
I want to make some form of this available in the 6.x tree
(and by extension 7.x) , but FreeBSD in general needs it so I might as
well do it in -current and back port the portions I need.
One of the ways that this can be done is to have the ability to
instantiate multiple kernel routing tables (which I will now
refer to as "Forwarding Information Bases" or "FIBs" for political
correctness reasons). Which FIB a particular packet uses to make
the next hop decision can be decided by a number of mechanisms.
The policies these mechanisms implement are the "Policies" referred
to in "Policy based routing".
One of the constraints I have if I try to back port this work to
6.x is that it must be implemented as an EXTENSION to the existing
ABIs in 6.x so that third party applications do not need to be
recompiled in the timespan of the branch.
This first version will not have some of the bells and whistles that
will come with later versions. It will, for example, be limited to 16
tables in the first commit.
Implementation method, Compatible version. (part 1)
-------------------------------
For this reason I have implemented a "sufficient subset" of a
multiple routing table solution in Perforce, and back-ported it
to 6.x. (also in Perforce though not always caught up with what I
have done in -current/P4). The subset allows a number of FIBs
to be defined at compile time (8 is sufficient for my purposes in 6.x)
and implements the changes needed to allow IPV4 to use them. I have not
done the changes for ipv6 simply because I do not need it, and I do not
have enough knowledge of ipv6 (e.g. neighbor discovery) needed to do it.
Other protocol families are left untouched and should there be
users with proprietary protocol families, they should continue to work
and be oblivious to the existence of the extra FIBs.
To understand how this is done, one must know that the current FIB
code starts everything off with a single dimensional array of
pointers to FIB head structures (One per protocol family), each of
which in turn points to the trie of routes available to that family.
The basic change in the ABI compatible version of the change is to
extend that array to be a 2 dimensional array, so that
instead of protocol family X looking at rt_tables[X] for the
table it needs, it looks at rt_tables[Y][X] when for all
protocol families except ipv4 Y is always 0.
Code that is unaware of the change always just sees the first row
of the table, which of course looks just like the one dimensional
array that existed before.
The entry points rtrequest(), rtalloc(), rtalloc1(), rtalloc_ign()
are all maintained, but refer only to the first row of the array,
so that existing callers in proprietary protocols can continue to
do the "right thing".
Some new entry points are added, for the exclusive use of ipv4 code
called in_rtrequest(), in_rtalloc(), in_rtalloc1() and in_rtalloc_ign(),
which have an extra argument which refers the code to the correct row.
In addition, there are some new entry points (currently called
rtalloc_fib() and friends) that check the Address family being
looked up and call either rtalloc() (and friends) if the protocol
is not IPv4 forcing the action to row 0 or to the appropriate row
if it IS IPv4 (and that info is available). These are for calling
from code that is not specific to any particular protocol. The way
these are implemented would change in the non ABI preserving code
to be added later.
One feature of the first version of the code is that for ipv4,
the interface routes show up automatically on all the FIBs, so
that no matter what FIB you select you always have the basic
direct attached hosts available to you. (rtinit() does this
automatically).
You CAN delete an interface route from one FIB should you want
to but by default it's there. ARP information is also available
in each FIB. It's assumed that the same machine would have the
same MAC address, regardless of which FIB you are using to get
to it.
This brings us to how the correct FIB is selected for an outgoing
IPV4 packet.
Firstly, all packets have a FIB associated with them. if nothing
has been done to change it, it will be FIB 0. The FIB is changed
in the following ways.
Packets fall into one of a number of classes.
1/ locally generated packets, coming from a socket/PCB.
Such packets select a FIB from a number associated with the
socket/PCB. This in turn is inherited from the process,
but can be changed by a socket option. The process in turn
inherits it on fork. I have written a utility called setfib
that acts a bit like nice..
setfib -3 ping target.example.com # will use fib 3 for ping.
It is an obvious extension to make it a property of a jail
but I have not done so. It can be achieved by combining the setfib and
jail commands.
2/ packets received on an interface for forwarding.
By default these packets would use table 0,
(or possibly a number settable in a sysctl(not yet)).
but prior to routing the firewall can inspect them (see below).
(possibly in the future you may be able to associate a FIB
with packets received on an interface.. An ifconfig arg, but not yet.)
3/ packets inspected by a packet classifier, which can arbitrarily
associate a fib with it on a packet by packet basis.
A fib assigned to a packet by a packet classifier
(such as ipfw) would over-ride a fib associated by
a more default source. (such as cases 1 or 2).
4/ a tcp listen socket associated with a fib will generate
accept sockets that are associated with that same fib.
5/ Packets generated in response to some other packet (e.g. reset
or icmp packets). These should use the FIB associated with the
packet being responded to.
6/ Packets generated during encapsulation.
gif, tun and other tunnel interfaces will encapsulate using the FIB
that was in effect with the process that set up the tunnel.
thus setfib 1 ifconfig gif0 [tunnel instructions]
will set the fib for the tunnel to use to be fib 1.
Routing messages would be associated with their
process, and thus select one FIB or another.
messages from the kernel would be associated with the fib they
refer to and would only be received by a routing socket associated
with that fib. (not yet implemented)
In addition Netstat has been edited to be able to cope with the
fact that the array is now 2 dimensional. (It looks in system
memory using libkvm (!)). Old versions of netstat see only the first FIB.
In addition two sysctls are added to give:
a) the number of FIBs compiled in (active)
b) the default FIB of the calling process.
Early testing experience:
-------------------------
Basically our (IronPort's) appliance does this functionality already
using ipfw fwd but that method has some drawbacks.
For example,
It can't fully simulate a routing table because it can't influence the
socket's choice of local address when a connect() is done.
Testing during the generating of these changes has been
remarkably smooth so far. Multiple tables have co-existed
with no notable side effects, and packets have been routed
accordingly.
ipfw has grown 2 new keywords:
setfib N ip from any to any
count ip from any to any fib N
In pf there seems to be a requirement to be able to give symbolic names to the
fibs but I do not have that capacity. I am not sure if it is required.
SCTP has interestingly enough built in support for this, called VRFs
in Cisco parlance. it will be interesting to see how that handles it
when it suddenly actually does something.
Where to next:
--------------------
After committing the ABI compatible version and MFCing it, I'd
like to proceed in a forward direction in -current. this will
result in some roto-tilling in the routing code.
Firstly: the current code's idea of having a separate tree per
protocol family, all of the same format, and pointed to by the
1 dimensional array is a bit silly. Especially when one considers that
there is code that makes assumptions about every protocol having the
same internal structures there. Some protocols don't WANT that
sort of structure. (for example the whole idea of a netmask is foreign
to appletalk). This needs to be made opaque to the external code.
My suggested first change is to add routing method pointers to the
'domain' structure, along with information pointing to the data.
instead of having an array of pointers to uniform structures,
there would be an array pointing to the 'domain' structures
for each protocol address domain (protocol family),
and the methods this reached would be called. The methods would have
an argument that gives FIB number, but the protocol would be free
to ignore it.
When the ABI can be changed it raises the possibility of the
addition of a fib entry into the "struct route". Currently,
the structure contains the sockaddr of the destination, and the resulting
fib entry. To make this work fully, one could add a fib number
so that given an address and a fib, one can find the third element, the
fib entry.
Interaction with the ARP layer/ LL layer would need to be
revisited as well. Qing Li has been working on this already.
This work was sponsored by Ironport Systems/Cisco
Reviewed by: several including rwatson, bz and mlair (parts each)
Obtained from: Ironport systems/Cisco
2008-05-09 23:03:00 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
}
|
|
|
|
#endif
|
2007-07-03 12:16:07 +00:00
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
#ifdef ALTQ
|
|
|
|
if (r->qid) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
2007-07-03 12:16:07 +00:00
|
|
|
pf_mtag->qid = r->qid;
|
|
|
|
/* add hints for ecn */
|
|
|
|
pf_mtag->hdr = mtod(m0, struct ip *);
|
2011-06-28 11:57:25 +00:00
|
|
|
#else
|
|
|
|
m0->m_pkthdr.pf.qid = r->qid;
|
|
|
|
/* add hints for ecn */
|
|
|
|
m0->m_pkthdr.pf.hdr = mtod(m0, struct ip *);
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
2005-05-03 16:43:32 +00:00
|
|
|
#endif /* ALTQ */
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
switch (af) {
|
|
|
|
#ifdef INET
|
|
|
|
case AF_INET:
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
/* icmp_error() expects host byte ordering */
|
|
|
|
ip = mtod(m0, struct ip *);
|
|
|
|
NTOHS(ip->ip_len);
|
|
|
|
NTOHS(ip->ip_off);
|
|
|
|
PF_UNLOCK();
|
2005-05-04 13:09:19 +00:00
|
|
|
icmp_error(m0, type, code, 0, 0);
|
2004-02-26 02:34:12 +00:00
|
|
|
PF_LOCK();
|
2005-05-04 15:55:29 +00:00
|
|
|
#else
|
2007-07-03 12:16:07 +00:00
|
|
|
icmp_error(m0, type, code, 0, 0);
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
PF_UNLOCK();
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
icmp6_error(m0, type, code, 0);
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
PF_LOCK();
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Compare addresses a and b under mask m for address family af.
 *
 * With n == 0 the function returns 1 when the masked addresses are equal
 * and 0 when they differ; with n != 0 the result is inverted.  An address
 * family that is not handled by the switch below counts as "not equal".
 */
int
pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
    struct pf_addr *b, sa_family_t af)
{
	int	equal = 0;

	switch (af) {
#ifdef INET
	case AF_INET:
		/* Only the first 32-bit word is significant for IPv4. */
		equal = ((a->addr32[0] & m->addr32[0]) ==
		    (b->addr32[0] & m->addr32[0]));
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		int	i;

		/* All four masked words have to agree. */
		equal = 1;
		for (i = 0; i < 4; i++) {
			if ((a->addr32[i] & m->addr32[i]) !=
			    (b->addr32[i] & m->addr32[i])) {
				equal = 0;
				break;
			}
		}
		break;
	}
#endif /* INET6 */
	}

	/* n selects between "match on equal" and "match on different". */
	if (n)
		return (!equal);
	return (equal);
}
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
/*
 * Return 1 if b <= a <= e, otherwise return 0.
 *
 * b and e are the inclusive lower and upper bounds of the range, a is the
 * address being tested and af selects how many 32-bit words take part in
 * the comparison (one for AF_INET, four for AF_INET6).  An address family
 * not handled by the switch falls through and yields 1.
 *
 * Every word is converted with ntohl() before it is compared so that the
 * ordering is numeric on little-endian hosts as well.
 * NOTE(review): this assumes struct pf_addr stores addresses in network
 * byte order -- confirm against the structure's definition.
 */
int
pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
    struct pf_addr *a, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET:
		if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) ||
		    (ntohl(a->addr32[0]) > ntohl(e->addr32[0])))
			return (0);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		int	i;

		/* check a >= b, most significant word first */
		for (i = 0; i < 4; ++i) {
			u_int32_t	aw = ntohl(a->addr32[i]);
			u_int32_t	bw = ntohl(b->addr32[i]);

			if (aw > bw)
				break;
			else if (aw < bw)
				return (0);
		}
		/* check a <= e, most significant word first */
		for (i = 0; i < 4; ++i) {
			u_int32_t	aw = ntohl(a->addr32[i]);
			u_int32_t	ew = ntohl(e->addr32[i]);

			if (aw < ew)
				break;
			else if (aw > ew)
				return (0);
		}
		break;
	}
#endif /* INET6 */
	}
	return (1);
}
|
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
int
|
|
|
|
pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
|
|
|
|
{
|
|
|
|
switch (op) {
|
|
|
|
case PF_OP_IRG:
|
|
|
|
return ((p > a1) && (p < a2));
|
|
|
|
case PF_OP_XRG:
|
|
|
|
return ((p < a1) || (p > a2));
|
|
|
|
case PF_OP_RRG:
|
|
|
|
return ((p >= a1) && (p <= a2));
|
|
|
|
case PF_OP_EQ:
|
|
|
|
return (p == a1);
|
|
|
|
case PF_OP_NE:
|
|
|
|
return (p != a1);
|
|
|
|
case PF_OP_LT:
|
|
|
|
return (p < a1);
|
|
|
|
case PF_OP_LE:
|
|
|
|
return (p <= a1);
|
|
|
|
case PF_OP_GT:
|
|
|
|
return (p > a1);
|
|
|
|
case PF_OP_GE:
|
|
|
|
return (p >= a1);
|
|
|
|
}
|
|
|
|
return (0); /* never reached */
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Port flavour of pf_match(): the two operands and the port under test
 * are byte-swapped with NTOHS() before being handed to pf_match(), so
 * that the comparison is carried out on host byte order values.
 */
int
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
{
	u_int16_t	first = a1;
	u_int16_t	second = a2;
	u_int16_t	port = p;

	NTOHS(first);
	NTOHS(second);
	NTOHS(port);

	return (pf_match(op, first, second, port));
}
|
|
|
|
|
|
|
|
int
|
|
|
|
pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
|
|
|
|
{
|
|
|
|
if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
|
|
|
|
return (0);
|
|
|
|
return (pf_match(op, a1, a2, u));
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
|
|
|
|
{
|
|
|
|
if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
|
|
|
|
return (0);
|
|
|
|
return (pf_match(op, a1, a2, g));
|
|
|
|
}
|
|
|
|
|
2007-07-03 12:16:07 +00:00
|
|
|
int
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag,
|
|
|
|
struct pf_mtag *pf_mtag)
|
|
|
|
#else
|
|
|
|
pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag)
|
|
|
|
#endif
|
2007-07-03 12:16:07 +00:00
|
|
|
{
|
|
|
|
if (*tag == -1)
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
2007-07-03 12:16:07 +00:00
|
|
|
*tag = pf_mtag->tag;
|
2011-06-28 11:57:25 +00:00
|
|
|
#else
|
|
|
|
*tag = m->m_pkthdr.pf.tag;
|
|
|
|
#endif
|
2007-07-03 12:16:07 +00:00
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
return ((!r->match_tag_not && r->match_tag == *tag) ||
|
|
|
|
(r->match_tag_not && r->match_tag != *tag));
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pf_tag_packet(struct mbuf *m, int tag, int rtableid,
|
|
|
|
struct pf_mtag *pf_mtag)
|
|
|
|
#else
|
|
|
|
pf_tag_packet(struct mbuf *m, int tag, int rtableid)
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
{
|
2007-07-03 12:16:07 +00:00
|
|
|
if (tag <= 0 && rtableid < 0)
|
2004-02-26 02:04:28 +00:00
|
|
|
return (0);
|
|
|
|
|
2007-07-03 12:16:07 +00:00
|
|
|
if (tag > 0)
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
2007-07-03 12:16:07 +00:00
|
|
|
pf_mtag->tag = tag;
|
2011-06-28 11:57:25 +00:00
|
|
|
#else
|
|
|
|
m->m_pkthdr.pf.tag = tag;
|
|
|
|
#endif
|
2007-07-03 12:16:07 +00:00
|
|
|
if (rtableid >= 0)
|
Add code to allow the system to handle multiple routing tables.
This particular implementation is designed to be fully backwards compatible
and to be MFC-able to 7.x (and 6.x)
Currently the only protocol that can make use of the multiple tables is IPv4
Similar functionality exists in OpenBSD and Linux.
From my notes:
-----
One thing where FreeBSD has been falling behind, and which by chance I
have some time to work on is "policy based routing", which allows
different
packet streams to be routed by more than just the destination address.
Constraints:
------------
I want to make some form of this available in the 6.x tree
(and by extension 7.x) , but FreeBSD in general needs it so I might as
well do it in -current and back port the portions I need.
One of the ways that this can be done is to have the ability to
instantiate multiple kernel routing tables (which I will now
refer to as "Forwarding Information Bases" or "FIBs" for political
correctness reasons). Which FIB a particular packet uses to make
the next hop decision can be decided by a number of mechanisms.
The policies these mechanisms implement are the "Policies" referred
to in "Policy based routing".
One of the constraints I have if I try to back port this work to
6.x is that it must be implemented as a EXTENSION to the existing
ABIs in 6.x so that third party applications do not need to be
recompiled in timespan of the branch.
This first version will not have some of the bells and whistles that
will come with later versions. It will, for example, be limited to 16
tables in the first commit.
Implementation method, Compatible version. (part 1)
-------------------------------
For this reason I have implemented a "sufficient subset" of a
multiple routing table solution in Perforce, and back-ported it
to 6.x. (also in Perforce though not always caught up with what I
have done in -current/P4). The subset allows a number of FIBs
to be defined at compile time (8 is sufficient for my purposes in 6.x)
and implements the changes needed to allow IPV4 to use them. I have not
done the changes for ipv6 simply because I do not need it, and I do not
have enough knowledge of ipv6 (e.g. neighbor discovery) needed to do it.
Other protocol families are left untouched and should there be
users with proprietary protocol families, they should continue to work
and be oblivious to the existence of the extra FIBs.
To understand how this is done, one must know that the current FIB
code starts everything off with a single dimensional array of
pointers to FIB head structures (One per protocol family), each of
which in turn points to the trie of routes available to that family.
The basic change in the ABI compatible version of the change is to
extent that array to be a 2 dimensional array, so that
instead of protocol family X looking at rt_tables[X] for the
table it needs, it looks at rt_tables[Y][X] when for all
protocol families except ipv4 Y is always 0.
Code that is unaware of the change always just sees the first row
of the table, which of course looks just like the one dimensional
array that existed before.
The entry points rtrequest(), rtalloc(), rtalloc1(), rtalloc_ign()
are all maintained, but refer only to the first row of the array,
so that existing callers in proprietary protocols can continue to
do the "right thing".
Some new entry points are added, for the exclusive use of ipv4 code
called in_rtrequest(), in_rtalloc(), in_rtalloc1() and in_rtalloc_ign(),
which have an extra argument which refers the code to the correct row.
In addition, there are some new entry points (currently called
rtalloc_fib() and friends) that check the Address family being
looked up and call either rtalloc() (and friends) if the protocol
is not IPv4 forcing the action to row 0 or to the appropriate row
if it IS IPv4 (and that info is available). These are for calling
from code that is not specific to any particular protocol. The way
these are implemented would change in the non ABI preserving code
to be added later.
One feature of the first version of the code is that for ipv4,
the interface routes show up automatically on all the FIBs, so
that no matter what FIB you select you always have the basic
direct attached hosts available to you. (rtinit() does this
automatically).
You CAN delete an interface route from one FIB should you want
to but by default it's there. ARP information is also available
in each FIB. It's assumed that the same machine would have the
same MAC address, regardless of which FIB you are using to get
to it.
This brings us as to how the correct FIB is selected for an outgoing
IPV4 packet.
Firstly, all packets have a FIB associated with them. if nothing
has been done to change it, it will be FIB 0. The FIB is changed
in the following ways.
Packets fall into one of a number of classes.
1/ locally generated packets, coming from a socket/PCB.
Such packets select a FIB from a number associated with the
socket/PCB. This in turn is inherited from the process,
but can be changed by a socket option. The process in turn
inherits it on fork. I have written a utility call setfib
that acts a bit like nice..
setfib -3 ping target.example.com # will use fib 3 for ping.
It is an obvious extension to make it a property of a jail
but I have not done so. It can be achieved by combining the setfib and
jail commands.
2/ packets received on an interface for forwarding.
By default these packets would use table 0,
(or possibly a number settable in a sysctl(not yet)).
but prior to routing the firewall can inspect them (see below).
(possibly in the future you may be able to associate a FIB
with packets received on an interface.. An ifconfig arg, but not yet.)
3/ packets inspected by a packet classifier, which can arbitrarily
associate a fib with it on a packet by packet basis.
A fib assigned to a packet by a packet classifier
(such as ipfw) would over-ride a fib associated by
a more default source. (such as cases 1 or 2).
4/ a tcp listen socket associated with a fib will generate
accept sockets that are associated with that same fib.
5/ Packets generated in response to some other packet (e.g. reset
or icmp packets). These should use the FIB associated with the
packet being reponded to.
6/ Packets generated during encapsulation.
gif, tun and other tunnel interfaces will encapsulate using the FIB
that was in effect withthe proces that set up the tunnel.
thus setfib 1 ifconfig gif0 [tunnel instructions]
will set the fib for the tunnel to use to be fib 1.
Routing messages would be associated with their
process, and thus select one FIB or another.
messages from the kernel would be associated with the fib they
refer to and would only be received by a routing socket associated
with that fib. (not yet implemented)
In addition Netstat has been edited to be able to cope with the
fact that the array is now 2 dimensional. (It looks in system
memory using libkvm (!)). Old versions of netstat see only the first FIB.
In addition two sysctls are added to give:
a) the number of FIBs compiled in (active)
b) the default FIB of the calling process.
Early testing experience:
-------------------------
Basically our (IronPort's) appliance does this functionality already
using ipfw fwd but that method has some drawbacks.
For example,
It can't fully simulate a routing table because it can't influence the
socket's choice of local address when a connect() is done.
Testing during the generating of these changes has been
remarkably smooth so far. Multiple tables have co-existed
with no notable side effects, and packets have been routes
accordingly.
ipfw has grown 2 new keywords:
setfib N ip from anay to any
count ip from any to any fib N
In pf there seems to be a requirement to be able to give symbolic names to the
fibs but I do not have that capacity. I am not sure if it is required.
SCTP has interestingly enough built in support for this, called VRFs
in Cisco parlance. it will be interesting to see how that handles it
when it suddenly actually does something.
Where to next:
--------------------
After committing the ABI compatible version and MFCing it, I'd
like to proceed in a forward direction in -current. this will
result in some roto-tilling in the routing code.
Firstly: the current code's idea of having a separate tree per
protocol family, all of the same format, and pointed to by the
1 dimensional array is a bit silly. Especially when one considers that
there is code that makes assumptions about every protocol having the
same internal structures there. Some protocols don't WANT that
sort of structure. (for example the whole idea of a netmask is foreign
to appletalk). This needs to be made opaque to the external code.
My suggested first change is to add routing method pointers to the
'domain' structure, along with information pointing the data.
instead of having an array of pointers to uniform structures,
there would be an array pointing to the 'domain' structures
for each protocol address domain (protocol family),
and the methods this reached would be called. The methods would have
an argument that gives FIB number, but the protocol would be free
to ignore it.
When the ABI can be changed it raises the possibilty of the
addition of a fib entry into the "struct route". Currently,
the structure contains the sockaddr of the desination, and the resulting
fib entry. To make this work fully, one could add a fib number
so that given an address and a fib, one can find the third element, the
fib entry.
Interaction with the ARP layer/ LL layer would need to be
revisited as well. Qing Li has been working on this already.
This work was sponsored by Ironport Systems/Cisco
Reviewed by: several including rwatson, bz and mlair (parts each)
Obtained from: Ironport systems/Cisco
2008-05-09 23:03:00 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
{
|
|
|
|
M_SETFIB(m, rtableid);
|
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
#else
|
|
|
|
m->m_pkthdr.pf.rtableid = rtableid;
|
Add code to allow the system to handle multiple routing tables.
This particular implementation is designed to be fully backwards compatible
and to be MFC-able to 7.x (and 6.x)
Currently the only protocol that can make use of the multiple tables is IPv4
Similar functionality exists in OpenBSD and Linux.
From my notes:
-----
One thing where FreeBSD has been falling behind, and which by chance I
have some time to work on is "policy based routing", which allows
different
packet streams to be routed by more than just the destination address.
Constraints:
------------
I want to make some form of this available in the 6.x tree
(and by extension 7.x) , but FreeBSD in general needs it so I might as
well do it in -current and back port the portions I need.
One of the ways that this can be done is to have the ability to
instantiate multiple kernel routing tables (which I will now
refer to as "Forwarding Information Bases" or "FIBs" for political
correctness reasons). Which FIB a particular packet uses to make
the next hop decision can be decided by a number of mechanisms.
The policies these mechanisms implement are the "Policies" referred
to in "Policy based routing".
One of the constraints I have if I try to back port this work to
6.x is that it must be implemented as a EXTENSION to the existing
ABIs in 6.x so that third party applications do not need to be
recompiled in timespan of the branch.
This first version will not have some of the bells and whistles that
will come with later versions. It will, for example, be limited to 16
tables in the first commit.
Implementation method, Compatible version. (part 1)
-------------------------------
For this reason I have implemented a "sufficient subset" of a
multiple routing table solution in Perforce, and back-ported it
to 6.x. (also in Perforce though not always caught up with what I
have done in -current/P4). The subset allows a number of FIBs
to be defined at compile time (8 is sufficient for my purposes in 6.x)
and implements the changes needed to allow IPV4 to use them. I have not
done the changes for ipv6 simply because I do not need it, and I do not
have enough knowledge of ipv6 (e.g. neighbor discovery) needed to do it.
Other protocol families are left untouched and should there be
users with proprietary protocol families, they should continue to work
and be oblivious to the existence of the extra FIBs.
To understand how this is done, one must know that the current FIB
code starts everything off with a single dimensional array of
pointers to FIB head structures (One per protocol family), each of
which in turn points to the trie of routes available to that family.
The basic change in the ABI compatible version of the change is to
extent that array to be a 2 dimensional array, so that
instead of protocol family X looking at rt_tables[X] for the
table it needs, it looks at rt_tables[Y][X] when for all
protocol families except ipv4 Y is always 0.
Code that is unaware of the change always just sees the first row
of the table, which of course looks just like the one dimensional
array that existed before.
The entry points rtrequest(), rtalloc(), rtalloc1(), rtalloc_ign()
are all maintained, but refer only to the first row of the array,
so that existing callers in proprietary protocols can continue to
do the "right thing".
Some new entry points are added, for the exclusive use of ipv4 code
called in_rtrequest(), in_rtalloc(), in_rtalloc1() and in_rtalloc_ign(),
which have an extra argument which refers the code to the correct row.
In addition, there are some new entry points (currently called
rtalloc_fib() and friends) that check the Address family being
looked up and call either rtalloc() (and friends) if the protocol
is not IPv4 forcing the action to row 0 or to the appropriate row
if it IS IPv4 (and that info is available). These are for calling
from code that is not specific to any particular protocol. The way
these are implemented would change in the non ABI preserving code
to be added later.
One feature of the first version of the code is that for ipv4,
the interface routes show up automatically on all the FIBs, so
that no matter what FIB you select you always have the basic
direct attached hosts available to you. (rtinit() does this
automatically).
You CAN delete an interface route from one FIB should you want
to but by default it's there. ARP information is also available
in each FIB. It's assumed that the same machine would have the
same MAC address, regardless of which FIB you are using to get
to it.
This brings us as to how the correct FIB is selected for an outgoing
IPV4 packet.
Firstly, all packets have a FIB associated with them. if nothing
has been done to change it, it will be FIB 0. The FIB is changed
in the following ways.
Packets fall into one of a number of classes.
1/ locally generated packets, coming from a socket/PCB.
Such packets select a FIB from a number associated with the
socket/PCB. This in turn is inherited from the process,
but can be changed by a socket option. The process in turn
inherits it on fork. I have written a utility call setfib
that acts a bit like nice..
setfib -3 ping target.example.com # will use fib 3 for ping.
It is an obvious extension to make it a property of a jail
but I have not done so. It can be achieved by combining the setfib and
jail commands.
2/ packets received on an interface for forwarding.
By default these packets would use table 0,
(or possibly a number settable in a sysctl(not yet)).
but prior to routing the firewall can inspect them (see below).
(possibly in the future you may be able to associate a FIB
with packets received on an interface.. An ifconfig arg, but not yet.)
3/ packets inspected by a packet classifier, which can arbitrarily
associate a fib with it on a packet by packet basis.
A fib assigned to a packet by a packet classifier
(such as ipfw) would over-ride a fib associated by
a more default source. (such as cases 1 or 2).
4/ a tcp listen socket associated with a fib will generate
accept sockets that are associated with that same fib.
5/ Packets generated in response to some other packet (e.g. reset
or icmp packets). These should use the FIB associated with the
packet being reponded to.
6/ Packets generated during encapsulation.
gif, tun and other tunnel interfaces will encapsulate using the FIB
that was in effect withthe proces that set up the tunnel.
thus setfib 1 ifconfig gif0 [tunnel instructions]
will set the fib for the tunnel to use to be fib 1.
Routing messages would be associated with their
process, and thus select one FIB or another.
messages from the kernel would be associated with the fib they
refer to and would only be received by a routing socket associated
with that fib. (not yet implemented)
In addition Netstat has been edited to be able to cope with the
fact that the array is now 2 dimensional. (It looks in system
memory using libkvm (!)). Old versions of netstat see only the first FIB.
In addition two sysctls are added to give:
a) the number of FIBs compiled in (active)
b) the default FIB of the calling process.
Early testing experience:
-------------------------
Basically our (IronPort's) appliance does this functionality already
using ipfw fwd but that method has some drawbacks.
For example,
It can't fully simulate a routing table because it can't influence the
socket's choice of local address when a connect() is done.
Testing during the generating of these changes has been
remarkably smooth so far. Multiple tables have co-existed
with no notable side effects, and packets have been routes
accordingly.
ipfw has grown 2 new keywords:
setfib N ip from anay to any
count ip from any to any fib N
In pf there seems to be a requirement to be able to give symbolic names to the
fibs but I do not have that capacity. I am not sure if it is required.
SCTP has interestingly enough built in support for this, called VRFs
in Cisco parlance. it will be interesting to see how that handles it
when it suddenly actually does something.
Where to next:
--------------------
After committing the ABI compatible version and MFCing it, I'd
like to proceed in a forward direction in -current. this will
result in some roto-tilling in the routing code.
Firstly: the current code's idea of having a separate tree per
protocol family, all of the same format, and pointed to by the
1 dimensional array is a bit silly. Especially when one considers that
there is code that makes assumptions about every protocol having the
same internal structures there. Some protocols don't WANT that
sort of structure. (for example the whole idea of a netmask is foreign
to appletalk). This needs to be made opaque to the external code.
My suggested first change is to add routing method pointers to the
'domain' structure, along with information pointing the data.
instead of having an array of pointers to uniform structures,
there would be an array pointing to the 'domain' structures
for each protocol address domain (protocol family),
and the methods this reached would be called. The methods would have
an argument that gives FIB number, but the protocol would be free
to ignore it.
When the ABI can be changed it raises the possibilty of the
addition of a fib entry into the "struct route". Currently,
the structure contains the sockaddr of the desination, and the resulting
fib entry. To make this work fully, one could add a fib number
so that given an address and a fib, one can find the third element, the
fib entry.
Interaction with the ARP layer/ LL layer would need to be
revisited as well. Qing Li has been working on this already.
This work was sponsored by Ironport Systems/Cisco
Reviewed by: several including rwatson, bz and mlair (parts each)
Obtained from: Ironport systems/Cisco
2008-05-09 23:03:00 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
void
|
2005-05-03 16:43:32 +00:00
|
|
|
pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n,
|
2011-06-28 11:57:25 +00:00
|
|
|
struct pf_rule **r, struct pf_rule **a, int *match)
|
2005-05-03 16:43:32 +00:00
|
|
|
{
|
|
|
|
struct pf_anchor_stackframe *f;
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2007-07-03 12:16:07 +00:00
|
|
|
(*r)->anchor->match = 0;
|
|
|
|
if (match)
|
|
|
|
*match = 0;
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (*depth >= sizeof(V_pf_anchor_stack) /
|
|
|
|
sizeof(V_pf_anchor_stack[0])) {
|
|
|
|
#else
|
2005-05-03 16:43:32 +00:00
|
|
|
if (*depth >= sizeof(pf_anchor_stack) /
|
|
|
|
sizeof(pf_anchor_stack[0])) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
printf("pf_step_into_anchor: stack overflow\n");
|
|
|
|
*r = TAILQ_NEXT(*r, entries);
|
|
|
|
return;
|
|
|
|
} else if (*depth == 0 && a != NULL)
|
|
|
|
*a = *r;
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
f = V_pf_anchor_stack + (*depth)++;
|
|
|
|
#else
|
2005-05-03 16:43:32 +00:00
|
|
|
f = pf_anchor_stack + (*depth)++;
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
f->rs = *rs;
|
|
|
|
f->r = *r;
|
|
|
|
if ((*r)->anchor_wildcard) {
|
|
|
|
f->parent = &(*r)->anchor->children;
|
|
|
|
if ((f->child = RB_MIN(pf_anchor_node, f->parent)) ==
|
|
|
|
NULL) {
|
|
|
|
*r = NULL;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
*rs = &f->child->ruleset;
|
|
|
|
} else {
|
|
|
|
f->parent = NULL;
|
|
|
|
f->child = NULL;
|
|
|
|
*rs = &(*r)->anchor->ruleset;
|
|
|
|
}
|
|
|
|
*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
|
|
|
|
}
|
|
|
|
|
2007-07-03 12:16:07 +00:00
|
|
|
int
|
2005-05-03 16:43:32 +00:00
|
|
|
pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
|
2007-07-03 12:16:07 +00:00
|
|
|
struct pf_rule **r, struct pf_rule **a, int *match)
|
2005-05-03 16:43:32 +00:00
|
|
|
{
|
|
|
|
struct pf_anchor_stackframe *f;
|
2007-07-03 12:16:07 +00:00
|
|
|
int quick = 0;
|
2005-05-03 16:43:32 +00:00
|
|
|
|
|
|
|
do {
|
|
|
|
if (*depth <= 0)
|
|
|
|
break;
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
f = V_pf_anchor_stack + *depth - 1;
|
|
|
|
#else
|
2005-05-03 16:43:32 +00:00
|
|
|
f = pf_anchor_stack + *depth - 1;
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
if (f->parent != NULL && f->child != NULL) {
|
2007-07-03 12:16:07 +00:00
|
|
|
if (f->child->match ||
|
|
|
|
(match != NULL && *match)) {
|
|
|
|
f->r->anchor->match = 1;
|
|
|
|
*match = 0;
|
|
|
|
}
|
2005-05-03 16:43:32 +00:00
|
|
|
f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
|
|
|
|
if (f->child != NULL) {
|
|
|
|
*rs = &f->child->ruleset;
|
|
|
|
*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
|
|
|
|
if (*r == NULL)
|
|
|
|
continue;
|
|
|
|
else
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
(*depth)--;
|
|
|
|
if (*depth == 0 && a != NULL)
|
|
|
|
*a = NULL;
|
|
|
|
*rs = f->rs;
|
2011-06-28 11:57:25 +00:00
|
|
|
if (f->r->anchor->match || (match != NULL && *match))
|
2007-07-03 12:16:07 +00:00
|
|
|
quick = f->r->quick;
|
2005-05-03 16:43:32 +00:00
|
|
|
*r = TAILQ_NEXT(f->r, entries);
|
|
|
|
} while (*r == NULL);
|
2007-07-03 12:16:07 +00:00
|
|
|
|
|
|
|
return (quick);
|
2005-05-03 16:43:32 +00:00
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
#ifdef INET6
|
|
|
|
void
|
|
|
|
pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
|
|
|
|
struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
|
|
|
|
{
|
|
|
|
switch (af) {
|
|
|
|
#ifdef INET
|
|
|
|
case AF_INET:
|
|
|
|
naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
|
|
|
|
((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
|
|
|
|
break;
|
|
|
|
#endif /* INET */
|
|
|
|
case AF_INET6:
|
|
|
|
naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
|
|
|
|
((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
|
|
|
|
naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
|
|
|
|
((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
|
|
|
|
naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
|
|
|
|
((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
|
|
|
|
naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
|
|
|
|
((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2004-06-16 23:24:02 +00:00
|
|
|
pf_addr_inc(struct pf_addr *addr, sa_family_t af)
|
2004-02-26 02:04:28 +00:00
|
|
|
{
|
|
|
|
switch (af) {
|
|
|
|
#ifdef INET
|
|
|
|
case AF_INET:
|
|
|
|
addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
|
|
|
|
break;
|
|
|
|
#endif /* INET */
|
|
|
|
case AF_INET6:
|
|
|
|
if (addr->addr32[3] == 0xffffffff) {
|
|
|
|
addr->addr32[3] = 0;
|
|
|
|
if (addr->addr32[2] == 0xffffffff) {
|
|
|
|
addr->addr32[2] = 0;
|
|
|
|
if (addr->addr32[1] == 0xffffffff) {
|
|
|
|
addr->addr32[1] = 0;
|
|
|
|
addr->addr32[0] =
|
|
|
|
htonl(ntohl(addr->addr32[0]) + 1);
|
|
|
|
} else
|
|
|
|
addr->addr32[1] =
|
|
|
|
htonl(ntohl(addr->addr32[1]) + 1);
|
|
|
|
} else
|
|
|
|
addr->addr32[2] =
|
|
|
|
htonl(ntohl(addr->addr32[2]) + 1);
|
|
|
|
} else
|
|
|
|
addr->addr32[3] =
|
|
|
|
htonl(ntohl(addr->addr32[3]) + 1);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif /* INET6 */
|
|
|
|
|
|
|
|
int
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pf_socket_lookup(int direction, struct pf_pdesc *pd, struct inpcb *inp_arg)
|
|
|
|
#else
|
|
|
|
pf_socket_lookup(int direction, struct pf_pdesc *pd)
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
{
|
2011-06-28 11:57:25 +00:00
|
|
|
struct pf_addr *saddr, *daddr;
|
|
|
|
u_int16_t sport, dport;
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
struct inpcbinfo *pi;
|
|
|
|
#else
|
|
|
|
struct inpcbtable *tb;
|
|
|
|
#endif
|
|
|
|
struct inpcb *inp;
|
|
|
|
|
|
|
|
if (pd == NULL)
|
|
|
|
return (-1);
|
|
|
|
pd->lookup.uid = UID_MAX;
|
|
|
|
pd->lookup.gid = GID_MAX;
|
|
|
|
pd->lookup.pid = NO_PID;
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (inp_arg != NULL) {
|
|
|
|
INP_LOCK_ASSERT(inp_arg);
|
|
|
|
pd->lookup.uid = inp_arg->inp_cred->cr_uid;
|
|
|
|
pd->lookup.gid = inp_arg->inp_cred->cr_groups[0];
|
2004-02-26 02:04:28 +00:00
|
|
|
return (1);
|
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
switch (pd->proto) {
|
|
|
|
case IPPROTO_TCP:
|
|
|
|
if (pd->hdr.tcp == NULL)
|
|
|
|
return (-1);
|
|
|
|
sport = pd->hdr.tcp->th_sport;
|
|
|
|
dport = pd->hdr.tcp->th_dport;
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pi = &V_tcbinfo;
|
|
|
|
#else
|
|
|
|
tb = &tcbtable;
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
2011-06-28 11:57:25 +00:00
|
|
|
case IPPROTO_UDP:
|
|
|
|
if (pd->hdr.udp == NULL)
|
|
|
|
return (-1);
|
|
|
|
sport = pd->hdr.udp->uh_sport;
|
|
|
|
dport = pd->hdr.udp->uh_dport;
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pi = &V_udbinfo;
|
|
|
|
#else
|
|
|
|
tb = &udbtable;
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
2011-06-28 11:57:25 +00:00
|
|
|
default:
|
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
if (direction == PF_IN) {
|
|
|
|
saddr = pd->src;
|
|
|
|
daddr = pd->dst;
|
|
|
|
} else {
|
|
|
|
u_int16_t p;
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
p = sport;
|
|
|
|
sport = dport;
|
|
|
|
dport = p;
|
|
|
|
saddr = pd->dst;
|
|
|
|
daddr = pd->src;
|
|
|
|
}
|
2004-06-16 23:24:02 +00:00
|
|
|
switch (pd->af) {
|
2005-05-03 16:43:32 +00:00
|
|
|
#ifdef INET
|
2004-02-26 02:04:28 +00:00
|
|
|
case AF_INET:
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
Add _mbuf() variants of various inpcb-related interfaces, including lookup,
hash install, etc. For now, these are arguments are unused, but as we add
RSS support, we will want to use hashes extracted from mbufs, rather than
manually calculated hashes of header fields, due to the expensive of the
software version of Toeplitz (and similar hashes).
Add notes that it would be nice to be able to pass mbufs into lookup
routines in pf(4), optimising firewall lookup in the same way, but the
code structure there doesn't facilitate that currently.
(In principle there is no reason this couldn't be MFCed -- the change
extends rather than modifies the KBI. However, it won't be useful without
other previous possibly less MFCable changes.)
Reviewed by: bz
Sponsored by: Juniper Networks, Inc.
2011-06-04 16:33:06 +00:00
|
|
|
/*
|
|
|
|
* XXXRW: would be nice if we had an mbuf here so that we
|
|
|
|
* could use in_pcblookup_mbuf().
|
|
|
|
*/
|
Decompose the current single inpcbinfo lock into two locks:
- The existing ipi_lock continues to protect the global inpcb list and
inpcb counter. This lock is now relegated to a small number of
allocation and free operations, and occasional operations that walk
all connections (including, awkwardly, certain UDP multicast receive
operations -- something to revisit).
- A new ipi_hash_lock protects the two inpcbinfo hash tables for
looking up connections and bound sockets, manipulated using new
INP_HASH_*() macros. This lock, combined with inpcb locks, protects
the 4-tuple address space.
Unlike the current ipi_lock, ipi_hash_lock follows the individual inpcb
connection locks, so may be acquired while manipulating a connection on
which a lock is already held, avoiding the need to acquire the inpcbinfo
lock preemptively when a binding change might later be required. As a
result, however, lookup operations necessarily go through a reference
acquire while holding the lookup lock, later acquiring an inpcb lock --
if required.
A new function in_pcblookup() looks up connections, and accepts flags
indicating how to return the inpcb. Due to lock order changes, callers
no longer need acquire locks before performing a lookup: the lookup
routine will acquire the ipi_hash_lock as needed. In the future, it will
also be able to use alternative lookup and locking strategies
transparently to callers, such as pcbgroup lookup. New lookup flags are,
supplementing the existing INPLOOKUP_WILDCARD flag:
INPLOOKUP_RLOCKPCB - Acquire a read lock on the returned inpcb
INPLOOKUP_WLOCKPCB - Acquire a write lock on the returned inpcb
Callers must pass exactly one of these flags (for the time being).
Some notes:
- All protocols are updated to work within the new regime; especially,
TCP, UDPv4, and UDPv6. pcbinfo ipi_lock acquisitions are largely
eliminated, and global hash lock hold times are dramatically reduced
compared to previous locking.
- The TCP syncache still relies on the pcbinfo lock, something that we
may want to revisit.
- Support for reverting to the FreeBSD 7.x locking strategy in TCP input
is no longer available -- hash lookup locks are now held only very
briefly during inpcb lookup, rather than for potentially extended
periods. However, the pcbinfo ipi_lock will still be acquired if a
connection state might change such that a connection is added or
removed.
- Raw IP sockets continue to use the pcbinfo ipi_lock for protection,
due to maintaining their own hash tables.
- The interface in6_pcblookup_hash_locked() is maintained, which allows
callers to acquire hash locks and perform one or more lookups atomically
with 4-tuple allocation: this is required only for TCPv6, as there is no
in6_pcbconnect_setup(), which there should be.
- UDPv6 locking remains significantly more conservative than UDPv4
locking, which relates to source address selection. This needs
attention, as it likely significantly reduces parallelism in this code
for multithreaded socket use (such as in BIND).
- In the UDPv4 and UDPv6 multicast cases, we need to revisit locking
somewhat, as they relied on ipi_lock to stablise 4-tuple matches, which
is no longer sufficient. A second check once the inpcb lock is held
should do the trick, keeping the general case from requiring the inpcb
lock for every inpcb visited.
- This work reminds us that we need to revisit locking of the v4/v6 flags,
which may be accessed lock-free both before and after this change.
- Right now, a single lock name is used for the pcbhash lock -- this is
undesirable, and probably another argument is required to take care of
this (or a char array name field in the pcbinfo?).
This is not an MFC candidate for 8.x due to its impact on lookup and
locking semantics. It's possible some of these issues could be worked
around with compatibility wrappers, if necessary.
Reviewed by: bz
Sponsored by: Juniper Networks, Inc.
2011-05-30 09:43:55 +00:00
|
|
|
inp = in_pcblookup(pi, saddr->v4, sport, daddr->v4,
|
|
|
|
dport, INPLOOKUP_RLOCKPCB, NULL);
|
2004-02-26 02:34:12 +00:00
|
|
|
if (inp == NULL) {
|
Decompose the current single inpcbinfo lock into two locks:
- The existing ipi_lock continues to protect the global inpcb list and
inpcb counter. This lock is now relegated to a small number of
allocation and free operations, and occasional operations that walk
all connections (including, awkwardly, certain UDP multicast receive
operations -- something to revisit).
- A new ipi_hash_lock protects the two inpcbinfo hash tables for
looking up connections and bound sockets, manipulated using new
INP_HASH_*() macros. This lock, combined with inpcb locks, protects
the 4-tuple address space.
Unlike the current ipi_lock, ipi_hash_lock follows the individual inpcb
connection locks, so may be acquired while manipulating a connection on
which a lock is already held, avoiding the need to acquire the inpcbinfo
lock preemptively when a binding change might later be required. As a
result, however, lookup operations necessarily go through a reference
acquire while holding the lookup lock, later acquiring an inpcb lock --
if required.
A new function in_pcblookup() looks up connections, and accepts flags
indicating how to return the inpcb. Due to lock order changes, callers
no longer need acquire locks before performing a lookup: the lookup
routine will acquire the ipi_hash_lock as needed. In the future, it will
also be able to use alternative lookup and locking strategies
transparently to callers, such as pcbgroup lookup. New lookup flags are,
supplementing the existing INPLOOKUP_WILDCARD flag:
INPLOOKUP_RLOCKPCB - Acquire a read lock on the returned inpcb
INPLOOKUP_WLOCKPCB - Acquire a write lock on the returned inpcb
Callers must pass exactly one of these flags (for the time being).
Some notes:
- All protocols are updated to work within the new regime; especially,
TCP, UDPv4, and UDPv6. pcbinfo ipi_lock acquisitions are largely
eliminated, and global hash lock hold times are dramatically reduced
compared to previous locking.
- The TCP syncache still relies on the pcbinfo lock, something that we
may want to revisit.
- Support for reverting to the FreeBSD 7.x locking strategy in TCP input
is no longer available -- hash lookup locks are now held only very
briefly during inpcb lookup, rather than for potentially extended
periods. However, the pcbinfo ipi_lock will still be acquired if a
connection state might change such that a connection is added or
removed.
- Raw IP sockets continue to use the pcbinfo ipi_lock for protection,
due to maintaining their own hash tables.
- The interface in6_pcblookup_hash_locked() is maintained, which allows
callers to acquire hash locks and perform one or more lookups atomically
with 4-tuple allocation: this is required only for TCPv6, as there is no
in6_pcbconnect_setup(), which there should be.
- UDPv6 locking remains significantly more conservative than UDPv4
locking, which relates to source address selection. This needs
attention, as it likely significantly reduces parallelism in this code
for multithreaded socket use (such as in BIND).
- In the UDPv4 and UDPv6 multicast cases, we need to revisit locking
somewhat, as they relied on ipi_lock to stablise 4-tuple matches, which
is no longer sufficient. A second check once the inpcb lock is held
should do the trick, keeping the general case from requiring the inpcb
lock for every inpcb visited.
- This work reminds us that we need to revisit locking of the v4/v6 flags,
which may be accessed lock-free both before and after this change.
- Right now, a single lock name is used for the pcbhash lock -- this is
undesirable, and probably another argument is required to take care of
this (or a char array name field in the pcbinfo?).
This is not an MFC candidate for 8.x due to its impact on lookup and
locking semantics. It's possible some of these issues could be worked
around with compatibility wrappers, if necessary.
Reviewed by: bz
Sponsored by: Juniper Networks, Inc.
2011-05-30 09:43:55 +00:00
|
|
|
inp = in_pcblookup(pi, saddr->v4, sport,
|
|
|
|
daddr->v4, dport, INPLOOKUP_WILDCARD |
|
|
|
|
INPLOOKUP_RLOCKPCB, NULL);
|
|
|
|
if (inp == NULL)
|
2007-07-03 12:16:07 +00:00
|
|
|
return (-1);
|
2004-02-26 02:34:12 +00:00
|
|
|
}
|
|
|
|
#else
|
2004-02-26 02:04:28 +00:00
|
|
|
inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport);
|
|
|
|
if (inp == NULL) {
|
2011-06-28 11:57:25 +00:00
|
|
|
inp = in_pcblookup_listen(tb, daddr->v4, dport, 0,
|
|
|
|
NULL);
|
2004-02-26 02:04:28 +00:00
|
|
|
if (inp == NULL)
|
2007-07-03 12:16:07 +00:00
|
|
|
return (-1);
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
2005-05-03 16:43:32 +00:00
|
|
|
#endif /* INET */
|
2004-02-26 02:04:28 +00:00
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
Add _mbuf() variants of various inpcb-related interfaces, including lookup,
hash install, etc. For now, these are arguments are unused, but as we add
RSS support, we will want to use hashes extracted from mbufs, rather than
manually calculated hashes of header fields, due to the expensive of the
software version of Toeplitz (and similar hashes).
Add notes that it would be nice to be able to pass mbufs into lookup
routines in pf(4), optimising firewall lookup in the same way, but the
code structure there doesn't facilitate that currently.
(In principle there is no reason this couldn't be MFCed -- the change
extends rather than modifies the KBI. However, it won't be useful without
other previous possibly less MFCable changes.)
Reviewed by: bz
Sponsored by: Juniper Networks, Inc.
2011-06-04 16:33:06 +00:00
|
|
|
/*
|
|
|
|
* XXXRW: would be nice if we had an mbuf here so that we
|
|
|
|
* could use in6_pcblookup_mbuf().
|
|
|
|
*/
|
Decompose the current single inpcbinfo lock into two locks:
- The existing ipi_lock continues to protect the global inpcb list and
inpcb counter. This lock is now relegated to a small number of
allocation and free operations, and occasional operations that walk
all connections (including, awkwardly, certain UDP multicast receive
operations -- something to revisit).
- A new ipi_hash_lock protects the two inpcbinfo hash tables for
looking up connections and bound sockets, manipulated using new
INP_HASH_*() macros. This lock, combined with inpcb locks, protects
the 4-tuple address space.
Unlike the current ipi_lock, ipi_hash_lock follows the individual inpcb
connection locks, so may be acquired while manipulating a connection on
which a lock is already held, avoiding the need to acquire the inpcbinfo
lock preemptively when a binding change might later be required. As a
result, however, lookup operations necessarily go through a reference
acquire while holding the lookup lock, later acquiring an inpcb lock --
if required.
A new function in_pcblookup() looks up connections, and accepts flags
indicating how to return the inpcb. Due to lock order changes, callers
no longer need acquire locks before performing a lookup: the lookup
routine will acquire the ipi_hash_lock as needed. In the future, it will
also be able to use alternative lookup and locking strategies
transparently to callers, such as pcbgroup lookup. New lookup flags are,
supplementing the existing INPLOOKUP_WILDCARD flag:
INPLOOKUP_RLOCKPCB - Acquire a read lock on the returned inpcb
INPLOOKUP_WLOCKPCB - Acquire a write lock on the returned inpcb
Callers must pass exactly one of these flags (for the time being).
Some notes:
- All protocols are updated to work within the new regime; especially,
TCP, UDPv4, and UDPv6. pcbinfo ipi_lock acquisitions are largely
eliminated, and global hash lock hold times are dramatically reduced
compared to previous locking.
- The TCP syncache still relies on the pcbinfo lock, something that we
may want to revisit.
- Support for reverting to the FreeBSD 7.x locking strategy in TCP input
is no longer available -- hash lookup locks are now held only very
briefly during inpcb lookup, rather than for potentially extended
periods. However, the pcbinfo ipi_lock will still be acquired if a
connection state might change such that a connection is added or
removed.
- Raw IP sockets continue to use the pcbinfo ipi_lock for protection,
due to maintaining their own hash tables.
- The interface in6_pcblookup_hash_locked() is maintained, which allows
callers to acquire hash locks and perform one or more lookups atomically
with 4-tuple allocation: this is required only for TCPv6, as there is no
in6_pcbconnect_setup(), which there should be.
- UDPv6 locking remains significantly more conservative than UDPv4
locking, which relates to source address selection. This needs
attention, as it likely significantly reduces parallelism in this code
for multithreaded socket use (such as in BIND).
- In the UDPv4 and UDPv6 multicast cases, we need to revisit locking
somewhat, as they relied on ipi_lock to stablise 4-tuple matches, which
is no longer sufficient. A second check once the inpcb lock is held
should do the trick, keeping the general case from requiring the inpcb
lock for every inpcb visited.
- This work reminds us that we need to revisit locking of the v4/v6 flags,
which may be accessed lock-free both before and after this change.
- Right now, a single lock name is used for the pcbhash lock -- this is
undesirable, and probably another argument is required to take care of
this (or a char array name field in the pcbinfo?).
This is not an MFC candidate for 8.x due to its impact on lookup and
locking semantics. It's possible some of these issues could be worked
around with compatibility wrappers, if necessary.
Reviewed by: bz
Sponsored by: Juniper Networks, Inc.
2011-05-30 09:43:55 +00:00
|
|
|
inp = in6_pcblookup(pi, &saddr->v6, sport,
|
|
|
|
&daddr->v6, dport, INPLOOKUP_RLOCKPCB, NULL);
|
2004-02-26 02:34:12 +00:00
|
|
|
if (inp == NULL) {
|
Decompose the current single inpcbinfo lock into two locks:
- The existing ipi_lock continues to protect the global inpcb list and
inpcb counter. This lock is now relegated to a small number of
allocation and free operations, and occasional operations that walk
all connections (including, awkwardly, certain UDP multicast receive
operations -- something to revisit).
- A new ipi_hash_lock protects the two inpcbinfo hash tables for
looking up connections and bound sockets, manipulated using new
INP_HASH_*() macros. This lock, combined with inpcb locks, protects
the 4-tuple address space.
Unlike the current ipi_lock, ipi_hash_lock follows the individual inpcb
connection locks, so may be acquired while manipulating a connection on
which a lock is already held, avoiding the need to acquire the inpcbinfo
lock preemptively when a binding change might later be required. As a
result, however, lookup operations necessarily go through a reference
acquire while holding the lookup lock, later acquiring an inpcb lock --
if required.
A new function in_pcblookup() looks up connections, and accepts flags
indicating how to return the inpcb. Due to lock order changes, callers
no longer need acquire locks before performing a lookup: the lookup
routine will acquire the ipi_hash_lock as needed. In the future, it will
also be able to use alternative lookup and locking strategies
transparently to callers, such as pcbgroup lookup. New lookup flags are,
supplementing the existing INPLOOKUP_WILDCARD flag:
INPLOOKUP_RLOCKPCB - Acquire a read lock on the returned inpcb
INPLOOKUP_WLOCKPCB - Acquire a write lock on the returned inpcb
Callers must pass exactly one of these flags (for the time being).
Some notes:
- All protocols are updated to work within the new regime; especially,
TCP, UDPv4, and UDPv6. pcbinfo ipi_lock acquisitions are largely
eliminated, and global hash lock hold times are dramatically reduced
compared to previous locking.
- The TCP syncache still relies on the pcbinfo lock, something that we
may want to revisit.
- Support for reverting to the FreeBSD 7.x locking strategy in TCP input
is no longer available -- hash lookup locks are now held only very
briefly during inpcb lookup, rather than for potentially extended
periods. However, the pcbinfo ipi_lock will still be acquired if a
connection state might change such that a connection is added or
removed.
- Raw IP sockets continue to use the pcbinfo ipi_lock for protection,
due to maintaining their own hash tables.
- The interface in6_pcblookup_hash_locked() is maintained, which allows
callers to acquire hash locks and perform one or more lookups atomically
with 4-tuple allocation: this is required only for TCPv6, as there is no
in6_pcbconnect_setup(), which there should be.
- UDPv6 locking remains significantly more conservative than UDPv4
locking, which relates to source address selection. This needs
attention, as it likely significantly reduces parallelism in this code
for multithreaded socket use (such as in BIND).
- In the UDPv4 and UDPv6 multicast cases, we need to revisit locking
somewhat, as they relied on ipi_lock to stablise 4-tuple matches, which
is no longer sufficient. A second check once the inpcb lock is held
should do the trick, keeping the general case from requiring the inpcb
lock for every inpcb visited.
- This work reminds us that we need to revisit locking of the v4/v6 flags,
which may be accessed lock-free both before and after this change.
- Right now, a single lock name is used for the pcbhash lock -- this is
undesirable, and probably another argument is required to take care of
this (or a char array name field in the pcbinfo?).
This is not an MFC candidate for 8.x due to its impact on lookup and
locking semantics. It's possible some of these issues could be worked
around with compatibility wrappers, if necessary.
Reviewed by: bz
Sponsored by: Juniper Networks, Inc.
2011-05-30 09:43:55 +00:00
|
|
|
inp = in6_pcblookup(pi, &saddr->v6, sport,
|
|
|
|
&daddr->v6, dport, INPLOOKUP_WILDCARD |
|
|
|
|
INPLOOKUP_RLOCKPCB, NULL);
|
|
|
|
if (inp == NULL)
|
2007-07-03 12:16:07 +00:00
|
|
|
return (-1);
|
2004-02-26 02:34:12 +00:00
|
|
|
}
|
|
|
|
#else
|
2004-02-26 02:04:28 +00:00
|
|
|
inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6,
|
|
|
|
dport);
|
|
|
|
if (inp == NULL) {
|
2011-06-28 11:57:25 +00:00
|
|
|
inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0,
|
|
|
|
NULL);
|
2004-02-26 02:04:28 +00:00
|
|
|
if (inp == NULL)
|
2007-07-03 12:16:07 +00:00
|
|
|
return (-1);
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
|
|
|
|
|
|
|
default:
|
2007-07-03 12:16:07 +00:00
|
|
|
return (-1);
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
Decompose the current single inpcbinfo lock into two locks:
- The existing ipi_lock continues to protect the global inpcb list and
inpcb counter. This lock is now relegated to a small number of
allocation and free operations, and occasional operations that walk
all connections (including, awkwardly, certain UDP multicast receive
operations -- something to revisit).
- A new ipi_hash_lock protects the two inpcbinfo hash tables for
looking up connections and bound sockets, manipulated using new
INP_HASH_*() macros. This lock, combined with inpcb locks, protects
the 4-tuple address space.
Unlike the current ipi_lock, ipi_hash_lock follows the individual inpcb
connection locks, so may be acquired while manipulating a connection on
which a lock is already held, avoiding the need to acquire the inpcbinfo
lock preemptively when a binding change might later be required. As a
result, however, lookup operations necessarily go through a reference
acquire while holding the lookup lock, later acquiring an inpcb lock --
if required.
A new function in_pcblookup() looks up connections, and accepts flags
indicating how to return the inpcb. Due to lock order changes, callers
no longer need acquire locks before performing a lookup: the lookup
routine will acquire the ipi_hash_lock as needed. In the future, it will
also be able to use alternative lookup and locking strategies
transparently to callers, such as pcbgroup lookup. New lookup flags are,
supplementing the existing INPLOOKUP_WILDCARD flag:
INPLOOKUP_RLOCKPCB - Acquire a read lock on the returned inpcb
INPLOOKUP_WLOCKPCB - Acquire a write lock on the returned inpcb
Callers must pass exactly one of these flags (for the time being).
Some notes:
- All protocols are updated to work within the new regime; especially,
TCP, UDPv4, and UDPv6. pcbinfo ipi_lock acquisitions are largely
eliminated, and global hash lock hold times are dramatically reduced
compared to previous locking.
- The TCP syncache still relies on the pcbinfo lock, something that we
may want to revisit.
- Support for reverting to the FreeBSD 7.x locking strategy in TCP input
is no longer available -- hash lookup locks are now held only very
briefly during inpcb lookup, rather than for potentially extended
periods. However, the pcbinfo ipi_lock will still be acquired if a
connection state might change such that a connection is added or
removed.
- Raw IP sockets continue to use the pcbinfo ipi_lock for protection,
due to maintaining their own hash tables.
- The interface in6_pcblookup_hash_locked() is maintained, which allows
callers to acquire hash locks and perform one or more lookups atomically
with 4-tuple allocation: this is required only for TCPv6, as there is no
in6_pcbconnect_setup(), which there should be.
- UDPv6 locking remains significantly more conservative than UDPv4
locking, which relates to source address selection. This needs
attention, as it likely significantly reduces parallelism in this code
for multithreaded socket use (such as in BIND).
- In the UDPv4 and UDPv6 multicast cases, we need to revisit locking
somewhat, as they relied on ipi_lock to stablise 4-tuple matches, which
is no longer sufficient. A second check once the inpcb lock is held
should do the trick, keeping the general case from requiring the inpcb
lock for every inpcb visited.
- This work reminds us that we need to revisit locking of the v4/v6 flags,
which may be accessed lock-free both before and after this change.
- Right now, a single lock name is used for the pcbhash lock -- this is
undesirable, and probably another argument is required to take care of
this (or a char array name field in the pcbinfo?).
This is not an MFC candidate for 8.x due to its impact on lookup and
locking semantics. It's possible some of these issues could be worked
around with compatibility wrappers, if necessary.
Reviewed by: bz
Sponsored by: Juniper Networks, Inc.
2011-05-30 09:43:55 +00:00
|
|
|
INP_RLOCK_ASSERT(inp);
|
2008-10-04 15:06:34 +00:00
|
|
|
pd->lookup.uid = inp->inp_cred->cr_uid;
|
|
|
|
pd->lookup.gid = inp->inp_cred->cr_groups[0];
|
Decompose the current single inpcbinfo lock into two locks:
- The existing ipi_lock continues to protect the global inpcb list and
inpcb counter. This lock is now relegated to a small number of
allocation and free operations, and occasional operations that walk
all connections (including, awkwardly, certain UDP multicast receive
operations -- something to revisit).
- A new ipi_hash_lock protects the two inpcbinfo hash tables for
looking up connections and bound sockets, manipulated using new
INP_HASH_*() macros. This lock, combined with inpcb locks, protects
the 4-tuple address space.
Unlike the current ipi_lock, ipi_hash_lock follows the individual inpcb
connection locks, so may be acquired while manipulating a connection on
which a lock is already held, avoiding the need to acquire the inpcbinfo
lock preemptively when a binding change might later be required. As a
result, however, lookup operations necessarily go through a reference
acquire while holding the lookup lock, later acquiring an inpcb lock --
if required.
A new function in_pcblookup() looks up connections, and accepts flags
indicating how to return the inpcb. Due to lock order changes, callers
no longer need acquire locks before performing a lookup: the lookup
routine will acquire the ipi_hash_lock as needed. In the future, it will
also be able to use alternative lookup and locking strategies
transparently to callers, such as pcbgroup lookup. New lookup flags are,
supplementing the existing INPLOOKUP_WILDCARD flag:
INPLOOKUP_RLOCKPCB - Acquire a read lock on the returned inpcb
INPLOOKUP_WLOCKPCB - Acquire a write lock on the returned inpcb
Callers must pass exactly one of these flags (for the time being).
Some notes:
- All protocols are updated to work within the new regime; especially,
TCP, UDPv4, and UDPv6. pcbinfo ipi_lock acquisitions are largely
eliminated, and global hash lock hold times are dramatically reduced
compared to previous locking.
- The TCP syncache still relies on the pcbinfo lock, something that we
may want to revisit.
- Support for reverting to the FreeBSD 7.x locking strategy in TCP input
is no longer available -- hash lookup locks are now held only very
briefly during inpcb lookup, rather than for potentially extended
periods. However, the pcbinfo ipi_lock will still be acquired if a
connection state might change such that a connection is added or
removed.
- Raw IP sockets continue to use the pcbinfo ipi_lock for protection,
due to maintaining their own hash tables.
- The interface in6_pcblookup_hash_locked() is maintained, which allows
callers to acquire hash locks and perform one or more lookups atomically
with 4-tuple allocation: this is required only for TCPv6, as there is no
in6_pcbconnect_setup(), which there should be.
- UDPv6 locking remains significantly more conservative than UDPv4
locking, which relates to source address selection. This needs
attention, as it likely significantly reduces parallelism in this code
for multithreaded socket use (such as in BIND).
- In the UDPv4 and UDPv6 multicast cases, we need to revisit locking
somewhat, as they relied on ipi_lock to stablise 4-tuple matches, which
is no longer sufficient. A second check once the inpcb lock is held
should do the trick, keeping the general case from requiring the inpcb
lock for every inpcb visited.
- This work reminds us that we need to revisit locking of the v4/v6 flags,
which may be accessed lock-free both before and after this change.
- Right now, a single lock name is used for the pcbhash lock -- this is
undesirable, and probably another argument is required to take care of
this (or a char array name field in the pcbinfo?).
This is not an MFC candidate for 8.x due to its impact on lookup and
locking semantics. It's possible some of these issues could be worked
around with compatibility wrappers, if necessary.
Reviewed by: bz
Sponsored by: Juniper Networks, Inc.
2011-05-30 09:43:55 +00:00
|
|
|
INP_RUNLOCK(inp);
|
2004-02-26 02:34:12 +00:00
|
|
|
#else
|
2007-07-03 12:16:07 +00:00
|
|
|
pd->lookup.uid = inp->inp_socket->so_euid;
|
|
|
|
pd->lookup.gid = inp->inp_socket->so_egid;
|
|
|
|
pd->lookup.pid = inp->inp_socket->so_cpid;
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
|
|
|
|
u_int8_t
|
|
|
|
pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
|
|
|
|
{
|
|
|
|
int hlen;
|
|
|
|
u_int8_t hdr[60];
|
|
|
|
u_int8_t *opt, optlen;
|
|
|
|
u_int8_t wscale = 0;
|
|
|
|
|
|
|
|
hlen = th_off << 2; /* hlen <= sizeof(hdr) */
|
|
|
|
if (hlen <= sizeof(struct tcphdr))
|
|
|
|
return (0);
|
|
|
|
if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
|
|
|
|
return (0);
|
|
|
|
opt = hdr + sizeof(struct tcphdr);
|
|
|
|
hlen -= sizeof(struct tcphdr);
|
|
|
|
while (hlen >= 3) {
|
|
|
|
switch (*opt) {
|
|
|
|
case TCPOPT_EOL:
|
|
|
|
case TCPOPT_NOP:
|
|
|
|
++opt;
|
|
|
|
--hlen;
|
|
|
|
break;
|
|
|
|
case TCPOPT_WINDOW:
|
|
|
|
wscale = opt[2];
|
|
|
|
if (wscale > TCP_MAX_WINSHIFT)
|
|
|
|
wscale = TCP_MAX_WINSHIFT;
|
|
|
|
wscale |= PF_WSCALE_FLAG;
|
2004-06-16 23:24:02 +00:00
|
|
|
/* FALLTHROUGH */
|
2004-02-26 02:04:28 +00:00
|
|
|
default:
|
|
|
|
optlen = opt[1];
|
|
|
|
if (optlen < 2)
|
|
|
|
optlen = 2;
|
|
|
|
hlen -= optlen;
|
|
|
|
opt += optlen;
|
2004-06-16 23:24:02 +00:00
|
|
|
break;
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return (wscale);
|
|
|
|
}
|
|
|
|
|
|
|
|
u_int16_t
|
|
|
|
pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
|
|
|
|
{
|
|
|
|
int hlen;
|
|
|
|
u_int8_t hdr[60];
|
|
|
|
u_int8_t *opt, optlen;
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
Commit step 1 of the vimage project, (network stack)
virtualization work done by Marko Zec (zec@).
This is the first in a series of commits over the course
of the next few weeks.
Mark all uses of global variables to be virtualized
with a V_ prefix.
Use macros to map them back to their global names for
now, so this is a NOP change only.
We hope to have caught at least 85-90% of what is needed
so we do not invalidate a lot of outstanding patches again.
Obtained from: //depot/projects/vimage-commit2/...
Reviewed by: brooks, des, ed, mav, julian,
jamie, kris, rwatson, zec, ...
(various people I forgot, different versions)
md5 (with a bit of help)
Sponsored by: NLnet Foundation, The FreeBSD Foundation
X-MFC after: never
V_Commit_Message_Reviewed_By: more people than the patch
2008-08-17 23:27:27 +00:00
|
|
|
u_int16_t mss = V_tcp_mssdflt;
|
2011-06-28 11:57:25 +00:00
|
|
|
#else
|
|
|
|
u_int16_t mss = tcp_mssdflt;
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
hlen = th_off << 2; /* hlen <= sizeof(hdr) */
|
|
|
|
if (hlen <= sizeof(struct tcphdr))
|
|
|
|
return (0);
|
|
|
|
if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
|
|
|
|
return (0);
|
|
|
|
opt = hdr + sizeof(struct tcphdr);
|
|
|
|
hlen -= sizeof(struct tcphdr);
|
|
|
|
while (hlen >= TCPOLEN_MAXSEG) {
|
|
|
|
switch (*opt) {
|
|
|
|
case TCPOPT_EOL:
|
|
|
|
case TCPOPT_NOP:
|
|
|
|
++opt;
|
|
|
|
--hlen;
|
|
|
|
break;
|
|
|
|
case TCPOPT_MAXSEG:
|
|
|
|
bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
|
2005-04-13 21:05:55 +00:00
|
|
|
NTOHS(mss);
|
2004-06-16 23:24:02 +00:00
|
|
|
/* FALLTHROUGH */
|
2004-02-26 02:04:28 +00:00
|
|
|
default:
|
|
|
|
optlen = opt[1];
|
|
|
|
if (optlen < 2)
|
|
|
|
optlen = 2;
|
|
|
|
hlen -= optlen;
|
|
|
|
opt += optlen;
|
2004-06-16 23:24:02 +00:00
|
|
|
break;
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return (mss);
|
|
|
|
}
|
|
|
|
|
|
|
|
u_int16_t
|
|
|
|
pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
|
|
|
|
{
|
|
|
|
#ifdef INET
|
|
|
|
struct sockaddr_in *dst;
|
|
|
|
struct route ro;
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
|
|
struct sockaddr_in6 *dst6;
|
|
|
|
struct route_in6 ro6;
|
|
|
|
#endif /* INET6 */
|
|
|
|
struct rtentry *rt = NULL;
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
int hlen = 0;
|
Commit step 1 of the vimage project, (network stack)
virtualization work done by Marko Zec (zec@).
This is the first in a series of commits over the course
of the next few weeks.
Mark all uses of global variables to be virtualized
with a V_ prefix.
Use macros to map them back to their global names for
now, so this is a NOP change only.
We hope to have caught at least 85-90% of what is needed
so we do not invalidate a lot of outstanding patches again.
Obtained from: //depot/projects/vimage-commit2/...
Reviewed by: brooks, des, ed, mav, julian,
jamie, kris, rwatson, zec, ...
(various people I forgot, different versions)
md5 (with a bit of help)
Sponsored by: NLnet Foundation, The FreeBSD Foundation
X-MFC after: never
V_Commit_Message_Reviewed_By: more people than the patch
2008-08-17 23:27:27 +00:00
|
|
|
u_int16_t mss = V_tcp_mssdflt;
|
2011-06-28 11:57:25 +00:00
|
|
|
#else
|
|
|
|
int hlen;
|
|
|
|
u_int16_t mss = tcp_mssdflt;
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
switch (af) {
|
|
|
|
#ifdef INET
|
|
|
|
case AF_INET:
|
|
|
|
hlen = sizeof(struct ip);
|
|
|
|
bzero(&ro, sizeof(ro));
|
|
|
|
dst = (struct sockaddr_in *)&ro.ro_dst;
|
|
|
|
dst->sin_family = AF_INET;
|
|
|
|
dst->sin_len = sizeof(*dst);
|
|
|
|
dst->sin_addr = addr->v4;
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
#ifdef RTF_PRCLONING
|
|
|
|
rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING));
|
|
|
|
#else /* !RTF_PRCLONING */
|
This main goals of this project are:
1. separating L2 tables (ARP, NDP) from the L3 routing tables
2. removing as much locking dependencies among these layers as
possible to allow for some parallelism in the search operations
3. simplify the logic in the routing code,
The most notable end result is the obsolescent of the route
cloning (RTF_CLONING) concept, which translated into code reduction
in both IPv4 ARP and IPv6 NDP related modules, and size reduction in
struct rtentry{}. The change in design obsoletes the semantics of
RTF_CLONING, RTF_WASCLONE and RTF_LLINFO routing flags. The userland
applications such as "arp" and "ndp" have been modified to reflect
those changes. The output from "netstat -r" shows only the routing
entries.
Quite a few developers have contributed to this project in the
past: Glebius Smirnoff, Luigi Rizzo, Alessandro Cerri, and
Andre Oppermann. And most recently:
- Kip Macy revised the locking code completely, thus completing
the last piece of the puzzle, Kip has also been conducting
active functional testing
- Sam Leffler has helped me improving/refactoring the code, and
provided valuable reviews
- Julian Elischer setup the perforce tree for me and has helped
me maintaining that branch before the svn conversion
2008-12-15 06:10:57 +00:00
|
|
|
in_rtalloc_ign(&ro, 0, 0);
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
|
|
|
#else /* ! __FreeBSD__ */
|
2004-02-26 02:04:28 +00:00
|
|
|
rtalloc_noclone(&ro, NO_CLONING);
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
rt = ro.ro_rt;
|
|
|
|
break;
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
|
|
|
hlen = sizeof(struct ip6_hdr);
|
|
|
|
bzero(&ro6, sizeof(ro6));
|
|
|
|
dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
|
|
|
|
dst6->sin6_family = AF_INET6;
|
|
|
|
dst6->sin6_len = sizeof(*dst6);
|
|
|
|
dst6->sin6_addr = addr->v6;
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
#ifdef RTF_PRCLONING
|
|
|
|
rtalloc_ign((struct route *)&ro6,
|
|
|
|
(RTF_CLONING | RTF_PRCLONING));
|
|
|
|
#else /* !RTF_PRCLONING */
|
This main goals of this project are:
1. separating L2 tables (ARP, NDP) from the L3 routing tables
2. removing as much locking dependencies among these layers as
possible to allow for some parallelism in the search operations
3. simplify the logic in the routing code,
The most notable end result is the obsolescent of the route
cloning (RTF_CLONING) concept, which translated into code reduction
in both IPv4 ARP and IPv6 NDP related modules, and size reduction in
struct rtentry{}. The change in design obsoletes the semantics of
RTF_CLONING, RTF_WASCLONE and RTF_LLINFO routing flags. The userland
applications such as "arp" and "ndp" have been modified to reflect
those changes. The output from "netstat -r" shows only the routing
entries.
Quite a few developers have contributed to this project in the
past: Glebius Smirnoff, Luigi Rizzo, Alessandro Cerri, and
Andre Oppermann. And most recently:
- Kip Macy revised the locking code completely, thus completing
the last piece of the puzzle, Kip has also been conducting
active functional testing
- Sam Leffler has helped me improving/refactoring the code, and
provided valuable reviews
- Julian Elischer setup the perforce tree for me and has helped
me maintaining that branch before the svn conversion
2008-12-15 06:10:57 +00:00
|
|
|
rtalloc_ign((struct route *)&ro6, 0);
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
|
|
|
#else /* ! __FreeBSD__ */
|
2004-02-26 02:04:28 +00:00
|
|
|
rtalloc_noclone((struct route *)&ro6, NO_CLONING);
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
rt = ro6.ro_rt;
|
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
|
|
|
}
|
|
|
|
|
|
|
|
if (rt && rt->rt_ifp) {
|
|
|
|
mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
Commit step 1 of the vimage project, (network stack)
virtualization work done by Marko Zec (zec@).
This is the first in a series of commits over the course
of the next few weeks.
Mark all uses of global variables to be virtualized
with a V_ prefix.
Use macros to map them back to their global names for
now, so this is a NOP change only.
We hope to have caught at least 85-90% of what is needed
so we do not invalidate a lot of outstanding patches again.
Obtained from: //depot/projects/vimage-commit2/...
Reviewed by: brooks, des, ed, mav, julian,
jamie, kris, rwatson, zec, ...
(various people I forgot, different versions)
md5 (with a bit of help)
Sponsored by: NLnet Foundation, The FreeBSD Foundation
X-MFC after: never
V_Commit_Message_Reviewed_By: more people than the patch
2008-08-17 23:27:27 +00:00
|
|
|
mss = max(V_tcp_mssdflt, mss);
|
2011-06-28 11:57:25 +00:00
|
|
|
#else
|
|
|
|
mss = max(tcp_mssdflt, mss);
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
RTFREE(rt);
|
|
|
|
}
|
|
|
|
mss = min(mss, offer);
|
|
|
|
mss = max(mss, 64); /* sanity - at least max opt space */
|
|
|
|
return (mss);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
|
|
|
|
{
|
|
|
|
struct pf_rule *r = s->rule.ptr;
|
2011-06-28 11:57:25 +00:00
|
|
|
struct pf_src_node *sn = NULL;
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2004-06-16 23:24:02 +00:00
|
|
|
s->rt_kif = NULL;
|
2004-02-26 02:04:28 +00:00
|
|
|
if (!r->rt || r->rt == PF_FASTROUTE)
|
|
|
|
return;
|
2011-06-28 11:57:25 +00:00
|
|
|
switch (s->key[PF_SK_WIRE]->af) {
|
2004-02-26 02:04:28 +00:00
|
|
|
#ifdef INET
|
|
|
|
case AF_INET:
|
2011-06-28 11:57:25 +00:00
|
|
|
pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, &sn);
|
2004-06-16 23:24:02 +00:00
|
|
|
s->rt_kif = r->rpool.cur->kif;
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
2011-06-28 11:57:25 +00:00
|
|
|
pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, &sn);
|
2004-06-16 23:24:02 +00:00
|
|
|
s->rt_kif = r->rpool.cur->kif;
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
u_int32_t
|
|
|
|
pf_tcp_iss(struct pf_pdesc *pd)
|
|
|
|
{
|
|
|
|
MD5_CTX ctx;
|
|
|
|
u_int32_t digest[4];
|
|
|
|
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (V_pf_tcp_secret_init == 0) {
|
|
|
|
read_random(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret));
|
|
|
|
MD5Init(&V_pf_tcp_secret_ctx);
|
|
|
|
MD5Update(&V_pf_tcp_secret_ctx, V_pf_tcp_secret,
|
|
|
|
sizeof(V_pf_tcp_secret));
|
|
|
|
V_pf_tcp_secret_init = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
ctx = V_pf_tcp_secret_ctx;
|
|
|
|
#else
|
|
|
|
if (pf_tcp_secret_init == 0) {
|
|
|
|
arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret));
|
|
|
|
MD5Init(&pf_tcp_secret_ctx);
|
|
|
|
MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret,
|
|
|
|
sizeof(pf_tcp_secret));
|
|
|
|
pf_tcp_secret_init = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
ctx = pf_tcp_secret_ctx;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short));
|
|
|
|
MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short));
|
|
|
|
if (pd->af == AF_INET6) {
|
|
|
|
MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr));
|
|
|
|
MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr));
|
|
|
|
} else {
|
|
|
|
MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr));
|
|
|
|
MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr));
|
|
|
|
}
|
|
|
|
MD5Final((u_char *)digest, &ctx);
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
V_pf_tcp_iss_off += 4096;
|
|
|
|
#define ISN_RANDOM_INCREMENT (4096 - 1)
|
|
|
|
return (digest[0] + (arc4random() & ISN_RANDOM_INCREMENT) +
|
|
|
|
V_pf_tcp_iss_off);
|
|
|
|
#undef ISN_RANDOM_INCREMENT
|
|
|
|
#else
|
|
|
|
pf_tcp_iss_off += 4096;
|
|
|
|
return (digest[0] + tcp_iss + pf_tcp_iss_off);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
int
|
2011-06-28 11:57:25 +00:00
|
|
|
pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
|
2004-06-16 23:24:02 +00:00
|
|
|
struct pfi_kif *kif, struct mbuf *m, int off, void *h,
|
2004-09-29 04:54:33 +00:00
|
|
|
struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
2005-05-03 16:43:32 +00:00
|
|
|
struct ifqueue *ifq, struct inpcb *inp)
|
2004-09-29 04:54:33 +00:00
|
|
|
#else
|
2005-05-03 16:43:32 +00:00
|
|
|
struct ifqueue *ifq)
|
2004-09-29 04:54:33 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
{
|
2004-06-16 23:24:02 +00:00
|
|
|
struct pf_rule *nr = NULL;
|
2004-02-26 02:04:28 +00:00
|
|
|
struct pf_addr *saddr = pd->src, *daddr = pd->dst;
|
|
|
|
sa_family_t af = pd->af;
|
|
|
|
struct pf_rule *r, *a = NULL;
|
|
|
|
struct pf_ruleset *ruleset = NULL;
|
2004-06-16 23:24:02 +00:00
|
|
|
struct pf_src_node *nsn = NULL;
|
2011-06-28 11:57:25 +00:00
|
|
|
struct tcphdr *th = pd->hdr.tcp;
|
|
|
|
struct pf_state_key *skw = NULL, *sks = NULL;
|
|
|
|
struct pf_state_key *sk = NULL, *nk = NULL;
|
2004-02-26 02:04:28 +00:00
|
|
|
u_short reason;
|
2011-06-28 11:57:25 +00:00
|
|
|
int rewrite = 0, hdrlen = 0;
|
2007-07-03 12:16:07 +00:00
|
|
|
int tag = -1, rtableid = -1;
|
2005-05-03 16:43:32 +00:00
|
|
|
int asd = 0;
|
2007-07-03 12:16:07 +00:00
|
|
|
int match = 0;
|
2011-06-28 11:57:25 +00:00
|
|
|
int state_icmp = 0;
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
u_int16_t sport = 0, dport = 0;
|
|
|
|
u_int16_t bproto_sum = 0, bip_sum = 0;
|
|
|
|
#else
|
|
|
|
u_int16_t sport, dport;
|
|
|
|
u_int16_t bproto_sum = 0, bip_sum;
|
|
|
|
#endif
|
|
|
|
u_int8_t icmptype = 0, icmpcode = 0;
|
|
|
|
|
2005-05-03 16:43:32 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
if (direction == PF_IN && pf_check_congestion(ifq)) {
|
2005-05-03 16:43:32 +00:00
|
|
|
REASON_SET(&reason, PFRES_CONGEST);
|
|
|
|
return (PF_DROP);
|
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2007-07-03 12:16:07 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (inp != NULL)
|
|
|
|
pd->lookup.done = pf_socket_lookup(direction, pd, inp);
|
2011-06-28 11:57:25 +00:00
|
|
|
else if (V_debug_pfugidhack) {
|
2007-07-03 12:16:07 +00:00
|
|
|
PF_UNLOCK();
|
|
|
|
DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n"));
|
2011-06-28 11:57:25 +00:00
|
|
|
pd->lookup.done = pf_socket_lookup(direction, pd, inp);
|
2007-07-03 12:16:07 +00:00
|
|
|
PF_LOCK();
|
|
|
|
}
|
2006-12-29 13:59:03 +00:00
|
|
|
#endif
|
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
switch (pd->proto) {
|
2011-06-28 11:57:25 +00:00
|
|
|
case IPPROTO_TCP:
|
|
|
|
sport = th->th_sport;
|
|
|
|
dport = th->th_dport;
|
|
|
|
hdrlen = sizeof(*th);
|
|
|
|
break;
|
|
|
|
case IPPROTO_UDP:
|
|
|
|
sport = pd->hdr.udp->uh_sport;
|
|
|
|
dport = pd->hdr.udp->uh_dport;
|
|
|
|
hdrlen = sizeof(*pd->hdr.udp);
|
|
|
|
break;
|
2004-02-26 02:04:28 +00:00
|
|
|
#ifdef INET
|
|
|
|
case IPPROTO_ICMP:
|
2011-06-28 11:57:25 +00:00
|
|
|
if (pd->af != AF_INET)
|
|
|
|
break;
|
|
|
|
sport = dport = pd->hdr.icmp->icmp_id;
|
|
|
|
hdrlen = sizeof(*pd->hdr.icmp);
|
2004-02-26 02:04:28 +00:00
|
|
|
icmptype = pd->hdr.icmp->icmp_type;
|
|
|
|
icmpcode = pd->hdr.icmp->icmp_code;
|
|
|
|
|
|
|
|
if (icmptype == ICMP_UNREACH ||
|
|
|
|
icmptype == ICMP_SOURCEQUENCH ||
|
|
|
|
icmptype == ICMP_REDIRECT ||
|
|
|
|
icmptype == ICMP_TIMXCEED ||
|
|
|
|
icmptype == ICMP_PARAMPROB)
|
|
|
|
state_icmp++;
|
|
|
|
break;
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
|
|
case IPPROTO_ICMPV6:
|
2011-06-28 11:57:25 +00:00
|
|
|
if (af != AF_INET6)
|
|
|
|
break;
|
|
|
|
sport = dport = pd->hdr.icmp6->icmp6_id;
|
|
|
|
hdrlen = sizeof(*pd->hdr.icmp6);
|
2004-02-26 02:04:28 +00:00
|
|
|
icmptype = pd->hdr.icmp6->icmp6_type;
|
|
|
|
icmpcode = pd->hdr.icmp6->icmp6_code;
|
|
|
|
|
|
|
|
if (icmptype == ICMP6_DST_UNREACH ||
|
|
|
|
icmptype == ICMP6_PACKET_TOO_BIG ||
|
|
|
|
icmptype == ICMP6_TIME_EXCEEDED ||
|
|
|
|
icmptype == ICMP6_PARAM_PROB)
|
|
|
|
state_icmp++;
|
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
2011-06-28 11:57:25 +00:00
|
|
|
default:
|
|
|
|
sport = dport = hdrlen = 0;
|
|
|
|
break;
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
/* check packet for BINAT/NAT/RDR */
|
|
|
|
if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn,
|
|
|
|
&skw, &sks, &sk, &nk, saddr, daddr, sport, dport)) != NULL) {
|
|
|
|
if (nk == NULL || sk == NULL) {
|
|
|
|
REASON_SET(&reason, PFRES_MEMORY);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (pd->ip_sum)
|
|
|
|
bip_sum = *pd->ip_sum;
|
|
|
|
|
|
|
|
switch (pd->proto) {
|
|
|
|
case IPPROTO_TCP:
|
|
|
|
bproto_sum = th->th_sum;
|
|
|
|
pd->proto_sum = &th->th_sum;
|
|
|
|
|
|
|
|
if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
|
|
|
|
nk->port[pd->sidx] != sport) {
|
|
|
|
pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
|
|
|
|
&th->th_sum, &nk->addr[pd->sidx],
|
|
|
|
nk->port[pd->sidx], 0, af);
|
|
|
|
pd->sport = &th->th_sport;
|
|
|
|
sport = th->th_sport;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
|
|
|
|
nk->port[pd->didx] != dport) {
|
|
|
|
pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
|
|
|
|
&th->th_sum, &nk->addr[pd->didx],
|
|
|
|
nk->port[pd->didx], 0, af);
|
|
|
|
dport = th->th_dport;
|
|
|
|
pd->dport = &th->th_dport;
|
|
|
|
}
|
|
|
|
rewrite++;
|
|
|
|
break;
|
|
|
|
case IPPROTO_UDP:
|
|
|
|
bproto_sum = pd->hdr.udp->uh_sum;
|
|
|
|
pd->proto_sum = &pd->hdr.udp->uh_sum;
|
|
|
|
|
|
|
|
if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
|
|
|
|
nk->port[pd->sidx] != sport) {
|
|
|
|
pf_change_ap(saddr, &pd->hdr.udp->uh_sport,
|
|
|
|
pd->ip_sum, &pd->hdr.udp->uh_sum,
|
|
|
|
&nk->addr[pd->sidx],
|
|
|
|
nk->port[pd->sidx], 1, af);
|
|
|
|
sport = pd->hdr.udp->uh_sport;
|
|
|
|
pd->sport = &pd->hdr.udp->uh_sport;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
|
|
|
|
nk->port[pd->didx] != dport) {
|
|
|
|
pf_change_ap(daddr, &pd->hdr.udp->uh_dport,
|
|
|
|
pd->ip_sum, &pd->hdr.udp->uh_sum,
|
|
|
|
&nk->addr[pd->didx],
|
|
|
|
nk->port[pd->didx], 1, af);
|
|
|
|
dport = pd->hdr.udp->uh_dport;
|
|
|
|
pd->dport = &pd->hdr.udp->uh_dport;
|
|
|
|
}
|
|
|
|
rewrite++;
|
|
|
|
break;
|
2004-02-26 02:04:28 +00:00
|
|
|
#ifdef INET
|
2011-06-28 11:57:25 +00:00
|
|
|
case IPPROTO_ICMP:
|
|
|
|
nk->port[0] = nk->port[1];
|
|
|
|
if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET))
|
2004-02-26 02:04:28 +00:00
|
|
|
pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
|
2011-06-28 11:57:25 +00:00
|
|
|
nk->addr[pd->sidx].v4.s_addr, 0);
|
|
|
|
|
|
|
|
if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET))
|
|
|
|
pf_change_a(&daddr->v4.s_addr, pd->ip_sum,
|
|
|
|
nk->addr[pd->didx].v4.s_addr, 0);
|
|
|
|
|
|
|
|
if (nk->port[1] != pd->hdr.icmp->icmp_id) {
|
Commit imported changes to HEAD:
pf_ioctl.c Revision 1.153 Sun Aug 7 11:37:33 2005 UTC by dhartmei
| verify ticket in DIOCADDADDR, from Boris Polevoy, ok deraadt@
pf_ioctl.c Revision 1.158 Mon Sep 5 14:51:08 2005 UTC by dhartmei
| in DIOCCHANGERULE, properly initialize table, if used in NAT rule.
| from Boris Polevoy <vapcom at mail dot ru>, ok mcbride@
pf.c Revision 1.502 Mon Aug 22 11:54:25 2005 UTC by dhartmei
| when nat'ing icmp 'connections', replace icmp id with proxy values
| (similar to proxy ports for tcp/udp). not all clients use
| per-invokation random ids, this allows multiple concurrent
| connections from such clients.
| thanks for testing to Rod Whitworth, "looks ok" markus@
pf.c Revision 1.501 Mon Aug 22 09:48:05 2005 UTC by dhartmei
| fix rdr to bitmask replacement address pool. patch from Max Laier,
| reported by Boris Polevoy, tested by Jean Debogue, ok henning@
Obtained from: OpenBSD
MFC after: 3 days
2005-09-08 15:06:52 +00:00
|
|
|
pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
|
2011-06-28 11:57:25 +00:00
|
|
|
pd->hdr.icmp->icmp_cksum, sport,
|
|
|
|
nk->port[1], 0);
|
|
|
|
pd->hdr.icmp->icmp_id = nk->port[1];
|
|
|
|
pd->sport = &pd->hdr.icmp->icmp_id;
|
|
|
|
}
|
|
|
|
m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
|
|
|
|
break;
|
2004-02-26 02:04:28 +00:00
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
2011-06-28 11:57:25 +00:00
|
|
|
case IPPROTO_ICMPV6:
|
|
|
|
nk->port[0] = nk->port[1];
|
|
|
|
if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET6))
|
2004-02-26 02:04:28 +00:00
|
|
|
pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
|
2011-06-28 11:57:25 +00:00
|
|
|
&nk->addr[pd->sidx], 0);
|
|
|
|
|
|
|
|
if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6))
|
|
|
|
pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
|
|
|
|
&nk->addr[pd->didx], 0);
|
|
|
|
rewrite++;
|
|
|
|
break;
|
|
|
|
#endif /* INET */
|
|
|
|
default:
|
2004-02-26 02:04:28 +00:00
|
|
|
switch (af) {
|
|
|
|
#ifdef INET
|
|
|
|
case AF_INET:
|
2011-06-28 11:57:25 +00:00
|
|
|
if (PF_ANEQ(saddr,
|
|
|
|
&nk->addr[pd->sidx], AF_INET))
|
|
|
|
pf_change_a(&saddr->v4.s_addr,
|
|
|
|
pd->ip_sum,
|
|
|
|
nk->addr[pd->sidx].v4.s_addr, 0);
|
|
|
|
|
|
|
|
if (PF_ANEQ(daddr,
|
|
|
|
&nk->addr[pd->didx], AF_INET))
|
|
|
|
pf_change_a(&daddr->v4.s_addr,
|
|
|
|
pd->ip_sum,
|
|
|
|
nk->addr[pd->didx].v4.s_addr, 0);
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
2011-06-28 11:57:25 +00:00
|
|
|
if (PF_ANEQ(saddr,
|
|
|
|
&nk->addr[pd->sidx], AF_INET6))
|
|
|
|
PF_ACPY(saddr, &nk->addr[pd->sidx], af);
|
|
|
|
|
|
|
|
if (PF_ANEQ(daddr,
|
|
|
|
&nk->addr[pd->didx], AF_INET6))
|
|
|
|
PF_ACPY(saddr, &nk->addr[pd->didx], af);
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif /* INET */
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
break;
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
if (nr->natpass)
|
|
|
|
r = NULL;
|
|
|
|
pd->nat_rule = nr;
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
while (r != NULL) {
|
|
|
|
r->evaluations++;
|
2007-07-03 12:16:07 +00:00
|
|
|
if (pfi_kif_match(r->kif, kif) == r->ifnot)
|
2004-02-26 02:04:28 +00:00
|
|
|
r = r->skip[PF_SKIP_IFP].ptr;
|
|
|
|
else if (r->direction && r->direction != direction)
|
|
|
|
r = r->skip[PF_SKIP_DIR].ptr;
|
|
|
|
else if (r->af && r->af != af)
|
|
|
|
r = r->skip[PF_SKIP_AF].ptr;
|
|
|
|
else if (r->proto && r->proto != pd->proto)
|
|
|
|
r = r->skip[PF_SKIP_PROTO].ptr;
|
2007-07-03 12:16:07 +00:00
|
|
|
else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
|
|
|
|
r->src.neg, kif))
|
2004-02-26 02:04:28 +00:00
|
|
|
r = r->skip[PF_SKIP_SRC_ADDR].ptr;
|
2011-06-28 11:57:25 +00:00
|
|
|
/* tcp/udp only. port_op always 0 in other cases */
|
|
|
|
else if (r->src.port_op && !pf_match_port(r->src.port_op,
|
|
|
|
r->src.port[0], r->src.port[1], sport))
|
|
|
|
r = r->skip[PF_SKIP_SRC_PORT].ptr;
|
2007-07-03 12:16:07 +00:00
|
|
|
else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
|
|
|
|
r->dst.neg, NULL))
|
2004-02-26 02:04:28 +00:00
|
|
|
r = r->skip[PF_SKIP_DST_ADDR].ptr;
|
2011-06-28 11:57:25 +00:00
|
|
|
/* tcp/udp only. port_op always 0 in other cases */
|
|
|
|
else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
|
|
|
|
r->dst.port[0], r->dst.port[1], dport))
|
|
|
|
r = r->skip[PF_SKIP_DST_PORT].ptr;
|
|
|
|
/* icmp only. type always 0 in other cases */
|
2004-02-26 02:04:28 +00:00
|
|
|
else if (r->type && r->type != icmptype + 1)
|
|
|
|
r = TAILQ_NEXT(r, entries);
|
2011-06-28 11:57:25 +00:00
|
|
|
/* icmp only. type always 0 in other cases */
|
2004-02-26 02:04:28 +00:00
|
|
|
else if (r->code && r->code != icmpcode + 1)
|
|
|
|
r = TAILQ_NEXT(r, entries);
|
2007-07-03 12:16:07 +00:00
|
|
|
else if (r->tos && !(r->tos == pd->tos))
|
2004-02-26 02:04:28 +00:00
|
|
|
r = TAILQ_NEXT(r, entries);
|
|
|
|
else if (r->rule_flag & PFRULE_FRAGMENT)
|
|
|
|
r = TAILQ_NEXT(r, entries);
|
2011-06-28 11:57:25 +00:00
|
|
|
else if (pd->proto == IPPROTO_TCP &&
|
|
|
|
(r->flagset & th->th_flags) != r->flags)
|
2004-02-26 02:04:28 +00:00
|
|
|
r = TAILQ_NEXT(r, entries);
|
2011-06-28 11:57:25 +00:00
|
|
|
/* tcp/udp only. uid.op always 0 in other cases */
|
|
|
|
else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pf_socket_lookup(direction, pd, inp), 1)) &&
|
|
|
|
#else
|
|
|
|
pf_socket_lookup(direction, pd), 1)) &&
|
|
|
|
#endif
|
|
|
|
!pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
|
|
|
|
pd->lookup.uid))
|
2004-02-26 02:04:28 +00:00
|
|
|
r = TAILQ_NEXT(r, entries);
|
2011-06-28 11:57:25 +00:00
|
|
|
/* tcp/udp only. gid.op always 0 in other cases */
|
|
|
|
else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pf_socket_lookup(direction, pd, inp), 1)) &&
|
|
|
|
#else
|
|
|
|
pf_socket_lookup(direction, pd), 1)) &&
|
|
|
|
#endif
|
|
|
|
!pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
|
|
|
|
pd->lookup.gid))
|
2004-02-26 02:04:28 +00:00
|
|
|
r = TAILQ_NEXT(r, entries);
|
2011-06-28 11:57:25 +00:00
|
|
|
else if (r->prob &&
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
r->prob <= arc4random())
|
|
|
|
#else
|
|
|
|
r->prob <= arc4random_uniform(UINT_MAX - 1) + 1)
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
r = TAILQ_NEXT(r, entries);
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag))
|
|
|
|
#else
|
|
|
|
else if (r->match_tag && !pf_match_tag(m, r, &tag))
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
r = TAILQ_NEXT(r, entries);
|
2011-06-28 11:57:25 +00:00
|
|
|
else if (r->os_fingerprint != PF_OSFP_ANY &&
|
|
|
|
(pd->proto != IPPROTO_TCP || !pf_osfp_match(
|
|
|
|
pf_osfp_fingerprint(pd, m, off, th),
|
|
|
|
r->os_fingerprint)))
|
2004-02-26 02:04:28 +00:00
|
|
|
r = TAILQ_NEXT(r, entries);
|
|
|
|
else {
|
|
|
|
if (r->tag)
|
|
|
|
tag = r->tag;
|
2007-07-03 12:16:07 +00:00
|
|
|
if (r->rtableid >= 0)
|
|
|
|
rtableid = r->rtableid;
|
2004-02-26 02:04:28 +00:00
|
|
|
if (r->anchor == NULL) {
|
2007-07-03 12:16:07 +00:00
|
|
|
match = 1;
|
2004-02-26 02:04:28 +00:00
|
|
|
*rm = r;
|
|
|
|
*am = a;
|
|
|
|
*rsm = ruleset;
|
|
|
|
if ((*rm)->quick)
|
|
|
|
break;
|
|
|
|
r = TAILQ_NEXT(r, entries);
|
|
|
|
} else
|
2005-05-03 16:43:32 +00:00
|
|
|
pf_step_into_anchor(&asd, &ruleset,
|
2007-07-03 12:16:07 +00:00
|
|
|
PF_RULESET_FILTER, &r, &a, &match);
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
2007-07-03 12:16:07 +00:00
|
|
|
if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
|
|
|
|
PF_RULESET_FILTER, &r, &a, &match))
|
|
|
|
break;
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
r = *rm;
|
|
|
|
a = *am;
|
|
|
|
ruleset = *rsm;
|
|
|
|
|
|
|
|
REASON_SET(&reason, PFRES_MATCH);
|
2004-06-16 23:24:02 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
if (r->log || (nr != NULL && nr->log)) {
|
|
|
|
if (rewrite)
|
|
|
|
m_copyback(m, off, hdrlen, pd->hdr.any);
|
2007-07-03 12:16:07 +00:00
|
|
|
PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
|
|
|
|
a, ruleset, pd);
|
2011-06-28 11:57:25 +00:00
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
if ((r->action == PF_DROP) &&
|
2011-06-28 11:57:25 +00:00
|
|
|
((r->rule_flag & PFRULE_RETURNRST) ||
|
|
|
|
(r->rule_flag & PFRULE_RETURNICMP) ||
|
2004-02-26 02:04:28 +00:00
|
|
|
(r->rule_flag & PFRULE_RETURN))) {
|
2011-06-28 11:57:25 +00:00
|
|
|
/* undo NAT changes, if they have taken place */
|
2004-06-16 23:24:02 +00:00
|
|
|
if (nr != NULL) {
|
2011-06-28 11:57:25 +00:00
|
|
|
PF_ACPY(saddr, &sk->addr[pd->sidx], af);
|
|
|
|
PF_ACPY(daddr, &sk->addr[pd->didx], af);
|
|
|
|
if (pd->sport)
|
|
|
|
*pd->sport = sk->port[pd->sidx];
|
|
|
|
if (pd->dport)
|
|
|
|
*pd->dport = sk->port[pd->didx];
|
|
|
|
if (pd->proto_sum)
|
|
|
|
*pd->proto_sum = bproto_sum;
|
|
|
|
if (pd->ip_sum)
|
|
|
|
*pd->ip_sum = bip_sum;
|
|
|
|
m_copyback(m, off, hdrlen, pd->hdr.any);
|
|
|
|
}
|
|
|
|
if (pd->proto == IPPROTO_TCP &&
|
|
|
|
((r->rule_flag & PFRULE_RETURNRST) ||
|
|
|
|
(r->rule_flag & PFRULE_RETURN)) &&
|
|
|
|
!(th->th_flags & TH_RST)) {
|
|
|
|
u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
|
|
|
|
int len = 0;
|
|
|
|
#ifdef INET
|
|
|
|
struct ip *h4;
|
|
|
|
#endif
|
|
|
|
#ifdef INET6
|
|
|
|
struct ip6_hdr *h6;
|
|
|
|
#endif
|
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
switch (af) {
|
|
|
|
#ifdef INET
|
|
|
|
case AF_INET:
|
2011-06-28 11:57:25 +00:00
|
|
|
h4 = mtod(m, struct ip *);
|
|
|
|
len = ntohs(h4->ip_len) - off;
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
2011-06-28 11:57:25 +00:00
|
|
|
h6 = mtod(m, struct ip6_hdr *);
|
|
|
|
len = ntohs(h6->ip6_plen) - (off - sizeof(*h6));
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af))
|
|
|
|
REASON_SET(&reason, PFRES_PROTCKSUM);
|
|
|
|
else {
|
|
|
|
if (th->th_flags & TH_SYN)
|
|
|
|
ack++;
|
|
|
|
if (th->th_flags & TH_FIN)
|
|
|
|
ack++;
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pf_send_tcp(m, r, af, pd->dst,
|
|
|
|
#else
|
|
|
|
pf_send_tcp(r, af, pd->dst,
|
|
|
|
#endif
|
|
|
|
pd->src, th->th_dport, th->th_sport,
|
|
|
|
ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
|
|
|
|
r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
} else if (pd->proto != IPPROTO_ICMP && af == AF_INET &&
|
|
|
|
r->return_icmp)
|
2004-02-26 02:04:28 +00:00
|
|
|
pf_send_icmp(m, r->return_icmp >> 8,
|
|
|
|
r->return_icmp & 255, af, r);
|
2011-06-28 11:57:25 +00:00
|
|
|
else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
|
|
|
|
r->return_icmp6)
|
2004-02-26 02:04:28 +00:00
|
|
|
pf_send_icmp(m, r->return_icmp6 >> 8,
|
|
|
|
r->return_icmp6 & 255, af, r);
|
|
|
|
}
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
if (r->action == PF_DROP)
|
|
|
|
goto cleanup;
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (pf_tag_packet(m, tag, rtableid, pd->pf_mtag)) {
|
|
|
|
#else
|
|
|
|
if (pf_tag_packet(m, tag, rtableid)) {
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
REASON_SET(&reason, PFRES_MEMORY);
|
2011-06-28 11:57:25 +00:00
|
|
|
goto cleanup;
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
if (!state_icmp && (r->keep_state || nr != NULL ||
|
|
|
|
(pd->flags & PFDESC_TCP_NORM))) {
|
|
|
|
int action;
|
|
|
|
action = pf_create_state(r, nr, a, pd, nsn, skw, sks, nk, sk, m,
|
|
|
|
off, sport, dport, &rewrite, kif, sm, tag, bproto_sum,
|
|
|
|
bip_sum, hdrlen);
|
|
|
|
if (action != PF_PASS)
|
|
|
|
return (action);
|
|
|
|
} else {
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (sk != NULL)
|
|
|
|
pool_put(&V_pf_state_key_pl, sk);
|
|
|
|
if (nk != NULL)
|
|
|
|
pool_put(&V_pf_state_key_pl, nk);
|
|
|
|
#else
|
|
|
|
if (sk != NULL)
|
|
|
|
pool_put(&pf_state_key_pl, sk);
|
|
|
|
if (nk != NULL)
|
|
|
|
pool_put(&pf_state_key_pl, nk);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
/* copy back packet headers if we performed NAT operations */
|
|
|
|
if (rewrite)
|
|
|
|
m_copyback(m, off, hdrlen, pd->hdr.any);
|
|
|
|
|
|
|
|
#if NPFSYNC > 0
|
|
|
|
if (*sm != NULL && !ISSET((*sm)->state_flags, PFSTATE_NOSYNC) &&
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
direction == PF_OUT && pfsync_up_ptr != NULL && pfsync_up_ptr()) {
|
|
|
|
#else
|
|
|
|
direction == PF_OUT && pfsync_up()) {
|
|
|
|
#endif
|
|
|
|
/*
|
|
|
|
* We want the state created, but we dont
|
|
|
|
* want to send this in case a partner
|
|
|
|
* firewall has to know about it to allow
|
|
|
|
* replies through it.
|
|
|
|
*/
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (pfsync_defer_ptr != NULL)
|
|
|
|
pfsync_defer_ptr(*sm, m);
|
|
|
|
#else
|
|
|
|
if (pfsync_defer(*sm, m))
|
|
|
|
#endif
|
|
|
|
return (PF_DEFER);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
return (PF_PASS);
|
2004-06-16 23:24:02 +00:00
|
|
|
|
|
|
|
cleanup:
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (sk != NULL)
|
|
|
|
pool_put(&V_pf_state_key_pl, sk);
|
|
|
|
if (nk != NULL)
|
|
|
|
pool_put(&V_pf_state_key_pl, nk);
|
|
|
|
#else
|
|
|
|
if (sk != NULL)
|
|
|
|
pool_put(&pf_state_key_pl, sk);
|
|
|
|
if (nk != NULL)
|
|
|
|
pool_put(&pf_state_key_pl, nk);
|
|
|
|
#endif
|
|
|
|
return (PF_DROP);
|
|
|
|
}
|
|
|
|
|
|
|
|
static __inline int
|
|
|
|
pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a,
|
|
|
|
struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *skw,
|
|
|
|
struct pf_state_key *sks, struct pf_state_key *nk, struct pf_state_key *sk,
|
|
|
|
struct mbuf *m, int off, u_int16_t sport, u_int16_t dport, int *rewrite,
|
|
|
|
struct pfi_kif *kif, struct pf_state **sm, int tag, u_int16_t bproto_sum,
|
|
|
|
u_int16_t bip_sum, int hdrlen)
|
|
|
|
{
|
|
|
|
struct pf_state *s = NULL;
|
|
|
|
struct pf_src_node *sn = NULL;
|
|
|
|
struct tcphdr *th = pd->hdr.tcp;
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
u_int16_t mss = V_tcp_mssdflt;
|
|
|
|
#else
|
|
|
|
u_int16_t mss = tcp_mssdflt;
|
|
|
|
#endif
|
|
|
|
u_short reason;
|
|
|
|
|
|
|
|
/* check maximums */
|
|
|
|
if (r->max_states && (r->states_cur >= r->max_states)) {
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
V_pf_status.lcounters[LCNT_STATES]++;
|
|
|
|
#else
|
|
|
|
pf_status.lcounters[LCNT_STATES]++;
|
|
|
|
#endif
|
|
|
|
REASON_SET(&reason, PFRES_MAXSTATES);
|
|
|
|
return (PF_DROP);
|
|
|
|
}
|
|
|
|
/* src node for filter rule */
|
|
|
|
if ((r->rule_flag & PFRULE_SRCTRACK ||
|
|
|
|
r->rpool.opts & PF_POOL_STICKYADDR) &&
|
|
|
|
pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) {
|
|
|
|
REASON_SET(&reason, PFRES_SRCLIMIT);
|
|
|
|
goto csfailed;
|
|
|
|
}
|
|
|
|
/* src node for translation rule */
|
|
|
|
if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
|
|
|
|
pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af)) {
|
|
|
|
REASON_SET(&reason, PFRES_SRCLIMIT);
|
|
|
|
goto csfailed;
|
|
|
|
}
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
s = pool_get(&V_pf_state_pl, PR_NOWAIT | PR_ZERO);
|
|
|
|
#else
|
|
|
|
s = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO);
|
|
|
|
#endif
|
|
|
|
if (s == NULL) {
|
|
|
|
REASON_SET(&reason, PFRES_MEMORY);
|
|
|
|
goto csfailed;
|
|
|
|
}
|
|
|
|
s->rule.ptr = r;
|
|
|
|
s->nat_rule.ptr = nr;
|
|
|
|
s->anchor.ptr = a;
|
|
|
|
STATE_INC_COUNTERS(s);
|
|
|
|
if (r->allow_opts)
|
|
|
|
s->state_flags |= PFSTATE_ALLOWOPTS;
|
|
|
|
if (r->rule_flag & PFRULE_STATESLOPPY)
|
|
|
|
s->state_flags |= PFSTATE_SLOPPY;
|
|
|
|
if (r->rule_flag & PFRULE_PFLOW)
|
|
|
|
s->state_flags |= PFSTATE_PFLOW;
|
|
|
|
s->log = r->log & PF_LOG_ALL;
|
|
|
|
s->sync_state = PFSYNC_S_NONE;
|
|
|
|
if (nr != NULL)
|
|
|
|
s->log |= nr->log & PF_LOG_ALL;
|
|
|
|
switch (pd->proto) {
|
|
|
|
case IPPROTO_TCP:
|
|
|
|
s->src.seqlo = ntohl(th->th_seq);
|
|
|
|
s->src.seqhi = s->src.seqlo + pd->p_len + 1;
|
|
|
|
if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
|
|
|
|
r->keep_state == PF_STATE_MODULATE) {
|
|
|
|
/* Generate sequence number modulator */
|
|
|
|
if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) ==
|
|
|
|
0)
|
|
|
|
s->src.seqdiff = 1;
|
|
|
|
pf_change_a(&th->th_seq, &th->th_sum,
|
|
|
|
htonl(s->src.seqlo + s->src.seqdiff), 0);
|
|
|
|
*rewrite = 1;
|
|
|
|
} else
|
|
|
|
s->src.seqdiff = 0;
|
|
|
|
if (th->th_flags & TH_SYN) {
|
|
|
|
s->src.seqhi++;
|
|
|
|
s->src.wscale = pf_get_wscale(m, off,
|
|
|
|
th->th_off, pd->af);
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
s->src.max_win = MAX(ntohs(th->th_win), 1);
|
|
|
|
if (s->src.wscale & PF_WSCALE_MASK) {
|
|
|
|
/* Remove scale factor from initial window */
|
|
|
|
int win = s->src.max_win;
|
|
|
|
win += 1 << (s->src.wscale & PF_WSCALE_MASK);
|
|
|
|
s->src.max_win = (win - 1) >>
|
|
|
|
(s->src.wscale & PF_WSCALE_MASK);
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
if (th->th_flags & TH_FIN)
|
|
|
|
s->src.seqhi++;
|
|
|
|
s->dst.seqhi = 1;
|
|
|
|
s->dst.max_win = 1;
|
|
|
|
s->src.state = TCPS_SYN_SENT;
|
|
|
|
s->dst.state = TCPS_CLOSED;
|
|
|
|
s->timeout = PFTM_TCP_FIRST_PACKET;
|
|
|
|
break;
|
|
|
|
case IPPROTO_UDP:
|
|
|
|
s->src.state = PFUDPS_SINGLE;
|
|
|
|
s->dst.state = PFUDPS_NO_TRAFFIC;
|
|
|
|
s->timeout = PFTM_UDP_FIRST_PACKET;
|
|
|
|
break;
|
|
|
|
case IPPROTO_ICMP:
|
|
|
|
#ifdef INET6
|
|
|
|
case IPPROTO_ICMPV6:
|
|
|
|
#endif
|
|
|
|
s->timeout = PFTM_ICMP_FIRST_PACKET;
|
|
|
|
break;
|
|
|
|
default:
|
2004-02-26 02:04:28 +00:00
|
|
|
s->src.state = PFOTHERS_SINGLE;
|
|
|
|
s->dst.state = PFOTHERS_NO_TRAFFIC;
|
|
|
|
s->timeout = PFTM_OTHER_FIRST_PACKET;
|
2011-06-28 11:57:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
s->creation = time_second;
|
|
|
|
s->expire = time_second;
|
|
|
|
|
|
|
|
if (sn != NULL) {
|
|
|
|
s->src_node = sn;
|
|
|
|
s->src_node->states++;
|
|
|
|
}
|
|
|
|
if (nsn != NULL) {
|
|
|
|
/* XXX We only modify one side for now. */
|
|
|
|
PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af);
|
|
|
|
s->nat_src_node = nsn;
|
|
|
|
s->nat_src_node->states++;
|
|
|
|
}
|
|
|
|
if (pd->proto == IPPROTO_TCP) {
|
|
|
|
if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
|
|
|
|
off, pd, th, &s->src, &s->dst)) {
|
|
|
|
REASON_SET(&reason, PFRES_MEMORY);
|
|
|
|
pf_src_tree_remove_state(s);
|
|
|
|
STATE_DEC_COUNTERS(s);
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pool_put(&V_pf_state_pl, s);
|
|
|
|
#else
|
|
|
|
pool_put(&pf_state_pl, s);
|
|
|
|
#endif
|
|
|
|
return (PF_DROP);
|
2004-06-16 23:24:02 +00:00
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
|
|
|
|
pf_normalize_tcp_stateful(m, off, pd, &reason, th, s,
|
|
|
|
&s->src, &s->dst, rewrite)) {
|
|
|
|
/* This really shouldn't happen!!! */
|
|
|
|
DPFPRINTF(PF_DEBUG_URGENT,
|
|
|
|
("pf_normalize_tcp_stateful failed on first pkt"));
|
|
|
|
pf_normalize_tcp_cleanup(s);
|
2004-06-16 23:24:02 +00:00
|
|
|
pf_src_tree_remove_state(s);
|
2005-05-03 16:43:32 +00:00
|
|
|
STATE_DEC_COUNTERS(s);
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pool_put(&V_pf_state_pl, s);
|
|
|
|
#else
|
2004-02-26 02:04:28 +00:00
|
|
|
pool_put(&pf_state_pl, s);
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
return (PF_DROP);
|
2005-05-03 16:43:32 +00:00
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
s->direction = pd->dir;
|
|
|
|
|
|
|
|
if (sk == NULL && pf_state_key_setup(pd, nr, &skw, &sks, &sk, &nk,
|
|
|
|
pd->src, pd->dst, sport, dport))
|
|
|
|
goto csfailed;
|
|
|
|
|
|
|
|
if (pf_state_insert(BOUND_IFACE(r, kif), skw, sks, s)) {
|
|
|
|
if (pd->proto == IPPROTO_TCP)
|
|
|
|
pf_normalize_tcp_cleanup(s);
|
|
|
|
REASON_SET(&reason, PFRES_STATEINS);
|
|
|
|
pf_src_tree_remove_state(s);
|
|
|
|
STATE_DEC_COUNTERS(s);
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pool_put(&V_pf_state_pl, s);
|
|
|
|
#else
|
|
|
|
pool_put(&pf_state_pl, s);
|
|
|
|
#endif
|
|
|
|
return (PF_DROP);
|
|
|
|
} else
|
|
|
|
*sm = s;
|
|
|
|
|
|
|
|
pf_set_rt_ifp(s, pd->src); /* needs s->state_key set */
|
|
|
|
if (tag > 0) {
|
|
|
|
pf_tag_ref(tag);
|
|
|
|
s->tag = tag;
|
|
|
|
}
|
|
|
|
if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) ==
|
|
|
|
TH_SYN && r->keep_state == PF_STATE_SYNPROXY) {
|
|
|
|
s->src.state = PF_TCPS_PROXY_SRC;
|
|
|
|
/* undo NAT changes, if they have taken place */
|
|
|
|
if (nr != NULL) {
|
|
|
|
struct pf_state_key *skt = s->key[PF_SK_WIRE];
|
|
|
|
if (pd->dir == PF_OUT)
|
|
|
|
skt = s->key[PF_SK_STACK];
|
|
|
|
PF_ACPY(pd->src, &skt->addr[pd->sidx], pd->af);
|
|
|
|
PF_ACPY(pd->dst, &skt->addr[pd->didx], pd->af);
|
|
|
|
if (pd->sport)
|
|
|
|
*pd->sport = skt->port[pd->sidx];
|
|
|
|
if (pd->dport)
|
|
|
|
*pd->dport = skt->port[pd->didx];
|
|
|
|
if (pd->proto_sum)
|
|
|
|
*pd->proto_sum = bproto_sum;
|
|
|
|
if (pd->ip_sum)
|
|
|
|
*pd->ip_sum = bip_sum;
|
|
|
|
m_copyback(m, off, hdrlen, pd->hdr.any);
|
|
|
|
}
|
|
|
|
s->src.seqhi = htonl(arc4random());
|
|
|
|
/* Find mss option */
|
|
|
|
mss = pf_get_mss(m, off, th->th_off, pd->af);
|
|
|
|
mss = pf_calc_mss(pd->src, pd->af, mss);
|
|
|
|
mss = pf_calc_mss(pd->dst, pd->af, mss);
|
|
|
|
s->src.mss = mss;
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pf_send_tcp(NULL, r, pd->af, pd->dst, pd->src, th->th_dport,
|
|
|
|
#else
|
|
|
|
pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport,
|
|
|
|
#endif
|
|
|
|
th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
|
|
|
|
TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL);
|
|
|
|
REASON_SET(&reason, PFRES_SYNPROXY);
|
|
|
|
return (PF_SYNPROXY_DROP);
|
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
return (PF_PASS);
|
2011-06-28 11:57:25 +00:00
|
|
|
|
|
|
|
csfailed:
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (sk != NULL)
|
|
|
|
pool_put(&V_pf_state_key_pl, sk);
|
|
|
|
if (nk != NULL)
|
|
|
|
pool_put(&V_pf_state_key_pl, nk);
|
|
|
|
#else
|
|
|
|
if (sk != NULL)
|
|
|
|
pool_put(&pf_state_key_pl, sk);
|
|
|
|
if (nk != NULL)
|
|
|
|
pool_put(&pf_state_key_pl, nk);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (sn != NULL && sn->states == 0 && sn->expire == 0) {
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
RB_REMOVE(pf_src_tree, &V_tree_src_tracking, sn);
|
|
|
|
V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
|
|
|
|
V_pf_status.src_nodes--;
|
|
|
|
pool_put(&V_pf_src_tree_pl, sn);
|
|
|
|
#else
|
|
|
|
RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
|
|
|
|
pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
|
|
|
|
pf_status.src_nodes--;
|
|
|
|
pool_put(&pf_src_tree_pl, sn);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0) {
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
RB_REMOVE(pf_src_tree, &V_tree_src_tracking, nsn);
|
|
|
|
V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
|
|
|
|
V_pf_status.src_nodes--;
|
|
|
|
pool_put(&V_pf_src_tree_pl, nsn);
|
|
|
|
#else
|
|
|
|
RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
|
|
|
|
pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
|
|
|
|
pf_status.src_nodes--;
|
|
|
|
pool_put(&pf_src_tree_pl, nsn);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
return (PF_DROP);
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int
|
2004-06-16 23:24:02 +00:00
|
|
|
pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
|
2004-02-26 02:04:28 +00:00
|
|
|
struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
|
|
|
|
struct pf_ruleset **rsm)
|
|
|
|
{
|
|
|
|
struct pf_rule *r, *a = NULL;
|
|
|
|
struct pf_ruleset *ruleset = NULL;
|
|
|
|
sa_family_t af = pd->af;
|
|
|
|
u_short reason;
|
|
|
|
int tag = -1;
|
2005-05-03 16:43:32 +00:00
|
|
|
int asd = 0;
|
2007-07-03 12:16:07 +00:00
|
|
|
int match = 0;
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
|
|
|
|
while (r != NULL) {
|
|
|
|
r->evaluations++;
|
2007-07-03 12:16:07 +00:00
|
|
|
if (pfi_kif_match(r->kif, kif) == r->ifnot)
|
2004-02-26 02:04:28 +00:00
|
|
|
r = r->skip[PF_SKIP_IFP].ptr;
|
|
|
|
else if (r->direction && r->direction != direction)
|
|
|
|
r = r->skip[PF_SKIP_DIR].ptr;
|
|
|
|
else if (r->af && r->af != af)
|
|
|
|
r = r->skip[PF_SKIP_AF].ptr;
|
|
|
|
else if (r->proto && r->proto != pd->proto)
|
|
|
|
r = r->skip[PF_SKIP_PROTO].ptr;
|
2007-07-03 12:16:07 +00:00
|
|
|
else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
|
|
|
|
r->src.neg, kif))
|
2004-02-26 02:04:28 +00:00
|
|
|
r = r->skip[PF_SKIP_SRC_ADDR].ptr;
|
2007-07-03 12:16:07 +00:00
|
|
|
else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
|
|
|
|
r->dst.neg, NULL))
|
2004-02-26 02:04:28 +00:00
|
|
|
r = r->skip[PF_SKIP_DST_ADDR].ptr;
|
2007-07-03 12:16:07 +00:00
|
|
|
else if (r->tos && !(r->tos == pd->tos))
|
2004-02-26 02:04:28 +00:00
|
|
|
r = TAILQ_NEXT(r, entries);
|
2007-11-21 10:12:52 +00:00
|
|
|
else if (r->os_fingerprint != PF_OSFP_ANY)
|
|
|
|
r = TAILQ_NEXT(r, entries);
|
|
|
|
else if (pd->proto == IPPROTO_UDP &&
|
|
|
|
(r->src.port_op || r->dst.port_op))
|
|
|
|
r = TAILQ_NEXT(r, entries);
|
|
|
|
else if (pd->proto == IPPROTO_TCP &&
|
|
|
|
(r->src.port_op || r->dst.port_op || r->flagset))
|
|
|
|
r = TAILQ_NEXT(r, entries);
|
|
|
|
else if ((pd->proto == IPPROTO_ICMP ||
|
|
|
|
pd->proto == IPPROTO_ICMPV6) &&
|
|
|
|
(r->type || r->code))
|
2004-02-26 02:04:28 +00:00
|
|
|
r = TAILQ_NEXT(r, entries);
|
2011-06-28 11:57:25 +00:00
|
|
|
else if (r->prob && r->prob <=
|
|
|
|
(arc4random() % (UINT_MAX - 1) + 1))
|
2004-02-26 02:04:28 +00:00
|
|
|
r = TAILQ_NEXT(r, entries);
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag))
|
|
|
|
#else
|
|
|
|
else if (r->match_tag && !pf_match_tag(m, r, &tag))
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
r = TAILQ_NEXT(r, entries);
|
|
|
|
else {
|
|
|
|
if (r->anchor == NULL) {
|
2007-07-03 12:16:07 +00:00
|
|
|
match = 1;
|
2004-02-26 02:04:28 +00:00
|
|
|
*rm = r;
|
|
|
|
*am = a;
|
|
|
|
*rsm = ruleset;
|
|
|
|
if ((*rm)->quick)
|
|
|
|
break;
|
|
|
|
r = TAILQ_NEXT(r, entries);
|
|
|
|
} else
|
2005-05-03 16:43:32 +00:00
|
|
|
pf_step_into_anchor(&asd, &ruleset,
|
2007-07-03 12:16:07 +00:00
|
|
|
PF_RULESET_FILTER, &r, &a, &match);
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
2007-07-03 12:16:07 +00:00
|
|
|
if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
|
|
|
|
PF_RULESET_FILTER, &r, &a, &match))
|
|
|
|
break;
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
r = *rm;
|
|
|
|
a = *am;
|
|
|
|
ruleset = *rsm;
|
|
|
|
|
|
|
|
REASON_SET(&reason, PFRES_MATCH);
|
2004-06-16 23:24:02 +00:00
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
if (r->log)
|
2007-07-03 12:16:07 +00:00
|
|
|
PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset,
|
|
|
|
pd);
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
if (r->action != PF_PASS)
|
|
|
|
return (PF_DROP);
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (pf_tag_packet(m, tag, -1, pd->pf_mtag)) {
|
|
|
|
#else
|
|
|
|
if (pf_tag_packet(m, tag, -1)) {
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
REASON_SET(&reason, PFRES_MEMORY);
|
|
|
|
return (PF_DROP);
|
|
|
|
}
|
|
|
|
|
|
|
|
return (PF_PASS);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
2009-12-24 00:43:44 +00:00
|
|
|
pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst,
|
|
|
|
struct pf_state **state, struct pfi_kif *kif, struct mbuf *m, int off,
|
|
|
|
struct pf_pdesc *pd, u_short *reason, int *copyback)
|
2004-02-26 02:04:28 +00:00
|
|
|
{
|
2011-06-28 11:57:25 +00:00
|
|
|
struct tcphdr *th = pd->hdr.tcp;
|
|
|
|
u_int16_t win = ntohs(th->th_win);
|
|
|
|
u_int32_t ack, end, seq, orig_seq;
|
|
|
|
u_int8_t sws, dws;
|
|
|
|
int ackskew;
|
2008-08-04 14:08:55 +00:00
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
|
|
|
|
sws = src->wscale & PF_WSCALE_MASK;
|
|
|
|
dws = dst->wscale & PF_WSCALE_MASK;
|
|
|
|
} else
|
|
|
|
sws = dws = 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Sequence tracking algorithm from Guido van Rooij's paper:
|
|
|
|
* http://www.madison-gurkha.com/publications/tcp_filtering/
|
|
|
|
* tcp_filtering.ps
|
|
|
|
*/
|
|
|
|
|
2005-05-03 16:43:32 +00:00
|
|
|
orig_seq = seq = ntohl(th->th_seq);
|
2004-02-26 02:04:28 +00:00
|
|
|
if (src->seqlo == 0) {
|
|
|
|
/* First packet from this end. Set its state */
|
|
|
|
|
|
|
|
if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
|
|
|
|
src->scrub == NULL) {
|
|
|
|
if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
|
|
|
|
REASON_SET(reason, PFRES_MEMORY);
|
|
|
|
return (PF_DROP);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Deferred generation of sequence number modulator */
|
|
|
|
if (dst->seqdiff && !src->seqdiff) {
|
2011-06-28 11:57:25 +00:00
|
|
|
/* use random iss for the TCP server */
|
|
|
|
while ((src->seqdiff = arc4random() - seq) == 0)
|
2004-02-26 02:04:28 +00:00
|
|
|
;
|
|
|
|
ack = ntohl(th->th_ack) - dst->seqdiff;
|
|
|
|
pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
|
|
|
|
src->seqdiff), 0);
|
|
|
|
pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
|
2009-12-24 00:43:44 +00:00
|
|
|
*copyback = 1;
|
2004-02-26 02:04:28 +00:00
|
|
|
} else {
|
|
|
|
ack = ntohl(th->th_ack);
|
|
|
|
}
|
|
|
|
|
|
|
|
end = seq + pd->p_len;
|
|
|
|
if (th->th_flags & TH_SYN) {
|
|
|
|
end++;
|
|
|
|
if (dst->wscale & PF_WSCALE_FLAG) {
|
|
|
|
src->wscale = pf_get_wscale(m, off, th->th_off,
|
|
|
|
pd->af);
|
|
|
|
if (src->wscale & PF_WSCALE_FLAG) {
|
|
|
|
/* Remove scale factor from initial
|
|
|
|
* window */
|
|
|
|
sws = src->wscale & PF_WSCALE_MASK;
|
|
|
|
win = ((u_int32_t)win + (1 << sws) - 1)
|
|
|
|
>> sws;
|
|
|
|
dws = dst->wscale & PF_WSCALE_MASK;
|
|
|
|
} else {
|
|
|
|
/* fixup other window */
|
|
|
|
dst->max_win <<= dst->wscale &
|
|
|
|
PF_WSCALE_MASK;
|
|
|
|
/* in case of a retrans SYN|ACK */
|
|
|
|
dst->wscale = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (th->th_flags & TH_FIN)
|
|
|
|
end++;
|
|
|
|
|
|
|
|
src->seqlo = seq;
|
|
|
|
if (src->state < TCPS_SYN_SENT)
|
|
|
|
src->state = TCPS_SYN_SENT;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* May need to slide the window (seqhi may have been set by
|
|
|
|
* the crappy stack check or if we picked up the connection
|
|
|
|
* after establishment)
|
|
|
|
*/
|
|
|
|
if (src->seqhi == 1 ||
|
|
|
|
SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
|
|
|
|
src->seqhi = end + MAX(1, dst->max_win << dws);
|
|
|
|
if (win > src->max_win)
|
|
|
|
src->max_win = win;
|
|
|
|
|
|
|
|
} else {
|
|
|
|
ack = ntohl(th->th_ack) - dst->seqdiff;
|
|
|
|
if (src->seqdiff) {
|
|
|
|
/* Modulate sequence numbers */
|
|
|
|
pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
|
|
|
|
src->seqdiff), 0);
|
|
|
|
pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
|
2009-12-24 00:43:44 +00:00
|
|
|
*copyback = 1;
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
end = seq + pd->p_len;
|
|
|
|
if (th->th_flags & TH_SYN)
|
|
|
|
end++;
|
|
|
|
if (th->th_flags & TH_FIN)
|
|
|
|
end++;
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((th->th_flags & TH_ACK) == 0) {
|
|
|
|
/* Let it pass through the ack skew check */
|
|
|
|
ack = dst->seqlo;
|
|
|
|
} else if ((ack == 0 &&
|
|
|
|
(th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
|
|
|
|
/* broken tcp stacks do not set ack */
|
|
|
|
(dst->state < TCPS_SYN_SENT)) {
|
|
|
|
/*
|
|
|
|
* Many stacks (ours included) will set the ACK number in an
|
|
|
|
* FIN|ACK if the SYN times out -- no sequence to ACK.
|
|
|
|
*/
|
|
|
|
ack = dst->seqlo;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (seq == end) {
|
|
|
|
/* Ease sequencing restrictions on no data packets */
|
|
|
|
seq = src->seqlo;
|
|
|
|
end = seq;
|
|
|
|
}
|
|
|
|
|
|
|
|
ackskew = dst->seqlo - ack;
|
|
|
|
|
2007-07-03 12:16:07 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Need to demodulate the sequence numbers in any TCP SACK options
|
|
|
|
* (Selective ACK). We could optionally validate the SACK values
|
|
|
|
* against the current ACK window, either forwards or backwards, but
|
|
|
|
* I'm not confident that SACK has been implemented properly
|
|
|
|
* everywhere. It wouldn't surprise me if several stacks accidently
|
|
|
|
* SACK too far backwards of previously ACKed data. There really aren't
|
|
|
|
* any security implications of bad SACKing unless the target stack
|
|
|
|
* doesn't validate the option length correctly. Someone trying to
|
|
|
|
* spoof into a TCP connection won't bother blindly sending SACK
|
|
|
|
* options anyway.
|
|
|
|
*/
|
|
|
|
if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
|
|
|
|
if (pf_modulate_sack(m, off, pd, th, dst))
|
2009-12-24 00:43:44 +00:00
|
|
|
*copyback = 1;
|
2007-07-03 12:16:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
#define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */
|
2004-02-26 02:04:28 +00:00
|
|
|
if (SEQ_GEQ(src->seqhi, end) &&
|
|
|
|
/* Last octet inside other's window space */
|
|
|
|
SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
|
|
|
|
/* Retrans: not more than one window back */
|
|
|
|
(ackskew >= -MAXACKWINDOW) &&
|
|
|
|
/* Acking not more than one reassembled fragment backwards */
|
2005-05-03 16:43:32 +00:00
|
|
|
(ackskew <= (MAXACKWINDOW << sws)) &&
|
2004-02-26 02:04:28 +00:00
|
|
|
/* Acking not more than one window forward */
|
2005-05-03 16:43:32 +00:00
|
|
|
((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
|
2011-06-28 11:57:25 +00:00
|
|
|
(orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) ||
|
|
|
|
(pd->flags & PFDESC_IP_REAS) == 0)) {
|
2007-07-03 12:16:07 +00:00
|
|
|
/* Require an exact/+1 sequence match on resets when possible */
|
2005-05-03 16:43:32 +00:00
|
|
|
|
|
|
|
if (dst->scrub || src->scrub) {
|
|
|
|
if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
|
2009-12-24 00:43:44 +00:00
|
|
|
*state, src, dst, copyback))
|
2005-05-03 16:43:32 +00:00
|
|
|
return (PF_DROP);
|
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
/* update max window */
|
|
|
|
if (src->max_win < win)
|
|
|
|
src->max_win = win;
|
|
|
|
/* synchronize sequencing */
|
|
|
|
if (SEQ_GT(end, src->seqlo))
|
|
|
|
src->seqlo = end;
|
|
|
|
/* slide the window of what the other end can send */
|
|
|
|
if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
|
|
|
|
dst->seqhi = ack + MAX((win << sws), 1);
|
|
|
|
|
|
|
|
|
|
|
|
/* update states */
|
|
|
|
if (th->th_flags & TH_SYN)
|
|
|
|
if (src->state < TCPS_SYN_SENT)
|
|
|
|
src->state = TCPS_SYN_SENT;
|
|
|
|
if (th->th_flags & TH_FIN)
|
|
|
|
if (src->state < TCPS_CLOSING)
|
|
|
|
src->state = TCPS_CLOSING;
|
|
|
|
if (th->th_flags & TH_ACK) {
|
2005-05-03 16:43:32 +00:00
|
|
|
if (dst->state == TCPS_SYN_SENT) {
|
2004-02-26 02:04:28 +00:00
|
|
|
dst->state = TCPS_ESTABLISHED;
|
2005-05-03 16:43:32 +00:00
|
|
|
if (src->state == TCPS_ESTABLISHED &&
|
|
|
|
(*state)->src_node != NULL &&
|
|
|
|
pf_src_connlimit(state)) {
|
|
|
|
REASON_SET(reason, PFRES_SRCLIMIT);
|
|
|
|
return (PF_DROP);
|
|
|
|
}
|
|
|
|
} else if (dst->state == TCPS_CLOSING)
|
2004-02-26 02:04:28 +00:00
|
|
|
dst->state = TCPS_FIN_WAIT_2;
|
|
|
|
}
|
|
|
|
if (th->th_flags & TH_RST)
|
|
|
|
src->state = dst->state = TCPS_TIME_WAIT;
|
|
|
|
|
|
|
|
/* update expire time */
|
2004-02-26 02:34:12 +00:00
|
|
|
(*state)->expire = time_second;
|
2004-02-26 02:04:28 +00:00
|
|
|
if (src->state >= TCPS_FIN_WAIT_2 &&
|
|
|
|
dst->state >= TCPS_FIN_WAIT_2)
|
|
|
|
(*state)->timeout = PFTM_TCP_CLOSED;
|
2007-07-03 12:16:07 +00:00
|
|
|
else if (src->state >= TCPS_CLOSING &&
|
|
|
|
dst->state >= TCPS_CLOSING)
|
2004-02-26 02:04:28 +00:00
|
|
|
(*state)->timeout = PFTM_TCP_FIN_WAIT;
|
|
|
|
else if (src->state < TCPS_ESTABLISHED ||
|
|
|
|
dst->state < TCPS_ESTABLISHED)
|
|
|
|
(*state)->timeout = PFTM_TCP_OPENING;
|
|
|
|
else if (src->state >= TCPS_CLOSING ||
|
|
|
|
dst->state >= TCPS_CLOSING)
|
|
|
|
(*state)->timeout = PFTM_TCP_CLOSING;
|
|
|
|
else
|
|
|
|
(*state)->timeout = PFTM_TCP_ESTABLISHED;
|
|
|
|
|
|
|
|
/* Fall through to PASS packet */
|
|
|
|
|
|
|
|
} else if ((dst->state < TCPS_SYN_SENT ||
|
|
|
|
dst->state >= TCPS_FIN_WAIT_2 ||
|
|
|
|
src->state >= TCPS_FIN_WAIT_2) &&
|
|
|
|
SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
|
|
|
|
/* Within a window forward of the originating packet */
|
|
|
|
SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
|
|
|
|
/* Within a window backward of the originating packet */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This currently handles three situations:
|
|
|
|
* 1) Stupid stacks will shotgun SYNs before their peer
|
|
|
|
* replies.
|
|
|
|
* 2) When PF catches an already established stream (the
|
|
|
|
* firewall rebooted, the state table was flushed, routes
|
|
|
|
* changed...)
|
|
|
|
* 3) Packets get funky immediately after the connection
|
|
|
|
* closes (this should catch Solaris spurious ACK|FINs
|
|
|
|
* that web servers like to spew after a close)
|
|
|
|
*
|
|
|
|
* This must be a little more careful than the above code
|
|
|
|
* since packet floods will also be caught here. We don't
|
|
|
|
* update the TTL here to mitigate the damage of a packet
|
|
|
|
* flood and so the same code can handle awkward establishment
|
|
|
|
* and a loosened connection close.
|
|
|
|
* In the establishment case, a correct peer response will
|
|
|
|
* validate the connection, go through the normal state code
|
|
|
|
* and keep updating the state TTL.
|
|
|
|
*/
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (V_pf_status.debug >= PF_DEBUG_MISC) {
|
|
|
|
#else
|
2004-02-26 02:04:28 +00:00
|
|
|
if (pf_status.debug >= PF_DEBUG_MISC) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
printf("pf: loose state match: ");
|
|
|
|
pf_print_state(*state);
|
|
|
|
pf_print_flags(th->th_flags);
|
2007-07-03 12:16:07 +00:00
|
|
|
printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
|
2011-06-28 11:57:25 +00:00
|
|
|
"pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
|
2007-07-03 12:16:07 +00:00
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
pd->p_len, ackskew, (unsigned long long)(*state)->packets[0],
|
|
|
|
(unsigned long long)(*state)->packets[1],
|
2007-07-03 12:16:07 +00:00
|
|
|
#else
|
2011-06-28 11:57:25 +00:00
|
|
|
pd->p_len, ackskew, (*state)->packets[0],
|
|
|
|
(*state)->packets[1],
|
2007-07-03 12:16:07 +00:00
|
|
|
#endif
|
2011-06-28 11:57:25 +00:00
|
|
|
pd->dir == PF_IN ? "in" : "out",
|
|
|
|
pd->dir == (*state)->direction ? "fwd" : "rev");
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
|
2005-05-03 16:43:32 +00:00
|
|
|
if (dst->scrub || src->scrub) {
|
|
|
|
if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
|
2009-12-24 00:43:44 +00:00
|
|
|
*state, src, dst, copyback))
|
2005-05-03 16:43:32 +00:00
|
|
|
return (PF_DROP);
|
|
|
|
}
|
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
/* update max window */
|
|
|
|
if (src->max_win < win)
|
|
|
|
src->max_win = win;
|
|
|
|
/* synchronize sequencing */
|
|
|
|
if (SEQ_GT(end, src->seqlo))
|
|
|
|
src->seqlo = end;
|
|
|
|
/* slide the window of what the other end can send */
|
|
|
|
if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
|
|
|
|
dst->seqhi = ack + MAX((win << sws), 1);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Cannot set dst->seqhi here since this could be a shotgunned
|
|
|
|
* SYN and not an already established connection.
|
|
|
|
*/
|
|
|
|
|
|
|
|
if (th->th_flags & TH_FIN)
|
|
|
|
if (src->state < TCPS_CLOSING)
|
|
|
|
src->state = TCPS_CLOSING;
|
|
|
|
if (th->th_flags & TH_RST)
|
|
|
|
src->state = dst->state = TCPS_TIME_WAIT;
|
|
|
|
|
|
|
|
/* Fall through to PASS packet */
|
|
|
|
|
|
|
|
} else {
|
|
|
|
if ((*state)->dst.state == TCPS_SYN_SENT &&
|
|
|
|
(*state)->src.state == TCPS_SYN_SENT) {
|
|
|
|
/* Send RST for state mismatches during handshake */
|
2005-05-03 16:43:32 +00:00
|
|
|
if (!(th->th_flags & TH_RST))
|
2006-09-12 04:25:13 +00:00
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
|
2006-09-12 04:25:13 +00:00
|
|
|
#else
|
2004-02-26 02:04:28 +00:00
|
|
|
pf_send_tcp((*state)->rule.ptr, pd->af,
|
2006-09-12 04:25:13 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
pd->dst, pd->src, th->th_dport,
|
2005-05-03 16:43:32 +00:00
|
|
|
th->th_sport, ntohl(th->th_ack), 0,
|
|
|
|
TH_RST, 0, 0,
|
2007-07-03 12:16:07 +00:00
|
|
|
(*state)->rule.ptr->return_ttl, 1, 0,
|
2005-05-03 16:43:32 +00:00
|
|
|
pd->eh, kif->pfik_ifp);
|
2004-02-26 02:04:28 +00:00
|
|
|
src->seqlo = 0;
|
|
|
|
src->seqhi = 1;
|
|
|
|
src->max_win = 1;
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
} else if (V_pf_status.debug >= PF_DEBUG_MISC) {
|
|
|
|
#else
|
2004-02-26 02:04:28 +00:00
|
|
|
} else if (pf_status.debug >= PF_DEBUG_MISC) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
printf("pf: BAD state: ");
|
|
|
|
pf_print_state(*state);
|
|
|
|
pf_print_flags(th->th_flags);
|
2007-07-03 12:16:07 +00:00
|
|
|
printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
|
|
|
|
"pkts=%llu:%llu dir=%s,%s\n",
|
|
|
|
seq, orig_seq, ack, pd->p_len, ackskew,
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
(unsigned long long)(*state)->packets[0],
|
|
|
|
(unsigned long long)(*state)->packets[1],
|
|
|
|
#else
|
2004-02-26 02:04:28 +00:00
|
|
|
(*state)->packets[0], (*state)->packets[1],
|
2007-07-03 12:16:07 +00:00
|
|
|
#endif
|
2011-06-28 11:57:25 +00:00
|
|
|
pd->dir == PF_IN ? "in" : "out",
|
|
|
|
pd->dir == (*state)->direction ? "fwd" : "rev");
|
2004-02-26 02:04:28 +00:00
|
|
|
printf("pf: State failure on: %c %c %c %c | %c %c\n",
|
|
|
|
SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
|
|
|
|
SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
|
|
|
|
' ': '2',
|
|
|
|
(ackskew >= -MAXACKWINDOW) ? ' ' : '3',
|
|
|
|
(ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
|
|
|
|
SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
|
|
|
|
SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
|
|
|
|
}
|
2005-05-03 16:43:32 +00:00
|
|
|
REASON_SET(reason, PFRES_BADSTATE);
|
2004-02-26 02:04:28 +00:00
|
|
|
return (PF_DROP);
|
|
|
|
}
|
|
|
|
|
2009-12-24 00:43:44 +00:00
|
|
|
return (PF_PASS);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst,
|
|
|
|
struct pf_state **state, struct pf_pdesc *pd, u_short *reason)
|
|
|
|
{
|
|
|
|
struct tcphdr *th = pd->hdr.tcp;
|
|
|
|
|
|
|
|
if (th->th_flags & TH_SYN)
|
|
|
|
if (src->state < TCPS_SYN_SENT)
|
|
|
|
src->state = TCPS_SYN_SENT;
|
|
|
|
if (th->th_flags & TH_FIN)
|
|
|
|
if (src->state < TCPS_CLOSING)
|
|
|
|
src->state = TCPS_CLOSING;
|
|
|
|
if (th->th_flags & TH_ACK) {
|
|
|
|
if (dst->state == TCPS_SYN_SENT) {
|
|
|
|
dst->state = TCPS_ESTABLISHED;
|
|
|
|
if (src->state == TCPS_ESTABLISHED &&
|
|
|
|
(*state)->src_node != NULL &&
|
|
|
|
pf_src_connlimit(state)) {
|
|
|
|
REASON_SET(reason, PFRES_SRCLIMIT);
|
|
|
|
return (PF_DROP);
|
|
|
|
}
|
|
|
|
} else if (dst->state == TCPS_CLOSING) {
|
|
|
|
dst->state = TCPS_FIN_WAIT_2;
|
|
|
|
} else if (src->state == TCPS_SYN_SENT &&
|
|
|
|
dst->state < TCPS_SYN_SENT) {
|
|
|
|
/*
|
|
|
|
* Handle a special sloppy case where we only see one
|
|
|
|
* half of the connection. If there is a ACK after
|
|
|
|
* the initial SYN without ever seeing a packet from
|
|
|
|
* the destination, set the connection to established.
|
|
|
|
*/
|
|
|
|
dst->state = src->state = TCPS_ESTABLISHED;
|
|
|
|
if ((*state)->src_node != NULL &&
|
|
|
|
pf_src_connlimit(state)) {
|
|
|
|
REASON_SET(reason, PFRES_SRCLIMIT);
|
|
|
|
return (PF_DROP);
|
|
|
|
}
|
|
|
|
} else if (src->state == TCPS_CLOSING &&
|
|
|
|
dst->state == TCPS_ESTABLISHED &&
|
|
|
|
dst->seqlo == 0) {
|
|
|
|
/*
|
|
|
|
* Handle the closing of half connections where we
|
|
|
|
* don't see the full bidirectional FIN/ACK+ACK
|
|
|
|
* handshake.
|
|
|
|
*/
|
|
|
|
dst->state = TCPS_CLOSING;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (th->th_flags & TH_RST)
|
|
|
|
src->state = dst->state = TCPS_TIME_WAIT;
|
|
|
|
|
|
|
|
/* update expire time */
|
|
|
|
(*state)->expire = time_second;
|
|
|
|
if (src->state >= TCPS_FIN_WAIT_2 &&
|
|
|
|
dst->state >= TCPS_FIN_WAIT_2)
|
|
|
|
(*state)->timeout = PFTM_TCP_CLOSED;
|
|
|
|
else if (src->state >= TCPS_CLOSING &&
|
|
|
|
dst->state >= TCPS_CLOSING)
|
|
|
|
(*state)->timeout = PFTM_TCP_FIN_WAIT;
|
|
|
|
else if (src->state < TCPS_ESTABLISHED ||
|
|
|
|
dst->state < TCPS_ESTABLISHED)
|
|
|
|
(*state)->timeout = PFTM_TCP_OPENING;
|
|
|
|
else if (src->state >= TCPS_CLOSING ||
|
|
|
|
dst->state >= TCPS_CLOSING)
|
|
|
|
(*state)->timeout = PFTM_TCP_CLOSING;
|
|
|
|
else
|
|
|
|
(*state)->timeout = PFTM_TCP_ESTABLISHED;
|
|
|
|
|
|
|
|
return (PF_PASS);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
|
|
|
|
struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
|
|
|
|
u_short *reason)
|
|
|
|
{
|
2011-06-28 11:57:25 +00:00
|
|
|
struct pf_state_key_cmp key;
|
2009-12-24 00:43:44 +00:00
|
|
|
struct tcphdr *th = pd->hdr.tcp;
|
|
|
|
int copyback = 0;
|
|
|
|
struct pf_state_peer *src, *dst;
|
2011-06-28 11:57:25 +00:00
|
|
|
struct pf_state_key *sk;
|
2009-12-24 00:43:44 +00:00
|
|
|
|
|
|
|
key.af = pd->af;
|
|
|
|
key.proto = IPPROTO_TCP;
|
2011-06-28 11:57:25 +00:00
|
|
|
if (direction == PF_IN) { /* wire side, straight */
|
|
|
|
PF_ACPY(&key.addr[0], pd->src, key.af);
|
|
|
|
PF_ACPY(&key.addr[1], pd->dst, key.af);
|
|
|
|
key.port[0] = th->th_sport;
|
|
|
|
key.port[1] = th->th_dport;
|
|
|
|
} else { /* stack side, reverse */
|
|
|
|
PF_ACPY(&key.addr[1], pd->src, key.af);
|
|
|
|
PF_ACPY(&key.addr[0], pd->dst, key.af);
|
|
|
|
key.port[1] = th->th_sport;
|
|
|
|
key.port[0] = th->th_dport;
|
2009-12-24 00:43:44 +00:00
|
|
|
}
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
|
|
|
|
#else
|
|
|
|
STATE_LOOKUP(kif, &key, direction, *state, m);
|
|
|
|
#endif
|
2009-12-24 00:43:44 +00:00
|
|
|
|
|
|
|
if (direction == (*state)->direction) {
|
|
|
|
src = &(*state)->src;
|
|
|
|
dst = &(*state)->dst;
|
|
|
|
} else {
|
|
|
|
src = &(*state)->dst;
|
|
|
|
dst = &(*state)->src;
|
|
|
|
}
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
sk = (*state)->key[pd->didx];
|
|
|
|
|
2009-12-24 00:43:44 +00:00
|
|
|
if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
|
|
|
|
if (direction != (*state)->direction) {
|
|
|
|
REASON_SET(reason, PFRES_SYNPROXY);
|
|
|
|
return (PF_SYNPROXY_DROP);
|
|
|
|
}
|
|
|
|
if (th->th_flags & TH_SYN) {
|
|
|
|
if (ntohl(th->th_seq) != (*state)->src.seqlo) {
|
|
|
|
REASON_SET(reason, PFRES_SYNPROXY);
|
|
|
|
return (PF_DROP);
|
|
|
|
}
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst,
|
|
|
|
#else
|
|
|
|
pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
|
|
|
|
#endif
|
|
|
|
pd->src, th->th_dport, th->th_sport,
|
|
|
|
(*state)->src.seqhi, ntohl(th->th_seq) + 1,
|
|
|
|
TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1,
|
|
|
|
0, NULL, NULL);
|
|
|
|
REASON_SET(reason, PFRES_SYNPROXY);
|
|
|
|
return (PF_SYNPROXY_DROP);
|
|
|
|
} else if (!(th->th_flags & TH_ACK) ||
|
|
|
|
(ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
|
|
|
|
(ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
|
|
|
|
REASON_SET(reason, PFRES_SYNPROXY);
|
|
|
|
return (PF_DROP);
|
|
|
|
} else if ((*state)->src_node != NULL &&
|
|
|
|
pf_src_connlimit(state)) {
|
|
|
|
REASON_SET(reason, PFRES_SRCLIMIT);
|
|
|
|
return (PF_DROP);
|
|
|
|
} else
|
|
|
|
(*state)->src.state = PF_TCPS_PROXY_DST;
|
|
|
|
}
|
|
|
|
if ((*state)->src.state == PF_TCPS_PROXY_DST) {
|
|
|
|
if (direction == (*state)->direction) {
|
|
|
|
if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
|
|
|
|
(ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
|
|
|
|
(ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
|
|
|
|
REASON_SET(reason, PFRES_SYNPROXY);
|
|
|
|
return (PF_DROP);
|
|
|
|
}
|
|
|
|
(*state)->src.max_win = MAX(ntohs(th->th_win), 1);
|
|
|
|
if ((*state)->dst.seqhi == 1)
|
|
|
|
(*state)->dst.seqhi = htonl(arc4random());
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
|
|
|
|
#else
|
2011-06-28 11:57:25 +00:00
|
|
|
pf_send_tcp((*state)->rule.ptr, pd->af,
|
2009-12-24 00:43:44 +00:00
|
|
|
#endif
|
2011-06-28 11:57:25 +00:00
|
|
|
&sk->addr[pd->sidx], &sk->addr[pd->didx],
|
|
|
|
sk->port[pd->sidx], sk->port[pd->didx],
|
2009-12-24 00:43:44 +00:00
|
|
|
(*state)->dst.seqhi, 0, TH_SYN, 0,
|
|
|
|
(*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL);
|
|
|
|
REASON_SET(reason, PFRES_SYNPROXY);
|
|
|
|
return (PF_SYNPROXY_DROP);
|
|
|
|
} else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
|
|
|
|
(TH_SYN|TH_ACK)) ||
|
|
|
|
(ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) {
|
|
|
|
REASON_SET(reason, PFRES_SYNPROXY);
|
|
|
|
return (PF_DROP);
|
|
|
|
} else {
|
|
|
|
(*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
|
|
|
|
(*state)->dst.seqlo = ntohl(th->th_seq);
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst,
|
|
|
|
#else
|
|
|
|
pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
|
|
|
|
#endif
|
|
|
|
pd->src, th->th_dport, th->th_sport,
|
|
|
|
ntohl(th->th_ack), ntohl(th->th_seq) + 1,
|
|
|
|
TH_ACK, (*state)->src.max_win, 0, 0, 0,
|
|
|
|
(*state)->tag, NULL, NULL);
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
|
|
|
|
#else
|
2011-06-28 11:57:25 +00:00
|
|
|
pf_send_tcp((*state)->rule.ptr, pd->af,
|
2009-12-24 00:43:44 +00:00
|
|
|
#endif
|
2011-06-28 11:57:25 +00:00
|
|
|
&sk->addr[pd->sidx], &sk->addr[pd->didx],
|
|
|
|
sk->port[pd->sidx], sk->port[pd->didx],
|
2009-12-24 00:43:44 +00:00
|
|
|
(*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
|
|
|
|
TH_ACK, (*state)->dst.max_win, 0, 0, 1,
|
|
|
|
0, NULL, NULL);
|
|
|
|
(*state)->src.seqdiff = (*state)->dst.seqhi -
|
|
|
|
(*state)->src.seqlo;
|
|
|
|
(*state)->dst.seqdiff = (*state)->src.seqhi -
|
|
|
|
(*state)->dst.seqlo;
|
|
|
|
(*state)->src.seqhi = (*state)->src.seqlo +
|
|
|
|
(*state)->dst.max_win;
|
|
|
|
(*state)->dst.seqhi = (*state)->dst.seqlo +
|
|
|
|
(*state)->src.max_win;
|
|
|
|
(*state)->src.wscale = (*state)->dst.wscale = 0;
|
|
|
|
(*state)->src.state = (*state)->dst.state =
|
|
|
|
TCPS_ESTABLISHED;
|
|
|
|
REASON_SET(reason, PFRES_SYNPROXY);
|
|
|
|
return (PF_SYNPROXY_DROP);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) &&
|
|
|
|
dst->state >= TCPS_FIN_WAIT_2 &&
|
|
|
|
src->state >= TCPS_FIN_WAIT_2) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (V_pf_status.debug >= PF_DEBUG_MISC) {
|
|
|
|
#else
|
2009-12-24 00:43:44 +00:00
|
|
|
if (pf_status.debug >= PF_DEBUG_MISC) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2009-12-24 00:43:44 +00:00
|
|
|
printf("pf: state reuse ");
|
|
|
|
pf_print_state(*state);
|
|
|
|
pf_print_flags(th->th_flags);
|
|
|
|
printf("\n");
|
|
|
|
}
|
|
|
|
/* XXX make sure it's the same direction ?? */
|
|
|
|
(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
|
|
|
|
pf_unlink_state(*state);
|
|
|
|
*state = NULL;
|
|
|
|
return (PF_DROP);
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((*state)->state_flags & PFSTATE_SLOPPY) {
|
|
|
|
if (pf_tcp_track_sloppy(src, dst, state, pd, reason) == PF_DROP)
|
|
|
|
return (PF_DROP);
|
|
|
|
} else {
|
|
|
|
if (pf_tcp_track_full(src, dst, state, kif, m, off, pd, reason,
|
|
|
|
©back) == PF_DROP)
|
|
|
|
return (PF_DROP);
|
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
/* translate source/destination address, if necessary */
|
2011-06-28 11:57:25 +00:00
|
|
|
if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
|
|
|
|
struct pf_state_key *nk = (*state)->key[pd->didx];
|
|
|
|
|
|
|
|
if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
|
|
|
|
nk->port[pd->sidx] != th->th_sport)
|
2004-02-26 02:04:28 +00:00
|
|
|
pf_change_ap(pd->src, &th->th_sport, pd->ip_sum,
|
2011-06-28 11:57:25 +00:00
|
|
|
&th->th_sum, &nk->addr[pd->sidx],
|
|
|
|
nk->port[pd->sidx], 0, pd->af);
|
|
|
|
|
|
|
|
if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
|
|
|
|
nk->port[pd->didx] != th->th_dport)
|
2004-02-26 02:04:28 +00:00
|
|
|
pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum,
|
2011-06-28 11:57:25 +00:00
|
|
|
&th->th_sum, &nk->addr[pd->didx],
|
|
|
|
nk->port[pd->didx], 0, pd->af);
|
|
|
|
copyback = 1;
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
/* Copyback sequence modulation or stateful scrub changes if needed */
|
|
|
|
if (copyback)
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
m_copyback(m, off, sizeof(*th), (caddr_t)th);
|
|
|
|
#else
|
|
|
|
m_copyback(m, off, sizeof(*th), th);
|
|
|
|
#endif
|
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
return (PF_PASS);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
2004-06-16 23:24:02 +00:00
|
|
|
pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
|
|
|
|
struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
|
2004-02-26 02:04:28 +00:00
|
|
|
{
|
|
|
|
struct pf_state_peer *src, *dst;
|
2011-06-28 11:57:25 +00:00
|
|
|
struct pf_state_key_cmp key;
|
2004-02-26 02:04:28 +00:00
|
|
|
struct udphdr *uh = pd->hdr.udp;
|
|
|
|
|
|
|
|
key.af = pd->af;
|
|
|
|
key.proto = IPPROTO_UDP;
|
2011-06-28 11:57:25 +00:00
|
|
|
if (direction == PF_IN) { /* wire side, straight */
|
|
|
|
PF_ACPY(&key.addr[0], pd->src, key.af);
|
|
|
|
PF_ACPY(&key.addr[1], pd->dst, key.af);
|
|
|
|
key.port[0] = uh->uh_sport;
|
|
|
|
key.port[1] = uh->uh_dport;
|
|
|
|
} else { /* stack side, reverse */
|
|
|
|
PF_ACPY(&key.addr[1], pd->src, key.af);
|
|
|
|
PF_ACPY(&key.addr[0], pd->dst, key.af);
|
|
|
|
key.port[1] = uh->uh_sport;
|
|
|
|
key.port[0] = uh->uh_dport;
|
2004-06-16 23:24:02 +00:00
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
|
|
|
|
#else
|
|
|
|
STATE_LOOKUP(kif, &key, direction, *state, m);
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
if (direction == (*state)->direction) {
|
|
|
|
src = &(*state)->src;
|
|
|
|
dst = &(*state)->dst;
|
|
|
|
} else {
|
|
|
|
src = &(*state)->dst;
|
|
|
|
dst = &(*state)->src;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* update states */
|
|
|
|
if (src->state < PFUDPS_SINGLE)
|
|
|
|
src->state = PFUDPS_SINGLE;
|
|
|
|
if (dst->state == PFUDPS_SINGLE)
|
|
|
|
dst->state = PFUDPS_MULTIPLE;
|
|
|
|
|
|
|
|
/* update expire time */
|
2004-02-26 02:34:12 +00:00
|
|
|
(*state)->expire = time_second;
|
2004-02-26 02:04:28 +00:00
|
|
|
if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
|
|
|
|
(*state)->timeout = PFTM_UDP_MULTIPLE;
|
|
|
|
else
|
|
|
|
(*state)->timeout = PFTM_UDP_SINGLE;
|
|
|
|
|
|
|
|
/* translate source/destination address, if necessary */
|
2011-06-28 11:57:25 +00:00
|
|
|
if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
|
|
|
|
struct pf_state_key *nk = (*state)->key[pd->didx];
|
|
|
|
|
|
|
|
if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
|
|
|
|
nk->port[pd->sidx] != uh->uh_sport)
|
2004-02-26 02:04:28 +00:00
|
|
|
pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum,
|
2011-06-28 11:57:25 +00:00
|
|
|
&uh->uh_sum, &nk->addr[pd->sidx],
|
|
|
|
nk->port[pd->sidx], 1, pd->af);
|
|
|
|
|
|
|
|
if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
|
|
|
|
nk->port[pd->didx] != uh->uh_dport)
|
2004-02-26 02:04:28 +00:00
|
|
|
pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum,
|
2011-06-28 11:57:25 +00:00
|
|
|
&uh->uh_sum, &nk->addr[pd->didx],
|
|
|
|
nk->port[pd->didx], 1, pd->af);
|
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
|
2011-06-28 11:57:25 +00:00
|
|
|
#else
|
|
|
|
m_copyback(m, off, sizeof(*uh), uh);
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return (PF_PASS);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
2004-06-16 23:24:02 +00:00
|
|
|
pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
|
2005-05-03 16:43:32 +00:00
|
|
|
struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
|
2004-02-26 02:04:28 +00:00
|
|
|
{
|
2011-06-28 11:57:25 +00:00
|
|
|
struct pf_addr *saddr = pd->src, *daddr = pd->dst;
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
u_int16_t icmpid = 0, *icmpsum;
|
|
|
|
#else
|
|
|
|
u_int16_t icmpid, *icmpsum;
|
|
|
|
#endif
|
|
|
|
u_int8_t icmptype;
|
2004-06-16 23:24:02 +00:00
|
|
|
int state_icmp = 0;
|
2011-06-28 11:57:25 +00:00
|
|
|
struct pf_state_key_cmp key;
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
switch (pd->proto) {
|
|
|
|
#ifdef INET
|
|
|
|
case IPPROTO_ICMP:
|
|
|
|
icmptype = pd->hdr.icmp->icmp_type;
|
|
|
|
icmpid = pd->hdr.icmp->icmp_id;
|
|
|
|
icmpsum = &pd->hdr.icmp->icmp_cksum;
|
|
|
|
|
|
|
|
if (icmptype == ICMP_UNREACH ||
|
|
|
|
icmptype == ICMP_SOURCEQUENCH ||
|
|
|
|
icmptype == ICMP_REDIRECT ||
|
|
|
|
icmptype == ICMP_TIMXCEED ||
|
|
|
|
icmptype == ICMP_PARAMPROB)
|
|
|
|
state_icmp++;
|
|
|
|
break;
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
|
|
case IPPROTO_ICMPV6:
|
|
|
|
icmptype = pd->hdr.icmp6->icmp6_type;
|
|
|
|
icmpid = pd->hdr.icmp6->icmp6_id;
|
|
|
|
icmpsum = &pd->hdr.icmp6->icmp6_cksum;
|
|
|
|
|
|
|
|
if (icmptype == ICMP6_DST_UNREACH ||
|
|
|
|
icmptype == ICMP6_PACKET_TOO_BIG ||
|
|
|
|
icmptype == ICMP6_TIME_EXCEEDED ||
|
|
|
|
icmptype == ICMP6_PARAM_PROB)
|
|
|
|
state_icmp++;
|
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!state_icmp) {
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ICMP query/reply message not related to a TCP/UDP packet.
|
|
|
|
* Search for an ICMP state.
|
|
|
|
*/
|
|
|
|
key.af = pd->af;
|
|
|
|
key.proto = pd->proto;
|
2011-06-28 11:57:25 +00:00
|
|
|
key.port[0] = key.port[1] = icmpid;
|
|
|
|
if (direction == PF_IN) { /* wire side, straight */
|
|
|
|
PF_ACPY(&key.addr[0], pd->src, key.af);
|
|
|
|
PF_ACPY(&key.addr[1], pd->dst, key.af);
|
|
|
|
} else { /* stack side, reverse */
|
|
|
|
PF_ACPY(&key.addr[1], pd->src, key.af);
|
|
|
|
PF_ACPY(&key.addr[0], pd->dst, key.af);
|
2004-06-16 23:24:02 +00:00
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
|
|
|
|
#else
|
|
|
|
STATE_LOOKUP(kif, &key, direction, *state, m);
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2004-02-26 02:34:12 +00:00
|
|
|
(*state)->expire = time_second;
|
2004-02-26 02:04:28 +00:00
|
|
|
(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
|
|
|
|
|
|
|
|
/* translate source/destination address, if necessary */
|
2011-06-28 11:57:25 +00:00
|
|
|
if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
|
|
|
|
struct pf_state_key *nk = (*state)->key[pd->didx];
|
|
|
|
|
|
|
|
switch (pd->af) {
|
2004-02-26 02:04:28 +00:00
|
|
|
#ifdef INET
|
2011-06-28 11:57:25 +00:00
|
|
|
case AF_INET:
|
|
|
|
if (PF_ANEQ(pd->src,
|
|
|
|
&nk->addr[pd->sidx], AF_INET))
|
|
|
|
pf_change_a(&saddr->v4.s_addr,
|
|
|
|
pd->ip_sum,
|
|
|
|
nk->addr[pd->sidx].v4.s_addr, 0);
|
|
|
|
|
|
|
|
if (PF_ANEQ(pd->dst, &nk->addr[pd->didx],
|
|
|
|
AF_INET))
|
2004-02-26 02:04:28 +00:00
|
|
|
pf_change_a(&daddr->v4.s_addr,
|
|
|
|
pd->ip_sum,
|
2011-06-28 11:57:25 +00:00
|
|
|
nk->addr[pd->didx].v4.s_addr, 0);
|
|
|
|
|
|
|
|
if (nk->port[0] !=
|
|
|
|
pd->hdr.icmp->icmp_id) {
|
Commit imported changes to HEAD:
pf_ioctl.c Revision 1.153 Sun Aug 7 11:37:33 2005 UTC by dhartmei
| verify ticket in DIOCADDADDR, from Boris Polevoy, ok deraadt@
pf_ioctl.c Revision 1.158 Mon Sep 5 14:51:08 2005 UTC by dhartmei
| in DIOCCHANGERULE, properly initialize table, if used in NAT rule.
| from Boris Polevoy <vapcom at mail dot ru>, ok mcbride@
pf.c Revision 1.502 Mon Aug 22 11:54:25 2005 UTC by dhartmei
| when nat'ing icmp 'connections', replace icmp id with proxy values
| (similar to proxy ports for tcp/udp). not all clients use
| per-invokation random ids, this allows multiple concurrent
| connections from such clients.
| thanks for testing to Rod Whitworth, "looks ok" markus@
pf.c Revision 1.501 Mon Aug 22 09:48:05 2005 UTC by dhartmei
| fix rdr to bitmask replacement address pool. patch from Max Laier,
| reported by Boris Polevoy, tested by Jean Debogue, ok henning@
Obtained from: OpenBSD
MFC after: 3 days
2005-09-08 15:06:52 +00:00
|
|
|
pd->hdr.icmp->icmp_cksum =
|
|
|
|
pf_cksum_fixup(
|
|
|
|
pd->hdr.icmp->icmp_cksum, icmpid,
|
2011-06-28 11:57:25 +00:00
|
|
|
nk->port[pd->sidx], 0);
|
Commit imported changes to HEAD:
pf_ioctl.c Revision 1.153 Sun Aug 7 11:37:33 2005 UTC by dhartmei
| verify ticket in DIOCADDADDR, from Boris Polevoy, ok deraadt@
pf_ioctl.c Revision 1.158 Mon Sep 5 14:51:08 2005 UTC by dhartmei
| in DIOCCHANGERULE, properly initialize table, if used in NAT rule.
| from Boris Polevoy <vapcom at mail dot ru>, ok mcbride@
pf.c Revision 1.502 Mon Aug 22 11:54:25 2005 UTC by dhartmei
| when nat'ing icmp 'connections', replace icmp id with proxy values
| (similar to proxy ports for tcp/udp). not all clients use
| per-invokation random ids, this allows multiple concurrent
| connections from such clients.
| thanks for testing to Rod Whitworth, "looks ok" markus@
pf.c Revision 1.501 Mon Aug 22 09:48:05 2005 UTC by dhartmei
| fix rdr to bitmask replacement address pool. patch from Max Laier,
| reported by Boris Polevoy, tested by Jean Debogue, ok henning@
Obtained from: OpenBSD
MFC after: 3 days
2005-09-08 15:06:52 +00:00
|
|
|
pd->hdr.icmp->icmp_id =
|
2011-06-28 11:57:25 +00:00
|
|
|
nk->port[pd->sidx];
|
|
|
|
}
|
|
|
|
|
|
|
|
m_copyback(m, off, ICMP_MINLEN,
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
(caddr_t)
|
|
|
|
#endif
|
|
|
|
pd->hdr.icmp);
|
|
|
|
break;
|
2004-02-26 02:04:28 +00:00
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
2011-06-28 11:57:25 +00:00
|
|
|
case AF_INET6:
|
|
|
|
if (PF_ANEQ(pd->src,
|
|
|
|
&nk->addr[pd->sidx], AF_INET6))
|
|
|
|
pf_change_a6(saddr,
|
|
|
|
&pd->hdr.icmp6->icmp6_cksum,
|
|
|
|
&nk->addr[pd->sidx], 0);
|
|
|
|
|
|
|
|
if (PF_ANEQ(pd->dst,
|
|
|
|
&nk->addr[pd->didx], AF_INET6))
|
2004-02-26 02:04:28 +00:00
|
|
|
pf_change_a6(daddr,
|
|
|
|
&pd->hdr.icmp6->icmp6_cksum,
|
2011-06-28 11:57:25 +00:00
|
|
|
&nk->addr[pd->didx], 0);
|
|
|
|
|
|
|
|
m_copyback(m, off,
|
|
|
|
sizeof(struct icmp6_hdr),
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
(caddr_t)
|
|
|
|
#endif
|
|
|
|
pd->hdr.icmp6);
|
|
|
|
break;
|
2004-02-26 02:04:28 +00:00
|
|
|
#endif /* INET6 */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return (PF_PASS);
|
|
|
|
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* ICMP error message in response to a TCP/UDP packet.
|
|
|
|
* Extract the inner TCP/UDP header and search for that state.
|
|
|
|
*/
|
|
|
|
|
|
|
|
struct pf_pdesc pd2;
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
bzero(&pd2, sizeof pd2);
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
#ifdef INET
|
|
|
|
struct ip h2;
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
|
|
struct ip6_hdr h2_6;
|
|
|
|
int terminal = 0;
|
|
|
|
#endif /* INET6 */
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
int ipoff2 = 0;
|
|
|
|
int off2 = 0;
|
|
|
|
#else
|
|
|
|
int ipoff2;
|
|
|
|
int off2;
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
pd2.af = pd->af;
|
2011-06-28 11:57:25 +00:00
|
|
|
/* Payload packet is from the opposite direction. */
|
|
|
|
pd2.sidx = (direction == PF_IN) ? 1 : 0;
|
|
|
|
pd2.didx = (direction == PF_IN) ? 0 : 1;
|
2004-02-26 02:04:28 +00:00
|
|
|
switch (pd->af) {
|
|
|
|
#ifdef INET
|
|
|
|
case AF_INET:
|
|
|
|
/* offset of h2 in mbuf chain */
|
|
|
|
ipoff2 = off + ICMP_MINLEN;
|
|
|
|
|
|
|
|
if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2),
|
2005-05-03 16:43:32 +00:00
|
|
|
NULL, reason, pd2.af)) {
|
2004-02-26 02:04:28 +00:00
|
|
|
DPFPRINTF(PF_DEBUG_MISC,
|
|
|
|
("pf: ICMP error message too short "
|
|
|
|
"(ip)\n"));
|
|
|
|
return (PF_DROP);
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* ICMP error messages don't refer to non-first
|
|
|
|
* fragments
|
|
|
|
*/
|
2005-05-03 16:43:32 +00:00
|
|
|
if (h2.ip_off & htons(IP_OFFMASK)) {
|
|
|
|
REASON_SET(reason, PFRES_FRAG);
|
2004-02-26 02:04:28 +00:00
|
|
|
return (PF_DROP);
|
2005-05-03 16:43:32 +00:00
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
/* offset of protocol header that follows h2 */
|
|
|
|
off2 = ipoff2 + (h2.ip_hl << 2);
|
|
|
|
|
|
|
|
pd2.proto = h2.ip_p;
|
|
|
|
pd2.src = (struct pf_addr *)&h2.ip_src;
|
|
|
|
pd2.dst = (struct pf_addr *)&h2.ip_dst;
|
|
|
|
pd2.ip_sum = &h2.ip_sum;
|
|
|
|
break;
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
|
|
|
ipoff2 = off + sizeof(struct icmp6_hdr);
|
|
|
|
|
|
|
|
if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6),
|
2005-05-03 16:43:32 +00:00
|
|
|
NULL, reason, pd2.af)) {
|
2004-02-26 02:04:28 +00:00
|
|
|
DPFPRINTF(PF_DEBUG_MISC,
|
|
|
|
("pf: ICMP error message too short "
|
|
|
|
"(ip6)\n"));
|
|
|
|
return (PF_DROP);
|
|
|
|
}
|
|
|
|
pd2.proto = h2_6.ip6_nxt;
|
|
|
|
pd2.src = (struct pf_addr *)&h2_6.ip6_src;
|
|
|
|
pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
|
|
|
|
pd2.ip_sum = NULL;
|
|
|
|
off2 = ipoff2 + sizeof(h2_6);
|
|
|
|
do {
|
|
|
|
switch (pd2.proto) {
|
|
|
|
case IPPROTO_FRAGMENT:
|
|
|
|
/*
|
|
|
|
* ICMPv6 error messages for
|
|
|
|
* non-first fragments
|
|
|
|
*/
|
2005-05-03 16:43:32 +00:00
|
|
|
REASON_SET(reason, PFRES_FRAG);
|
2004-02-26 02:04:28 +00:00
|
|
|
return (PF_DROP);
|
|
|
|
case IPPROTO_AH:
|
|
|
|
case IPPROTO_HOPOPTS:
|
|
|
|
case IPPROTO_ROUTING:
|
|
|
|
case IPPROTO_DSTOPTS: {
|
|
|
|
/* get next header and header length */
|
|
|
|
struct ip6_ext opt6;
|
|
|
|
|
|
|
|
if (!pf_pull_hdr(m, off2, &opt6,
|
2005-05-03 16:43:32 +00:00
|
|
|
sizeof(opt6), NULL, reason,
|
|
|
|
pd2.af)) {
|
2004-02-26 02:04:28 +00:00
|
|
|
DPFPRINTF(PF_DEBUG_MISC,
|
|
|
|
("pf: ICMPv6 short opt\n"));
|
|
|
|
return (PF_DROP);
|
|
|
|
}
|
|
|
|
if (pd2.proto == IPPROTO_AH)
|
|
|
|
off2 += (opt6.ip6e_len + 2) * 4;
|
|
|
|
else
|
|
|
|
off2 += (opt6.ip6e_len + 1) * 8;
|
|
|
|
pd2.proto = opt6.ip6e_nxt;
|
|
|
|
/* goto the next header */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
terminal++;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
} while (!terminal);
|
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (pd2.proto) {
|
|
|
|
case IPPROTO_TCP: {
|
|
|
|
struct tcphdr th;
|
|
|
|
u_int32_t seq;
|
|
|
|
struct pf_state_peer *src, *dst;
|
|
|
|
u_int8_t dws;
|
2004-04-11 17:35:40 +00:00
|
|
|
int copyback = 0;
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Only the first 8 bytes of the TCP header can be
|
|
|
|
* expected. Don't access any TCP header fields after
|
|
|
|
* th_seq, an ackskew test is not possible.
|
|
|
|
*/
|
2005-05-03 16:43:32 +00:00
|
|
|
if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason,
|
|
|
|
pd2.af)) {
|
2004-02-26 02:04:28 +00:00
|
|
|
DPFPRINTF(PF_DEBUG_MISC,
|
|
|
|
("pf: ICMP error message too short "
|
|
|
|
"(tcp)\n"));
|
|
|
|
return (PF_DROP);
|
|
|
|
}
|
|
|
|
|
|
|
|
key.af = pd2.af;
|
|
|
|
key.proto = IPPROTO_TCP;
|
2011-06-28 11:57:25 +00:00
|
|
|
PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
|
|
|
|
PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
|
|
|
|
key.port[pd2.sidx] = th.th_sport;
|
|
|
|
key.port[pd2.didx] = th.th_dport;
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
|
|
|
|
#else
|
|
|
|
STATE_LOOKUP(kif, &key, direction, *state, m);
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
if (direction == (*state)->direction) {
|
|
|
|
src = &(*state)->dst;
|
|
|
|
dst = &(*state)->src;
|
|
|
|
} else {
|
|
|
|
src = &(*state)->src;
|
|
|
|
dst = &(*state)->dst;
|
|
|
|
}
|
|
|
|
|
2007-08-23 09:30:58 +00:00
|
|
|
if (src->wscale && dst->wscale)
|
2004-02-26 02:04:28 +00:00
|
|
|
dws = dst->wscale & PF_WSCALE_MASK;
|
|
|
|
else
|
|
|
|
dws = 0;
|
|
|
|
|
|
|
|
/* Demodulate sequence number */
|
|
|
|
seq = ntohl(th.th_seq) - src->seqdiff;
|
2004-04-11 17:35:40 +00:00
|
|
|
if (src->seqdiff) {
|
|
|
|
pf_change_a(&th.th_seq, icmpsum,
|
2004-02-26 02:04:28 +00:00
|
|
|
htonl(seq), 0);
|
2004-04-11 17:35:40 +00:00
|
|
|
copyback = 1;
|
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2009-12-24 00:43:44 +00:00
|
|
|
if (!((*state)->state_flags & PFSTATE_SLOPPY) &&
|
|
|
|
(!SEQ_GEQ(src->seqhi, seq) ||
|
|
|
|
!SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (V_pf_status.debug >= PF_DEBUG_MISC) {
|
|
|
|
#else
|
2004-02-26 02:04:28 +00:00
|
|
|
if (pf_status.debug >= PF_DEBUG_MISC) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
printf("pf: BAD ICMP %d:%d ",
|
|
|
|
icmptype, pd->hdr.icmp->icmp_code);
|
|
|
|
pf_print_host(pd->src, 0, pd->af);
|
|
|
|
printf(" -> ");
|
|
|
|
pf_print_host(pd->dst, 0, pd->af);
|
|
|
|
printf(" state: ");
|
|
|
|
pf_print_state(*state);
|
|
|
|
printf(" seq=%u\n", seq);
|
|
|
|
}
|
2005-05-03 16:43:32 +00:00
|
|
|
REASON_SET(reason, PFRES_BADSTATE);
|
2004-02-26 02:04:28 +00:00
|
|
|
return (PF_DROP);
|
2011-06-28 11:57:25 +00:00
|
|
|
} else {
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (V_pf_status.debug >= PF_DEBUG_MISC) {
|
|
|
|
#else
|
|
|
|
if (pf_status.debug >= PF_DEBUG_MISC) {
|
|
|
|
#endif
|
|
|
|
printf("pf: OK ICMP %d:%d ",
|
|
|
|
icmptype, pd->hdr.icmp->icmp_code);
|
|
|
|
pf_print_host(pd->src, 0, pd->af);
|
|
|
|
printf(" -> ");
|
|
|
|
pf_print_host(pd->dst, 0, pd->af);
|
|
|
|
printf(" state: ");
|
|
|
|
pf_print_state(*state);
|
|
|
|
printf(" seq=%u\n", seq);
|
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
/* translate source/destination address, if necessary */
|
|
|
|
if ((*state)->key[PF_SK_WIRE] !=
|
|
|
|
(*state)->key[PF_SK_STACK]) {
|
|
|
|
struct pf_state_key *nk =
|
|
|
|
(*state)->key[pd->didx];
|
|
|
|
|
|
|
|
if (PF_ANEQ(pd2.src,
|
|
|
|
&nk->addr[pd2.sidx], pd2.af) ||
|
|
|
|
nk->port[pd2.sidx] != th.th_sport)
|
2004-02-26 02:04:28 +00:00
|
|
|
pf_change_icmp(pd2.src, &th.th_sport,
|
2011-06-28 11:57:25 +00:00
|
|
|
daddr, &nk->addr[pd2.sidx],
|
|
|
|
nk->port[pd2.sidx], NULL,
|
2004-02-26 02:04:28 +00:00
|
|
|
pd2.ip_sum, icmpsum,
|
|
|
|
pd->ip_sum, 0, pd2.af);
|
2011-06-28 11:57:25 +00:00
|
|
|
|
|
|
|
if (PF_ANEQ(pd2.dst,
|
|
|
|
&nk->addr[pd2.didx], pd2.af) ||
|
|
|
|
nk->port[pd2.didx] != th.th_dport)
|
2004-02-26 02:04:28 +00:00
|
|
|
pf_change_icmp(pd2.dst, &th.th_dport,
|
2011-06-28 11:57:25 +00:00
|
|
|
NULL, /* XXX Inbound NAT? */
|
|
|
|
&nk->addr[pd2.didx],
|
|
|
|
nk->port[pd2.didx], NULL,
|
2004-02-26 02:04:28 +00:00
|
|
|
pd2.ip_sum, icmpsum,
|
|
|
|
pd->ip_sum, 0, pd2.af);
|
2004-04-11 17:35:40 +00:00
|
|
|
copyback = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (copyback) {
|
2004-02-26 02:04:28 +00:00
|
|
|
switch (pd2.af) {
|
|
|
|
#ifdef INET
|
|
|
|
case AF_INET:
|
|
|
|
m_copyback(m, off, ICMP_MINLEN,
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
(caddr_t)
|
|
|
|
#endif
|
|
|
|
pd->hdr.icmp);
|
2004-02-26 02:04:28 +00:00
|
|
|
m_copyback(m, ipoff2, sizeof(h2),
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
(caddr_t)
|
|
|
|
#endif
|
|
|
|
&h2);
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
|
|
|
m_copyback(m, off,
|
|
|
|
sizeof(struct icmp6_hdr),
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
(caddr_t)
|
|
|
|
#endif
|
|
|
|
pd->hdr.icmp6);
|
2004-02-26 02:04:28 +00:00
|
|
|
m_copyback(m, ipoff2, sizeof(h2_6),
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
(caddr_t)
|
|
|
|
#endif
|
|
|
|
&h2_6);
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
m_copyback(m, off2, 8, (caddr_t)&th);
|
2011-06-28 11:57:25 +00:00
|
|
|
#else
|
|
|
|
m_copyback(m, off2, 8, &th);
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return (PF_PASS);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case IPPROTO_UDP: {
|
|
|
|
struct udphdr uh;
|
|
|
|
|
|
|
|
if (!pf_pull_hdr(m, off2, &uh, sizeof(uh),
|
2005-05-03 16:43:32 +00:00
|
|
|
NULL, reason, pd2.af)) {
|
2004-02-26 02:04:28 +00:00
|
|
|
DPFPRINTF(PF_DEBUG_MISC,
|
|
|
|
("pf: ICMP error message too short "
|
|
|
|
"(udp)\n"));
|
|
|
|
return (PF_DROP);
|
|
|
|
}
|
|
|
|
|
|
|
|
key.af = pd2.af;
|
|
|
|
key.proto = IPPROTO_UDP;
|
2011-06-28 11:57:25 +00:00
|
|
|
PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
|
|
|
|
PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
|
|
|
|
key.port[pd2.sidx] = uh.uh_sport;
|
|
|
|
key.port[pd2.didx] = uh.uh_dport;
|
|
|
|
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
|
|
|
|
#else
|
|
|
|
STATE_LOOKUP(kif, &key, direction, *state, m);
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
/* translate source/destination address, if necessary */
|
|
|
|
if ((*state)->key[PF_SK_WIRE] !=
|
|
|
|
(*state)->key[PF_SK_STACK]) {
|
|
|
|
struct pf_state_key *nk =
|
|
|
|
(*state)->key[pd->didx];
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
if (PF_ANEQ(pd2.src,
|
|
|
|
&nk->addr[pd2.sidx], pd2.af) ||
|
|
|
|
nk->port[pd2.sidx] != uh.uh_sport)
|
2004-02-26 02:04:28 +00:00
|
|
|
pf_change_icmp(pd2.src, &uh.uh_sport,
|
2011-06-28 11:57:25 +00:00
|
|
|
daddr, &nk->addr[pd2.sidx],
|
|
|
|
nk->port[pd2.sidx], &uh.uh_sum,
|
2004-02-26 02:04:28 +00:00
|
|
|
pd2.ip_sum, icmpsum,
|
|
|
|
pd->ip_sum, 1, pd2.af);
|
2011-06-28 11:57:25 +00:00
|
|
|
|
|
|
|
if (PF_ANEQ(pd2.dst,
|
|
|
|
&nk->addr[pd2.didx], pd2.af) ||
|
|
|
|
nk->port[pd2.didx] != uh.uh_dport)
|
2004-02-26 02:04:28 +00:00
|
|
|
pf_change_icmp(pd2.dst, &uh.uh_dport,
|
2011-06-28 11:57:25 +00:00
|
|
|
NULL, /* XXX Inbound NAT? */
|
|
|
|
&nk->addr[pd2.didx],
|
|
|
|
nk->port[pd2.didx], &uh.uh_sum,
|
2004-02-26 02:04:28 +00:00
|
|
|
pd2.ip_sum, icmpsum,
|
|
|
|
pd->ip_sum, 1, pd2.af);
|
2011-06-28 11:57:25 +00:00
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
switch (pd2.af) {
|
|
|
|
#ifdef INET
|
|
|
|
case AF_INET:
|
|
|
|
m_copyback(m, off, ICMP_MINLEN,
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
(caddr_t)
|
|
|
|
#endif
|
|
|
|
pd->hdr.icmp);
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
|
|
|
|
#else
|
|
|
|
m_copyback(m, ipoff2, sizeof(h2), &h2);
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
|
|
|
m_copyback(m, off,
|
|
|
|
sizeof(struct icmp6_hdr),
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
(caddr_t)
|
|
|
|
#endif
|
|
|
|
pd->hdr.icmp6);
|
2004-02-26 02:04:28 +00:00
|
|
|
m_copyback(m, ipoff2, sizeof(h2_6),
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
(caddr_t)
|
|
|
|
#endif
|
|
|
|
&h2_6);
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
m_copyback(m, off2, sizeof(uh), (caddr_t)&uh);
|
|
|
|
#else
|
|
|
|
m_copyback(m, off2, sizeof(uh), &uh);
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
return (PF_PASS);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
#ifdef INET
|
|
|
|
case IPPROTO_ICMP: {
|
|
|
|
struct icmp iih;
|
|
|
|
|
|
|
|
if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
|
2005-05-03 16:43:32 +00:00
|
|
|
NULL, reason, pd2.af)) {
|
2004-02-26 02:04:28 +00:00
|
|
|
DPFPRINTF(PF_DEBUG_MISC,
|
|
|
|
("pf: ICMP error message too short i"
|
|
|
|
"(icmp)\n"));
|
|
|
|
return (PF_DROP);
|
|
|
|
}
|
|
|
|
|
|
|
|
key.af = pd2.af;
|
|
|
|
key.proto = IPPROTO_ICMP;
|
2011-06-28 11:57:25 +00:00
|
|
|
PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
|
|
|
|
PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
|
|
|
|
key.port[0] = key.port[1] = iih.icmp_id;
|
|
|
|
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
|
|
|
|
#else
|
|
|
|
STATE_LOOKUP(kif, &key, direction, *state, m);
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
/* translate source/destination address, if necessary */
|
|
|
|
if ((*state)->key[PF_SK_WIRE] !=
|
|
|
|
(*state)->key[PF_SK_STACK]) {
|
|
|
|
struct pf_state_key *nk =
|
|
|
|
(*state)->key[pd->didx];
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
if (PF_ANEQ(pd2.src,
|
|
|
|
&nk->addr[pd2.sidx], pd2.af) ||
|
|
|
|
nk->port[pd2.sidx] != iih.icmp_id)
|
2004-02-26 02:04:28 +00:00
|
|
|
pf_change_icmp(pd2.src, &iih.icmp_id,
|
2011-06-28 11:57:25 +00:00
|
|
|
daddr, &nk->addr[pd2.sidx],
|
|
|
|
nk->port[pd2.sidx], NULL,
|
2004-02-26 02:04:28 +00:00
|
|
|
pd2.ip_sum, icmpsum,
|
|
|
|
pd->ip_sum, 0, AF_INET);
|
2011-06-28 11:57:25 +00:00
|
|
|
|
|
|
|
if (PF_ANEQ(pd2.dst,
|
|
|
|
&nk->addr[pd2.didx], pd2.af) ||
|
|
|
|
nk->port[pd2.didx] != iih.icmp_id)
|
2004-02-26 02:04:28 +00:00
|
|
|
pf_change_icmp(pd2.dst, &iih.icmp_id,
|
2011-06-28 11:57:25 +00:00
|
|
|
NULL, /* XXX Inbound NAT? */
|
|
|
|
&nk->addr[pd2.didx],
|
|
|
|
nk->port[pd2.didx], NULL,
|
2004-02-26 02:04:28 +00:00
|
|
|
pd2.ip_sum, icmpsum,
|
|
|
|
pd->ip_sum, 0, AF_INET);
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
|
|
|
|
m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
|
|
|
|
m_copyback(m, off2, ICMP_MINLEN, (caddr_t)&iih);
|
|
|
|
#else
|
|
|
|
m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp);
|
|
|
|
m_copyback(m, ipoff2, sizeof(h2), &h2);
|
|
|
|
m_copyback(m, off2, ICMP_MINLEN, &iih);
|
|
|
|
#endif
|
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
return (PF_PASS);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
|
|
case IPPROTO_ICMPV6: {
|
|
|
|
struct icmp6_hdr iih;
|
|
|
|
|
|
|
|
if (!pf_pull_hdr(m, off2, &iih,
|
2005-05-03 16:43:32 +00:00
|
|
|
sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
|
2004-02-26 02:04:28 +00:00
|
|
|
DPFPRINTF(PF_DEBUG_MISC,
|
|
|
|
("pf: ICMP error message too short "
|
|
|
|
"(icmp6)\n"));
|
|
|
|
return (PF_DROP);
|
|
|
|
}
|
|
|
|
|
|
|
|
key.af = pd2.af;
|
|
|
|
key.proto = IPPROTO_ICMPV6;
|
2011-06-28 11:57:25 +00:00
|
|
|
PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
|
|
|
|
PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
|
|
|
|
key.port[0] = key.port[1] = iih.icmp6_id;
|
|
|
|
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
|
|
|
|
#else
|
|
|
|
STATE_LOOKUP(kif, &key, direction, *state, m);
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
/* translate source/destination address, if necessary */
|
|
|
|
if ((*state)->key[PF_SK_WIRE] !=
|
|
|
|
(*state)->key[PF_SK_STACK]) {
|
|
|
|
struct pf_state_key *nk =
|
|
|
|
(*state)->key[pd->didx];
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
if (PF_ANEQ(pd2.src,
|
|
|
|
&nk->addr[pd2.sidx], pd2.af) ||
|
|
|
|
nk->port[pd2.sidx] != iih.icmp6_id)
|
2004-02-26 02:04:28 +00:00
|
|
|
pf_change_icmp(pd2.src, &iih.icmp6_id,
|
2011-06-28 11:57:25 +00:00
|
|
|
daddr, &nk->addr[pd2.sidx],
|
|
|
|
nk->port[pd2.sidx], NULL,
|
2004-02-26 02:04:28 +00:00
|
|
|
pd2.ip_sum, icmpsum,
|
|
|
|
pd->ip_sum, 0, AF_INET6);
|
2011-06-28 11:57:25 +00:00
|
|
|
|
|
|
|
if (PF_ANEQ(pd2.dst,
|
|
|
|
&nk->addr[pd2.didx], pd2.af) ||
|
|
|
|
nk->port[pd2.didx] != iih.icmp6_id)
|
2004-02-26 02:04:28 +00:00
|
|
|
pf_change_icmp(pd2.dst, &iih.icmp6_id,
|
2011-06-28 11:57:25 +00:00
|
|
|
NULL, /* XXX Inbound NAT? */
|
|
|
|
&nk->addr[pd2.didx],
|
|
|
|
nk->port[pd2.didx], NULL,
|
2004-02-26 02:04:28 +00:00
|
|
|
pd2.ip_sum, icmpsum,
|
|
|
|
pd->ip_sum, 0, AF_INET6);
|
2011-06-28 11:57:25 +00:00
|
|
|
|
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:04:28 +00:00
|
|
|
m_copyback(m, off, sizeof(struct icmp6_hdr),
|
2004-02-26 02:34:12 +00:00
|
|
|
(caddr_t)pd->hdr.icmp6);
|
2011-06-28 11:57:25 +00:00
|
|
|
m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6);
|
2004-02-26 02:04:28 +00:00
|
|
|
m_copyback(m, off2, sizeof(struct icmp6_hdr),
|
2004-02-26 02:34:12 +00:00
|
|
|
(caddr_t)&iih);
|
2011-06-28 11:57:25 +00:00
|
|
|
#else
|
|
|
|
m_copyback(m, off, sizeof(struct icmp6_hdr),
|
|
|
|
pd->hdr.icmp6);
|
|
|
|
m_copyback(m, ipoff2, sizeof(h2_6), &h2_6);
|
|
|
|
m_copyback(m, off2, sizeof(struct icmp6_hdr),
|
|
|
|
&iih);
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
return (PF_PASS);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
#endif /* INET6 */
|
|
|
|
default: {
|
|
|
|
key.af = pd2.af;
|
|
|
|
key.proto = pd2.proto;
|
2011-06-28 11:57:25 +00:00
|
|
|
PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
|
|
|
|
PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
|
|
|
|
key.port[0] = key.port[1] = 0;
|
|
|
|
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
|
|
|
|
#else
|
|
|
|
STATE_LOOKUP(kif, &key, direction, *state, m);
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
/* translate source/destination address, if necessary */
|
|
|
|
if ((*state)->key[PF_SK_WIRE] !=
|
|
|
|
(*state)->key[PF_SK_STACK]) {
|
|
|
|
struct pf_state_key *nk =
|
|
|
|
(*state)->key[pd->didx];
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
if (PF_ANEQ(pd2.src,
|
|
|
|
&nk->addr[pd2.sidx], pd2.af))
|
|
|
|
pf_change_icmp(pd2.src, NULL, daddr,
|
|
|
|
&nk->addr[pd2.sidx], 0, NULL,
|
2004-02-26 02:04:28 +00:00
|
|
|
pd2.ip_sum, icmpsum,
|
|
|
|
pd->ip_sum, 0, pd2.af);
|
2011-06-28 11:57:25 +00:00
|
|
|
|
|
|
|
if (PF_ANEQ(pd2.dst,
|
|
|
|
&nk->addr[pd2.didx], pd2.af))
|
|
|
|
pf_change_icmp(pd2.src, NULL,
|
|
|
|
NULL, /* XXX Inbound NAT? */
|
|
|
|
&nk->addr[pd2.didx], 0, NULL,
|
2004-02-26 02:04:28 +00:00
|
|
|
pd2.ip_sum, icmpsum,
|
|
|
|
pd->ip_sum, 0, pd2.af);
|
2011-06-28 11:57:25 +00:00
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
switch (pd2.af) {
|
|
|
|
#ifdef INET
|
|
|
|
case AF_INET:
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:04:28 +00:00
|
|
|
m_copyback(m, off, ICMP_MINLEN,
|
2004-02-26 02:34:12 +00:00
|
|
|
(caddr_t)pd->hdr.icmp);
|
2011-06-28 11:57:25 +00:00
|
|
|
m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
|
|
|
|
#else
|
|
|
|
m_copyback(m, off, ICMP_MINLEN,
|
|
|
|
pd->hdr.icmp);
|
|
|
|
m_copyback(m, ipoff2, sizeof(h2), &h2);
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
|
|
|
m_copyback(m, off,
|
|
|
|
sizeof(struct icmp6_hdr),
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
(caddr_t)
|
|
|
|
#endif
|
|
|
|
pd->hdr.icmp6);
|
2004-02-26 02:04:28 +00:00
|
|
|
m_copyback(m, ipoff2, sizeof(h2_6),
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
(caddr_t)
|
|
|
|
#endif
|
|
|
|
&h2_6);
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return (PF_PASS);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
2004-06-16 23:24:02 +00:00
|
|
|
pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
|
2011-06-28 11:57:25 +00:00
|
|
|
struct mbuf *m, struct pf_pdesc *pd)
|
2004-02-26 02:04:28 +00:00
|
|
|
{
|
|
|
|
struct pf_state_peer *src, *dst;
|
2011-06-28 11:57:25 +00:00
|
|
|
struct pf_state_key_cmp key;
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
key.af = pd->af;
|
|
|
|
key.proto = pd->proto;
|
2004-06-16 23:24:02 +00:00
|
|
|
if (direction == PF_IN) {
|
2011-06-28 11:57:25 +00:00
|
|
|
PF_ACPY(&key.addr[0], pd->src, key.af);
|
|
|
|
PF_ACPY(&key.addr[1], pd->dst, key.af);
|
|
|
|
key.port[0] = key.port[1] = 0;
|
2004-06-16 23:24:02 +00:00
|
|
|
} else {
|
2011-06-28 11:57:25 +00:00
|
|
|
PF_ACPY(&key.addr[1], pd->src, key.af);
|
|
|
|
PF_ACPY(&key.addr[0], pd->dst, key.af);
|
|
|
|
key.port[1] = key.port[0] = 0;
|
2004-06-16 23:24:02 +00:00
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
|
|
|
|
#else
|
|
|
|
STATE_LOOKUP(kif, &key, direction, *state, m);
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
if (direction == (*state)->direction) {
|
|
|
|
src = &(*state)->src;
|
|
|
|
dst = &(*state)->dst;
|
|
|
|
} else {
|
|
|
|
src = &(*state)->dst;
|
|
|
|
dst = &(*state)->src;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* update states */
|
|
|
|
if (src->state < PFOTHERS_SINGLE)
|
|
|
|
src->state = PFOTHERS_SINGLE;
|
|
|
|
if (dst->state == PFOTHERS_SINGLE)
|
|
|
|
dst->state = PFOTHERS_MULTIPLE;
|
|
|
|
|
|
|
|
/* update expire time */
|
2004-02-26 02:34:12 +00:00
|
|
|
(*state)->expire = time_second;
|
2004-02-26 02:04:28 +00:00
|
|
|
if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
|
|
|
|
(*state)->timeout = PFTM_OTHER_MULTIPLE;
|
|
|
|
else
|
|
|
|
(*state)->timeout = PFTM_OTHER_SINGLE;
|
|
|
|
|
|
|
|
/* translate source/destination address, if necessary */
|
2011-06-28 11:57:25 +00:00
|
|
|
if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
|
|
|
|
struct pf_state_key *nk = (*state)->key[pd->didx];
|
|
|
|
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
KASSERT(nk, ("%s: nk is null", __FUNCTION__));
|
|
|
|
KASSERT(pd, ("%s: pd is null", __FUNCTION__));
|
|
|
|
KASSERT(pd->src, ("%s: pd->src is null", __FUNCTION__));
|
|
|
|
KASSERT(pd->dst, ("%s: pd->dst is null", __FUNCTION__));
|
|
|
|
#else
|
|
|
|
KASSERT(nk);
|
|
|
|
KASSERT(pd);
|
|
|
|
KASSERT(pd->src);
|
|
|
|
KASSERT(pd->dst);
|
|
|
|
#endif
|
|
|
|
switch (pd->af) {
|
2004-02-26 02:04:28 +00:00
|
|
|
#ifdef INET
|
2011-06-28 11:57:25 +00:00
|
|
|
case AF_INET:
|
|
|
|
if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET))
|
2004-02-26 02:04:28 +00:00
|
|
|
pf_change_a(&pd->src->v4.s_addr,
|
2011-06-28 11:57:25 +00:00
|
|
|
pd->ip_sum,
|
|
|
|
nk->addr[pd->sidx].v4.s_addr,
|
2004-02-26 02:04:28 +00:00
|
|
|
0);
|
2011-06-28 11:57:25 +00:00
|
|
|
|
|
|
|
|
|
|
|
if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET))
|
2004-02-26 02:04:28 +00:00
|
|
|
pf_change_a(&pd->dst->v4.s_addr,
|
2011-06-28 11:57:25 +00:00
|
|
|
pd->ip_sum,
|
|
|
|
nk->addr[pd->didx].v4.s_addr,
|
2004-02-26 02:04:28 +00:00
|
|
|
0);
|
2011-06-28 11:57:25 +00:00
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
2011-06-28 11:57:25 +00:00
|
|
|
case AF_INET6:
|
|
|
|
if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET))
|
|
|
|
PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af);
|
|
|
|
|
|
|
|
if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET))
|
|
|
|
PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af);
|
2004-02-26 02:04:28 +00:00
|
|
|
#endif /* INET6 */
|
2011-06-28 11:57:25 +00:00
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
return (PF_PASS);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ipoff and off are measured from the start of the mbuf chain.
|
|
|
|
* h must be at "ipoff" on the mbuf chain.
|
|
|
|
*/
|
|
|
|
void *
|
|
|
|
pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
|
|
|
|
u_short *actionp, u_short *reasonp, sa_family_t af)
|
|
|
|
{
|
|
|
|
switch (af) {
|
|
|
|
#ifdef INET
|
|
|
|
case AF_INET: {
|
|
|
|
struct ip *h = mtod(m, struct ip *);
|
|
|
|
u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
|
|
|
|
|
|
|
|
if (fragoff) {
|
|
|
|
if (fragoff >= len)
|
|
|
|
ACTION_SET(actionp, PF_PASS);
|
|
|
|
else {
|
|
|
|
ACTION_SET(actionp, PF_DROP);
|
|
|
|
REASON_SET(reasonp, PFRES_FRAG);
|
|
|
|
}
|
|
|
|
return (NULL);
|
|
|
|
}
|
2004-06-16 23:24:02 +00:00
|
|
|
if (m->m_pkthdr.len < off + len ||
|
|
|
|
ntohs(h->ip_len) < off + len) {
|
2004-02-26 02:04:28 +00:00
|
|
|
ACTION_SET(actionp, PF_DROP);
|
|
|
|
REASON_SET(reasonp, PFRES_SHORT);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6: {
|
|
|
|
struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
|
|
|
|
|
|
|
|
if (m->m_pkthdr.len < off + len ||
|
|
|
|
(ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
|
|
|
|
(unsigned)(off + len)) {
|
|
|
|
ACTION_SET(actionp, PF_DROP);
|
|
|
|
REASON_SET(reasonp, PFRES_SHORT);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
#endif /* INET6 */
|
|
|
|
}
|
|
|
|
m_copydata(m, off, len, p);
|
|
|
|
return (p);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
2007-07-03 12:16:07 +00:00
|
|
|
pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif)
|
2004-02-26 02:04:28 +00:00
|
|
|
{
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
#ifdef RADIX_MPATH
|
|
|
|
struct radix_node_head *rnh;
|
|
|
|
#endif
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
struct sockaddr_in *dst;
|
2007-07-03 12:16:07 +00:00
|
|
|
int ret = 1;
|
|
|
|
int check_mpath;
|
|
|
|
#ifndef __FreeBSD__
|
|
|
|
extern int ipmultipath;
|
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
#ifdef INET6
|
2007-07-03 12:16:07 +00:00
|
|
|
#ifndef __FreeBSD__
|
|
|
|
extern int ip6_multipath;
|
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
struct sockaddr_in6 *dst6;
|
|
|
|
struct route_in6 ro;
|
|
|
|
#else
|
2004-02-26 02:04:28 +00:00
|
|
|
struct route ro;
|
2005-05-03 16:43:32 +00:00
|
|
|
#endif
|
2007-07-03 12:16:07 +00:00
|
|
|
struct radix_node *rn;
|
|
|
|
struct rtentry *rt;
|
|
|
|
struct ifnet *ifp;
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2007-07-03 12:16:07 +00:00
|
|
|
check_mpath = 0;
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
#ifdef RADIX_MPATH
|
|
|
|
/* XXX: stick to table 0 for now */
|
|
|
|
rnh = rt_tables_get_rnh(0, af);
|
|
|
|
if (rnh != NULL && rn_mpath_capable(rnh))
|
|
|
|
check_mpath = 1;
|
|
|
|
#endif
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
bzero(&ro, sizeof(ro));
|
2005-05-03 16:43:32 +00:00
|
|
|
switch (af) {
|
|
|
|
case AF_INET:
|
|
|
|
dst = satosin(&ro.ro_dst);
|
|
|
|
dst->sin_family = AF_INET;
|
|
|
|
dst->sin_len = sizeof(*dst);
|
|
|
|
dst->sin_addr = addr->v4;
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifndef __FreeBSD__
|
2007-07-03 12:16:07 +00:00
|
|
|
if (ipmultipath)
|
|
|
|
check_mpath = 1;
|
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
break;
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
2011-06-28 11:57:25 +00:00
|
|
|
/*
|
|
|
|
* Skip check for addresses with embedded interface scope,
|
|
|
|
* as they would always match anyway.
|
|
|
|
*/
|
|
|
|
if (IN6_IS_SCOPE_EMBED(&addr->v6))
|
|
|
|
goto out;
|
2005-05-03 16:43:32 +00:00
|
|
|
dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
|
|
|
|
dst6->sin6_family = AF_INET6;
|
|
|
|
dst6->sin6_len = sizeof(*dst6);
|
|
|
|
dst6->sin6_addr = addr->v6;
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifndef __FreeBSD__
|
2007-07-03 12:16:07 +00:00
|
|
|
if (ip6_multipath)
|
|
|
|
check_mpath = 1;
|
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
|
|
|
default:
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2007-07-03 12:16:07 +00:00
|
|
|
/* Skip checks for ipsec interfaces */
|
|
|
|
if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
|
|
|
|
goto out;
|
|
|
|
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
Add code to allow the system to handle multiple routing tables.
This particular implementation is designed to be fully backwards compatible
and to be MFC-able to 7.x (and 6.x)
Currently the only protocol that can make use of the multiple tables is IPv4
Similar functionality exists in OpenBSD and Linux.
From my notes:
-----
One thing where FreeBSD has been falling behind, and which by chance I
have some time to work on is "policy based routing", which allows
different
packet streams to be routed by more than just the destination address.
Constraints:
------------
I want to make some form of this available in the 6.x tree
(and by extension 7.x) , but FreeBSD in general needs it so I might as
well do it in -current and back port the portions I need.
One of the ways that this can be done is to have the ability to
instantiate multiple kernel routing tables (which I will now
refer to as "Forwarding Information Bases" or "FIBs" for political
correctness reasons). Which FIB a particular packet uses to make
the next hop decision can be decided by a number of mechanisms.
The policies these mechanisms implement are the "Policies" referred
to in "Policy based routing".
One of the constraints I have if I try to back port this work to
6.x is that it must be implemented as a EXTENSION to the existing
ABIs in 6.x so that third party applications do not need to be
recompiled in timespan of the branch.
This first version will not have some of the bells and whistles that
will come with later versions. It will, for example, be limited to 16
tables in the first commit.
Implementation method, Compatible version. (part 1)
-------------------------------
For this reason I have implemented a "sufficient subset" of a
multiple routing table solution in Perforce, and back-ported it
to 6.x. (also in Perforce though not always caught up with what I
have done in -current/P4). The subset allows a number of FIBs
to be defined at compile time (8 is sufficient for my purposes in 6.x)
and implements the changes needed to allow IPV4 to use them. I have not
done the changes for ipv6 simply because I do not need it, and I do not
have enough knowledge of ipv6 (e.g. neighbor discovery) needed to do it.
Other protocol families are left untouched and should there be
users with proprietary protocol families, they should continue to work
and be oblivious to the existence of the extra FIBs.
To understand how this is done, one must know that the current FIB
code starts everything off with a single dimensional array of
pointers to FIB head structures (One per protocol family), each of
which in turn points to the trie of routes available to that family.
The basic change in the ABI compatible version of the change is to
extend that array to be a 2 dimensional array, so that
instead of protocol family X looking at rt_tables[X] for the
table it needs, it looks at rt_tables[Y][X] when for all
protocol families except ipv4 Y is always 0.
Code that is unaware of the change always just sees the first row
of the table, which of course looks just like the one dimensional
array that existed before.
The entry points rtrequest(), rtalloc(), rtalloc1(), rtalloc_ign()
are all maintained, but refer only to the first row of the array,
so that existing callers in proprietary protocols can continue to
do the "right thing".
Some new entry points are added, for the exclusive use of ipv4 code
called in_rtrequest(), in_rtalloc(), in_rtalloc1() and in_rtalloc_ign(),
which have an extra argument which refers the code to the correct row.
In addition, there are some new entry points (currently called
rtalloc_fib() and friends) that check the Address family being
looked up and call either rtalloc() (and friends) if the protocol
is not IPv4 forcing the action to row 0 or to the appropriate row
if it IS IPv4 (and that info is available). These are for calling
from code that is not specific to any particular protocol. The way
these are implemented would change in the non ABI preserving code
to be added later.
One feature of the first version of the code is that for ipv4,
the interface routes show up automatically on all the FIBs, so
that no matter what FIB you select you always have the basic
direct attached hosts available to you. (rtinit() does this
automatically).
You CAN delete an interface route from one FIB should you want
to but by default it's there. ARP information is also available
in each FIB. It's assumed that the same machine would have the
same MAC address, regardless of which FIB you are using to get
to it.
This brings us as to how the correct FIB is selected for an outgoing
IPV4 packet.
Firstly, all packets have a FIB associated with them. if nothing
has been done to change it, it will be FIB 0. The FIB is changed
in the following ways.
Packets fall into one of a number of classes.
1/ locally generated packets, coming from a socket/PCB.
Such packets select a FIB from a number associated with the
socket/PCB. This in turn is inherited from the process,
but can be changed by a socket option. The process in turn
inherits it on fork. I have written a utility called setfib
that acts a bit like nice..
setfib -3 ping target.example.com # will use fib 3 for ping.
It is an obvious extension to make it a property of a jail
but I have not done so. It can be achieved by combining the setfib and
jail commands.
2/ packets received on an interface for forwarding.
By default these packets would use table 0,
(or possibly a number settable in a sysctl(not yet)).
but prior to routing the firewall can inspect them (see below).
(possibly in the future you may be able to associate a FIB
with packets received on an interface.. An ifconfig arg, but not yet.)
3/ packets inspected by a packet classifier, which can arbitrarily
associate a fib with it on a packet by packet basis.
A fib assigned to a packet by a packet classifier
(such as ipfw) would over-ride a fib associated by
a more default source. (such as cases 1 or 2).
4/ a tcp listen socket associated with a fib will generate
accept sockets that are associated with that same fib.
5/ Packets generated in response to some other packet (e.g. reset
or icmp packets). These should use the FIB associated with the
packet being responded to.
6/ Packets generated during encapsulation.
gif, tun and other tunnel interfaces will encapsulate using the FIB
that was in effect with the process that set up the tunnel.
thus setfib 1 ifconfig gif0 [tunnel instructions]
will set the fib for the tunnel to use to be fib 1.
Routing messages would be associated with their
process, and thus select one FIB or another.
messages from the kernel would be associated with the fib they
refer to and would only be received by a routing socket associated
with that fib. (not yet implemented)
In addition Netstat has been edited to be able to cope with the
fact that the array is now 2 dimensional. (It looks in system
memory using libkvm (!)). Old versions of netstat see only the first FIB.
In addition two sysctls are added to give:
a) the number of FIBs compiled in (active)
b) the default FIB of the calling process.
Early testing experience:
-------------------------
Basically our (IronPort's) appliance does this functionality already
using ipfw fwd but that method has some drawbacks.
For example,
It can't fully simulate a routing table because it can't influence the
socket's choice of local address when a connect() is done.
Testing during the generating of these changes has been
remarkably smooth so far. Multiple tables have co-existed
with no notable side effects, and packets have been routed
accordingly.
ipfw has grown 2 new keywords:
setfib N ip from any to any
count ip from any to any fib N
In pf there seems to be a requirement to be able to give symbolic names to the
fibs but I do not have that capacity. I am not sure if it is required.
SCTP has interestingly enough built in support for this, called VRFs
in Cisco parlance. it will be interesting to see how that handles it
when it suddenly actually does something.
Where to next:
--------------------
After committing the ABI compatible version and MFCing it, I'd
like to proceed in a forward direction in -current. this will
result in some roto-tilling in the routing code.
Firstly: the current code's idea of having a separate tree per
protocol family, all of the same format, and pointed to by the
1 dimensional array is a bit silly. Especially when one considers that
there is code that makes assumptions about every protocol having the
same internal structures there. Some protocols don't WANT that
sort of structure. (for example the whole idea of a netmask is foreign
to appletalk). This needs to be made opaque to the external code.
My suggested first change is to add routing method pointers to the
'domain' structure, along with information pointing the data.
instead of having an array of pointers to uniform structures,
there would be an array pointing to the 'domain' structures
for each protocol address domain (protocol family),
and the methods this reached would be called. The methods would have
an argument that gives FIB number, but the protocol would be free
to ignore it.
When the ABI can be changed it raises the possibility of the
addition of a fib entry into the "struct route". Currently,
the structure contains the sockaddr of the destination, and the resulting
fib entry. To make this work fully, one could add a fib number
so that given an address and a fib, one can find the third element, the
fib entry.
Interaction with the ARP layer/ LL layer would need to be
revisited as well. Qing Li has been working on this already.
This work was sponsored by Ironport Systems/Cisco
Reviewed by: several including rwatson, bz and mlair (parts each)
Obtained from: Ironport systems/Cisco
2008-05-09 23:03:00 +00:00
|
|
|
/* XXX MRT not always INET */ /* stick with table 0 though */
|
2011-05-31 15:05:29 +00:00
|
|
|
#ifdef INET
|
Add code to allow the system to handle multiple routing tables.
This particular implementation is designed to be fully backwards compatible
and to be MFC-able to 7.x (and 6.x)
Currently the only protocol that can make use of the multiple tables is IPv4
Similar functionality exists in OpenBSD and Linux.
From my notes:
-----
One thing where FreeBSD has been falling behind, and which by chance I
have some time to work on is "policy based routing", which allows
different
packet streams to be routed by more than just the destination address.
Constraints:
------------
I want to make some form of this available in the 6.x tree
(and by extension 7.x) , but FreeBSD in general needs it so I might as
well do it in -current and back port the portions I need.
One of the ways that this can be done is to have the ability to
instantiate multiple kernel routing tables (which I will now
refer to as "Forwarding Information Bases" or "FIBs" for political
correctness reasons). Which FIB a particular packet uses to make
the next hop decision can be decided by a number of mechanisms.
The policies these mechanisms implement are the "Policies" referred
to in "Policy based routing".
One of the constraints I have if I try to back port this work to
6.x is that it must be implemented as a EXTENSION to the existing
ABIs in 6.x so that third party applications do not need to be
recompiled in timespan of the branch.
This first version will not have some of the bells and whistles that
will come with later versions. It will, for example, be limited to 16
tables in the first commit.
Implementation method, Compatible version. (part 1)
-------------------------------
For this reason I have implemented a "sufficient subset" of a
multiple routing table solution in Perforce, and back-ported it
to 6.x. (also in Perforce though not always caught up with what I
have done in -current/P4). The subset allows a number of FIBs
to be defined at compile time (8 is sufficient for my purposes in 6.x)
and implements the changes needed to allow IPV4 to use them. I have not
done the changes for ipv6 simply because I do not need it, and I do not
have enough knowledge of ipv6 (e.g. neighbor discovery) needed to do it.
Other protocol families are left untouched and should there be
users with proprietary protocol families, they should continue to work
and be oblivious to the existence of the extra FIBs.
To understand how this is done, one must know that the current FIB
code starts everything off with a single dimensional array of
pointers to FIB head structures (One per protocol family), each of
which in turn points to the trie of routes available to that family.
The basic change in the ABI compatible version of the change is to
extend that array to be a 2 dimensional array, so that
instead of protocol family X looking at rt_tables[X] for the
table it needs, it looks at rt_tables[Y][X] when for all
protocol families except ipv4 Y is always 0.
Code that is unaware of the change always just sees the first row
of the table, which of course looks just like the one dimensional
array that existed before.
The entry points rtrequest(), rtalloc(), rtalloc1(), rtalloc_ign()
are all maintained, but refer only to the first row of the array,
so that existing callers in proprietary protocols can continue to
do the "right thing".
Some new entry points are added, for the exclusive use of ipv4 code
called in_rtrequest(), in_rtalloc(), in_rtalloc1() and in_rtalloc_ign(),
which have an extra argument which refers the code to the correct row.
In addition, there are some new entry points (currently called
rtalloc_fib() and friends) that check the Address family being
looked up and call either rtalloc() (and friends) if the protocol
is not IPv4 forcing the action to row 0 or to the appropriate row
if it IS IPv4 (and that info is available). These are for calling
from code that is not specific to any particular protocol. The way
these are implemented would change in the non ABI preserving code
to be added later.
One feature of the first version of the code is that for ipv4,
the interface routes show up automatically on all the FIBs, so
that no matter what FIB you select you always have the basic
direct attached hosts available to you. (rtinit() does this
automatically).
You CAN delete an interface route from one FIB should you want
to but by default it's there. ARP information is also available
in each FIB. It's assumed that the same machine would have the
same MAC address, regardless of which FIB you are using to get
to it.
This brings us as to how the correct FIB is selected for an outgoing
IPV4 packet.
Firstly, all packets have a FIB associated with them. if nothing
has been done to change it, it will be FIB 0. The FIB is changed
in the following ways.
Packets fall into one of a number of classes.
1/ locally generated packets, coming from a socket/PCB.
Such packets select a FIB from a number associated with the
socket/PCB. This in turn is inherited from the process,
but can be changed by a socket option. The process in turn
inherits it on fork. I have written a utility called setfib
that acts a bit like nice..
setfib -3 ping target.example.com # will use fib 3 for ping.
It is an obvious extension to make it a property of a jail
but I have not done so. It can be achieved by combining the setfib and
jail commands.
2/ packets received on an interface for forwarding.
By default these packets would use table 0,
(or possibly a number settable in a sysctl(not yet)).
but prior to routing the firewall can inspect them (see below).
(possibly in the future you may be able to associate a FIB
with packets received on an interface.. An ifconfig arg, but not yet.)
3/ packets inspected by a packet classifier, which can arbitrarily
associate a fib with it on a packet by packet basis.
A fib assigned to a packet by a packet classifier
(such as ipfw) would over-ride a fib associated by
a more default source. (such as cases 1 or 2).
4/ a tcp listen socket associated with a fib will generate
accept sockets that are associated with that same fib.
5/ Packets generated in response to some other packet (e.g. reset
or icmp packets). These should use the FIB associated with the
packet being responded to.
6/ Packets generated during encapsulation.
gif, tun and other tunnel interfaces will encapsulate using the FIB
that was in effect with the process that set up the tunnel.
thus setfib 1 ifconfig gif0 [tunnel instructions]
will set the fib for the tunnel to use to be fib 1.
Routing messages would be associated with their
process, and thus select one FIB or another.
messages from the kernel would be associated with the fib they
refer to and would only be received by a routing socket associated
with that fib. (not yet implemented)
In addition Netstat has been edited to be able to cope with the
fact that the array is now 2 dimensional. (It looks in system
memory using libkvm (!)). Old versions of netstat see only the first FIB.
In addition two sysctls are added to give:
a) the number of FIBs compiled in (active)
b) the default FIB of the calling process.
Early testing experience:
-------------------------
Basically our (IronPort's) appliance does this functionality already
using ipfw fwd but that method has some drawbacks.
For example,
It can't fully simulate a routing table because it can't influence the
socket's choice of local address when a connect() is done.
Testing during the generating of these changes has been
remarkably smooth so far. Multiple tables have co-existed
with no notable side effects, and packets have been routed
accordingly.
ipfw has grown 2 new keywords:
setfib N ip from any to any
count ip from any to any fib N
In pf there seems to be a requirement to be able to give symbolic names to the
fibs but I do not have that capacity. I am not sure if it is required.
SCTP has interestingly enough built in support for this, called VRFs
in Cisco parlance. it will be interesting to see how that handles it
when it suddenly actually does something.
Where to next:
--------------------
After committing the ABI compatible version and MFCing it, I'd
like to proceed in a forward direction in -current. this will
result in some roto-tilling in the routing code.
Firstly: the current code's idea of having a separate tree per
protocol family, all of the same format, and pointed to by the
1 dimensional array is a bit silly. Especially when one considers that
there is code that makes assumptions about every protocol having the
same internal structures there. Some protocols don't WANT that
sort of structure. (for example the whole idea of a netmask is foreign
to appletalk). This needs to be made opaque to the external code.
My suggested first change is to add routing method pointers to the
'domain' structure, along with information pointing the data.
instead of having an array of pointers to uniform structures,
there would be an array pointing to the 'domain' structures
for each protocol address domain (protocol family),
and the methods this reached would be called. The methods would have
an argument that gives FIB number, but the protocol would be free
to ignore it.
When the ABI can be changed it raises the possibility of the
addition of a fib entry into the "struct route". Currently,
the structure contains the sockaddr of the destination, and the resulting
fib entry. To make this work fully, one could add a fib number
so that given an address and a fib, one can find the third element, the
fib entry.
Interaction with the ARP layer/ LL layer would need to be
revisited as well. Qing Li has been working on this already.
This work was sponsored by Ironport Systems/Cisco
Reviewed by: several including rwatson, bz and mlair (parts each)
Obtained from: Ironport systems/Cisco
2008-05-09 23:03:00 +00:00
|
|
|
if (af == AF_INET)
|
The main goals of this project are:
1. separating L2 tables (ARP, NDP) from the L3 routing tables
2. removing as much locking dependencies among these layers as
possible to allow for some parallelism in the search operations
3. simplify the logic in the routing code,
The most notable end result is the obsolescence of the route
cloning (RTF_CLONING) concept, which translated into code reduction
in both IPv4 ARP and IPv6 NDP related modules, and size reduction in
struct rtentry{}. The change in design obsoletes the semantics of
RTF_CLONING, RTF_WASCLONE and RTF_LLINFO routing flags. The userland
applications such as "arp" and "ndp" have been modified to reflect
those changes. The output from "netstat -r" shows only the routing
entries.
Quite a few developers have contributed to this project in the
past: Glebius Smirnoff, Luigi Rizzo, Alessandro Cerri, and
Andre Oppermann. And most recently:
- Kip Macy revised the locking code completely, thus completing
the last piece of the puzzle, Kip has also been conducting
active functional testing
- Sam Leffler has helped me improving/refactoring the code, and
provided valuable reviews
- Julian Elischer setup the perforce tree for me and has helped
me maintaining that branch before the svn conversion
2008-12-15 06:10:57 +00:00
|
|
|
in_rtalloc_ign((struct route *)&ro, 0, 0);
|
Add code to allow the system to handle multiple routing tables.
This particular implementation is designed to be fully backwards compatible
and to be MFC-able to 7.x (and 6.x)
Currently the only protocol that can make use of the multiple tables is IPv4
Similar functionality exists in OpenBSD and Linux.
From my notes:
-----
One thing where FreeBSD has been falling behind, and which by chance I
have some time to work on is "policy based routing", which allows
different
packet streams to be routed by more than just the destination address.
Constraints:
------------
I want to make some form of this available in the 6.x tree
(and by extension 7.x) , but FreeBSD in general needs it so I might as
well do it in -current and back port the portions I need.
One of the ways that this can be done is to have the ability to
instantiate multiple kernel routing tables (which I will now
refer to as "Forwarding Information Bases" or "FIBs" for political
correctness reasons). Which FIB a particular packet uses to make
the next hop decision can be decided by a number of mechanisms.
The policies these mechanisms implement are the "Policies" referred
to in "Policy based routing".
One of the constraints I have if I try to back port this work to
6.x is that it must be implemented as an EXTENSION to the existing
ABIs in 6.x so that third party applications do not need to be
recompiled in the timespan of the branch.
This first version will not have some of the bells and whistles that
will come with later versions. It will, for example, be limited to 16
tables in the first commit.
Implementation method, Compatible version. (part 1)
-------------------------------
For this reason I have implemented a "sufficient subset" of a
multiple routing table solution in Perforce, and back-ported it
to 6.x. (also in Perforce though not always caught up with what I
have done in -current/P4). The subset allows a number of FIBs
to be defined at compile time (8 is sufficient for my purposes in 6.x)
and implements the changes needed to allow IPV4 to use them. I have not
done the changes for ipv6 simply because I do not need it, and I do not
have enough knowledge of ipv6 (e.g. neighbor discovery) needed to do it.
Other protocol families are left untouched and should there be
users with proprietary protocol families, they should continue to work
and be oblivious to the existence of the extra FIBs.
To understand how this is done, one must know that the current FIB
code starts everything off with a single dimensional array of
pointers to FIB head structures (One per protocol family), each of
which in turn points to the trie of routes available to that family.
The basic change in the ABI compatible version of the change is to
extend that array to be a 2 dimensional array, so that
instead of protocol family X looking at rt_tables[X] for the
table it needs, it looks at rt_tables[Y][X] when for all
protocol families except ipv4 Y is always 0.
Code that is unaware of the change always just sees the first row
of the table, which of course looks just like the one dimensional
array that existed before.
The entry points rtrequest(), rtalloc(), rtalloc1(), rtalloc_ign()
are all maintained, but refer only to the first row of the array,
so that existing callers in proprietary protocols can continue to
do the "right thing".
Some new entry points are added, for the exclusive use of ipv4 code
called in_rtrequest(), in_rtalloc(), in_rtalloc1() and in_rtalloc_ign(),
which have an extra argument which refers the code to the correct row.
In addition, there are some new entry points (currently called
rtalloc_fib() and friends) that check the Address family being
looked up and call either rtalloc() (and friends) if the protocol
is not IPv4 forcing the action to row 0 or to the appropriate row
if it IS IPv4 (and that info is available). These are for calling
from code that is not specific to any particular protocol. The way
these are implemented would change in the non ABI preserving code
to be added later.
One feature of the first version of the code is that for ipv4,
the interface routes show up automatically on all the FIBs, so
that no matter what FIB you select you always have the basic
direct attached hosts available to you. (rtinit() does this
automatically).
You CAN delete an interface route from one FIB should you want
to but by default it's there. ARP information is also available
in each FIB. It's assumed that the same machine would have the
same MAC address, regardless of which FIB you are using to get
to it.
This brings us to how the correct FIB is selected for an outgoing
IPV4 packet.
Firstly, all packets have a FIB associated with them. if nothing
has been done to change it, it will be FIB 0. The FIB is changed
in the following ways.
Packets fall into one of a number of classes.
1/ locally generated packets, coming from a socket/PCB.
Such packets select a FIB from a number associated with the
socket/PCB. This in turn is inherited from the process,
but can be changed by a socket option. The process in turn
inherits it on fork. I have written a utility called setfib
that acts a bit like nice..
setfib -3 ping target.example.com # will use fib 3 for ping.
It is an obvious extension to make it a property of a jail
but I have not done so. It can be achieved by combining the setfib and
jail commands.
2/ packets received on an interface for forwarding.
By default these packets would use table 0,
(or possibly a number settable in a sysctl(not yet)).
but prior to routing the firewall can inspect them (see below).
(possibly in the future you may be able to associate a FIB
with packets received on an interface.. An ifconfig arg, but not yet.)
3/ packets inspected by a packet classifier, which can arbitrarily
associate a fib with it on a packet by packet basis.
A fib assigned to a packet by a packet classifier
(such as ipfw) would over-ride a fib associated by
a more default source. (such as cases 1 or 2).
4/ a tcp listen socket associated with a fib will generate
accept sockets that are associated with that same fib.
5/ Packets generated in response to some other packet (e.g. reset
or icmp packets). These should use the FIB associated with the
packet being responded to.
6/ Packets generated during encapsulation.
gif, tun and other tunnel interfaces will encapsulate using the FIB
that was in effect with the process that set up the tunnel.
thus setfib 1 ifconfig gif0 [tunnel instructions]
will set the fib for the tunnel to use to be fib 1.
Routing messages would be associated with their
process, and thus select one FIB or another.
messages from the kernel would be associated with the fib they
refer to and would only be received by a routing socket associated
with that fib. (not yet implemented)
In addition Netstat has been edited to be able to cope with the
fact that the array is now 2 dimensional. (It looks in system
memory using libkvm (!)). Old versions of netstat see only the first FIB.
In addition two sysctls are added to give:
a) the number of FIBs compiled in (active)
b) the default FIB of the calling process.
Early testing experience:
-------------------------
Basically our (IronPort's) appliance does this functionality already
using ipfw fwd but that method has some drawbacks.
For example,
It can't fully simulate a routing table because it can't influence the
socket's choice of local address when a connect() is done.
Testing during the generating of these changes has been
remarkably smooth so far. Multiple tables have co-existed
with no notable side effects, and packets have been routed
accordingly.
ipfw has grown 2 new keywords:
setfib N ip from any to any
count ip from any to any fib N
In pf there seems to be a requirement to be able to give symbolic names to the
fibs but I do not have that capacity. I am not sure if it is required.
SCTP has interestingly enough built in support for this, called VRFs
in Cisco parlance. it will be interesting to see how that handles it
when it suddenly actually does something.
Where to next:
--------------------
After committing the ABI compatible version and MFCing it, I'd
like to proceed in a forward direction in -current. this will
result in some roto-tilling in the routing code.
Firstly: the current code's idea of having a separate tree per
protocol family, all of the same format, and pointed to by the
1 dimensional array is a bit silly. Especially when one considers that
there is code that makes assumptions about every protocol having the
same internal structures there. Some protocols don't WANT that
sort of structure. (for example the whole idea of a netmask is foreign
to appletalk). This needs to be made opaque to the external code.
My suggested first change is to add routing method pointers to the
'domain' structure, along with information pointing the data.
instead of having an array of pointers to uniform structures,
there would be an array pointing to the 'domain' structures
for each protocol address domain (protocol family),
and the methods this reached would be called. The methods would have
an argument that gives FIB number, but the protocol would be free
to ignore it.
When the ABI can be changed it raises the possibility of the
addition of a fib entry into the "struct route". Currently,
the structure contains the sockaddr of the destination, and the resulting
fib entry. To make this work fully, one could add a fib number
so that given an address and a fib, one can find the third element, the
fib entry.
Interaction with the ARP layer/ LL layer would need to be
revisited as well. Qing Li has been working on this already.
This work was sponsored by Ironport Systems/Cisco
Reviewed by: several including rwatson, bz and mlair (parts each)
Obtained from: Ironport systems/Cisco
2008-05-09 23:03:00 +00:00
|
|
|
else
|
2011-05-31 15:05:29 +00:00
|
|
|
#endif
|
The main goals of this project are:
1. separating L2 tables (ARP, NDP) from the L3 routing tables
2. removing as much locking dependencies among these layers as
possible to allow for some parallelism in the search operations
3. simplify the logic in the routing code,
The most notable end result is the obsolescence of the route
cloning (RTF_CLONING) concept, which translated into code reduction
in both IPv4 ARP and IPv6 NDP related modules, and size reduction in
struct rtentry{}. The change in design obsoletes the semantics of
RTF_CLONING, RTF_WASCLONE and RTF_LLINFO routing flags. The userland
applications such as "arp" and "ndp" have been modified to reflect
those changes. The output from "netstat -r" shows only the routing
entries.
Quite a few developers have contributed to this project in the
past: Glebius Smirnoff, Luigi Rizzo, Alessandro Cerri, and
Andre Oppermann. And most recently:
- Kip Macy revised the locking code completely, thus completing
the last piece of the puzzle, Kip has also been conducting
active functional testing
- Sam Leffler has helped me improving/refactoring the code, and
provided valuable reviews
- Julian Elischer setup the perforce tree for me and has helped
me maintaining that branch before the svn conversion
2008-12-15 06:10:57 +00:00
|
|
|
rtalloc_ign((struct route *)&ro, 0);
|
2004-02-26 02:34:12 +00:00
|
|
|
#else /* ! __FreeBSD__ */
|
2005-05-03 16:43:32 +00:00
|
|
|
rtalloc_noclone((struct route *)&ro, NO_CLONING);
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
if (ro.ro_rt != NULL) {
|
2007-07-03 12:16:07 +00:00
|
|
|
/* No interface given, this is a no-route check */
|
|
|
|
if (kif == NULL)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
if (kif->pfik_ifp == NULL) {
|
|
|
|
ret = 0;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Perform uRPF check if passed input interface */
|
|
|
|
ret = 0;
|
|
|
|
rn = (struct radix_node *)ro.ro_rt;
|
|
|
|
do {
|
|
|
|
rt = (struct rtentry *)rn;
|
|
|
|
#ifndef __FreeBSD__ /* CARPDEV */
|
|
|
|
if (rt->rt_ifp->if_type == IFT_CARP)
|
|
|
|
ifp = rt->rt_ifp->if_carpdev;
|
|
|
|
else
|
|
|
|
#endif
|
|
|
|
ifp = rt->rt_ifp;
|
2005-05-03 16:43:32 +00:00
|
|
|
|
2007-07-03 12:16:07 +00:00
|
|
|
if (kif->pfik_ifp == ifp)
|
|
|
|
ret = 1;
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
#ifdef RADIX_MPATH
|
2007-07-03 12:16:07 +00:00
|
|
|
rn = rn_mpath_next(rn);
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
|
|
|
#else
|
|
|
|
rn = rn_mpath_next(rn, 0);
|
2007-07-03 12:16:07 +00:00
|
|
|
#endif
|
|
|
|
} while (check_mpath == 1 && rn != NULL && ret == 0);
|
|
|
|
} else
|
|
|
|
ret = 0;
|
|
|
|
out:
|
|
|
|
if (ro.ro_rt != NULL)
|
|
|
|
RTFREE(ro.ro_rt);
|
|
|
|
return (ret);
|
2005-05-03 16:43:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw)
|
|
|
|
{
|
|
|
|
struct sockaddr_in *dst;
|
|
|
|
#ifdef INET6
|
|
|
|
struct sockaddr_in6 *dst6;
|
|
|
|
struct route_in6 ro;
|
|
|
|
#else
|
|
|
|
struct route ro;
|
|
|
|
#endif
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
bzero(&ro, sizeof(ro));
|
|
|
|
switch (af) {
|
|
|
|
case AF_INET:
|
|
|
|
dst = satosin(&ro.ro_dst);
|
|
|
|
dst->sin_family = AF_INET;
|
|
|
|
dst->sin_len = sizeof(*dst);
|
|
|
|
dst->sin_addr = addr->v4;
|
|
|
|
break;
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
|
|
|
dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
|
|
|
|
dst6->sin6_family = AF_INET6;
|
|
|
|
dst6->sin6_len = sizeof(*dst6);
|
|
|
|
dst6->sin6_addr = addr->v6;
|
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
|
|
|
default:
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
# ifdef RTF_PRCLONING
|
|
|
|
rtalloc_ign((struct route *)&ro, (RTF_CLONING|RTF_PRCLONING));
|
|
|
|
# else /* !RTF_PRCLONING */
|
2011-05-31 15:05:29 +00:00
|
|
|
#ifdef INET
|
Add code to allow the system to handle multiple routing tables.
This particular implementation is designed to be fully backwards compatible
and to be MFC-able to 7.x (and 6.x)
Currently the only protocol that can make use of the multiple tables is IPv4
Similar functionality exists in OpenBSD and Linux.
From my notes:
-----
One thing where FreeBSD has been falling behind, and which by chance I
have some time to work on is "policy based routing", which allows
different
packet streams to be routed by more than just the destination address.
Constraints:
------------
I want to make some form of this available in the 6.x tree
(and by extension 7.x) , but FreeBSD in general needs it so I might as
well do it in -current and back port the portions I need.
One of the ways that this can be done is to have the ability to
instantiate multiple kernel routing tables (which I will now
refer to as "Forwarding Information Bases" or "FIBs" for political
correctness reasons). Which FIB a particular packet uses to make
the next hop decision can be decided by a number of mechanisms.
The policies these mechanisms implement are the "Policies" referred
to in "Policy based routing".
One of the constraints I have if I try to back port this work to
6.x is that it must be implemented as an EXTENSION to the existing
ABIs in 6.x so that third party applications do not need to be
recompiled in the timespan of the branch.
This first version will not have some of the bells and whistles that
will come with later versions. It will, for example, be limited to 16
tables in the first commit.
Implementation method, Compatible version. (part 1)
-------------------------------
For this reason I have implemented a "sufficient subset" of a
multiple routing table solution in Perforce, and back-ported it
to 6.x. (also in Perforce though not always caught up with what I
have done in -current/P4). The subset allows a number of FIBs
to be defined at compile time (8 is sufficient for my purposes in 6.x)
and implements the changes needed to allow IPV4 to use them. I have not
done the changes for ipv6 simply because I do not need it, and I do not
have enough knowledge of ipv6 (e.g. neighbor discovery) needed to do it.
Other protocol families are left untouched and should there be
users with proprietary protocol families, they should continue to work
and be oblivious to the existence of the extra FIBs.
To understand how this is done, one must know that the current FIB
code starts everything off with a single dimensional array of
pointers to FIB head structures (One per protocol family), each of
which in turn points to the trie of routes available to that family.
The basic change in the ABI compatible version of the change is to
extend that array to be a 2 dimensional array, so that
instead of protocol family X looking at rt_tables[X] for the
table it needs, it looks at rt_tables[Y][X] when for all
protocol families except ipv4 Y is always 0.
Code that is unaware of the change always just sees the first row
of the table, which of course looks just like the one dimensional
array that existed before.
The entry points rtrequest(), rtalloc(), rtalloc1(), rtalloc_ign()
are all maintained, but refer only to the first row of the array,
so that existing callers in proprietary protocols can continue to
do the "right thing".
Some new entry points are added, for the exclusive use of ipv4 code
called in_rtrequest(), in_rtalloc(), in_rtalloc1() and in_rtalloc_ign(),
which have an extra argument which refers the code to the correct row.
In addition, there are some new entry points (currently called
rtalloc_fib() and friends) that check the Address family being
looked up and call either rtalloc() (and friends) if the protocol
is not IPv4 forcing the action to row 0 or to the appropriate row
if it IS IPv4 (and that info is available). These are for calling
from code that is not specific to any particular protocol. The way
these are implemented would change in the non ABI preserving code
to be added later.
One feature of the first version of the code is that for ipv4,
the interface routes show up automatically on all the FIBs, so
that no matter what FIB you select you always have the basic
direct attached hosts available to you. (rtinit() does this
automatically).
You CAN delete an interface route from one FIB should you want
to but by default it's there. ARP information is also available
in each FIB. It's assumed that the same machine would have the
same MAC address, regardless of which FIB you are using to get
to it.
This brings us to how the correct FIB is selected for an outgoing
IPV4 packet.
Firstly, all packets have a FIB associated with them. if nothing
has been done to change it, it will be FIB 0. The FIB is changed
in the following ways.
Packets fall into one of a number of classes.
1/ locally generated packets, coming from a socket/PCB.
Such packets select a FIB from a number associated with the
socket/PCB. This in turn is inherited from the process,
but can be changed by a socket option. The process in turn
inherits it on fork. I have written a utility called setfib
that acts a bit like nice..
setfib -3 ping target.example.com # will use fib 3 for ping.
It is an obvious extension to make it a property of a jail
but I have not done so. It can be achieved by combining the setfib and
jail commands.
2/ packets received on an interface for forwarding.
By default these packets would use table 0,
(or possibly a number settable in a sysctl(not yet)).
but prior to routing the firewall can inspect them (see below).
(possibly in the future you may be able to associate a FIB
with packets received on an interface.. An ifconfig arg, but not yet.)
3/ packets inspected by a packet classifier, which can arbitrarily
associate a fib with it on a packet by packet basis.
A fib assigned to a packet by a packet classifier
(such as ipfw) would over-ride a fib associated by
a more default source. (such as cases 1 or 2).
4/ a tcp listen socket associated with a fib will generate
accept sockets that are associated with that same fib.
5/ Packets generated in response to some other packet (e.g. reset
or icmp packets). These should use the FIB associated with the
packet being responded to.
6/ Packets generated during encapsulation.
gif, tun and other tunnel interfaces will encapsulate using the FIB
that was in effect with the process that set up the tunnel.
thus setfib 1 ifconfig gif0 [tunnel instructions]
will set the fib for the tunnel to use to be fib 1.
Routing messages would be associated with their
process, and thus select one FIB or another.
messages from the kernel would be associated with the fib they
refer to and would only be received by a routing socket associated
with that fib. (not yet implemented)
In addition Netstat has been edited to be able to cope with the
fact that the array is now 2 dimensional. (It looks in system
memory using libkvm (!)). Old versions of netstat see only the first FIB.
In addition two sysctls are added to give:
a) the number of FIBs compiled in (active)
b) the default FIB of the calling process.
Early testing experience:
-------------------------
Basically our (IronPort's) appliance does this functionality already
using ipfw fwd but that method has some drawbacks.
For example,
It can't fully simulate a routing table because it can't influence the
socket's choice of local address when a connect() is done.
Testing during the generating of these changes has been
remarkably smooth so far. Multiple tables have co-existed
with no notable side effects, and packets have been routed
accordingly.
ipfw has grown 2 new keywords:
setfib N ip from any to any
count ip from any to any fib N
In pf there seems to be a requirement to be able to give symbolic names to the
fibs but I do not have that capacity. I am not sure if it is required.
SCTP has interestingly enough built in support for this, called VRFs
in Cisco parlance. it will be interesting to see how that handles it
when it suddenly actually does something.
Where to next:
--------------------
After committing the ABI compatible version and MFCing it, I'd
like to proceed in a forward direction in -current. this will
result in some roto-tilling in the routing code.
Firstly: the current code's idea of having a separate tree per
protocol family, all of the same format, and pointed to by the
1 dimensional array is a bit silly. Especially when one considers that
there is code that makes assumptions about every protocol having the
same internal structures there. Some protocols don't WANT that
sort of structure. (for example the whole idea of a netmask is foreign
to appletalk). This needs to be made opaque to the external code.
My suggested first change is to add routing method pointers to the
'domain' structure, along with information pointing the data.
instead of having an array of pointers to uniform structures,
there would be an array pointing to the 'domain' structures
for each protocol address domain (protocol family),
and the methods this reached would be called. The methods would have
an argument that gives FIB number, but the protocol would be free
to ignore it.
When the ABI can be changed it raises the possibility of the
addition of a fib entry into the "struct route". Currently,
the structure contains the sockaddr of the destination, and the resulting
fib entry. To make this work fully, one could add a fib number
so that given an address and a fib, one can find the third element, the
fib entry.
Interaction with the ARP layer/ LL layer would need to be
revisited as well. Qing Li has been working on this already.
This work was sponsored by Ironport Systems/Cisco
Reviewed by: several including rwatson, bz and mlair (parts each)
Obtained from: Ironport systems/Cisco
2008-05-09 23:03:00 +00:00
|
|
|
if (af == AF_INET)
|
The main goals of this project are:
1. separating L2 tables (ARP, NDP) from the L3 routing tables
2. removing as much locking dependencies among these layers as
possible to allow for some parallelism in the search operations
3. simplify the logic in the routing code,
The most notable end result is the obsolescence of the route
cloning (RTF_CLONING) concept, which translated into code reduction
in both IPv4 ARP and IPv6 NDP related modules, and size reduction in
struct rtentry{}. The change in design obsoletes the semantics of
RTF_CLONING, RTF_WASCLONE and RTF_LLINFO routing flags. The userland
applications such as "arp" and "ndp" have been modified to reflect
those changes. The output from "netstat -r" shows only the routing
entries.
Quite a few developers have contributed to this project in the
past: Glebius Smirnoff, Luigi Rizzo, Alessandro Cerri, and
Andre Oppermann. And most recently:
- Kip Macy revised the locking code completely, thus completing
the last piece of the puzzle, Kip has also been conducting
active functional testing
- Sam Leffler has helped me improving/refactoring the code, and
provided valuable reviews
- Julian Elischer setup the perforce tree for me and has helped
me maintaining that branch before the svn conversion
2008-12-15 06:10:57 +00:00
|
|
|
in_rtalloc_ign((struct route *)&ro, 0, 0);
|
Add code to allow the system to handle multiple routing tables.
This particular implementation is designed to be fully backwards compatible
and to be MFC-able to 7.x (and 6.x)
Currently the only protocol that can make use of the multiple tables is IPv4
Similar functionality exists in OpenBSD and Linux.
From my notes:
-----
One thing where FreeBSD has been falling behind, and which by chance I
have some time to work on is "policy based routing", which allows
different
packet streams to be routed by more than just the destination address.
Constraints:
------------
I want to make some form of this available in the 6.x tree
(and by extension 7.x) , but FreeBSD in general needs it so I might as
well do it in -current and back port the portions I need.
One of the ways that this can be done is to have the ability to
instantiate multiple kernel routing tables (which I will now
refer to as "Forwarding Information Bases" or "FIBs" for political
correctness reasons). Which FIB a particular packet uses to make
the next hop decision can be decided by a number of mechanisms.
The policies these mechanisms implement are the "Policies" referred
to in "Policy based routing".
One of the constraints I have if I try to back port this work to
6.x is that it must be implemented as an EXTENSION to the existing
ABIs in 6.x so that third party applications do not need to be
recompiled in the timespan of the branch.
This first version will not have some of the bells and whistles that
will come with later versions. It will, for example, be limited to 16
tables in the first commit.
Implementation method, Compatible version. (part 1)
-------------------------------
For this reason I have implemented a "sufficient subset" of a
multiple routing table solution in Perforce, and back-ported it
to 6.x. (also in Perforce though not always caught up with what I
have done in -current/P4). The subset allows a number of FIBs
to be defined at compile time (8 is sufficient for my purposes in 6.x)
and implements the changes needed to allow IPV4 to use them. I have not
done the changes for ipv6 simply because I do not need it, and I do not
have enough knowledge of ipv6 (e.g. neighbor discovery) needed to do it.
Other protocol families are left untouched and should there be
users with proprietary protocol families, they should continue to work
and be oblivious to the existence of the extra FIBs.
To understand how this is done, one must know that the current FIB
code starts everything off with a single dimensional array of
pointers to FIB head structures (One per protocol family), each of
which in turn points to the trie of routes available to that family.
The basic change in the ABI compatible version of the change is to
extend that array to be a 2 dimensional array, so that
instead of protocol family X looking at rt_tables[X] for the
table it needs, it looks at rt_tables[Y][X] when for all
protocol families except ipv4 Y is always 0.
Code that is unaware of the change always just sees the first row
of the table, which of course looks just like the one dimensional
array that existed before.
The entry points rtrequest(), rtalloc(), rtalloc1(), rtalloc_ign()
are all maintained, but refer only to the first row of the array,
so that existing callers in proprietary protocols can continue to
do the "right thing".
Some new entry points are added, for the exclusive use of ipv4 code
called in_rtrequest(), in_rtalloc(), in_rtalloc1() and in_rtalloc_ign(),
which have an extra argument which refers the code to the correct row.
In addition, there are some new entry points (currently called
rtalloc_fib() and friends) that check the Address family being
looked up and call either rtalloc() (and friends) if the protocol
is not IPv4 forcing the action to row 0 or to the appropriate row
if it IS IPv4 (and that info is available). These are for calling
from code that is not specific to any particular protocol. The way
these are implemented would change in the non ABI preserving code
to be added later.
One feature of the first version of the code is that for ipv4,
the interface routes show up automatically on all the FIBs, so
that no matter what FIB you select you always have the basic
direct attached hosts available to you. (rtinit() does this
automatically).
You CAN delete an interface route from one FIB should you want
to but by default it's there. ARP information is also available
in each FIB. It's assumed that the same machine would have the
same MAC address, regardless of which FIB you are using to get
to it.
This brings us to how the correct FIB is selected for an outgoing
IPV4 packet.
Firstly, all packets have a FIB associated with them. if nothing
has been done to change it, it will be FIB 0. The FIB is changed
in the following ways.
Packets fall into one of a number of classes.
1/ locally generated packets, coming from a socket/PCB.
Such packets select a FIB from a number associated with the
socket/PCB. This in turn is inherited from the process,
but can be changed by a socket option. The process in turn
inherits it on fork. I have written a utility called setfib
that acts a bit like nice..
setfib -3 ping target.example.com # will use fib 3 for ping.
It is an obvious extension to make it a property of a jail
but I have not done so. It can be achieved by combining the setfib and
jail commands.
2/ packets received on an interface for forwarding.
By default these packets would use table 0,
(or possibly a number settable in a sysctl(not yet)).
but prior to routing the firewall can inspect them (see below).
(possibly in the future you may be able to associate a FIB
with packets received on an interface.. An ifconfig arg, but not yet.)
3/ packets inspected by a packet classifier, which can arbitrarily
associate a fib with it on a packet by packet basis.
A fib assigned to a packet by a packet classifier
(such as ipfw) would over-ride a fib associated by
a more default source. (such as cases 1 or 2).
4/ a tcp listen socket associated with a fib will generate
accept sockets that are associated with that same fib.
5/ Packets generated in response to some other packet (e.g. reset
or icmp packets). These should use the FIB associated with the
packet being responded to.
6/ Packets generated during encapsulation.
gif, tun and other tunnel interfaces will encapsulate using the FIB
that was in effect with the process that set up the tunnel.
thus setfib 1 ifconfig gif0 [tunnel instructions]
will set the fib for the tunnel to use to be fib 1.
Routing messages would be associated with their
process, and thus select one FIB or another.
messages from the kernel would be associated with the fib they
refer to and would only be received by a routing socket associated
with that fib. (not yet implemented)
In addition Netstat has been edited to be able to cope with the
fact that the array is now 2 dimensional. (It looks in system
memory using libkvm (!)). Old versions of netstat see only the first FIB.
In addition two sysctls are added to give:
a) the number of FIBs compiled in (active)
b) the default FIB of the calling process.
Early testing experience:
-------------------------
Basically our (IronPort's) appliance does this functionality already
using ipfw fwd but that method has some drawbacks.
For example,
It can't fully simulate a routing table because it can't influence the
socket's choice of local address when a connect() is done.
Testing during the generating of these changes has been
remarkably smooth so far. Multiple tables have co-existed
with no notable side effects, and packets have been routed
accordingly.
ipfw has grown 2 new keywords:
setfib N ip from any to any
count ip from any to any fib N
In pf there seems to be a requirement to be able to give symbolic names to the
fibs but I do not have that capacity. I am not sure if it is required.
SCTP has interestingly enough built in support for this, called VRFs
in Cisco parlance. it will be interesting to see how that handles it
when it suddenly actually does something.
Where to next:
--------------------
After committing the ABI compatible version and MFCing it, I'd
like to proceed in a forward direction in -current. this will
result in some roto-tilling in the routing code.
Firstly: the current code's idea of having a separate tree per
protocol family, all of the same format, and pointed to by the
1 dimensional array is a bit silly. Especially when one considers that
there is code that makes assumptions about every protocol having the
same internal structures there. Some protocols don't WANT that
sort of structure. (for example the whole idea of a netmask is foreign
to appletalk). This needs to be made opaque to the external code.
My suggested first change is to add routing method pointers to the
'domain' structure, along with information pointing to the data.
instead of having an array of pointers to uniform structures,
there would be an array pointing to the 'domain' structures
for each protocol address domain (protocol family),
and the methods this reached would be called. The methods would have
an argument that gives FIB number, but the protocol would be free
to ignore it.
When the ABI can be changed it raises the possibility of the
addition of a fib entry into the "struct route". Currently,
the structure contains the sockaddr of the destination, and the resulting
fib entry. To make this work fully, one could add a fib number
so that given an address and a fib, one can find the third element, the
fib entry.
Interaction with the ARP layer/ LL layer would need to be
revisited as well. Qing Li has been working on this already.
This work was sponsored by Ironport Systems/Cisco
Reviewed by: several including rwatson, bz and mlair (parts each)
Obtained from: Ironport systems/Cisco
2008-05-09 23:03:00 +00:00
|
|
|
else
|
2011-05-31 15:05:29 +00:00
|
|
|
#endif
|
The main goals of this project are:
1. separating L2 tables (ARP, NDP) from the L3 routing tables
2. removing as much locking dependencies among these layers as
possible to allow for some parallelism in the search operations
3. simplify the logic in the routing code,
The most notable end result is the obsolescence of the route
cloning (RTF_CLONING) concept, which translated into code reduction
in both IPv4 ARP and IPv6 NDP related modules, and size reduction in
struct rtentry{}. The change in design obsoletes the semantics of
RTF_CLONING, RTF_WASCLONE and RTF_LLINFO routing flags. The userland
applications such as "arp" and "ndp" have been modified to reflect
those changes. The output from "netstat -r" shows only the routing
entries.
Quite a few developers have contributed to this project in the
past: Glebius Smirnoff, Luigi Rizzo, Alessandro Cerri, and
Andre Oppermann. And most recently:
- Kip Macy revised the locking code completely, thus completing
the last piece of the puzzle, Kip has also been conducting
active functional testing
- Sam Leffler has helped me improving/refactoring the code, and
provided valuable reviews
- Julian Elischer setup the perforce tree for me and has helped
me maintaining that branch before the svn conversion
2008-12-15 06:10:57 +00:00
|
|
|
rtalloc_ign((struct route *)&ro, 0);
|
2005-05-03 16:43:32 +00:00
|
|
|
# endif
|
|
|
|
#else /* ! __FreeBSD__ */
|
|
|
|
rtalloc_noclone((struct route *)&ro, NO_CLONING);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (ro.ro_rt != NULL) {
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
/* XXX_IMPORT: later */
|
|
|
|
#else
|
|
|
|
if (ro.ro_rt->rt_labelid == aw->v.rtlabel)
|
|
|
|
ret = 1;
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
RTFREE(ro.ro_rt);
|
|
|
|
}
|
|
|
|
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef INET
|
|
|
|
void
|
|
|
|
pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
|
2007-07-03 12:16:07 +00:00
|
|
|
struct pf_state *s, struct pf_pdesc *pd)
|
2004-02-26 02:04:28 +00:00
|
|
|
{
|
|
|
|
struct mbuf *m0, *m1;
|
|
|
|
struct route iproute;
|
2007-07-03 12:16:07 +00:00
|
|
|
struct route *ro = NULL;
|
2004-02-26 02:04:28 +00:00
|
|
|
struct sockaddr_in *dst;
|
|
|
|
struct ip *ip;
|
|
|
|
struct ifnet *ifp = NULL;
|
|
|
|
struct pf_addr naddr;
|
2004-06-16 23:24:02 +00:00
|
|
|
struct pf_src_node *sn = NULL;
|
2004-02-26 02:04:28 +00:00
|
|
|
int error = 0;
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
int sw_csum;
|
|
|
|
#endif
|
2007-07-03 12:16:07 +00:00
|
|
|
#ifdef IPSEC
|
|
|
|
struct m_tag *mtag;
|
|
|
|
#endif /* IPSEC */
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
if (m == NULL || *m == NULL || r == NULL ||
|
|
|
|
(dir != PF_IN && dir != PF_OUT) || oifp == NULL)
|
|
|
|
panic("pf_route: invalid parameters");
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
2007-07-03 12:16:07 +00:00
|
|
|
if (pd->pf_mtag->routed++ > 3) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#else
|
|
|
|
if ((*m)->m_pkthdr.pf.routed++ > 3) {
|
|
|
|
#endif
|
2007-07-03 12:16:07 +00:00
|
|
|
m0 = *m;
|
|
|
|
*m = NULL;
|
|
|
|
goto bad;
|
2004-07-17 17:15:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (r->rt == PF_DUPTO) {
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-07-17 17:15:15 +00:00
|
|
|
if ((m0 = m_dup(*m, M_DONTWAIT)) == NULL)
|
2004-02-26 02:34:12 +00:00
|
|
|
#else
|
2004-07-17 17:15:15 +00:00
|
|
|
if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
return;
|
|
|
|
} else {
|
|
|
|
if ((r->rt == PF_REPLYTO) == (r->direction == dir))
|
|
|
|
return;
|
|
|
|
m0 = *m;
|
|
|
|
}
|
|
|
|
|
2005-05-03 16:43:32 +00:00
|
|
|
if (m0->m_len < sizeof(struct ip)) {
|
|
|
|
DPFPRINTF(PF_DEBUG_URGENT,
|
|
|
|
("pf_route: m0->m_len < sizeof(struct ip)\n"));
|
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
ip = mtod(m0, struct ip *);
|
|
|
|
|
|
|
|
ro = &iproute;
|
|
|
|
bzero((caddr_t)ro, sizeof(*ro));
|
|
|
|
dst = satosin(&ro->ro_dst);
|
|
|
|
dst->sin_family = AF_INET;
|
|
|
|
dst->sin_len = sizeof(*dst);
|
|
|
|
dst->sin_addr = ip->ip_dst;
|
|
|
|
|
|
|
|
if (r->rt == PF_FASTROUTE) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
Add code to allow the system to handle multiple routing tables.
This particular implementation is designed to be fully backwards compatible
and to be MFC-able to 7.x (and 6.x)
Currently the only protocol that can make use of the multiple tables is IPv4
Similar functionality exists in OpenBSD and Linux.
From my notes:
-----
One thing where FreeBSD has been falling behind, and which by chance I
have some time to work on is "policy based routing", which allows
different
packet streams to be routed by more than just the destination address.
Constraints:
------------
I want to make some form of this available in the 6.x tree
(and by extension 7.x) , but FreeBSD in general needs it so I might as
well do it in -current and back port the portions I need.
One of the ways that this can be done is to have the ability to
instantiate multiple kernel routing tables (which I will now
refer to as "Forwarding Information Bases" or "FIBs" for political
correctness reasons). Which FIB a particular packet uses to make
the next hop decision can be decided by a number of mechanisms.
The policies these mechanisms implement are the "Policies" referred
to in "Policy based routing".
One of the constraints I have if I try to back port this work to
6.x is that it must be implemented as an EXTENSION to the existing
ABIs in 6.x so that third party applications do not need to be
recompiled in the timespan of the branch.
This first version will not have some of the bells and whistles that
will come with later versions. It will, for example, be limited to 16
tables in the first commit.
Implementation method, Compatible version. (part 1)
-------------------------------
For this reason I have implemented a "sufficient subset" of a
multiple routing table solution in Perforce, and back-ported it
to 6.x. (also in Perforce though not always caught up with what I
have done in -current/P4). The subset allows a number of FIBs
to be defined at compile time (8 is sufficient for my purposes in 6.x)
and implements the changes needed to allow IPV4 to use them. I have not
done the changes for ipv6 simply because I do not need it, and I do not
have enough knowledge of ipv6 (e.g. neighbor discovery) needed to do it.
Other protocol families are left untouched and should there be
users with proprietary protocol families, they should continue to work
and be oblivious to the existence of the extra FIBs.
To understand how this is done, one must know that the current FIB
code starts everything off with a single dimensional array of
pointers to FIB head structures (One per protocol family), each of
which in turn points to the trie of routes available to that family.
The basic change in the ABI compatible version of the change is to
extend that array to be a 2 dimensional array, so that
instead of protocol family X looking at rt_tables[X] for the
table it needs, it looks at rt_tables[Y][X] when for all
protocol families except ipv4 Y is always 0.
Code that is unaware of the change always just sees the first row
of the table, which of course looks just like the one dimensional
array that existed before.
The entry points rtrequest(), rtalloc(), rtalloc1(), rtalloc_ign()
are all maintained, but refer only to the first row of the array,
so that existing callers in proprietary protocols can continue to
do the "right thing".
Some new entry points are added, for the exclusive use of ipv4 code
called in_rtrequest(), in_rtalloc(), in_rtalloc1() and in_rtalloc_ign(),
which have an extra argument which refers the code to the correct row.
In addition, there are some new entry points (currently called
rtalloc_fib() and friends) that check the Address family being
looked up and call either rtalloc() (and friends) if the protocol
is not IPv4 forcing the action to row 0 or to the appropriate row
if it IS IPv4 (and that info is available). These are for calling
from code that is not specific to any particular protocol. The way
these are implemented would change in the non ABI preserving code
to be added later.
One feature of the first version of the code is that for ipv4,
the interface routes show up automatically on all the FIBs, so
that no matter what FIB you select you always have the basic
direct attached hosts available to you. (rtinit() does this
automatically).
You CAN delete an interface route from one FIB should you want
to but by default it's there. ARP information is also available
in each FIB. It's assumed that the same machine would have the
same MAC address, regardless of which FIB you are using to get
to it.
This brings us to how the correct FIB is selected for an outgoing
IPV4 packet.
Firstly, all packets have a FIB associated with them. if nothing
has been done to change it, it will be FIB 0. The FIB is changed
in the following ways.
Packets fall into one of a number of classes.
1/ locally generated packets, coming from a socket/PCB.
Such packets select a FIB from a number associated with the
socket/PCB. This in turn is inherited from the process,
but can be changed by a socket option. The process in turn
inherits it on fork. I have written a utility called setfib
that acts a bit like nice..
setfib -3 ping target.example.com # will use fib 3 for ping.
It is an obvious extension to make it a property of a jail
but I have not done so. It can be achieved by combining the setfib and
jail commands.
2/ packets received on an interface for forwarding.
By default these packets would use table 0,
(or possibly a number settable in a sysctl(not yet)).
but prior to routing the firewall can inspect them (see below).
(possibly in the future you may be able to associate a FIB
with packets received on an interface.. An ifconfig arg, but not yet.)
3/ packets inspected by a packet classifier, which can arbitrarily
associate a fib with it on a packet by packet basis.
A fib assigned to a packet by a packet classifier
(such as ipfw) would over-ride a fib associated by
a more default source. (such as cases 1 or 2).
4/ a tcp listen socket associated with a fib will generate
accept sockets that are associated with that same fib.
5/ Packets generated in response to some other packet (e.g. reset
or icmp packets). These should use the FIB associated with the
packet being responded to.
6/ Packets generated during encapsulation.
gif, tun and other tunnel interfaces will encapsulate using the FIB
that was in effect with the process that set up the tunnel.
thus setfib 1 ifconfig gif0 [tunnel instructions]
will set the fib for the tunnel to use to be fib 1.
Routing messages would be associated with their
process, and thus select one FIB or another.
messages from the kernel would be associated with the fib they
refer to and would only be received by a routing socket associated
with that fib. (not yet implemented)
In addition Netstat has been edited to be able to cope with the
fact that the array is now 2 dimensional. (It looks in system
memory using libkvm (!)). Old versions of netstat see only the first FIB.
In addition two sysctls are added to give:
a) the number of FIBs compiled in (active)
b) the default FIB of the calling process.
Early testing experience:
-------------------------
Basically our (IronPort's) appliance does this functionality already
using ipfw fwd but that method has some drawbacks.
For example,
It can't fully simulate a routing table because it can't influence the
socket's choice of local address when a connect() is done.
Testing during the generating of these changes has been
remarkably smooth so far. Multiple tables have co-existed
with no notable side effects, and packets have been routed
accordingly.
ipfw has grown 2 new keywords:
setfib N ip from any to any
count ip from any to any fib N
In pf there seems to be a requirement to be able to give symbolic names to the
fibs but I do not have that capacity. I am not sure if it is required.
SCTP has interestingly enough built in support for this, called VRFs
in Cisco parlance. it will be interesting to see how that handles it
when it suddenly actually does something.
Where to next:
--------------------
After committing the ABI compatible version and MFCing it, I'd
like to proceed in a forward direction in -current. this will
result in some roto-tilling in the routing code.
Firstly: the current code's idea of having a separate tree per
protocol family, all of the same format, and pointed to by the
1 dimensional array is a bit silly. Especially when one considers that
there is code that makes assumptions about every protocol having the
same internal structures there. Some protocols don't WANT that
sort of structure. (for example the whole idea of a netmask is foreign
to appletalk). This needs to be made opaque to the external code.
My suggested first change is to add routing method pointers to the
'domain' structure, along with information pointing to the data.
instead of having an array of pointers to uniform structures,
there would be an array pointing to the 'domain' structures
for each protocol address domain (protocol family),
and the methods this reached would be called. The methods would have
an argument that gives FIB number, but the protocol would be free
to ignore it.
When the ABI can be changed it raises the possibility of the
addition of a fib entry into the "struct route". Currently,
the structure contains the sockaddr of the destination, and the resulting
fib entry. To make this work fully, one could add a fib number
so that given an address and a fib, one can find the third element, the
fib entry.
Interaction with the ARP layer/ LL layer would need to be
revisited as well. Qing Li has been working on this already.
This work was sponsored by Ironport Systems/Cisco
Reviewed by: several including rwatson, bz and mlair (parts each)
Obtained from: Ironport systems/Cisco
2008-05-09 23:03:00 +00:00
|
|
|
in_rtalloc(ro, 0);
|
2011-06-28 11:57:25 +00:00
|
|
|
#else
|
|
|
|
rtalloc(ro);
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
if (ro->ro_rt == 0) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
2009-08-02 19:43:32 +00:00
|
|
|
KMOD_IPSTAT_INC(ips_noroute);
|
2011-06-28 11:57:25 +00:00
|
|
|
#else
|
|
|
|
ipstat.ips_noroute++;
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
|
|
|
|
ifp = ro->ro_rt->rt_ifp;
|
|
|
|
ro->ro_rt->rt_use++;
|
|
|
|
|
|
|
|
if (ro->ro_rt->rt_flags & RTF_GATEWAY)
|
|
|
|
dst = satosin(ro->ro_rt->rt_gateway);
|
|
|
|
} else {
|
2005-05-03 16:43:32 +00:00
|
|
|
if (TAILQ_EMPTY(&r->rpool.list)) {
|
|
|
|
DPFPRINTF(PF_DEBUG_URGENT,
|
|
|
|
("pf_route: TAILQ_EMPTY(&r->rpool.list)\n"));
|
|
|
|
goto bad;
|
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
if (s == NULL) {
|
2004-06-16 23:24:02 +00:00
|
|
|
pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
|
|
|
|
&naddr, NULL, &sn);
|
2004-02-26 02:04:28 +00:00
|
|
|
if (!PF_AZERO(&naddr, AF_INET))
|
|
|
|
dst->sin_addr.s_addr = naddr.v4.s_addr;
|
2004-06-16 23:24:02 +00:00
|
|
|
ifp = r->rpool.cur->kif ?
|
|
|
|
r->rpool.cur->kif->pfik_ifp : NULL;
|
2004-02-26 02:04:28 +00:00
|
|
|
} else {
|
|
|
|
if (!PF_AZERO(&s->rt_addr, AF_INET))
|
|
|
|
dst->sin_addr.s_addr =
|
|
|
|
s->rt_addr.v4.s_addr;
|
2004-06-16 23:24:02 +00:00
|
|
|
ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if (ifp == NULL)
|
|
|
|
goto bad;
|
|
|
|
|
2004-06-17 16:59:47 +00:00
|
|
|
if (oifp != ifp) {
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
PF_UNLOCK();
|
2005-05-03 16:43:32 +00:00
|
|
|
if (pf_test(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
|
2004-02-26 02:34:12 +00:00
|
|
|
PF_LOCK();
|
|
|
|
goto bad;
|
|
|
|
} else if (m0 == NULL) {
|
|
|
|
PF_LOCK();
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
PF_LOCK();
|
|
|
|
#else
|
2005-05-03 16:43:32 +00:00
|
|
|
if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS)
|
2004-02-26 02:04:28 +00:00
|
|
|
goto bad;
|
|
|
|
else if (m0 == NULL)
|
|
|
|
goto done;
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
if (m0->m_len < sizeof(struct ip)) {
|
|
|
|
DPFPRINTF(PF_DEBUG_URGENT,
|
|
|
|
("pf_route: m0->m_len < sizeof(struct ip)\n"));
|
|
|
|
goto bad;
|
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
ip = mtod(m0, struct ip *);
|
|
|
|
}
|
|
|
|
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
/* Copied from FreeBSD 5.1-CURRENT ip_output. */
|
|
|
|
m0->m_pkthdr.csum_flags |= CSUM_IP;
|
|
|
|
sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist;
|
|
|
|
if (sw_csum & CSUM_DELAY_DATA) {
|
|
|
|
/*
|
|
|
|
* XXX: in_delayed_cksum assumes HBO for ip->ip_len (at least)
|
|
|
|
*/
|
|
|
|
NTOHS(ip->ip_len);
|
2011-06-28 11:57:25 +00:00
|
|
|
NTOHS(ip->ip_off); /* XXX: needed? */
|
2004-02-26 02:34:12 +00:00
|
|
|
in_delayed_cksum(m0);
|
|
|
|
HTONS(ip->ip_len);
|
|
|
|
HTONS(ip->ip_off);
|
|
|
|
sw_csum &= ~CSUM_DELAY_DATA;
|
|
|
|
}
|
|
|
|
m0->m_pkthdr.csum_flags &= ifp->if_hwassist;
|
|
|
|
|
|
|
|
if (ntohs(ip->ip_len) <= ifp->if_mtu ||
|
|
|
|
(ifp->if_hwassist & CSUM_FRAGMENT &&
|
2011-06-28 11:57:25 +00:00
|
|
|
((ip->ip_off & htons(IP_DF)) == 0))) {
|
2004-02-26 02:34:12 +00:00
|
|
|
/*
|
|
|
|
* ip->ip_len = htons(ip->ip_len);
|
|
|
|
* ip->ip_off = htons(ip->ip_off);
|
|
|
|
*/
|
|
|
|
ip->ip_sum = 0;
|
|
|
|
if (sw_csum & CSUM_DELAY_IP) {
|
|
|
|
/* From KAME */
|
|
|
|
if (ip->ip_v == IPVERSION &&
|
|
|
|
(ip->ip_hl << 2) == sizeof(*ip)) {
|
|
|
|
ip->ip_sum = in_cksum_hdr(ip);
|
|
|
|
} else {
|
|
|
|
ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
PF_UNLOCK();
|
2009-04-16 20:30:28 +00:00
|
|
|
error = (*ifp->if_output)(ifp, m0, sintosa(dst), ro);
|
2004-02-26 02:34:12 +00:00
|
|
|
PF_LOCK();
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
#else
|
2004-02-26 02:04:28 +00:00
|
|
|
/* Copied from ip_output. */
|
2004-06-16 23:24:02 +00:00
|
|
|
#ifdef IPSEC
|
|
|
|
/*
|
|
|
|
* If deferred crypto processing is needed, check that the
|
|
|
|
* interface supports it.
|
|
|
|
*/
|
|
|
|
if ((mtag = m_tag_find(m0, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL))
|
|
|
|
!= NULL && (ifp->if_capabilities & IFCAP_IPSEC) == 0) {
|
|
|
|
/* Notify IPsec to do its own crypto. */
|
|
|
|
ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
|
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
#endif /* IPSEC */
|
|
|
|
|
|
|
|
/* Catch routing changes wrt. hardware checksumming for TCP or UDP. */
|
2007-07-03 12:16:07 +00:00
|
|
|
if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT) {
|
2004-06-16 23:24:02 +00:00
|
|
|
if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) ||
|
|
|
|
ifp->if_bridge != NULL) {
|
|
|
|
in_delayed_cksum(m0);
|
2011-06-28 11:57:25 +00:00
|
|
|
m0->m_pkthdr.csum_flags &= ~M_TCPV4_CSUM_OUT; /* Clr */
|
2004-06-16 23:24:02 +00:00
|
|
|
}
|
2007-07-03 12:16:07 +00:00
|
|
|
} else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT) {
|
2004-06-16 23:24:02 +00:00
|
|
|
if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) ||
|
|
|
|
ifp->if_bridge != NULL) {
|
|
|
|
in_delayed_cksum(m0);
|
2011-06-28 11:57:25 +00:00
|
|
|
m0->m_pkthdr.csum_flags &= ~M_UDPV4_CSUM_OUT; /* Clr */
|
2004-06-16 23:24:02 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
if (ntohs(ip->ip_len) <= ifp->if_mtu) {
|
2011-06-28 11:57:25 +00:00
|
|
|
ip->ip_sum = 0;
|
2004-02-26 02:04:28 +00:00
|
|
|
if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
|
|
|
|
ifp->if_bridge == NULL) {
|
2007-07-03 12:16:07 +00:00
|
|
|
m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
2009-08-02 19:43:32 +00:00
|
|
|
KMOD_IPSTAT_INC(ips_outhwcsum);
|
2011-06-28 11:57:25 +00:00
|
|
|
#else
|
|
|
|
ipstat.ips_outhwcsum++;
|
|
|
|
#endif
|
|
|
|
} else
|
2004-02-26 02:04:28 +00:00
|
|
|
ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
|
|
|
|
/* Update relevant hardware checksum stats for TCP/UDP */
|
2007-07-03 12:16:07 +00:00
|
|
|
if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT)
|
2009-08-02 19:43:32 +00:00
|
|
|
KMOD_TCPSTAT_INC(tcps_outhwcsum);
|
2007-07-03 12:16:07 +00:00
|
|
|
else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT)
|
2009-08-02 19:43:32 +00:00
|
|
|
KMOD_UDPSTAT_INC(udps_outhwcsum);
|
2004-02-26 02:04:28 +00:00
|
|
|
error = (*ifp->if_output)(ifp, m0, sintosa(dst), NULL);
|
|
|
|
goto done;
|
|
|
|
}
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2011-06-28 11:57:25 +00:00
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
/*
|
|
|
|
* Too large for interface; fragment if possible.
|
|
|
|
* Must be able to put at least 8 bytes per fragment.
|
|
|
|
*/
|
2011-06-28 11:57:25 +00:00
|
|
|
if (ip->ip_off & htons(IP_DF)) {
|
|
|
|
#ifdef __FreeBSD__
|
2009-08-02 19:43:32 +00:00
|
|
|
KMOD_IPSTAT_INC(ips_cantfrag);
|
2011-06-28 11:57:25 +00:00
|
|
|
#else
|
|
|
|
ipstat.ips_cantfrag++;
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
if (r->rt != PF_DUPTO) {
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
/* icmp_error() expects host byte ordering */
|
|
|
|
NTOHS(ip->ip_len);
|
|
|
|
NTOHS(ip->ip_off);
|
|
|
|
PF_UNLOCK();
|
2004-02-26 02:04:28 +00:00
|
|
|
icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
|
2005-05-04 19:51:06 +00:00
|
|
|
ifp->if_mtu);
|
2004-02-26 02:34:12 +00:00
|
|
|
PF_LOCK();
|
2005-05-04 15:55:29 +00:00
|
|
|
#else
|
|
|
|
icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
|
2007-07-03 12:16:07 +00:00
|
|
|
ifp->if_mtu);
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
goto done;
|
|
|
|
} else
|
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
|
|
|
|
m1 = m0;
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
/*
|
|
|
|
* XXX: is cheaper + less error prone than own function
|
|
|
|
*/
|
|
|
|
NTOHS(ip->ip_len);
|
|
|
|
NTOHS(ip->ip_off);
|
|
|
|
error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum);
|
|
|
|
#else
|
2004-02-26 02:04:28 +00:00
|
|
|
error = ip_fragment(m0, ifp, ifp->if_mtu);
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2004-03-28 20:04:12 +00:00
|
|
|
if (error) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifndef __FreeBSD__ /* ip_fragment does not do m_freem() on FreeBSD */
|
2004-03-28 20:04:12 +00:00
|
|
|
m0 = NULL;
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
goto bad;
|
2004-03-28 20:04:12 +00:00
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
for (m0 = m1; m0; m0 = m1) {
|
|
|
|
m1 = m0->m_nextpkt;
|
|
|
|
m0->m_nextpkt = 0;
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
if (error == 0) {
|
|
|
|
PF_UNLOCK();
|
|
|
|
error = (*ifp->if_output)(ifp, m0, sintosa(dst),
|
|
|
|
NULL);
|
|
|
|
PF_LOCK();
|
|
|
|
} else
|
|
|
|
#else
|
2004-02-26 02:04:28 +00:00
|
|
|
if (error == 0)
|
|
|
|
error = (*ifp->if_output)(ifp, m0, sintosa(dst),
|
|
|
|
NULL);
|
|
|
|
else
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
m_freem(m0);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (error == 0)
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
2009-08-02 19:43:32 +00:00
|
|
|
KMOD_IPSTAT_INC(ips_fragmented);
|
2011-06-28 11:57:25 +00:00
|
|
|
#else
|
|
|
|
ipstat.ips_fragmented++;
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
done:
|
|
|
|
if (r->rt != PF_DUPTO)
|
|
|
|
*m = NULL;
|
|
|
|
if (ro == &iproute && ro->ro_rt)
|
|
|
|
RTFREE(ro->ro_rt);
|
|
|
|
return;
|
|
|
|
|
|
|
|
bad:
|
|
|
|
m_freem(m0);
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
#endif /* INET */
|
|
|
|
|
|
|
|
#ifdef INET6
|
|
|
|
void
|
|
|
|
pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
|
2007-07-03 12:16:07 +00:00
|
|
|
struct pf_state *s, struct pf_pdesc *pd)
|
2004-02-26 02:04:28 +00:00
|
|
|
{
|
|
|
|
struct mbuf *m0;
|
|
|
|
struct route_in6 ip6route;
|
|
|
|
struct route_in6 *ro;
|
|
|
|
struct sockaddr_in6 *dst;
|
|
|
|
struct ip6_hdr *ip6;
|
|
|
|
struct ifnet *ifp = NULL;
|
|
|
|
struct pf_addr naddr;
|
2004-06-16 23:24:02 +00:00
|
|
|
struct pf_src_node *sn = NULL;
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
if (m == NULL || *m == NULL || r == NULL ||
|
|
|
|
(dir != PF_IN && dir != PF_OUT) || oifp == NULL)
|
|
|
|
panic("pf_route6: invalid parameters");
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
2007-07-03 12:16:07 +00:00
|
|
|
if (pd->pf_mtag->routed++ > 3) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#else
|
|
|
|
if ((*m)->m_pkthdr.pf.routed++ > 3) {
|
|
|
|
#endif
|
2007-07-03 12:16:07 +00:00
|
|
|
m0 = *m;
|
|
|
|
*m = NULL;
|
|
|
|
goto bad;
|
2004-07-17 17:15:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (r->rt == PF_DUPTO) {
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-07-17 17:15:15 +00:00
|
|
|
if ((m0 = m_dup(*m, M_DONTWAIT)) == NULL)
|
2004-02-26 02:34:12 +00:00
|
|
|
#else
|
2004-07-17 17:15:15 +00:00
|
|
|
if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
return;
|
|
|
|
} else {
|
|
|
|
if ((r->rt == PF_REPLYTO) == (r->direction == dir))
|
|
|
|
return;
|
|
|
|
m0 = *m;
|
|
|
|
}
|
|
|
|
|
2005-05-03 16:43:32 +00:00
|
|
|
if (m0->m_len < sizeof(struct ip6_hdr)) {
|
|
|
|
DPFPRINTF(PF_DEBUG_URGENT,
|
|
|
|
("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
|
|
|
|
goto bad;
|
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
ip6 = mtod(m0, struct ip6_hdr *);
|
|
|
|
|
|
|
|
ro = &ip6route;
|
|
|
|
bzero((caddr_t)ro, sizeof(*ro));
|
|
|
|
dst = (struct sockaddr_in6 *)&ro->ro_dst;
|
|
|
|
dst->sin6_family = AF_INET6;
|
|
|
|
dst->sin6_len = sizeof(*dst);
|
|
|
|
dst->sin6_addr = ip6->ip6_dst;
|
|
|
|
|
2007-07-03 12:16:07 +00:00
|
|
|
/* Cheat. XXX why only in the v6 case??? */
|
2004-02-26 02:04:28 +00:00
|
|
|
if (r->rt == PF_FASTROUTE) {
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-07-17 05:10:06 +00:00
|
|
|
m0->m_flags |= M_SKIP_FIREWALL;
|
2004-02-26 02:34:12 +00:00
|
|
|
PF_UNLOCK();
|
|
|
|
ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
|
|
|
|
#else
|
2011-06-28 11:57:25 +00:00
|
|
|
m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
|
|
|
|
ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2005-05-03 16:43:32 +00:00
|
|
|
if (TAILQ_EMPTY(&r->rpool.list)) {
|
|
|
|
DPFPRINTF(PF_DEBUG_URGENT,
|
|
|
|
("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n"));
|
|
|
|
goto bad;
|
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
if (s == NULL) {
|
2004-06-16 23:24:02 +00:00
|
|
|
pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
|
|
|
|
&naddr, NULL, &sn);
|
2004-02-26 02:04:28 +00:00
|
|
|
if (!PF_AZERO(&naddr, AF_INET6))
|
|
|
|
PF_ACPY((struct pf_addr *)&dst->sin6_addr,
|
|
|
|
&naddr, AF_INET6);
|
2004-06-16 23:24:02 +00:00
|
|
|
ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
|
2004-02-26 02:04:28 +00:00
|
|
|
} else {
|
|
|
|
if (!PF_AZERO(&s->rt_addr, AF_INET6))
|
|
|
|
PF_ACPY((struct pf_addr *)&dst->sin6_addr,
|
|
|
|
&s->rt_addr, AF_INET6);
|
2004-06-16 23:24:02 +00:00
|
|
|
ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
if (ifp == NULL)
|
|
|
|
goto bad;
|
|
|
|
|
|
|
|
if (oifp != ifp) {
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-07-17 17:15:15 +00:00
|
|
|
PF_UNLOCK();
|
2005-05-03 16:43:32 +00:00
|
|
|
if (pf_test6(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
|
2004-07-17 17:15:15 +00:00
|
|
|
PF_LOCK();
|
|
|
|
goto bad;
|
|
|
|
} else if (m0 == NULL) {
|
2004-02-26 02:34:12 +00:00
|
|
|
PF_LOCK();
|
2004-07-17 17:15:15 +00:00
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
PF_LOCK();
|
2004-02-26 02:34:12 +00:00
|
|
|
#else
|
2005-05-03 16:43:32 +00:00
|
|
|
if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS)
|
2004-07-17 17:15:15 +00:00
|
|
|
goto bad;
|
|
|
|
else if (m0 == NULL)
|
|
|
|
goto done;
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
if (m0->m_len < sizeof(struct ip6_hdr)) {
|
|
|
|
DPFPRINTF(PF_DEBUG_URGENT,
|
|
|
|
("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
|
|
|
|
goto bad;
|
|
|
|
}
|
2004-07-17 17:15:15 +00:00
|
|
|
ip6 = mtod(m0, struct ip6_hdr *);
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the packet is too large for the outgoing interface,
|
|
|
|
* send back an icmp6 error.
|
|
|
|
*/
|
2007-07-03 12:16:07 +00:00
|
|
|
if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr))
|
2004-02-26 02:04:28 +00:00
|
|
|
dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
|
|
|
|
if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
PF_UNLOCK();
|
|
|
|
#endif
|
2011-06-28 11:57:25 +00:00
|
|
|
nd6_output(ifp, ifp, m0, dst, NULL);
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
PF_LOCK();
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
} else {
|
|
|
|
in6_ifstat_inc(ifp, ifs6_in_toobig);
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
if (r->rt != PF_DUPTO) {
|
|
|
|
PF_UNLOCK();
|
|
|
|
icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
|
|
|
|
PF_LOCK();
|
2011-06-28 11:57:25 +00:00
|
|
|
} else
|
2004-02-26 02:34:12 +00:00
|
|
|
#else
|
2004-02-26 02:04:28 +00:00
|
|
|
if (r->rt != PF_DUPTO)
|
|
|
|
icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
|
|
|
|
else
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
|
|
|
|
done:
|
|
|
|
if (r->rt != PF_DUPTO)
|
|
|
|
*m = NULL;
|
|
|
|
return;
|
|
|
|
|
|
|
|
bad:
|
|
|
|
m_freem(m0);
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
#endif /* INET6 */
|
|
|
|
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
/*
|
2004-07-23 03:31:42 +00:00
|
|
|
* FreeBSD supports cksum offloads for the following drivers.
|
2004-11-08 20:24:52 +00:00
|
|
|
* em(4), fxp(4), ixgb(4), lge(4), ndis(4), nge(4), re(4),
|
2004-07-23 03:31:42 +00:00
|
|
|
* ti(4), txp(4), xl(4)
|
|
|
|
*
|
|
|
|
* CSUM_DATA_VALID | CSUM_PSEUDO_HDR :
|
|
|
|
* network driver performed cksum including pseudo header, need to verify
|
|
|
|
* csum_data
|
|
|
|
* CSUM_DATA_VALID :
|
|
|
|
* network driver performed cksum, needs to additional pseudo header
|
|
|
|
* cksum computation with partial csum_data(i.e. lack of H/W support for
|
|
|
|
* pseudo header, for instance hme(4), sk(4) and possibly gem(4))
|
|
|
|
*
|
|
|
|
* After validating the cksum of packet, set both flag CSUM_DATA_VALID and
|
|
|
|
* CSUM_PSEUDO_HDR in order to avoid recomputation of the cksum in upper
|
|
|
|
* TCP/UDP layer.
|
|
|
|
* Also, set csum_data to 0xffff to force cksum validation.
|
2004-02-26 02:34:12 +00:00
|
|
|
*/
|
|
|
|
int
|
|
|
|
pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af)
|
|
|
|
{
|
|
|
|
u_int16_t sum = 0;
|
|
|
|
int hw_assist = 0;
|
|
|
|
struct ip *ip;
|
|
|
|
|
|
|
|
if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
|
|
|
|
return (1);
|
|
|
|
if (m->m_pkthdr.len < off + len)
|
|
|
|
return (1);
|
|
|
|
|
|
|
|
switch (p) {
|
|
|
|
case IPPROTO_TCP:
|
|
|
|
if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
|
|
|
|
if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
|
|
|
|
sum = m->m_pkthdr.csum_data;
|
|
|
|
} else {
|
2011-06-28 11:57:25 +00:00
|
|
|
ip = mtod(m, struct ip *);
|
2004-02-26 02:34:12 +00:00
|
|
|
sum = in_pseudo(ip->ip_src.s_addr,
|
2011-06-28 11:57:25 +00:00
|
|
|
ip->ip_dst.s_addr, htonl((u_short)len +
|
|
|
|
m->m_pkthdr.csum_data + IPPROTO_TCP));
|
2004-02-26 02:34:12 +00:00
|
|
|
}
|
|
|
|
sum ^= 0xffff;
|
|
|
|
++hw_assist;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case IPPROTO_UDP:
|
|
|
|
if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
|
|
|
|
if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
|
|
|
|
sum = m->m_pkthdr.csum_data;
|
|
|
|
} else {
|
2011-06-28 11:57:25 +00:00
|
|
|
ip = mtod(m, struct ip *);
|
2004-02-26 02:34:12 +00:00
|
|
|
sum = in_pseudo(ip->ip_src.s_addr,
|
2011-06-28 11:57:25 +00:00
|
|
|
ip->ip_dst.s_addr, htonl((u_short)len +
|
|
|
|
m->m_pkthdr.csum_data + IPPROTO_UDP));
|
2004-02-26 02:34:12 +00:00
|
|
|
}
|
|
|
|
sum ^= 0xffff;
|
|
|
|
++hw_assist;
|
2011-06-28 11:57:25 +00:00
|
|
|
}
|
2004-02-26 02:34:12 +00:00
|
|
|
break;
|
|
|
|
case IPPROTO_ICMP:
|
|
|
|
#ifdef INET6
|
|
|
|
case IPPROTO_ICMPV6:
|
|
|
|
#endif /* INET6 */
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!hw_assist) {
|
|
|
|
switch (af) {
|
|
|
|
case AF_INET:
|
|
|
|
if (p == IPPROTO_ICMP) {
|
|
|
|
if (m->m_len < off)
|
|
|
|
return (1);
|
|
|
|
m->m_data += off;
|
|
|
|
m->m_len -= off;
|
|
|
|
sum = in_cksum(m, len);
|
|
|
|
m->m_data -= off;
|
|
|
|
m->m_len += off;
|
|
|
|
} else {
|
|
|
|
if (m->m_len < sizeof(struct ip))
|
|
|
|
return (1);
|
|
|
|
sum = in4_cksum(m, p, off, len);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
|
|
|
if (m->m_len < sizeof(struct ip6_hdr))
|
|
|
|
return (1);
|
|
|
|
sum = in6_cksum(m, p, off, len);
|
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
|
|
|
default:
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (sum) {
|
|
|
|
switch (p) {
|
|
|
|
case IPPROTO_TCP:
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparision between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
{
|
2009-08-02 19:43:32 +00:00
|
|
|
KMOD_TCPSTAT_INC(tcps_rcvbadsum);
|
2004-02-26 02:34:12 +00:00
|
|
|
break;
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparision between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
}
|
2004-02-26 02:34:12 +00:00
|
|
|
case IPPROTO_UDP:
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparision between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
{
|
2009-08-02 19:43:32 +00:00
|
|
|
KMOD_UDPSTAT_INC(udps_badsum);
|
2004-02-26 02:34:12 +00:00
|
|
|
break;
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparision between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
}
|
2011-05-31 15:05:29 +00:00
|
|
|
#ifdef INET
|
2004-02-26 02:34:12 +00:00
|
|
|
case IPPROTO_ICMP:
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparision between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
{
|
2009-08-02 19:43:32 +00:00
|
|
|
KMOD_ICMPSTAT_INC(icps_checksum);
|
2004-02-26 02:34:12 +00:00
|
|
|
break;
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparision between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
}
|
2011-05-31 15:05:29 +00:00
|
|
|
#endif
|
2004-02-26 02:34:12 +00:00
|
|
|
#ifdef INET6
|
|
|
|
case IPPROTO_ICMPV6:
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparision between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
{
|
2009-08-02 19:43:32 +00:00
|
|
|
KMOD_ICMP6STAT_INC(icp6s_checksum);
|
2004-02-26 02:34:12 +00:00
|
|
|
break;
|
Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit
Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.
Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().
Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).
All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparision between pre- and post-change
object files(*).
(*) netipsec/keysock.c did not validate depending on compile time options.
Implemented by: julian, bz, brooks, zec
Reviewed by: julian, bz, brooks, kris, rwatson, ...
Approved by: julian (mentor)
Obtained from: //depot/projects/vimage-commit2/...
X-MFC after: never
Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00
|
|
|
}
|
2004-02-26 02:34:12 +00:00
|
|
|
#endif /* INET6 */
|
|
|
|
}
|
|
|
|
return (1);
|
2004-07-23 03:31:42 +00:00
|
|
|
} else {
|
|
|
|
if (p == IPPROTO_TCP || p == IPPROTO_UDP) {
|
|
|
|
m->m_pkthdr.csum_flags |=
|
|
|
|
(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
|
|
|
|
m->m_pkthdr.csum_data = 0xffff;
|
|
|
|
}
|
2004-02-26 02:34:12 +00:00
|
|
|
}
|
|
|
|
return (0);
|
|
|
|
}
|
2007-07-03 12:16:07 +00:00
|
|
|
#else /* !__FreeBSD__ */
|
2011-06-28 11:57:25 +00:00
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
/*
|
|
|
|
* check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
|
|
|
|
* off is the offset where the protocol header starts
|
|
|
|
* len is the total length of protocol header plus payload
|
|
|
|
* returns 0 when the checksum is valid, otherwise returns 1.
|
|
|
|
*/
|
|
|
|
int
|
2004-06-16 23:24:02 +00:00
|
|
|
pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p,
|
|
|
|
sa_family_t af)
|
2004-02-26 02:04:28 +00:00
|
|
|
{
|
|
|
|
u_int16_t flag_ok, flag_bad;
|
|
|
|
u_int16_t sum;
|
|
|
|
|
|
|
|
switch (p) {
|
|
|
|
case IPPROTO_TCP:
|
|
|
|
flag_ok = M_TCP_CSUM_IN_OK;
|
|
|
|
flag_bad = M_TCP_CSUM_IN_BAD;
|
|
|
|
break;
|
|
|
|
case IPPROTO_UDP:
|
|
|
|
flag_ok = M_UDP_CSUM_IN_OK;
|
|
|
|
flag_bad = M_UDP_CSUM_IN_BAD;
|
|
|
|
break;
|
|
|
|
case IPPROTO_ICMP:
|
|
|
|
#ifdef INET6
|
|
|
|
case IPPROTO_ICMPV6:
|
|
|
|
#endif /* INET6 */
|
|
|
|
flag_ok = flag_bad = 0;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return (1);
|
|
|
|
}
|
2007-07-03 12:16:07 +00:00
|
|
|
if (m->m_pkthdr.csum_flags & flag_ok)
|
2004-02-26 02:04:28 +00:00
|
|
|
return (0);
|
2007-07-03 12:16:07 +00:00
|
|
|
if (m->m_pkthdr.csum_flags & flag_bad)
|
2004-02-26 02:04:28 +00:00
|
|
|
return (1);
|
|
|
|
if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
|
|
|
|
return (1);
|
|
|
|
if (m->m_pkthdr.len < off + len)
|
|
|
|
return (1);
|
2005-05-03 16:43:32 +00:00
|
|
|
switch (af) {
|
|
|
|
#ifdef INET
|
2004-02-26 02:04:28 +00:00
|
|
|
case AF_INET:
|
|
|
|
if (p == IPPROTO_ICMP) {
|
|
|
|
if (m->m_len < off)
|
|
|
|
return (1);
|
|
|
|
m->m_data += off;
|
|
|
|
m->m_len -= off;
|
|
|
|
sum = in_cksum(m, len);
|
|
|
|
m->m_data -= off;
|
|
|
|
m->m_len += off;
|
|
|
|
} else {
|
|
|
|
if (m->m_len < sizeof(struct ip))
|
|
|
|
return (1);
|
|
|
|
sum = in4_cksum(m, p, off, len);
|
|
|
|
}
|
|
|
|
break;
|
2005-05-03 16:43:32 +00:00
|
|
|
#endif /* INET */
|
2004-02-26 02:04:28 +00:00
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
|
|
|
if (m->m_len < sizeof(struct ip6_hdr))
|
|
|
|
return (1);
|
|
|
|
sum = in6_cksum(m, p, off, len);
|
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
|
|
|
default:
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
if (sum) {
|
2007-07-03 12:16:07 +00:00
|
|
|
m->m_pkthdr.csum_flags |= flag_bad;
|
2004-02-26 02:04:28 +00:00
|
|
|
switch (p) {
|
|
|
|
case IPPROTO_TCP:
|
2009-08-02 19:43:32 +00:00
|
|
|
KMOD_TCPSTAT_INC(tcps_rcvbadsum);
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
case IPPROTO_UDP:
|
2009-08-02 19:43:32 +00:00
|
|
|
KMOD_UDPSTAT_INC(udps_badsum);
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
2011-05-31 15:05:29 +00:00
|
|
|
#ifdef INET
|
2004-02-26 02:04:28 +00:00
|
|
|
case IPPROTO_ICMP:
|
2009-08-02 19:43:32 +00:00
|
|
|
KMOD_ICMPSTAT_INC(icps_checksum);
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
2011-05-31 15:05:29 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
#ifdef INET6
|
|
|
|
case IPPROTO_ICMPV6:
|
2009-08-02 19:43:32 +00:00
|
|
|
KMOD_ICMP6STAT_INC(icp6s_checksum);
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
|
|
|
}
|
|
|
|
return (1);
|
|
|
|
}
|
2007-07-03 12:16:07 +00:00
|
|
|
m->m_pkthdr.csum_flags |= flag_ok;
|
2004-06-16 23:24:02 +00:00
|
|
|
return (0);
|
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef __FreeBSD__
|
|
|
|
struct pf_divert *
|
|
|
|
pf_find_divert(struct mbuf *m)
|
|
|
|
{
|
|
|
|
struct m_tag *mtag;
|
|
|
|
|
|
|
|
if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL)
|
|
|
|
return (NULL);
|
|
|
|
|
|
|
|
return ((struct pf_divert *)(mtag + 1));
|
|
|
|
}
|
|
|
|
|
|
|
|
struct pf_divert *
|
|
|
|
pf_get_divert(struct mbuf *m)
|
|
|
|
{
|
|
|
|
struct m_tag *mtag;
|
|
|
|
|
|
|
|
if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) {
|
|
|
|
mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert),
|
|
|
|
M_NOWAIT);
|
|
|
|
if (mtag == NULL)
|
|
|
|
return (NULL);
|
|
|
|
bzero(mtag + 1, sizeof(struct pf_divert));
|
|
|
|
m_tag_prepend(m, mtag);
|
|
|
|
}
|
|
|
|
|
|
|
|
return ((struct pf_divert *)(mtag + 1));
|
|
|
|
}
|
|
|
|
#endif
|
2004-06-16 23:24:02 +00:00
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
#ifdef INET
|
|
|
|
int
|
2004-09-29 04:54:33 +00:00
|
|
|
#ifdef __FreeBSD__
|
2005-05-03 16:43:32 +00:00
|
|
|
pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
|
|
|
|
struct ether_header *eh, struct inpcb *inp)
|
2004-09-29 04:54:33 +00:00
|
|
|
#else
|
2005-05-03 16:43:32 +00:00
|
|
|
pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
|
|
|
|
struct ether_header *eh)
|
2004-09-29 04:54:33 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
{
|
2004-06-16 23:24:02 +00:00
|
|
|
struct pfi_kif *kif;
|
|
|
|
u_short action, reason = 0, log = 0;
|
|
|
|
struct mbuf *m = *m0;
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
struct ip *h = NULL;
|
|
|
|
struct m_tag *ipfwtag;
|
|
|
|
struct pf_rule *a = NULL, *r = &V_pf_default_rule, *tr, *nr;
|
|
|
|
#else
|
|
|
|
struct ip *h;
|
2004-06-16 23:24:02 +00:00
|
|
|
struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr;
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2004-06-16 23:24:02 +00:00
|
|
|
struct pf_state *s = NULL;
|
|
|
|
struct pf_ruleset *ruleset = NULL;
|
|
|
|
struct pf_pdesc pd;
|
|
|
|
int off, dirndx, pqid = 0;
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
PF_LOCK();
|
2011-06-28 11:57:25 +00:00
|
|
|
if (!V_pf_status.running)
|
2007-07-03 12:16:07 +00:00
|
|
|
{
|
2004-02-26 02:34:12 +00:00
|
|
|
PF_UNLOCK();
|
2007-07-03 12:16:07 +00:00
|
|
|
return (PF_PASS);
|
2004-02-26 02:34:12 +00:00
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
#else
|
|
|
|
if (!pf_status.running)
|
|
|
|
return (PF_PASS);
|
2007-07-03 12:16:07 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
memset(&pd, 0, sizeof(pd));
|
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) {
|
2007-07-03 12:16:07 +00:00
|
|
|
PF_UNLOCK();
|
|
|
|
DPFPRINTF(PF_DEBUG_URGENT,
|
|
|
|
("pf_test: pf_get_mtag returned NULL\n"));
|
|
|
|
return (PF_DROP);
|
|
|
|
}
|
|
|
|
#endif
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifndef __FreeBSD__
|
2005-05-03 16:43:32 +00:00
|
|
|
if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
|
2011-06-28 11:57:25 +00:00
|
|
|
kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif;
|
|
|
|
else
|
2005-05-03 16:43:32 +00:00
|
|
|
#endif
|
2011-06-28 11:57:25 +00:00
|
|
|
kif = (struct pfi_kif *)ifp->if_pf_kif;
|
2005-05-03 16:43:32 +00:00
|
|
|
|
2004-06-16 23:24:02 +00:00
|
|
|
if (kif == NULL) {
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
PF_UNLOCK();
|
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
DPFPRINTF(PF_DEBUG_URGENT,
|
|
|
|
("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname));
|
2004-06-16 23:24:02 +00:00
|
|
|
return (PF_DROP);
|
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
if (kif->pfik_flags & PFI_IFLAG_SKIP)
|
2005-05-03 16:43:32 +00:00
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
{
|
2005-05-03 16:43:32 +00:00
|
|
|
PF_UNLOCK();
|
|
|
|
#endif
|
|
|
|
return (PF_PASS);
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
2005-05-03 16:43:32 +00:00
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2004-06-16 23:24:02 +00:00
|
|
|
|
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
M_ASSERTPKTHDR(m);
|
|
|
|
#else
|
2004-02-26 02:04:28 +00:00
|
|
|
#ifdef DIAGNOSTIC
|
|
|
|
if ((m->m_flags & M_PKTHDR) == 0)
|
|
|
|
panic("non-M_PKTHDR is passed to pf_test");
|
2005-05-03 16:43:32 +00:00
|
|
|
#endif /* DIAGNOSTIC */
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
if (m->m_pkthdr.len < (int)sizeof(*h)) {
|
|
|
|
action = PF_DROP;
|
|
|
|
REASON_SET(&reason, PFRES_SHORT);
|
|
|
|
log = 1;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (m->m_flags & M_SKIP_FIREWALL) {
|
|
|
|
PF_UNLOCK();
|
|
|
|
return (PF_PASS);
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
if (m->m_pkthdr.pf.flags & PF_TAG_GENERATED)
|
|
|
|
return (PF_PASS);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (ip_divert_ptr != NULL &&
|
|
|
|
((ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) {
|
|
|
|
struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(ipfwtag+1);
|
|
|
|
if (rr->info & IPFW_IS_DIVERT && rr->rulenum == 0) {
|
|
|
|
pd.pf_mtag->flags |= PF_PACKET_LOOPED;
|
|
|
|
m_tag_delete(m, ipfwtag);
|
|
|
|
}
|
|
|
|
if (pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) {
|
|
|
|
m->m_flags |= M_FASTFWD_OURS;
|
|
|
|
pd.pf_mtag->flags &= ~PF_FASTFWD_OURS_PRESENT;
|
|
|
|
}
|
|
|
|
} else
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
/* We do IP header normalization and packet reassembly here */
|
2005-05-03 16:43:32 +00:00
|
|
|
if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) {
|
2004-02-26 02:04:28 +00:00
|
|
|
action = PF_DROP;
|
|
|
|
goto done;
|
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
m = *m0; /* pf_normalize messes with m0 */
|
2004-02-26 02:04:28 +00:00
|
|
|
h = mtod(m, struct ip *);
|
|
|
|
|
|
|
|
off = h->ip_hl << 2;
|
|
|
|
if (off < (int)sizeof(*h)) {
|
|
|
|
action = PF_DROP;
|
|
|
|
REASON_SET(&reason, PFRES_SHORT);
|
|
|
|
log = 1;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
pd.src = (struct pf_addr *)&h->ip_src;
|
|
|
|
pd.dst = (struct pf_addr *)&h->ip_dst;
|
2011-06-28 11:57:25 +00:00
|
|
|
pd.sport = pd.dport = NULL;
|
2004-02-26 02:04:28 +00:00
|
|
|
pd.ip_sum = &h->ip_sum;
|
2011-06-28 11:57:25 +00:00
|
|
|
pd.proto_sum = NULL;
|
2004-02-26 02:04:28 +00:00
|
|
|
pd.proto = h->ip_p;
|
2011-06-28 11:57:25 +00:00
|
|
|
pd.dir = dir;
|
|
|
|
pd.sidx = (dir == PF_IN) ? 0 : 1;
|
|
|
|
pd.didx = (dir == PF_IN) ? 1 : 0;
|
2004-02-26 02:04:28 +00:00
|
|
|
pd.af = AF_INET;
|
|
|
|
pd.tos = h->ip_tos;
|
|
|
|
pd.tot_len = ntohs(h->ip_len);
|
2005-05-03 16:43:32 +00:00
|
|
|
pd.eh = eh;
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
/* handle fragments that didn't get reassembled by normalization */
|
|
|
|
if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
|
2004-06-16 23:24:02 +00:00
|
|
|
action = pf_test_fragment(&r, dir, kif, m, h,
|
2004-02-26 02:04:28 +00:00
|
|
|
&pd, &a, &ruleset);
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (h->ip_p) {
|
|
|
|
|
|
|
|
case IPPROTO_TCP: {
|
|
|
|
struct tcphdr th;
|
|
|
|
|
|
|
|
pd.hdr.tcp = &th;
|
|
|
|
if (!pf_pull_hdr(m, off, &th, sizeof(th),
|
|
|
|
&action, &reason, AF_INET)) {
|
|
|
|
log = action != PF_PASS;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
pd.p_len = pd.tot_len - off - (th.th_off << 2);
|
|
|
|
if ((th.th_flags & TH_ACK) && pd.p_len == 0)
|
|
|
|
pqid = 1;
|
2004-06-16 23:24:02 +00:00
|
|
|
action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
|
2004-02-26 02:04:28 +00:00
|
|
|
if (action == PF_DROP)
|
2004-06-16 23:24:02 +00:00
|
|
|
goto done;
|
|
|
|
action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
|
2004-02-26 02:04:28 +00:00
|
|
|
&reason);
|
|
|
|
if (action == PF_PASS) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#if NPFSYNC > 0
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (pfsync_update_state_ptr != NULL)
|
|
|
|
pfsync_update_state_ptr(s);
|
|
|
|
#else
|
2004-06-16 23:24:02 +00:00
|
|
|
pfsync_update_state(s);
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
#endif /* NPFSYNC */
|
2004-02-26 02:04:28 +00:00
|
|
|
r = s->rule.ptr;
|
2004-06-16 23:24:02 +00:00
|
|
|
a = s->anchor.ptr;
|
2004-02-26 02:04:28 +00:00
|
|
|
log = s->log;
|
|
|
|
} else if (s == NULL)
|
2004-09-29 04:54:33 +00:00
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
action = pf_test_rule(&r, &s, dir, kif,
|
2005-05-03 16:43:32 +00:00
|
|
|
m, off, h, &pd, &a, &ruleset, NULL, inp);
|
2004-09-29 04:54:33 +00:00
|
|
|
#else
|
2011-06-28 11:57:25 +00:00
|
|
|
action = pf_test_rule(&r, &s, dir, kif,
|
2005-05-03 16:43:32 +00:00
|
|
|
m, off, h, &pd, &a, &ruleset, &ipintrq);
|
2004-09-29 04:54:33 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
case IPPROTO_UDP: {
|
|
|
|
struct udphdr uh;
|
|
|
|
|
|
|
|
pd.hdr.udp = &uh;
|
|
|
|
if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
|
|
|
|
&action, &reason, AF_INET)) {
|
|
|
|
log = action != PF_PASS;
|
|
|
|
goto done;
|
|
|
|
}
|
2004-06-16 23:24:02 +00:00
|
|
|
if (uh.uh_dport == 0 ||
|
|
|
|
ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
|
|
|
|
ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
|
|
|
|
action = PF_DROP;
|
2007-07-03 12:16:07 +00:00
|
|
|
REASON_SET(&reason, PFRES_SHORT);
|
2004-06-16 23:24:02 +00:00
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
|
2004-02-26 02:04:28 +00:00
|
|
|
if (action == PF_PASS) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#if NPFSYNC > 0
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (pfsync_update_state_ptr != NULL)
|
|
|
|
pfsync_update_state_ptr(s);
|
|
|
|
#else
|
2004-06-16 23:24:02 +00:00
|
|
|
pfsync_update_state(s);
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
#endif /* NPFSYNC */
|
2004-02-26 02:04:28 +00:00
|
|
|
r = s->rule.ptr;
|
|
|
|
a = s->anchor.ptr;
|
|
|
|
log = s->log;
|
|
|
|
} else if (s == NULL)
|
2004-09-29 04:54:33 +00:00
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
action = pf_test_rule(&r, &s, dir, kif,
|
2005-05-03 16:43:32 +00:00
|
|
|
m, off, h, &pd, &a, &ruleset, NULL, inp);
|
2004-09-29 04:54:33 +00:00
|
|
|
#else
|
2011-06-28 11:57:25 +00:00
|
|
|
action = pf_test_rule(&r, &s, dir, kif,
|
2005-05-03 16:43:32 +00:00
|
|
|
m, off, h, &pd, &a, &ruleset, &ipintrq);
|
2004-09-29 04:54:33 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
case IPPROTO_ICMP: {
|
|
|
|
struct icmp ih;
|
|
|
|
|
|
|
|
pd.hdr.icmp = &ih;
|
|
|
|
if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN,
|
|
|
|
&action, &reason, AF_INET)) {
|
|
|
|
log = action != PF_PASS;
|
|
|
|
goto done;
|
|
|
|
}
|
2005-05-03 16:43:32 +00:00
|
|
|
action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd,
|
|
|
|
&reason);
|
2004-02-26 02:04:28 +00:00
|
|
|
if (action == PF_PASS) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#if NPFSYNC > 0
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (pfsync_update_state_ptr != NULL)
|
|
|
|
pfsync_update_state_ptr(s);
|
|
|
|
#else
|
2004-06-16 23:24:02 +00:00
|
|
|
pfsync_update_state(s);
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
#endif /* NPFSYNC */
|
2004-02-26 02:04:28 +00:00
|
|
|
r = s->rule.ptr;
|
|
|
|
a = s->anchor.ptr;
|
|
|
|
log = s->log;
|
|
|
|
} else if (s == NULL)
|
2005-05-03 16:43:32 +00:00
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
action = pf_test_rule(&r, &s, dir, kif,
|
|
|
|
m, off, h, &pd, &a, &ruleset, NULL, inp);
|
2005-05-03 16:43:32 +00:00
|
|
|
#else
|
2011-06-28 11:57:25 +00:00
|
|
|
action = pf_test_rule(&r, &s, dir, kif,
|
2005-05-03 16:43:32 +00:00
|
|
|
m, off, h, &pd, &a, &ruleset, &ipintrq);
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef INET6
|
|
|
|
case IPPROTO_ICMPV6: {
|
|
|
|
action = PF_DROP;
|
|
|
|
DPFPRINTF(PF_DEBUG_MISC,
|
|
|
|
("pf: dropping IPv4 packet with ICMPv6 payload\n"));
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
default:
|
2011-06-28 11:57:25 +00:00
|
|
|
action = pf_test_state_other(&s, dir, kif, m, &pd);
|
2004-02-26 02:04:28 +00:00
|
|
|
if (action == PF_PASS) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#if NPFSYNC > 0
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (pfsync_update_state_ptr != NULL)
|
|
|
|
pfsync_update_state_ptr(s);
|
|
|
|
#else
|
2004-06-16 23:24:02 +00:00
|
|
|
pfsync_update_state(s);
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
#endif /* NPFSYNC */
|
2004-02-26 02:04:28 +00:00
|
|
|
r = s->rule.ptr;
|
|
|
|
a = s->anchor.ptr;
|
|
|
|
log = s->log;
|
|
|
|
} else if (s == NULL)
|
2005-05-03 16:43:32 +00:00
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
action = pf_test_rule(&r, &s, dir, kif, m, off, h,
|
|
|
|
&pd, &a, &ruleset, NULL, inp);
|
2005-05-03 16:43:32 +00:00
|
|
|
#else
|
2011-06-28 11:57:25 +00:00
|
|
|
action = pf_test_rule(&r, &s, dir, kif, m, off, h,
|
2005-05-03 16:43:32 +00:00
|
|
|
&pd, &a, &ruleset, &ipintrq);
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
done:
|
|
|
|
if (action == PF_PASS && h->ip_hl > 5 &&
|
2009-12-24 00:43:44 +00:00
|
|
|
!((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) {
|
2004-02-26 02:04:28 +00:00
|
|
|
action = PF_DROP;
|
2005-05-03 16:43:32 +00:00
|
|
|
REASON_SET(&reason, PFRES_IPOPTIONS);
|
2004-02-26 02:04:28 +00:00
|
|
|
log = 1;
|
|
|
|
DPFPRINTF(PF_DEBUG_MISC,
|
|
|
|
("pf: dropping packet with ip options\n"));
|
|
|
|
}
|
|
|
|
|
2007-07-03 12:16:07 +00:00
|
|
|
if ((s && s->tag) || r->rtableid)
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pf_tag_packet(m, s ? s->tag : 0, r->rtableid, pd.pf_mtag);
|
|
|
|
#else
|
|
|
|
pf_tag_packet(m, s ? s->tag : 0, r->rtableid);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (dir == PF_IN && s && s->key[PF_SK_STACK])
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pd.pf_mtag->statekey = s->key[PF_SK_STACK];
|
|
|
|
#else
|
|
|
|
m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK];
|
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
#ifdef ALTQ
|
|
|
|
if (action == PF_PASS && r->qid) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
2007-07-03 12:16:07 +00:00
|
|
|
if (pqid || (pd.tos & IPTOS_LOWDELAY))
|
|
|
|
pd.pf_mtag->qid = r->pqid;
|
|
|
|
else
|
|
|
|
pd.pf_mtag->qid = r->qid;
|
|
|
|
/* add hints for ecn */
|
|
|
|
pd.pf_mtag->hdr = h;
|
2011-06-28 11:57:25 +00:00
|
|
|
|
|
|
|
#else
|
|
|
|
if (pqid || (pd.tos & IPTOS_LOWDELAY))
|
|
|
|
m->m_pkthdr.pf.qid = r->pqid;
|
|
|
|
else
|
|
|
|
m->m_pkthdr.pf.qid = r->qid;
|
|
|
|
/* add hints for ecn */
|
|
|
|
m->m_pkthdr.pf.hdr = h;
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
2005-05-03 16:43:32 +00:00
|
|
|
#endif /* ALTQ */
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2004-06-16 23:24:02 +00:00
|
|
|
/*
|
|
|
|
* connections redirected to loopback should not match sockets
|
|
|
|
* bound specifically to loopback due to security implications,
|
|
|
|
* see tcp_input() and in_pcblookup_listen().
|
|
|
|
*/
|
|
|
|
if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
|
|
|
|
pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
|
|
|
|
(s->nat_rule.ptr->action == PF_RDR ||
|
|
|
|
s->nat_rule.ptr->action == PF_BINAT) &&
|
2007-07-03 12:16:07 +00:00
|
|
|
(ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
m->m_flags |= M_SKIP_FIREWALL;
|
|
|
|
#else
|
|
|
|
m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (action == PF_PASS && r->divert.port &&
|
|
|
|
ip_divert_ptr != NULL && !PACKET_LOOPED()) {
|
|
|
|
|
|
|
|
ipfwtag = m_tag_alloc(MTAG_IPFW_RULE, 0,
|
|
|
|
sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO);
|
|
|
|
if (ipfwtag != NULL) {
|
|
|
|
((struct ipfw_rule_ref *)(ipfwtag+1))->info = r->divert.port;
|
|
|
|
((struct ipfw_rule_ref *)(ipfwtag+1))->rulenum = dir;
|
|
|
|
|
|
|
|
m_tag_prepend(m, ipfwtag);
|
|
|
|
|
|
|
|
PF_UNLOCK();
|
|
|
|
|
|
|
|
if (m->m_flags & M_FASTFWD_OURS) {
|
|
|
|
pd.pf_mtag->flags |= PF_FASTFWD_OURS_PRESENT;
|
|
|
|
m->m_flags &= ~M_FASTFWD_OURS;
|
|
|
|
}
|
|
|
|
|
|
|
|
ip_divert_ptr(*m0,
|
|
|
|
dir == PF_IN ? DIR_IN : DIR_OUT);
|
|
|
|
*m0 = NULL;
|
|
|
|
return (action);
|
|
|
|
} else {
|
|
|
|
/* XXX: ipfw has the same behaviour! */
|
|
|
|
action = PF_DROP;
|
|
|
|
REASON_SET(&reason, PFRES_MEMORY);
|
|
|
|
log = 1;
|
|
|
|
DPFPRINTF(PF_DEBUG_MISC,
|
|
|
|
("pf: failed to allocate divert tag\n"));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
if (dir == PF_IN && action == PF_PASS && r->divert.port) {
|
|
|
|
struct pf_divert *divert;
|
|
|
|
|
|
|
|
if ((divert = pf_get_divert(m))) {
|
|
|
|
m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED;
|
|
|
|
divert->port = r->divert.port;
|
|
|
|
divert->addr.ipv4 = r->divert.addr.v4;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
2004-06-16 23:24:02 +00:00
|
|
|
|
2007-07-03 12:16:07 +00:00
|
|
|
if (log) {
|
|
|
|
struct pf_rule *lr;
|
|
|
|
|
|
|
|
if (s != NULL && s->nat_rule.ptr != NULL &&
|
|
|
|
s->nat_rule.ptr->log & PF_LOG_ALL)
|
|
|
|
lr = s->nat_rule.ptr;
|
|
|
|
else
|
|
|
|
lr = r;
|
|
|
|
PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, lr, a, ruleset,
|
|
|
|
&pd);
|
|
|
|
}
|
2004-06-16 23:24:02 +00:00
|
|
|
|
|
|
|
kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
|
|
|
|
kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;
|
|
|
|
|
|
|
|
if (action == PF_PASS || r->action == PF_DROP) {
|
2007-07-03 12:16:07 +00:00
|
|
|
dirndx = (dir == PF_OUT);
|
|
|
|
r->packets[dirndx]++;
|
|
|
|
r->bytes[dirndx] += pd.tot_len;
|
2004-06-16 23:24:02 +00:00
|
|
|
if (a != NULL) {
|
2007-07-03 12:16:07 +00:00
|
|
|
a->packets[dirndx]++;
|
|
|
|
a->bytes[dirndx] += pd.tot_len;
|
2004-06-16 23:24:02 +00:00
|
|
|
}
|
|
|
|
if (s != NULL) {
|
|
|
|
if (s->nat_rule.ptr != NULL) {
|
2007-07-03 12:16:07 +00:00
|
|
|
s->nat_rule.ptr->packets[dirndx]++;
|
|
|
|
s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
|
2004-06-16 23:24:02 +00:00
|
|
|
}
|
|
|
|
if (s->src_node != NULL) {
|
2007-07-03 12:16:07 +00:00
|
|
|
s->src_node->packets[dirndx]++;
|
|
|
|
s->src_node->bytes[dirndx] += pd.tot_len;
|
2004-06-16 23:24:02 +00:00
|
|
|
}
|
|
|
|
if (s->nat_src_node != NULL) {
|
2007-07-03 12:16:07 +00:00
|
|
|
s->nat_src_node->packets[dirndx]++;
|
|
|
|
s->nat_src_node->bytes[dirndx] += pd.tot_len;
|
2004-06-16 23:24:02 +00:00
|
|
|
}
|
2007-07-03 12:16:07 +00:00
|
|
|
dirndx = (dir == s->direction) ? 0 : 1;
|
|
|
|
s->packets[dirndx]++;
|
|
|
|
s->bytes[dirndx] += pd.tot_len;
|
2004-06-16 23:24:02 +00:00
|
|
|
}
|
|
|
|
tr = r;
|
|
|
|
nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (nr != NULL && r == &V_pf_default_rule)
|
|
|
|
#else
|
|
|
|
if (nr != NULL && r == &pf_default_rule)
|
|
|
|
#endif
|
|
|
|
tr = nr;
|
2004-06-16 23:24:02 +00:00
|
|
|
if (tr->src.addr.type == PF_ADDR_TABLE)
|
2011-06-28 11:57:25 +00:00
|
|
|
pfr_update_stats(tr->src.addr.p.tbl,
|
|
|
|
(s == NULL) ? pd.src :
|
|
|
|
&s->key[(s->direction == PF_IN)]->
|
|
|
|
addr[(s->direction == PF_OUT)],
|
|
|
|
pd.af, pd.tot_len, dir == PF_OUT,
|
|
|
|
r->action == PF_PASS, tr->src.neg);
|
2004-06-16 23:24:02 +00:00
|
|
|
if (tr->dst.addr.type == PF_ADDR_TABLE)
|
2011-06-28 11:57:25 +00:00
|
|
|
pfr_update_stats(tr->dst.addr.p.tbl,
|
|
|
|
(s == NULL) ? pd.dst :
|
|
|
|
&s->key[(s->direction == PF_IN)]->
|
|
|
|
addr[(s->direction == PF_IN)],
|
|
|
|
pd.af, pd.tot_len, dir == PF_OUT,
|
|
|
|
r->action == PF_PASS, tr->dst.neg);
|
2004-06-16 23:24:02 +00:00
|
|
|
}
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
switch (action) {
|
|
|
|
case PF_SYNPROXY_DROP:
|
2004-02-26 02:04:28 +00:00
|
|
|
m_freem(*m0);
|
2011-06-28 11:57:25 +00:00
|
|
|
case PF_DEFER:
|
2004-02-26 02:04:28 +00:00
|
|
|
*m0 = NULL;
|
|
|
|
action = PF_PASS;
|
2011-06-28 11:57:25 +00:00
|
|
|
break;
|
|
|
|
default:
|
2004-02-26 02:04:28 +00:00
|
|
|
/* pf_route can free the mbuf causing *m0 to become NULL */
|
2011-06-28 11:57:25 +00:00
|
|
|
if (r->rt)
|
|
|
|
pf_route(m0, r, dir, kif->pfik_ifp, s, &pd);
|
|
|
|
break;
|
|
|
|
}
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
PF_UNLOCK();
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
return (action);
|
|
|
|
}
|
|
|
|
#endif /* INET */
|
|
|
|
|
|
|
|
#ifdef INET6
|
|
|
|
int
|
2004-09-29 04:54:33 +00:00
|
|
|
#ifdef __FreeBSD__
|
2005-05-03 16:43:32 +00:00
|
|
|
pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
|
|
|
|
struct ether_header *eh, struct inpcb *inp)
|
2004-09-29 04:54:33 +00:00
|
|
|
#else
|
2005-05-03 16:43:32 +00:00
|
|
|
pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
|
|
|
|
struct ether_header *eh)
|
2004-09-29 04:54:33 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
{
|
2004-06-16 23:24:02 +00:00
|
|
|
struct pfi_kif *kif;
|
|
|
|
u_short action, reason = 0, log = 0;
|
2007-07-03 12:16:07 +00:00
|
|
|
struct mbuf *m = *m0, *n = NULL;
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
struct ip6_hdr *h = NULL;
|
|
|
|
struct pf_rule *a = NULL, *r = &V_pf_default_rule, *tr, *nr;
|
|
|
|
#else
|
2007-07-03 12:16:07 +00:00
|
|
|
struct ip6_hdr *h;
|
2004-06-16 23:24:02 +00:00
|
|
|
struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr;
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2004-06-16 23:24:02 +00:00
|
|
|
struct pf_state *s = NULL;
|
|
|
|
struct pf_ruleset *ruleset = NULL;
|
|
|
|
struct pf_pdesc pd;
|
2007-05-21 20:08:59 +00:00
|
|
|
int off, terminal = 0, dirndx, rh_cnt = 0;
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
PF_LOCK();
|
2011-06-28 11:57:25 +00:00
|
|
|
if (!V_pf_status.running) {
|
2004-02-26 02:34:12 +00:00
|
|
|
PF_UNLOCK();
|
2004-02-26 02:04:28 +00:00
|
|
|
return (PF_PASS);
|
2004-02-26 02:34:12 +00:00
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
#else
|
|
|
|
if (!pf_status.running)
|
|
|
|
return (PF_PASS);
|
2007-07-03 12:16:07 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
memset(&pd, 0, sizeof(pd));
|
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) {
|
2007-07-03 12:16:07 +00:00
|
|
|
PF_UNLOCK();
|
|
|
|
DPFPRINTF(PF_DEBUG_URGENT,
|
2011-06-28 11:57:25 +00:00
|
|
|
("pf_test: pf_get_mtag returned NULL\n"));
|
2007-07-03 12:16:07 +00:00
|
|
|
return (PF_DROP);
|
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
|
|
|
#ifndef __FreeBSD__
|
2005-05-03 16:43:32 +00:00
|
|
|
if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
|
2011-06-28 11:57:25 +00:00
|
|
|
kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif;
|
|
|
|
else
|
2005-05-03 16:43:32 +00:00
|
|
|
#endif
|
2011-06-28 11:57:25 +00:00
|
|
|
kif = (struct pfi_kif *)ifp->if_pf_kif;
|
2005-05-03 16:43:32 +00:00
|
|
|
|
2004-06-16 23:24:02 +00:00
|
|
|
if (kif == NULL) {
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
PF_UNLOCK();
|
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
DPFPRINTF(PF_DEBUG_URGENT,
|
|
|
|
("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname));
|
2004-06-16 23:24:02 +00:00
|
|
|
return (PF_DROP);
|
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
if (kif->pfik_flags & PFI_IFLAG_SKIP)
|
2005-05-03 16:43:32 +00:00
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
{
|
2005-05-03 16:43:32 +00:00
|
|
|
PF_UNLOCK();
|
|
|
|
#endif
|
|
|
|
return (PF_PASS);
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
2005-05-03 16:43:32 +00:00
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2004-06-16 23:24:02 +00:00
|
|
|
|
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
M_ASSERTPKTHDR(m);
|
|
|
|
#else
|
2004-02-26 02:04:28 +00:00
|
|
|
#ifdef DIAGNOSTIC
|
|
|
|
if ((m->m_flags & M_PKTHDR) == 0)
|
2005-05-03 16:43:32 +00:00
|
|
|
panic("non-M_PKTHDR is passed to pf_test6");
|
|
|
|
#endif /* DIAGNOSTIC */
|
2004-02-26 02:04:28 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
if (m->m_pkthdr.len < (int)sizeof(*h)) {
|
|
|
|
action = PF_DROP;
|
|
|
|
REASON_SET(&reason, PFRES_SHORT);
|
|
|
|
log = 1;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (pd.pf_mtag->flags & PF_TAG_GENERATED)
|
|
|
|
#else
|
|
|
|
if (m->m_pkthdr.pf.flags & PF_TAG_GENERATED)
|
|
|
|
#endif
|
|
|
|
return (PF_PASS);
|
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
/* We do IP header normalization and packet reassembly here */
|
2005-05-03 16:43:32 +00:00
|
|
|
if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) {
|
2004-02-26 02:04:28 +00:00
|
|
|
action = PF_DROP;
|
|
|
|
goto done;
|
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
m = *m0; /* pf_normalize messes with m0 */
|
2004-02-26 02:04:28 +00:00
|
|
|
h = mtod(m, struct ip6_hdr *);
|
|
|
|
|
2007-05-21 20:08:59 +00:00
|
|
|
#if 1
|
|
|
|
/*
|
|
|
|
* we do not support jumbogram yet. if we keep going, zero ip6_plen
|
|
|
|
* will do something bad, so drop the packet for now.
|
|
|
|
*/
|
|
|
|
if (htons(h->ip6_plen) == 0) {
|
|
|
|
action = PF_DROP;
|
|
|
|
REASON_SET(&reason, PFRES_NORM); /*XXX*/
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
pd.src = (struct pf_addr *)&h->ip6_src;
|
|
|
|
pd.dst = (struct pf_addr *)&h->ip6_dst;
|
2011-06-28 11:57:25 +00:00
|
|
|
pd.sport = pd.dport = NULL;
|
2004-02-26 02:04:28 +00:00
|
|
|
pd.ip_sum = NULL;
|
2011-06-28 11:57:25 +00:00
|
|
|
pd.proto_sum = NULL;
|
|
|
|
pd.dir = dir;
|
|
|
|
pd.sidx = (dir == PF_IN) ? 0 : 1;
|
|
|
|
pd.didx = (dir == PF_IN) ? 1 : 0;
|
2004-02-26 02:04:28 +00:00
|
|
|
pd.af = AF_INET6;
|
|
|
|
pd.tos = 0;
|
|
|
|
pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
|
2005-05-03 16:43:32 +00:00
|
|
|
pd.eh = eh;
|
2004-02-26 02:04:28 +00:00
|
|
|
|
|
|
|
off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr);
|
|
|
|
pd.proto = h->ip6_nxt;
|
|
|
|
do {
|
|
|
|
switch (pd.proto) {
|
|
|
|
case IPPROTO_FRAGMENT:
|
2004-06-16 23:24:02 +00:00
|
|
|
action = pf_test_fragment(&r, dir, kif, m, h,
|
2004-02-26 02:04:28 +00:00
|
|
|
&pd, &a, &ruleset);
|
|
|
|
if (action == PF_DROP)
|
|
|
|
REASON_SET(&reason, PFRES_FRAG);
|
|
|
|
goto done;
|
2007-05-21 20:08:59 +00:00
|
|
|
case IPPROTO_ROUTING: {
|
|
|
|
struct ip6_rthdr rthdr;
|
|
|
|
|
|
|
|
if (rh_cnt++) {
|
|
|
|
DPFPRINTF(PF_DEBUG_MISC,
|
|
|
|
("pf: IPv6 more than one rthdr\n"));
|
|
|
|
action = PF_DROP;
|
|
|
|
REASON_SET(&reason, PFRES_IPOPTIONS);
|
|
|
|
log = 1;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL,
|
|
|
|
&reason, pd.af)) {
|
|
|
|
DPFPRINTF(PF_DEBUG_MISC,
|
|
|
|
("pf: IPv6 short rthdr\n"));
|
|
|
|
action = PF_DROP;
|
|
|
|
REASON_SET(&reason, PFRES_SHORT);
|
|
|
|
log = 1;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
|
|
|
|
DPFPRINTF(PF_DEBUG_MISC,
|
|
|
|
("pf: IPv6 rthdr0\n"));
|
|
|
|
action = PF_DROP;
|
|
|
|
REASON_SET(&reason, PFRES_IPOPTIONS);
|
|
|
|
log = 1;
|
|
|
|
goto done;
|
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
/* FALLTHROUGH */
|
2007-05-21 20:08:59 +00:00
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
case IPPROTO_AH:
|
|
|
|
case IPPROTO_HOPOPTS:
|
|
|
|
case IPPROTO_DSTOPTS: {
|
|
|
|
/* get next header and header length */
|
|
|
|
struct ip6_ext opt6;
|
|
|
|
|
|
|
|
if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
|
2005-05-03 16:43:32 +00:00
|
|
|
NULL, &reason, pd.af)) {
|
2004-02-26 02:04:28 +00:00
|
|
|
DPFPRINTF(PF_DEBUG_MISC,
|
|
|
|
("pf: IPv6 short opt\n"));
|
|
|
|
action = PF_DROP;
|
|
|
|
log = 1;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
if (pd.proto == IPPROTO_AH)
|
|
|
|
off += (opt6.ip6e_len + 2) * 4;
|
|
|
|
else
|
|
|
|
off += (opt6.ip6e_len + 1) * 8;
|
|
|
|
pd.proto = opt6.ip6e_nxt;
|
|
|
|
/* goto the next header */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
terminal++;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
} while (!terminal);
|
|
|
|
|
2007-07-03 12:16:07 +00:00
|
|
|
/* if there's no routing header, use unmodified mbuf for checksumming */
|
|
|
|
if (!n)
|
|
|
|
n = m;
|
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
switch (pd.proto) {
|
|
|
|
|
|
|
|
case IPPROTO_TCP: {
|
|
|
|
struct tcphdr th;
|
|
|
|
|
|
|
|
pd.hdr.tcp = &th;
|
|
|
|
if (!pf_pull_hdr(m, off, &th, sizeof(th),
|
|
|
|
&action, &reason, AF_INET6)) {
|
|
|
|
log = action != PF_PASS;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
pd.p_len = pd.tot_len - off - (th.th_off << 2);
|
2004-06-16 23:24:02 +00:00
|
|
|
action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
|
2004-02-26 02:04:28 +00:00
|
|
|
if (action == PF_DROP)
|
2004-06-16 23:24:02 +00:00
|
|
|
goto done;
|
|
|
|
action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
|
2004-02-26 02:04:28 +00:00
|
|
|
&reason);
|
|
|
|
if (action == PF_PASS) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#if NPFSYNC > 0
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (pfsync_update_state_ptr != NULL)
|
|
|
|
pfsync_update_state_ptr(s);
|
|
|
|
#else
|
2004-06-16 23:24:02 +00:00
|
|
|
pfsync_update_state(s);
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
#endif /* NPFSYNC */
|
2004-02-26 02:04:28 +00:00
|
|
|
r = s->rule.ptr;
|
2004-06-16 23:24:02 +00:00
|
|
|
a = s->anchor.ptr;
|
2004-02-26 02:04:28 +00:00
|
|
|
log = s->log;
|
|
|
|
} else if (s == NULL)
|
2004-09-29 04:54:33 +00:00
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
action = pf_test_rule(&r, &s, dir, kif,
|
2005-05-03 16:43:32 +00:00
|
|
|
m, off, h, &pd, &a, &ruleset, NULL, inp);
|
2004-09-29 04:54:33 +00:00
|
|
|
#else
|
2011-06-28 11:57:25 +00:00
|
|
|
action = pf_test_rule(&r, &s, dir, kif,
|
2005-05-03 16:43:32 +00:00
|
|
|
m, off, h, &pd, &a, &ruleset, &ip6intrq);
|
2004-09-29 04:54:33 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
case IPPROTO_UDP: {
|
|
|
|
struct udphdr uh;
|
|
|
|
|
|
|
|
pd.hdr.udp = &uh;
|
|
|
|
if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
|
|
|
|
&action, &reason, AF_INET6)) {
|
|
|
|
log = action != PF_PASS;
|
|
|
|
goto done;
|
|
|
|
}
|
2004-06-16 23:24:02 +00:00
|
|
|
if (uh.uh_dport == 0 ||
|
|
|
|
ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
|
|
|
|
ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
|
|
|
|
action = PF_DROP;
|
2007-07-03 12:16:07 +00:00
|
|
|
REASON_SET(&reason, PFRES_SHORT);
|
2004-06-16 23:24:02 +00:00
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
|
2004-02-26 02:04:28 +00:00
|
|
|
if (action == PF_PASS) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#if NPFSYNC > 0
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (pfsync_update_state_ptr != NULL)
|
|
|
|
pfsync_update_state_ptr(s);
|
|
|
|
#else
|
2004-06-16 23:24:02 +00:00
|
|
|
pfsync_update_state(s);
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
#endif /* NPFSYNC */
|
2004-02-26 02:04:28 +00:00
|
|
|
r = s->rule.ptr;
|
2004-06-16 23:24:02 +00:00
|
|
|
a = s->anchor.ptr;
|
2004-02-26 02:04:28 +00:00
|
|
|
log = s->log;
|
|
|
|
} else if (s == NULL)
|
2004-09-29 04:54:33 +00:00
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
action = pf_test_rule(&r, &s, dir, kif,
|
2005-05-03 16:43:32 +00:00
|
|
|
m, off, h, &pd, &a, &ruleset, NULL, inp);
|
2004-09-29 04:54:33 +00:00
|
|
|
#else
|
2011-06-28 11:57:25 +00:00
|
|
|
action = pf_test_rule(&r, &s, dir, kif,
|
2005-05-03 16:43:32 +00:00
|
|
|
m, off, h, &pd, &a, &ruleset, &ip6intrq);
|
2004-09-29 04:54:33 +00:00
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
case IPPROTO_ICMP: {
|
|
|
|
action = PF_DROP;
|
|
|
|
DPFPRINTF(PF_DEBUG_MISC,
|
|
|
|
("pf: dropping IPv6 packet with ICMPv4 payload\n"));
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
case IPPROTO_ICMPV6: {
|
|
|
|
struct icmp6_hdr ih;
|
|
|
|
|
|
|
|
pd.hdr.icmp6 = &ih;
|
|
|
|
if (!pf_pull_hdr(m, off, &ih, sizeof(ih),
|
|
|
|
&action, &reason, AF_INET6)) {
|
|
|
|
log = action != PF_PASS;
|
|
|
|
goto done;
|
|
|
|
}
|
2004-06-16 23:24:02 +00:00
|
|
|
action = pf_test_state_icmp(&s, dir, kif,
|
2005-05-03 16:43:32 +00:00
|
|
|
m, off, h, &pd, &reason);
|
2004-02-26 02:04:28 +00:00
|
|
|
if (action == PF_PASS) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#if NPFSYNC > 0
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (pfsync_update_state_ptr != NULL)
|
|
|
|
pfsync_update_state_ptr(s);
|
|
|
|
#else
|
2004-06-16 23:24:02 +00:00
|
|
|
pfsync_update_state(s);
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
#endif /* NPFSYNC */
|
2004-02-26 02:04:28 +00:00
|
|
|
r = s->rule.ptr;
|
2004-06-16 23:24:02 +00:00
|
|
|
a = s->anchor.ptr;
|
2004-02-26 02:04:28 +00:00
|
|
|
log = s->log;
|
|
|
|
} else if (s == NULL)
|
2005-05-03 16:43:32 +00:00
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
action = pf_test_rule(&r, &s, dir, kif,
|
|
|
|
m, off, h, &pd, &a, &ruleset, NULL, inp);
|
2005-05-03 16:43:32 +00:00
|
|
|
#else
|
2011-06-28 11:57:25 +00:00
|
|
|
action = pf_test_rule(&r, &s, dir, kif,
|
2005-05-03 16:43:32 +00:00
|
|
|
m, off, h, &pd, &a, &ruleset, &ip6intrq);
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
default:
|
2011-06-28 11:57:25 +00:00
|
|
|
action = pf_test_state_other(&s, dir, kif, m, &pd);
|
2004-06-16 23:24:02 +00:00
|
|
|
if (action == PF_PASS) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#if NPFSYNC > 0
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (pfsync_update_state_ptr != NULL)
|
|
|
|
pfsync_update_state_ptr(s);
|
|
|
|
#else
|
2005-05-03 16:43:32 +00:00
|
|
|
pfsync_update_state(s);
|
2011-06-28 11:57:25 +00:00
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
#endif /* NPFSYNC */
|
2004-06-16 23:24:02 +00:00
|
|
|
r = s->rule.ptr;
|
|
|
|
a = s->anchor.ptr;
|
|
|
|
log = s->log;
|
|
|
|
} else if (s == NULL)
|
2005-05-03 16:43:32 +00:00
|
|
|
#ifdef __FreeBSD__
|
2011-06-28 11:57:25 +00:00
|
|
|
action = pf_test_rule(&r, &s, dir, kif, m, off, h,
|
|
|
|
&pd, &a, &ruleset, NULL, inp);
|
2005-05-03 16:43:32 +00:00
|
|
|
#else
|
2011-06-28 11:57:25 +00:00
|
|
|
action = pf_test_rule(&r, &s, dir, kif, m, off, h,
|
2005-05-03 16:43:32 +00:00
|
|
|
&pd, &a, &ruleset, &ip6intrq);
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
done:
|
2011-06-28 11:57:25 +00:00
|
|
|
if (n != m) {
|
|
|
|
m_freem(n);
|
|
|
|
n = NULL;
|
|
|
|
}
|
|
|
|
|
2007-05-21 20:08:59 +00:00
|
|
|
/* handle dangerous IPv6 extension headers. */
|
|
|
|
if (action == PF_PASS && rh_cnt &&
|
2009-12-24 00:43:44 +00:00
|
|
|
!((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) {
|
2007-05-21 20:08:59 +00:00
|
|
|
action = PF_DROP;
|
|
|
|
REASON_SET(&reason, PFRES_IPOPTIONS);
|
|
|
|
log = 1;
|
|
|
|
DPFPRINTF(PF_DEBUG_MISC,
|
|
|
|
("pf: dropping packet with dangerous v6 headers\n"));
|
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2007-07-03 12:16:07 +00:00
|
|
|
if ((s && s->tag) || r->rtableid)
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pf_tag_packet(m, s ? s->tag : 0, r->rtableid, pd.pf_mtag);
|
|
|
|
#else
|
|
|
|
pf_tag_packet(m, s ? s->tag : 0, r->rtableid);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (dir == PF_IN && s && s->key[PF_SK_STACK])
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
pd.pf_mtag->statekey = s->key[PF_SK_STACK];
|
|
|
|
#else
|
|
|
|
m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK];
|
|
|
|
#endif
|
2005-05-03 16:43:32 +00:00
|
|
|
|
2004-02-26 02:04:28 +00:00
|
|
|
#ifdef ALTQ
|
|
|
|
if (action == PF_PASS && r->qid) {
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
2007-07-03 12:16:07 +00:00
|
|
|
if (pd.tos & IPTOS_LOWDELAY)
|
|
|
|
pd.pf_mtag->qid = r->pqid;
|
|
|
|
else
|
|
|
|
pd.pf_mtag->qid = r->qid;
|
|
|
|
/* add hints for ecn */
|
|
|
|
pd.pf_mtag->hdr = h;
|
2011-06-28 11:57:25 +00:00
|
|
|
#else
|
|
|
|
if (pd.tos & IPTOS_LOWDELAY)
|
|
|
|
m->m_pkthdr.pf.qid = r->pqid;
|
|
|
|
else
|
|
|
|
m->m_pkthdr.pf.qid = r->qid;
|
|
|
|
/* add hints for ecn */
|
|
|
|
m->m_pkthdr.pf.hdr = h;
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
}
|
2005-05-03 16:43:32 +00:00
|
|
|
#endif /* ALTQ */
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2004-06-16 23:24:02 +00:00
|
|
|
if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
|
|
|
|
pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
|
|
|
|
(s->nat_rule.ptr->action == PF_RDR ||
|
|
|
|
s->nat_rule.ptr->action == PF_BINAT) &&
|
2007-07-03 12:16:07 +00:00
|
|
|
IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
m->m_flags |= M_SKIP_FIREWALL;
|
|
|
|
#else
|
|
|
|
m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
/* XXX: Anybody working on it?! */
|
|
|
|
if (r->divert.port)
|
|
|
|
printf("pf: divert(9) is not supported for IPv6\n");
|
|
|
|
#else
|
|
|
|
if (dir == PF_IN && action == PF_PASS && r->divert.port) {
|
|
|
|
struct pf_divert *divert;
|
|
|
|
|
|
|
|
if ((divert = pf_get_divert(m))) {
|
|
|
|
m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED;
|
|
|
|
divert->port = r->divert.port;
|
|
|
|
divert->addr.ipv6 = r->divert.addr.v6;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
2004-06-16 23:24:02 +00:00
|
|
|
|
2007-07-03 12:16:07 +00:00
|
|
|
if (log) {
|
|
|
|
struct pf_rule *lr;
|
|
|
|
|
|
|
|
if (s != NULL && s->nat_rule.ptr != NULL &&
|
|
|
|
s->nat_rule.ptr->log & PF_LOG_ALL)
|
|
|
|
lr = s->nat_rule.ptr;
|
|
|
|
else
|
|
|
|
lr = r;
|
|
|
|
PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, lr, a, ruleset,
|
|
|
|
&pd);
|
|
|
|
}
|
2004-06-16 23:24:02 +00:00
|
|
|
|
|
|
|
kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
|
|
|
|
kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;
|
|
|
|
|
|
|
|
if (action == PF_PASS || r->action == PF_DROP) {
|
2007-07-03 12:16:07 +00:00
|
|
|
dirndx = (dir == PF_OUT);
|
|
|
|
r->packets[dirndx]++;
|
|
|
|
r->bytes[dirndx] += pd.tot_len;
|
2004-06-16 23:24:02 +00:00
|
|
|
if (a != NULL) {
|
2007-07-03 12:16:07 +00:00
|
|
|
a->packets[dirndx]++;
|
|
|
|
a->bytes[dirndx] += pd.tot_len;
|
2004-06-16 23:24:02 +00:00
|
|
|
}
|
|
|
|
if (s != NULL) {
|
|
|
|
if (s->nat_rule.ptr != NULL) {
|
2007-07-03 12:16:07 +00:00
|
|
|
s->nat_rule.ptr->packets[dirndx]++;
|
|
|
|
s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
|
2004-06-16 23:24:02 +00:00
|
|
|
}
|
|
|
|
if (s->src_node != NULL) {
|
2007-07-03 12:16:07 +00:00
|
|
|
s->src_node->packets[dirndx]++;
|
|
|
|
s->src_node->bytes[dirndx] += pd.tot_len;
|
2004-06-16 23:24:02 +00:00
|
|
|
}
|
|
|
|
if (s->nat_src_node != NULL) {
|
2007-07-03 12:16:07 +00:00
|
|
|
s->nat_src_node->packets[dirndx]++;
|
|
|
|
s->nat_src_node->bytes[dirndx] += pd.tot_len;
|
2004-06-16 23:24:02 +00:00
|
|
|
}
|
2007-07-03 12:16:07 +00:00
|
|
|
dirndx = (dir == s->direction) ? 0 : 1;
|
|
|
|
s->packets[dirndx]++;
|
|
|
|
s->bytes[dirndx] += pd.tot_len;
|
2004-06-16 23:24:02 +00:00
|
|
|
}
|
|
|
|
tr = r;
|
|
|
|
nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
|
2011-06-28 11:57:25 +00:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (nr != NULL && r == &V_pf_default_rule)
|
|
|
|
#else
|
|
|
|
if (nr != NULL && r == &pf_default_rule)
|
|
|
|
#endif
|
|
|
|
tr = nr;
|
2004-06-16 23:24:02 +00:00
|
|
|
if (tr->src.addr.type == PF_ADDR_TABLE)
|
2011-06-28 11:57:25 +00:00
|
|
|
pfr_update_stats(tr->src.addr.p.tbl,
|
|
|
|
(s == NULL) ? pd.src :
|
|
|
|
&s->key[(s->direction == PF_IN)]->addr[0],
|
|
|
|
pd.af, pd.tot_len, dir == PF_OUT,
|
|
|
|
r->action == PF_PASS, tr->src.neg);
|
2004-06-16 23:24:02 +00:00
|
|
|
if (tr->dst.addr.type == PF_ADDR_TABLE)
|
2011-06-28 11:57:25 +00:00
|
|
|
pfr_update_stats(tr->dst.addr.p.tbl,
|
|
|
|
(s == NULL) ? pd.dst :
|
|
|
|
&s->key[(s->direction == PF_IN)]->addr[1],
|
|
|
|
pd.af, pd.tot_len, dir == PF_OUT,
|
|
|
|
r->action == PF_PASS, tr->dst.neg);
|
2004-06-16 23:24:02 +00:00
|
|
|
}
|
|
|
|
|
2011-06-28 11:57:25 +00:00
|
|
|
switch (action) {
|
|
|
|
case PF_SYNPROXY_DROP:
|
2004-02-26 02:04:28 +00:00
|
|
|
m_freem(*m0);
|
2011-06-28 11:57:25 +00:00
|
|
|
case PF_DEFER:
|
2004-02-26 02:04:28 +00:00
|
|
|
*m0 = NULL;
|
|
|
|
action = PF_PASS;
|
2011-06-28 11:57:25 +00:00
|
|
|
break;
|
|
|
|
default:
|
2004-02-26 02:04:28 +00:00
|
|
|
/* pf_route6 can free the mbuf causing *m0 to become NULL */
|
2011-06-28 11:57:25 +00:00
|
|
|
if (r->rt)
|
|
|
|
pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd);
|
|
|
|
break;
|
|
|
|
}
|
2004-02-26 02:04:28 +00:00
|
|
|
|
2004-03-17 21:11:02 +00:00
|
|
|
#ifdef __FreeBSD__
|
2004-02-26 02:34:12 +00:00
|
|
|
PF_UNLOCK();
|
|
|
|
#endif
|
2004-02-26 02:04:28 +00:00
|
|
|
return (action);
|
|
|
|
}
|
|
|
|
#endif /* INET6 */
|
2005-05-03 16:43:32 +00:00
|
|
|
|
|
|
|
int
|
|
|
|
pf_check_congestion(struct ifqueue *ifq)
|
|
|
|
{
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
/* XXX_IMPORT: later */
|
|
|
|
return (0);
|
|
|
|
#else
|
|
|
|
if (ifq->ifq_congestion)
|
|
|
|
return (1);
|
|
|
|
else
|
|
|
|
return (0);
|
|
|
|
#endif
|
|
|
|
}
|
2011-06-28 11:57:25 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* must be called whenever any addressing information such as
|
|
|
|
* address, port, protocol has changed
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
pf_pkt_addr_changed(struct mbuf *m)
|
|
|
|
{
|
|
|
|
#ifdef __FreeBSD__
|
|
|
|
struct pf_mtag *pf_tag;
|
|
|
|
|
|
|
|
if ((pf_tag = pf_find_mtag(m)) != NULL)
|
|
|
|
pf_tag->statekey = NULL;
|
|
|
|
#else
|
|
|
|
m->m_pkthdr.pf.statekey = NULL;
|
|
|
|
#endif
|
|
|
|
}
|